{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 9258, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003240440699935191, "grad_norm": 4.436971187591553, "learning_rate": 3.597122302158274e-08, "loss": 0.7408, "step": 1 }, { "epoch": 0.0006480881399870382, "grad_norm": 4.44703483581543, "learning_rate": 7.194244604316547e-08, "loss": 0.7573, "step": 2 }, { "epoch": 0.0009721322099805574, "grad_norm": 4.3990912437438965, "learning_rate": 1.0791366906474822e-07, "loss": 0.7465, "step": 3 }, { "epoch": 0.0012961762799740765, "grad_norm": 4.6868085861206055, "learning_rate": 1.4388489208633095e-07, "loss": 0.7651, "step": 4 }, { "epoch": 0.0016202203499675956, "grad_norm": 4.419036865234375, "learning_rate": 1.7985611510791368e-07, "loss": 0.74, "step": 5 }, { "epoch": 0.0019442644199611147, "grad_norm": 4.579814910888672, "learning_rate": 2.1582733812949643e-07, "loss": 0.7446, "step": 6 }, { "epoch": 0.002268308489954634, "grad_norm": 4.31318998336792, "learning_rate": 2.5179856115107916e-07, "loss": 0.7432, "step": 7 }, { "epoch": 0.002592352559948153, "grad_norm": 4.498172760009766, "learning_rate": 2.877697841726619e-07, "loss": 0.7532, "step": 8 }, { "epoch": 0.002916396629941672, "grad_norm": 4.535513877868652, "learning_rate": 3.237410071942446e-07, "loss": 0.7587, "step": 9 }, { "epoch": 0.0032404406999351912, "grad_norm": 4.351844787597656, "learning_rate": 3.5971223021582736e-07, "loss": 0.7507, "step": 10 }, { "epoch": 0.0035644847699287103, "grad_norm": 4.505545616149902, "learning_rate": 3.956834532374101e-07, "loss": 0.7368, "step": 11 }, { "epoch": 0.0038885288399222295, "grad_norm": 4.367278575897217, "learning_rate": 4.3165467625899287e-07, "loss": 0.7298, "step": 12 }, { "epoch": 0.004212572909915748, "grad_norm": 4.497063636779785, "learning_rate": 4.676258992805756e-07, "loss": 0.7495, "step": 13 }, { "epoch": 0.004536616979909268, "grad_norm": 4.694037914276123, "learning_rate": 5.035971223021583e-07, "loss": 0.755, "step": 14 }, { "epoch": 0.004860661049902786, "grad_norm": 4.476596832275391, "learning_rate": 5.39568345323741e-07, "loss": 0.7404, "step": 15 }, { "epoch": 0.005184705119896306, "grad_norm": 4.339330673217773, "learning_rate": 5.755395683453238e-07, "loss": 0.7447, "step": 16 }, { "epoch": 0.005508749189889825, "grad_norm": 4.3112335205078125, "learning_rate": 6.115107913669066e-07, "loss": 0.7217, "step": 17 }, { "epoch": 0.005832793259883344, "grad_norm": 4.041561603546143, "learning_rate": 6.474820143884893e-07, "loss": 0.6987, "step": 18 }, { "epoch": 0.006156837329876863, "grad_norm": 4.28841495513916, "learning_rate": 6.83453237410072e-07, "loss": 0.7106, "step": 19 }, { "epoch": 0.0064808813998703824, "grad_norm": 4.189178943634033, "learning_rate": 7.194244604316547e-07, "loss": 0.7135, "step": 20 }, { "epoch": 0.006804925469863901, "grad_norm": 4.380631446838379, "learning_rate": 7.553956834532375e-07, "loss": 0.7017, "step": 21 }, { "epoch": 0.007128969539857421, "grad_norm": 4.28890323638916, "learning_rate": 7.913669064748202e-07, "loss": 0.6724, "step": 22 }, { "epoch": 0.007453013609850939, "grad_norm": 4.130223274230957, "learning_rate": 8.27338129496403e-07, "loss": 0.587, "step": 23 }, { "epoch": 0.007777057679844459, "grad_norm": 4.008565902709961, "learning_rate": 8.633093525179857e-07, "loss": 0.589, "step": 24 }, { "epoch": 0.008101101749837978, "grad_norm": 4.349413871765137, "learning_rate": 8.992805755395684e-07, "loss": 0.6014, "step": 25 }, { "epoch": 0.008425145819831496, "grad_norm": 3.9135501384735107, "learning_rate": 9.352517985611512e-07, "loss": 0.5901, "step": 26 }, { "epoch": 0.008749189889825017, "grad_norm": 3.899057149887085, "learning_rate": 9.71223021582734e-07, "loss": 0.5715, "step": 27 }, { "epoch": 0.009073233959818535, "grad_norm": 3.7998855113983154, "learning_rate": 1.0071942446043167e-06, "loss": 0.5493, "step": 28 }, { "epoch": 0.009397278029812054, "grad_norm": 3.450526475906372, "learning_rate": 1.0431654676258993e-06, "loss": 0.5471, "step": 29 }, { "epoch": 0.009721322099805573, "grad_norm": 3.6078693866729736, "learning_rate": 1.079136690647482e-06, "loss": 0.528, "step": 30 }, { "epoch": 0.010045366169799093, "grad_norm": 2.785630941390991, "learning_rate": 1.115107913669065e-06, "loss": 0.4635, "step": 31 }, { "epoch": 0.010369410239792612, "grad_norm": 2.324575424194336, "learning_rate": 1.1510791366906476e-06, "loss": 0.4333, "step": 32 }, { "epoch": 0.01069345430978613, "grad_norm": 2.026649236679077, "learning_rate": 1.1870503597122303e-06, "loss": 0.4412, "step": 33 }, { "epoch": 0.01101749837977965, "grad_norm": 1.9035625457763672, "learning_rate": 1.2230215827338131e-06, "loss": 0.4399, "step": 34 }, { "epoch": 0.01134154244977317, "grad_norm": 1.7783927917480469, "learning_rate": 1.2589928057553958e-06, "loss": 0.443, "step": 35 }, { "epoch": 0.011665586519766688, "grad_norm": 1.6150323152542114, "learning_rate": 1.2949640287769785e-06, "loss": 0.4127, "step": 36 }, { "epoch": 0.011989630589760207, "grad_norm": 1.469312310218811, "learning_rate": 1.3309352517985614e-06, "loss": 0.3816, "step": 37 }, { "epoch": 0.012313674659753726, "grad_norm": 1.3809667825698853, "learning_rate": 1.366906474820144e-06, "loss": 0.3966, "step": 38 }, { "epoch": 0.012637718729747246, "grad_norm": 1.4537124633789062, "learning_rate": 1.4028776978417265e-06, "loss": 0.4121, "step": 39 }, { "epoch": 0.012961762799740765, "grad_norm": 1.2365580797195435, "learning_rate": 1.4388489208633094e-06, "loss": 0.3753, "step": 40 }, { "epoch": 0.013285806869734284, "grad_norm": 1.2684414386749268, "learning_rate": 1.474820143884892e-06, "loss": 0.3963, "step": 41 }, { "epoch": 0.013609850939727802, "grad_norm": 1.1836400032043457, "learning_rate": 1.510791366906475e-06, "loss": 0.3849, "step": 42 }, { "epoch": 0.013933895009721323, "grad_norm": 1.1855645179748535, "learning_rate": 1.5467625899280579e-06, "loss": 0.3775, "step": 43 }, { "epoch": 0.014257939079714841, "grad_norm": 1.2605303525924683, "learning_rate": 1.5827338129496403e-06, "loss": 0.3701, "step": 44 }, { "epoch": 0.01458198314970836, "grad_norm": 1.2494381666183472, "learning_rate": 1.618705035971223e-06, "loss": 0.3669, "step": 45 }, { "epoch": 0.014906027219701879, "grad_norm": 1.0984803438186646, "learning_rate": 1.654676258992806e-06, "loss": 0.3553, "step": 46 }, { "epoch": 0.0152300712896954, "grad_norm": 1.0629351139068604, "learning_rate": 1.6906474820143886e-06, "loss": 0.3355, "step": 47 }, { "epoch": 0.015554115359688918, "grad_norm": 1.001863718032837, "learning_rate": 1.7266187050359715e-06, "loss": 0.342, "step": 48 }, { "epoch": 0.015878159429682438, "grad_norm": 1.0362595319747925, "learning_rate": 1.762589928057554e-06, "loss": 0.3459, "step": 49 }, { "epoch": 0.016202203499675955, "grad_norm": 1.2314352989196777, "learning_rate": 1.7985611510791368e-06, "loss": 0.3801, "step": 50 }, { "epoch": 0.016526247569669476, "grad_norm": 1.0542044639587402, "learning_rate": 1.8345323741007195e-06, "loss": 0.3256, "step": 51 }, { "epoch": 0.016850291639662993, "grad_norm": 1.041985034942627, "learning_rate": 1.8705035971223024e-06, "loss": 0.3332, "step": 52 }, { "epoch": 0.017174335709656513, "grad_norm": 1.0842797756195068, "learning_rate": 1.906474820143885e-06, "loss": 0.3541, "step": 53 }, { "epoch": 0.017498379779650033, "grad_norm": 1.0450321435928345, "learning_rate": 1.942446043165468e-06, "loss": 0.3626, "step": 54 }, { "epoch": 0.01782242384964355, "grad_norm": 0.9607313871383667, "learning_rate": 1.9784172661870504e-06, "loss": 0.3229, "step": 55 }, { "epoch": 0.01814646791963707, "grad_norm": 1.0011342763900757, "learning_rate": 2.0143884892086333e-06, "loss": 0.335, "step": 56 }, { "epoch": 0.01847051198963059, "grad_norm": 1.0107240676879883, "learning_rate": 2.050359712230216e-06, "loss": 0.3531, "step": 57 }, { "epoch": 0.018794556059624108, "grad_norm": 0.9726276397705078, "learning_rate": 2.0863309352517987e-06, "loss": 0.326, "step": 58 }, { "epoch": 0.01911860012961763, "grad_norm": 0.9930094480514526, "learning_rate": 2.1223021582733816e-06, "loss": 0.3047, "step": 59 }, { "epoch": 0.019442644199611146, "grad_norm": 1.0234602689743042, "learning_rate": 2.158273381294964e-06, "loss": 0.3608, "step": 60 }, { "epoch": 0.019766688269604666, "grad_norm": 0.9958673715591431, "learning_rate": 2.194244604316547e-06, "loss": 0.3374, "step": 61 }, { "epoch": 0.020090732339598186, "grad_norm": 0.8935235738754272, "learning_rate": 2.23021582733813e-06, "loss": 0.3058, "step": 62 }, { "epoch": 0.020414776409591703, "grad_norm": 0.9323809146881104, "learning_rate": 2.2661870503597123e-06, "loss": 0.3105, "step": 63 }, { "epoch": 0.020738820479585224, "grad_norm": 0.9058836102485657, "learning_rate": 2.302158273381295e-06, "loss": 0.2958, "step": 64 }, { "epoch": 0.021062864549578744, "grad_norm": 0.9206125736236572, "learning_rate": 2.3381294964028776e-06, "loss": 0.3013, "step": 65 }, { "epoch": 0.02138690861957226, "grad_norm": 0.9556636214256287, "learning_rate": 2.3741007194244605e-06, "loss": 0.3155, "step": 66 }, { "epoch": 0.02171095268956578, "grad_norm": 0.9843814373016357, "learning_rate": 2.4100719424460434e-06, "loss": 0.3254, "step": 67 }, { "epoch": 0.0220349967595593, "grad_norm": 0.9352341890335083, "learning_rate": 2.4460431654676263e-06, "loss": 0.281, "step": 68 }, { "epoch": 0.02235904082955282, "grad_norm": 0.905490517616272, "learning_rate": 2.4820143884892088e-06, "loss": 0.2842, "step": 69 }, { "epoch": 0.02268308489954634, "grad_norm": 0.9274157285690308, "learning_rate": 2.5179856115107916e-06, "loss": 0.3019, "step": 70 }, { "epoch": 0.023007128969539856, "grad_norm": 0.9611228704452515, "learning_rate": 2.5539568345323745e-06, "loss": 0.2737, "step": 71 }, { "epoch": 0.023331173039533377, "grad_norm": 0.9289448857307434, "learning_rate": 2.589928057553957e-06, "loss": 0.2978, "step": 72 }, { "epoch": 0.023655217109526897, "grad_norm": 0.9755799174308777, "learning_rate": 2.6258992805755395e-06, "loss": 0.3216, "step": 73 }, { "epoch": 0.023979261179520414, "grad_norm": 0.8912398219108582, "learning_rate": 2.6618705035971228e-06, "loss": 0.3141, "step": 74 }, { "epoch": 0.024303305249513935, "grad_norm": 0.9067403674125671, "learning_rate": 2.6978417266187052e-06, "loss": 0.3239, "step": 75 }, { "epoch": 0.02462734931950745, "grad_norm": 1.0051820278167725, "learning_rate": 2.733812949640288e-06, "loss": 0.3297, "step": 76 }, { "epoch": 0.024951393389500972, "grad_norm": 0.9007921814918518, "learning_rate": 2.7697841726618706e-06, "loss": 0.2907, "step": 77 }, { "epoch": 0.025275437459494492, "grad_norm": 0.9421604871749878, "learning_rate": 2.805755395683453e-06, "loss": 0.3225, "step": 78 }, { "epoch": 0.02559948152948801, "grad_norm": 0.9415566921234131, "learning_rate": 2.8417266187050364e-06, "loss": 0.3117, "step": 79 }, { "epoch": 0.02592352559948153, "grad_norm": 0.8771030902862549, "learning_rate": 2.877697841726619e-06, "loss": 0.2962, "step": 80 }, { "epoch": 0.02624756966947505, "grad_norm": 0.9552968740463257, "learning_rate": 2.9136690647482017e-06, "loss": 0.3261, "step": 81 }, { "epoch": 0.026571613739468567, "grad_norm": 0.875261127948761, "learning_rate": 2.949640287769784e-06, "loss": 0.2574, "step": 82 }, { "epoch": 0.026895657809462088, "grad_norm": 0.9335787892341614, "learning_rate": 2.985611510791367e-06, "loss": 0.3017, "step": 83 }, { "epoch": 0.027219701879455604, "grad_norm": 0.9566580653190613, "learning_rate": 3.02158273381295e-06, "loss": 0.2518, "step": 84 }, { "epoch": 0.027543745949449125, "grad_norm": 1.029236078262329, "learning_rate": 3.0575539568345324e-06, "loss": 0.3018, "step": 85 }, { "epoch": 0.027867790019442645, "grad_norm": 0.9539006352424622, "learning_rate": 3.0935251798561158e-06, "loss": 0.2737, "step": 86 }, { "epoch": 0.028191834089436162, "grad_norm": 0.8749252557754517, "learning_rate": 3.1294964028776982e-06, "loss": 0.2658, "step": 87 }, { "epoch": 0.028515878159429683, "grad_norm": 0.9875462055206299, "learning_rate": 3.1654676258992807e-06, "loss": 0.2728, "step": 88 }, { "epoch": 0.028839922229423203, "grad_norm": 1.0440022945404053, "learning_rate": 3.2014388489208636e-06, "loss": 0.2765, "step": 89 }, { "epoch": 0.02916396629941672, "grad_norm": 0.9891248345375061, "learning_rate": 3.237410071942446e-06, "loss": 0.2938, "step": 90 }, { "epoch": 0.02948801036941024, "grad_norm": 0.9206273555755615, "learning_rate": 3.2733812949640294e-06, "loss": 0.292, "step": 91 }, { "epoch": 0.029812054439403757, "grad_norm": 0.8831924200057983, "learning_rate": 3.309352517985612e-06, "loss": 0.2889, "step": 92 }, { "epoch": 0.030136098509397278, "grad_norm": 0.934688150882721, "learning_rate": 3.3453237410071943e-06, "loss": 0.2814, "step": 93 }, { "epoch": 0.0304601425793908, "grad_norm": 0.9249264001846313, "learning_rate": 3.381294964028777e-06, "loss": 0.2681, "step": 94 }, { "epoch": 0.030784186649384315, "grad_norm": 0.9179894328117371, "learning_rate": 3.4172661870503596e-06, "loss": 0.2711, "step": 95 }, { "epoch": 0.031108230719377836, "grad_norm": 0.8751144409179688, "learning_rate": 3.453237410071943e-06, "loss": 0.2876, "step": 96 }, { "epoch": 0.031432274789371356, "grad_norm": 0.8831710815429688, "learning_rate": 3.4892086330935254e-06, "loss": 0.2742, "step": 97 }, { "epoch": 0.031756318859364877, "grad_norm": 0.876036524772644, "learning_rate": 3.525179856115108e-06, "loss": 0.2656, "step": 98 }, { "epoch": 0.03208036292935839, "grad_norm": 0.906802773475647, "learning_rate": 3.561151079136691e-06, "loss": 0.2778, "step": 99 }, { "epoch": 0.03240440699935191, "grad_norm": 0.9074854254722595, "learning_rate": 3.5971223021582737e-06, "loss": 0.2745, "step": 100 }, { "epoch": 0.03272845106934543, "grad_norm": 0.8964940309524536, "learning_rate": 3.6330935251798566e-06, "loss": 0.2771, "step": 101 }, { "epoch": 0.03305249513933895, "grad_norm": 0.9870682954788208, "learning_rate": 3.669064748201439e-06, "loss": 0.2511, "step": 102 }, { "epoch": 0.03337653920933247, "grad_norm": 0.8685485124588013, "learning_rate": 3.7050359712230215e-06, "loss": 0.2774, "step": 103 }, { "epoch": 0.033700583279325985, "grad_norm": 0.8977862000465393, "learning_rate": 3.741007194244605e-06, "loss": 0.2451, "step": 104 }, { "epoch": 0.034024627349319506, "grad_norm": 0.9542827606201172, "learning_rate": 3.7769784172661873e-06, "loss": 0.2577, "step": 105 }, { "epoch": 0.034348671419313026, "grad_norm": 0.9061799049377441, "learning_rate": 3.81294964028777e-06, "loss": 0.2587, "step": 106 }, { "epoch": 0.034672715489306546, "grad_norm": 0.9147353172302246, "learning_rate": 3.848920863309353e-06, "loss": 0.2791, "step": 107 }, { "epoch": 0.03499675955930007, "grad_norm": 0.9400584101676941, "learning_rate": 3.884892086330936e-06, "loss": 0.2735, "step": 108 }, { "epoch": 0.03532080362929359, "grad_norm": 0.9575251936912537, "learning_rate": 3.920863309352518e-06, "loss": 0.2476, "step": 109 }, { "epoch": 0.0356448476992871, "grad_norm": 0.9304046034812927, "learning_rate": 3.956834532374101e-06, "loss": 0.2667, "step": 110 }, { "epoch": 0.03596889176928062, "grad_norm": 0.9660559296607971, "learning_rate": 3.992805755395684e-06, "loss": 0.2579, "step": 111 }, { "epoch": 0.03629293583927414, "grad_norm": 0.8823477029800415, "learning_rate": 4.028776978417267e-06, "loss": 0.2525, "step": 112 }, { "epoch": 0.03661697990926766, "grad_norm": 0.9394170641899109, "learning_rate": 4.0647482014388495e-06, "loss": 0.2739, "step": 113 }, { "epoch": 0.03694102397926118, "grad_norm": 0.8887931704521179, "learning_rate": 4.100719424460432e-06, "loss": 0.2813, "step": 114 }, { "epoch": 0.037265068049254696, "grad_norm": 0.9069398045539856, "learning_rate": 4.1366906474820145e-06, "loss": 0.2868, "step": 115 }, { "epoch": 0.037589112119248216, "grad_norm": 0.9058663249015808, "learning_rate": 4.172661870503597e-06, "loss": 0.2669, "step": 116 }, { "epoch": 0.03791315618924174, "grad_norm": 0.8527618646621704, "learning_rate": 4.20863309352518e-06, "loss": 0.2499, "step": 117 }, { "epoch": 0.03823720025923526, "grad_norm": 0.8536428809165955, "learning_rate": 4.244604316546763e-06, "loss": 0.2398, "step": 118 }, { "epoch": 0.03856124432922878, "grad_norm": 0.8949153423309326, "learning_rate": 4.280575539568346e-06, "loss": 0.28, "step": 119 }, { "epoch": 0.03888528839922229, "grad_norm": 0.9180740714073181, "learning_rate": 4.316546762589928e-06, "loss": 0.2479, "step": 120 }, { "epoch": 0.03920933246921581, "grad_norm": 0.9510776996612549, "learning_rate": 4.352517985611511e-06, "loss": 0.2592, "step": 121 }, { "epoch": 0.03953337653920933, "grad_norm": 0.8663166165351868, "learning_rate": 4.388489208633094e-06, "loss": 0.2733, "step": 122 }, { "epoch": 0.03985742060920285, "grad_norm": 0.8462815284729004, "learning_rate": 4.424460431654677e-06, "loss": 0.2434, "step": 123 }, { "epoch": 0.04018146467919637, "grad_norm": 0.8777222037315369, "learning_rate": 4.46043165467626e-06, "loss": 0.2746, "step": 124 }, { "epoch": 0.04050550874918989, "grad_norm": 0.8853914737701416, "learning_rate": 4.496402877697842e-06, "loss": 0.2502, "step": 125 }, { "epoch": 0.04082955281918341, "grad_norm": 0.8713170886039734, "learning_rate": 4.5323741007194245e-06, "loss": 0.2437, "step": 126 }, { "epoch": 0.04115359688917693, "grad_norm": 0.9855924248695374, "learning_rate": 4.5683453237410074e-06, "loss": 0.2915, "step": 127 }, { "epoch": 0.04147764095917045, "grad_norm": 0.8738729953765869, "learning_rate": 4.60431654676259e-06, "loss": 0.2375, "step": 128 }, { "epoch": 0.04180168502916397, "grad_norm": 1.1465152502059937, "learning_rate": 4.640287769784173e-06, "loss": 0.2789, "step": 129 }, { "epoch": 0.04212572909915749, "grad_norm": 0.9107539057731628, "learning_rate": 4.676258992805755e-06, "loss": 0.2642, "step": 130 }, { "epoch": 0.042449773169151, "grad_norm": 0.8694542050361633, "learning_rate": 4.712230215827339e-06, "loss": 0.2586, "step": 131 }, { "epoch": 0.04277381723914452, "grad_norm": 0.86564040184021, "learning_rate": 4.748201438848921e-06, "loss": 0.2501, "step": 132 }, { "epoch": 0.04309786130913804, "grad_norm": 0.8690810799598694, "learning_rate": 4.784172661870504e-06, "loss": 0.2424, "step": 133 }, { "epoch": 0.04342190537913156, "grad_norm": 0.8725466728210449, "learning_rate": 4.820143884892087e-06, "loss": 0.2496, "step": 134 }, { "epoch": 0.043745949449125084, "grad_norm": 0.9374155402183533, "learning_rate": 4.856115107913669e-06, "loss": 0.2509, "step": 135 }, { "epoch": 0.0440699935191186, "grad_norm": 0.9191246628761292, "learning_rate": 4.892086330935253e-06, "loss": 0.2552, "step": 136 }, { "epoch": 0.04439403758911212, "grad_norm": 0.9410930871963501, "learning_rate": 4.928057553956835e-06, "loss": 0.2721, "step": 137 }, { "epoch": 0.04471808165910564, "grad_norm": 0.8686294555664062, "learning_rate": 4.9640287769784175e-06, "loss": 0.2457, "step": 138 }, { "epoch": 0.04504212572909916, "grad_norm": 0.8420104384422302, "learning_rate": 5e-06, "loss": 0.2469, "step": 139 }, { "epoch": 0.04536616979909268, "grad_norm": 0.8580688834190369, "learning_rate": 5.035971223021583e-06, "loss": 0.2439, "step": 140 }, { "epoch": 0.04569021386908619, "grad_norm": 0.9308591485023499, "learning_rate": 5.071942446043165e-06, "loss": 0.276, "step": 141 }, { "epoch": 0.04601425793907971, "grad_norm": 0.9194615483283997, "learning_rate": 5.107913669064749e-06, "loss": 0.2731, "step": 142 }, { "epoch": 0.04633830200907323, "grad_norm": 0.9039499759674072, "learning_rate": 5.143884892086332e-06, "loss": 0.2535, "step": 143 }, { "epoch": 0.046662346079066754, "grad_norm": 0.8582255840301514, "learning_rate": 5.179856115107914e-06, "loss": 0.2396, "step": 144 }, { "epoch": 0.046986390149060274, "grad_norm": 0.8596925735473633, "learning_rate": 5.215827338129497e-06, "loss": 0.253, "step": 145 }, { "epoch": 0.047310434219053794, "grad_norm": 0.895989716053009, "learning_rate": 5.251798561151079e-06, "loss": 0.235, "step": 146 }, { "epoch": 0.04763447828904731, "grad_norm": 0.8556272983551025, "learning_rate": 5.287769784172663e-06, "loss": 0.2552, "step": 147 }, { "epoch": 0.04795852235904083, "grad_norm": 0.9312075972557068, "learning_rate": 5.3237410071942456e-06, "loss": 0.2618, "step": 148 }, { "epoch": 0.04828256642903435, "grad_norm": 0.791953444480896, "learning_rate": 5.359712230215828e-06, "loss": 0.2401, "step": 149 }, { "epoch": 0.04860661049902787, "grad_norm": 0.8349822163581848, "learning_rate": 5.3956834532374105e-06, "loss": 0.2266, "step": 150 }, { "epoch": 0.04893065456902139, "grad_norm": 0.8655434250831604, "learning_rate": 5.4316546762589925e-06, "loss": 0.2502, "step": 151 }, { "epoch": 0.0492546986390149, "grad_norm": 0.9085103869438171, "learning_rate": 5.467625899280576e-06, "loss": 0.2561, "step": 152 }, { "epoch": 0.04957874270900842, "grad_norm": 0.9157651662826538, "learning_rate": 5.503597122302159e-06, "loss": 0.248, "step": 153 }, { "epoch": 0.049902786779001944, "grad_norm": 0.895823061466217, "learning_rate": 5.539568345323741e-06, "loss": 0.2582, "step": 154 }, { "epoch": 0.050226830848995464, "grad_norm": 0.8751013278961182, "learning_rate": 5.575539568345324e-06, "loss": 0.2537, "step": 155 }, { "epoch": 0.050550874918988985, "grad_norm": 0.9522944092750549, "learning_rate": 5.611510791366906e-06, "loss": 0.2764, "step": 156 }, { "epoch": 0.0508749189889825, "grad_norm": 0.9214081168174744, "learning_rate": 5.64748201438849e-06, "loss": 0.2592, "step": 157 }, { "epoch": 0.05119896305897602, "grad_norm": 0.8131060004234314, "learning_rate": 5.683453237410073e-06, "loss": 0.2302, "step": 158 }, { "epoch": 0.05152300712896954, "grad_norm": 1.0334426164627075, "learning_rate": 5.719424460431655e-06, "loss": 0.253, "step": 159 }, { "epoch": 0.05184705119896306, "grad_norm": 0.876676082611084, "learning_rate": 5.755395683453238e-06, "loss": 0.2428, "step": 160 }, { "epoch": 0.05217109526895658, "grad_norm": 0.892617404460907, "learning_rate": 5.79136690647482e-06, "loss": 0.2324, "step": 161 }, { "epoch": 0.0524951393389501, "grad_norm": 0.8631848692893982, "learning_rate": 5.8273381294964035e-06, "loss": 0.2433, "step": 162 }, { "epoch": 0.052819183408943614, "grad_norm": 0.9174634218215942, "learning_rate": 5.863309352517986e-06, "loss": 0.2717, "step": 163 }, { "epoch": 0.053143227478937134, "grad_norm": 0.890937864780426, "learning_rate": 5.899280575539568e-06, "loss": 0.2683, "step": 164 }, { "epoch": 0.053467271548930655, "grad_norm": 0.8314776420593262, "learning_rate": 5.935251798561151e-06, "loss": 0.2417, "step": 165 }, { "epoch": 0.053791315618924175, "grad_norm": 0.8143745064735413, "learning_rate": 5.971223021582734e-06, "loss": 0.2451, "step": 166 }, { "epoch": 0.054115359688917695, "grad_norm": 0.8573601245880127, "learning_rate": 6.007194244604317e-06, "loss": 0.2611, "step": 167 }, { "epoch": 0.05443940375891121, "grad_norm": 0.8676037788391113, "learning_rate": 6.0431654676259e-06, "loss": 0.249, "step": 168 }, { "epoch": 0.05476344782890473, "grad_norm": 0.9017927646636963, "learning_rate": 6.079136690647483e-06, "loss": 0.2486, "step": 169 }, { "epoch": 0.05508749189889825, "grad_norm": 0.8411036729812622, "learning_rate": 6.115107913669065e-06, "loss": 0.2378, "step": 170 }, { "epoch": 0.05541153596889177, "grad_norm": 0.8910001516342163, "learning_rate": 6.151079136690648e-06, "loss": 0.2714, "step": 171 }, { "epoch": 0.05573558003888529, "grad_norm": 0.8422628045082092, "learning_rate": 6.1870503597122315e-06, "loss": 0.2484, "step": 172 }, { "epoch": 0.056059624108878804, "grad_norm": 0.8160513639450073, "learning_rate": 6.2230215827338136e-06, "loss": 0.2442, "step": 173 }, { "epoch": 0.056383668178872325, "grad_norm": 0.9058359265327454, "learning_rate": 6.2589928057553964e-06, "loss": 0.272, "step": 174 }, { "epoch": 0.056707712248865845, "grad_norm": 0.8583076000213623, "learning_rate": 6.2949640287769785e-06, "loss": 0.2545, "step": 175 }, { "epoch": 0.057031756318859365, "grad_norm": 0.8205615282058716, "learning_rate": 6.330935251798561e-06, "loss": 0.2359, "step": 176 }, { "epoch": 0.057355800388852886, "grad_norm": 0.8605383038520813, "learning_rate": 6.366906474820145e-06, "loss": 0.2571, "step": 177 }, { "epoch": 0.057679844458846406, "grad_norm": 0.9030138254165649, "learning_rate": 6.402877697841727e-06, "loss": 0.237, "step": 178 }, { "epoch": 0.05800388852883992, "grad_norm": 0.8796730637550354, "learning_rate": 6.43884892086331e-06, "loss": 0.2538, "step": 179 }, { "epoch": 0.05832793259883344, "grad_norm": 0.8664329648017883, "learning_rate": 6.474820143884892e-06, "loss": 0.2429, "step": 180 }, { "epoch": 0.05865197666882696, "grad_norm": 0.8648440837860107, "learning_rate": 6.510791366906475e-06, "loss": 0.2111, "step": 181 }, { "epoch": 0.05897602073882048, "grad_norm": 0.8421782851219177, "learning_rate": 6.546762589928059e-06, "loss": 0.2377, "step": 182 }, { "epoch": 0.059300064808814, "grad_norm": 1.0346119403839111, "learning_rate": 6.582733812949641e-06, "loss": 0.2555, "step": 183 }, { "epoch": 0.059624108878807515, "grad_norm": 0.9008530378341675, "learning_rate": 6.618705035971224e-06, "loss": 0.2621, "step": 184 }, { "epoch": 0.059948152948801035, "grad_norm": 0.8390885591506958, "learning_rate": 6.654676258992806e-06, "loss": 0.2286, "step": 185 }, { "epoch": 0.060272197018794556, "grad_norm": 0.8984460830688477, "learning_rate": 6.6906474820143886e-06, "loss": 0.2778, "step": 186 }, { "epoch": 0.060596241088788076, "grad_norm": 0.8588024973869324, "learning_rate": 6.726618705035972e-06, "loss": 0.247, "step": 187 }, { "epoch": 0.0609202851587816, "grad_norm": 0.7865144610404968, "learning_rate": 6.762589928057554e-06, "loss": 0.2186, "step": 188 }, { "epoch": 0.06124432922877511, "grad_norm": 1.1462135314941406, "learning_rate": 6.798561151079137e-06, "loss": 0.225, "step": 189 }, { "epoch": 0.06156837329876863, "grad_norm": 0.8443127870559692, "learning_rate": 6.834532374100719e-06, "loss": 0.2369, "step": 190 }, { "epoch": 0.06189241736876215, "grad_norm": 0.8886812925338745, "learning_rate": 6.870503597122302e-06, "loss": 0.2253, "step": 191 }, { "epoch": 0.06221646143875567, "grad_norm": 0.8370912671089172, "learning_rate": 6.906474820143886e-06, "loss": 0.2419, "step": 192 }, { "epoch": 0.06254050550874919, "grad_norm": 0.8914476633071899, "learning_rate": 6.942446043165468e-06, "loss": 0.2395, "step": 193 }, { "epoch": 0.06286454957874271, "grad_norm": 0.7934096455574036, "learning_rate": 6.978417266187051e-06, "loss": 0.2276, "step": 194 }, { "epoch": 0.06318859364873623, "grad_norm": 0.8254308700561523, "learning_rate": 7.014388489208634e-06, "loss": 0.2287, "step": 195 }, { "epoch": 0.06351263771872975, "grad_norm": 0.8662794828414917, "learning_rate": 7.050359712230216e-06, "loss": 0.2424, "step": 196 }, { "epoch": 0.06383668178872326, "grad_norm": 0.8179991841316223, "learning_rate": 7.0863309352517995e-06, "loss": 0.245, "step": 197 }, { "epoch": 0.06416072585871678, "grad_norm": 0.849475622177124, "learning_rate": 7.122302158273382e-06, "loss": 0.2559, "step": 198 }, { "epoch": 0.0644847699287103, "grad_norm": 0.8367174863815308, "learning_rate": 7.1582733812949644e-06, "loss": 0.2409, "step": 199 }, { "epoch": 0.06480881399870382, "grad_norm": 0.8685291409492493, "learning_rate": 7.194244604316547e-06, "loss": 0.2552, "step": 200 }, { "epoch": 0.06513285806869734, "grad_norm": 0.8459415435791016, "learning_rate": 7.230215827338129e-06, "loss": 0.2281, "step": 201 }, { "epoch": 0.06545690213869086, "grad_norm": 0.8228902816772461, "learning_rate": 7.266187050359713e-06, "loss": 0.2223, "step": 202 }, { "epoch": 0.06578094620868438, "grad_norm": 0.920698881149292, "learning_rate": 7.302158273381296e-06, "loss": 0.2479, "step": 203 }, { "epoch": 0.0661049902786779, "grad_norm": 0.8067958354949951, "learning_rate": 7.338129496402878e-06, "loss": 0.2158, "step": 204 }, { "epoch": 0.06642903434867142, "grad_norm": 0.8358391523361206, "learning_rate": 7.374100719424461e-06, "loss": 0.2352, "step": 205 }, { "epoch": 0.06675307841866494, "grad_norm": 0.8111941814422607, "learning_rate": 7.410071942446043e-06, "loss": 0.2319, "step": 206 }, { "epoch": 0.06707712248865846, "grad_norm": 0.8654391169548035, "learning_rate": 7.446043165467627e-06, "loss": 0.251, "step": 207 }, { "epoch": 0.06740116655865197, "grad_norm": 0.8868721127510071, "learning_rate": 7.48201438848921e-06, "loss": 0.2259, "step": 208 }, { "epoch": 0.06772521062864549, "grad_norm": 0.828054666519165, "learning_rate": 7.517985611510792e-06, "loss": 0.2236, "step": 209 }, { "epoch": 0.06804925469863901, "grad_norm": 0.8440101146697998, "learning_rate": 7.5539568345323745e-06, "loss": 0.2405, "step": 210 }, { "epoch": 0.06837329876863253, "grad_norm": 0.8946020603179932, "learning_rate": 7.589928057553958e-06, "loss": 0.2357, "step": 211 }, { "epoch": 0.06869734283862605, "grad_norm": 0.8750216364860535, "learning_rate": 7.62589928057554e-06, "loss": 0.2351, "step": 212 }, { "epoch": 0.06902138690861957, "grad_norm": 0.8706597685813904, "learning_rate": 7.661870503597123e-06, "loss": 0.2213, "step": 213 }, { "epoch": 0.06934543097861309, "grad_norm": 0.8614869713783264, "learning_rate": 7.697841726618706e-06, "loss": 0.2317, "step": 214 }, { "epoch": 0.06966947504860661, "grad_norm": 0.8689197897911072, "learning_rate": 7.733812949640287e-06, "loss": 0.234, "step": 215 }, { "epoch": 0.06999351911860013, "grad_norm": 0.8240856528282166, "learning_rate": 7.769784172661872e-06, "loss": 0.2291, "step": 216 }, { "epoch": 0.07031756318859365, "grad_norm": 0.7766245603561401, "learning_rate": 7.805755395683455e-06, "loss": 0.2302, "step": 217 }, { "epoch": 0.07064160725858717, "grad_norm": 0.8634687066078186, "learning_rate": 7.841726618705036e-06, "loss": 0.2436, "step": 218 }, { "epoch": 0.07096565132858068, "grad_norm": 0.9289287328720093, "learning_rate": 7.877697841726619e-06, "loss": 0.2374, "step": 219 }, { "epoch": 0.0712896953985742, "grad_norm": 0.8757550716400146, "learning_rate": 7.913669064748202e-06, "loss": 0.2577, "step": 220 }, { "epoch": 0.07161373946856772, "grad_norm": 0.8134459853172302, "learning_rate": 7.949640287769785e-06, "loss": 0.2292, "step": 221 }, { "epoch": 0.07193778353856124, "grad_norm": 0.8444440960884094, "learning_rate": 7.985611510791367e-06, "loss": 0.2299, "step": 222 }, { "epoch": 0.07226182760855476, "grad_norm": 0.8103510737419128, "learning_rate": 8.02158273381295e-06, "loss": 0.231, "step": 223 }, { "epoch": 0.07258587167854828, "grad_norm": 0.900696873664856, "learning_rate": 8.057553956834533e-06, "loss": 0.2424, "step": 224 }, { "epoch": 0.0729099157485418, "grad_norm": 0.8289245367050171, "learning_rate": 8.093525179856116e-06, "loss": 0.2419, "step": 225 }, { "epoch": 0.07323395981853532, "grad_norm": 0.7995141744613647, "learning_rate": 8.129496402877699e-06, "loss": 0.2033, "step": 226 }, { "epoch": 0.07355800388852884, "grad_norm": 0.8330203890800476, "learning_rate": 8.165467625899282e-06, "loss": 0.2265, "step": 227 }, { "epoch": 0.07388204795852236, "grad_norm": 0.8520117402076721, "learning_rate": 8.201438848920865e-06, "loss": 0.2273, "step": 228 }, { "epoch": 0.07420609202851587, "grad_norm": 0.8868409991264343, "learning_rate": 8.237410071942446e-06, "loss": 0.2531, "step": 229 }, { "epoch": 0.07453013609850939, "grad_norm": 0.8631070256233215, "learning_rate": 8.273381294964029e-06, "loss": 0.2416, "step": 230 }, { "epoch": 0.07485418016850291, "grad_norm": 0.8252522349357605, "learning_rate": 8.309352517985614e-06, "loss": 0.2352, "step": 231 }, { "epoch": 0.07517822423849643, "grad_norm": 0.8407851457595825, "learning_rate": 8.345323741007195e-06, "loss": 0.2537, "step": 232 }, { "epoch": 0.07550226830848995, "grad_norm": 0.8433776497840881, "learning_rate": 8.381294964028778e-06, "loss": 0.2518, "step": 233 }, { "epoch": 0.07582631237848347, "grad_norm": 0.8409492373466492, "learning_rate": 8.41726618705036e-06, "loss": 0.2292, "step": 234 }, { "epoch": 0.076150356448477, "grad_norm": 0.8289313912391663, "learning_rate": 8.453237410071943e-06, "loss": 0.244, "step": 235 }, { "epoch": 0.07647440051847051, "grad_norm": 0.8278692960739136, "learning_rate": 8.489208633093526e-06, "loss": 0.2642, "step": 236 }, { "epoch": 0.07679844458846403, "grad_norm": 0.868140697479248, "learning_rate": 8.525179856115109e-06, "loss": 0.2503, "step": 237 }, { "epoch": 0.07712248865845756, "grad_norm": 0.933914303779602, "learning_rate": 8.561151079136692e-06, "loss": 0.2429, "step": 238 }, { "epoch": 0.07744653272845108, "grad_norm": 0.8058993816375732, "learning_rate": 8.597122302158273e-06, "loss": 0.228, "step": 239 }, { "epoch": 0.07777057679844458, "grad_norm": 0.9699552059173584, "learning_rate": 8.633093525179856e-06, "loss": 0.2513, "step": 240 }, { "epoch": 0.0780946208684381, "grad_norm": 0.874172031879425, "learning_rate": 8.66906474820144e-06, "loss": 0.26, "step": 241 }, { "epoch": 0.07841866493843162, "grad_norm": 0.8918880820274353, "learning_rate": 8.705035971223022e-06, "loss": 0.2304, "step": 242 }, { "epoch": 0.07874270900842514, "grad_norm": 0.7745374441146851, "learning_rate": 8.741007194244605e-06, "loss": 0.2204, "step": 243 }, { "epoch": 0.07906675307841866, "grad_norm": 0.8315483331680298, "learning_rate": 8.776978417266188e-06, "loss": 0.2322, "step": 244 }, { "epoch": 0.07939079714841218, "grad_norm": 0.8538967370986938, "learning_rate": 8.81294964028777e-06, "loss": 0.2491, "step": 245 }, { "epoch": 0.0797148412184057, "grad_norm": 0.8524037003517151, "learning_rate": 8.848920863309353e-06, "loss": 0.2322, "step": 246 }, { "epoch": 0.08003888528839923, "grad_norm": 0.8282663226127625, "learning_rate": 8.884892086330936e-06, "loss": 0.2338, "step": 247 }, { "epoch": 0.08036292935839275, "grad_norm": 0.8466488122940063, "learning_rate": 8.92086330935252e-06, "loss": 0.2177, "step": 248 }, { "epoch": 0.08068697342838627, "grad_norm": 0.7749168276786804, "learning_rate": 8.956834532374102e-06, "loss": 0.2156, "step": 249 }, { "epoch": 0.08101101749837979, "grad_norm": 0.7421470880508423, "learning_rate": 8.992805755395683e-06, "loss": 0.2064, "step": 250 }, { "epoch": 0.08133506156837329, "grad_norm": 0.8627637028694153, "learning_rate": 9.028776978417268e-06, "loss": 0.2348, "step": 251 }, { "epoch": 0.08165910563836681, "grad_norm": 0.8397968411445618, "learning_rate": 9.064748201438849e-06, "loss": 0.2108, "step": 252 }, { "epoch": 0.08198314970836033, "grad_norm": 0.8088095784187317, "learning_rate": 9.100719424460432e-06, "loss": 0.2269, "step": 253 }, { "epoch": 0.08230719377835385, "grad_norm": 0.8072569966316223, "learning_rate": 9.136690647482015e-06, "loss": 0.23, "step": 254 }, { "epoch": 0.08263123784834737, "grad_norm": 0.9000716209411621, "learning_rate": 9.172661870503598e-06, "loss": 0.2442, "step": 255 }, { "epoch": 0.0829552819183409, "grad_norm": 0.7782520651817322, "learning_rate": 9.20863309352518e-06, "loss": 0.2184, "step": 256 }, { "epoch": 0.08327932598833442, "grad_norm": 0.8208126425743103, "learning_rate": 9.244604316546764e-06, "loss": 0.2235, "step": 257 }, { "epoch": 0.08360337005832794, "grad_norm": 0.7606107592582703, "learning_rate": 9.280575539568346e-06, "loss": 0.2232, "step": 258 }, { "epoch": 0.08392741412832146, "grad_norm": 0.7701852321624756, "learning_rate": 9.31654676258993e-06, "loss": 0.2252, "step": 259 }, { "epoch": 0.08425145819831498, "grad_norm": 0.8124762177467346, "learning_rate": 9.35251798561151e-06, "loss": 0.2281, "step": 260 }, { "epoch": 0.08457550226830848, "grad_norm": 0.8278365135192871, "learning_rate": 9.388489208633095e-06, "loss": 0.2354, "step": 261 }, { "epoch": 0.084899546338302, "grad_norm": 0.7792116403579712, "learning_rate": 9.424460431654678e-06, "loss": 0.2214, "step": 262 }, { "epoch": 0.08522359040829552, "grad_norm": 0.819582998752594, "learning_rate": 9.46043165467626e-06, "loss": 0.2218, "step": 263 }, { "epoch": 0.08554763447828904, "grad_norm": 0.7760310173034668, "learning_rate": 9.496402877697842e-06, "loss": 0.2066, "step": 264 }, { "epoch": 0.08587167854828257, "grad_norm": 0.8136816024780273, "learning_rate": 9.532374100719425e-06, "loss": 0.2381, "step": 265 }, { "epoch": 0.08619572261827609, "grad_norm": 0.8015646934509277, "learning_rate": 9.568345323741008e-06, "loss": 0.2275, "step": 266 }, { "epoch": 0.0865197666882696, "grad_norm": 0.7706541419029236, "learning_rate": 9.60431654676259e-06, "loss": 0.2259, "step": 267 }, { "epoch": 0.08684381075826313, "grad_norm": 0.7953770160675049, "learning_rate": 9.640287769784174e-06, "loss": 0.2233, "step": 268 }, { "epoch": 0.08716785482825665, "grad_norm": 0.8387556672096252, "learning_rate": 9.676258992805757e-06, "loss": 0.2349, "step": 269 }, { "epoch": 0.08749189889825017, "grad_norm": 0.8319289684295654, "learning_rate": 9.712230215827338e-06, "loss": 0.2326, "step": 270 }, { "epoch": 0.08781594296824369, "grad_norm": 0.864242672920227, "learning_rate": 9.748201438848922e-06, "loss": 0.211, "step": 271 }, { "epoch": 0.0881399870382372, "grad_norm": 0.7795359492301941, "learning_rate": 9.784172661870505e-06, "loss": 0.2206, "step": 272 }, { "epoch": 0.08846403110823071, "grad_norm": 0.8175662159919739, "learning_rate": 9.820143884892086e-06, "loss": 0.2325, "step": 273 }, { "epoch": 0.08878807517822424, "grad_norm": 0.755852460861206, "learning_rate": 9.85611510791367e-06, "loss": 0.226, "step": 274 }, { "epoch": 0.08911211924821776, "grad_norm": 0.8484703898429871, "learning_rate": 9.892086330935252e-06, "loss": 0.2396, "step": 275 }, { "epoch": 0.08943616331821128, "grad_norm": 0.8286850452423096, "learning_rate": 9.928057553956835e-06, "loss": 0.2586, "step": 276 }, { "epoch": 0.0897602073882048, "grad_norm": 0.7314445972442627, "learning_rate": 9.964028776978418e-06, "loss": 0.2392, "step": 277 }, { "epoch": 0.09008425145819832, "grad_norm": 0.7226484417915344, "learning_rate": 1e-05, "loss": 0.1949, "step": 278 }, { "epoch": 0.09040829552819184, "grad_norm": 0.8189452290534973, "learning_rate": 9.999999694024202e-06, "loss": 0.2115, "step": 279 }, { "epoch": 0.09073233959818536, "grad_norm": 0.7656586170196533, "learning_rate": 9.999998776096847e-06, "loss": 0.2323, "step": 280 }, { "epoch": 0.09105638366817888, "grad_norm": 0.7828707695007324, "learning_rate": 9.999997246218044e-06, "loss": 0.2014, "step": 281 }, { "epoch": 0.09138042773817238, "grad_norm": 0.8037205338478088, "learning_rate": 9.99999510438798e-06, "loss": 0.231, "step": 282 }, { "epoch": 0.0917044718081659, "grad_norm": 0.8756486177444458, "learning_rate": 9.999992350606919e-06, "loss": 0.2363, "step": 283 }, { "epoch": 0.09202851587815943, "grad_norm": 0.8126011490821838, "learning_rate": 9.9999889848752e-06, "loss": 0.2334, "step": 284 }, { "epoch": 0.09235255994815295, "grad_norm": 0.7885010242462158, "learning_rate": 9.999985007193232e-06, "loss": 0.2207, "step": 285 }, { "epoch": 0.09267660401814647, "grad_norm": 0.702421247959137, "learning_rate": 9.999980417561503e-06, "loss": 0.2074, "step": 286 }, { "epoch": 0.09300064808813999, "grad_norm": 0.778176486492157, "learning_rate": 9.999975215980575e-06, "loss": 0.2389, "step": 287 }, { "epoch": 0.09332469215813351, "grad_norm": 0.7950177192687988, "learning_rate": 9.999969402451084e-06, "loss": 0.2336, "step": 288 }, { "epoch": 0.09364873622812703, "grad_norm": 0.7845720052719116, "learning_rate": 9.999962976973741e-06, "loss": 0.2375, "step": 289 }, { "epoch": 0.09397278029812055, "grad_norm": 0.8304017186164856, "learning_rate": 9.999955939549333e-06, "loss": 0.2314, "step": 290 }, { "epoch": 0.09429682436811407, "grad_norm": 0.7669353485107422, "learning_rate": 9.99994829017872e-06, "loss": 0.2178, "step": 291 }, { "epoch": 0.09462086843810759, "grad_norm": 0.7424461841583252, "learning_rate": 9.999940028862843e-06, "loss": 0.2304, "step": 292 }, { "epoch": 0.0949449125081011, "grad_norm": 0.7105286121368408, "learning_rate": 9.999931155602707e-06, "loss": 0.2165, "step": 293 }, { "epoch": 0.09526895657809462, "grad_norm": 0.7596817016601562, "learning_rate": 9.999921670399401e-06, "loss": 0.2198, "step": 294 }, { "epoch": 0.09559300064808814, "grad_norm": 0.7864949703216553, "learning_rate": 9.999911573254085e-06, "loss": 0.2224, "step": 295 }, { "epoch": 0.09591704471808166, "grad_norm": 0.7551132440567017, "learning_rate": 9.999900864167996e-06, "loss": 0.2074, "step": 296 }, { "epoch": 0.09624108878807518, "grad_norm": 0.7826792597770691, "learning_rate": 9.999889543142444e-06, "loss": 0.2213, "step": 297 }, { "epoch": 0.0965651328580687, "grad_norm": 0.7614756226539612, "learning_rate": 9.999877610178814e-06, "loss": 0.2182, "step": 298 }, { "epoch": 0.09688917692806222, "grad_norm": 0.820511519908905, "learning_rate": 9.99986506527857e-06, "loss": 0.224, "step": 299 }, { "epoch": 0.09721322099805574, "grad_norm": 0.7976818680763245, "learning_rate": 9.99985190844324e-06, "loss": 0.2322, "step": 300 }, { "epoch": 0.09753726506804926, "grad_norm": 0.8125640749931335, "learning_rate": 9.999838139674443e-06, "loss": 0.2515, "step": 301 }, { "epoch": 0.09786130913804278, "grad_norm": 0.7696121335029602, "learning_rate": 9.999823758973857e-06, "loss": 0.2177, "step": 302 }, { "epoch": 0.0981853532080363, "grad_norm": 0.750672459602356, "learning_rate": 9.999808766343246e-06, "loss": 0.2245, "step": 303 }, { "epoch": 0.0985093972780298, "grad_norm": 0.7182970643043518, "learning_rate": 9.999793161784443e-06, "loss": 0.1997, "step": 304 }, { "epoch": 0.09883344134802333, "grad_norm": 0.7711732387542725, "learning_rate": 9.99977694529936e-06, "loss": 0.2175, "step": 305 }, { "epoch": 0.09915748541801685, "grad_norm": 0.7251968383789062, "learning_rate": 9.99976011688998e-06, "loss": 0.1967, "step": 306 }, { "epoch": 0.09948152948801037, "grad_norm": 0.7583543062210083, "learning_rate": 9.999742676558363e-06, "loss": 0.214, "step": 307 }, { "epoch": 0.09980557355800389, "grad_norm": 0.7657378911972046, "learning_rate": 9.999724624306644e-06, "loss": 0.2081, "step": 308 }, { "epoch": 0.10012961762799741, "grad_norm": 0.7284070253372192, "learning_rate": 9.999705960137032e-06, "loss": 0.1964, "step": 309 }, { "epoch": 0.10045366169799093, "grad_norm": 0.7148271799087524, "learning_rate": 9.999686684051811e-06, "loss": 0.2017, "step": 310 }, { "epoch": 0.10077770576798445, "grad_norm": 0.7276371121406555, "learning_rate": 9.99966679605334e-06, "loss": 0.2083, "step": 311 }, { "epoch": 0.10110174983797797, "grad_norm": 0.791381299495697, "learning_rate": 9.999646296144054e-06, "loss": 0.2403, "step": 312 }, { "epoch": 0.10142579390797149, "grad_norm": 0.7297627925872803, "learning_rate": 9.999625184326463e-06, "loss": 0.2132, "step": 313 }, { "epoch": 0.101749837977965, "grad_norm": 0.7267171144485474, "learning_rate": 9.999603460603147e-06, "loss": 0.2011, "step": 314 }, { "epoch": 0.10207388204795852, "grad_norm": 0.7363854646682739, "learning_rate": 9.99958112497677e-06, "loss": 0.225, "step": 315 }, { "epoch": 0.10239792611795204, "grad_norm": 0.7794610857963562, "learning_rate": 9.999558177450062e-06, "loss": 0.203, "step": 316 }, { "epoch": 0.10272197018794556, "grad_norm": 0.7780491709709167, "learning_rate": 9.999534618025833e-06, "loss": 0.2095, "step": 317 }, { "epoch": 0.10304601425793908, "grad_norm": 0.8182850480079651, "learning_rate": 9.999510446706966e-06, "loss": 0.2327, "step": 318 }, { "epoch": 0.1033700583279326, "grad_norm": 0.779887855052948, "learning_rate": 9.999485663496417e-06, "loss": 0.2045, "step": 319 }, { "epoch": 0.10369410239792612, "grad_norm": 0.735507071018219, "learning_rate": 9.999460268397225e-06, "loss": 0.2075, "step": 320 }, { "epoch": 0.10401814646791964, "grad_norm": 0.7778931856155396, "learning_rate": 9.999434261412493e-06, "loss": 0.2161, "step": 321 }, { "epoch": 0.10434219053791316, "grad_norm": 0.7456550598144531, "learning_rate": 9.999407642545404e-06, "loss": 0.2109, "step": 322 }, { "epoch": 0.10466623460790668, "grad_norm": 0.8102906942367554, "learning_rate": 9.99938041179922e-06, "loss": 0.2417, "step": 323 }, { "epoch": 0.1049902786779002, "grad_norm": 0.7794447541236877, "learning_rate": 9.99935256917727e-06, "loss": 0.2354, "step": 324 }, { "epoch": 0.10531432274789371, "grad_norm": 0.8024318814277649, "learning_rate": 9.999324114682964e-06, "loss": 0.2345, "step": 325 }, { "epoch": 0.10563836681788723, "grad_norm": 0.7224960327148438, "learning_rate": 9.999295048319785e-06, "loss": 0.2146, "step": 326 }, { "epoch": 0.10596241088788075, "grad_norm": 0.7515241503715515, "learning_rate": 9.999265370091287e-06, "loss": 0.207, "step": 327 }, { "epoch": 0.10628645495787427, "grad_norm": 0.7258550524711609, "learning_rate": 9.999235080001105e-06, "loss": 0.1973, "step": 328 }, { "epoch": 0.10661049902786779, "grad_norm": 0.7604589462280273, "learning_rate": 9.999204178052944e-06, "loss": 0.1959, "step": 329 }, { "epoch": 0.10693454309786131, "grad_norm": 0.8284873366355896, "learning_rate": 9.999172664250589e-06, "loss": 0.2337, "step": 330 }, { "epoch": 0.10725858716785483, "grad_norm": 0.765424370765686, "learning_rate": 9.999140538597895e-06, "loss": 0.2385, "step": 331 }, { "epoch": 0.10758263123784835, "grad_norm": 0.7512611150741577, "learning_rate": 9.999107801098796e-06, "loss": 0.2078, "step": 332 }, { "epoch": 0.10790667530784187, "grad_norm": 0.6939177513122559, "learning_rate": 9.999074451757295e-06, "loss": 0.2072, "step": 333 }, { "epoch": 0.10823071937783539, "grad_norm": 0.7670846581459045, "learning_rate": 9.999040490577478e-06, "loss": 0.2033, "step": 334 }, { "epoch": 0.10855476344782891, "grad_norm": 0.7212173342704773, "learning_rate": 9.999005917563498e-06, "loss": 0.2049, "step": 335 }, { "epoch": 0.10887880751782242, "grad_norm": 0.7780339121818542, "learning_rate": 9.998970732719588e-06, "loss": 0.2321, "step": 336 }, { "epoch": 0.10920285158781594, "grad_norm": 0.7524231672286987, "learning_rate": 9.998934936050055e-06, "loss": 0.2285, "step": 337 }, { "epoch": 0.10952689565780946, "grad_norm": 0.7963482141494751, "learning_rate": 9.99889852755928e-06, "loss": 0.2369, "step": 338 }, { "epoch": 0.10985093972780298, "grad_norm": 0.7501022219657898, "learning_rate": 9.998861507251717e-06, "loss": 0.2064, "step": 339 }, { "epoch": 0.1101749837977965, "grad_norm": 0.7451834678649902, "learning_rate": 9.998823875131898e-06, "loss": 0.2087, "step": 340 }, { "epoch": 0.11049902786779002, "grad_norm": 0.787020206451416, "learning_rate": 9.998785631204428e-06, "loss": 0.2261, "step": 341 }, { "epoch": 0.11082307193778354, "grad_norm": 0.717600405216217, "learning_rate": 9.998746775473992e-06, "loss": 0.2163, "step": 342 }, { "epoch": 0.11114711600777706, "grad_norm": 0.7501165270805359, "learning_rate": 9.998707307945339e-06, "loss": 0.2046, "step": 343 }, { "epoch": 0.11147116007777058, "grad_norm": 0.7228868007659912, "learning_rate": 9.998667228623304e-06, "loss": 0.2002, "step": 344 }, { "epoch": 0.1117952041477641, "grad_norm": 0.7500896453857422, "learning_rate": 9.998626537512792e-06, "loss": 0.1977, "step": 345 }, { "epoch": 0.11211924821775761, "grad_norm": 0.7988274693489075, "learning_rate": 9.99858523461878e-06, "loss": 0.2194, "step": 346 }, { "epoch": 0.11244329228775113, "grad_norm": 0.7718932628631592, "learning_rate": 9.998543319946328e-06, "loss": 0.2284, "step": 347 }, { "epoch": 0.11276733635774465, "grad_norm": 0.7662320137023926, "learning_rate": 9.998500793500562e-06, "loss": 0.2156, "step": 348 }, { "epoch": 0.11309138042773817, "grad_norm": 0.7333149909973145, "learning_rate": 9.998457655286689e-06, "loss": 0.2173, "step": 349 }, { "epoch": 0.11341542449773169, "grad_norm": 0.762129008769989, "learning_rate": 9.998413905309986e-06, "loss": 0.2351, "step": 350 }, { "epoch": 0.11373946856772521, "grad_norm": 0.7451883554458618, "learning_rate": 9.99836954357581e-06, "loss": 0.2135, "step": 351 }, { "epoch": 0.11406351263771873, "grad_norm": 0.7003828883171082, "learning_rate": 9.99832457008959e-06, "loss": 0.1902, "step": 352 }, { "epoch": 0.11438755670771225, "grad_norm": 0.7704355716705322, "learning_rate": 9.998278984856831e-06, "loss": 0.2068, "step": 353 }, { "epoch": 0.11471160077770577, "grad_norm": 0.780831515789032, "learning_rate": 9.998232787883111e-06, "loss": 0.2059, "step": 354 }, { "epoch": 0.11503564484769929, "grad_norm": 0.7990095019340515, "learning_rate": 9.998185979174084e-06, "loss": 0.2142, "step": 355 }, { "epoch": 0.11535968891769281, "grad_norm": 0.7470694184303284, "learning_rate": 9.998138558735479e-06, "loss": 0.1885, "step": 356 }, { "epoch": 0.11568373298768632, "grad_norm": 0.7477388978004456, "learning_rate": 9.998090526573101e-06, "loss": 0.1947, "step": 357 }, { "epoch": 0.11600777705767984, "grad_norm": 0.7454971671104431, "learning_rate": 9.998041882692828e-06, "loss": 0.203, "step": 358 }, { "epoch": 0.11633182112767336, "grad_norm": 0.7200604677200317, "learning_rate": 9.997992627100612e-06, "loss": 0.1936, "step": 359 }, { "epoch": 0.11665586519766688, "grad_norm": 0.7129573225975037, "learning_rate": 9.997942759802483e-06, "loss": 0.2073, "step": 360 }, { "epoch": 0.1169799092676604, "grad_norm": 0.7242521047592163, "learning_rate": 9.997892280804545e-06, "loss": 0.2211, "step": 361 }, { "epoch": 0.11730395333765392, "grad_norm": 0.7934162020683289, "learning_rate": 9.997841190112975e-06, "loss": 0.2302, "step": 362 }, { "epoch": 0.11762799740764744, "grad_norm": 0.7240173816680908, "learning_rate": 9.997789487734027e-06, "loss": 0.2145, "step": 363 }, { "epoch": 0.11795204147764096, "grad_norm": 0.7518720626831055, "learning_rate": 9.997737173674027e-06, "loss": 0.2165, "step": 364 }, { "epoch": 0.11827608554763448, "grad_norm": 0.7862987518310547, "learning_rate": 9.997684247939378e-06, "loss": 0.1998, "step": 365 }, { "epoch": 0.118600129617628, "grad_norm": 0.73781418800354, "learning_rate": 9.997630710536559e-06, "loss": 0.2104, "step": 366 }, { "epoch": 0.11892417368762152, "grad_norm": 0.7208497524261475, "learning_rate": 9.997576561472122e-06, "loss": 0.2169, "step": 367 }, { "epoch": 0.11924821775761503, "grad_norm": 0.7701983451843262, "learning_rate": 9.997521800752695e-06, "loss": 0.2275, "step": 368 }, { "epoch": 0.11957226182760855, "grad_norm": 0.7541593313217163, "learning_rate": 9.99746642838498e-06, "loss": 0.1995, "step": 369 }, { "epoch": 0.11989630589760207, "grad_norm": 0.6978981494903564, "learning_rate": 9.99741044437575e-06, "loss": 0.2075, "step": 370 }, { "epoch": 0.12022034996759559, "grad_norm": 0.6774824857711792, "learning_rate": 9.997353848731862e-06, "loss": 0.205, "step": 371 }, { "epoch": 0.12054439403758911, "grad_norm": 0.736003041267395, "learning_rate": 9.997296641460242e-06, "loss": 0.193, "step": 372 }, { "epoch": 0.12086843810758263, "grad_norm": 0.7393019795417786, "learning_rate": 9.99723882256789e-06, "loss": 0.2138, "step": 373 }, { "epoch": 0.12119248217757615, "grad_norm": 0.765889048576355, "learning_rate": 9.997180392061883e-06, "loss": 0.2211, "step": 374 }, { "epoch": 0.12151652624756967, "grad_norm": 0.733389675617218, "learning_rate": 9.997121349949372e-06, "loss": 0.2199, "step": 375 }, { "epoch": 0.1218405703175632, "grad_norm": 0.7976531386375427, "learning_rate": 9.997061696237584e-06, "loss": 0.2237, "step": 376 }, { "epoch": 0.12216461438755671, "grad_norm": 0.6957002282142639, "learning_rate": 9.99700143093382e-06, "loss": 0.224, "step": 377 }, { "epoch": 0.12248865845755022, "grad_norm": 0.6930292844772339, "learning_rate": 9.996940554045455e-06, "loss": 0.2044, "step": 378 }, { "epoch": 0.12281270252754374, "grad_norm": 0.8116925954818726, "learning_rate": 9.99687906557994e-06, "loss": 0.2241, "step": 379 }, { "epoch": 0.12313674659753726, "grad_norm": 0.7239817380905151, "learning_rate": 9.996816965544802e-06, "loss": 0.214, "step": 380 }, { "epoch": 0.12346079066753078, "grad_norm": 0.7183677554130554, "learning_rate": 9.99675425394764e-06, "loss": 0.2092, "step": 381 }, { "epoch": 0.1237848347375243, "grad_norm": 0.6525163650512695, "learning_rate": 9.996690930796132e-06, "loss": 0.1909, "step": 382 }, { "epoch": 0.12410887880751782, "grad_norm": 0.7569447159767151, "learning_rate": 9.996626996098021e-06, "loss": 0.2257, "step": 383 }, { "epoch": 0.12443292287751134, "grad_norm": 0.7216403484344482, "learning_rate": 9.996562449861141e-06, "loss": 0.2057, "step": 384 }, { "epoch": 0.12475696694750486, "grad_norm": 0.7046488523483276, "learning_rate": 9.996497292093386e-06, "loss": 0.197, "step": 385 }, { "epoch": 0.12508101101749838, "grad_norm": 0.7048548460006714, "learning_rate": 9.996431522802733e-06, "loss": 0.2103, "step": 386 }, { "epoch": 0.1254050550874919, "grad_norm": 0.7337021231651306, "learning_rate": 9.996365141997229e-06, "loss": 0.218, "step": 387 }, { "epoch": 0.12572909915748542, "grad_norm": 0.7375513911247253, "learning_rate": 9.996298149685e-06, "loss": 0.2033, "step": 388 }, { "epoch": 0.12605314322747893, "grad_norm": 0.7464216947555542, "learning_rate": 9.996230545874247e-06, "loss": 0.2115, "step": 389 }, { "epoch": 0.12637718729747247, "grad_norm": 0.7280207276344299, "learning_rate": 9.996162330573243e-06, "loss": 0.206, "step": 390 }, { "epoch": 0.12670123136746597, "grad_norm": 0.7100143432617188, "learning_rate": 9.996093503790333e-06, "loss": 0.2193, "step": 391 }, { "epoch": 0.1270252754374595, "grad_norm": 0.721896767616272, "learning_rate": 9.996024065533948e-06, "loss": 0.2073, "step": 392 }, { "epoch": 0.127349319507453, "grad_norm": 0.7025097608566284, "learning_rate": 9.995954015812582e-06, "loss": 0.2016, "step": 393 }, { "epoch": 0.12767336357744652, "grad_norm": 0.7675989866256714, "learning_rate": 9.995883354634806e-06, "loss": 0.2212, "step": 394 }, { "epoch": 0.12799740764744005, "grad_norm": 0.7080046534538269, "learning_rate": 9.995812082009274e-06, "loss": 0.2008, "step": 395 }, { "epoch": 0.12832145171743356, "grad_norm": 0.6863510608673096, "learning_rate": 9.995740197944706e-06, "loss": 0.2137, "step": 396 }, { "epoch": 0.1286454957874271, "grad_norm": 0.7432951331138611, "learning_rate": 9.9956677024499e-06, "loss": 0.2193, "step": 397 }, { "epoch": 0.1289695398574206, "grad_norm": 0.6628539562225342, "learning_rate": 9.995594595533729e-06, "loss": 0.1771, "step": 398 }, { "epoch": 0.12929358392741414, "grad_norm": 0.7454772591590881, "learning_rate": 9.995520877205141e-06, "loss": 0.2032, "step": 399 }, { "epoch": 0.12961762799740764, "grad_norm": 0.7162988781929016, "learning_rate": 9.995446547473157e-06, "loss": 0.2083, "step": 400 }, { "epoch": 0.12994167206740118, "grad_norm": 0.7362358570098877, "learning_rate": 9.995371606346875e-06, "loss": 0.1914, "step": 401 }, { "epoch": 0.13026571613739468, "grad_norm": 0.6585822701454163, "learning_rate": 9.995296053835469e-06, "loss": 0.1903, "step": 402 }, { "epoch": 0.13058976020738822, "grad_norm": 0.6886841654777527, "learning_rate": 9.995219889948184e-06, "loss": 0.1908, "step": 403 }, { "epoch": 0.13091380427738172, "grad_norm": 0.718048095703125, "learning_rate": 9.99514311469434e-06, "loss": 0.2034, "step": 404 }, { "epoch": 0.13123784834737523, "grad_norm": 0.7566750049591064, "learning_rate": 9.995065728083337e-06, "loss": 0.2011, "step": 405 }, { "epoch": 0.13156189241736876, "grad_norm": 0.7040491700172424, "learning_rate": 9.994987730124646e-06, "loss": 0.2081, "step": 406 }, { "epoch": 0.13188593648736227, "grad_norm": 0.7720775604248047, "learning_rate": 9.994909120827811e-06, "loss": 0.2185, "step": 407 }, { "epoch": 0.1322099805573558, "grad_norm": 0.725797176361084, "learning_rate": 9.994829900202454e-06, "loss": 0.2071, "step": 408 }, { "epoch": 0.1325340246273493, "grad_norm": 0.7542802095413208, "learning_rate": 9.99475006825827e-06, "loss": 0.2148, "step": 409 }, { "epoch": 0.13285806869734285, "grad_norm": 0.7492075562477112, "learning_rate": 9.994669625005032e-06, "loss": 0.2237, "step": 410 }, { "epoch": 0.13318211276733635, "grad_norm": 0.7159206867218018, "learning_rate": 9.994588570452583e-06, "loss": 0.2014, "step": 411 }, { "epoch": 0.1335061568373299, "grad_norm": 0.7251393795013428, "learning_rate": 9.994506904610846e-06, "loss": 0.204, "step": 412 }, { "epoch": 0.1338302009073234, "grad_norm": 0.697099506855011, "learning_rate": 9.994424627489813e-06, "loss": 0.2048, "step": 413 }, { "epoch": 0.13415424497731693, "grad_norm": 0.7721277475357056, "learning_rate": 9.994341739099556e-06, "loss": 0.2188, "step": 414 }, { "epoch": 0.13447828904731043, "grad_norm": 0.704058051109314, "learning_rate": 9.99425823945022e-06, "loss": 0.2005, "step": 415 }, { "epoch": 0.13480233311730394, "grad_norm": 0.6567177772521973, "learning_rate": 9.99417412855202e-06, "loss": 0.185, "step": 416 }, { "epoch": 0.13512637718729748, "grad_norm": 0.7193306088447571, "learning_rate": 9.994089406415258e-06, "loss": 0.1941, "step": 417 }, { "epoch": 0.13545042125729098, "grad_norm": 0.7080915570259094, "learning_rate": 9.994004073050297e-06, "loss": 0.2167, "step": 418 }, { "epoch": 0.13577446532728452, "grad_norm": 0.7174451351165771, "learning_rate": 9.993918128467583e-06, "loss": 0.2102, "step": 419 }, { "epoch": 0.13609850939727802, "grad_norm": 0.7323854565620422, "learning_rate": 9.993831572677636e-06, "loss": 0.1976, "step": 420 }, { "epoch": 0.13642255346727156, "grad_norm": 0.7107775211334229, "learning_rate": 9.993744405691049e-06, "loss": 0.2114, "step": 421 }, { "epoch": 0.13674659753726506, "grad_norm": 0.7212236523628235, "learning_rate": 9.99365662751849e-06, "loss": 0.2044, "step": 422 }, { "epoch": 0.1370706416072586, "grad_norm": 0.7694576978683472, "learning_rate": 9.9935682381707e-06, "loss": 0.2152, "step": 423 }, { "epoch": 0.1373946856772521, "grad_norm": 0.6720986366271973, "learning_rate": 9.993479237658501e-06, "loss": 0.1975, "step": 424 }, { "epoch": 0.13771872974724564, "grad_norm": 0.7393246293067932, "learning_rate": 9.993389625992783e-06, "loss": 0.2103, "step": 425 }, { "epoch": 0.13804277381723914, "grad_norm": 0.7491670846939087, "learning_rate": 9.993299403184515e-06, "loss": 0.193, "step": 426 }, { "epoch": 0.13836681788723265, "grad_norm": 0.780389130115509, "learning_rate": 9.99320856924474e-06, "loss": 0.2256, "step": 427 }, { "epoch": 0.13869086195722619, "grad_norm": 0.6445964574813843, "learning_rate": 9.993117124184572e-06, "loss": 0.1863, "step": 428 }, { "epoch": 0.1390149060272197, "grad_norm": 0.6863457560539246, "learning_rate": 9.993025068015205e-06, "loss": 0.1811, "step": 429 }, { "epoch": 0.13933895009721323, "grad_norm": 0.7053923010826111, "learning_rate": 9.992932400747908e-06, "loss": 0.2131, "step": 430 }, { "epoch": 0.13966299416720673, "grad_norm": 0.7096285223960876, "learning_rate": 9.99283912239402e-06, "loss": 0.2006, "step": 431 }, { "epoch": 0.13998703823720027, "grad_norm": 0.7141770720481873, "learning_rate": 9.992745232964957e-06, "loss": 0.2034, "step": 432 }, { "epoch": 0.14031108230719377, "grad_norm": 0.7291443943977356, "learning_rate": 9.99265073247221e-06, "loss": 0.1955, "step": 433 }, { "epoch": 0.1406351263771873, "grad_norm": 0.7909582257270813, "learning_rate": 9.99255562092735e-06, "loss": 0.2202, "step": 434 }, { "epoch": 0.14095917044718081, "grad_norm": 0.7880704998970032, "learning_rate": 9.992459898342008e-06, "loss": 0.2242, "step": 435 }, { "epoch": 0.14128321451717435, "grad_norm": 0.7351171970367432, "learning_rate": 9.99236356472791e-06, "loss": 0.212, "step": 436 }, { "epoch": 0.14160725858716786, "grad_norm": 0.7005438208580017, "learning_rate": 9.99226662009684e-06, "loss": 0.204, "step": 437 }, { "epoch": 0.14193130265716136, "grad_norm": 0.6722733378410339, "learning_rate": 9.992169064460663e-06, "loss": 0.2046, "step": 438 }, { "epoch": 0.1422553467271549, "grad_norm": 0.6662759184837341, "learning_rate": 9.992070897831322e-06, "loss": 0.2081, "step": 439 }, { "epoch": 0.1425793907971484, "grad_norm": 0.7334291338920593, "learning_rate": 9.99197212022083e-06, "loss": 0.2198, "step": 440 }, { "epoch": 0.14290343486714194, "grad_norm": 0.7043226361274719, "learning_rate": 9.991872731641276e-06, "loss": 0.2086, "step": 441 }, { "epoch": 0.14322747893713544, "grad_norm": 0.6686563491821289, "learning_rate": 9.991772732104825e-06, "loss": 0.2065, "step": 442 }, { "epoch": 0.14355152300712898, "grad_norm": 0.609669029712677, "learning_rate": 9.991672121623717e-06, "loss": 0.1653, "step": 443 }, { "epoch": 0.14387556707712248, "grad_norm": 0.690898597240448, "learning_rate": 9.991570900210262e-06, "loss": 0.2024, "step": 444 }, { "epoch": 0.14419961114711602, "grad_norm": 0.6984000205993652, "learning_rate": 9.991469067876854e-06, "loss": 0.1927, "step": 445 }, { "epoch": 0.14452365521710953, "grad_norm": 0.6699127554893494, "learning_rate": 9.99136662463595e-06, "loss": 0.1903, "step": 446 }, { "epoch": 0.14484769928710303, "grad_norm": 0.6831761598587036, "learning_rate": 9.991263570500093e-06, "loss": 0.2032, "step": 447 }, { "epoch": 0.14517174335709657, "grad_norm": 0.6796483993530273, "learning_rate": 9.991159905481893e-06, "loss": 0.191, "step": 448 }, { "epoch": 0.14549578742709007, "grad_norm": 0.7104837894439697, "learning_rate": 9.99105562959404e-06, "loss": 0.2004, "step": 449 }, { "epoch": 0.1458198314970836, "grad_norm": 0.7294518351554871, "learning_rate": 9.990950742849295e-06, "loss": 0.1984, "step": 450 }, { "epoch": 0.14614387556707711, "grad_norm": 0.6870996952056885, "learning_rate": 9.990845245260495e-06, "loss": 0.196, "step": 451 }, { "epoch": 0.14646791963707065, "grad_norm": 0.6710522770881653, "learning_rate": 9.990739136840552e-06, "loss": 0.1906, "step": 452 }, { "epoch": 0.14679196370706415, "grad_norm": 0.6412180662155151, "learning_rate": 9.990632417602452e-06, "loss": 0.1865, "step": 453 }, { "epoch": 0.1471160077770577, "grad_norm": 0.7567189931869507, "learning_rate": 9.99052508755926e-06, "loss": 0.209, "step": 454 }, { "epoch": 0.1474400518470512, "grad_norm": 0.7240652441978455, "learning_rate": 9.990417146724106e-06, "loss": 0.2081, "step": 455 }, { "epoch": 0.14776409591704473, "grad_norm": 0.6918161511421204, "learning_rate": 9.990308595110206e-06, "loss": 0.2005, "step": 456 }, { "epoch": 0.14808813998703824, "grad_norm": 0.7122319936752319, "learning_rate": 9.990199432730842e-06, "loss": 0.1936, "step": 457 }, { "epoch": 0.14841218405703174, "grad_norm": 0.6836199164390564, "learning_rate": 9.990089659599378e-06, "loss": 0.2088, "step": 458 }, { "epoch": 0.14873622812702528, "grad_norm": 0.6745359301567078, "learning_rate": 9.989979275729248e-06, "loss": 0.2043, "step": 459 }, { "epoch": 0.14906027219701878, "grad_norm": 0.743945300579071, "learning_rate": 9.98986828113396e-06, "loss": 0.1958, "step": 460 }, { "epoch": 0.14938431626701232, "grad_norm": 0.7083187103271484, "learning_rate": 9.9897566758271e-06, "loss": 0.1946, "step": 461 }, { "epoch": 0.14970836033700582, "grad_norm": 0.7363801598548889, "learning_rate": 9.989644459822329e-06, "loss": 0.1964, "step": 462 }, { "epoch": 0.15003240440699936, "grad_norm": 0.7157705426216125, "learning_rate": 9.989531633133379e-06, "loss": 0.1995, "step": 463 }, { "epoch": 0.15035644847699287, "grad_norm": 0.6821460127830505, "learning_rate": 9.989418195774058e-06, "loss": 0.2009, "step": 464 }, { "epoch": 0.1506804925469864, "grad_norm": 0.8093439340591431, "learning_rate": 9.989304147758254e-06, "loss": 0.2163, "step": 465 }, { "epoch": 0.1510045366169799, "grad_norm": 0.7202951908111572, "learning_rate": 9.989189489099921e-06, "loss": 0.2036, "step": 466 }, { "epoch": 0.15132858068697344, "grad_norm": 0.6870996356010437, "learning_rate": 9.989074219813093e-06, "loss": 0.1963, "step": 467 }, { "epoch": 0.15165262475696695, "grad_norm": 0.6940014362335205, "learning_rate": 9.98895833991188e-06, "loss": 0.1812, "step": 468 }, { "epoch": 0.15197666882696045, "grad_norm": 0.6710031628608704, "learning_rate": 9.98884184941046e-06, "loss": 0.182, "step": 469 }, { "epoch": 0.152300712896954, "grad_norm": 0.637710690498352, "learning_rate": 9.988724748323096e-06, "loss": 0.1757, "step": 470 }, { "epoch": 0.1526247569669475, "grad_norm": 0.6982388496398926, "learning_rate": 9.988607036664118e-06, "loss": 0.2097, "step": 471 }, { "epoch": 0.15294880103694103, "grad_norm": 0.6776319742202759, "learning_rate": 9.98848871444793e-06, "loss": 0.2038, "step": 472 }, { "epoch": 0.15327284510693454, "grad_norm": 0.7191901803016663, "learning_rate": 9.988369781689018e-06, "loss": 0.1931, "step": 473 }, { "epoch": 0.15359688917692807, "grad_norm": 0.6711001992225647, "learning_rate": 9.988250238401933e-06, "loss": 0.1883, "step": 474 }, { "epoch": 0.15392093324692158, "grad_norm": 0.7274355888366699, "learning_rate": 9.98813008460131e-06, "loss": 0.2202, "step": 475 }, { "epoch": 0.1542449773169151, "grad_norm": 0.7254967093467712, "learning_rate": 9.988009320301854e-06, "loss": 0.22, "step": 476 }, { "epoch": 0.15456902138690862, "grad_norm": 0.6807737350463867, "learning_rate": 9.987887945518346e-06, "loss": 0.2028, "step": 477 }, { "epoch": 0.15489306545690215, "grad_norm": 0.6453770399093628, "learning_rate": 9.987765960265639e-06, "loss": 0.1875, "step": 478 }, { "epoch": 0.15521710952689566, "grad_norm": 0.6426407098770142, "learning_rate": 9.987643364558664e-06, "loss": 0.1741, "step": 479 }, { "epoch": 0.15554115359688916, "grad_norm": 0.673290491104126, "learning_rate": 9.987520158412424e-06, "loss": 0.1934, "step": 480 }, { "epoch": 0.1558651976668827, "grad_norm": 0.7302583456039429, "learning_rate": 9.987396341841999e-06, "loss": 0.2002, "step": 481 }, { "epoch": 0.1561892417368762, "grad_norm": 0.7037186026573181, "learning_rate": 9.987271914862547e-06, "loss": 0.2137, "step": 482 }, { "epoch": 0.15651328580686974, "grad_norm": 0.6902836561203003, "learning_rate": 9.98714687748929e-06, "loss": 0.1804, "step": 483 }, { "epoch": 0.15683732987686325, "grad_norm": 0.6849866509437561, "learning_rate": 9.987021229737535e-06, "loss": 0.2028, "step": 484 }, { "epoch": 0.15716137394685678, "grad_norm": 0.645197331905365, "learning_rate": 9.98689497162266e-06, "loss": 0.1771, "step": 485 }, { "epoch": 0.1574854180168503, "grad_norm": 0.7027577757835388, "learning_rate": 9.986768103160119e-06, "loss": 0.1971, "step": 486 }, { "epoch": 0.15780946208684382, "grad_norm": 0.7298611998558044, "learning_rate": 9.986640624365436e-06, "loss": 0.2195, "step": 487 }, { "epoch": 0.15813350615683733, "grad_norm": 0.6794098615646362, "learning_rate": 9.986512535254215e-06, "loss": 0.2028, "step": 488 }, { "epoch": 0.15845755022683086, "grad_norm": 0.7098777294158936, "learning_rate": 9.986383835842133e-06, "loss": 0.2034, "step": 489 }, { "epoch": 0.15878159429682437, "grad_norm": 0.6703172326087952, "learning_rate": 9.986254526144941e-06, "loss": 0.1852, "step": 490 }, { "epoch": 0.15910563836681788, "grad_norm": 0.6819800138473511, "learning_rate": 9.986124606178466e-06, "loss": 0.19, "step": 491 }, { "epoch": 0.1594296824368114, "grad_norm": 0.6795838475227356, "learning_rate": 9.985994075958609e-06, "loss": 0.2027, "step": 492 }, { "epoch": 0.15975372650680492, "grad_norm": 0.7014299631118774, "learning_rate": 9.985862935501346e-06, "loss": 0.2033, "step": 493 }, { "epoch": 0.16007777057679845, "grad_norm": 0.7198210954666138, "learning_rate": 9.985731184822724e-06, "loss": 0.1794, "step": 494 }, { "epoch": 0.16040181464679196, "grad_norm": 0.6685338616371155, "learning_rate": 9.985598823938873e-06, "loss": 0.1948, "step": 495 }, { "epoch": 0.1607258587167855, "grad_norm": 0.6871777772903442, "learning_rate": 9.98546585286599e-06, "loss": 0.1727, "step": 496 }, { "epoch": 0.161049902786779, "grad_norm": 0.7162487506866455, "learning_rate": 9.985332271620349e-06, "loss": 0.2239, "step": 497 }, { "epoch": 0.16137394685677253, "grad_norm": 0.6949173808097839, "learning_rate": 9.985198080218301e-06, "loss": 0.2019, "step": 498 }, { "epoch": 0.16169799092676604, "grad_norm": 0.690800130367279, "learning_rate": 9.985063278676266e-06, "loss": 0.1974, "step": 499 }, { "epoch": 0.16202203499675957, "grad_norm": 0.7009753584861755, "learning_rate": 9.984927867010748e-06, "loss": 0.2018, "step": 500 }, { "epoch": 0.16234607906675308, "grad_norm": 0.6902207136154175, "learning_rate": 9.984791845238315e-06, "loss": 0.1825, "step": 501 }, { "epoch": 0.16267012313674659, "grad_norm": 0.6592951416969299, "learning_rate": 9.984655213375615e-06, "loss": 0.2048, "step": 502 }, { "epoch": 0.16299416720674012, "grad_norm": 0.7007604241371155, "learning_rate": 9.984517971439375e-06, "loss": 0.2017, "step": 503 }, { "epoch": 0.16331821127673363, "grad_norm": 0.6989747881889343, "learning_rate": 9.984380119446388e-06, "loss": 0.2144, "step": 504 }, { "epoch": 0.16364225534672716, "grad_norm": 0.6398130059242249, "learning_rate": 9.984241657413526e-06, "loss": 0.1808, "step": 505 }, { "epoch": 0.16396629941672067, "grad_norm": 0.6692783236503601, "learning_rate": 9.98410258535774e-06, "loss": 0.1968, "step": 506 }, { "epoch": 0.1642903434867142, "grad_norm": 0.6372814774513245, "learning_rate": 9.983962903296044e-06, "loss": 0.186, "step": 507 }, { "epoch": 0.1646143875567077, "grad_norm": 0.6585953235626221, "learning_rate": 9.983822611245537e-06, "loss": 0.1843, "step": 508 }, { "epoch": 0.16493843162670124, "grad_norm": 0.6987937688827515, "learning_rate": 9.983681709223392e-06, "loss": 0.1974, "step": 509 }, { "epoch": 0.16526247569669475, "grad_norm": 0.6531558632850647, "learning_rate": 9.98354019724685e-06, "loss": 0.1742, "step": 510 }, { "epoch": 0.16558651976668826, "grad_norm": 0.6775031089782715, "learning_rate": 9.983398075333231e-06, "loss": 0.1885, "step": 511 }, { "epoch": 0.1659105638366818, "grad_norm": 0.6820382475852966, "learning_rate": 9.983255343499932e-06, "loss": 0.1743, "step": 512 }, { "epoch": 0.1662346079066753, "grad_norm": 0.672417402267456, "learning_rate": 9.983112001764421e-06, "loss": 0.1927, "step": 513 }, { "epoch": 0.16655865197666883, "grad_norm": 0.6865410208702087, "learning_rate": 9.98296805014424e-06, "loss": 0.1925, "step": 514 }, { "epoch": 0.16688269604666234, "grad_norm": 0.6832920908927917, "learning_rate": 9.98282348865701e-06, "loss": 0.2008, "step": 515 }, { "epoch": 0.16720674011665587, "grad_norm": 0.6926618814468384, "learning_rate": 9.982678317320423e-06, "loss": 0.2011, "step": 516 }, { "epoch": 0.16753078418664938, "grad_norm": 0.6344230771064758, "learning_rate": 9.982532536152242e-06, "loss": 0.19, "step": 517 }, { "epoch": 0.1678548282566429, "grad_norm": 0.6745365858078003, "learning_rate": 9.982386145170317e-06, "loss": 0.2045, "step": 518 }, { "epoch": 0.16817887232663642, "grad_norm": 0.6516968011856079, "learning_rate": 9.98223914439256e-06, "loss": 0.196, "step": 519 }, { "epoch": 0.16850291639662995, "grad_norm": 0.6410155892372131, "learning_rate": 9.982091533836964e-06, "loss": 0.1841, "step": 520 }, { "epoch": 0.16882696046662346, "grad_norm": 0.6878783702850342, "learning_rate": 9.981943313521594e-06, "loss": 0.1866, "step": 521 }, { "epoch": 0.16915100453661697, "grad_norm": 0.7064847946166992, "learning_rate": 9.981794483464592e-06, "loss": 0.1985, "step": 522 }, { "epoch": 0.1694750486066105, "grad_norm": 0.700658917427063, "learning_rate": 9.981645043684172e-06, "loss": 0.2151, "step": 523 }, { "epoch": 0.169799092676604, "grad_norm": 0.663432776927948, "learning_rate": 9.981494994198624e-06, "loss": 0.216, "step": 524 }, { "epoch": 0.17012313674659754, "grad_norm": 0.6575750708580017, "learning_rate": 9.981344335026316e-06, "loss": 0.1759, "step": 525 }, { "epoch": 0.17044718081659105, "grad_norm": 0.7086677551269531, "learning_rate": 9.981193066185682e-06, "loss": 0.2133, "step": 526 }, { "epoch": 0.17077122488658458, "grad_norm": 0.6817197799682617, "learning_rate": 9.981041187695239e-06, "loss": 0.1996, "step": 527 }, { "epoch": 0.1710952689565781, "grad_norm": 0.6739192605018616, "learning_rate": 9.980888699573576e-06, "loss": 0.2071, "step": 528 }, { "epoch": 0.17141931302657162, "grad_norm": 0.7066790461540222, "learning_rate": 9.980735601839354e-06, "loss": 0.1997, "step": 529 }, { "epoch": 0.17174335709656513, "grad_norm": 0.5890233516693115, "learning_rate": 9.980581894511313e-06, "loss": 0.1704, "step": 530 }, { "epoch": 0.17206740116655866, "grad_norm": 0.7064293026924133, "learning_rate": 9.980427577608261e-06, "loss": 0.2072, "step": 531 }, { "epoch": 0.17239144523655217, "grad_norm": 0.6045970916748047, "learning_rate": 9.98027265114909e-06, "loss": 0.1747, "step": 532 }, { "epoch": 0.17271548930654568, "grad_norm": 0.6477690935134888, "learning_rate": 9.980117115152758e-06, "loss": 0.166, "step": 533 }, { "epoch": 0.1730395333765392, "grad_norm": 0.6647665500640869, "learning_rate": 9.979960969638303e-06, "loss": 0.1954, "step": 534 }, { "epoch": 0.17336357744653272, "grad_norm": 0.6770102977752686, "learning_rate": 9.979804214624835e-06, "loss": 0.2032, "step": 535 }, { "epoch": 0.17368762151652625, "grad_norm": 0.695477306842804, "learning_rate": 9.979646850131539e-06, "loss": 0.2023, "step": 536 }, { "epoch": 0.17401166558651976, "grad_norm": 0.6723573207855225, "learning_rate": 9.979488876177676e-06, "loss": 0.1991, "step": 537 }, { "epoch": 0.1743357096565133, "grad_norm": 0.6520310044288635, "learning_rate": 9.97933029278258e-06, "loss": 0.1836, "step": 538 }, { "epoch": 0.1746597537265068, "grad_norm": 0.6617633700370789, "learning_rate": 9.97917109996566e-06, "loss": 0.2047, "step": 539 }, { "epoch": 0.17498379779650033, "grad_norm": 0.6775153279304504, "learning_rate": 9.979011297746396e-06, "loss": 0.1946, "step": 540 }, { "epoch": 0.17530784186649384, "grad_norm": 0.7150312066078186, "learning_rate": 9.978850886144353e-06, "loss": 0.2092, "step": 541 }, { "epoch": 0.17563188593648738, "grad_norm": 0.6678467988967896, "learning_rate": 9.978689865179161e-06, "loss": 0.1901, "step": 542 }, { "epoch": 0.17595593000648088, "grad_norm": 0.6910053491592407, "learning_rate": 9.978528234870526e-06, "loss": 0.2106, "step": 543 }, { "epoch": 0.1762799740764744, "grad_norm": 0.6965245604515076, "learning_rate": 9.978365995238231e-06, "loss": 0.195, "step": 544 }, { "epoch": 0.17660401814646792, "grad_norm": 0.7062773108482361, "learning_rate": 9.978203146302133e-06, "loss": 0.2061, "step": 545 }, { "epoch": 0.17692806221646143, "grad_norm": 0.6020629405975342, "learning_rate": 9.978039688082161e-06, "loss": 0.1698, "step": 546 }, { "epoch": 0.17725210628645496, "grad_norm": 0.6427762508392334, "learning_rate": 9.977875620598323e-06, "loss": 0.1747, "step": 547 }, { "epoch": 0.17757615035644847, "grad_norm": 0.7219037413597107, "learning_rate": 9.9777109438707e-06, "loss": 0.1879, "step": 548 }, { "epoch": 0.177900194426442, "grad_norm": 0.6616566777229309, "learning_rate": 9.977545657919444e-06, "loss": 0.1941, "step": 549 }, { "epoch": 0.1782242384964355, "grad_norm": 0.6537032723426819, "learning_rate": 9.977379762764785e-06, "loss": 0.181, "step": 550 }, { "epoch": 0.17854828256642905, "grad_norm": 0.6982765793800354, "learning_rate": 9.97721325842703e-06, "loss": 0.2023, "step": 551 }, { "epoch": 0.17887232663642255, "grad_norm": 0.6358122229576111, "learning_rate": 9.977046144926555e-06, "loss": 0.1618, "step": 552 }, { "epoch": 0.17919637070641609, "grad_norm": 0.6477236151695251, "learning_rate": 9.976878422283811e-06, "loss": 0.1782, "step": 553 }, { "epoch": 0.1795204147764096, "grad_norm": 0.6369034647941589, "learning_rate": 9.97671009051933e-06, "loss": 0.1872, "step": 554 }, { "epoch": 0.1798444588464031, "grad_norm": 0.679492175579071, "learning_rate": 9.976541149653714e-06, "loss": 0.185, "step": 555 }, { "epoch": 0.18016850291639663, "grad_norm": 0.6448071599006653, "learning_rate": 9.976371599707635e-06, "loss": 0.1879, "step": 556 }, { "epoch": 0.18049254698639014, "grad_norm": 0.6247570514678955, "learning_rate": 9.976201440701848e-06, "loss": 0.1812, "step": 557 }, { "epoch": 0.18081659105638367, "grad_norm": 0.6881939172744751, "learning_rate": 9.976030672657177e-06, "loss": 0.1984, "step": 558 }, { "epoch": 0.18114063512637718, "grad_norm": 0.6799702048301697, "learning_rate": 9.975859295594526e-06, "loss": 0.1981, "step": 559 }, { "epoch": 0.18146467919637072, "grad_norm": 0.6491541862487793, "learning_rate": 9.975687309534865e-06, "loss": 0.1663, "step": 560 }, { "epoch": 0.18178872326636422, "grad_norm": 0.6550642251968384, "learning_rate": 9.975514714499247e-06, "loss": 0.1861, "step": 561 }, { "epoch": 0.18211276733635776, "grad_norm": 0.6994182467460632, "learning_rate": 9.975341510508793e-06, "loss": 0.2076, "step": 562 }, { "epoch": 0.18243681140635126, "grad_norm": 0.692759096622467, "learning_rate": 9.975167697584706e-06, "loss": 0.2181, "step": 563 }, { "epoch": 0.18276085547634477, "grad_norm": 0.6435503363609314, "learning_rate": 9.974993275748253e-06, "loss": 0.18, "step": 564 }, { "epoch": 0.1830848995463383, "grad_norm": 0.6955358982086182, "learning_rate": 9.974818245020788e-06, "loss": 0.2005, "step": 565 }, { "epoch": 0.1834089436163318, "grad_norm": 0.6618521213531494, "learning_rate": 9.974642605423727e-06, "loss": 0.2049, "step": 566 }, { "epoch": 0.18373298768632534, "grad_norm": 0.6213456392288208, "learning_rate": 9.97446635697857e-06, "loss": 0.1781, "step": 567 }, { "epoch": 0.18405703175631885, "grad_norm": 0.6700173616409302, "learning_rate": 9.974289499706888e-06, "loss": 0.1862, "step": 568 }, { "epoch": 0.18438107582631239, "grad_norm": 0.6925329566001892, "learning_rate": 9.974112033630326e-06, "loss": 0.2066, "step": 569 }, { "epoch": 0.1847051198963059, "grad_norm": 0.6212252378463745, "learning_rate": 9.973933958770604e-06, "loss": 0.1817, "step": 570 }, { "epoch": 0.18502916396629943, "grad_norm": 0.6753741502761841, "learning_rate": 9.973755275149516e-06, "loss": 0.2056, "step": 571 }, { "epoch": 0.18535320803629293, "grad_norm": 0.687791645526886, "learning_rate": 9.973575982788934e-06, "loss": 0.1862, "step": 572 }, { "epoch": 0.18567725210628647, "grad_norm": 0.6519484519958496, "learning_rate": 9.9733960817108e-06, "loss": 0.1567, "step": 573 }, { "epoch": 0.18600129617627997, "grad_norm": 0.700171709060669, "learning_rate": 9.97321557193713e-06, "loss": 0.1883, "step": 574 }, { "epoch": 0.18632534024627348, "grad_norm": 0.6499386429786682, "learning_rate": 9.973034453490017e-06, "loss": 0.1906, "step": 575 }, { "epoch": 0.18664938431626701, "grad_norm": 0.6470776200294495, "learning_rate": 9.972852726391633e-06, "loss": 0.196, "step": 576 }, { "epoch": 0.18697342838626052, "grad_norm": 0.6618337631225586, "learning_rate": 9.972670390664214e-06, "loss": 0.1812, "step": 577 }, { "epoch": 0.18729747245625405, "grad_norm": 0.6731171607971191, "learning_rate": 9.972487446330079e-06, "loss": 0.1869, "step": 578 }, { "epoch": 0.18762151652624756, "grad_norm": 0.6692624092102051, "learning_rate": 9.972303893411619e-06, "loss": 0.1912, "step": 579 }, { "epoch": 0.1879455605962411, "grad_norm": 0.6499179601669312, "learning_rate": 9.972119731931298e-06, "loss": 0.2035, "step": 580 }, { "epoch": 0.1882696046662346, "grad_norm": 0.6587321162223816, "learning_rate": 9.971934961911653e-06, "loss": 0.1904, "step": 581 }, { "epoch": 0.18859364873622814, "grad_norm": 0.657788872718811, "learning_rate": 9.971749583375303e-06, "loss": 0.1948, "step": 582 }, { "epoch": 0.18891769280622164, "grad_norm": 0.6337559819221497, "learning_rate": 9.971563596344934e-06, "loss": 0.1925, "step": 583 }, { "epoch": 0.18924173687621518, "grad_norm": 0.6866735219955444, "learning_rate": 9.971377000843309e-06, "loss": 0.1981, "step": 584 }, { "epoch": 0.18956578094620868, "grad_norm": 0.7488305568695068, "learning_rate": 9.971189796893266e-06, "loss": 0.1973, "step": 585 }, { "epoch": 0.1898898250162022, "grad_norm": 0.7035614848136902, "learning_rate": 9.971001984517717e-06, "loss": 0.1853, "step": 586 }, { "epoch": 0.19021386908619572, "grad_norm": 0.6761707663536072, "learning_rate": 9.970813563739647e-06, "loss": 0.1879, "step": 587 }, { "epoch": 0.19053791315618923, "grad_norm": 0.6215554475784302, "learning_rate": 9.97062453458212e-06, "loss": 0.1721, "step": 588 }, { "epoch": 0.19086195722618277, "grad_norm": 0.6934155225753784, "learning_rate": 9.970434897068268e-06, "loss": 0.2102, "step": 589 }, { "epoch": 0.19118600129617627, "grad_norm": 0.6098140478134155, "learning_rate": 9.970244651221302e-06, "loss": 0.1685, "step": 590 }, { "epoch": 0.1915100453661698, "grad_norm": 0.7065131068229675, "learning_rate": 9.970053797064506e-06, "loss": 0.2038, "step": 591 }, { "epoch": 0.1918340894361633, "grad_norm": 0.6565381288528442, "learning_rate": 9.96986233462124e-06, "loss": 0.1781, "step": 592 }, { "epoch": 0.19215813350615685, "grad_norm": 0.642196774482727, "learning_rate": 9.969670263914936e-06, "loss": 0.1783, "step": 593 }, { "epoch": 0.19248217757615035, "grad_norm": 0.6986721158027649, "learning_rate": 9.9694775849691e-06, "loss": 0.2, "step": 594 }, { "epoch": 0.1928062216461439, "grad_norm": 0.6566448211669922, "learning_rate": 9.969284297807319e-06, "loss": 0.1821, "step": 595 }, { "epoch": 0.1931302657161374, "grad_norm": 0.7404976487159729, "learning_rate": 9.969090402453246e-06, "loss": 0.2051, "step": 596 }, { "epoch": 0.1934543097861309, "grad_norm": 0.7691987156867981, "learning_rate": 9.96889589893061e-06, "loss": 0.1989, "step": 597 }, { "epoch": 0.19377835385612444, "grad_norm": 0.6925591230392456, "learning_rate": 9.96870078726322e-06, "loss": 0.1992, "step": 598 }, { "epoch": 0.19410239792611794, "grad_norm": 0.6497508883476257, "learning_rate": 9.968505067474954e-06, "loss": 0.1855, "step": 599 }, { "epoch": 0.19442644199611148, "grad_norm": 0.6990604996681213, "learning_rate": 9.968308739589767e-06, "loss": 0.1962, "step": 600 }, { "epoch": 0.19475048606610498, "grad_norm": 0.6696603894233704, "learning_rate": 9.968111803631688e-06, "loss": 0.1942, "step": 601 }, { "epoch": 0.19507453013609852, "grad_norm": 0.6557125449180603, "learning_rate": 9.967914259624817e-06, "loss": 0.1787, "step": 602 }, { "epoch": 0.19539857420609202, "grad_norm": 0.6662872433662415, "learning_rate": 9.967716107593335e-06, "loss": 0.1853, "step": 603 }, { "epoch": 0.19572261827608556, "grad_norm": 0.6978006958961487, "learning_rate": 9.967517347561493e-06, "loss": 0.2104, "step": 604 }, { "epoch": 0.19604666234607906, "grad_norm": 0.6399781107902527, "learning_rate": 9.967317979553617e-06, "loss": 0.1894, "step": 605 }, { "epoch": 0.1963707064160726, "grad_norm": 0.6516302824020386, "learning_rate": 9.967118003594107e-06, "loss": 0.1792, "step": 606 }, { "epoch": 0.1966947504860661, "grad_norm": 0.6929931044578552, "learning_rate": 9.966917419707439e-06, "loss": 0.2056, "step": 607 }, { "epoch": 0.1970187945560596, "grad_norm": 0.6575304269790649, "learning_rate": 9.966716227918163e-06, "loss": 0.1844, "step": 608 }, { "epoch": 0.19734283862605315, "grad_norm": 0.6778562664985657, "learning_rate": 9.966514428250902e-06, "loss": 0.1927, "step": 609 }, { "epoch": 0.19766688269604665, "grad_norm": 0.6473074555397034, "learning_rate": 9.966312020730353e-06, "loss": 0.2007, "step": 610 }, { "epoch": 0.1979909267660402, "grad_norm": 0.6930627226829529, "learning_rate": 9.966109005381292e-06, "loss": 0.2148, "step": 611 }, { "epoch": 0.1983149708360337, "grad_norm": 0.7073266506195068, "learning_rate": 9.965905382228565e-06, "loss": 0.176, "step": 612 }, { "epoch": 0.19863901490602723, "grad_norm": 0.6246760487556458, "learning_rate": 9.96570115129709e-06, "loss": 0.1628, "step": 613 }, { "epoch": 0.19896305897602073, "grad_norm": 0.6829102039337158, "learning_rate": 9.965496312611869e-06, "loss": 0.1851, "step": 614 }, { "epoch": 0.19928710304601427, "grad_norm": 0.647975504398346, "learning_rate": 9.965290866197967e-06, "loss": 0.1891, "step": 615 }, { "epoch": 0.19961114711600778, "grad_norm": 0.6556454300880432, "learning_rate": 9.965084812080533e-06, "loss": 0.1792, "step": 616 }, { "epoch": 0.1999351911860013, "grad_norm": 0.6431975960731506, "learning_rate": 9.96487815028478e-06, "loss": 0.1978, "step": 617 }, { "epoch": 0.20025923525599482, "grad_norm": 0.62309730052948, "learning_rate": 9.964670880836009e-06, "loss": 0.1811, "step": 618 }, { "epoch": 0.20058327932598832, "grad_norm": 0.705092191696167, "learning_rate": 9.96446300375958e-06, "loss": 0.1945, "step": 619 }, { "epoch": 0.20090732339598186, "grad_norm": 0.6346619725227356, "learning_rate": 9.964254519080943e-06, "loss": 0.1764, "step": 620 }, { "epoch": 0.20123136746597536, "grad_norm": 0.6662610769271851, "learning_rate": 9.964045426825609e-06, "loss": 0.1881, "step": 621 }, { "epoch": 0.2015554115359689, "grad_norm": 0.7057406306266785, "learning_rate": 9.96383572701917e-06, "loss": 0.2075, "step": 622 }, { "epoch": 0.2018794556059624, "grad_norm": 0.6177229285240173, "learning_rate": 9.963625419687292e-06, "loss": 0.1937, "step": 623 }, { "epoch": 0.20220349967595594, "grad_norm": 0.6827666759490967, "learning_rate": 9.963414504855714e-06, "loss": 0.1938, "step": 624 }, { "epoch": 0.20252754374594945, "grad_norm": 0.6624761819839478, "learning_rate": 9.96320298255025e-06, "loss": 0.1962, "step": 625 }, { "epoch": 0.20285158781594298, "grad_norm": 0.6548133492469788, "learning_rate": 9.96299085279679e-06, "loss": 0.1857, "step": 626 }, { "epoch": 0.2031756318859365, "grad_norm": 0.6573795080184937, "learning_rate": 9.962778115621294e-06, "loss": 0.1887, "step": 627 }, { "epoch": 0.20349967595593, "grad_norm": 0.6657042503356934, "learning_rate": 9.962564771049799e-06, "loss": 0.1888, "step": 628 }, { "epoch": 0.20382372002592353, "grad_norm": 0.691179096698761, "learning_rate": 9.96235081910842e-06, "loss": 0.1902, "step": 629 }, { "epoch": 0.20414776409591703, "grad_norm": 0.6811088919639587, "learning_rate": 9.962136259823337e-06, "loss": 0.1788, "step": 630 }, { "epoch": 0.20447180816591057, "grad_norm": 0.6676942110061646, "learning_rate": 9.961921093220815e-06, "loss": 0.1877, "step": 631 }, { "epoch": 0.20479585223590407, "grad_norm": 0.6651305556297302, "learning_rate": 9.961705319327186e-06, "loss": 0.1868, "step": 632 }, { "epoch": 0.2051198963058976, "grad_norm": 0.6629343628883362, "learning_rate": 9.961488938168859e-06, "loss": 0.1705, "step": 633 }, { "epoch": 0.20544394037589112, "grad_norm": 0.6603417992591858, "learning_rate": 9.961271949772316e-06, "loss": 0.1831, "step": 634 }, { "epoch": 0.20576798444588465, "grad_norm": 0.7644823789596558, "learning_rate": 9.961054354164116e-06, "loss": 0.2072, "step": 635 }, { "epoch": 0.20609202851587816, "grad_norm": 0.690443217754364, "learning_rate": 9.96083615137089e-06, "loss": 0.2103, "step": 636 }, { "epoch": 0.2064160725858717, "grad_norm": 0.6906901597976685, "learning_rate": 9.960617341419342e-06, "loss": 0.2025, "step": 637 }, { "epoch": 0.2067401166558652, "grad_norm": 0.6573625206947327, "learning_rate": 9.960397924336256e-06, "loss": 0.1952, "step": 638 }, { "epoch": 0.2070641607258587, "grad_norm": 0.6751140356063843, "learning_rate": 9.960177900148483e-06, "loss": 0.2044, "step": 639 }, { "epoch": 0.20738820479585224, "grad_norm": 0.6589855551719666, "learning_rate": 9.959957268882954e-06, "loss": 0.2033, "step": 640 }, { "epoch": 0.20771224886584574, "grad_norm": 0.6459863781929016, "learning_rate": 9.959736030566672e-06, "loss": 0.177, "step": 641 }, { "epoch": 0.20803629293583928, "grad_norm": 0.6876140236854553, "learning_rate": 9.959514185226714e-06, "loss": 0.2077, "step": 642 }, { "epoch": 0.20836033700583279, "grad_norm": 0.6192476749420166, "learning_rate": 9.959291732890228e-06, "loss": 0.186, "step": 643 }, { "epoch": 0.20868438107582632, "grad_norm": 0.683696985244751, "learning_rate": 9.959068673584447e-06, "loss": 0.2169, "step": 644 }, { "epoch": 0.20900842514581983, "grad_norm": 0.6164054870605469, "learning_rate": 9.958845007336667e-06, "loss": 0.1758, "step": 645 }, { "epoch": 0.20933246921581336, "grad_norm": 0.6763623356819153, "learning_rate": 9.958620734174263e-06, "loss": 0.1731, "step": 646 }, { "epoch": 0.20965651328580687, "grad_norm": 0.6871886849403381, "learning_rate": 9.958395854124686e-06, "loss": 0.1931, "step": 647 }, { "epoch": 0.2099805573558004, "grad_norm": 0.6257924437522888, "learning_rate": 9.958170367215456e-06, "loss": 0.1886, "step": 648 }, { "epoch": 0.2103046014257939, "grad_norm": 0.6464048027992249, "learning_rate": 9.957944273474172e-06, "loss": 0.1945, "step": 649 }, { "epoch": 0.21062864549578741, "grad_norm": 0.6848668456077576, "learning_rate": 9.957717572928504e-06, "loss": 0.2027, "step": 650 }, { "epoch": 0.21095268956578095, "grad_norm": 0.6994589567184448, "learning_rate": 9.957490265606202e-06, "loss": 0.1994, "step": 651 }, { "epoch": 0.21127673363577446, "grad_norm": 0.7209458947181702, "learning_rate": 9.957262351535085e-06, "loss": 0.2014, "step": 652 }, { "epoch": 0.211600777705768, "grad_norm": 0.6271886825561523, "learning_rate": 9.957033830743043e-06, "loss": 0.171, "step": 653 }, { "epoch": 0.2119248217757615, "grad_norm": 0.6906067728996277, "learning_rate": 9.956804703258048e-06, "loss": 0.1983, "step": 654 }, { "epoch": 0.21224886584575503, "grad_norm": 0.6128396987915039, "learning_rate": 9.956574969108143e-06, "loss": 0.1756, "step": 655 }, { "epoch": 0.21257290991574854, "grad_norm": 0.695341169834137, "learning_rate": 9.956344628321448e-06, "loss": 0.2226, "step": 656 }, { "epoch": 0.21289695398574207, "grad_norm": 0.6387926936149597, "learning_rate": 9.956113680926149e-06, "loss": 0.1929, "step": 657 }, { "epoch": 0.21322099805573558, "grad_norm": 0.6557097434997559, "learning_rate": 9.955882126950516e-06, "loss": 0.1995, "step": 658 }, { "epoch": 0.2135450421257291, "grad_norm": 0.6979703903198242, "learning_rate": 9.955649966422886e-06, "loss": 0.192, "step": 659 }, { "epoch": 0.21386908619572262, "grad_norm": 0.6957211494445801, "learning_rate": 9.955417199371674e-06, "loss": 0.2079, "step": 660 }, { "epoch": 0.21419313026571613, "grad_norm": 0.6489683389663696, "learning_rate": 9.95518382582537e-06, "loss": 0.1781, "step": 661 }, { "epoch": 0.21451717433570966, "grad_norm": 0.6822352409362793, "learning_rate": 9.954949845812536e-06, "loss": 0.1976, "step": 662 }, { "epoch": 0.21484121840570317, "grad_norm": 0.7062362432479858, "learning_rate": 9.954715259361806e-06, "loss": 0.1958, "step": 663 }, { "epoch": 0.2151652624756967, "grad_norm": 0.6700916886329651, "learning_rate": 9.954480066501896e-06, "loss": 0.2005, "step": 664 }, { "epoch": 0.2154893065456902, "grad_norm": 0.5978602170944214, "learning_rate": 9.95424426726159e-06, "loss": 0.173, "step": 665 }, { "epoch": 0.21581335061568374, "grad_norm": 0.6139827966690063, "learning_rate": 9.954007861669745e-06, "loss": 0.1801, "step": 666 }, { "epoch": 0.21613739468567725, "grad_norm": 0.6229221224784851, "learning_rate": 9.953770849755295e-06, "loss": 0.1825, "step": 667 }, { "epoch": 0.21646143875567078, "grad_norm": 0.6633325815200806, "learning_rate": 9.953533231547251e-06, "loss": 0.1875, "step": 668 }, { "epoch": 0.2167854828256643, "grad_norm": 0.666135847568512, "learning_rate": 9.953295007074693e-06, "loss": 0.1946, "step": 669 }, { "epoch": 0.21710952689565782, "grad_norm": 0.6448487639427185, "learning_rate": 9.953056176366777e-06, "loss": 0.1795, "step": 670 }, { "epoch": 0.21743357096565133, "grad_norm": 0.6257104873657227, "learning_rate": 9.952816739452735e-06, "loss": 0.1689, "step": 671 }, { "epoch": 0.21775761503564484, "grad_norm": 0.7119507193565369, "learning_rate": 9.95257669636187e-06, "loss": 0.1887, "step": 672 }, { "epoch": 0.21808165910563837, "grad_norm": 0.6453043818473816, "learning_rate": 9.952336047123565e-06, "loss": 0.1945, "step": 673 }, { "epoch": 0.21840570317563188, "grad_norm": 0.6554058790206909, "learning_rate": 9.952094791767267e-06, "loss": 0.1848, "step": 674 }, { "epoch": 0.2187297472456254, "grad_norm": 0.678406298160553, "learning_rate": 9.951852930322507e-06, "loss": 0.1989, "step": 675 }, { "epoch": 0.21905379131561892, "grad_norm": 0.641863226890564, "learning_rate": 9.951610462818888e-06, "loss": 0.1936, "step": 676 }, { "epoch": 0.21937783538561245, "grad_norm": 0.6582136154174805, "learning_rate": 9.951367389286082e-06, "loss": 0.1813, "step": 677 }, { "epoch": 0.21970187945560596, "grad_norm": 0.622146725654602, "learning_rate": 9.95112370975384e-06, "loss": 0.1771, "step": 678 }, { "epoch": 0.2200259235255995, "grad_norm": 0.6097822189331055, "learning_rate": 9.950879424251987e-06, "loss": 0.1697, "step": 679 }, { "epoch": 0.220349967595593, "grad_norm": 0.6120715141296387, "learning_rate": 9.950634532810421e-06, "loss": 0.1921, "step": 680 }, { "epoch": 0.22067401166558653, "grad_norm": 0.672498881816864, "learning_rate": 9.950389035459114e-06, "loss": 0.1979, "step": 681 }, { "epoch": 0.22099805573558004, "grad_norm": 0.6239397525787354, "learning_rate": 9.950142932228114e-06, "loss": 0.1761, "step": 682 }, { "epoch": 0.22132209980557355, "grad_norm": 0.647966742515564, "learning_rate": 9.949896223147537e-06, "loss": 0.1774, "step": 683 }, { "epoch": 0.22164614387556708, "grad_norm": 0.6499220728874207, "learning_rate": 9.949648908247583e-06, "loss": 0.1943, "step": 684 }, { "epoch": 0.2219701879455606, "grad_norm": 0.6279643774032593, "learning_rate": 9.94940098755852e-06, "loss": 0.1695, "step": 685 }, { "epoch": 0.22229423201555412, "grad_norm": 0.6042583584785461, "learning_rate": 9.949152461110688e-06, "loss": 0.1764, "step": 686 }, { "epoch": 0.22261827608554763, "grad_norm": 0.6498985886573792, "learning_rate": 9.948903328934507e-06, "loss": 0.1863, "step": 687 }, { "epoch": 0.22294232015554116, "grad_norm": 0.6190524101257324, "learning_rate": 9.948653591060468e-06, "loss": 0.1696, "step": 688 }, { "epoch": 0.22326636422553467, "grad_norm": 0.7013121247291565, "learning_rate": 9.948403247519135e-06, "loss": 0.1951, "step": 689 }, { "epoch": 0.2235904082955282, "grad_norm": 0.6156609654426575, "learning_rate": 9.94815229834115e-06, "loss": 0.1852, "step": 690 }, { "epoch": 0.2239144523655217, "grad_norm": 0.6260215640068054, "learning_rate": 9.947900743557224e-06, "loss": 0.1715, "step": 691 }, { "epoch": 0.22423849643551522, "grad_norm": 0.6351551413536072, "learning_rate": 9.947648583198148e-06, "loss": 0.1885, "step": 692 }, { "epoch": 0.22456254050550875, "grad_norm": 0.6523791551589966, "learning_rate": 9.947395817294781e-06, "loss": 0.1877, "step": 693 }, { "epoch": 0.22488658457550226, "grad_norm": 0.6115183234214783, "learning_rate": 9.947142445878062e-06, "loss": 0.1754, "step": 694 }, { "epoch": 0.2252106286454958, "grad_norm": 0.6498876810073853, "learning_rate": 9.946888468978999e-06, "loss": 0.1869, "step": 695 }, { "epoch": 0.2255346727154893, "grad_norm": 0.6169878244400024, "learning_rate": 9.946633886628676e-06, "loss": 0.1929, "step": 696 }, { "epoch": 0.22585871678548283, "grad_norm": 0.6811516880989075, "learning_rate": 9.946378698858255e-06, "loss": 0.2001, "step": 697 }, { "epoch": 0.22618276085547634, "grad_norm": 0.6628179550170898, "learning_rate": 9.946122905698962e-06, "loss": 0.1813, "step": 698 }, { "epoch": 0.22650680492546987, "grad_norm": 0.6483691334724426, "learning_rate": 9.945866507182112e-06, "loss": 0.1712, "step": 699 }, { "epoch": 0.22683084899546338, "grad_norm": 0.6645535230636597, "learning_rate": 9.945609503339078e-06, "loss": 0.184, "step": 700 }, { "epoch": 0.22715489306545691, "grad_norm": 0.6483830213546753, "learning_rate": 9.945351894201318e-06, "loss": 0.1954, "step": 701 }, { "epoch": 0.22747893713545042, "grad_norm": 0.6874852180480957, "learning_rate": 9.945093679800363e-06, "loss": 0.1936, "step": 702 }, { "epoch": 0.22780298120544393, "grad_norm": 0.6744926571846008, "learning_rate": 9.944834860167812e-06, "loss": 0.2008, "step": 703 }, { "epoch": 0.22812702527543746, "grad_norm": 0.7001661062240601, "learning_rate": 9.944575435335344e-06, "loss": 0.1946, "step": 704 }, { "epoch": 0.22845106934543097, "grad_norm": 0.6359503865242004, "learning_rate": 9.944315405334712e-06, "loss": 0.1725, "step": 705 }, { "epoch": 0.2287751134154245, "grad_norm": 0.628150224685669, "learning_rate": 9.944054770197736e-06, "loss": 0.1774, "step": 706 }, { "epoch": 0.229099157485418, "grad_norm": 0.6892385482788086, "learning_rate": 9.94379352995632e-06, "loss": 0.1859, "step": 707 }, { "epoch": 0.22942320155541154, "grad_norm": 0.6501577496528625, "learning_rate": 9.943531684642435e-06, "loss": 0.1847, "step": 708 }, { "epoch": 0.22974724562540505, "grad_norm": 0.626654863357544, "learning_rate": 9.94326923428813e-06, "loss": 0.1867, "step": 709 }, { "epoch": 0.23007128969539858, "grad_norm": 0.6670397520065308, "learning_rate": 9.943006178925525e-06, "loss": 0.1871, "step": 710 }, { "epoch": 0.2303953337653921, "grad_norm": 0.6162786483764648, "learning_rate": 9.942742518586815e-06, "loss": 0.1661, "step": 711 }, { "epoch": 0.23071937783538563, "grad_norm": 0.65184485912323, "learning_rate": 9.94247825330427e-06, "loss": 0.1869, "step": 712 }, { "epoch": 0.23104342190537913, "grad_norm": 0.6438986659049988, "learning_rate": 9.942213383110232e-06, "loss": 0.1847, "step": 713 }, { "epoch": 0.23136746597537264, "grad_norm": 0.6406044960021973, "learning_rate": 9.941947908037123e-06, "loss": 0.1969, "step": 714 }, { "epoch": 0.23169151004536617, "grad_norm": 0.6293928027153015, "learning_rate": 9.941681828117432e-06, "loss": 0.1732, "step": 715 }, { "epoch": 0.23201555411535968, "grad_norm": 0.5930849313735962, "learning_rate": 9.941415143383723e-06, "loss": 0.1592, "step": 716 }, { "epoch": 0.2323395981853532, "grad_norm": 0.6804683208465576, "learning_rate": 9.941147853868638e-06, "loss": 0.1954, "step": 717 }, { "epoch": 0.23266364225534672, "grad_norm": 0.6410490274429321, "learning_rate": 9.94087995960489e-06, "loss": 0.1986, "step": 718 }, { "epoch": 0.23298768632534025, "grad_norm": 0.6373780965805054, "learning_rate": 9.940611460625264e-06, "loss": 0.185, "step": 719 }, { "epoch": 0.23331173039533376, "grad_norm": 0.6732212901115417, "learning_rate": 9.940342356962627e-06, "loss": 0.1935, "step": 720 }, { "epoch": 0.2336357744653273, "grad_norm": 0.6203884482383728, "learning_rate": 9.94007264864991e-06, "loss": 0.1661, "step": 721 }, { "epoch": 0.2339598185353208, "grad_norm": 0.6237133145332336, "learning_rate": 9.939802335720126e-06, "loss": 0.1768, "step": 722 }, { "epoch": 0.23428386260531434, "grad_norm": 0.6512324213981628, "learning_rate": 9.939531418206355e-06, "loss": 0.1828, "step": 723 }, { "epoch": 0.23460790667530784, "grad_norm": 0.6099259853363037, "learning_rate": 9.939259896141757e-06, "loss": 0.1684, "step": 724 }, { "epoch": 0.23493195074530135, "grad_norm": 0.6380852460861206, "learning_rate": 9.938987769559565e-06, "loss": 0.1808, "step": 725 }, { "epoch": 0.23525599481529488, "grad_norm": 0.6275023818016052, "learning_rate": 9.938715038493083e-06, "loss": 0.1911, "step": 726 }, { "epoch": 0.2355800388852884, "grad_norm": 0.6486274600028992, "learning_rate": 9.938441702975689e-06, "loss": 0.1885, "step": 727 }, { "epoch": 0.23590408295528192, "grad_norm": 0.6302095055580139, "learning_rate": 9.93816776304084e-06, "loss": 0.1861, "step": 728 }, { "epoch": 0.23622812702527543, "grad_norm": 0.600139319896698, "learning_rate": 9.937893218722062e-06, "loss": 0.1736, "step": 729 }, { "epoch": 0.23655217109526896, "grad_norm": 0.5886365175247192, "learning_rate": 9.937618070052954e-06, "loss": 0.1683, "step": 730 }, { "epoch": 0.23687621516526247, "grad_norm": 0.68076491355896, "learning_rate": 9.937342317067197e-06, "loss": 0.1949, "step": 731 }, { "epoch": 0.237200259235256, "grad_norm": 0.5942913293838501, "learning_rate": 9.937065959798538e-06, "loss": 0.1755, "step": 732 }, { "epoch": 0.2375243033052495, "grad_norm": 0.6711556315422058, "learning_rate": 9.936788998280797e-06, "loss": 0.1861, "step": 733 }, { "epoch": 0.23784834737524305, "grad_norm": 0.6712557077407837, "learning_rate": 9.936511432547877e-06, "loss": 0.1827, "step": 734 }, { "epoch": 0.23817239144523655, "grad_norm": 0.6791927814483643, "learning_rate": 9.936233262633746e-06, "loss": 0.1791, "step": 735 }, { "epoch": 0.23849643551523006, "grad_norm": 0.6469034552574158, "learning_rate": 9.93595448857245e-06, "loss": 0.1856, "step": 736 }, { "epoch": 0.2388204795852236, "grad_norm": 0.6158778667449951, "learning_rate": 9.935675110398107e-06, "loss": 0.1594, "step": 737 }, { "epoch": 0.2391445236552171, "grad_norm": 0.6263102293014526, "learning_rate": 9.935395128144914e-06, "loss": 0.1907, "step": 738 }, { "epoch": 0.23946856772521063, "grad_norm": 0.6166470050811768, "learning_rate": 9.935114541847133e-06, "loss": 0.1925, "step": 739 }, { "epoch": 0.23979261179520414, "grad_norm": 0.6434534192085266, "learning_rate": 9.93483335153911e-06, "loss": 0.1972, "step": 740 }, { "epoch": 0.24011665586519768, "grad_norm": 0.6516762971878052, "learning_rate": 9.934551557255257e-06, "loss": 0.1889, "step": 741 }, { "epoch": 0.24044069993519118, "grad_norm": 0.6424468755722046, "learning_rate": 9.934269159030064e-06, "loss": 0.2006, "step": 742 }, { "epoch": 0.24076474400518472, "grad_norm": 0.6505478620529175, "learning_rate": 9.933986156898092e-06, "loss": 0.1934, "step": 743 }, { "epoch": 0.24108878807517822, "grad_norm": 0.628738522529602, "learning_rate": 9.93370255089398e-06, "loss": 0.1779, "step": 744 }, { "epoch": 0.24141283214517173, "grad_norm": 0.6392041444778442, "learning_rate": 9.933418341052437e-06, "loss": 0.1792, "step": 745 }, { "epoch": 0.24173687621516526, "grad_norm": 0.6310383081436157, "learning_rate": 9.933133527408248e-06, "loss": 0.1881, "step": 746 }, { "epoch": 0.24206092028515877, "grad_norm": 0.6338793635368347, "learning_rate": 9.932848109996273e-06, "loss": 0.2015, "step": 747 }, { "epoch": 0.2423849643551523, "grad_norm": 0.6490474343299866, "learning_rate": 9.932562088851444e-06, "loss": 0.1904, "step": 748 }, { "epoch": 0.2427090084251458, "grad_norm": 0.6636795997619629, "learning_rate": 9.932275464008763e-06, "loss": 0.1894, "step": 749 }, { "epoch": 0.24303305249513935, "grad_norm": 0.589769184589386, "learning_rate": 9.931988235503316e-06, "loss": 0.1749, "step": 750 }, { "epoch": 0.24335709656513285, "grad_norm": 0.6744698882102966, "learning_rate": 9.931700403370253e-06, "loss": 0.2061, "step": 751 }, { "epoch": 0.2436811406351264, "grad_norm": 0.648323118686676, "learning_rate": 9.931411967644802e-06, "loss": 0.1867, "step": 752 }, { "epoch": 0.2440051847051199, "grad_norm": 0.6055197715759277, "learning_rate": 9.931122928362268e-06, "loss": 0.1821, "step": 753 }, { "epoch": 0.24432922877511343, "grad_norm": 0.596246600151062, "learning_rate": 9.930833285558024e-06, "loss": 0.1658, "step": 754 }, { "epoch": 0.24465327284510693, "grad_norm": 0.6292993426322937, "learning_rate": 9.93054303926752e-06, "loss": 0.1836, "step": 755 }, { "epoch": 0.24497731691510044, "grad_norm": 0.6322280764579773, "learning_rate": 9.930252189526279e-06, "loss": 0.1798, "step": 756 }, { "epoch": 0.24530136098509397, "grad_norm": 0.5868021249771118, "learning_rate": 9.929960736369899e-06, "loss": 0.1647, "step": 757 }, { "epoch": 0.24562540505508748, "grad_norm": 0.6106928586959839, "learning_rate": 9.929668679834051e-06, "loss": 0.1824, "step": 758 }, { "epoch": 0.24594944912508102, "grad_norm": 0.6683313250541687, "learning_rate": 9.929376019954478e-06, "loss": 0.1863, "step": 759 }, { "epoch": 0.24627349319507452, "grad_norm": 0.6021707653999329, "learning_rate": 9.929082756767001e-06, "loss": 0.1621, "step": 760 }, { "epoch": 0.24659753726506806, "grad_norm": 0.6000505685806274, "learning_rate": 9.928788890307513e-06, "loss": 0.1626, "step": 761 }, { "epoch": 0.24692158133506156, "grad_norm": 0.6177734732627869, "learning_rate": 9.928494420611977e-06, "loss": 0.1775, "step": 762 }, { "epoch": 0.2472456254050551, "grad_norm": 0.6518813371658325, "learning_rate": 9.928199347716439e-06, "loss": 0.1734, "step": 763 }, { "epoch": 0.2475696694750486, "grad_norm": 0.6121587157249451, "learning_rate": 9.927903671657007e-06, "loss": 0.1739, "step": 764 }, { "epoch": 0.24789371354504214, "grad_norm": 0.6971595883369446, "learning_rate": 9.92760739246987e-06, "loss": 0.1897, "step": 765 }, { "epoch": 0.24821775761503564, "grad_norm": 0.6398004293441772, "learning_rate": 9.927310510191293e-06, "loss": 0.1878, "step": 766 }, { "epoch": 0.24854180168502915, "grad_norm": 0.6389423608779907, "learning_rate": 9.927013024857609e-06, "loss": 0.1824, "step": 767 }, { "epoch": 0.24886584575502269, "grad_norm": 0.6484429836273193, "learning_rate": 9.926714936505228e-06, "loss": 0.196, "step": 768 }, { "epoch": 0.2491898898250162, "grad_norm": 0.6396540403366089, "learning_rate": 9.926416245170633e-06, "loss": 0.1971, "step": 769 }, { "epoch": 0.24951393389500973, "grad_norm": 0.6225264668464661, "learning_rate": 9.926116950890381e-06, "loss": 0.2028, "step": 770 }, { "epoch": 0.24983797796500323, "grad_norm": 0.6579481363296509, "learning_rate": 9.925817053701103e-06, "loss": 0.189, "step": 771 }, { "epoch": 0.25016202203499677, "grad_norm": 0.6142616271972656, "learning_rate": 9.925516553639503e-06, "loss": 0.177, "step": 772 }, { "epoch": 0.2504860661049903, "grad_norm": 0.625681459903717, "learning_rate": 9.925215450742361e-06, "loss": 0.1819, "step": 773 }, { "epoch": 0.2508101101749838, "grad_norm": 0.62339186668396, "learning_rate": 9.924913745046526e-06, "loss": 0.1753, "step": 774 }, { "epoch": 0.2511341542449773, "grad_norm": 0.5808535814285278, "learning_rate": 9.924611436588924e-06, "loss": 0.1787, "step": 775 }, { "epoch": 0.25145819831497085, "grad_norm": 0.6875622868537903, "learning_rate": 9.92430852540656e-06, "loss": 0.1908, "step": 776 }, { "epoch": 0.2517822423849643, "grad_norm": 0.6806918978691101, "learning_rate": 9.924005011536501e-06, "loss": 0.1945, "step": 777 }, { "epoch": 0.25210628645495786, "grad_norm": 0.6135112643241882, "learning_rate": 9.923700895015896e-06, "loss": 0.1734, "step": 778 }, { "epoch": 0.2524303305249514, "grad_norm": 0.6002721786499023, "learning_rate": 9.923396175881968e-06, "loss": 0.1703, "step": 779 }, { "epoch": 0.25275437459494493, "grad_norm": 0.6776991486549377, "learning_rate": 9.923090854172011e-06, "loss": 0.1878, "step": 780 }, { "epoch": 0.2530784186649384, "grad_norm": 0.6653709411621094, "learning_rate": 9.92278492992339e-06, "loss": 0.1758, "step": 781 }, { "epoch": 0.25340246273493194, "grad_norm": 0.5887860059738159, "learning_rate": 9.922478403173553e-06, "loss": 0.179, "step": 782 }, { "epoch": 0.2537265068049255, "grad_norm": 0.6444964408874512, "learning_rate": 9.92217127396001e-06, "loss": 0.2074, "step": 783 }, { "epoch": 0.254050550874919, "grad_norm": 0.6349043250083923, "learning_rate": 9.921863542320354e-06, "loss": 0.188, "step": 784 }, { "epoch": 0.2543745949449125, "grad_norm": 0.6593102216720581, "learning_rate": 9.921555208292248e-06, "loss": 0.1977, "step": 785 }, { "epoch": 0.254698639014906, "grad_norm": 0.6344327926635742, "learning_rate": 9.921246271913429e-06, "loss": 0.1729, "step": 786 }, { "epoch": 0.25502268308489956, "grad_norm": 0.6412277221679688, "learning_rate": 9.920936733221708e-06, "loss": 0.186, "step": 787 }, { "epoch": 0.25534672715489304, "grad_norm": 0.6043070554733276, "learning_rate": 9.92062659225497e-06, "loss": 0.1722, "step": 788 }, { "epoch": 0.2556707712248866, "grad_norm": 0.5798800587654114, "learning_rate": 9.92031584905117e-06, "loss": 0.1584, "step": 789 }, { "epoch": 0.2559948152948801, "grad_norm": 0.5982307195663452, "learning_rate": 9.920004503648344e-06, "loss": 0.1774, "step": 790 }, { "epoch": 0.25631885936487364, "grad_norm": 0.6444849967956543, "learning_rate": 9.919692556084596e-06, "loss": 0.1786, "step": 791 }, { "epoch": 0.2566429034348671, "grad_norm": 0.6478809118270874, "learning_rate": 9.919380006398105e-06, "loss": 0.2069, "step": 792 }, { "epoch": 0.25696694750486065, "grad_norm": 0.6714723110198975, "learning_rate": 9.919066854627124e-06, "loss": 0.1943, "step": 793 }, { "epoch": 0.2572909915748542, "grad_norm": 0.5991042256355286, "learning_rate": 9.91875310080998e-06, "loss": 0.1685, "step": 794 }, { "epoch": 0.2576150356448477, "grad_norm": 0.6321060657501221, "learning_rate": 9.918438744985078e-06, "loss": 0.1737, "step": 795 }, { "epoch": 0.2579390797148412, "grad_norm": 0.618497371673584, "learning_rate": 9.918123787190883e-06, "loss": 0.1788, "step": 796 }, { "epoch": 0.25826312378483474, "grad_norm": 0.6653442978858948, "learning_rate": 9.91780822746595e-06, "loss": 0.2012, "step": 797 }, { "epoch": 0.25858716785482827, "grad_norm": 0.6287248730659485, "learning_rate": 9.917492065848898e-06, "loss": 0.1664, "step": 798 }, { "epoch": 0.25891121192482175, "grad_norm": 0.6177788972854614, "learning_rate": 9.91717530237842e-06, "loss": 0.1842, "step": 799 }, { "epoch": 0.2592352559948153, "grad_norm": 0.6276010274887085, "learning_rate": 9.916857937093289e-06, "loss": 0.1832, "step": 800 }, { "epoch": 0.2595593000648088, "grad_norm": 0.6513774991035461, "learning_rate": 9.916539970032344e-06, "loss": 0.1848, "step": 801 }, { "epoch": 0.25988334413480235, "grad_norm": 0.5932650566101074, "learning_rate": 9.916221401234502e-06, "loss": 0.1633, "step": 802 }, { "epoch": 0.26020738820479583, "grad_norm": 0.66648930311203, "learning_rate": 9.915902230738756e-06, "loss": 0.1735, "step": 803 }, { "epoch": 0.26053143227478937, "grad_norm": 0.5852811932563782, "learning_rate": 9.915582458584164e-06, "loss": 0.1667, "step": 804 }, { "epoch": 0.2608554763447829, "grad_norm": 0.6674695611000061, "learning_rate": 9.915262084809868e-06, "loss": 0.184, "step": 805 }, { "epoch": 0.26117952041477643, "grad_norm": 0.6292879581451416, "learning_rate": 9.914941109455072e-06, "loss": 0.1807, "step": 806 }, { "epoch": 0.2615035644847699, "grad_norm": 0.6502755880355835, "learning_rate": 9.914619532559069e-06, "loss": 0.1969, "step": 807 }, { "epoch": 0.26182760855476345, "grad_norm": 0.6338673830032349, "learning_rate": 9.91429735416121e-06, "loss": 0.1726, "step": 808 }, { "epoch": 0.262151652624757, "grad_norm": 0.6485314965248108, "learning_rate": 9.91397457430093e-06, "loss": 0.1916, "step": 809 }, { "epoch": 0.26247569669475046, "grad_norm": 0.624607264995575, "learning_rate": 9.913651193017733e-06, "loss": 0.1833, "step": 810 }, { "epoch": 0.262799740764744, "grad_norm": 0.5967383980751038, "learning_rate": 9.913327210351197e-06, "loss": 0.1801, "step": 811 }, { "epoch": 0.26312378483473753, "grad_norm": 0.638460636138916, "learning_rate": 9.913002626340975e-06, "loss": 0.198, "step": 812 }, { "epoch": 0.26344782890473106, "grad_norm": 0.6392395496368408, "learning_rate": 9.912677441026794e-06, "loss": 0.1975, "step": 813 }, { "epoch": 0.26377187297472454, "grad_norm": 0.6315566301345825, "learning_rate": 9.912351654448453e-06, "loss": 0.1935, "step": 814 }, { "epoch": 0.2640959170447181, "grad_norm": 0.6048063635826111, "learning_rate": 9.912025266645824e-06, "loss": 0.1761, "step": 815 }, { "epoch": 0.2644199611147116, "grad_norm": 0.5906796455383301, "learning_rate": 9.911698277658855e-06, "loss": 0.1623, "step": 816 }, { "epoch": 0.26474400518470514, "grad_norm": 0.5543292760848999, "learning_rate": 9.911370687527564e-06, "loss": 0.1599, "step": 817 }, { "epoch": 0.2650680492546986, "grad_norm": 0.5940440893173218, "learning_rate": 9.911042496292049e-06, "loss": 0.1726, "step": 818 }, { "epoch": 0.26539209332469216, "grad_norm": 0.5984013080596924, "learning_rate": 9.910713703992473e-06, "loss": 0.177, "step": 819 }, { "epoch": 0.2657161373946857, "grad_norm": 0.6281025409698486, "learning_rate": 9.910384310669078e-06, "loss": 0.1788, "step": 820 }, { "epoch": 0.26604018146467917, "grad_norm": 0.6289580464363098, "learning_rate": 9.910054316362184e-06, "loss": 0.1905, "step": 821 }, { "epoch": 0.2663642255346727, "grad_norm": 0.6344680786132812, "learning_rate": 9.909723721112171e-06, "loss": 0.19, "step": 822 }, { "epoch": 0.26668826960466624, "grad_norm": 0.6040329337120056, "learning_rate": 9.909392524959506e-06, "loss": 0.1877, "step": 823 }, { "epoch": 0.2670123136746598, "grad_norm": 0.5945601463317871, "learning_rate": 9.909060727944721e-06, "loss": 0.167, "step": 824 }, { "epoch": 0.26733635774465325, "grad_norm": 0.6815995573997498, "learning_rate": 9.908728330108428e-06, "loss": 0.1922, "step": 825 }, { "epoch": 0.2676604018146468, "grad_norm": 0.641444742679596, "learning_rate": 9.908395331491307e-06, "loss": 0.1734, "step": 826 }, { "epoch": 0.2679844458846403, "grad_norm": 0.6499939560890198, "learning_rate": 9.908061732134115e-06, "loss": 0.168, "step": 827 }, { "epoch": 0.26830848995463386, "grad_norm": 0.682487428188324, "learning_rate": 9.90772753207768e-06, "loss": 0.1719, "step": 828 }, { "epoch": 0.26863253402462733, "grad_norm": 0.672627329826355, "learning_rate": 9.907392731362905e-06, "loss": 0.1951, "step": 829 }, { "epoch": 0.26895657809462087, "grad_norm": 0.6161004900932312, "learning_rate": 9.907057330030766e-06, "loss": 0.1712, "step": 830 }, { "epoch": 0.2692806221646144, "grad_norm": 0.6189830303192139, "learning_rate": 9.906721328122317e-06, "loss": 0.173, "step": 831 }, { "epoch": 0.2696046662346079, "grad_norm": 0.6291322708129883, "learning_rate": 9.906384725678676e-06, "loss": 0.1867, "step": 832 }, { "epoch": 0.2699287103046014, "grad_norm": 0.6070644855499268, "learning_rate": 9.906047522741042e-06, "loss": 0.1755, "step": 833 }, { "epoch": 0.27025275437459495, "grad_norm": 0.6049726605415344, "learning_rate": 9.905709719350686e-06, "loss": 0.1635, "step": 834 }, { "epoch": 0.2705767984445885, "grad_norm": 0.6292316913604736, "learning_rate": 9.905371315548952e-06, "loss": 0.1946, "step": 835 }, { "epoch": 0.27090084251458196, "grad_norm": 0.6292069554328918, "learning_rate": 9.905032311377257e-06, "loss": 0.1828, "step": 836 }, { "epoch": 0.2712248865845755, "grad_norm": 0.6271639466285706, "learning_rate": 9.904692706877089e-06, "loss": 0.1862, "step": 837 }, { "epoch": 0.27154893065456903, "grad_norm": 0.6449607014656067, "learning_rate": 9.904352502090016e-06, "loss": 0.1785, "step": 838 }, { "epoch": 0.27187297472456257, "grad_norm": 0.6190186738967896, "learning_rate": 9.904011697057675e-06, "loss": 0.1756, "step": 839 }, { "epoch": 0.27219701879455604, "grad_norm": 0.5763127207756042, "learning_rate": 9.903670291821776e-06, "loss": 0.1665, "step": 840 }, { "epoch": 0.2725210628645496, "grad_norm": 0.6554551124572754, "learning_rate": 9.903328286424105e-06, "loss": 0.1853, "step": 841 }, { "epoch": 0.2728451069345431, "grad_norm": 0.6263623833656311, "learning_rate": 9.90298568090652e-06, "loss": 0.185, "step": 842 }, { "epoch": 0.2731691510045366, "grad_norm": 0.6207384467124939, "learning_rate": 9.902642475310953e-06, "loss": 0.161, "step": 843 }, { "epoch": 0.2734931950745301, "grad_norm": 0.6042211651802063, "learning_rate": 9.902298669679406e-06, "loss": 0.1887, "step": 844 }, { "epoch": 0.27381723914452366, "grad_norm": 0.6446455717086792, "learning_rate": 9.901954264053961e-06, "loss": 0.1849, "step": 845 }, { "epoch": 0.2741412832145172, "grad_norm": 0.6734024882316589, "learning_rate": 9.901609258476769e-06, "loss": 0.1997, "step": 846 }, { "epoch": 0.2744653272845107, "grad_norm": 0.6023538708686829, "learning_rate": 9.901263652990054e-06, "loss": 0.1664, "step": 847 }, { "epoch": 0.2747893713545042, "grad_norm": 0.6193968653678894, "learning_rate": 9.900917447636116e-06, "loss": 0.175, "step": 848 }, { "epoch": 0.27511341542449774, "grad_norm": 0.6214480996131897, "learning_rate": 9.900570642457327e-06, "loss": 0.1776, "step": 849 }, { "epoch": 0.2754374594944913, "grad_norm": 0.5912626385688782, "learning_rate": 9.900223237496134e-06, "loss": 0.1607, "step": 850 }, { "epoch": 0.27576150356448476, "grad_norm": 0.6071320176124573, "learning_rate": 9.899875232795054e-06, "loss": 0.1859, "step": 851 }, { "epoch": 0.2760855476344783, "grad_norm": 0.5959563851356506, "learning_rate": 9.899526628396678e-06, "loss": 0.1746, "step": 852 }, { "epoch": 0.2764095917044718, "grad_norm": 0.6067162752151489, "learning_rate": 9.899177424343676e-06, "loss": 0.1836, "step": 853 }, { "epoch": 0.2767336357744653, "grad_norm": 0.6264017224311829, "learning_rate": 9.898827620678784e-06, "loss": 0.1792, "step": 854 }, { "epoch": 0.27705767984445884, "grad_norm": 0.6678988933563232, "learning_rate": 9.898477217444817e-06, "loss": 0.1834, "step": 855 }, { "epoch": 0.27738172391445237, "grad_norm": 0.6405592560768127, "learning_rate": 9.898126214684658e-06, "loss": 0.2035, "step": 856 }, { "epoch": 0.2777057679844459, "grad_norm": 0.6418700814247131, "learning_rate": 9.897774612441268e-06, "loss": 0.204, "step": 857 }, { "epoch": 0.2780298120544394, "grad_norm": 0.6275181174278259, "learning_rate": 9.89742241075768e-06, "loss": 0.169, "step": 858 }, { "epoch": 0.2783538561244329, "grad_norm": 0.5936182141304016, "learning_rate": 9.897069609677e-06, "loss": 0.1709, "step": 859 }, { "epoch": 0.27867790019442645, "grad_norm": 0.5788288712501526, "learning_rate": 9.896716209242405e-06, "loss": 0.1734, "step": 860 }, { "epoch": 0.27900194426442, "grad_norm": 0.6212310791015625, "learning_rate": 9.896362209497152e-06, "loss": 0.1745, "step": 861 }, { "epoch": 0.27932598833441347, "grad_norm": 0.6616557240486145, "learning_rate": 9.896007610484564e-06, "loss": 0.1905, "step": 862 }, { "epoch": 0.279650032404407, "grad_norm": 0.629759669303894, "learning_rate": 9.895652412248043e-06, "loss": 0.1706, "step": 863 }, { "epoch": 0.27997407647440054, "grad_norm": 0.5903453826904297, "learning_rate": 9.895296614831058e-06, "loss": 0.1826, "step": 864 }, { "epoch": 0.280298120544394, "grad_norm": 0.6063392758369446, "learning_rate": 9.894940218277158e-06, "loss": 0.1534, "step": 865 }, { "epoch": 0.28062216461438755, "grad_norm": 0.6047586798667908, "learning_rate": 9.894583222629963e-06, "loss": 0.1533, "step": 866 }, { "epoch": 0.2809462086843811, "grad_norm": 0.6632272005081177, "learning_rate": 9.894225627933165e-06, "loss": 0.1935, "step": 867 }, { "epoch": 0.2812702527543746, "grad_norm": 0.5961798429489136, "learning_rate": 9.893867434230529e-06, "loss": 0.1763, "step": 868 }, { "epoch": 0.2815942968243681, "grad_norm": 0.6355636715888977, "learning_rate": 9.893508641565896e-06, "loss": 0.1849, "step": 869 }, { "epoch": 0.28191834089436163, "grad_norm": 0.6150272488594055, "learning_rate": 9.893149249983178e-06, "loss": 0.1868, "step": 870 }, { "epoch": 0.28224238496435516, "grad_norm": 0.7121578454971313, "learning_rate": 9.892789259526361e-06, "loss": 0.1916, "step": 871 }, { "epoch": 0.2825664290343487, "grad_norm": 0.5670008063316345, "learning_rate": 9.892428670239504e-06, "loss": 0.1643, "step": 872 }, { "epoch": 0.2828904731043422, "grad_norm": 0.5904400944709778, "learning_rate": 9.89206748216674e-06, "loss": 0.168, "step": 873 }, { "epoch": 0.2832145171743357, "grad_norm": 0.6115680932998657, "learning_rate": 9.891705695352276e-06, "loss": 0.1887, "step": 874 }, { "epoch": 0.28353856124432925, "grad_norm": 0.6273742914199829, "learning_rate": 9.89134330984039e-06, "loss": 0.1884, "step": 875 }, { "epoch": 0.2838626053143227, "grad_norm": 0.6091495156288147, "learning_rate": 9.890980325675436e-06, "loss": 0.1782, "step": 876 }, { "epoch": 0.28418664938431626, "grad_norm": 0.6677464246749878, "learning_rate": 9.890616742901837e-06, "loss": 0.1734, "step": 877 }, { "epoch": 0.2845106934543098, "grad_norm": 0.7175049781799316, "learning_rate": 9.890252561564094e-06, "loss": 0.1725, "step": 878 }, { "epoch": 0.2848347375243033, "grad_norm": 0.6149059534072876, "learning_rate": 9.889887781706777e-06, "loss": 0.1907, "step": 879 }, { "epoch": 0.2851587815942968, "grad_norm": 0.6019971370697021, "learning_rate": 9.889522403374536e-06, "loss": 0.1764, "step": 880 }, { "epoch": 0.28548282566429034, "grad_norm": 0.6241403222084045, "learning_rate": 9.889156426612086e-06, "loss": 0.1646, "step": 881 }, { "epoch": 0.2858068697342839, "grad_norm": 0.6610787510871887, "learning_rate": 9.88878985146422e-06, "loss": 0.199, "step": 882 }, { "epoch": 0.2861309138042774, "grad_norm": 0.6264455318450928, "learning_rate": 9.888422677975801e-06, "loss": 0.1943, "step": 883 }, { "epoch": 0.2864549578742709, "grad_norm": 0.6266207098960876, "learning_rate": 9.888054906191773e-06, "loss": 0.1757, "step": 884 }, { "epoch": 0.2867790019442644, "grad_norm": 0.5767809748649597, "learning_rate": 9.887686536157145e-06, "loss": 0.1659, "step": 885 }, { "epoch": 0.28710304601425796, "grad_norm": 0.6064203977584839, "learning_rate": 9.887317567917e-06, "loss": 0.1764, "step": 886 }, { "epoch": 0.28742709008425144, "grad_norm": 0.6159385442733765, "learning_rate": 9.886948001516497e-06, "loss": 0.1706, "step": 887 }, { "epoch": 0.28775113415424497, "grad_norm": 0.6020685434341431, "learning_rate": 9.886577837000869e-06, "loss": 0.1836, "step": 888 }, { "epoch": 0.2880751782242385, "grad_norm": 0.6020832657814026, "learning_rate": 9.88620707441542e-06, "loss": 0.1691, "step": 889 }, { "epoch": 0.28839922229423204, "grad_norm": 0.5680634379386902, "learning_rate": 9.885835713805526e-06, "loss": 0.1566, "step": 890 }, { "epoch": 0.2887232663642255, "grad_norm": 0.649798572063446, "learning_rate": 9.885463755216638e-06, "loss": 0.188, "step": 891 }, { "epoch": 0.28904731043421905, "grad_norm": 0.5846118927001953, "learning_rate": 9.885091198694283e-06, "loss": 0.1695, "step": 892 }, { "epoch": 0.2893713545042126, "grad_norm": 0.6667026877403259, "learning_rate": 9.884718044284056e-06, "loss": 0.185, "step": 893 }, { "epoch": 0.28969539857420606, "grad_norm": 0.5462652444839478, "learning_rate": 9.884344292031629e-06, "loss": 0.1551, "step": 894 }, { "epoch": 0.2900194426441996, "grad_norm": 0.6042917966842651, "learning_rate": 9.883969941982744e-06, "loss": 0.1671, "step": 895 }, { "epoch": 0.29034348671419313, "grad_norm": 0.6363921165466309, "learning_rate": 9.883594994183219e-06, "loss": 0.1793, "step": 896 }, { "epoch": 0.29066753078418667, "grad_norm": 0.6049078106880188, "learning_rate": 9.883219448678945e-06, "loss": 0.168, "step": 897 }, { "epoch": 0.29099157485418015, "grad_norm": 0.6222366094589233, "learning_rate": 9.88284330551588e-06, "loss": 0.1908, "step": 898 }, { "epoch": 0.2913156189241737, "grad_norm": 0.5962700843811035, "learning_rate": 9.882466564740067e-06, "loss": 0.1711, "step": 899 }, { "epoch": 0.2916396629941672, "grad_norm": 0.6368878483772278, "learning_rate": 9.882089226397614e-06, "loss": 0.1841, "step": 900 }, { "epoch": 0.29196370706416075, "grad_norm": 0.6085599660873413, "learning_rate": 9.881711290534699e-06, "loss": 0.1715, "step": 901 }, { "epoch": 0.29228775113415423, "grad_norm": 0.567490816116333, "learning_rate": 9.88133275719758e-06, "loss": 0.1601, "step": 902 }, { "epoch": 0.29261179520414776, "grad_norm": 0.6248665452003479, "learning_rate": 9.880953626432588e-06, "loss": 0.1835, "step": 903 }, { "epoch": 0.2929358392741413, "grad_norm": 0.581226110458374, "learning_rate": 9.880573898286123e-06, "loss": 0.1779, "step": 904 }, { "epoch": 0.2932598833441348, "grad_norm": 0.6184927225112915, "learning_rate": 9.880193572804662e-06, "loss": 0.1796, "step": 905 }, { "epoch": 0.2935839274141283, "grad_norm": 0.6253811120986938, "learning_rate": 9.879812650034748e-06, "loss": 0.1709, "step": 906 }, { "epoch": 0.29390797148412184, "grad_norm": 0.6019658446311951, "learning_rate": 9.879431130023009e-06, "loss": 0.1761, "step": 907 }, { "epoch": 0.2942320155541154, "grad_norm": 0.597907304763794, "learning_rate": 9.879049012816136e-06, "loss": 0.1744, "step": 908 }, { "epoch": 0.29455605962410886, "grad_norm": 0.5939311385154724, "learning_rate": 9.878666298460895e-06, "loss": 0.179, "step": 909 }, { "epoch": 0.2948801036941024, "grad_norm": 0.5980244278907776, "learning_rate": 9.878282987004131e-06, "loss": 0.19, "step": 910 }, { "epoch": 0.2952041477640959, "grad_norm": 0.6104245781898499, "learning_rate": 9.877899078492752e-06, "loss": 0.1696, "step": 911 }, { "epoch": 0.29552819183408946, "grad_norm": 0.5415026545524597, "learning_rate": 9.877514572973748e-06, "loss": 0.1523, "step": 912 }, { "epoch": 0.29585223590408294, "grad_norm": 0.6522943377494812, "learning_rate": 9.87712947049418e-06, "loss": 0.1712, "step": 913 }, { "epoch": 0.2961762799740765, "grad_norm": 0.6318775415420532, "learning_rate": 9.876743771101178e-06, "loss": 0.1744, "step": 914 }, { "epoch": 0.29650032404407, "grad_norm": 0.5871029496192932, "learning_rate": 9.876357474841949e-06, "loss": 0.1605, "step": 915 }, { "epoch": 0.2968243681140635, "grad_norm": 0.5952814817428589, "learning_rate": 9.875970581763771e-06, "loss": 0.1761, "step": 916 }, { "epoch": 0.297148412184057, "grad_norm": 0.5859237313270569, "learning_rate": 9.875583091913999e-06, "loss": 0.1583, "step": 917 }, { "epoch": 0.29747245625405055, "grad_norm": 0.58506178855896, "learning_rate": 9.875195005340054e-06, "loss": 0.1576, "step": 918 }, { "epoch": 0.2977965003240441, "grad_norm": 0.6198616027832031, "learning_rate": 9.874806322089437e-06, "loss": 0.1822, "step": 919 }, { "epoch": 0.29812054439403757, "grad_norm": 0.5580076575279236, "learning_rate": 9.874417042209717e-06, "loss": 0.1594, "step": 920 }, { "epoch": 0.2984445884640311, "grad_norm": 0.610212504863739, "learning_rate": 9.874027165748538e-06, "loss": 0.1668, "step": 921 }, { "epoch": 0.29876863253402464, "grad_norm": 0.6204732656478882, "learning_rate": 9.87363669275362e-06, "loss": 0.179, "step": 922 }, { "epoch": 0.29909267660401817, "grad_norm": 0.6051819920539856, "learning_rate": 9.873245623272752e-06, "loss": 0.1844, "step": 923 }, { "epoch": 0.29941672067401165, "grad_norm": 0.6267421841621399, "learning_rate": 9.872853957353794e-06, "loss": 0.1676, "step": 924 }, { "epoch": 0.2997407647440052, "grad_norm": 0.5782517790794373, "learning_rate": 9.872461695044686e-06, "loss": 0.1619, "step": 925 }, { "epoch": 0.3000648088139987, "grad_norm": 0.6506045460700989, "learning_rate": 9.872068836393437e-06, "loss": 0.1812, "step": 926 }, { "epoch": 0.3003888528839922, "grad_norm": 0.6031262874603271, "learning_rate": 9.871675381448126e-06, "loss": 0.1848, "step": 927 }, { "epoch": 0.30071289695398573, "grad_norm": 0.6256006360054016, "learning_rate": 9.871281330256907e-06, "loss": 0.1712, "step": 928 }, { "epoch": 0.30103694102397927, "grad_norm": 0.5751093626022339, "learning_rate": 9.870886682868015e-06, "loss": 0.1631, "step": 929 }, { "epoch": 0.3013609850939728, "grad_norm": 0.6084175705909729, "learning_rate": 9.870491439329745e-06, "loss": 0.1729, "step": 930 }, { "epoch": 0.3016850291639663, "grad_norm": 0.5739203691482544, "learning_rate": 9.870095599690475e-06, "loss": 0.1723, "step": 931 }, { "epoch": 0.3020090732339598, "grad_norm": 0.6068074107170105, "learning_rate": 9.869699163998647e-06, "loss": 0.1681, "step": 932 }, { "epoch": 0.30233311730395335, "grad_norm": 0.6015316843986511, "learning_rate": 9.869302132302785e-06, "loss": 0.1656, "step": 933 }, { "epoch": 0.3026571613739469, "grad_norm": 0.624249279499054, "learning_rate": 9.86890450465148e-06, "loss": 0.1888, "step": 934 }, { "epoch": 0.30298120544394036, "grad_norm": 0.6235883831977844, "learning_rate": 9.8685062810934e-06, "loss": 0.1834, "step": 935 }, { "epoch": 0.3033052495139339, "grad_norm": 0.6266400218009949, "learning_rate": 9.868107461677279e-06, "loss": 0.1821, "step": 936 }, { "epoch": 0.30362929358392743, "grad_norm": 0.6055128574371338, "learning_rate": 9.867708046451933e-06, "loss": 0.1687, "step": 937 }, { "epoch": 0.3039533376539209, "grad_norm": 0.6110880970954895, "learning_rate": 9.867308035466245e-06, "loss": 0.1732, "step": 938 }, { "epoch": 0.30427738172391444, "grad_norm": 0.5936911106109619, "learning_rate": 9.866907428769175e-06, "loss": 0.1693, "step": 939 }, { "epoch": 0.304601425793908, "grad_norm": 0.5937286019325256, "learning_rate": 9.866506226409748e-06, "loss": 0.1866, "step": 940 }, { "epoch": 0.3049254698639015, "grad_norm": 0.5901283621788025, "learning_rate": 9.86610442843707e-06, "loss": 0.1759, "step": 941 }, { "epoch": 0.305249513933895, "grad_norm": 0.6732702255249023, "learning_rate": 9.86570203490032e-06, "loss": 0.1953, "step": 942 }, { "epoch": 0.3055735580038885, "grad_norm": 0.6250772476196289, "learning_rate": 9.865299045848744e-06, "loss": 0.1822, "step": 943 }, { "epoch": 0.30589760207388206, "grad_norm": 0.6017111539840698, "learning_rate": 9.864895461331664e-06, "loss": 0.1741, "step": 944 }, { "epoch": 0.3062216461438756, "grad_norm": 0.5704911947250366, "learning_rate": 9.864491281398476e-06, "loss": 0.1534, "step": 945 }, { "epoch": 0.30654569021386907, "grad_norm": 0.5726393461227417, "learning_rate": 9.864086506098646e-06, "loss": 0.1738, "step": 946 }, { "epoch": 0.3068697342838626, "grad_norm": 0.6365552544593811, "learning_rate": 9.863681135481715e-06, "loss": 0.1867, "step": 947 }, { "epoch": 0.30719377835385614, "grad_norm": 0.5838772654533386, "learning_rate": 9.8632751695973e-06, "loss": 0.1651, "step": 948 }, { "epoch": 0.3075178224238496, "grad_norm": 0.6326878070831299, "learning_rate": 9.862868608495082e-06, "loss": 0.2011, "step": 949 }, { "epoch": 0.30784186649384315, "grad_norm": 0.6173710823059082, "learning_rate": 9.862461452224823e-06, "loss": 0.1722, "step": 950 }, { "epoch": 0.3081659105638367, "grad_norm": 0.5816516280174255, "learning_rate": 9.862053700836353e-06, "loss": 0.1808, "step": 951 }, { "epoch": 0.3084899546338302, "grad_norm": 0.613852322101593, "learning_rate": 9.861645354379581e-06, "loss": 0.183, "step": 952 }, { "epoch": 0.3088139987038237, "grad_norm": 0.5804192423820496, "learning_rate": 9.86123641290448e-06, "loss": 0.1585, "step": 953 }, { "epoch": 0.30913804277381723, "grad_norm": 0.580578625202179, "learning_rate": 9.860826876461105e-06, "loss": 0.1616, "step": 954 }, { "epoch": 0.30946208684381077, "grad_norm": 0.5804625153541565, "learning_rate": 9.860416745099574e-06, "loss": 0.167, "step": 955 }, { "epoch": 0.3097861309138043, "grad_norm": 0.5484138131141663, "learning_rate": 9.860006018870087e-06, "loss": 0.1573, "step": 956 }, { "epoch": 0.3101101749837978, "grad_norm": 0.6087974309921265, "learning_rate": 9.859594697822913e-06, "loss": 0.1733, "step": 957 }, { "epoch": 0.3104342190537913, "grad_norm": 0.5645603537559509, "learning_rate": 9.859182782008392e-06, "loss": 0.162, "step": 958 }, { "epoch": 0.31075826312378485, "grad_norm": 0.7392151355743408, "learning_rate": 9.858770271476939e-06, "loss": 0.187, "step": 959 }, { "epoch": 0.31108230719377833, "grad_norm": 0.552918553352356, "learning_rate": 9.85835716627904e-06, "loss": 0.1602, "step": 960 }, { "epoch": 0.31140635126377186, "grad_norm": 0.6616688966751099, "learning_rate": 9.857943466465256e-06, "loss": 0.1907, "step": 961 }, { "epoch": 0.3117303953337654, "grad_norm": 0.5790378451347351, "learning_rate": 9.857529172086222e-06, "loss": 0.166, "step": 962 }, { "epoch": 0.31205443940375893, "grad_norm": 0.6137049198150635, "learning_rate": 9.857114283192641e-06, "loss": 0.1772, "step": 963 }, { "epoch": 0.3123784834737524, "grad_norm": 0.6005215644836426, "learning_rate": 9.856698799835292e-06, "loss": 0.173, "step": 964 }, { "epoch": 0.31270252754374595, "grad_norm": 0.6044658422470093, "learning_rate": 9.856282722065026e-06, "loss": 0.1819, "step": 965 }, { "epoch": 0.3130265716137395, "grad_norm": 0.5882754921913147, "learning_rate": 9.85586604993277e-06, "loss": 0.1643, "step": 966 }, { "epoch": 0.313350615683733, "grad_norm": 0.5900786519050598, "learning_rate": 9.855448783489517e-06, "loss": 0.1663, "step": 967 }, { "epoch": 0.3136746597537265, "grad_norm": 0.5808376669883728, "learning_rate": 9.855030922786334e-06, "loss": 0.1672, "step": 968 }, { "epoch": 0.31399870382372, "grad_norm": 0.5942047834396362, "learning_rate": 9.85461246787437e-06, "loss": 0.1779, "step": 969 }, { "epoch": 0.31432274789371356, "grad_norm": 0.587271511554718, "learning_rate": 9.854193418804835e-06, "loss": 0.1588, "step": 970 }, { "epoch": 0.31464679196370704, "grad_norm": 0.6117364764213562, "learning_rate": 9.853773775629018e-06, "loss": 0.1686, "step": 971 }, { "epoch": 0.3149708360337006, "grad_norm": 0.5825042724609375, "learning_rate": 9.853353538398278e-06, "loss": 0.1714, "step": 972 }, { "epoch": 0.3152948801036941, "grad_norm": 0.5845495462417603, "learning_rate": 9.852932707164051e-06, "loss": 0.1677, "step": 973 }, { "epoch": 0.31561892417368764, "grad_norm": 0.6072530746459961, "learning_rate": 9.852511281977838e-06, "loss": 0.1763, "step": 974 }, { "epoch": 0.3159429682436811, "grad_norm": 0.5852010846138, "learning_rate": 9.852089262891222e-06, "loss": 0.1764, "step": 975 }, { "epoch": 0.31626701231367466, "grad_norm": 0.5684648752212524, "learning_rate": 9.851666649955853e-06, "loss": 0.1727, "step": 976 }, { "epoch": 0.3165910563836682, "grad_norm": 0.5730621218681335, "learning_rate": 9.851243443223451e-06, "loss": 0.1789, "step": 977 }, { "epoch": 0.3169151004536617, "grad_norm": 0.5981029272079468, "learning_rate": 9.850819642745816e-06, "loss": 0.1709, "step": 978 }, { "epoch": 0.3172391445236552, "grad_norm": 0.6033071279525757, "learning_rate": 9.850395248574818e-06, "loss": 0.1869, "step": 979 }, { "epoch": 0.31756318859364874, "grad_norm": 0.632879912853241, "learning_rate": 9.849970260762396e-06, "loss": 0.1974, "step": 980 }, { "epoch": 0.31788723266364227, "grad_norm": 0.6106759309768677, "learning_rate": 9.849544679360566e-06, "loss": 0.1572, "step": 981 }, { "epoch": 0.31821127673363575, "grad_norm": 0.6135556697845459, "learning_rate": 9.849118504421413e-06, "loss": 0.188, "step": 982 }, { "epoch": 0.3185353208036293, "grad_norm": 0.5774173140525818, "learning_rate": 9.8486917359971e-06, "loss": 0.1806, "step": 983 }, { "epoch": 0.3188593648736228, "grad_norm": 0.5769929885864258, "learning_rate": 9.848264374139855e-06, "loss": 0.1675, "step": 984 }, { "epoch": 0.31918340894361635, "grad_norm": 0.5752699971199036, "learning_rate": 9.847836418901988e-06, "loss": 0.1764, "step": 985 }, { "epoch": 0.31950745301360983, "grad_norm": 0.5664514899253845, "learning_rate": 9.847407870335873e-06, "loss": 0.1677, "step": 986 }, { "epoch": 0.31983149708360337, "grad_norm": 0.6190240979194641, "learning_rate": 9.846978728493961e-06, "loss": 0.1871, "step": 987 }, { "epoch": 0.3201555411535969, "grad_norm": 0.592129111289978, "learning_rate": 9.846548993428775e-06, "loss": 0.1748, "step": 988 }, { "epoch": 0.32047958522359044, "grad_norm": 0.6201058030128479, "learning_rate": 9.846118665192912e-06, "loss": 0.1767, "step": 989 }, { "epoch": 0.3208036292935839, "grad_norm": 0.6250030994415283, "learning_rate": 9.845687743839036e-06, "loss": 0.1727, "step": 990 }, { "epoch": 0.32112767336357745, "grad_norm": 0.5911518931388855, "learning_rate": 9.84525622941989e-06, "loss": 0.1792, "step": 991 }, { "epoch": 0.321451717433571, "grad_norm": 0.5523138642311096, "learning_rate": 9.84482412198829e-06, "loss": 0.1637, "step": 992 }, { "epoch": 0.32177576150356446, "grad_norm": 0.6329092383384705, "learning_rate": 9.844391421597118e-06, "loss": 0.1785, "step": 993 }, { "epoch": 0.322099805573558, "grad_norm": 0.6084178686141968, "learning_rate": 9.843958128299331e-06, "loss": 0.1555, "step": 994 }, { "epoch": 0.32242384964355153, "grad_norm": 0.6443354487419128, "learning_rate": 9.843524242147965e-06, "loss": 0.1761, "step": 995 }, { "epoch": 0.32274789371354506, "grad_norm": 0.5739508271217346, "learning_rate": 9.843089763196119e-06, "loss": 0.1739, "step": 996 }, { "epoch": 0.32307193778353854, "grad_norm": 0.5695315599441528, "learning_rate": 9.842654691496972e-06, "loss": 0.1785, "step": 997 }, { "epoch": 0.3233959818535321, "grad_norm": 0.6078848838806152, "learning_rate": 9.84221902710377e-06, "loss": 0.1765, "step": 998 }, { "epoch": 0.3237200259235256, "grad_norm": 0.5366340279579163, "learning_rate": 9.841782770069837e-06, "loss": 0.1437, "step": 999 }, { "epoch": 0.32404406999351915, "grad_norm": 0.5579332709312439, "learning_rate": 9.841345920448564e-06, "loss": 0.1588, "step": 1000 }, { "epoch": 0.3243681140635126, "grad_norm": 0.5986455082893372, "learning_rate": 9.840908478293418e-06, "loss": 0.1844, "step": 1001 }, { "epoch": 0.32469215813350616, "grad_norm": 0.5817307233810425, "learning_rate": 9.84047044365794e-06, "loss": 0.1741, "step": 1002 }, { "epoch": 0.3250162022034997, "grad_norm": 0.5661544799804688, "learning_rate": 9.840031816595737e-06, "loss": 0.1472, "step": 1003 }, { "epoch": 0.32534024627349317, "grad_norm": 0.6002435684204102, "learning_rate": 9.839592597160494e-06, "loss": 0.1773, "step": 1004 }, { "epoch": 0.3256642903434867, "grad_norm": 0.5806878209114075, "learning_rate": 9.839152785405968e-06, "loss": 0.1658, "step": 1005 }, { "epoch": 0.32598833441348024, "grad_norm": 0.5673524141311646, "learning_rate": 9.838712381385989e-06, "loss": 0.1786, "step": 1006 }, { "epoch": 0.3263123784834738, "grad_norm": 0.6064449548721313, "learning_rate": 9.838271385154457e-06, "loss": 0.1831, "step": 1007 }, { "epoch": 0.32663642255346725, "grad_norm": 0.566112756729126, "learning_rate": 9.837829796765345e-06, "loss": 0.1513, "step": 1008 }, { "epoch": 0.3269604666234608, "grad_norm": 0.547829270362854, "learning_rate": 9.8373876162727e-06, "loss": 0.1711, "step": 1009 }, { "epoch": 0.3272845106934543, "grad_norm": 0.5666298270225525, "learning_rate": 9.83694484373064e-06, "loss": 0.1784, "step": 1010 }, { "epoch": 0.3276085547634478, "grad_norm": 0.6046202778816223, "learning_rate": 9.836501479193356e-06, "loss": 0.1555, "step": 1011 }, { "epoch": 0.32793259883344134, "grad_norm": 0.5862842202186584, "learning_rate": 9.836057522715114e-06, "loss": 0.1766, "step": 1012 }, { "epoch": 0.32825664290343487, "grad_norm": 0.5938090085983276, "learning_rate": 9.835612974350247e-06, "loss": 0.1635, "step": 1013 }, { "epoch": 0.3285806869734284, "grad_norm": 0.5398661494255066, "learning_rate": 9.835167834153162e-06, "loss": 0.1575, "step": 1014 }, { "epoch": 0.3289047310434219, "grad_norm": 0.5675678253173828, "learning_rate": 9.834722102178344e-06, "loss": 0.1592, "step": 1015 }, { "epoch": 0.3292287751134154, "grad_norm": 0.6059688329696655, "learning_rate": 9.834275778480345e-06, "loss": 0.1849, "step": 1016 }, { "epoch": 0.32955281918340895, "grad_norm": 0.5854890942573547, "learning_rate": 9.83382886311379e-06, "loss": 0.1751, "step": 1017 }, { "epoch": 0.3298768632534025, "grad_norm": 0.592852771282196, "learning_rate": 9.833381356133376e-06, "loss": 0.1771, "step": 1018 }, { "epoch": 0.33020090732339596, "grad_norm": 0.6050461530685425, "learning_rate": 9.832933257593875e-06, "loss": 0.1778, "step": 1019 }, { "epoch": 0.3305249513933895, "grad_norm": 0.5728069543838501, "learning_rate": 9.832484567550131e-06, "loss": 0.1707, "step": 1020 }, { "epoch": 0.33084899546338303, "grad_norm": 0.5393828749656677, "learning_rate": 9.832035286057057e-06, "loss": 0.1545, "step": 1021 }, { "epoch": 0.3311730395333765, "grad_norm": 0.6079297661781311, "learning_rate": 9.831585413169642e-06, "loss": 0.1579, "step": 1022 }, { "epoch": 0.33149708360337005, "grad_norm": 0.5757361054420471, "learning_rate": 9.831134948942945e-06, "loss": 0.1565, "step": 1023 }, { "epoch": 0.3318211276733636, "grad_norm": 0.5705800652503967, "learning_rate": 9.8306838934321e-06, "loss": 0.1531, "step": 1024 }, { "epoch": 0.3321451717433571, "grad_norm": 0.6327515244483948, "learning_rate": 9.83023224669231e-06, "loss": 0.1742, "step": 1025 }, { "epoch": 0.3324692158133506, "grad_norm": 0.6158990859985352, "learning_rate": 9.829780008778855e-06, "loss": 0.189, "step": 1026 }, { "epoch": 0.33279325988334413, "grad_norm": 0.563691258430481, "learning_rate": 9.829327179747083e-06, "loss": 0.1844, "step": 1027 }, { "epoch": 0.33311730395333766, "grad_norm": 0.5974298715591431, "learning_rate": 9.828873759652415e-06, "loss": 0.1708, "step": 1028 }, { "epoch": 0.3334413480233312, "grad_norm": 0.5656067728996277, "learning_rate": 9.828419748550345e-06, "loss": 0.1706, "step": 1029 }, { "epoch": 0.3337653920933247, "grad_norm": 0.6119730472564697, "learning_rate": 9.827965146496441e-06, "loss": 0.168, "step": 1030 }, { "epoch": 0.3340894361633182, "grad_norm": 0.5695846080780029, "learning_rate": 9.82750995354634e-06, "loss": 0.1653, "step": 1031 }, { "epoch": 0.33441348023331174, "grad_norm": 0.6336842775344849, "learning_rate": 9.827054169755759e-06, "loss": 0.1961, "step": 1032 }, { "epoch": 0.3347375243033052, "grad_norm": 0.5704305768013, "learning_rate": 9.826597795180474e-06, "loss": 0.174, "step": 1033 }, { "epoch": 0.33506156837329876, "grad_norm": 0.6015166640281677, "learning_rate": 9.826140829876344e-06, "loss": 0.1776, "step": 1034 }, { "epoch": 0.3353856124432923, "grad_norm": 0.5965490341186523, "learning_rate": 9.825683273899298e-06, "loss": 0.1764, "step": 1035 }, { "epoch": 0.3357096565132858, "grad_norm": 0.6270467638969421, "learning_rate": 9.825225127305334e-06, "loss": 0.1738, "step": 1036 }, { "epoch": 0.3360337005832793, "grad_norm": 0.5696086287498474, "learning_rate": 9.824766390150528e-06, "loss": 0.17, "step": 1037 }, { "epoch": 0.33635774465327284, "grad_norm": 0.5731741786003113, "learning_rate": 9.824307062491022e-06, "loss": 0.1727, "step": 1038 }, { "epoch": 0.3366817887232664, "grad_norm": 0.5837844014167786, "learning_rate": 9.823847144383035e-06, "loss": 0.1857, "step": 1039 }, { "epoch": 0.3370058327932599, "grad_norm": 0.6010358929634094, "learning_rate": 9.823386635882856e-06, "loss": 0.1534, "step": 1040 }, { "epoch": 0.3373298768632534, "grad_norm": 0.5789628028869629, "learning_rate": 9.822925537046846e-06, "loss": 0.166, "step": 1041 }, { "epoch": 0.3376539209332469, "grad_norm": 0.575428307056427, "learning_rate": 9.822463847931441e-06, "loss": 0.1704, "step": 1042 }, { "epoch": 0.33797796500324045, "grad_norm": 0.5323755145072937, "learning_rate": 9.822001568593144e-06, "loss": 0.1743, "step": 1043 }, { "epoch": 0.33830200907323393, "grad_norm": 0.6056416630744934, "learning_rate": 9.821538699088538e-06, "loss": 0.1722, "step": 1044 }, { "epoch": 0.33862605314322747, "grad_norm": 0.5860357880592346, "learning_rate": 9.821075239474271e-06, "loss": 0.1607, "step": 1045 }, { "epoch": 0.338950097213221, "grad_norm": 0.59328293800354, "learning_rate": 9.820611189807065e-06, "loss": 0.1749, "step": 1046 }, { "epoch": 0.33927414128321454, "grad_norm": 0.5994810461997986, "learning_rate": 9.820146550143717e-06, "loss": 0.1937, "step": 1047 }, { "epoch": 0.339598185353208, "grad_norm": 0.6299530863761902, "learning_rate": 9.819681320541094e-06, "loss": 0.1882, "step": 1048 }, { "epoch": 0.33992222942320155, "grad_norm": 0.5794624090194702, "learning_rate": 9.819215501056136e-06, "loss": 0.1664, "step": 1049 }, { "epoch": 0.3402462734931951, "grad_norm": 0.5705100297927856, "learning_rate": 9.818749091745854e-06, "loss": 0.1631, "step": 1050 }, { "epoch": 0.3405703175631886, "grad_norm": 0.5567938685417175, "learning_rate": 9.818282092667332e-06, "loss": 0.1465, "step": 1051 }, { "epoch": 0.3408943616331821, "grad_norm": 0.6096282005310059, "learning_rate": 9.817814503877728e-06, "loss": 0.1684, "step": 1052 }, { "epoch": 0.34121840570317563, "grad_norm": 0.6146951913833618, "learning_rate": 9.817346325434266e-06, "loss": 0.1739, "step": 1053 }, { "epoch": 0.34154244977316917, "grad_norm": 0.5440871715545654, "learning_rate": 9.81687755739425e-06, "loss": 0.1598, "step": 1054 }, { "epoch": 0.34186649384316264, "grad_norm": 0.6106154918670654, "learning_rate": 9.816408199815054e-06, "loss": 0.1765, "step": 1055 }, { "epoch": 0.3421905379131562, "grad_norm": 0.6298217177391052, "learning_rate": 9.815938252754117e-06, "loss": 0.1849, "step": 1056 }, { "epoch": 0.3425145819831497, "grad_norm": 0.5941401720046997, "learning_rate": 9.815467716268964e-06, "loss": 0.1741, "step": 1057 }, { "epoch": 0.34283862605314325, "grad_norm": 0.5596537590026855, "learning_rate": 9.814996590417178e-06, "loss": 0.1651, "step": 1058 }, { "epoch": 0.3431626701231367, "grad_norm": 0.5766615867614746, "learning_rate": 9.814524875256422e-06, "loss": 0.1774, "step": 1059 }, { "epoch": 0.34348671419313026, "grad_norm": 0.5682839155197144, "learning_rate": 9.81405257084443e-06, "loss": 0.174, "step": 1060 }, { "epoch": 0.3438107582631238, "grad_norm": 0.6307297348976135, "learning_rate": 9.813579677239008e-06, "loss": 0.1799, "step": 1061 }, { "epoch": 0.34413480233311733, "grad_norm": 0.5764597058296204, "learning_rate": 9.81310619449803e-06, "loss": 0.1824, "step": 1062 }, { "epoch": 0.3444588464031108, "grad_norm": 0.5531014204025269, "learning_rate": 9.812632122679448e-06, "loss": 0.157, "step": 1063 }, { "epoch": 0.34478289047310434, "grad_norm": 0.5836609601974487, "learning_rate": 9.812157461841287e-06, "loss": 0.1764, "step": 1064 }, { "epoch": 0.3451069345430979, "grad_norm": 0.5902723073959351, "learning_rate": 9.811682212041636e-06, "loss": 0.1738, "step": 1065 }, { "epoch": 0.34543097861309136, "grad_norm": 0.629046618938446, "learning_rate": 9.811206373338664e-06, "loss": 0.1966, "step": 1066 }, { "epoch": 0.3457550226830849, "grad_norm": 0.63542640209198, "learning_rate": 9.810729945790607e-06, "loss": 0.1631, "step": 1067 }, { "epoch": 0.3460790667530784, "grad_norm": 0.6045624613761902, "learning_rate": 9.810252929455777e-06, "loss": 0.1834, "step": 1068 }, { "epoch": 0.34640311082307196, "grad_norm": 0.5919462442398071, "learning_rate": 9.809775324392554e-06, "loss": 0.1691, "step": 1069 }, { "epoch": 0.34672715489306544, "grad_norm": 0.5924056172370911, "learning_rate": 9.809297130659394e-06, "loss": 0.1504, "step": 1070 }, { "epoch": 0.34705119896305897, "grad_norm": 0.5948939323425293, "learning_rate": 9.808818348314824e-06, "loss": 0.1856, "step": 1071 }, { "epoch": 0.3473752430330525, "grad_norm": 0.575698733329773, "learning_rate": 9.80833897741744e-06, "loss": 0.1736, "step": 1072 }, { "epoch": 0.34769928710304604, "grad_norm": 0.5729328393936157, "learning_rate": 9.807859018025914e-06, "loss": 0.169, "step": 1073 }, { "epoch": 0.3480233311730395, "grad_norm": 0.5551473498344421, "learning_rate": 9.807378470198987e-06, "loss": 0.1544, "step": 1074 }, { "epoch": 0.34834737524303305, "grad_norm": 0.5853785276412964, "learning_rate": 9.806897333995475e-06, "loss": 0.1683, "step": 1075 }, { "epoch": 0.3486714193130266, "grad_norm": 0.5476290583610535, "learning_rate": 9.806415609474264e-06, "loss": 0.1627, "step": 1076 }, { "epoch": 0.34899546338302007, "grad_norm": 0.6016427278518677, "learning_rate": 9.805933296694312e-06, "loss": 0.172, "step": 1077 }, { "epoch": 0.3493195074530136, "grad_norm": 0.5918509364128113, "learning_rate": 9.80545039571465e-06, "loss": 0.1643, "step": 1078 }, { "epoch": 0.34964355152300713, "grad_norm": 0.6145140528678894, "learning_rate": 9.804966906594378e-06, "loss": 0.1733, "step": 1079 }, { "epoch": 0.34996759559300067, "grad_norm": 0.5936920642852783, "learning_rate": 9.804482829392674e-06, "loss": 0.1788, "step": 1080 }, { "epoch": 0.35029163966299415, "grad_norm": 0.5736120343208313, "learning_rate": 9.803998164168783e-06, "loss": 0.1561, "step": 1081 }, { "epoch": 0.3506156837329877, "grad_norm": 0.5619922280311584, "learning_rate": 9.803512910982022e-06, "loss": 0.1539, "step": 1082 }, { "epoch": 0.3509397278029812, "grad_norm": 0.5910178422927856, "learning_rate": 9.803027069891782e-06, "loss": 0.1886, "step": 1083 }, { "epoch": 0.35126377187297475, "grad_norm": 0.5958241820335388, "learning_rate": 9.802540640957526e-06, "loss": 0.1782, "step": 1084 }, { "epoch": 0.35158781594296823, "grad_norm": 0.5879762768745422, "learning_rate": 9.802053624238788e-06, "loss": 0.168, "step": 1085 }, { "epoch": 0.35191186001296176, "grad_norm": 0.6157310009002686, "learning_rate": 9.801566019795175e-06, "loss": 0.1748, "step": 1086 }, { "epoch": 0.3522359040829553, "grad_norm": 0.6026695370674133, "learning_rate": 9.801077827686361e-06, "loss": 0.173, "step": 1087 }, { "epoch": 0.3525599481529488, "grad_norm": 0.5551148056983948, "learning_rate": 9.800589047972101e-06, "loss": 0.1671, "step": 1088 }, { "epoch": 0.3528839922229423, "grad_norm": 0.6182596683502197, "learning_rate": 9.800099680712214e-06, "loss": 0.1984, "step": 1089 }, { "epoch": 0.35320803629293585, "grad_norm": 0.6011072397232056, "learning_rate": 9.799609725966596e-06, "loss": 0.186, "step": 1090 }, { "epoch": 0.3535320803629294, "grad_norm": 0.5565938353538513, "learning_rate": 9.799119183795209e-06, "loss": 0.1531, "step": 1091 }, { "epoch": 0.35385612443292286, "grad_norm": 0.5287664532661438, "learning_rate": 9.798628054258094e-06, "loss": 0.1679, "step": 1092 }, { "epoch": 0.3541801685029164, "grad_norm": 0.5842703580856323, "learning_rate": 9.79813633741536e-06, "loss": 0.173, "step": 1093 }, { "epoch": 0.3545042125729099, "grad_norm": 0.5793323516845703, "learning_rate": 9.79764403332719e-06, "loss": 0.1769, "step": 1094 }, { "epoch": 0.35482825664290346, "grad_norm": 0.5644661784172058, "learning_rate": 9.797151142053831e-06, "loss": 0.1684, "step": 1095 }, { "epoch": 0.35515230071289694, "grad_norm": 0.6257332563400269, "learning_rate": 9.796657663655616e-06, "loss": 0.1931, "step": 1096 }, { "epoch": 0.3554763447828905, "grad_norm": 0.5619518756866455, "learning_rate": 9.796163598192934e-06, "loss": 0.1623, "step": 1097 }, { "epoch": 0.355800388852884, "grad_norm": 0.5887840986251831, "learning_rate": 9.795668945726263e-06, "loss": 0.1617, "step": 1098 }, { "epoch": 0.3561244329228775, "grad_norm": 0.6297451853752136, "learning_rate": 9.795173706316135e-06, "loss": 0.1737, "step": 1099 }, { "epoch": 0.356448476992871, "grad_norm": 0.6247619390487671, "learning_rate": 9.794677880023166e-06, "loss": 0.1679, "step": 1100 }, { "epoch": 0.35677252106286456, "grad_norm": 0.586454451084137, "learning_rate": 9.794181466908042e-06, "loss": 0.1497, "step": 1101 }, { "epoch": 0.3570965651328581, "grad_norm": 0.6023052930831909, "learning_rate": 9.793684467031518e-06, "loss": 0.187, "step": 1102 }, { "epoch": 0.35742060920285157, "grad_norm": 0.6439177393913269, "learning_rate": 9.79318688045442e-06, "loss": 0.1871, "step": 1103 }, { "epoch": 0.3577446532728451, "grad_norm": 0.5750577449798584, "learning_rate": 9.792688707237651e-06, "loss": 0.1741, "step": 1104 }, { "epoch": 0.35806869734283864, "grad_norm": 0.574123740196228, "learning_rate": 9.792189947442182e-06, "loss": 0.1666, "step": 1105 }, { "epoch": 0.35839274141283217, "grad_norm": 0.5401131510734558, "learning_rate": 9.791690601129053e-06, "loss": 0.1553, "step": 1106 }, { "epoch": 0.35871678548282565, "grad_norm": 0.5300427675247192, "learning_rate": 9.791190668359383e-06, "loss": 0.1447, "step": 1107 }, { "epoch": 0.3590408295528192, "grad_norm": 0.6172694563865662, "learning_rate": 9.79069014919436e-06, "loss": 0.1956, "step": 1108 }, { "epoch": 0.3593648736228127, "grad_norm": 0.6034216284751892, "learning_rate": 9.790189043695235e-06, "loss": 0.1796, "step": 1109 }, { "epoch": 0.3596889176928062, "grad_norm": 0.5867305397987366, "learning_rate": 9.789687351923346e-06, "loss": 0.1755, "step": 1110 }, { "epoch": 0.36001296176279973, "grad_norm": 0.5331410765647888, "learning_rate": 9.789185073940095e-06, "loss": 0.1517, "step": 1111 }, { "epoch": 0.36033700583279327, "grad_norm": 0.5819225311279297, "learning_rate": 9.788682209806951e-06, "loss": 0.1544, "step": 1112 }, { "epoch": 0.3606610499027868, "grad_norm": 0.5827124118804932, "learning_rate": 9.788178759585463e-06, "loss": 0.1726, "step": 1113 }, { "epoch": 0.3609850939727803, "grad_norm": 0.5797318816184998, "learning_rate": 9.787674723337248e-06, "loss": 0.1692, "step": 1114 }, { "epoch": 0.3613091380427738, "grad_norm": 0.6464574933052063, "learning_rate": 9.787170101123997e-06, "loss": 0.1816, "step": 1115 }, { "epoch": 0.36163318211276735, "grad_norm": 0.5869221091270447, "learning_rate": 9.786664893007467e-06, "loss": 0.1788, "step": 1116 }, { "epoch": 0.3619572261827609, "grad_norm": 0.5861597061157227, "learning_rate": 9.786159099049494e-06, "loss": 0.1766, "step": 1117 }, { "epoch": 0.36228127025275436, "grad_norm": 0.584846019744873, "learning_rate": 9.78565271931198e-06, "loss": 0.165, "step": 1118 }, { "epoch": 0.3626053143227479, "grad_norm": 0.5789152979850769, "learning_rate": 9.785145753856901e-06, "loss": 0.1602, "step": 1119 }, { "epoch": 0.36292935839274143, "grad_norm": 0.5569080114364624, "learning_rate": 9.784638202746308e-06, "loss": 0.1514, "step": 1120 }, { "epoch": 0.3632534024627349, "grad_norm": 0.6112030744552612, "learning_rate": 9.784130066042316e-06, "loss": 0.1727, "step": 1121 }, { "epoch": 0.36357744653272844, "grad_norm": 0.5604220628738403, "learning_rate": 9.783621343807119e-06, "loss": 0.1753, "step": 1122 }, { "epoch": 0.363901490602722, "grad_norm": 0.543032169342041, "learning_rate": 9.783112036102978e-06, "loss": 0.1671, "step": 1123 }, { "epoch": 0.3642255346727155, "grad_norm": 0.6327988505363464, "learning_rate": 9.782602142992227e-06, "loss": 0.1836, "step": 1124 }, { "epoch": 0.364549578742709, "grad_norm": 0.5535159707069397, "learning_rate": 9.782091664537274e-06, "loss": 0.1622, "step": 1125 }, { "epoch": 0.3648736228127025, "grad_norm": 0.6538394689559937, "learning_rate": 9.781580600800595e-06, "loss": 0.1913, "step": 1126 }, { "epoch": 0.36519766688269606, "grad_norm": 0.568351149559021, "learning_rate": 9.781068951844739e-06, "loss": 0.1839, "step": 1127 }, { "epoch": 0.36552171095268954, "grad_norm": 0.5809906125068665, "learning_rate": 9.78055671773233e-06, "loss": 0.1791, "step": 1128 }, { "epoch": 0.3658457550226831, "grad_norm": 0.5744894742965698, "learning_rate": 9.780043898526054e-06, "loss": 0.1679, "step": 1129 }, { "epoch": 0.3661697990926766, "grad_norm": 0.5658349394798279, "learning_rate": 9.779530494288682e-06, "loss": 0.1688, "step": 1130 }, { "epoch": 0.36649384316267014, "grad_norm": 0.5783803462982178, "learning_rate": 9.779016505083047e-06, "loss": 0.1649, "step": 1131 }, { "epoch": 0.3668178872326636, "grad_norm": 0.6075659394264221, "learning_rate": 9.778501930972058e-06, "loss": 0.1846, "step": 1132 }, { "epoch": 0.36714193130265715, "grad_norm": 0.5325374603271484, "learning_rate": 9.77798677201869e-06, "loss": 0.1511, "step": 1133 }, { "epoch": 0.3674659753726507, "grad_norm": 0.5618516802787781, "learning_rate": 9.777471028285996e-06, "loss": 0.1561, "step": 1134 }, { "epoch": 0.3677900194426442, "grad_norm": 0.5520266890525818, "learning_rate": 9.776954699837097e-06, "loss": 0.1556, "step": 1135 }, { "epoch": 0.3681140635126377, "grad_norm": 0.56380695104599, "learning_rate": 9.776437786735187e-06, "loss": 0.1573, "step": 1136 }, { "epoch": 0.36843810758263124, "grad_norm": 0.5827215909957886, "learning_rate": 9.775920289043532e-06, "loss": 0.1581, "step": 1137 }, { "epoch": 0.36876215165262477, "grad_norm": 0.5803936719894409, "learning_rate": 9.775402206825468e-06, "loss": 0.1735, "step": 1138 }, { "epoch": 0.36908619572261825, "grad_norm": 0.5602681040763855, "learning_rate": 9.774883540144405e-06, "loss": 0.1567, "step": 1139 }, { "epoch": 0.3694102397926118, "grad_norm": 0.5713690519332886, "learning_rate": 9.774364289063817e-06, "loss": 0.1643, "step": 1140 }, { "epoch": 0.3697342838626053, "grad_norm": 0.5246069431304932, "learning_rate": 9.773844453647263e-06, "loss": 0.1551, "step": 1141 }, { "epoch": 0.37005832793259885, "grad_norm": 0.5214044451713562, "learning_rate": 9.773324033958363e-06, "loss": 0.1547, "step": 1142 }, { "epoch": 0.37038237200259233, "grad_norm": 0.5714074373245239, "learning_rate": 9.772803030060809e-06, "loss": 0.176, "step": 1143 }, { "epoch": 0.37070641607258586, "grad_norm": 0.5851354598999023, "learning_rate": 9.772281442018368e-06, "loss": 0.1579, "step": 1144 }, { "epoch": 0.3710304601425794, "grad_norm": 0.5684588551521301, "learning_rate": 9.771759269894878e-06, "loss": 0.1753, "step": 1145 }, { "epoch": 0.37135450421257293, "grad_norm": 0.5431103706359863, "learning_rate": 9.771236513754247e-06, "loss": 0.1529, "step": 1146 }, { "epoch": 0.3716785482825664, "grad_norm": 0.6564229726791382, "learning_rate": 9.770713173660458e-06, "loss": 0.1872, "step": 1147 }, { "epoch": 0.37200259235255995, "grad_norm": 0.5576322674751282, "learning_rate": 9.770189249677559e-06, "loss": 0.1544, "step": 1148 }, { "epoch": 0.3723266364225535, "grad_norm": 0.5599045753479004, "learning_rate": 9.769664741869676e-06, "loss": 0.1513, "step": 1149 }, { "epoch": 0.37265068049254696, "grad_norm": 0.5974574685096741, "learning_rate": 9.769139650301e-06, "loss": 0.1633, "step": 1150 }, { "epoch": 0.3729747245625405, "grad_norm": 0.5352615118026733, "learning_rate": 9.768613975035801e-06, "loss": 0.1458, "step": 1151 }, { "epoch": 0.37329876863253403, "grad_norm": 0.5834352374076843, "learning_rate": 9.768087716138417e-06, "loss": 0.1803, "step": 1152 }, { "epoch": 0.37362281270252756, "grad_norm": 0.5821603536605835, "learning_rate": 9.767560873673253e-06, "loss": 0.1678, "step": 1153 }, { "epoch": 0.37394685677252104, "grad_norm": 0.5908502340316772, "learning_rate": 9.767033447704792e-06, "loss": 0.1715, "step": 1154 }, { "epoch": 0.3742709008425146, "grad_norm": 0.5501680970191956, "learning_rate": 9.766505438297587e-06, "loss": 0.1656, "step": 1155 }, { "epoch": 0.3745949449125081, "grad_norm": 0.551476240158081, "learning_rate": 9.765976845516259e-06, "loss": 0.1651, "step": 1156 }, { "epoch": 0.37491898898250164, "grad_norm": 0.6020034551620483, "learning_rate": 9.7654476694255e-06, "loss": 0.1722, "step": 1157 }, { "epoch": 0.3752430330524951, "grad_norm": 0.5460115075111389, "learning_rate": 9.764917910090083e-06, "loss": 0.1485, "step": 1158 }, { "epoch": 0.37556707712248866, "grad_norm": 0.5414779782295227, "learning_rate": 9.764387567574842e-06, "loss": 0.1508, "step": 1159 }, { "epoch": 0.3758911211924822, "grad_norm": 0.5620276927947998, "learning_rate": 9.763856641944684e-06, "loss": 0.1762, "step": 1160 }, { "epoch": 0.37621516526247567, "grad_norm": 0.5907630920410156, "learning_rate": 9.763325133264592e-06, "loss": 0.1813, "step": 1161 }, { "epoch": 0.3765392093324692, "grad_norm": 0.626511812210083, "learning_rate": 9.762793041599616e-06, "loss": 0.1792, "step": 1162 }, { "epoch": 0.37686325340246274, "grad_norm": 0.5377705693244934, "learning_rate": 9.762260367014877e-06, "loss": 0.1559, "step": 1163 }, { "epoch": 0.3771872974724563, "grad_norm": 0.6175006628036499, "learning_rate": 9.761727109575574e-06, "loss": 0.1824, "step": 1164 }, { "epoch": 0.37751134154244975, "grad_norm": 0.5645531415939331, "learning_rate": 9.761193269346968e-06, "loss": 0.1671, "step": 1165 }, { "epoch": 0.3778353856124433, "grad_norm": 0.5879248380661011, "learning_rate": 9.7606588463944e-06, "loss": 0.1832, "step": 1166 }, { "epoch": 0.3781594296824368, "grad_norm": 0.6032773852348328, "learning_rate": 9.760123840783275e-06, "loss": 0.1725, "step": 1167 }, { "epoch": 0.37848347375243035, "grad_norm": 0.6185351014137268, "learning_rate": 9.759588252579073e-06, "loss": 0.1988, "step": 1168 }, { "epoch": 0.37880751782242383, "grad_norm": 0.5803781151771545, "learning_rate": 9.759052081847345e-06, "loss": 0.182, "step": 1169 }, { "epoch": 0.37913156189241737, "grad_norm": 0.5212180018424988, "learning_rate": 9.758515328653712e-06, "loss": 0.1479, "step": 1170 }, { "epoch": 0.3794556059624109, "grad_norm": 0.5354920625686646, "learning_rate": 9.757977993063871e-06, "loss": 0.1608, "step": 1171 }, { "epoch": 0.3797796500324044, "grad_norm": 0.6095584034919739, "learning_rate": 9.757440075143585e-06, "loss": 0.1858, "step": 1172 }, { "epoch": 0.3801036941023979, "grad_norm": 0.5753458142280579, "learning_rate": 9.756901574958688e-06, "loss": 0.1669, "step": 1173 }, { "epoch": 0.38042773817239145, "grad_norm": 0.582177996635437, "learning_rate": 9.756362492575088e-06, "loss": 0.1722, "step": 1174 }, { "epoch": 0.380751782242385, "grad_norm": 0.5849246382713318, "learning_rate": 9.755822828058765e-06, "loss": 0.1683, "step": 1175 }, { "epoch": 0.38107582631237846, "grad_norm": 0.550369918346405, "learning_rate": 9.755282581475769e-06, "loss": 0.1622, "step": 1176 }, { "epoch": 0.381399870382372, "grad_norm": 0.5477217435836792, "learning_rate": 9.754741752892219e-06, "loss": 0.1587, "step": 1177 }, { "epoch": 0.38172391445236553, "grad_norm": 0.5430402159690857, "learning_rate": 9.754200342374308e-06, "loss": 0.1674, "step": 1178 }, { "epoch": 0.38204795852235907, "grad_norm": 0.5677325129508972, "learning_rate": 9.753658349988298e-06, "loss": 0.1744, "step": 1179 }, { "epoch": 0.38237200259235254, "grad_norm": 0.5597923994064331, "learning_rate": 9.753115775800525e-06, "loss": 0.1639, "step": 1180 }, { "epoch": 0.3826960466623461, "grad_norm": 0.5822575688362122, "learning_rate": 9.752572619877397e-06, "loss": 0.1478, "step": 1181 }, { "epoch": 0.3830200907323396, "grad_norm": 0.5696035623550415, "learning_rate": 9.752028882285385e-06, "loss": 0.166, "step": 1182 }, { "epoch": 0.3833441348023331, "grad_norm": 0.5821458697319031, "learning_rate": 9.751484563091043e-06, "loss": 0.1703, "step": 1183 }, { "epoch": 0.3836681788723266, "grad_norm": 0.5574982762336731, "learning_rate": 9.75093966236099e-06, "loss": 0.167, "step": 1184 }, { "epoch": 0.38399222294232016, "grad_norm": 0.5701562762260437, "learning_rate": 9.750394180161913e-06, "loss": 0.1589, "step": 1185 }, { "epoch": 0.3843162670123137, "grad_norm": 0.583560049533844, "learning_rate": 9.749848116560576e-06, "loss": 0.1691, "step": 1186 }, { "epoch": 0.3846403110823072, "grad_norm": 0.613233745098114, "learning_rate": 9.749301471623813e-06, "loss": 0.1868, "step": 1187 }, { "epoch": 0.3849643551523007, "grad_norm": 0.5886712074279785, "learning_rate": 9.748754245418526e-06, "loss": 0.1795, "step": 1188 }, { "epoch": 0.38528839922229424, "grad_norm": 0.5409032106399536, "learning_rate": 9.74820643801169e-06, "loss": 0.1618, "step": 1189 }, { "epoch": 0.3856124432922878, "grad_norm": 0.5986441373825073, "learning_rate": 9.747658049470353e-06, "loss": 0.1643, "step": 1190 }, { "epoch": 0.38593648736228126, "grad_norm": 0.5554662346839905, "learning_rate": 9.74710907986163e-06, "loss": 0.1652, "step": 1191 }, { "epoch": 0.3862605314322748, "grad_norm": 0.578743040561676, "learning_rate": 9.746559529252713e-06, "loss": 0.1698, "step": 1192 }, { "epoch": 0.3865845755022683, "grad_norm": 0.6013315916061401, "learning_rate": 9.74600939771086e-06, "loss": 0.1797, "step": 1193 }, { "epoch": 0.3869086195722618, "grad_norm": 0.5931842923164368, "learning_rate": 9.745458685303402e-06, "loss": 0.1741, "step": 1194 }, { "epoch": 0.38723266364225534, "grad_norm": 0.6375784873962402, "learning_rate": 9.74490739209774e-06, "loss": 0.1929, "step": 1195 }, { "epoch": 0.38755670771224887, "grad_norm": 0.6102494597434998, "learning_rate": 9.744355518161346e-06, "loss": 0.1677, "step": 1196 }, { "epoch": 0.3878807517822424, "grad_norm": 0.5642244219779968, "learning_rate": 9.743803063561767e-06, "loss": 0.176, "step": 1197 }, { "epoch": 0.3882047958522359, "grad_norm": 0.5792483687400818, "learning_rate": 9.743250028366615e-06, "loss": 0.1693, "step": 1198 }, { "epoch": 0.3885288399222294, "grad_norm": 0.5659141540527344, "learning_rate": 9.742696412643579e-06, "loss": 0.1578, "step": 1199 }, { "epoch": 0.38885288399222295, "grad_norm": 0.5681812763214111, "learning_rate": 9.742142216460416e-06, "loss": 0.1593, "step": 1200 }, { "epoch": 0.3891769280622165, "grad_norm": 0.5868738889694214, "learning_rate": 9.741587439884951e-06, "loss": 0.1765, "step": 1201 }, { "epoch": 0.38950097213220997, "grad_norm": 0.5223450064659119, "learning_rate": 9.741032082985086e-06, "loss": 0.1612, "step": 1202 }, { "epoch": 0.3898250162022035, "grad_norm": 0.6357806921005249, "learning_rate": 9.740476145828792e-06, "loss": 0.1856, "step": 1203 }, { "epoch": 0.39014906027219703, "grad_norm": 0.5512497425079346, "learning_rate": 9.739919628484108e-06, "loss": 0.1589, "step": 1204 }, { "epoch": 0.3904731043421905, "grad_norm": 0.5599070191383362, "learning_rate": 9.739362531019149e-06, "loss": 0.1663, "step": 1205 }, { "epoch": 0.39079714841218405, "grad_norm": 0.5793858766555786, "learning_rate": 9.738804853502095e-06, "loss": 0.1649, "step": 1206 }, { "epoch": 0.3911211924821776, "grad_norm": 0.5827155709266663, "learning_rate": 9.738246596001203e-06, "loss": 0.1746, "step": 1207 }, { "epoch": 0.3914452365521711, "grad_norm": 0.6015954613685608, "learning_rate": 9.737687758584797e-06, "loss": 0.1794, "step": 1208 }, { "epoch": 0.3917692806221646, "grad_norm": 0.5880870819091797, "learning_rate": 9.737128341321274e-06, "loss": 0.183, "step": 1209 }, { "epoch": 0.39209332469215813, "grad_norm": 0.5348117351531982, "learning_rate": 9.736568344279101e-06, "loss": 0.1472, "step": 1210 }, { "epoch": 0.39241736876215166, "grad_norm": 0.5760438442230225, "learning_rate": 9.736007767526817e-06, "loss": 0.16, "step": 1211 }, { "epoch": 0.3927414128321452, "grad_norm": 0.5510878562927246, "learning_rate": 9.735446611133029e-06, "loss": 0.1674, "step": 1212 }, { "epoch": 0.3930654569021387, "grad_norm": 0.609878420829773, "learning_rate": 9.73488487516642e-06, "loss": 0.1895, "step": 1213 }, { "epoch": 0.3933895009721322, "grad_norm": 0.6492441296577454, "learning_rate": 9.734322559695737e-06, "loss": 0.1774, "step": 1214 }, { "epoch": 0.39371354504212575, "grad_norm": 0.5391560792922974, "learning_rate": 9.733759664789807e-06, "loss": 0.1611, "step": 1215 }, { "epoch": 0.3940375891121192, "grad_norm": 0.5455146431922913, "learning_rate": 9.73319619051752e-06, "loss": 0.1722, "step": 1216 }, { "epoch": 0.39436163318211276, "grad_norm": 0.595698356628418, "learning_rate": 9.732632136947838e-06, "loss": 0.1833, "step": 1217 }, { "epoch": 0.3946856772521063, "grad_norm": 0.563563346862793, "learning_rate": 9.7320675041498e-06, "loss": 0.159, "step": 1218 }, { "epoch": 0.3950097213220998, "grad_norm": 0.5537634491920471, "learning_rate": 9.73150229219251e-06, "loss": 0.1598, "step": 1219 }, { "epoch": 0.3953337653920933, "grad_norm": 0.5871871113777161, "learning_rate": 9.730936501145141e-06, "loss": 0.1868, "step": 1220 }, { "epoch": 0.39565780946208684, "grad_norm": 0.526485800743103, "learning_rate": 9.730370131076945e-06, "loss": 0.146, "step": 1221 }, { "epoch": 0.3959818535320804, "grad_norm": 0.5196928381919861, "learning_rate": 9.72980318205724e-06, "loss": 0.1515, "step": 1222 }, { "epoch": 0.3963058976020739, "grad_norm": 0.5631489157676697, "learning_rate": 9.729235654155411e-06, "loss": 0.1723, "step": 1223 }, { "epoch": 0.3966299416720674, "grad_norm": 0.5770860910415649, "learning_rate": 9.728667547440923e-06, "loss": 0.1733, "step": 1224 }, { "epoch": 0.3969539857420609, "grad_norm": 0.5769538879394531, "learning_rate": 9.728098861983301e-06, "loss": 0.1568, "step": 1225 }, { "epoch": 0.39727802981205446, "grad_norm": 0.5829085111618042, "learning_rate": 9.727529597852152e-06, "loss": 0.166, "step": 1226 }, { "epoch": 0.39760207388204793, "grad_norm": 0.6667234897613525, "learning_rate": 9.726959755117146e-06, "loss": 0.191, "step": 1227 }, { "epoch": 0.39792611795204147, "grad_norm": 0.5731337666511536, "learning_rate": 9.726389333848026e-06, "loss": 0.1598, "step": 1228 }, { "epoch": 0.398250162022035, "grad_norm": 0.5425414443016052, "learning_rate": 9.725818334114608e-06, "loss": 0.1528, "step": 1229 }, { "epoch": 0.39857420609202854, "grad_norm": 0.5781131982803345, "learning_rate": 9.725246755986774e-06, "loss": 0.1803, "step": 1230 }, { "epoch": 0.398898250162022, "grad_norm": 0.5605859756469727, "learning_rate": 9.724674599534481e-06, "loss": 0.1525, "step": 1231 }, { "epoch": 0.39922229423201555, "grad_norm": 0.5850882530212402, "learning_rate": 9.724101864827756e-06, "loss": 0.1627, "step": 1232 }, { "epoch": 0.3995463383020091, "grad_norm": 0.5748516321182251, "learning_rate": 9.723528551936695e-06, "loss": 0.1667, "step": 1233 }, { "epoch": 0.3998703823720026, "grad_norm": 0.5731942653656006, "learning_rate": 9.722954660931468e-06, "loss": 0.1682, "step": 1234 }, { "epoch": 0.4001944264419961, "grad_norm": 0.560596227645874, "learning_rate": 9.72238019188231e-06, "loss": 0.1582, "step": 1235 }, { "epoch": 0.40051847051198963, "grad_norm": 0.6035265922546387, "learning_rate": 9.721805144859533e-06, "loss": 0.1714, "step": 1236 }, { "epoch": 0.40084251458198317, "grad_norm": 0.5419968962669373, "learning_rate": 9.72122951993352e-06, "loss": 0.1735, "step": 1237 }, { "epoch": 0.40116655865197665, "grad_norm": 0.56743985414505, "learning_rate": 9.720653317174716e-06, "loss": 0.1717, "step": 1238 }, { "epoch": 0.4014906027219702, "grad_norm": 0.5820173025131226, "learning_rate": 9.720076536653646e-06, "loss": 0.1817, "step": 1239 }, { "epoch": 0.4018146467919637, "grad_norm": 0.5778538584709167, "learning_rate": 9.7194991784409e-06, "loss": 0.1694, "step": 1240 }, { "epoch": 0.40213869086195725, "grad_norm": 0.58612060546875, "learning_rate": 9.718921242607145e-06, "loss": 0.1799, "step": 1241 }, { "epoch": 0.4024627349319507, "grad_norm": 0.5183675289154053, "learning_rate": 9.718342729223112e-06, "loss": 0.1576, "step": 1242 }, { "epoch": 0.40278677900194426, "grad_norm": 0.5352326035499573, "learning_rate": 9.717763638359607e-06, "loss": 0.1736, "step": 1243 }, { "epoch": 0.4031108230719378, "grad_norm": 0.5465376973152161, "learning_rate": 9.717183970087503e-06, "loss": 0.1596, "step": 1244 }, { "epoch": 0.40343486714193133, "grad_norm": 0.5443048477172852, "learning_rate": 9.716603724477748e-06, "loss": 0.1672, "step": 1245 }, { "epoch": 0.4037589112119248, "grad_norm": 0.5582151412963867, "learning_rate": 9.716022901601356e-06, "loss": 0.1662, "step": 1246 }, { "epoch": 0.40408295528191834, "grad_norm": 0.5699895620346069, "learning_rate": 9.715441501529417e-06, "loss": 0.1691, "step": 1247 }, { "epoch": 0.4044069993519119, "grad_norm": 0.5725207924842834, "learning_rate": 9.714859524333086e-06, "loss": 0.1641, "step": 1248 }, { "epoch": 0.40473104342190536, "grad_norm": 0.5575113892555237, "learning_rate": 9.714276970083594e-06, "loss": 0.1668, "step": 1249 }, { "epoch": 0.4050550874918989, "grad_norm": 0.5504116415977478, "learning_rate": 9.713693838852236e-06, "loss": 0.1576, "step": 1250 }, { "epoch": 0.4053791315618924, "grad_norm": 0.606964111328125, "learning_rate": 9.713110130710387e-06, "loss": 0.1731, "step": 1251 }, { "epoch": 0.40570317563188596, "grad_norm": 0.5411376953125, "learning_rate": 9.712525845729483e-06, "loss": 0.1648, "step": 1252 }, { "epoch": 0.40602721970187944, "grad_norm": 0.576723575592041, "learning_rate": 9.711940983981036e-06, "loss": 0.1628, "step": 1253 }, { "epoch": 0.406351263771873, "grad_norm": 0.5311942100524902, "learning_rate": 9.71135554553663e-06, "loss": 0.1584, "step": 1254 }, { "epoch": 0.4066753078418665, "grad_norm": 0.5140619277954102, "learning_rate": 9.710769530467912e-06, "loss": 0.1626, "step": 1255 }, { "epoch": 0.40699935191186, "grad_norm": 0.5483884215354919, "learning_rate": 9.710182938846609e-06, "loss": 0.1579, "step": 1256 }, { "epoch": 0.4073233959818535, "grad_norm": 0.5419959425926208, "learning_rate": 9.70959577074451e-06, "loss": 0.1613, "step": 1257 }, { "epoch": 0.40764744005184705, "grad_norm": 0.6026223301887512, "learning_rate": 9.709008026233483e-06, "loss": 0.1801, "step": 1258 }, { "epoch": 0.4079714841218406, "grad_norm": 0.5318058133125305, "learning_rate": 9.708419705385461e-06, "loss": 0.1456, "step": 1259 }, { "epoch": 0.40829552819183407, "grad_norm": 0.5593992471694946, "learning_rate": 9.707830808272446e-06, "loss": 0.1605, "step": 1260 }, { "epoch": 0.4086195722618276, "grad_norm": 0.5445616841316223, "learning_rate": 9.707241334966517e-06, "loss": 0.1635, "step": 1261 }, { "epoch": 0.40894361633182114, "grad_norm": 0.5666195750236511, "learning_rate": 9.706651285539817e-06, "loss": 0.1587, "step": 1262 }, { "epoch": 0.40926766040181467, "grad_norm": 0.6224789619445801, "learning_rate": 9.706060660064565e-06, "loss": 0.1945, "step": 1263 }, { "epoch": 0.40959170447180815, "grad_norm": 0.5528307557106018, "learning_rate": 9.705469458613046e-06, "loss": 0.1605, "step": 1264 }, { "epoch": 0.4099157485418017, "grad_norm": 0.582804799079895, "learning_rate": 9.704877681257616e-06, "loss": 0.1705, "step": 1265 }, { "epoch": 0.4102397926117952, "grad_norm": 0.5233073234558105, "learning_rate": 9.704285328070706e-06, "loss": 0.1484, "step": 1266 }, { "epoch": 0.4105638366817887, "grad_norm": 0.546563982963562, "learning_rate": 9.703692399124813e-06, "loss": 0.1607, "step": 1267 }, { "epoch": 0.41088788075178223, "grad_norm": 0.5636500716209412, "learning_rate": 9.703098894492506e-06, "loss": 0.1601, "step": 1268 }, { "epoch": 0.41121192482177576, "grad_norm": 0.5653511881828308, "learning_rate": 9.702504814246423e-06, "loss": 0.174, "step": 1269 }, { "epoch": 0.4115359688917693, "grad_norm": 0.555579423904419, "learning_rate": 9.701910158459275e-06, "loss": 0.1686, "step": 1270 }, { "epoch": 0.4118600129617628, "grad_norm": 0.5712722539901733, "learning_rate": 9.701314927203841e-06, "loss": 0.1605, "step": 1271 }, { "epoch": 0.4121840570317563, "grad_norm": 0.5150348544120789, "learning_rate": 9.700719120552972e-06, "loss": 0.1507, "step": 1272 }, { "epoch": 0.41250810110174985, "grad_norm": 0.5115662217140198, "learning_rate": 9.70012273857959e-06, "loss": 0.1543, "step": 1273 }, { "epoch": 0.4128321451717434, "grad_norm": 0.5579255223274231, "learning_rate": 9.699525781356685e-06, "loss": 0.1775, "step": 1274 }, { "epoch": 0.41315618924173686, "grad_norm": 0.5776585936546326, "learning_rate": 9.69892824895732e-06, "loss": 0.1683, "step": 1275 }, { "epoch": 0.4134802333117304, "grad_norm": 0.6621776819229126, "learning_rate": 9.698330141454626e-06, "loss": 0.1793, "step": 1276 }, { "epoch": 0.41380427738172393, "grad_norm": 0.5133375525474548, "learning_rate": 9.697731458921806e-06, "loss": 0.1442, "step": 1277 }, { "epoch": 0.4141283214517174, "grad_norm": 0.5586656332015991, "learning_rate": 9.697132201432133e-06, "loss": 0.147, "step": 1278 }, { "epoch": 0.41445236552171094, "grad_norm": 0.5529789328575134, "learning_rate": 9.69653236905895e-06, "loss": 0.1671, "step": 1279 }, { "epoch": 0.4147764095917045, "grad_norm": 0.6018351912498474, "learning_rate": 9.695931961875673e-06, "loss": 0.1849, "step": 1280 }, { "epoch": 0.415100453661698, "grad_norm": 0.5406911373138428, "learning_rate": 9.695330979955782e-06, "loss": 0.1519, "step": 1281 }, { "epoch": 0.4154244977316915, "grad_norm": 0.5286436676979065, "learning_rate": 9.694729423372834e-06, "loss": 0.1592, "step": 1282 }, { "epoch": 0.415748541801685, "grad_norm": 0.5377227067947388, "learning_rate": 9.694127292200452e-06, "loss": 0.1658, "step": 1283 }, { "epoch": 0.41607258587167856, "grad_norm": 0.5881343483924866, "learning_rate": 9.693524586512333e-06, "loss": 0.1728, "step": 1284 }, { "epoch": 0.4163966299416721, "grad_norm": 0.5579766035079956, "learning_rate": 9.692921306382241e-06, "loss": 0.1643, "step": 1285 }, { "epoch": 0.41672067401166557, "grad_norm": 0.5597804188728333, "learning_rate": 9.692317451884012e-06, "loss": 0.1746, "step": 1286 }, { "epoch": 0.4170447180816591, "grad_norm": 0.5385024547576904, "learning_rate": 9.691713023091554e-06, "loss": 0.1738, "step": 1287 }, { "epoch": 0.41736876215165264, "grad_norm": 0.5481756329536438, "learning_rate": 9.69110802007884e-06, "loss": 0.1706, "step": 1288 }, { "epoch": 0.4176928062216461, "grad_norm": 0.5710784196853638, "learning_rate": 9.690502442919917e-06, "loss": 0.1636, "step": 1289 }, { "epoch": 0.41801685029163965, "grad_norm": 0.5947467088699341, "learning_rate": 9.689896291688903e-06, "loss": 0.1819, "step": 1290 }, { "epoch": 0.4183408943616332, "grad_norm": 0.5301339626312256, "learning_rate": 9.689289566459986e-06, "loss": 0.1426, "step": 1291 }, { "epoch": 0.4186649384316267, "grad_norm": 0.606670081615448, "learning_rate": 9.688682267307418e-06, "loss": 0.1724, "step": 1292 }, { "epoch": 0.4189889825016202, "grad_norm": 0.5852813124656677, "learning_rate": 9.688074394305535e-06, "loss": 0.1906, "step": 1293 }, { "epoch": 0.41931302657161373, "grad_norm": 0.5888477563858032, "learning_rate": 9.687465947528727e-06, "loss": 0.1713, "step": 1294 }, { "epoch": 0.41963707064160727, "grad_norm": 0.5683367252349854, "learning_rate": 9.686856927051467e-06, "loss": 0.1661, "step": 1295 }, { "epoch": 0.4199611147116008, "grad_norm": 0.5632486939430237, "learning_rate": 9.686247332948291e-06, "loss": 0.168, "step": 1296 }, { "epoch": 0.4202851587815943, "grad_norm": 0.5726707577705383, "learning_rate": 9.685637165293808e-06, "loss": 0.1651, "step": 1297 }, { "epoch": 0.4206092028515878, "grad_norm": 0.5684093832969666, "learning_rate": 9.685026424162696e-06, "loss": 0.1693, "step": 1298 }, { "epoch": 0.42093324692158135, "grad_norm": 0.6308449506759644, "learning_rate": 9.684415109629705e-06, "loss": 0.1767, "step": 1299 }, { "epoch": 0.42125729099157483, "grad_norm": 0.5411693453788757, "learning_rate": 9.683803221769651e-06, "loss": 0.1509, "step": 1300 }, { "epoch": 0.42158133506156836, "grad_norm": 0.5787219405174255, "learning_rate": 9.683190760657428e-06, "loss": 0.1796, "step": 1301 }, { "epoch": 0.4219053791315619, "grad_norm": 0.5470903515815735, "learning_rate": 9.682577726367993e-06, "loss": 0.1657, "step": 1302 }, { "epoch": 0.42222942320155543, "grad_norm": 0.547219455242157, "learning_rate": 9.681964118976372e-06, "loss": 0.1653, "step": 1303 }, { "epoch": 0.4225534672715489, "grad_norm": 0.5840909481048584, "learning_rate": 9.681349938557672e-06, "loss": 0.1736, "step": 1304 }, { "epoch": 0.42287751134154244, "grad_norm": 0.5541393756866455, "learning_rate": 9.680735185187055e-06, "loss": 0.1623, "step": 1305 }, { "epoch": 0.423201555411536, "grad_norm": 0.5564125776290894, "learning_rate": 9.680119858939765e-06, "loss": 0.1664, "step": 1306 }, { "epoch": 0.4235255994815295, "grad_norm": 0.5307673215866089, "learning_rate": 9.679503959891112e-06, "loss": 0.1521, "step": 1307 }, { "epoch": 0.423849643551523, "grad_norm": 0.5410294532775879, "learning_rate": 9.678887488116476e-06, "loss": 0.1498, "step": 1308 }, { "epoch": 0.4241736876215165, "grad_norm": 0.5974996089935303, "learning_rate": 9.678270443691307e-06, "loss": 0.1701, "step": 1309 }, { "epoch": 0.42449773169151006, "grad_norm": 0.5369963049888611, "learning_rate": 9.677652826691122e-06, "loss": 0.1584, "step": 1310 }, { "epoch": 0.42482177576150354, "grad_norm": 0.583291232585907, "learning_rate": 9.677034637191516e-06, "loss": 0.1696, "step": 1311 }, { "epoch": 0.4251458198314971, "grad_norm": 0.6203243732452393, "learning_rate": 9.676415875268147e-06, "loss": 0.1773, "step": 1312 }, { "epoch": 0.4254698639014906, "grad_norm": 0.6253352165222168, "learning_rate": 9.675796540996747e-06, "loss": 0.165, "step": 1313 }, { "epoch": 0.42579390797148414, "grad_norm": 0.5516331791877747, "learning_rate": 9.675176634453117e-06, "loss": 0.1637, "step": 1314 }, { "epoch": 0.4261179520414776, "grad_norm": 0.5999361872673035, "learning_rate": 9.674556155713125e-06, "loss": 0.1729, "step": 1315 }, { "epoch": 0.42644199611147116, "grad_norm": 0.589998185634613, "learning_rate": 9.67393510485271e-06, "loss": 0.1827, "step": 1316 }, { "epoch": 0.4267660401814647, "grad_norm": 0.5959166884422302, "learning_rate": 9.673313481947888e-06, "loss": 0.1542, "step": 1317 }, { "epoch": 0.4270900842514582, "grad_norm": 0.5973237752914429, "learning_rate": 9.672691287074736e-06, "loss": 0.165, "step": 1318 }, { "epoch": 0.4274141283214517, "grad_norm": 0.5623382329940796, "learning_rate": 9.672068520309408e-06, "loss": 0.1741, "step": 1319 }, { "epoch": 0.42773817239144524, "grad_norm": 0.5273089408874512, "learning_rate": 9.67144518172812e-06, "loss": 0.1554, "step": 1320 }, { "epoch": 0.42806221646143877, "grad_norm": 0.5496593713760376, "learning_rate": 9.670821271407164e-06, "loss": 0.1631, "step": 1321 }, { "epoch": 0.42838626053143225, "grad_norm": 0.5944601893424988, "learning_rate": 9.670196789422903e-06, "loss": 0.164, "step": 1322 }, { "epoch": 0.4287103046014258, "grad_norm": 0.5003495812416077, "learning_rate": 9.669571735851766e-06, "loss": 0.146, "step": 1323 }, { "epoch": 0.4290343486714193, "grad_norm": 0.5393743515014648, "learning_rate": 9.668946110770255e-06, "loss": 0.164, "step": 1324 }, { "epoch": 0.42935839274141285, "grad_norm": 0.5703218579292297, "learning_rate": 9.668319914254936e-06, "loss": 0.1682, "step": 1325 }, { "epoch": 0.42968243681140633, "grad_norm": 0.5486487746238708, "learning_rate": 9.667693146382453e-06, "loss": 0.1702, "step": 1326 }, { "epoch": 0.43000648088139987, "grad_norm": 0.5758382081985474, "learning_rate": 9.667065807229516e-06, "loss": 0.1617, "step": 1327 }, { "epoch": 0.4303305249513934, "grad_norm": 0.5864109992980957, "learning_rate": 9.666437896872905e-06, "loss": 0.1724, "step": 1328 }, { "epoch": 0.43065456902138693, "grad_norm": 0.5836167931556702, "learning_rate": 9.665809415389471e-06, "loss": 0.1737, "step": 1329 }, { "epoch": 0.4309786130913804, "grad_norm": 0.5557619333267212, "learning_rate": 9.665180362856132e-06, "loss": 0.1607, "step": 1330 }, { "epoch": 0.43130265716137395, "grad_norm": 0.5266228914260864, "learning_rate": 9.664550739349878e-06, "loss": 0.1708, "step": 1331 }, { "epoch": 0.4316267012313675, "grad_norm": 0.5815237164497375, "learning_rate": 9.663920544947771e-06, "loss": 0.1395, "step": 1332 }, { "epoch": 0.43195074530136096, "grad_norm": 0.561438262462616, "learning_rate": 9.663289779726941e-06, "loss": 0.161, "step": 1333 }, { "epoch": 0.4322747893713545, "grad_norm": 0.6023616790771484, "learning_rate": 9.662658443764583e-06, "loss": 0.1806, "step": 1334 }, { "epoch": 0.43259883344134803, "grad_norm": 0.5528768301010132, "learning_rate": 9.662026537137972e-06, "loss": 0.1591, "step": 1335 }, { "epoch": 0.43292287751134156, "grad_norm": 0.5628527998924255, "learning_rate": 9.661394059924444e-06, "loss": 0.1647, "step": 1336 }, { "epoch": 0.43324692158133504, "grad_norm": 0.6160968542098999, "learning_rate": 9.660761012201409e-06, "loss": 0.1623, "step": 1337 }, { "epoch": 0.4335709656513286, "grad_norm": 0.5395432114601135, "learning_rate": 9.660127394046346e-06, "loss": 0.1612, "step": 1338 }, { "epoch": 0.4338950097213221, "grad_norm": 0.5769019722938538, "learning_rate": 9.659493205536802e-06, "loss": 0.1741, "step": 1339 }, { "epoch": 0.43421905379131565, "grad_norm": 0.5266570448875427, "learning_rate": 9.6588584467504e-06, "loss": 0.1574, "step": 1340 }, { "epoch": 0.4345430978613091, "grad_norm": 0.5223969221115112, "learning_rate": 9.658223117764822e-06, "loss": 0.1443, "step": 1341 }, { "epoch": 0.43486714193130266, "grad_norm": 0.5220791101455688, "learning_rate": 9.657587218657832e-06, "loss": 0.1583, "step": 1342 }, { "epoch": 0.4351911860012962, "grad_norm": 0.5109962224960327, "learning_rate": 9.656950749507253e-06, "loss": 0.1545, "step": 1343 }, { "epoch": 0.43551523007128967, "grad_norm": 0.5817949771881104, "learning_rate": 9.656313710390986e-06, "loss": 0.1683, "step": 1344 }, { "epoch": 0.4358392741412832, "grad_norm": 0.5956231355667114, "learning_rate": 9.655676101386999e-06, "loss": 0.1827, "step": 1345 }, { "epoch": 0.43616331821127674, "grad_norm": 0.552972137928009, "learning_rate": 9.655037922573325e-06, "loss": 0.1603, "step": 1346 }, { "epoch": 0.4364873622812703, "grad_norm": 0.5825382471084595, "learning_rate": 9.654399174028077e-06, "loss": 0.1782, "step": 1347 }, { "epoch": 0.43681140635126375, "grad_norm": 0.564332902431488, "learning_rate": 9.653759855829425e-06, "loss": 0.1629, "step": 1348 }, { "epoch": 0.4371354504212573, "grad_norm": 0.6328169703483582, "learning_rate": 9.65311996805562e-06, "loss": 0.1889, "step": 1349 }, { "epoch": 0.4374594944912508, "grad_norm": 0.5259972214698792, "learning_rate": 9.652479510784976e-06, "loss": 0.1588, "step": 1350 }, { "epoch": 0.43778353856124436, "grad_norm": 0.5787382125854492, "learning_rate": 9.651838484095879e-06, "loss": 0.1721, "step": 1351 }, { "epoch": 0.43810758263123784, "grad_norm": 0.5727863907814026, "learning_rate": 9.651196888066787e-06, "loss": 0.1837, "step": 1352 }, { "epoch": 0.43843162670123137, "grad_norm": 0.5699387192726135, "learning_rate": 9.65055472277622e-06, "loss": 0.1824, "step": 1353 }, { "epoch": 0.4387556707712249, "grad_norm": 0.5503055453300476, "learning_rate": 9.649911988302778e-06, "loss": 0.1701, "step": 1354 }, { "epoch": 0.4390797148412184, "grad_norm": 0.5899806022644043, "learning_rate": 9.649268684725122e-06, "loss": 0.1668, "step": 1355 }, { "epoch": 0.4394037589112119, "grad_norm": 0.5935372710227966, "learning_rate": 9.64862481212199e-06, "loss": 0.1881, "step": 1356 }, { "epoch": 0.43972780298120545, "grad_norm": 0.5293922424316406, "learning_rate": 9.64798037057218e-06, "loss": 0.1607, "step": 1357 }, { "epoch": 0.440051847051199, "grad_norm": 0.5702993273735046, "learning_rate": 9.64733536015457e-06, "loss": 0.1626, "step": 1358 }, { "epoch": 0.44037589112119246, "grad_norm": 0.5957580804824829, "learning_rate": 9.646689780948101e-06, "loss": 0.1715, "step": 1359 }, { "epoch": 0.440699935191186, "grad_norm": 0.6047354936599731, "learning_rate": 9.646043633031786e-06, "loss": 0.185, "step": 1360 }, { "epoch": 0.44102397926117953, "grad_norm": 0.5889037251472473, "learning_rate": 9.645396916484709e-06, "loss": 0.1695, "step": 1361 }, { "epoch": 0.44134802333117307, "grad_norm": 0.5471667051315308, "learning_rate": 9.64474963138602e-06, "loss": 0.1498, "step": 1362 }, { "epoch": 0.44167206740116655, "grad_norm": 0.5360984206199646, "learning_rate": 9.644101777814939e-06, "loss": 0.1655, "step": 1363 }, { "epoch": 0.4419961114711601, "grad_norm": 0.563440203666687, "learning_rate": 9.64345335585076e-06, "loss": 0.1502, "step": 1364 }, { "epoch": 0.4423201555411536, "grad_norm": 0.5638374090194702, "learning_rate": 9.642804365572841e-06, "loss": 0.1567, "step": 1365 }, { "epoch": 0.4426441996111471, "grad_norm": 0.5967015027999878, "learning_rate": 9.642154807060617e-06, "loss": 0.1721, "step": 1366 }, { "epoch": 0.4429682436811406, "grad_norm": 0.5432306528091431, "learning_rate": 9.64150468039358e-06, "loss": 0.1659, "step": 1367 }, { "epoch": 0.44329228775113416, "grad_norm": 0.5840265154838562, "learning_rate": 9.640853985651306e-06, "loss": 0.1726, "step": 1368 }, { "epoch": 0.4436163318211277, "grad_norm": 0.5982809066772461, "learning_rate": 9.64020272291343e-06, "loss": 0.1846, "step": 1369 }, { "epoch": 0.4439403758911212, "grad_norm": 0.5622856616973877, "learning_rate": 9.639550892259663e-06, "loss": 0.1789, "step": 1370 }, { "epoch": 0.4442644199611147, "grad_norm": 0.580660343170166, "learning_rate": 9.638898493769779e-06, "loss": 0.1592, "step": 1371 }, { "epoch": 0.44458846403110824, "grad_norm": 0.5534035563468933, "learning_rate": 9.638245527523629e-06, "loss": 0.153, "step": 1372 }, { "epoch": 0.4449125081011017, "grad_norm": 0.5400548577308655, "learning_rate": 9.637591993601127e-06, "loss": 0.1607, "step": 1373 }, { "epoch": 0.44523655217109526, "grad_norm": 0.5466241836547852, "learning_rate": 9.636937892082261e-06, "loss": 0.1647, "step": 1374 }, { "epoch": 0.4455605962410888, "grad_norm": 0.5731942057609558, "learning_rate": 9.636283223047087e-06, "loss": 0.1773, "step": 1375 }, { "epoch": 0.4458846403110823, "grad_norm": 0.5324536561965942, "learning_rate": 9.635627986575727e-06, "loss": 0.1694, "step": 1376 }, { "epoch": 0.4462086843810758, "grad_norm": 0.5857275128364563, "learning_rate": 9.634972182748378e-06, "loss": 0.16, "step": 1377 }, { "epoch": 0.44653272845106934, "grad_norm": 0.5256370306015015, "learning_rate": 9.634315811645305e-06, "loss": 0.148, "step": 1378 }, { "epoch": 0.4468567725210629, "grad_norm": 0.5678802132606506, "learning_rate": 9.633658873346841e-06, "loss": 0.1726, "step": 1379 }, { "epoch": 0.4471808165910564, "grad_norm": 0.5549466609954834, "learning_rate": 9.633001367933387e-06, "loss": 0.1682, "step": 1380 }, { "epoch": 0.4475048606610499, "grad_norm": 0.5725024342536926, "learning_rate": 9.632343295485416e-06, "loss": 0.1621, "step": 1381 }, { "epoch": 0.4478289047310434, "grad_norm": 0.5248334407806396, "learning_rate": 9.631684656083472e-06, "loss": 0.1604, "step": 1382 }, { "epoch": 0.44815294880103695, "grad_norm": 0.5629571676254272, "learning_rate": 9.631025449808163e-06, "loss": 0.1697, "step": 1383 }, { "epoch": 0.44847699287103043, "grad_norm": 0.5652890205383301, "learning_rate": 9.63036567674017e-06, "loss": 0.1517, "step": 1384 }, { "epoch": 0.44880103694102397, "grad_norm": 0.6064323782920837, "learning_rate": 9.629705336960244e-06, "loss": 0.1673, "step": 1385 }, { "epoch": 0.4491250810110175, "grad_norm": 0.55648273229599, "learning_rate": 9.629044430549206e-06, "loss": 0.1584, "step": 1386 }, { "epoch": 0.44944912508101104, "grad_norm": 0.533288300037384, "learning_rate": 9.62838295758794e-06, "loss": 0.1635, "step": 1387 }, { "epoch": 0.4497731691510045, "grad_norm": 0.5898700952529907, "learning_rate": 9.627720918157407e-06, "loss": 0.1837, "step": 1388 }, { "epoch": 0.45009721322099805, "grad_norm": 0.56904536485672, "learning_rate": 9.627058312338634e-06, "loss": 0.1591, "step": 1389 }, { "epoch": 0.4504212572909916, "grad_norm": 0.5584230422973633, "learning_rate": 9.626395140212714e-06, "loss": 0.1632, "step": 1390 }, { "epoch": 0.4507453013609851, "grad_norm": 0.5398111939430237, "learning_rate": 9.625731401860819e-06, "loss": 0.1592, "step": 1391 }, { "epoch": 0.4510693454309786, "grad_norm": 0.5425682663917542, "learning_rate": 9.625067097364181e-06, "loss": 0.1528, "step": 1392 }, { "epoch": 0.45139338950097213, "grad_norm": 0.5614835619926453, "learning_rate": 9.624402226804101e-06, "loss": 0.1521, "step": 1393 }, { "epoch": 0.45171743357096567, "grad_norm": 0.5337006449699402, "learning_rate": 9.623736790261959e-06, "loss": 0.1518, "step": 1394 }, { "epoch": 0.45204147764095914, "grad_norm": 0.5660865902900696, "learning_rate": 9.623070787819195e-06, "loss": 0.1563, "step": 1395 }, { "epoch": 0.4523655217109527, "grad_norm": 0.586026132106781, "learning_rate": 9.622404219557322e-06, "loss": 0.1907, "step": 1396 }, { "epoch": 0.4526895657809462, "grad_norm": 0.5866766571998596, "learning_rate": 9.621737085557918e-06, "loss": 0.1645, "step": 1397 }, { "epoch": 0.45301360985093975, "grad_norm": 0.5219502449035645, "learning_rate": 9.62106938590264e-06, "loss": 0.1652, "step": 1398 }, { "epoch": 0.4533376539209332, "grad_norm": 0.5699257254600525, "learning_rate": 9.620401120673202e-06, "loss": 0.166, "step": 1399 }, { "epoch": 0.45366169799092676, "grad_norm": 0.543327808380127, "learning_rate": 9.619732289951399e-06, "loss": 0.1655, "step": 1400 }, { "epoch": 0.4539857420609203, "grad_norm": 0.5347340703010559, "learning_rate": 9.619062893819082e-06, "loss": 0.1605, "step": 1401 }, { "epoch": 0.45430978613091383, "grad_norm": 0.571562647819519, "learning_rate": 9.618392932358185e-06, "loss": 0.184, "step": 1402 }, { "epoch": 0.4546338302009073, "grad_norm": 0.5329867005348206, "learning_rate": 9.617722405650702e-06, "loss": 0.1566, "step": 1403 }, { "epoch": 0.45495787427090084, "grad_norm": 0.5062045454978943, "learning_rate": 9.6170513137787e-06, "loss": 0.1566, "step": 1404 }, { "epoch": 0.4552819183408944, "grad_norm": 0.5401381254196167, "learning_rate": 9.616379656824314e-06, "loss": 0.1468, "step": 1405 }, { "epoch": 0.45560596241088785, "grad_norm": 0.5379137992858887, "learning_rate": 9.615707434869748e-06, "loss": 0.1692, "step": 1406 }, { "epoch": 0.4559300064808814, "grad_norm": 0.6190140843391418, "learning_rate": 9.615034647997274e-06, "loss": 0.1794, "step": 1407 }, { "epoch": 0.4562540505508749, "grad_norm": 0.5723645091056824, "learning_rate": 9.614361296289239e-06, "loss": 0.1594, "step": 1408 }, { "epoch": 0.45657809462086846, "grad_norm": 0.5665557384490967, "learning_rate": 9.61368737982805e-06, "loss": 0.1563, "step": 1409 }, { "epoch": 0.45690213869086194, "grad_norm": 0.571121335029602, "learning_rate": 9.613012898696187e-06, "loss": 0.167, "step": 1410 }, { "epoch": 0.45722618276085547, "grad_norm": 0.5885666012763977, "learning_rate": 9.612337852976207e-06, "loss": 0.1704, "step": 1411 }, { "epoch": 0.457550226830849, "grad_norm": 0.5780050754547119, "learning_rate": 9.611662242750723e-06, "loss": 0.1686, "step": 1412 }, { "epoch": 0.45787427090084254, "grad_norm": 0.5515772700309753, "learning_rate": 9.610986068102425e-06, "loss": 0.1682, "step": 1413 }, { "epoch": 0.458198314970836, "grad_norm": 0.5495187640190125, "learning_rate": 9.610309329114069e-06, "loss": 0.1633, "step": 1414 }, { "epoch": 0.45852235904082955, "grad_norm": 0.5032498240470886, "learning_rate": 9.609632025868484e-06, "loss": 0.1419, "step": 1415 }, { "epoch": 0.4588464031108231, "grad_norm": 0.5348735451698303, "learning_rate": 9.608954158448563e-06, "loss": 0.1721, "step": 1416 }, { "epoch": 0.45917044718081657, "grad_norm": 0.4963846802711487, "learning_rate": 9.608275726937271e-06, "loss": 0.1469, "step": 1417 }, { "epoch": 0.4594944912508101, "grad_norm": 0.6138606071472168, "learning_rate": 9.607596731417643e-06, "loss": 0.1589, "step": 1418 }, { "epoch": 0.45981853532080363, "grad_norm": 0.5531137585639954, "learning_rate": 9.606917171972778e-06, "loss": 0.16, "step": 1419 }, { "epoch": 0.46014257939079717, "grad_norm": 0.605338454246521, "learning_rate": 9.60623704868585e-06, "loss": 0.1865, "step": 1420 }, { "epoch": 0.46046662346079065, "grad_norm": 0.5759372711181641, "learning_rate": 9.6055563616401e-06, "loss": 0.1586, "step": 1421 }, { "epoch": 0.4607906675307842, "grad_norm": 0.5326551795005798, "learning_rate": 9.604875110918836e-06, "loss": 0.1545, "step": 1422 }, { "epoch": 0.4611147116007777, "grad_norm": 0.5569169521331787, "learning_rate": 9.604193296605437e-06, "loss": 0.165, "step": 1423 }, { "epoch": 0.46143875567077125, "grad_norm": 0.6162808537483215, "learning_rate": 9.60351091878335e-06, "loss": 0.2078, "step": 1424 }, { "epoch": 0.46176279974076473, "grad_norm": 0.6384009122848511, "learning_rate": 9.602827977536094e-06, "loss": 0.1826, "step": 1425 }, { "epoch": 0.46208684381075826, "grad_norm": 0.5420453548431396, "learning_rate": 9.60214447294725e-06, "loss": 0.163, "step": 1426 }, { "epoch": 0.4624108878807518, "grad_norm": 0.5329408645629883, "learning_rate": 9.601460405100475e-06, "loss": 0.1625, "step": 1427 }, { "epoch": 0.4627349319507453, "grad_norm": 0.5379124283790588, "learning_rate": 9.600775774079493e-06, "loss": 0.1621, "step": 1428 }, { "epoch": 0.4630589760207388, "grad_norm": 0.5374495387077332, "learning_rate": 9.600090579968095e-06, "loss": 0.1557, "step": 1429 }, { "epoch": 0.46338302009073234, "grad_norm": 0.5596601366996765, "learning_rate": 9.599404822850142e-06, "loss": 0.1518, "step": 1430 }, { "epoch": 0.4637070641607259, "grad_norm": 0.6118326783180237, "learning_rate": 9.598718502809565e-06, "loss": 0.168, "step": 1431 }, { "epoch": 0.46403110823071936, "grad_norm": 0.5268542766571045, "learning_rate": 9.598031619930363e-06, "loss": 0.1548, "step": 1432 }, { "epoch": 0.4643551523007129, "grad_norm": 0.5452706813812256, "learning_rate": 9.597344174296601e-06, "loss": 0.1634, "step": 1433 }, { "epoch": 0.4646791963707064, "grad_norm": 0.5382551550865173, "learning_rate": 9.59665616599242e-06, "loss": 0.1695, "step": 1434 }, { "epoch": 0.46500324044069996, "grad_norm": 0.4947247803211212, "learning_rate": 9.595967595102022e-06, "loss": 0.1436, "step": 1435 }, { "epoch": 0.46532728451069344, "grad_norm": 0.5326928496360779, "learning_rate": 9.595278461709683e-06, "loss": 0.1647, "step": 1436 }, { "epoch": 0.465651328580687, "grad_norm": 0.5893187522888184, "learning_rate": 9.594588765899746e-06, "loss": 0.174, "step": 1437 }, { "epoch": 0.4659753726506805, "grad_norm": 0.5314544439315796, "learning_rate": 9.593898507756622e-06, "loss": 0.1577, "step": 1438 }, { "epoch": 0.466299416720674, "grad_norm": 0.5155757069587708, "learning_rate": 9.593207687364795e-06, "loss": 0.1537, "step": 1439 }, { "epoch": 0.4666234607906675, "grad_norm": 0.5500592589378357, "learning_rate": 9.592516304808811e-06, "loss": 0.1613, "step": 1440 }, { "epoch": 0.46694750486066106, "grad_norm": 0.5493935942649841, "learning_rate": 9.591824360173292e-06, "loss": 0.1682, "step": 1441 }, { "epoch": 0.4672715489306546, "grad_norm": 0.5465030074119568, "learning_rate": 9.591131853542922e-06, "loss": 0.1592, "step": 1442 }, { "epoch": 0.46759559300064807, "grad_norm": 0.5221512317657471, "learning_rate": 9.59043878500246e-06, "loss": 0.1636, "step": 1443 }, { "epoch": 0.4679196370706416, "grad_norm": 0.5554530620574951, "learning_rate": 9.589745154636729e-06, "loss": 0.1619, "step": 1444 }, { "epoch": 0.46824368114063514, "grad_norm": 0.5898672938346863, "learning_rate": 9.589050962530624e-06, "loss": 0.1744, "step": 1445 }, { "epoch": 0.46856772521062867, "grad_norm": 0.5853576064109802, "learning_rate": 9.588356208769108e-06, "loss": 0.1782, "step": 1446 }, { "epoch": 0.46889176928062215, "grad_norm": 0.5318002700805664, "learning_rate": 9.587660893437207e-06, "loss": 0.1556, "step": 1447 }, { "epoch": 0.4692158133506157, "grad_norm": 0.5409642457962036, "learning_rate": 9.586965016620027e-06, "loss": 0.1627, "step": 1448 }, { "epoch": 0.4695398574206092, "grad_norm": 0.49974918365478516, "learning_rate": 9.586268578402734e-06, "loss": 0.1451, "step": 1449 }, { "epoch": 0.4698639014906027, "grad_norm": 0.5798755884170532, "learning_rate": 9.585571578870565e-06, "loss": 0.1572, "step": 1450 }, { "epoch": 0.47018794556059623, "grad_norm": 0.4880210757255554, "learning_rate": 9.584874018108827e-06, "loss": 0.1434, "step": 1451 }, { "epoch": 0.47051198963058977, "grad_norm": 0.5324985384941101, "learning_rate": 9.584175896202893e-06, "loss": 0.1514, "step": 1452 }, { "epoch": 0.4708360337005833, "grad_norm": 0.5340385437011719, "learning_rate": 9.58347721323821e-06, "loss": 0.1642, "step": 1453 }, { "epoch": 0.4711600777705768, "grad_norm": 0.5225486159324646, "learning_rate": 9.582777969300286e-06, "loss": 0.1619, "step": 1454 }, { "epoch": 0.4714841218405703, "grad_norm": 0.5330610871315002, "learning_rate": 9.582078164474704e-06, "loss": 0.1476, "step": 1455 }, { "epoch": 0.47180816591056385, "grad_norm": 0.5599969625473022, "learning_rate": 9.581377798847111e-06, "loss": 0.1815, "step": 1456 }, { "epoch": 0.4721322099805574, "grad_norm": 0.4793117940425873, "learning_rate": 9.580676872503227e-06, "loss": 0.1414, "step": 1457 }, { "epoch": 0.47245625405055086, "grad_norm": 0.5731749534606934, "learning_rate": 9.57997538552884e-06, "loss": 0.1748, "step": 1458 }, { "epoch": 0.4727802981205444, "grad_norm": 0.5908327698707581, "learning_rate": 9.579273338009803e-06, "loss": 0.1724, "step": 1459 }, { "epoch": 0.47310434219053793, "grad_norm": 0.5376626253128052, "learning_rate": 9.578570730032039e-06, "loss": 0.1558, "step": 1460 }, { "epoch": 0.4734283862605314, "grad_norm": 0.532558023929596, "learning_rate": 9.577867561681542e-06, "loss": 0.1537, "step": 1461 }, { "epoch": 0.47375243033052494, "grad_norm": 0.5878148078918457, "learning_rate": 9.577163833044372e-06, "loss": 0.1555, "step": 1462 }, { "epoch": 0.4740764744005185, "grad_norm": 0.5399042367935181, "learning_rate": 9.57645954420666e-06, "loss": 0.165, "step": 1463 }, { "epoch": 0.474400518470512, "grad_norm": 0.5977577567100525, "learning_rate": 9.575754695254604e-06, "loss": 0.1713, "step": 1464 }, { "epoch": 0.4747245625405055, "grad_norm": 0.5240918397903442, "learning_rate": 9.57504928627447e-06, "loss": 0.1517, "step": 1465 }, { "epoch": 0.475048606610499, "grad_norm": 0.5246391296386719, "learning_rate": 9.574343317352593e-06, "loss": 0.1582, "step": 1466 }, { "epoch": 0.47537265068049256, "grad_norm": 0.5721025466918945, "learning_rate": 9.573636788575376e-06, "loss": 0.1815, "step": 1467 }, { "epoch": 0.4756966947504861, "grad_norm": 0.5447560548782349, "learning_rate": 9.572929700029292e-06, "loss": 0.1665, "step": 1468 }, { "epoch": 0.47602073882047957, "grad_norm": 0.5570774078369141, "learning_rate": 9.572222051800884e-06, "loss": 0.1615, "step": 1469 }, { "epoch": 0.4763447828904731, "grad_norm": 0.5612460374832153, "learning_rate": 9.571513843976758e-06, "loss": 0.1724, "step": 1470 }, { "epoch": 0.47666882696046664, "grad_norm": 0.5506044030189514, "learning_rate": 9.570805076643595e-06, "loss": 0.1629, "step": 1471 }, { "epoch": 0.4769928710304601, "grad_norm": 0.5385878682136536, "learning_rate": 9.570095749888138e-06, "loss": 0.1643, "step": 1472 }, { "epoch": 0.47731691510045365, "grad_norm": 0.5353468060493469, "learning_rate": 9.569385863797202e-06, "loss": 0.156, "step": 1473 }, { "epoch": 0.4776409591704472, "grad_norm": 0.5617091655731201, "learning_rate": 9.568675418457673e-06, "loss": 0.1461, "step": 1474 }, { "epoch": 0.4779650032404407, "grad_norm": 0.5673995018005371, "learning_rate": 9.567964413956501e-06, "loss": 0.1619, "step": 1475 }, { "epoch": 0.4782890473104342, "grad_norm": 0.5591347217559814, "learning_rate": 9.567252850380705e-06, "loss": 0.1635, "step": 1476 }, { "epoch": 0.47861309138042774, "grad_norm": 0.5915917754173279, "learning_rate": 9.566540727817375e-06, "loss": 0.1651, "step": 1477 }, { "epoch": 0.47893713545042127, "grad_norm": 0.6004805564880371, "learning_rate": 9.565828046353669e-06, "loss": 0.1689, "step": 1478 }, { "epoch": 0.4792611795204148, "grad_norm": 0.6274728775024414, "learning_rate": 9.565114806076808e-06, "loss": 0.1821, "step": 1479 }, { "epoch": 0.4795852235904083, "grad_norm": 0.5600235462188721, "learning_rate": 9.564401007074091e-06, "loss": 0.1638, "step": 1480 }, { "epoch": 0.4799092676604018, "grad_norm": 0.5907046794891357, "learning_rate": 9.563686649432874e-06, "loss": 0.16, "step": 1481 }, { "epoch": 0.48023331173039535, "grad_norm": 0.5883347988128662, "learning_rate": 9.562971733240595e-06, "loss": 0.1844, "step": 1482 }, { "epoch": 0.48055735580038883, "grad_norm": 0.5312012434005737, "learning_rate": 9.562256258584749e-06, "loss": 0.1545, "step": 1483 }, { "epoch": 0.48088139987038236, "grad_norm": 0.520148754119873, "learning_rate": 9.561540225552901e-06, "loss": 0.1646, "step": 1484 }, { "epoch": 0.4812054439403759, "grad_norm": 0.5252670049667358, "learning_rate": 9.560823634232688e-06, "loss": 0.1583, "step": 1485 }, { "epoch": 0.48152948801036943, "grad_norm": 0.5525287985801697, "learning_rate": 9.560106484711816e-06, "loss": 0.178, "step": 1486 }, { "epoch": 0.4818535320803629, "grad_norm": 0.5581156015396118, "learning_rate": 9.559388777078054e-06, "loss": 0.1762, "step": 1487 }, { "epoch": 0.48217757615035645, "grad_norm": 0.5156373977661133, "learning_rate": 9.558670511419246e-06, "loss": 0.1664, "step": 1488 }, { "epoch": 0.48250162022035, "grad_norm": 0.5569294095039368, "learning_rate": 9.557951687823298e-06, "loss": 0.1593, "step": 1489 }, { "epoch": 0.48282566429034346, "grad_norm": 0.5495864152908325, "learning_rate": 9.557232306378186e-06, "loss": 0.1563, "step": 1490 }, { "epoch": 0.483149708360337, "grad_norm": 0.5372532606124878, "learning_rate": 9.556512367171959e-06, "loss": 0.1602, "step": 1491 }, { "epoch": 0.48347375243033053, "grad_norm": 0.5491994023323059, "learning_rate": 9.555791870292727e-06, "loss": 0.1668, "step": 1492 }, { "epoch": 0.48379779650032406, "grad_norm": 0.5422798991203308, "learning_rate": 9.555070815828676e-06, "loss": 0.165, "step": 1493 }, { "epoch": 0.48412184057031754, "grad_norm": 0.5528333187103271, "learning_rate": 9.554349203868052e-06, "loss": 0.1559, "step": 1494 }, { "epoch": 0.4844458846403111, "grad_norm": 0.5231409668922424, "learning_rate": 9.553627034499176e-06, "loss": 0.1637, "step": 1495 }, { "epoch": 0.4847699287103046, "grad_norm": 0.561600387096405, "learning_rate": 9.552904307810432e-06, "loss": 0.1691, "step": 1496 }, { "epoch": 0.48509397278029814, "grad_norm": 0.6043953895568848, "learning_rate": 9.552181023890277e-06, "loss": 0.1861, "step": 1497 }, { "epoch": 0.4854180168502916, "grad_norm": 0.5333933234214783, "learning_rate": 9.551457182827233e-06, "loss": 0.158, "step": 1498 }, { "epoch": 0.48574206092028516, "grad_norm": 0.5546240210533142, "learning_rate": 9.550732784709892e-06, "loss": 0.1602, "step": 1499 }, { "epoch": 0.4860661049902787, "grad_norm": 0.6027463674545288, "learning_rate": 9.550007829626912e-06, "loss": 0.1604, "step": 1500 }, { "epoch": 0.48639014906027217, "grad_norm": 0.5604615807533264, "learning_rate": 9.549282317667021e-06, "loss": 0.1626, "step": 1501 }, { "epoch": 0.4867141931302657, "grad_norm": 0.5694551467895508, "learning_rate": 9.548556248919017e-06, "loss": 0.1754, "step": 1502 }, { "epoch": 0.48703823720025924, "grad_norm": 0.5441313982009888, "learning_rate": 9.547829623471758e-06, "loss": 0.1655, "step": 1503 }, { "epoch": 0.4873622812702528, "grad_norm": 0.5215234756469727, "learning_rate": 9.547102441414182e-06, "loss": 0.1541, "step": 1504 }, { "epoch": 0.48768632534024625, "grad_norm": 0.5377838611602783, "learning_rate": 9.546374702835286e-06, "loss": 0.1584, "step": 1505 }, { "epoch": 0.4880103694102398, "grad_norm": 0.5404870510101318, "learning_rate": 9.545646407824138e-06, "loss": 0.1674, "step": 1506 }, { "epoch": 0.4883344134802333, "grad_norm": 0.5669370889663696, "learning_rate": 9.544917556469876e-06, "loss": 0.1681, "step": 1507 }, { "epoch": 0.48865845755022685, "grad_norm": 0.5905188918113708, "learning_rate": 9.544188148861703e-06, "loss": 0.1789, "step": 1508 }, { "epoch": 0.48898250162022033, "grad_norm": 0.5530914068222046, "learning_rate": 9.543458185088892e-06, "loss": 0.169, "step": 1509 }, { "epoch": 0.48930654569021387, "grad_norm": 0.5152124762535095, "learning_rate": 9.542727665240783e-06, "loss": 0.1535, "step": 1510 }, { "epoch": 0.4896305897602074, "grad_norm": 0.5777947902679443, "learning_rate": 9.541996589406784e-06, "loss": 0.1825, "step": 1511 }, { "epoch": 0.4899546338302009, "grad_norm": 0.5618809461593628, "learning_rate": 9.541264957676373e-06, "loss": 0.1692, "step": 1512 }, { "epoch": 0.4902786779001944, "grad_norm": 0.5675665140151978, "learning_rate": 9.540532770139093e-06, "loss": 0.1727, "step": 1513 }, { "epoch": 0.49060272197018795, "grad_norm": 0.5648574233055115, "learning_rate": 9.539800026884558e-06, "loss": 0.17, "step": 1514 }, { "epoch": 0.4909267660401815, "grad_norm": 0.5508841872215271, "learning_rate": 9.53906672800245e-06, "loss": 0.1628, "step": 1515 }, { "epoch": 0.49125081011017496, "grad_norm": 0.614479660987854, "learning_rate": 9.538332873582515e-06, "loss": 0.1751, "step": 1516 }, { "epoch": 0.4915748541801685, "grad_norm": 0.5082097053527832, "learning_rate": 9.53759846371457e-06, "loss": 0.1466, "step": 1517 }, { "epoch": 0.49189889825016203, "grad_norm": 0.5089106559753418, "learning_rate": 9.536863498488502e-06, "loss": 0.1395, "step": 1518 }, { "epoch": 0.49222294232015557, "grad_norm": 0.5683304667472839, "learning_rate": 9.53612797799426e-06, "loss": 0.1702, "step": 1519 }, { "epoch": 0.49254698639014904, "grad_norm": 0.5439754724502563, "learning_rate": 9.535391902321868e-06, "loss": 0.1634, "step": 1520 }, { "epoch": 0.4928710304601426, "grad_norm": 0.5556536912918091, "learning_rate": 9.534655271561415e-06, "loss": 0.1678, "step": 1521 }, { "epoch": 0.4931950745301361, "grad_norm": 0.5609848499298096, "learning_rate": 9.533918085803053e-06, "loss": 0.1695, "step": 1522 }, { "epoch": 0.4935191186001296, "grad_norm": 0.5703451037406921, "learning_rate": 9.533180345137009e-06, "loss": 0.157, "step": 1523 }, { "epoch": 0.4938431626701231, "grad_norm": 0.568932056427002, "learning_rate": 9.532442049653577e-06, "loss": 0.1619, "step": 1524 }, { "epoch": 0.49416720674011666, "grad_norm": 0.5227696299552917, "learning_rate": 9.531703199443113e-06, "loss": 0.1626, "step": 1525 }, { "epoch": 0.4944912508101102, "grad_norm": 0.5611757636070251, "learning_rate": 9.53096379459605e-06, "loss": 0.1662, "step": 1526 }, { "epoch": 0.4948152948801037, "grad_norm": 0.5827354788780212, "learning_rate": 9.53022383520288e-06, "loss": 0.1674, "step": 1527 }, { "epoch": 0.4951393389500972, "grad_norm": 0.5978224277496338, "learning_rate": 9.52948332135417e-06, "loss": 0.1508, "step": 1528 }, { "epoch": 0.49546338302009074, "grad_norm": 0.5594303011894226, "learning_rate": 9.52874225314055e-06, "loss": 0.1821, "step": 1529 }, { "epoch": 0.4957874270900843, "grad_norm": 0.5278100967407227, "learning_rate": 9.52800063065272e-06, "loss": 0.1539, "step": 1530 }, { "epoch": 0.49611147116007775, "grad_norm": 0.5450807213783264, "learning_rate": 9.527258453981448e-06, "loss": 0.1571, "step": 1531 }, { "epoch": 0.4964355152300713, "grad_norm": 0.5510877370834351, "learning_rate": 9.526515723217566e-06, "loss": 0.1718, "step": 1532 }, { "epoch": 0.4967595593000648, "grad_norm": 0.6029154658317566, "learning_rate": 9.525772438451982e-06, "loss": 0.157, "step": 1533 }, { "epoch": 0.4970836033700583, "grad_norm": 0.5283669233322144, "learning_rate": 9.525028599775662e-06, "loss": 0.1463, "step": 1534 }, { "epoch": 0.49740764744005184, "grad_norm": 0.5479215979576111, "learning_rate": 9.524284207279648e-06, "loss": 0.1605, "step": 1535 }, { "epoch": 0.49773169151004537, "grad_norm": 0.6442349553108215, "learning_rate": 9.523539261055046e-06, "loss": 0.18, "step": 1536 }, { "epoch": 0.4980557355800389, "grad_norm": 0.5268247723579407, "learning_rate": 9.52279376119303e-06, "loss": 0.1546, "step": 1537 }, { "epoch": 0.4983797796500324, "grad_norm": 0.5354442000389099, "learning_rate": 9.52204770778484e-06, "loss": 0.1525, "step": 1538 }, { "epoch": 0.4987038237200259, "grad_norm": 0.5068316459655762, "learning_rate": 9.52130110092179e-06, "loss": 0.149, "step": 1539 }, { "epoch": 0.49902786779001945, "grad_norm": 0.5335785746574402, "learning_rate": 9.520553940695253e-06, "loss": 0.1541, "step": 1540 }, { "epoch": 0.499351911860013, "grad_norm": 0.5971035361289978, "learning_rate": 9.519806227196676e-06, "loss": 0.159, "step": 1541 }, { "epoch": 0.49967595593000647, "grad_norm": 0.5317016243934631, "learning_rate": 9.519057960517572e-06, "loss": 0.1726, "step": 1542 }, { "epoch": 0.5, "grad_norm": 0.5037248730659485, "learning_rate": 9.518309140749521e-06, "loss": 0.1377, "step": 1543 }, { "epoch": 0.5003240440699935, "grad_norm": 0.5816386938095093, "learning_rate": 9.517559767984175e-06, "loss": 0.1796, "step": 1544 }, { "epoch": 0.5006480881399871, "grad_norm": 0.5142626762390137, "learning_rate": 9.516809842313244e-06, "loss": 0.1576, "step": 1545 }, { "epoch": 0.5009721322099806, "grad_norm": 0.5820870399475098, "learning_rate": 9.516059363828513e-06, "loss": 0.1785, "step": 1546 }, { "epoch": 0.501296176279974, "grad_norm": 0.5120531320571899, "learning_rate": 9.515308332621838e-06, "loss": 0.1459, "step": 1547 }, { "epoch": 0.5016202203499676, "grad_norm": 0.5186384320259094, "learning_rate": 9.514556748785133e-06, "loss": 0.1523, "step": 1548 }, { "epoch": 0.5019442644199611, "grad_norm": 0.569932222366333, "learning_rate": 9.513804612410387e-06, "loss": 0.168, "step": 1549 }, { "epoch": 0.5022683084899546, "grad_norm": 0.544551432132721, "learning_rate": 9.513051923589652e-06, "loss": 0.1585, "step": 1550 }, { "epoch": 0.5025923525599482, "grad_norm": 0.5181910991668701, "learning_rate": 9.512298682415052e-06, "loss": 0.1476, "step": 1551 }, { "epoch": 0.5029163966299417, "grad_norm": 0.5322809219360352, "learning_rate": 9.511544888978777e-06, "loss": 0.1629, "step": 1552 }, { "epoch": 0.5032404406999352, "grad_norm": 0.5483154654502869, "learning_rate": 9.51079054337308e-06, "loss": 0.1617, "step": 1553 }, { "epoch": 0.5035644847699287, "grad_norm": 0.5534182786941528, "learning_rate": 9.51003564569029e-06, "loss": 0.1747, "step": 1554 }, { "epoch": 0.5038885288399222, "grad_norm": 0.6017401218414307, "learning_rate": 9.509280196022798e-06, "loss": 0.1766, "step": 1555 }, { "epoch": 0.5042125729099157, "grad_norm": 0.5111786723136902, "learning_rate": 9.508524194463062e-06, "loss": 0.1473, "step": 1556 }, { "epoch": 0.5045366169799093, "grad_norm": 0.5252443552017212, "learning_rate": 9.507767641103612e-06, "loss": 0.153, "step": 1557 }, { "epoch": 0.5048606610499028, "grad_norm": 0.5776534676551819, "learning_rate": 9.50701053603704e-06, "loss": 0.1591, "step": 1558 }, { "epoch": 0.5051847051198963, "grad_norm": 0.5355959534645081, "learning_rate": 9.50625287935601e-06, "loss": 0.1664, "step": 1559 }, { "epoch": 0.5055087491898899, "grad_norm": 0.5488954782485962, "learning_rate": 9.505494671153252e-06, "loss": 0.1632, "step": 1560 }, { "epoch": 0.5058327932598834, "grad_norm": 0.5629811882972717, "learning_rate": 9.504735911521562e-06, "loss": 0.1622, "step": 1561 }, { "epoch": 0.5061568373298768, "grad_norm": 0.5401865243911743, "learning_rate": 9.503976600553805e-06, "loss": 0.1624, "step": 1562 }, { "epoch": 0.5064808813998704, "grad_norm": 0.5585532784461975, "learning_rate": 9.503216738342916e-06, "loss": 0.1675, "step": 1563 }, { "epoch": 0.5068049254698639, "grad_norm": 0.5027323365211487, "learning_rate": 9.502456324981892e-06, "loss": 0.1515, "step": 1564 }, { "epoch": 0.5071289695398574, "grad_norm": 0.5550998449325562, "learning_rate": 9.501695360563801e-06, "loss": 0.1596, "step": 1565 }, { "epoch": 0.507453013609851, "grad_norm": 0.5721212029457092, "learning_rate": 9.500933845181776e-06, "loss": 0.1602, "step": 1566 }, { "epoch": 0.5077770576798445, "grad_norm": 0.5230451226234436, "learning_rate": 9.500171778929023e-06, "loss": 0.1496, "step": 1567 }, { "epoch": 0.508101101749838, "grad_norm": 0.5238634943962097, "learning_rate": 9.499409161898808e-06, "loss": 0.1646, "step": 1568 }, { "epoch": 0.5084251458198314, "grad_norm": 0.5461115837097168, "learning_rate": 9.49864599418447e-06, "loss": 0.1654, "step": 1569 }, { "epoch": 0.508749189889825, "grad_norm": 0.5428388714790344, "learning_rate": 9.497882275879412e-06, "loss": 0.1532, "step": 1570 }, { "epoch": 0.5090732339598185, "grad_norm": 0.6161945462226868, "learning_rate": 9.497118007077106e-06, "loss": 0.1675, "step": 1571 }, { "epoch": 0.509397278029812, "grad_norm": 0.541619598865509, "learning_rate": 9.496353187871092e-06, "loss": 0.1468, "step": 1572 }, { "epoch": 0.5097213220998056, "grad_norm": 0.5606821179389954, "learning_rate": 9.495587818354977e-06, "loss": 0.1734, "step": 1573 }, { "epoch": 0.5100453661697991, "grad_norm": 0.5038211941719055, "learning_rate": 9.494821898622431e-06, "loss": 0.1453, "step": 1574 }, { "epoch": 0.5103694102397927, "grad_norm": 0.5432626605033875, "learning_rate": 9.4940554287672e-06, "loss": 0.1581, "step": 1575 }, { "epoch": 0.5106934543097861, "grad_norm": 0.5432832837104797, "learning_rate": 9.49328840888309e-06, "loss": 0.1696, "step": 1576 }, { "epoch": 0.5110174983797796, "grad_norm": 0.5713906288146973, "learning_rate": 9.492520839063977e-06, "loss": 0.1759, "step": 1577 }, { "epoch": 0.5113415424497731, "grad_norm": 0.5782821178436279, "learning_rate": 9.491752719403805e-06, "loss": 0.1735, "step": 1578 }, { "epoch": 0.5116655865197667, "grad_norm": 0.5207881331443787, "learning_rate": 9.490984049996581e-06, "loss": 0.1621, "step": 1579 }, { "epoch": 0.5119896305897602, "grad_norm": 0.5409039258956909, "learning_rate": 9.490214830936388e-06, "loss": 0.1696, "step": 1580 }, { "epoch": 0.5123136746597537, "grad_norm": 0.5568174719810486, "learning_rate": 9.489445062317367e-06, "loss": 0.1652, "step": 1581 }, { "epoch": 0.5126377187297473, "grad_norm": 0.5258395671844482, "learning_rate": 9.488674744233732e-06, "loss": 0.1556, "step": 1582 }, { "epoch": 0.5129617627997408, "grad_norm": 0.5342602729797363, "learning_rate": 9.487903876779763e-06, "loss": 0.1605, "step": 1583 }, { "epoch": 0.5132858068697342, "grad_norm": 0.5090218782424927, "learning_rate": 9.487132460049804e-06, "loss": 0.1454, "step": 1584 }, { "epoch": 0.5136098509397278, "grad_norm": 0.5238171815872192, "learning_rate": 9.486360494138271e-06, "loss": 0.1492, "step": 1585 }, { "epoch": 0.5139338950097213, "grad_norm": 0.5661818981170654, "learning_rate": 9.485587979139647e-06, "loss": 0.1644, "step": 1586 }, { "epoch": 0.5142579390797148, "grad_norm": 0.5241592526435852, "learning_rate": 9.484814915148477e-06, "loss": 0.1532, "step": 1587 }, { "epoch": 0.5145819831497084, "grad_norm": 0.5554935336112976, "learning_rate": 9.484041302259377e-06, "loss": 0.1644, "step": 1588 }, { "epoch": 0.5149060272197019, "grad_norm": 0.4919261038303375, "learning_rate": 9.483267140567031e-06, "loss": 0.1473, "step": 1589 }, { "epoch": 0.5152300712896954, "grad_norm": 0.5082678198814392, "learning_rate": 9.482492430166188e-06, "loss": 0.1583, "step": 1590 }, { "epoch": 0.5155541153596889, "grad_norm": 0.5806257724761963, "learning_rate": 9.481717171151665e-06, "loss": 0.1757, "step": 1591 }, { "epoch": 0.5158781594296824, "grad_norm": 0.5450208187103271, "learning_rate": 9.480941363618347e-06, "loss": 0.1635, "step": 1592 }, { "epoch": 0.5162022034996759, "grad_norm": 0.5514866709709167, "learning_rate": 9.480165007661186e-06, "loss": 0.1678, "step": 1593 }, { "epoch": 0.5165262475696695, "grad_norm": 0.49130842089653015, "learning_rate": 9.479388103375199e-06, "loss": 0.1282, "step": 1594 }, { "epoch": 0.516850291639663, "grad_norm": 0.5273051857948303, "learning_rate": 9.478610650855472e-06, "loss": 0.1648, "step": 1595 }, { "epoch": 0.5171743357096565, "grad_norm": 0.4903629422187805, "learning_rate": 9.477832650197158e-06, "loss": 0.1459, "step": 1596 }, { "epoch": 0.5174983797796501, "grad_norm": 0.56658536195755, "learning_rate": 9.477054101495476e-06, "loss": 0.1604, "step": 1597 }, { "epoch": 0.5178224238496435, "grad_norm": 0.5279373526573181, "learning_rate": 9.476275004845712e-06, "loss": 0.1595, "step": 1598 }, { "epoch": 0.518146467919637, "grad_norm": 0.5263311266899109, "learning_rate": 9.475495360343222e-06, "loss": 0.1467, "step": 1599 }, { "epoch": 0.5184705119896306, "grad_norm": 0.5892171263694763, "learning_rate": 9.474715168083426e-06, "loss": 0.171, "step": 1600 }, { "epoch": 0.5187945560596241, "grad_norm": 0.5727109313011169, "learning_rate": 9.473934428161813e-06, "loss": 0.177, "step": 1601 }, { "epoch": 0.5191186001296176, "grad_norm": 0.5030838251113892, "learning_rate": 9.473153140673937e-06, "loss": 0.1477, "step": 1602 }, { "epoch": 0.5194426441996112, "grad_norm": 0.540828287601471, "learning_rate": 9.472371305715417e-06, "loss": 0.1764, "step": 1603 }, { "epoch": 0.5197666882696047, "grad_norm": 0.5530744791030884, "learning_rate": 9.471588923381949e-06, "loss": 0.1512, "step": 1604 }, { "epoch": 0.5200907323395982, "grad_norm": 0.5362316370010376, "learning_rate": 9.470805993769284e-06, "loss": 0.1593, "step": 1605 }, { "epoch": 0.5204147764095917, "grad_norm": 0.5342429280281067, "learning_rate": 9.470022516973245e-06, "loss": 0.1606, "step": 1606 }, { "epoch": 0.5207388204795852, "grad_norm": 0.49285590648651123, "learning_rate": 9.469238493089723e-06, "loss": 0.1602, "step": 1607 }, { "epoch": 0.5210628645495787, "grad_norm": 0.5392868518829346, "learning_rate": 9.468453922214676e-06, "loss": 0.1704, "step": 1608 }, { "epoch": 0.5213869086195723, "grad_norm": 0.5489588975906372, "learning_rate": 9.467668804444127e-06, "loss": 0.1605, "step": 1609 }, { "epoch": 0.5217109526895658, "grad_norm": 0.5366129875183105, "learning_rate": 9.466883139874165e-06, "loss": 0.1594, "step": 1610 }, { "epoch": 0.5220349967595593, "grad_norm": 0.5764620304107666, "learning_rate": 9.466096928600953e-06, "loss": 0.1809, "step": 1611 }, { "epoch": 0.5223590408295529, "grad_norm": 0.4988090693950653, "learning_rate": 9.46531017072071e-06, "loss": 0.1449, "step": 1612 }, { "epoch": 0.5226830848995463, "grad_norm": 0.5809919834136963, "learning_rate": 9.464522866329729e-06, "loss": 0.1798, "step": 1613 }, { "epoch": 0.5230071289695398, "grad_norm": 0.5060127973556519, "learning_rate": 9.463735015524369e-06, "loss": 0.1525, "step": 1614 }, { "epoch": 0.5233311730395334, "grad_norm": 0.5147068500518799, "learning_rate": 9.462946618401057e-06, "loss": 0.1512, "step": 1615 }, { "epoch": 0.5236552171095269, "grad_norm": 0.5227774381637573, "learning_rate": 9.462157675056282e-06, "loss": 0.1486, "step": 1616 }, { "epoch": 0.5239792611795204, "grad_norm": 0.575864851474762, "learning_rate": 9.461368185586604e-06, "loss": 0.1647, "step": 1617 }, { "epoch": 0.524303305249514, "grad_norm": 0.5135976672172546, "learning_rate": 9.460578150088652e-06, "loss": 0.1527, "step": 1618 }, { "epoch": 0.5246273493195075, "grad_norm": 0.5435746908187866, "learning_rate": 9.459787568659115e-06, "loss": 0.1675, "step": 1619 }, { "epoch": 0.5249513933895009, "grad_norm": 0.511205792427063, "learning_rate": 9.458996441394753e-06, "loss": 0.1493, "step": 1620 }, { "epoch": 0.5252754374594945, "grad_norm": 0.5185942053794861, "learning_rate": 9.458204768392394e-06, "loss": 0.1618, "step": 1621 }, { "epoch": 0.525599481529488, "grad_norm": 0.5057900547981262, "learning_rate": 9.45741254974893e-06, "loss": 0.1502, "step": 1622 }, { "epoch": 0.5259235255994815, "grad_norm": 0.5402381420135498, "learning_rate": 9.45661978556132e-06, "loss": 0.1623, "step": 1623 }, { "epoch": 0.5262475696694751, "grad_norm": 0.5072924494743347, "learning_rate": 9.455826475926593e-06, "loss": 0.1693, "step": 1624 }, { "epoch": 0.5265716137394686, "grad_norm": 0.5598753690719604, "learning_rate": 9.45503262094184e-06, "loss": 0.1722, "step": 1625 }, { "epoch": 0.5268956578094621, "grad_norm": 0.5614151954650879, "learning_rate": 9.454238220704223e-06, "loss": 0.1871, "step": 1626 }, { "epoch": 0.5272197018794557, "grad_norm": 0.5165886282920837, "learning_rate": 9.453443275310967e-06, "loss": 0.1554, "step": 1627 }, { "epoch": 0.5275437459494491, "grad_norm": 0.5177538394927979, "learning_rate": 9.452647784859367e-06, "loss": 0.1633, "step": 1628 }, { "epoch": 0.5278677900194426, "grad_norm": 0.518203616142273, "learning_rate": 9.451851749446786e-06, "loss": 0.1581, "step": 1629 }, { "epoch": 0.5281918340894362, "grad_norm": 0.5033408999443054, "learning_rate": 9.451055169170644e-06, "loss": 0.1522, "step": 1630 }, { "epoch": 0.5285158781594297, "grad_norm": 0.5274224281311035, "learning_rate": 9.450258044128441e-06, "loss": 0.1575, "step": 1631 }, { "epoch": 0.5288399222294232, "grad_norm": 0.4896572530269623, "learning_rate": 9.449460374417737e-06, "loss": 0.1375, "step": 1632 }, { "epoch": 0.5291639662994168, "grad_norm": 0.5398424863815308, "learning_rate": 9.448662160136154e-06, "loss": 0.1622, "step": 1633 }, { "epoch": 0.5294880103694103, "grad_norm": 0.5725162625312805, "learning_rate": 9.44786340138139e-06, "loss": 0.1748, "step": 1634 }, { "epoch": 0.5298120544394037, "grad_norm": 0.5098135471343994, "learning_rate": 9.447064098251205e-06, "loss": 0.1544, "step": 1635 }, { "epoch": 0.5301360985093972, "grad_norm": 0.5335975289344788, "learning_rate": 9.446264250843425e-06, "loss": 0.1484, "step": 1636 }, { "epoch": 0.5304601425793908, "grad_norm": 0.5592305660247803, "learning_rate": 9.445463859255943e-06, "loss": 0.1675, "step": 1637 }, { "epoch": 0.5307841866493843, "grad_norm": 0.5780206918716431, "learning_rate": 9.444662923586722e-06, "loss": 0.1668, "step": 1638 }, { "epoch": 0.5311082307193778, "grad_norm": 0.5241395831108093, "learning_rate": 9.443861443933786e-06, "loss": 0.1527, "step": 1639 }, { "epoch": 0.5314322747893714, "grad_norm": 0.5185185670852661, "learning_rate": 9.443059420395229e-06, "loss": 0.1421, "step": 1640 }, { "epoch": 0.5317563188593649, "grad_norm": 0.5295687913894653, "learning_rate": 9.44225685306921e-06, "loss": 0.1593, "step": 1641 }, { "epoch": 0.5320803629293583, "grad_norm": 0.5327252745628357, "learning_rate": 9.441453742053956e-06, "loss": 0.1614, "step": 1642 }, { "epoch": 0.5324044069993519, "grad_norm": 0.5810564756393433, "learning_rate": 9.440650087447762e-06, "loss": 0.171, "step": 1643 }, { "epoch": 0.5327284510693454, "grad_norm": 0.5510779619216919, "learning_rate": 9.439845889348987e-06, "loss": 0.1645, "step": 1644 }, { "epoch": 0.5330524951393389, "grad_norm": 0.5120063424110413, "learning_rate": 9.439041147856056e-06, "loss": 0.152, "step": 1645 }, { "epoch": 0.5333765392093325, "grad_norm": 0.577538251876831, "learning_rate": 9.43823586306746e-06, "loss": 0.1759, "step": 1646 }, { "epoch": 0.533700583279326, "grad_norm": 0.535686731338501, "learning_rate": 9.437430035081761e-06, "loss": 0.1642, "step": 1647 }, { "epoch": 0.5340246273493195, "grad_norm": 0.5769588947296143, "learning_rate": 9.436623663997584e-06, "loss": 0.1751, "step": 1648 }, { "epoch": 0.5343486714193131, "grad_norm": 0.5599579215049744, "learning_rate": 9.43581674991362e-06, "loss": 0.1736, "step": 1649 }, { "epoch": 0.5346727154893065, "grad_norm": 0.514129102230072, "learning_rate": 9.435009292928628e-06, "loss": 0.1574, "step": 1650 }, { "epoch": 0.5349967595593, "grad_norm": 0.52392578125, "learning_rate": 9.434201293141431e-06, "loss": 0.1584, "step": 1651 }, { "epoch": 0.5353208036292936, "grad_norm": 0.511712372303009, "learning_rate": 9.433392750650923e-06, "loss": 0.1502, "step": 1652 }, { "epoch": 0.5356448476992871, "grad_norm": 0.5033168196678162, "learning_rate": 9.432583665556062e-06, "loss": 0.14, "step": 1653 }, { "epoch": 0.5359688917692806, "grad_norm": 0.5233294367790222, "learning_rate": 9.43177403795587e-06, "loss": 0.1607, "step": 1654 }, { "epoch": 0.5362929358392742, "grad_norm": 0.48273828625679016, "learning_rate": 9.430963867949439e-06, "loss": 0.1498, "step": 1655 }, { "epoch": 0.5366169799092677, "grad_norm": 0.5318136811256409, "learning_rate": 9.430153155635926e-06, "loss": 0.1538, "step": 1656 }, { "epoch": 0.5369410239792611, "grad_norm": 0.5394309759140015, "learning_rate": 9.429341901114553e-06, "loss": 0.1619, "step": 1657 }, { "epoch": 0.5372650680492547, "grad_norm": 0.5881305932998657, "learning_rate": 9.428530104484612e-06, "loss": 0.1886, "step": 1658 }, { "epoch": 0.5375891121192482, "grad_norm": 0.5513939261436462, "learning_rate": 9.427717765845457e-06, "loss": 0.1633, "step": 1659 }, { "epoch": 0.5379131561892417, "grad_norm": 0.5329276919364929, "learning_rate": 9.42690488529651e-06, "loss": 0.1579, "step": 1660 }, { "epoch": 0.5382372002592353, "grad_norm": 0.5297683477401733, "learning_rate": 9.426091462937263e-06, "loss": 0.1556, "step": 1661 }, { "epoch": 0.5385612443292288, "grad_norm": 0.5444457530975342, "learning_rate": 9.425277498867267e-06, "loss": 0.1704, "step": 1662 }, { "epoch": 0.5388852883992223, "grad_norm": 0.5043376088142395, "learning_rate": 9.424462993186145e-06, "loss": 0.1413, "step": 1663 }, { "epoch": 0.5392093324692158, "grad_norm": 0.5424678921699524, "learning_rate": 9.423647945993586e-06, "loss": 0.1598, "step": 1664 }, { "epoch": 0.5395333765392093, "grad_norm": 0.5627645254135132, "learning_rate": 9.422832357389341e-06, "loss": 0.183, "step": 1665 }, { "epoch": 0.5398574206092028, "grad_norm": 0.5190801620483398, "learning_rate": 9.422016227473233e-06, "loss": 0.1809, "step": 1666 }, { "epoch": 0.5401814646791964, "grad_norm": 0.5414610505104065, "learning_rate": 9.42119955634515e-06, "loss": 0.157, "step": 1667 }, { "epoch": 0.5405055087491899, "grad_norm": 0.5154577493667603, "learning_rate": 9.420382344105037e-06, "loss": 0.1565, "step": 1668 }, { "epoch": 0.5408295528191834, "grad_norm": 0.5236411690711975, "learning_rate": 9.41956459085292e-06, "loss": 0.1492, "step": 1669 }, { "epoch": 0.541153596889177, "grad_norm": 0.5545842051506042, "learning_rate": 9.418746296688881e-06, "loss": 0.1688, "step": 1670 }, { "epoch": 0.5414776409591704, "grad_norm": 0.5425675511360168, "learning_rate": 9.417927461713073e-06, "loss": 0.1587, "step": 1671 }, { "epoch": 0.5418016850291639, "grad_norm": 0.5389112830162048, "learning_rate": 9.417108086025713e-06, "loss": 0.1582, "step": 1672 }, { "epoch": 0.5421257290991575, "grad_norm": 0.5404740571975708, "learning_rate": 9.416288169727082e-06, "loss": 0.1605, "step": 1673 }, { "epoch": 0.542449773169151, "grad_norm": 0.544076144695282, "learning_rate": 9.415467712917535e-06, "loss": 0.1533, "step": 1674 }, { "epoch": 0.5427738172391445, "grad_norm": 0.5358872413635254, "learning_rate": 9.414646715697482e-06, "loss": 0.1814, "step": 1675 }, { "epoch": 0.5430978613091381, "grad_norm": 0.5488872528076172, "learning_rate": 9.413825178167408e-06, "loss": 0.1644, "step": 1676 }, { "epoch": 0.5434219053791316, "grad_norm": 0.5466941595077515, "learning_rate": 9.413003100427864e-06, "loss": 0.1736, "step": 1677 }, { "epoch": 0.5437459494491251, "grad_norm": 0.531701385974884, "learning_rate": 9.41218048257946e-06, "loss": 0.1549, "step": 1678 }, { "epoch": 0.5440699935191186, "grad_norm": 0.561321496963501, "learning_rate": 9.411357324722879e-06, "loss": 0.1757, "step": 1679 }, { "epoch": 0.5443940375891121, "grad_norm": 0.5582754015922546, "learning_rate": 9.410533626958867e-06, "loss": 0.1611, "step": 1680 }, { "epoch": 0.5447180816591056, "grad_norm": 0.5068044066429138, "learning_rate": 9.409709389388234e-06, "loss": 0.1458, "step": 1681 }, { "epoch": 0.5450421257290992, "grad_norm": 0.5205656290054321, "learning_rate": 9.408884612111865e-06, "loss": 0.1569, "step": 1682 }, { "epoch": 0.5453661697990927, "grad_norm": 0.5100316405296326, "learning_rate": 9.408059295230696e-06, "loss": 0.1525, "step": 1683 }, { "epoch": 0.5456902138690862, "grad_norm": 0.5226548910140991, "learning_rate": 9.407233438845746e-06, "loss": 0.1601, "step": 1684 }, { "epoch": 0.5460142579390798, "grad_norm": 0.5404941439628601, "learning_rate": 9.406407043058087e-06, "loss": 0.168, "step": 1685 }, { "epoch": 0.5463383020090732, "grad_norm": 0.547059953212738, "learning_rate": 9.405580107968864e-06, "loss": 0.157, "step": 1686 }, { "epoch": 0.5466623460790667, "grad_norm": 0.5400663614273071, "learning_rate": 9.404752633679284e-06, "loss": 0.152, "step": 1687 }, { "epoch": 0.5469863901490603, "grad_norm": 0.5538695454597473, "learning_rate": 9.403924620290624e-06, "loss": 0.1645, "step": 1688 }, { "epoch": 0.5473104342190538, "grad_norm": 0.5280716419219971, "learning_rate": 9.403096067904223e-06, "loss": 0.1593, "step": 1689 }, { "epoch": 0.5476344782890473, "grad_norm": 0.5219088792800903, "learning_rate": 9.402266976621489e-06, "loss": 0.1514, "step": 1690 }, { "epoch": 0.5479585223590409, "grad_norm": 0.5405663251876831, "learning_rate": 9.401437346543893e-06, "loss": 0.1651, "step": 1691 }, { "epoch": 0.5482825664290344, "grad_norm": 0.48504170775413513, "learning_rate": 9.400607177772978e-06, "loss": 0.1427, "step": 1692 }, { "epoch": 0.5486066104990278, "grad_norm": 0.5304465293884277, "learning_rate": 9.399776470410344e-06, "loss": 0.1542, "step": 1693 }, { "epoch": 0.5489306545690213, "grad_norm": 0.551505446434021, "learning_rate": 9.398945224557662e-06, "loss": 0.1607, "step": 1694 }, { "epoch": 0.5492546986390149, "grad_norm": 0.5069938898086548, "learning_rate": 9.398113440316672e-06, "loss": 0.1725, "step": 1695 }, { "epoch": 0.5495787427090084, "grad_norm": 0.5524683594703674, "learning_rate": 9.397281117789173e-06, "loss": 0.1627, "step": 1696 }, { "epoch": 0.549902786779002, "grad_norm": 0.5232834815979004, "learning_rate": 9.396448257077034e-06, "loss": 0.1612, "step": 1697 }, { "epoch": 0.5502268308489955, "grad_norm": 0.5372471213340759, "learning_rate": 9.395614858282187e-06, "loss": 0.159, "step": 1698 }, { "epoch": 0.550550874918989, "grad_norm": 0.544275164604187, "learning_rate": 9.394780921506636e-06, "loss": 0.1579, "step": 1699 }, { "epoch": 0.5508749189889826, "grad_norm": 0.5532092452049255, "learning_rate": 9.393946446852447e-06, "loss": 0.1756, "step": 1700 }, { "epoch": 0.551198963058976, "grad_norm": 0.4750637710094452, "learning_rate": 9.393111434421747e-06, "loss": 0.1335, "step": 1701 }, { "epoch": 0.5515230071289695, "grad_norm": 0.517975389957428, "learning_rate": 9.392275884316737e-06, "loss": 0.1468, "step": 1702 }, { "epoch": 0.551847051198963, "grad_norm": 0.5643892288208008, "learning_rate": 9.391439796639679e-06, "loss": 0.1596, "step": 1703 }, { "epoch": 0.5521710952689566, "grad_norm": 0.5320152640342712, "learning_rate": 9.390603171492902e-06, "loss": 0.152, "step": 1704 }, { "epoch": 0.5524951393389501, "grad_norm": 0.5514765381813049, "learning_rate": 9.389766008978803e-06, "loss": 0.167, "step": 1705 }, { "epoch": 0.5528191834089436, "grad_norm": 0.5518473982810974, "learning_rate": 9.388928309199839e-06, "loss": 0.167, "step": 1706 }, { "epoch": 0.5531432274789372, "grad_norm": 0.5376887321472168, "learning_rate": 9.388090072258538e-06, "loss": 0.1582, "step": 1707 }, { "epoch": 0.5534672715489306, "grad_norm": 0.5329182147979736, "learning_rate": 9.387251298257492e-06, "loss": 0.1641, "step": 1708 }, { "epoch": 0.5537913156189241, "grad_norm": 0.5687870383262634, "learning_rate": 9.38641198729936e-06, "loss": 0.1604, "step": 1709 }, { "epoch": 0.5541153596889177, "grad_norm": 0.5174402594566345, "learning_rate": 9.385572139486864e-06, "loss": 0.1685, "step": 1710 }, { "epoch": 0.5544394037589112, "grad_norm": 0.5301437973976135, "learning_rate": 9.384731754922793e-06, "loss": 0.1549, "step": 1711 }, { "epoch": 0.5547634478289047, "grad_norm": 0.5403845310211182, "learning_rate": 9.383890833710004e-06, "loss": 0.1713, "step": 1712 }, { "epoch": 0.5550874918988983, "grad_norm": 0.5424365401268005, "learning_rate": 9.383049375951417e-06, "loss": 0.1661, "step": 1713 }, { "epoch": 0.5554115359688918, "grad_norm": 0.5190292596817017, "learning_rate": 9.382207381750015e-06, "loss": 0.159, "step": 1714 }, { "epoch": 0.5557355800388852, "grad_norm": 0.5062054991722107, "learning_rate": 9.381364851208855e-06, "loss": 0.1513, "step": 1715 }, { "epoch": 0.5560596241088788, "grad_norm": 0.535383403301239, "learning_rate": 9.38052178443105e-06, "loss": 0.1645, "step": 1716 }, { "epoch": 0.5563836681788723, "grad_norm": 0.534705638885498, "learning_rate": 9.379678181519787e-06, "loss": 0.1628, "step": 1717 }, { "epoch": 0.5567077122488658, "grad_norm": 0.524269700050354, "learning_rate": 9.378834042578314e-06, "loss": 0.165, "step": 1718 }, { "epoch": 0.5570317563188594, "grad_norm": 0.5700576901435852, "learning_rate": 9.37798936770994e-06, "loss": 0.1719, "step": 1719 }, { "epoch": 0.5573558003888529, "grad_norm": 0.5169806480407715, "learning_rate": 9.377144157018054e-06, "loss": 0.1573, "step": 1720 }, { "epoch": 0.5576798444588464, "grad_norm": 0.5423029661178589, "learning_rate": 9.376298410606096e-06, "loss": 0.1578, "step": 1721 }, { "epoch": 0.55800388852884, "grad_norm": 0.5279255509376526, "learning_rate": 9.375452128577578e-06, "loss": 0.1539, "step": 1722 }, { "epoch": 0.5583279325988334, "grad_norm": 0.5555424094200134, "learning_rate": 9.374605311036077e-06, "loss": 0.1632, "step": 1723 }, { "epoch": 0.5586519766688269, "grad_norm": 0.4893963634967804, "learning_rate": 9.373757958085237e-06, "loss": 0.1394, "step": 1724 }, { "epoch": 0.5589760207388205, "grad_norm": 0.5070914626121521, "learning_rate": 9.372910069828763e-06, "loss": 0.1571, "step": 1725 }, { "epoch": 0.559300064808814, "grad_norm": 0.5325096249580383, "learning_rate": 9.37206164637043e-06, "loss": 0.1566, "step": 1726 }, { "epoch": 0.5596241088788075, "grad_norm": 0.5089950561523438, "learning_rate": 9.371212687814076e-06, "loss": 0.1474, "step": 1727 }, { "epoch": 0.5599481529488011, "grad_norm": 0.5487457513809204, "learning_rate": 9.370363194263604e-06, "loss": 0.1818, "step": 1728 }, { "epoch": 0.5602721970187946, "grad_norm": 0.5325713753700256, "learning_rate": 9.369513165822987e-06, "loss": 0.1654, "step": 1729 }, { "epoch": 0.560596241088788, "grad_norm": 0.5347417593002319, "learning_rate": 9.368662602596259e-06, "loss": 0.1665, "step": 1730 }, { "epoch": 0.5609202851587816, "grad_norm": 0.5145535469055176, "learning_rate": 9.367811504687521e-06, "loss": 0.159, "step": 1731 }, { "epoch": 0.5612443292287751, "grad_norm": 0.5164404511451721, "learning_rate": 9.366959872200935e-06, "loss": 0.1641, "step": 1732 }, { "epoch": 0.5615683732987686, "grad_norm": 0.49573078751564026, "learning_rate": 9.36610770524074e-06, "loss": 0.1671, "step": 1733 }, { "epoch": 0.5618924173687622, "grad_norm": 0.5579914450645447, "learning_rate": 9.365255003911227e-06, "loss": 0.1592, "step": 1734 }, { "epoch": 0.5622164614387557, "grad_norm": 0.5098734498023987, "learning_rate": 9.364401768316762e-06, "loss": 0.1494, "step": 1735 }, { "epoch": 0.5625405055087492, "grad_norm": 0.5225284099578857, "learning_rate": 9.363547998561771e-06, "loss": 0.1491, "step": 1736 }, { "epoch": 0.5628645495787427, "grad_norm": 0.5344732999801636, "learning_rate": 9.362693694750747e-06, "loss": 0.1707, "step": 1737 }, { "epoch": 0.5631885936487362, "grad_norm": 0.5236510038375854, "learning_rate": 9.361838856988247e-06, "loss": 0.1617, "step": 1738 }, { "epoch": 0.5635126377187297, "grad_norm": 0.5190684199333191, "learning_rate": 9.360983485378899e-06, "loss": 0.1692, "step": 1739 }, { "epoch": 0.5638366817887233, "grad_norm": 0.5158076286315918, "learning_rate": 9.360127580027389e-06, "loss": 0.1555, "step": 1740 }, { "epoch": 0.5641607258587168, "grad_norm": 0.4986492395401001, "learning_rate": 9.359271141038473e-06, "loss": 0.14, "step": 1741 }, { "epoch": 0.5644847699287103, "grad_norm": 0.6440570950508118, "learning_rate": 9.358414168516971e-06, "loss": 0.1608, "step": 1742 }, { "epoch": 0.5648088139987039, "grad_norm": 0.5538783073425293, "learning_rate": 9.357556662567767e-06, "loss": 0.1755, "step": 1743 }, { "epoch": 0.5651328580686974, "grad_norm": 0.5434886813163757, "learning_rate": 9.35669862329581e-06, "loss": 0.1706, "step": 1744 }, { "epoch": 0.5654569021386908, "grad_norm": 0.5213068723678589, "learning_rate": 9.35584005080612e-06, "loss": 0.1529, "step": 1745 }, { "epoch": 0.5657809462086844, "grad_norm": 0.5256164073944092, "learning_rate": 9.354980945203776e-06, "loss": 0.1594, "step": 1746 }, { "epoch": 0.5661049902786779, "grad_norm": 0.5316346287727356, "learning_rate": 9.354121306593922e-06, "loss": 0.1512, "step": 1747 }, { "epoch": 0.5664290343486714, "grad_norm": 0.5344158411026001, "learning_rate": 9.353261135081773e-06, "loss": 0.1616, "step": 1748 }, { "epoch": 0.566753078418665, "grad_norm": 0.49767589569091797, "learning_rate": 9.3524004307726e-06, "loss": 0.1422, "step": 1749 }, { "epoch": 0.5670771224886585, "grad_norm": 0.4954655170440674, "learning_rate": 9.351539193771753e-06, "loss": 0.1523, "step": 1750 }, { "epoch": 0.567401166558652, "grad_norm": 0.5749108195304871, "learning_rate": 9.350677424184632e-06, "loss": 0.1658, "step": 1751 }, { "epoch": 0.5677252106286454, "grad_norm": 0.5306864976882935, "learning_rate": 9.349815122116715e-06, "loss": 0.166, "step": 1752 }, { "epoch": 0.568049254698639, "grad_norm": 0.4998403787612915, "learning_rate": 9.348952287673536e-06, "loss": 0.1451, "step": 1753 }, { "epoch": 0.5683732987686325, "grad_norm": 0.5465645790100098, "learning_rate": 9.348088920960695e-06, "loss": 0.156, "step": 1754 }, { "epoch": 0.568697342838626, "grad_norm": 0.550026535987854, "learning_rate": 9.347225022083866e-06, "loss": 0.1664, "step": 1755 }, { "epoch": 0.5690213869086196, "grad_norm": 0.5608965754508972, "learning_rate": 9.346360591148778e-06, "loss": 0.1697, "step": 1756 }, { "epoch": 0.5693454309786131, "grad_norm": 0.5068145990371704, "learning_rate": 9.34549562826123e-06, "loss": 0.1487, "step": 1757 }, { "epoch": 0.5696694750486067, "grad_norm": 0.5745015144348145, "learning_rate": 9.344630133527084e-06, "loss": 0.1769, "step": 1758 }, { "epoch": 0.5699935191186001, "grad_norm": 0.521942138671875, "learning_rate": 9.34376410705227e-06, "loss": 0.1518, "step": 1759 }, { "epoch": 0.5703175631885936, "grad_norm": 0.49625447392463684, "learning_rate": 9.342897548942778e-06, "loss": 0.15, "step": 1760 }, { "epoch": 0.5706416072585871, "grad_norm": 0.5072271227836609, "learning_rate": 9.34203045930467e-06, "loss": 0.1461, "step": 1761 }, { "epoch": 0.5709656513285807, "grad_norm": 0.5093204379081726, "learning_rate": 9.341162838244068e-06, "loss": 0.1686, "step": 1762 }, { "epoch": 0.5712896953985742, "grad_norm": 0.5371447205543518, "learning_rate": 9.34029468586716e-06, "loss": 0.1585, "step": 1763 }, { "epoch": 0.5716137394685677, "grad_norm": 0.5677884817123413, "learning_rate": 9.3394260022802e-06, "loss": 0.167, "step": 1764 }, { "epoch": 0.5719377835385613, "grad_norm": 0.4986721873283386, "learning_rate": 9.338556787589505e-06, "loss": 0.1506, "step": 1765 }, { "epoch": 0.5722618276085548, "grad_norm": 0.5552408695220947, "learning_rate": 9.337687041901461e-06, "loss": 0.1626, "step": 1766 }, { "epoch": 0.5725858716785482, "grad_norm": 0.5374127626419067, "learning_rate": 9.336816765322514e-06, "loss": 0.157, "step": 1767 }, { "epoch": 0.5729099157485418, "grad_norm": 0.5247153043746948, "learning_rate": 9.33594595795918e-06, "loss": 0.149, "step": 1768 }, { "epoch": 0.5732339598185353, "grad_norm": 0.5264690518379211, "learning_rate": 9.335074619918036e-06, "loss": 0.1611, "step": 1769 }, { "epoch": 0.5735580038885288, "grad_norm": 0.5231035947799683, "learning_rate": 9.334202751305724e-06, "loss": 0.1603, "step": 1770 }, { "epoch": 0.5738820479585224, "grad_norm": 0.5216938257217407, "learning_rate": 9.333330352228954e-06, "loss": 0.1588, "step": 1771 }, { "epoch": 0.5742060920285159, "grad_norm": 0.5302786231040955, "learning_rate": 9.332457422794498e-06, "loss": 0.1582, "step": 1772 }, { "epoch": 0.5745301360985094, "grad_norm": 0.521327018737793, "learning_rate": 9.331583963109196e-06, "loss": 0.1636, "step": 1773 }, { "epoch": 0.5748541801685029, "grad_norm": 0.5170415639877319, "learning_rate": 9.33070997327995e-06, "loss": 0.1558, "step": 1774 }, { "epoch": 0.5751782242384964, "grad_norm": 0.5299942493438721, "learning_rate": 9.329835453413729e-06, "loss": 0.1612, "step": 1775 }, { "epoch": 0.5755022683084899, "grad_norm": 0.4877796173095703, "learning_rate": 9.328960403617561e-06, "loss": 0.1353, "step": 1776 }, { "epoch": 0.5758263123784835, "grad_norm": 0.5184668302536011, "learning_rate": 9.328084823998551e-06, "loss": 0.1578, "step": 1777 }, { "epoch": 0.576150356448477, "grad_norm": 0.5334933400154114, "learning_rate": 9.327208714663856e-06, "loss": 0.1631, "step": 1778 }, { "epoch": 0.5764744005184705, "grad_norm": 0.5227888822555542, "learning_rate": 9.326332075720705e-06, "loss": 0.1626, "step": 1779 }, { "epoch": 0.5767984445884641, "grad_norm": 0.533147394657135, "learning_rate": 9.32545490727639e-06, "loss": 0.1652, "step": 1780 }, { "epoch": 0.5771224886584575, "grad_norm": 0.49876242876052856, "learning_rate": 9.324577209438269e-06, "loss": 0.1443, "step": 1781 }, { "epoch": 0.577446532728451, "grad_norm": 0.5057281255722046, "learning_rate": 9.32369898231376e-06, "loss": 0.1436, "step": 1782 }, { "epoch": 0.5777705767984446, "grad_norm": 0.5506377816200256, "learning_rate": 9.322820226010354e-06, "loss": 0.1626, "step": 1783 }, { "epoch": 0.5780946208684381, "grad_norm": 0.5420976877212524, "learning_rate": 9.3219409406356e-06, "loss": 0.1562, "step": 1784 }, { "epoch": 0.5784186649384316, "grad_norm": 0.5418969988822937, "learning_rate": 9.321061126297115e-06, "loss": 0.1616, "step": 1785 }, { "epoch": 0.5787427090084252, "grad_norm": 0.5089748501777649, "learning_rate": 9.32018078310258e-06, "loss": 0.1483, "step": 1786 }, { "epoch": 0.5790667530784187, "grad_norm": 0.5087575316429138, "learning_rate": 9.319299911159738e-06, "loss": 0.1396, "step": 1787 }, { "epoch": 0.5793907971484121, "grad_norm": 0.537186324596405, "learning_rate": 9.318418510576402e-06, "loss": 0.1569, "step": 1788 }, { "epoch": 0.5797148412184057, "grad_norm": 0.5755051970481873, "learning_rate": 9.317536581460444e-06, "loss": 0.1531, "step": 1789 }, { "epoch": 0.5800388852883992, "grad_norm": 0.5934356451034546, "learning_rate": 9.316654123919808e-06, "loss": 0.1786, "step": 1790 }, { "epoch": 0.5803629293583927, "grad_norm": 0.5637712478637695, "learning_rate": 9.315771138062495e-06, "loss": 0.1699, "step": 1791 }, { "epoch": 0.5806869734283863, "grad_norm": 0.5092173218727112, "learning_rate": 9.314887623996574e-06, "loss": 0.1481, "step": 1792 }, { "epoch": 0.5810110174983798, "grad_norm": 0.5008086562156677, "learning_rate": 9.31400358183018e-06, "loss": 0.1502, "step": 1793 }, { "epoch": 0.5813350615683733, "grad_norm": 0.5082639455795288, "learning_rate": 9.31311901167151e-06, "loss": 0.1388, "step": 1794 }, { "epoch": 0.5816591056383669, "grad_norm": 0.5039761662483215, "learning_rate": 9.312233913628828e-06, "loss": 0.153, "step": 1795 }, { "epoch": 0.5819831497083603, "grad_norm": 0.4801679849624634, "learning_rate": 9.311348287810459e-06, "loss": 0.149, "step": 1796 }, { "epoch": 0.5823071937783538, "grad_norm": 0.5526052117347717, "learning_rate": 9.310462134324797e-06, "loss": 0.1686, "step": 1797 }, { "epoch": 0.5826312378483474, "grad_norm": 0.4989832937717438, "learning_rate": 9.3095754532803e-06, "loss": 0.1435, "step": 1798 }, { "epoch": 0.5829552819183409, "grad_norm": 0.44254204630851746, "learning_rate": 9.308688244785485e-06, "loss": 0.1336, "step": 1799 }, { "epoch": 0.5832793259883344, "grad_norm": 0.5272495746612549, "learning_rate": 9.307800508948941e-06, "loss": 0.1493, "step": 1800 }, { "epoch": 0.583603370058328, "grad_norm": 0.4973255693912506, "learning_rate": 9.306912245879318e-06, "loss": 0.1436, "step": 1801 }, { "epoch": 0.5839274141283215, "grad_norm": 0.5235045552253723, "learning_rate": 9.30602345568533e-06, "loss": 0.1529, "step": 1802 }, { "epoch": 0.5842514581983149, "grad_norm": 0.48296064138412476, "learning_rate": 9.305134138475755e-06, "loss": 0.1378, "step": 1803 }, { "epoch": 0.5845755022683085, "grad_norm": 0.5345264077186584, "learning_rate": 9.304244294359442e-06, "loss": 0.1631, "step": 1804 }, { "epoch": 0.584899546338302, "grad_norm": 0.5436072945594788, "learning_rate": 9.303353923445293e-06, "loss": 0.1476, "step": 1805 }, { "epoch": 0.5852235904082955, "grad_norm": 0.5196661353111267, "learning_rate": 9.302463025842284e-06, "loss": 0.1645, "step": 1806 }, { "epoch": 0.5855476344782891, "grad_norm": 0.541592001914978, "learning_rate": 9.301571601659452e-06, "loss": 0.159, "step": 1807 }, { "epoch": 0.5858716785482826, "grad_norm": 0.5426565408706665, "learning_rate": 9.300679651005898e-06, "loss": 0.1677, "step": 1808 }, { "epoch": 0.5861957226182761, "grad_norm": 0.5050917267799377, "learning_rate": 9.299787173990789e-06, "loss": 0.1487, "step": 1809 }, { "epoch": 0.5865197666882696, "grad_norm": 0.48838865756988525, "learning_rate": 9.298894170723353e-06, "loss": 0.1498, "step": 1810 }, { "epoch": 0.5868438107582631, "grad_norm": 0.5278536081314087, "learning_rate": 9.29800064131289e-06, "loss": 0.1651, "step": 1811 }, { "epoch": 0.5871678548282566, "grad_norm": 0.5049905180931091, "learning_rate": 9.297106585868753e-06, "loss": 0.1485, "step": 1812 }, { "epoch": 0.5874918988982502, "grad_norm": 0.5237796902656555, "learning_rate": 9.296212004500373e-06, "loss": 0.1678, "step": 1813 }, { "epoch": 0.5878159429682437, "grad_norm": 0.5402454137802124, "learning_rate": 9.295316897317232e-06, "loss": 0.1638, "step": 1814 }, { "epoch": 0.5881399870382372, "grad_norm": 0.5265448689460754, "learning_rate": 9.294421264428886e-06, "loss": 0.1581, "step": 1815 }, { "epoch": 0.5884640311082308, "grad_norm": 0.4858977794647217, "learning_rate": 9.29352510594495e-06, "loss": 0.1521, "step": 1816 }, { "epoch": 0.5887880751782243, "grad_norm": 0.5320647358894348, "learning_rate": 9.292628421975104e-06, "loss": 0.1517, "step": 1817 }, { "epoch": 0.5891121192482177, "grad_norm": 0.5612199902534485, "learning_rate": 9.291731212629096e-06, "loss": 0.1662, "step": 1818 }, { "epoch": 0.5894361633182112, "grad_norm": 0.557144820690155, "learning_rate": 9.290833478016735e-06, "loss": 0.1622, "step": 1819 }, { "epoch": 0.5897602073882048, "grad_norm": 0.5538755059242249, "learning_rate": 9.289935218247895e-06, "loss": 0.1526, "step": 1820 }, { "epoch": 0.5900842514581983, "grad_norm": 0.5562155246734619, "learning_rate": 9.289036433432513e-06, "loss": 0.1676, "step": 1821 }, { "epoch": 0.5904082955281919, "grad_norm": 0.5501015782356262, "learning_rate": 9.288137123680595e-06, "loss": 0.1635, "step": 1822 }, { "epoch": 0.5907323395981854, "grad_norm": 0.5297316312789917, "learning_rate": 9.287237289102202e-06, "loss": 0.1649, "step": 1823 }, { "epoch": 0.5910563836681789, "grad_norm": 0.4975319504737854, "learning_rate": 9.286336929807471e-06, "loss": 0.1551, "step": 1824 }, { "epoch": 0.5913804277381723, "grad_norm": 0.5168135166168213, "learning_rate": 9.285436045906593e-06, "loss": 0.1526, "step": 1825 }, { "epoch": 0.5917044718081659, "grad_norm": 0.5180168747901917, "learning_rate": 9.28453463750983e-06, "loss": 0.1674, "step": 1826 }, { "epoch": 0.5920285158781594, "grad_norm": 0.5400831699371338, "learning_rate": 9.283632704727507e-06, "loss": 0.162, "step": 1827 }, { "epoch": 0.592352559948153, "grad_norm": 0.5392692685127258, "learning_rate": 9.282730247670008e-06, "loss": 0.1572, "step": 1828 }, { "epoch": 0.5926766040181465, "grad_norm": 0.5672776103019714, "learning_rate": 9.281827266447787e-06, "loss": 0.1712, "step": 1829 }, { "epoch": 0.59300064808814, "grad_norm": 0.522743821144104, "learning_rate": 9.28092376117136e-06, "loss": 0.1572, "step": 1830 }, { "epoch": 0.5933246921581335, "grad_norm": 0.5244477987289429, "learning_rate": 9.280019731951305e-06, "loss": 0.1672, "step": 1831 }, { "epoch": 0.593648736228127, "grad_norm": 0.505828320980072, "learning_rate": 9.27911517889827e-06, "loss": 0.1542, "step": 1832 }, { "epoch": 0.5939727802981205, "grad_norm": 0.4986834228038788, "learning_rate": 9.278210102122962e-06, "loss": 0.1493, "step": 1833 }, { "epoch": 0.594296824368114, "grad_norm": 0.5014796257019043, "learning_rate": 9.277304501736156e-06, "loss": 0.1544, "step": 1834 }, { "epoch": 0.5946208684381076, "grad_norm": 0.5235145092010498, "learning_rate": 9.276398377848683e-06, "loss": 0.1587, "step": 1835 }, { "epoch": 0.5949449125081011, "grad_norm": 0.5145488977432251, "learning_rate": 9.27549173057145e-06, "loss": 0.1591, "step": 1836 }, { "epoch": 0.5952689565780946, "grad_norm": 0.5658987760543823, "learning_rate": 9.274584560015419e-06, "loss": 0.1698, "step": 1837 }, { "epoch": 0.5955930006480882, "grad_norm": 0.5141145586967468, "learning_rate": 9.273676866291617e-06, "loss": 0.1467, "step": 1838 }, { "epoch": 0.5959170447180817, "grad_norm": 0.5158601403236389, "learning_rate": 9.27276864951114e-06, "loss": 0.1618, "step": 1839 }, { "epoch": 0.5962410887880751, "grad_norm": 0.5314555764198303, "learning_rate": 9.271859909785144e-06, "loss": 0.1599, "step": 1840 }, { "epoch": 0.5965651328580687, "grad_norm": 0.4693741202354431, "learning_rate": 9.270950647224851e-06, "loss": 0.1418, "step": 1841 }, { "epoch": 0.5968891769280622, "grad_norm": 0.5171167850494385, "learning_rate": 9.270040861941542e-06, "loss": 0.152, "step": 1842 }, { "epoch": 0.5972132209980557, "grad_norm": 0.5105248093605042, "learning_rate": 9.269130554046571e-06, "loss": 0.1562, "step": 1843 }, { "epoch": 0.5975372650680493, "grad_norm": 0.5515754222869873, "learning_rate": 9.268219723651349e-06, "loss": 0.1575, "step": 1844 }, { "epoch": 0.5978613091380428, "grad_norm": 0.47018948197364807, "learning_rate": 9.267308370867352e-06, "loss": 0.1364, "step": 1845 }, { "epoch": 0.5981853532080363, "grad_norm": 0.5275624394416809, "learning_rate": 9.26639649580612e-06, "loss": 0.1698, "step": 1846 }, { "epoch": 0.5985093972780298, "grad_norm": 0.5570939183235168, "learning_rate": 9.265484098579259e-06, "loss": 0.1586, "step": 1847 }, { "epoch": 0.5988334413480233, "grad_norm": 0.5090274810791016, "learning_rate": 9.264571179298438e-06, "loss": 0.1477, "step": 1848 }, { "epoch": 0.5991574854180168, "grad_norm": 0.5754071474075317, "learning_rate": 9.263657738075387e-06, "loss": 0.1657, "step": 1849 }, { "epoch": 0.5994815294880104, "grad_norm": 0.5546247363090515, "learning_rate": 9.262743775021907e-06, "loss": 0.1523, "step": 1850 }, { "epoch": 0.5998055735580039, "grad_norm": 0.519078254699707, "learning_rate": 9.261829290249855e-06, "loss": 0.1658, "step": 1851 }, { "epoch": 0.6001296176279974, "grad_norm": 0.5111896991729736, "learning_rate": 9.260914283871154e-06, "loss": 0.1628, "step": 1852 }, { "epoch": 0.600453661697991, "grad_norm": 0.5172817707061768, "learning_rate": 9.259998755997796e-06, "loss": 0.1494, "step": 1853 }, { "epoch": 0.6007777057679844, "grad_norm": 0.5435042977333069, "learning_rate": 9.259082706741828e-06, "loss": 0.1839, "step": 1854 }, { "epoch": 0.6011017498379779, "grad_norm": 0.5118913054466248, "learning_rate": 9.258166136215369e-06, "loss": 0.1489, "step": 1855 }, { "epoch": 0.6014257939079715, "grad_norm": 0.5078453421592712, "learning_rate": 9.257249044530596e-06, "loss": 0.1555, "step": 1856 }, { "epoch": 0.601749837977965, "grad_norm": 0.49562156200408936, "learning_rate": 9.256331431799754e-06, "loss": 0.1619, "step": 1857 }, { "epoch": 0.6020738820479585, "grad_norm": 0.5592288970947266, "learning_rate": 9.25541329813515e-06, "loss": 0.1618, "step": 1858 }, { "epoch": 0.6023979261179521, "grad_norm": 0.5321934819221497, "learning_rate": 9.254494643649152e-06, "loss": 0.1641, "step": 1859 }, { "epoch": 0.6027219701879456, "grad_norm": 0.4995349645614624, "learning_rate": 9.2535754684542e-06, "loss": 0.1504, "step": 1860 }, { "epoch": 0.6030460142579391, "grad_norm": 0.5189761519432068, "learning_rate": 9.252655772662784e-06, "loss": 0.1414, "step": 1861 }, { "epoch": 0.6033700583279326, "grad_norm": 0.5353453755378723, "learning_rate": 9.251735556387473e-06, "loss": 0.1696, "step": 1862 }, { "epoch": 0.6036941023979261, "grad_norm": 0.49074551463127136, "learning_rate": 9.250814819740888e-06, "loss": 0.1524, "step": 1863 }, { "epoch": 0.6040181464679196, "grad_norm": 0.46947991847991943, "learning_rate": 9.249893562835723e-06, "loss": 0.1413, "step": 1864 }, { "epoch": 0.6043421905379132, "grad_norm": 0.49254897236824036, "learning_rate": 9.248971785784726e-06, "loss": 0.1457, "step": 1865 }, { "epoch": 0.6046662346079067, "grad_norm": 0.5511367917060852, "learning_rate": 9.248049488700717e-06, "loss": 0.1761, "step": 1866 }, { "epoch": 0.6049902786779002, "grad_norm": 0.5265299677848816, "learning_rate": 9.247126671696573e-06, "loss": 0.1541, "step": 1867 }, { "epoch": 0.6053143227478938, "grad_norm": 0.5218799114227295, "learning_rate": 9.24620333488524e-06, "loss": 0.162, "step": 1868 }, { "epoch": 0.6056383668178872, "grad_norm": 0.4817226827144623, "learning_rate": 9.245279478379726e-06, "loss": 0.1429, "step": 1869 }, { "epoch": 0.6059624108878807, "grad_norm": 0.5035088062286377, "learning_rate": 9.2443551022931e-06, "loss": 0.1436, "step": 1870 }, { "epoch": 0.6062864549578743, "grad_norm": 0.5474444627761841, "learning_rate": 9.2434302067385e-06, "loss": 0.1656, "step": 1871 }, { "epoch": 0.6066104990278678, "grad_norm": 0.5090144872665405, "learning_rate": 9.242504791829123e-06, "loss": 0.1614, "step": 1872 }, { "epoch": 0.6069345430978613, "grad_norm": 0.6202980279922485, "learning_rate": 9.241578857678228e-06, "loss": 0.1633, "step": 1873 }, { "epoch": 0.6072585871678549, "grad_norm": 0.480751097202301, "learning_rate": 9.240652404399145e-06, "loss": 0.1439, "step": 1874 }, { "epoch": 0.6075826312378484, "grad_norm": 0.5152072310447693, "learning_rate": 9.239725432105258e-06, "loss": 0.1493, "step": 1875 }, { "epoch": 0.6079066753078418, "grad_norm": 0.5336874723434448, "learning_rate": 9.238797940910021e-06, "loss": 0.1649, "step": 1876 }, { "epoch": 0.6082307193778353, "grad_norm": 0.5547704696655273, "learning_rate": 9.237869930926953e-06, "loss": 0.1666, "step": 1877 }, { "epoch": 0.6085547634478289, "grad_norm": 0.5038400292396545, "learning_rate": 9.23694140226963e-06, "loss": 0.1526, "step": 1878 }, { "epoch": 0.6088788075178224, "grad_norm": 0.5584294199943542, "learning_rate": 9.236012355051697e-06, "loss": 0.1708, "step": 1879 }, { "epoch": 0.609202851587816, "grad_norm": 0.4911188781261444, "learning_rate": 9.23508278938686e-06, "loss": 0.1412, "step": 1880 }, { "epoch": 0.6095268956578095, "grad_norm": 0.5308417081832886, "learning_rate": 9.234152705388885e-06, "loss": 0.1554, "step": 1881 }, { "epoch": 0.609850939727803, "grad_norm": 0.5310256481170654, "learning_rate": 9.233222103171612e-06, "loss": 0.1553, "step": 1882 }, { "epoch": 0.6101749837977966, "grad_norm": 0.5036491751670837, "learning_rate": 9.232290982848933e-06, "loss": 0.1382, "step": 1883 }, { "epoch": 0.61049902786779, "grad_norm": 0.4999232888221741, "learning_rate": 9.23135934453481e-06, "loss": 0.1558, "step": 1884 }, { "epoch": 0.6108230719377835, "grad_norm": 0.4958760440349579, "learning_rate": 9.230427188343266e-06, "loss": 0.1469, "step": 1885 }, { "epoch": 0.611147116007777, "grad_norm": 0.5068649053573608, "learning_rate": 9.229494514388388e-06, "loss": 0.1408, "step": 1886 }, { "epoch": 0.6114711600777706, "grad_norm": 0.5678321719169617, "learning_rate": 9.228561322784326e-06, "loss": 0.172, "step": 1887 }, { "epoch": 0.6117952041477641, "grad_norm": 0.527591347694397, "learning_rate": 9.227627613645294e-06, "loss": 0.1642, "step": 1888 }, { "epoch": 0.6121192482177576, "grad_norm": 0.4956148862838745, "learning_rate": 9.226693387085568e-06, "loss": 0.1551, "step": 1889 }, { "epoch": 0.6124432922877512, "grad_norm": 0.4847855269908905, "learning_rate": 9.225758643219489e-06, "loss": 0.1374, "step": 1890 }, { "epoch": 0.6127673363577446, "grad_norm": 0.505673348903656, "learning_rate": 9.22482338216146e-06, "loss": 0.1498, "step": 1891 }, { "epoch": 0.6130913804277381, "grad_norm": 0.49389687180519104, "learning_rate": 9.22388760402595e-06, "loss": 0.1405, "step": 1892 }, { "epoch": 0.6134154244977317, "grad_norm": 0.48019102215766907, "learning_rate": 9.222951308927485e-06, "loss": 0.1413, "step": 1893 }, { "epoch": 0.6137394685677252, "grad_norm": 0.5360737442970276, "learning_rate": 9.222014496980665e-06, "loss": 0.1479, "step": 1894 }, { "epoch": 0.6140635126377187, "grad_norm": 0.5291153192520142, "learning_rate": 9.221077168300142e-06, "loss": 0.1649, "step": 1895 }, { "epoch": 0.6143875567077123, "grad_norm": 0.5272452235221863, "learning_rate": 9.220139323000634e-06, "loss": 0.156, "step": 1896 }, { "epoch": 0.6147116007777058, "grad_norm": 0.5477816462516785, "learning_rate": 9.219200961196929e-06, "loss": 0.166, "step": 1897 }, { "epoch": 0.6150356448476992, "grad_norm": 0.522372841835022, "learning_rate": 9.218262083003871e-06, "loss": 0.1643, "step": 1898 }, { "epoch": 0.6153596889176928, "grad_norm": 0.5748099088668823, "learning_rate": 9.21732268853637e-06, "loss": 0.1773, "step": 1899 }, { "epoch": 0.6156837329876863, "grad_norm": 0.4743006229400635, "learning_rate": 9.216382777909398e-06, "loss": 0.1358, "step": 1900 }, { "epoch": 0.6160077770576798, "grad_norm": 0.507614254951477, "learning_rate": 9.215442351237993e-06, "loss": 0.149, "step": 1901 }, { "epoch": 0.6163318211276734, "grad_norm": 0.5415900945663452, "learning_rate": 9.214501408637253e-06, "loss": 0.1543, "step": 1902 }, { "epoch": 0.6166558651976669, "grad_norm": 0.5349360108375549, "learning_rate": 9.21355995022234e-06, "loss": 0.1611, "step": 1903 }, { "epoch": 0.6169799092676604, "grad_norm": 0.5085405111312866, "learning_rate": 9.212617976108478e-06, "loss": 0.153, "step": 1904 }, { "epoch": 0.6173039533376539, "grad_norm": 0.537108302116394, "learning_rate": 9.211675486410959e-06, "loss": 0.1559, "step": 1905 }, { "epoch": 0.6176279974076474, "grad_norm": 0.503128170967102, "learning_rate": 9.21073248124513e-06, "loss": 0.1531, "step": 1906 }, { "epoch": 0.6179520414776409, "grad_norm": 0.5284308791160583, "learning_rate": 9.20978896072641e-06, "loss": 0.1648, "step": 1907 }, { "epoch": 0.6182760855476345, "grad_norm": 0.5065122246742249, "learning_rate": 9.208844924970276e-06, "loss": 0.1551, "step": 1908 }, { "epoch": 0.618600129617628, "grad_norm": 0.5227933526039124, "learning_rate": 9.207900374092268e-06, "loss": 0.1589, "step": 1909 }, { "epoch": 0.6189241736876215, "grad_norm": 0.5541921257972717, "learning_rate": 9.206955308207988e-06, "loss": 0.1717, "step": 1910 }, { "epoch": 0.6192482177576151, "grad_norm": 0.5353711843490601, "learning_rate": 9.206009727433106e-06, "loss": 0.1744, "step": 1911 }, { "epoch": 0.6195722618276086, "grad_norm": 0.4900938868522644, "learning_rate": 9.205063631883351e-06, "loss": 0.1454, "step": 1912 }, { "epoch": 0.619896305897602, "grad_norm": 0.5369064211845398, "learning_rate": 9.204117021674515e-06, "loss": 0.1573, "step": 1913 }, { "epoch": 0.6202203499675956, "grad_norm": 0.5292285680770874, "learning_rate": 9.203169896922453e-06, "loss": 0.1614, "step": 1914 }, { "epoch": 0.6205443940375891, "grad_norm": 0.5318422317504883, "learning_rate": 9.202222257743088e-06, "loss": 0.165, "step": 1915 }, { "epoch": 0.6208684381075826, "grad_norm": 0.5160491466522217, "learning_rate": 9.201274104252398e-06, "loss": 0.1462, "step": 1916 }, { "epoch": 0.6211924821775762, "grad_norm": 0.5053558349609375, "learning_rate": 9.20032543656643e-06, "loss": 0.1485, "step": 1917 }, { "epoch": 0.6215165262475697, "grad_norm": 0.5057937502861023, "learning_rate": 9.19937625480129e-06, "loss": 0.1559, "step": 1918 }, { "epoch": 0.6218405703175632, "grad_norm": 0.5348033905029297, "learning_rate": 9.19842655907315e-06, "loss": 0.1707, "step": 1919 }, { "epoch": 0.6221646143875567, "grad_norm": 0.5293624997138977, "learning_rate": 9.197476349498243e-06, "loss": 0.1629, "step": 1920 }, { "epoch": 0.6224886584575502, "grad_norm": 0.5092240571975708, "learning_rate": 9.196525626192865e-06, "loss": 0.1699, "step": 1921 }, { "epoch": 0.6228127025275437, "grad_norm": 0.5243478417396545, "learning_rate": 9.195574389273375e-06, "loss": 0.1565, "step": 1922 }, { "epoch": 0.6231367465975373, "grad_norm": 0.5151318907737732, "learning_rate": 9.194622638856198e-06, "loss": 0.1464, "step": 1923 }, { "epoch": 0.6234607906675308, "grad_norm": 0.5310802459716797, "learning_rate": 9.193670375057816e-06, "loss": 0.1573, "step": 1924 }, { "epoch": 0.6237848347375243, "grad_norm": 0.5187256336212158, "learning_rate": 9.19271759799478e-06, "loss": 0.1602, "step": 1925 }, { "epoch": 0.6241088788075179, "grad_norm": 0.48503974080085754, "learning_rate": 9.191764307783698e-06, "loss": 0.1418, "step": 1926 }, { "epoch": 0.6244329228775113, "grad_norm": 0.48676154017448425, "learning_rate": 9.190810504541244e-06, "loss": 0.1403, "step": 1927 }, { "epoch": 0.6247569669475048, "grad_norm": 0.529976487159729, "learning_rate": 9.189856188384152e-06, "loss": 0.1638, "step": 1928 }, { "epoch": 0.6250810110174984, "grad_norm": 0.5011406540870667, "learning_rate": 9.188901359429226e-06, "loss": 0.1522, "step": 1929 }, { "epoch": 0.6254050550874919, "grad_norm": 0.4782002866268158, "learning_rate": 9.187946017793324e-06, "loss": 0.1445, "step": 1930 }, { "epoch": 0.6257290991574854, "grad_norm": 0.48546773195266724, "learning_rate": 9.186990163593371e-06, "loss": 0.1413, "step": 1931 }, { "epoch": 0.626053143227479, "grad_norm": 0.5287179946899414, "learning_rate": 9.18603379694636e-06, "loss": 0.1532, "step": 1932 }, { "epoch": 0.6263771872974725, "grad_norm": 0.5079996585845947, "learning_rate": 9.185076917969331e-06, "loss": 0.1398, "step": 1933 }, { "epoch": 0.626701231367466, "grad_norm": 0.5039437413215637, "learning_rate": 9.184119526779403e-06, "loss": 0.157, "step": 1934 }, { "epoch": 0.6270252754374595, "grad_norm": 0.5134934782981873, "learning_rate": 9.183161623493753e-06, "loss": 0.1481, "step": 1935 }, { "epoch": 0.627349319507453, "grad_norm": 0.5130413770675659, "learning_rate": 9.182203208229614e-06, "loss": 0.1609, "step": 1936 }, { "epoch": 0.6276733635774465, "grad_norm": 0.5762905478477478, "learning_rate": 9.181244281104289e-06, "loss": 0.1708, "step": 1937 }, { "epoch": 0.62799740764744, "grad_norm": 0.5197169780731201, "learning_rate": 9.180284842235143e-06, "loss": 0.1631, "step": 1938 }, { "epoch": 0.6283214517174336, "grad_norm": 0.5109285116195679, "learning_rate": 9.1793248917396e-06, "loss": 0.1493, "step": 1939 }, { "epoch": 0.6286454957874271, "grad_norm": 0.5167864561080933, "learning_rate": 9.178364429735149e-06, "loss": 0.1513, "step": 1940 }, { "epoch": 0.6289695398574207, "grad_norm": 0.5290215611457825, "learning_rate": 9.177403456339342e-06, "loss": 0.1447, "step": 1941 }, { "epoch": 0.6292935839274141, "grad_norm": 0.5334181785583496, "learning_rate": 9.176441971669791e-06, "loss": 0.1597, "step": 1942 }, { "epoch": 0.6296176279974076, "grad_norm": 0.5249089002609253, "learning_rate": 9.175479975844175e-06, "loss": 0.1572, "step": 1943 }, { "epoch": 0.6299416720674011, "grad_norm": 0.5331795811653137, "learning_rate": 9.17451746898023e-06, "loss": 0.1602, "step": 1944 }, { "epoch": 0.6302657161373947, "grad_norm": 0.4850247800350189, "learning_rate": 9.173554451195763e-06, "loss": 0.1398, "step": 1945 }, { "epoch": 0.6305897602073882, "grad_norm": 0.5020791888237, "learning_rate": 9.17259092260863e-06, "loss": 0.1494, "step": 1946 }, { "epoch": 0.6309138042773818, "grad_norm": 0.5400851964950562, "learning_rate": 9.171626883336766e-06, "loss": 0.17, "step": 1947 }, { "epoch": 0.6312378483473753, "grad_norm": 0.4870317280292511, "learning_rate": 9.170662333498153e-06, "loss": 0.147, "step": 1948 }, { "epoch": 0.6315618924173687, "grad_norm": 0.5228290557861328, "learning_rate": 9.169697273210846e-06, "loss": 0.1528, "step": 1949 }, { "epoch": 0.6318859364873622, "grad_norm": 0.48440635204315186, "learning_rate": 9.16873170259296e-06, "loss": 0.1442, "step": 1950 }, { "epoch": 0.6322099805573558, "grad_norm": 0.533446729183197, "learning_rate": 9.167765621762668e-06, "loss": 0.1524, "step": 1951 }, { "epoch": 0.6325340246273493, "grad_norm": 0.5196256041526794, "learning_rate": 9.166799030838212e-06, "loss": 0.1628, "step": 1952 }, { "epoch": 0.6328580686973428, "grad_norm": 0.5105645060539246, "learning_rate": 9.165831929937892e-06, "loss": 0.1429, "step": 1953 }, { "epoch": 0.6331821127673364, "grad_norm": 0.5194631218910217, "learning_rate": 9.164864319180074e-06, "loss": 0.1642, "step": 1954 }, { "epoch": 0.6335061568373299, "grad_norm": 0.5209739804267883, "learning_rate": 9.16389619868318e-06, "loss": 0.1742, "step": 1955 }, { "epoch": 0.6338302009073234, "grad_norm": 0.5078026056289673, "learning_rate": 9.162927568565701e-06, "loss": 0.1484, "step": 1956 }, { "epoch": 0.6341542449773169, "grad_norm": 0.4925689399242401, "learning_rate": 9.16195842894619e-06, "loss": 0.1509, "step": 1957 }, { "epoch": 0.6344782890473104, "grad_norm": 0.5168012380599976, "learning_rate": 9.160988779943257e-06, "loss": 0.1572, "step": 1958 }, { "epoch": 0.6348023331173039, "grad_norm": 0.5228518843650818, "learning_rate": 9.160018621675577e-06, "loss": 0.1566, "step": 1959 }, { "epoch": 0.6351263771872975, "grad_norm": 0.5400591492652893, "learning_rate": 9.159047954261892e-06, "loss": 0.1583, "step": 1960 }, { "epoch": 0.635450421257291, "grad_norm": 0.4899173974990845, "learning_rate": 9.158076777820998e-06, "loss": 0.1445, "step": 1961 }, { "epoch": 0.6357744653272845, "grad_norm": 0.5116453170776367, "learning_rate": 9.157105092471764e-06, "loss": 0.1631, "step": 1962 }, { "epoch": 0.6360985093972781, "grad_norm": 0.5398380756378174, "learning_rate": 9.156132898333108e-06, "loss": 0.1683, "step": 1963 }, { "epoch": 0.6364225534672715, "grad_norm": 0.49203190207481384, "learning_rate": 9.15516019552402e-06, "loss": 0.1467, "step": 1964 }, { "epoch": 0.636746597537265, "grad_norm": 0.5170449018478394, "learning_rate": 9.154186984163547e-06, "loss": 0.1537, "step": 1965 }, { "epoch": 0.6370706416072586, "grad_norm": 0.4930606484413147, "learning_rate": 9.153213264370805e-06, "loss": 0.1439, "step": 1966 }, { "epoch": 0.6373946856772521, "grad_norm": 0.48055657744407654, "learning_rate": 9.152239036264965e-06, "loss": 0.1383, "step": 1967 }, { "epoch": 0.6377187297472456, "grad_norm": 0.49675148725509644, "learning_rate": 9.151264299965263e-06, "loss": 0.1519, "step": 1968 }, { "epoch": 0.6380427738172392, "grad_norm": 0.4874381422996521, "learning_rate": 9.150289055591e-06, "loss": 0.1409, "step": 1969 }, { "epoch": 0.6383668178872327, "grad_norm": 0.4918610155582428, "learning_rate": 9.149313303261534e-06, "loss": 0.1473, "step": 1970 }, { "epoch": 0.6386908619572261, "grad_norm": 0.526047945022583, "learning_rate": 9.148337043096287e-06, "loss": 0.1563, "step": 1971 }, { "epoch": 0.6390149060272197, "grad_norm": 0.5674269199371338, "learning_rate": 9.147360275214746e-06, "loss": 0.1631, "step": 1972 }, { "epoch": 0.6393389500972132, "grad_norm": 0.500179648399353, "learning_rate": 9.146382999736455e-06, "loss": 0.1521, "step": 1973 }, { "epoch": 0.6396629941672067, "grad_norm": 0.5363490581512451, "learning_rate": 9.145405216781026e-06, "loss": 0.1432, "step": 1974 }, { "epoch": 0.6399870382372003, "grad_norm": 0.5230416059494019, "learning_rate": 9.14442692646813e-06, "loss": 0.1531, "step": 1975 }, { "epoch": 0.6403110823071938, "grad_norm": 0.5501431226730347, "learning_rate": 9.143448128917499e-06, "loss": 0.1595, "step": 1976 }, { "epoch": 0.6406351263771873, "grad_norm": 0.5650454163551331, "learning_rate": 9.142468824248928e-06, "loss": 0.1617, "step": 1977 }, { "epoch": 0.6409591704471809, "grad_norm": 0.5224339962005615, "learning_rate": 9.141489012582277e-06, "loss": 0.1635, "step": 1978 }, { "epoch": 0.6412832145171743, "grad_norm": 0.5190390348434448, "learning_rate": 9.140508694037462e-06, "loss": 0.1548, "step": 1979 }, { "epoch": 0.6416072585871678, "grad_norm": 0.5318208336830139, "learning_rate": 9.139527868734465e-06, "loss": 0.17, "step": 1980 }, { "epoch": 0.6419313026571614, "grad_norm": 0.5096891522407532, "learning_rate": 9.138546536793334e-06, "loss": 0.1619, "step": 1981 }, { "epoch": 0.6422553467271549, "grad_norm": 0.45812585949897766, "learning_rate": 9.137564698334167e-06, "loss": 0.128, "step": 1982 }, { "epoch": 0.6425793907971484, "grad_norm": 0.5666882991790771, "learning_rate": 9.13658235347714e-06, "loss": 0.1581, "step": 1983 }, { "epoch": 0.642903434867142, "grad_norm": 0.5142824053764343, "learning_rate": 9.135599502342474e-06, "loss": 0.1486, "step": 1984 }, { "epoch": 0.6432274789371355, "grad_norm": 0.48653656244277954, "learning_rate": 9.134616145050466e-06, "loss": 0.1339, "step": 1985 }, { "epoch": 0.6435515230071289, "grad_norm": 0.5284735560417175, "learning_rate": 9.13363228172147e-06, "loss": 0.1585, "step": 1986 }, { "epoch": 0.6438755670771225, "grad_norm": 0.503960132598877, "learning_rate": 9.132647912475897e-06, "loss": 0.1369, "step": 1987 }, { "epoch": 0.644199611147116, "grad_norm": 0.5423457622528076, "learning_rate": 9.131663037434228e-06, "loss": 0.1658, "step": 1988 }, { "epoch": 0.6445236552171095, "grad_norm": 0.5273451805114746, "learning_rate": 9.130677656717e-06, "loss": 0.1451, "step": 1989 }, { "epoch": 0.6448476992871031, "grad_norm": 0.5250473022460938, "learning_rate": 9.129691770444815e-06, "loss": 0.1539, "step": 1990 }, { "epoch": 0.6451717433570966, "grad_norm": 0.5005070567131042, "learning_rate": 9.128705378738336e-06, "loss": 0.1438, "step": 1991 }, { "epoch": 0.6454957874270901, "grad_norm": 0.49627885222435, "learning_rate": 9.127718481718288e-06, "loss": 0.1517, "step": 1992 }, { "epoch": 0.6458198314970836, "grad_norm": 0.5641838908195496, "learning_rate": 9.126731079505457e-06, "loss": 0.1742, "step": 1993 }, { "epoch": 0.6461438755670771, "grad_norm": 0.5262545347213745, "learning_rate": 9.125743172220691e-06, "loss": 0.1595, "step": 1994 }, { "epoch": 0.6464679196370706, "grad_norm": 0.49203866720199585, "learning_rate": 9.124754759984901e-06, "loss": 0.1422, "step": 1995 }, { "epoch": 0.6467919637070642, "grad_norm": 0.5152744650840759, "learning_rate": 9.12376584291906e-06, "loss": 0.1515, "step": 1996 }, { "epoch": 0.6471160077770577, "grad_norm": 0.5341880917549133, "learning_rate": 9.122776421144201e-06, "loss": 0.1588, "step": 1997 }, { "epoch": 0.6474400518470512, "grad_norm": 0.5317200422286987, "learning_rate": 9.12178649478142e-06, "loss": 0.1654, "step": 1998 }, { "epoch": 0.6477640959170448, "grad_norm": 0.4992202818393707, "learning_rate": 9.120796063951873e-06, "loss": 0.1632, "step": 1999 }, { "epoch": 0.6480881399870383, "grad_norm": 0.5180873274803162, "learning_rate": 9.11980512877678e-06, "loss": 0.1541, "step": 2000 }, { "epoch": 0.6484121840570317, "grad_norm": 0.4791898727416992, "learning_rate": 9.118813689377422e-06, "loss": 0.1371, "step": 2001 }, { "epoch": 0.6487362281270252, "grad_norm": 0.531326413154602, "learning_rate": 9.117821745875143e-06, "loss": 0.1539, "step": 2002 }, { "epoch": 0.6490602721970188, "grad_norm": 0.6741142868995667, "learning_rate": 9.116829298391345e-06, "loss": 0.1579, "step": 2003 }, { "epoch": 0.6493843162670123, "grad_norm": 0.48655468225479126, "learning_rate": 9.115836347047495e-06, "loss": 0.1452, "step": 2004 }, { "epoch": 0.6497083603370059, "grad_norm": 0.5300689339637756, "learning_rate": 9.11484289196512e-06, "loss": 0.1534, "step": 2005 }, { "epoch": 0.6500324044069994, "grad_norm": 0.5319254994392395, "learning_rate": 9.113848933265811e-06, "loss": 0.1508, "step": 2006 }, { "epoch": 0.6503564484769929, "grad_norm": 0.5492068529129028, "learning_rate": 9.112854471071217e-06, "loss": 0.1482, "step": 2007 }, { "epoch": 0.6506804925469863, "grad_norm": 0.5193006992340088, "learning_rate": 9.111859505503052e-06, "loss": 0.1501, "step": 2008 }, { "epoch": 0.6510045366169799, "grad_norm": 0.4945833683013916, "learning_rate": 9.110864036683087e-06, "loss": 0.1423, "step": 2009 }, { "epoch": 0.6513285806869734, "grad_norm": 0.48565852642059326, "learning_rate": 9.109868064733163e-06, "loss": 0.143, "step": 2010 }, { "epoch": 0.651652624756967, "grad_norm": 0.5576429963111877, "learning_rate": 9.108871589775173e-06, "loss": 0.1578, "step": 2011 }, { "epoch": 0.6519766688269605, "grad_norm": 0.5786350965499878, "learning_rate": 9.107874611931077e-06, "loss": 0.1648, "step": 2012 }, { "epoch": 0.652300712896954, "grad_norm": 0.5137717723846436, "learning_rate": 9.106877131322897e-06, "loss": 0.1389, "step": 2013 }, { "epoch": 0.6526247569669476, "grad_norm": 0.5338220596313477, "learning_rate": 9.105879148072712e-06, "loss": 0.1633, "step": 2014 }, { "epoch": 0.652948801036941, "grad_norm": 0.5126753449440002, "learning_rate": 9.104880662302668e-06, "loss": 0.1604, "step": 2015 }, { "epoch": 0.6532728451069345, "grad_norm": 0.5010611414909363, "learning_rate": 9.103881674134972e-06, "loss": 0.1557, "step": 2016 }, { "epoch": 0.653596889176928, "grad_norm": 0.5101701021194458, "learning_rate": 9.102882183691884e-06, "loss": 0.1614, "step": 2017 }, { "epoch": 0.6539209332469216, "grad_norm": 0.5002159476280212, "learning_rate": 9.101882191095738e-06, "loss": 0.1496, "step": 2018 }, { "epoch": 0.6542449773169151, "grad_norm": 0.5012000203132629, "learning_rate": 9.10088169646892e-06, "loss": 0.1597, "step": 2019 }, { "epoch": 0.6545690213869086, "grad_norm": 0.48333480954170227, "learning_rate": 9.099880699933883e-06, "loss": 0.1474, "step": 2020 }, { "epoch": 0.6548930654569022, "grad_norm": 0.5311923027038574, "learning_rate": 9.098879201613136e-06, "loss": 0.1596, "step": 2021 }, { "epoch": 0.6552171095268956, "grad_norm": 0.4770635962486267, "learning_rate": 9.097877201629258e-06, "loss": 0.1391, "step": 2022 }, { "epoch": 0.6555411535968891, "grad_norm": 0.5176326036453247, "learning_rate": 9.096874700104879e-06, "loss": 0.158, "step": 2023 }, { "epoch": 0.6558651976668827, "grad_norm": 0.5453210473060608, "learning_rate": 9.095871697162698e-06, "loss": 0.1613, "step": 2024 }, { "epoch": 0.6561892417368762, "grad_norm": 0.5248459577560425, "learning_rate": 9.094868192925473e-06, "loss": 0.152, "step": 2025 }, { "epoch": 0.6565132858068697, "grad_norm": 0.5020378232002258, "learning_rate": 9.093864187516021e-06, "loss": 0.1593, "step": 2026 }, { "epoch": 0.6568373298768633, "grad_norm": 0.4973413646221161, "learning_rate": 9.092859681057224e-06, "loss": 0.1481, "step": 2027 }, { "epoch": 0.6571613739468568, "grad_norm": 0.5216763615608215, "learning_rate": 9.091854673672026e-06, "loss": 0.1453, "step": 2028 }, { "epoch": 0.6574854180168503, "grad_norm": 0.5470162630081177, "learning_rate": 9.090849165483428e-06, "loss": 0.1698, "step": 2029 }, { "epoch": 0.6578094620868438, "grad_norm": 0.5171158909797668, "learning_rate": 9.089843156614493e-06, "loss": 0.1399, "step": 2030 }, { "epoch": 0.6581335061568373, "grad_norm": 0.5428946018218994, "learning_rate": 9.08883664718835e-06, "loss": 0.1724, "step": 2031 }, { "epoch": 0.6584575502268308, "grad_norm": 0.5003471374511719, "learning_rate": 9.087829637328183e-06, "loss": 0.1521, "step": 2032 }, { "epoch": 0.6587815942968244, "grad_norm": 0.5318998098373413, "learning_rate": 9.086822127157243e-06, "loss": 0.1627, "step": 2033 }, { "epoch": 0.6591056383668179, "grad_norm": 0.4693848490715027, "learning_rate": 9.085814116798837e-06, "loss": 0.1434, "step": 2034 }, { "epoch": 0.6594296824368114, "grad_norm": 0.4785168170928955, "learning_rate": 9.084805606376337e-06, "loss": 0.1407, "step": 2035 }, { "epoch": 0.659753726506805, "grad_norm": 0.49794092774391174, "learning_rate": 9.083796596013175e-06, "loss": 0.1648, "step": 2036 }, { "epoch": 0.6600777705767984, "grad_norm": 0.5339946746826172, "learning_rate": 9.082787085832845e-06, "loss": 0.1556, "step": 2037 }, { "epoch": 0.6604018146467919, "grad_norm": 0.5245910286903381, "learning_rate": 9.081777075958898e-06, "loss": 0.1632, "step": 2038 }, { "epoch": 0.6607258587167855, "grad_norm": 0.4721158742904663, "learning_rate": 9.080766566514954e-06, "loss": 0.1502, "step": 2039 }, { "epoch": 0.661049902786779, "grad_norm": 0.56708163022995, "learning_rate": 9.079755557624684e-06, "loss": 0.1633, "step": 2040 }, { "epoch": 0.6613739468567725, "grad_norm": 0.4877159297466278, "learning_rate": 9.078744049411832e-06, "loss": 0.1349, "step": 2041 }, { "epoch": 0.6616979909267661, "grad_norm": 0.5330770015716553, "learning_rate": 9.077732042000192e-06, "loss": 0.1535, "step": 2042 }, { "epoch": 0.6620220349967596, "grad_norm": 0.5046741962432861, "learning_rate": 9.076719535513626e-06, "loss": 0.1567, "step": 2043 }, { "epoch": 0.662346079066753, "grad_norm": 0.5472025275230408, "learning_rate": 9.075706530076054e-06, "loss": 0.1667, "step": 2044 }, { "epoch": 0.6626701231367466, "grad_norm": 0.515630841255188, "learning_rate": 9.074693025811458e-06, "loss": 0.1623, "step": 2045 }, { "epoch": 0.6629941672067401, "grad_norm": 0.521040678024292, "learning_rate": 9.073679022843882e-06, "loss": 0.1632, "step": 2046 }, { "epoch": 0.6633182112767336, "grad_norm": 0.4941205382347107, "learning_rate": 9.072664521297432e-06, "loss": 0.142, "step": 2047 }, { "epoch": 0.6636422553467272, "grad_norm": 0.5200309753417969, "learning_rate": 9.07164952129627e-06, "loss": 0.1538, "step": 2048 }, { "epoch": 0.6639662994167207, "grad_norm": 0.5273143649101257, "learning_rate": 9.070634022964622e-06, "loss": 0.1555, "step": 2049 }, { "epoch": 0.6642903434867142, "grad_norm": 0.5260372757911682, "learning_rate": 9.069618026426779e-06, "loss": 0.1637, "step": 2050 }, { "epoch": 0.6646143875567078, "grad_norm": 0.4782349169254303, "learning_rate": 9.068601531807084e-06, "loss": 0.1451, "step": 2051 }, { "epoch": 0.6649384316267012, "grad_norm": 0.47235560417175293, "learning_rate": 9.067584539229948e-06, "loss": 0.1401, "step": 2052 }, { "epoch": 0.6652624756966947, "grad_norm": 0.5347378253936768, "learning_rate": 9.066567048819844e-06, "loss": 0.1723, "step": 2053 }, { "epoch": 0.6655865197666883, "grad_norm": 0.49886566400527954, "learning_rate": 9.0655490607013e-06, "loss": 0.1545, "step": 2054 }, { "epoch": 0.6659105638366818, "grad_norm": 0.5145599842071533, "learning_rate": 9.064530574998907e-06, "loss": 0.1571, "step": 2055 }, { "epoch": 0.6662346079066753, "grad_norm": 0.4755716621875763, "learning_rate": 9.063511591837322e-06, "loss": 0.1421, "step": 2056 }, { "epoch": 0.6665586519766689, "grad_norm": 0.535316526889801, "learning_rate": 9.062492111341254e-06, "loss": 0.1547, "step": 2057 }, { "epoch": 0.6668826960466624, "grad_norm": 0.5307443141937256, "learning_rate": 9.06147213363548e-06, "loss": 0.1716, "step": 2058 }, { "epoch": 0.6672067401166558, "grad_norm": 0.48614341020584106, "learning_rate": 9.060451658844835e-06, "loss": 0.1395, "step": 2059 }, { "epoch": 0.6675307841866494, "grad_norm": 0.4861319363117218, "learning_rate": 9.059430687094215e-06, "loss": 0.1462, "step": 2060 }, { "epoch": 0.6678548282566429, "grad_norm": 0.5452821850776672, "learning_rate": 9.058409218508577e-06, "loss": 0.1834, "step": 2061 }, { "epoch": 0.6681788723266364, "grad_norm": 0.48399338126182556, "learning_rate": 9.05738725321294e-06, "loss": 0.1464, "step": 2062 }, { "epoch": 0.66850291639663, "grad_norm": 0.49779587984085083, "learning_rate": 9.056364791332381e-06, "loss": 0.1512, "step": 2063 }, { "epoch": 0.6688269604666235, "grad_norm": 0.5269302725791931, "learning_rate": 9.055341832992041e-06, "loss": 0.1555, "step": 2064 }, { "epoch": 0.669151004536617, "grad_norm": 0.5232394933700562, "learning_rate": 9.05431837831712e-06, "loss": 0.1593, "step": 2065 }, { "epoch": 0.6694750486066104, "grad_norm": 0.5113489031791687, "learning_rate": 9.053294427432877e-06, "loss": 0.154, "step": 2066 }, { "epoch": 0.669799092676604, "grad_norm": 0.48380905389785767, "learning_rate": 9.052269980464634e-06, "loss": 0.1402, "step": 2067 }, { "epoch": 0.6701231367465975, "grad_norm": 0.4939453899860382, "learning_rate": 9.051245037537777e-06, "loss": 0.1592, "step": 2068 }, { "epoch": 0.670447180816591, "grad_norm": 0.5096998810768127, "learning_rate": 9.050219598777745e-06, "loss": 0.1562, "step": 2069 }, { "epoch": 0.6707712248865846, "grad_norm": 0.5372338891029358, "learning_rate": 9.049193664310043e-06, "loss": 0.1541, "step": 2070 }, { "epoch": 0.6710952689565781, "grad_norm": 0.4848030209541321, "learning_rate": 9.048167234260235e-06, "loss": 0.1562, "step": 2071 }, { "epoch": 0.6714193130265717, "grad_norm": 0.4721747040748596, "learning_rate": 9.04714030875395e-06, "loss": 0.1437, "step": 2072 }, { "epoch": 0.6717433570965652, "grad_norm": 0.49510085582733154, "learning_rate": 9.046112887916867e-06, "loss": 0.1535, "step": 2073 }, { "epoch": 0.6720674011665586, "grad_norm": 0.468925803899765, "learning_rate": 9.045084971874738e-06, "loss": 0.1395, "step": 2074 }, { "epoch": 0.6723914452365521, "grad_norm": 0.48380059003829956, "learning_rate": 9.044056560753367e-06, "loss": 0.1463, "step": 2075 }, { "epoch": 0.6727154893065457, "grad_norm": 0.5094043612480164, "learning_rate": 9.043027654678623e-06, "loss": 0.1554, "step": 2076 }, { "epoch": 0.6730395333765392, "grad_norm": 0.4993102252483368, "learning_rate": 9.041998253776433e-06, "loss": 0.1587, "step": 2077 }, { "epoch": 0.6733635774465327, "grad_norm": 0.5299800634384155, "learning_rate": 9.040968358172787e-06, "loss": 0.1671, "step": 2078 }, { "epoch": 0.6736876215165263, "grad_norm": 0.5652223825454712, "learning_rate": 9.039937967993734e-06, "loss": 0.1554, "step": 2079 }, { "epoch": 0.6740116655865198, "grad_norm": 0.5279286503791809, "learning_rate": 9.038907083365382e-06, "loss": 0.1616, "step": 2080 }, { "epoch": 0.6743357096565132, "grad_norm": 0.5427567362785339, "learning_rate": 9.037875704413904e-06, "loss": 0.1617, "step": 2081 }, { "epoch": 0.6746597537265068, "grad_norm": 0.5197331309318542, "learning_rate": 9.036843831265528e-06, "loss": 0.157, "step": 2082 }, { "epoch": 0.6749837977965003, "grad_norm": 0.49779677391052246, "learning_rate": 9.035811464046547e-06, "loss": 0.1458, "step": 2083 }, { "epoch": 0.6753078418664938, "grad_norm": 0.49823495745658875, "learning_rate": 9.034778602883313e-06, "loss": 0.1273, "step": 2084 }, { "epoch": 0.6756318859364874, "grad_norm": 0.5144118070602417, "learning_rate": 9.03374524790224e-06, "loss": 0.1529, "step": 2085 }, { "epoch": 0.6759559300064809, "grad_norm": 0.4925682544708252, "learning_rate": 9.032711399229794e-06, "loss": 0.1433, "step": 2086 }, { "epoch": 0.6762799740764744, "grad_norm": 0.485436350107193, "learning_rate": 9.031677056992514e-06, "loss": 0.1453, "step": 2087 }, { "epoch": 0.6766040181464679, "grad_norm": 0.5172211527824402, "learning_rate": 9.030642221316993e-06, "loss": 0.1654, "step": 2088 }, { "epoch": 0.6769280622164614, "grad_norm": 0.5097691416740417, "learning_rate": 9.029606892329883e-06, "loss": 0.1562, "step": 2089 }, { "epoch": 0.6772521062864549, "grad_norm": 0.49525973200798035, "learning_rate": 9.028571070157899e-06, "loss": 0.1505, "step": 2090 }, { "epoch": 0.6775761503564485, "grad_norm": 0.5409114360809326, "learning_rate": 9.027534754927815e-06, "loss": 0.1643, "step": 2091 }, { "epoch": 0.677900194426442, "grad_norm": 0.4941219389438629, "learning_rate": 9.026497946766468e-06, "loss": 0.1453, "step": 2092 }, { "epoch": 0.6782242384964355, "grad_norm": 0.4984894394874573, "learning_rate": 9.02546064580075e-06, "loss": 0.1491, "step": 2093 }, { "epoch": 0.6785482825664291, "grad_norm": 0.5567848086357117, "learning_rate": 9.02442285215762e-06, "loss": 0.1796, "step": 2094 }, { "epoch": 0.6788723266364226, "grad_norm": 0.5158399939537048, "learning_rate": 9.023384565964093e-06, "loss": 0.1624, "step": 2095 }, { "epoch": 0.679196370706416, "grad_norm": 0.4754045009613037, "learning_rate": 9.022345787347241e-06, "loss": 0.1411, "step": 2096 }, { "epoch": 0.6795204147764096, "grad_norm": 0.4885212182998657, "learning_rate": 9.021306516434207e-06, "loss": 0.1442, "step": 2097 }, { "epoch": 0.6798444588464031, "grad_norm": 0.5083634257316589, "learning_rate": 9.020266753352185e-06, "loss": 0.1576, "step": 2098 }, { "epoch": 0.6801685029163966, "grad_norm": 0.5063057541847229, "learning_rate": 9.01922649822843e-06, "loss": 0.1545, "step": 2099 }, { "epoch": 0.6804925469863902, "grad_norm": 0.541229784488678, "learning_rate": 9.018185751190261e-06, "loss": 0.1634, "step": 2100 }, { "epoch": 0.6808165910563837, "grad_norm": 0.49940717220306396, "learning_rate": 9.017144512365055e-06, "loss": 0.153, "step": 2101 }, { "epoch": 0.6811406351263772, "grad_norm": 0.4865690767765045, "learning_rate": 9.01610278188025e-06, "loss": 0.148, "step": 2102 }, { "epoch": 0.6814646791963707, "grad_norm": 0.49564990401268005, "learning_rate": 9.015060559863345e-06, "loss": 0.1392, "step": 2103 }, { "epoch": 0.6817887232663642, "grad_norm": 0.49053755402565, "learning_rate": 9.014017846441893e-06, "loss": 0.1494, "step": 2104 }, { "epoch": 0.6821127673363577, "grad_norm": 0.5497640371322632, "learning_rate": 9.012974641743517e-06, "loss": 0.1617, "step": 2105 }, { "epoch": 0.6824368114063513, "grad_norm": 0.5298976898193359, "learning_rate": 9.011930945895895e-06, "loss": 0.1529, "step": 2106 }, { "epoch": 0.6827608554763448, "grad_norm": 0.4656003713607788, "learning_rate": 9.010886759026762e-06, "loss": 0.1398, "step": 2107 }, { "epoch": 0.6830848995463383, "grad_norm": 0.5337942838668823, "learning_rate": 9.009842081263917e-06, "loss": 0.1531, "step": 2108 }, { "epoch": 0.6834089436163319, "grad_norm": 0.5253130793571472, "learning_rate": 9.008796912735221e-06, "loss": 0.1582, "step": 2109 }, { "epoch": 0.6837329876863253, "grad_norm": 0.49264976382255554, "learning_rate": 9.00775125356859e-06, "loss": 0.1501, "step": 2110 }, { "epoch": 0.6840570317563188, "grad_norm": 0.49911728501319885, "learning_rate": 9.006705103892006e-06, "loss": 0.162, "step": 2111 }, { "epoch": 0.6843810758263124, "grad_norm": 0.4753684103488922, "learning_rate": 9.005658463833503e-06, "loss": 0.1368, "step": 2112 }, { "epoch": 0.6847051198963059, "grad_norm": 0.5304966568946838, "learning_rate": 9.004611333521183e-06, "loss": 0.1676, "step": 2113 }, { "epoch": 0.6850291639662994, "grad_norm": 0.5524003505706787, "learning_rate": 9.003563713083203e-06, "loss": 0.1587, "step": 2114 }, { "epoch": 0.685353208036293, "grad_norm": 0.46181124448776245, "learning_rate": 9.00251560264778e-06, "loss": 0.139, "step": 2115 }, { "epoch": 0.6856772521062865, "grad_norm": 0.5070650577545166, "learning_rate": 9.001467002343198e-06, "loss": 0.1448, "step": 2116 }, { "epoch": 0.68600129617628, "grad_norm": 0.5460571050643921, "learning_rate": 9.00041791229779e-06, "loss": 0.1827, "step": 2117 }, { "epoch": 0.6863253402462735, "grad_norm": 0.523775041103363, "learning_rate": 8.999368332639957e-06, "loss": 0.1715, "step": 2118 }, { "epoch": 0.686649384316267, "grad_norm": 0.48703733086586, "learning_rate": 8.998318263498158e-06, "loss": 0.1492, "step": 2119 }, { "epoch": 0.6869734283862605, "grad_norm": 0.48289304971694946, "learning_rate": 8.99726770500091e-06, "loss": 0.145, "step": 2120 }, { "epoch": 0.687297472456254, "grad_norm": 0.5123062133789062, "learning_rate": 8.99621665727679e-06, "loss": 0.1537, "step": 2121 }, { "epoch": 0.6876215165262476, "grad_norm": 0.5621348023414612, "learning_rate": 8.995165120454437e-06, "loss": 0.1464, "step": 2122 }, { "epoch": 0.6879455605962411, "grad_norm": 0.5511627793312073, "learning_rate": 8.994113094662552e-06, "loss": 0.1579, "step": 2123 }, { "epoch": 0.6882696046662347, "grad_norm": 0.5453408360481262, "learning_rate": 8.99306058002989e-06, "loss": 0.1646, "step": 2124 }, { "epoch": 0.6885936487362281, "grad_norm": 0.5363773703575134, "learning_rate": 8.992007576685266e-06, "loss": 0.1623, "step": 2125 }, { "epoch": 0.6889176928062216, "grad_norm": 0.5111467242240906, "learning_rate": 8.990954084757562e-06, "loss": 0.1526, "step": 2126 }, { "epoch": 0.6892417368762151, "grad_norm": 0.5060132145881653, "learning_rate": 8.989900104375715e-06, "loss": 0.1478, "step": 2127 }, { "epoch": 0.6895657809462087, "grad_norm": 0.49885737895965576, "learning_rate": 8.988845635668719e-06, "loss": 0.1405, "step": 2128 }, { "epoch": 0.6898898250162022, "grad_norm": 0.5323303937911987, "learning_rate": 8.98779067876563e-06, "loss": 0.1532, "step": 2129 }, { "epoch": 0.6902138690861958, "grad_norm": 0.4989731013774872, "learning_rate": 8.98673523379557e-06, "loss": 0.1426, "step": 2130 }, { "epoch": 0.6905379131561893, "grad_norm": 0.480033814907074, "learning_rate": 8.985679300887711e-06, "loss": 0.1428, "step": 2131 }, { "epoch": 0.6908619572261827, "grad_norm": 0.5180311799049377, "learning_rate": 8.984622880171289e-06, "loss": 0.1536, "step": 2132 }, { "epoch": 0.6911860012961762, "grad_norm": 0.51394122838974, "learning_rate": 8.983565971775604e-06, "loss": 0.1537, "step": 2133 }, { "epoch": 0.6915100453661698, "grad_norm": 0.5116256475448608, "learning_rate": 8.982508575830005e-06, "loss": 0.1586, "step": 2134 }, { "epoch": 0.6918340894361633, "grad_norm": 0.49658048152923584, "learning_rate": 8.981450692463909e-06, "loss": 0.1406, "step": 2135 }, { "epoch": 0.6921581335061568, "grad_norm": 0.5321874618530273, "learning_rate": 8.980392321806793e-06, "loss": 0.1686, "step": 2136 }, { "epoch": 0.6924821775761504, "grad_norm": 0.5163205862045288, "learning_rate": 8.97933346398819e-06, "loss": 0.1601, "step": 2137 }, { "epoch": 0.6928062216461439, "grad_norm": 0.47858354449272156, "learning_rate": 8.978274119137694e-06, "loss": 0.149, "step": 2138 }, { "epoch": 0.6931302657161373, "grad_norm": 0.4877544045448303, "learning_rate": 8.97721428738496e-06, "loss": 0.1445, "step": 2139 }, { "epoch": 0.6934543097861309, "grad_norm": 0.5431718826293945, "learning_rate": 8.976153968859697e-06, "loss": 0.1671, "step": 2140 }, { "epoch": 0.6937783538561244, "grad_norm": 0.48591312766075134, "learning_rate": 8.975093163691681e-06, "loss": 0.1495, "step": 2141 }, { "epoch": 0.6941023979261179, "grad_norm": 0.5136705040931702, "learning_rate": 8.974031872010745e-06, "loss": 0.1417, "step": 2142 }, { "epoch": 0.6944264419961115, "grad_norm": 0.547904908657074, "learning_rate": 8.972970093946777e-06, "loss": 0.1652, "step": 2143 }, { "epoch": 0.694750486066105, "grad_norm": 0.48400211334228516, "learning_rate": 8.971907829629734e-06, "loss": 0.1349, "step": 2144 }, { "epoch": 0.6950745301360985, "grad_norm": 0.4963843524456024, "learning_rate": 8.970845079189622e-06, "loss": 0.1412, "step": 2145 }, { "epoch": 0.6953985742060921, "grad_norm": 0.48876792192459106, "learning_rate": 8.969781842756513e-06, "loss": 0.1505, "step": 2146 }, { "epoch": 0.6957226182760855, "grad_norm": 0.5260959267616272, "learning_rate": 8.968718120460538e-06, "loss": 0.1563, "step": 2147 }, { "epoch": 0.696046662346079, "grad_norm": 0.48220258951187134, "learning_rate": 8.967653912431884e-06, "loss": 0.1319, "step": 2148 }, { "epoch": 0.6963707064160726, "grad_norm": 0.4762630760669708, "learning_rate": 8.9665892188008e-06, "loss": 0.1476, "step": 2149 }, { "epoch": 0.6966947504860661, "grad_norm": 0.48830246925354004, "learning_rate": 8.965524039697598e-06, "loss": 0.1458, "step": 2150 }, { "epoch": 0.6970187945560596, "grad_norm": 0.5447869896888733, "learning_rate": 8.96445837525264e-06, "loss": 0.1609, "step": 2151 }, { "epoch": 0.6973428386260532, "grad_norm": 0.4892789423465729, "learning_rate": 8.963392225596357e-06, "loss": 0.1536, "step": 2152 }, { "epoch": 0.6976668826960467, "grad_norm": 0.502695620059967, "learning_rate": 8.962325590859236e-06, "loss": 0.1545, "step": 2153 }, { "epoch": 0.6979909267660401, "grad_norm": 0.4724867641925812, "learning_rate": 8.961258471171818e-06, "loss": 0.1451, "step": 2154 }, { "epoch": 0.6983149708360337, "grad_norm": 0.4843321740627289, "learning_rate": 8.960190866664713e-06, "loss": 0.1484, "step": 2155 }, { "epoch": 0.6986390149060272, "grad_norm": 0.4993053674697876, "learning_rate": 8.959122777468583e-06, "loss": 0.1536, "step": 2156 }, { "epoch": 0.6989630589760207, "grad_norm": 0.509663462638855, "learning_rate": 8.958054203714152e-06, "loss": 0.1496, "step": 2157 }, { "epoch": 0.6992871030460143, "grad_norm": 0.475522518157959, "learning_rate": 8.956985145532205e-06, "loss": 0.1418, "step": 2158 }, { "epoch": 0.6996111471160078, "grad_norm": 0.523635983467102, "learning_rate": 8.95591560305358e-06, "loss": 0.1629, "step": 2159 }, { "epoch": 0.6999351911860013, "grad_norm": 0.5369279980659485, "learning_rate": 8.954845576409184e-06, "loss": 0.1583, "step": 2160 }, { "epoch": 0.7002592352559948, "grad_norm": 0.5319356918334961, "learning_rate": 8.953775065729972e-06, "loss": 0.1585, "step": 2161 }, { "epoch": 0.7005832793259883, "grad_norm": 0.4694063067436218, "learning_rate": 8.952704071146972e-06, "loss": 0.1277, "step": 2162 }, { "epoch": 0.7009073233959818, "grad_norm": 0.5389366745948792, "learning_rate": 8.951632592791255e-06, "loss": 0.1594, "step": 2163 }, { "epoch": 0.7012313674659754, "grad_norm": 0.47011804580688477, "learning_rate": 8.950560630793965e-06, "loss": 0.1384, "step": 2164 }, { "epoch": 0.7015554115359689, "grad_norm": 0.5274106860160828, "learning_rate": 8.949488185286297e-06, "loss": 0.1647, "step": 2165 }, { "epoch": 0.7018794556059624, "grad_norm": 0.5034454464912415, "learning_rate": 8.948415256399512e-06, "loss": 0.154, "step": 2166 }, { "epoch": 0.702203499675956, "grad_norm": 0.47073930501937866, "learning_rate": 8.94734184426492e-06, "loss": 0.1442, "step": 2167 }, { "epoch": 0.7025275437459495, "grad_norm": 0.4921237826347351, "learning_rate": 8.946267949013902e-06, "loss": 0.1436, "step": 2168 }, { "epoch": 0.7028515878159429, "grad_norm": 0.5035290122032166, "learning_rate": 8.945193570777888e-06, "loss": 0.1452, "step": 2169 }, { "epoch": 0.7031756318859365, "grad_norm": 0.5418218374252319, "learning_rate": 8.944118709688375e-06, "loss": 0.1778, "step": 2170 }, { "epoch": 0.70349967595593, "grad_norm": 0.5310963988304138, "learning_rate": 8.943043365876913e-06, "loss": 0.1561, "step": 2171 }, { "epoch": 0.7038237200259235, "grad_norm": 0.49203693866729736, "learning_rate": 8.941967539475115e-06, "loss": 0.1657, "step": 2172 }, { "epoch": 0.7041477640959171, "grad_norm": 0.5172531008720398, "learning_rate": 8.94089123061465e-06, "loss": 0.1677, "step": 2173 }, { "epoch": 0.7044718081659106, "grad_norm": 0.5177010893821716, "learning_rate": 8.939814439427251e-06, "loss": 0.1648, "step": 2174 }, { "epoch": 0.7047958522359041, "grad_norm": 0.526005744934082, "learning_rate": 8.938737166044705e-06, "loss": 0.1709, "step": 2175 }, { "epoch": 0.7051198963058976, "grad_norm": 0.5326618552207947, "learning_rate": 8.937659410598857e-06, "loss": 0.1698, "step": 2176 }, { "epoch": 0.7054439403758911, "grad_norm": 0.5253227353096008, "learning_rate": 8.936581173221619e-06, "loss": 0.1632, "step": 2177 }, { "epoch": 0.7057679844458846, "grad_norm": 0.5025566816329956, "learning_rate": 8.935502454044955e-06, "loss": 0.1635, "step": 2178 }, { "epoch": 0.7060920285158782, "grad_norm": 0.5111781358718872, "learning_rate": 8.934423253200887e-06, "loss": 0.1437, "step": 2179 }, { "epoch": 0.7064160725858717, "grad_norm": 0.49436041712760925, "learning_rate": 8.933343570821504e-06, "loss": 0.1654, "step": 2180 }, { "epoch": 0.7067401166558652, "grad_norm": 0.49264398217201233, "learning_rate": 8.932263407038943e-06, "loss": 0.1532, "step": 2181 }, { "epoch": 0.7070641607258588, "grad_norm": 0.500124454498291, "learning_rate": 8.931182761985409e-06, "loss": 0.1501, "step": 2182 }, { "epoch": 0.7073882047958522, "grad_norm": 0.5058132410049438, "learning_rate": 8.930101635793163e-06, "loss": 0.1554, "step": 2183 }, { "epoch": 0.7077122488658457, "grad_norm": 0.5359315276145935, "learning_rate": 8.929020028594521e-06, "loss": 0.155, "step": 2184 }, { "epoch": 0.7080362929358393, "grad_norm": 0.5466138124465942, "learning_rate": 8.927937940521865e-06, "loss": 0.1464, "step": 2185 }, { "epoch": 0.7083603370058328, "grad_norm": 0.49145621061325073, "learning_rate": 8.92685537170763e-06, "loss": 0.147, "step": 2186 }, { "epoch": 0.7086843810758263, "grad_norm": 0.5450732111930847, "learning_rate": 8.925772322284314e-06, "loss": 0.1548, "step": 2187 }, { "epoch": 0.7090084251458199, "grad_norm": 0.5080904364585876, "learning_rate": 8.924688792384467e-06, "loss": 0.1596, "step": 2188 }, { "epoch": 0.7093324692158134, "grad_norm": 0.5188769102096558, "learning_rate": 8.923604782140708e-06, "loss": 0.1578, "step": 2189 }, { "epoch": 0.7096565132858069, "grad_norm": 0.5725817680358887, "learning_rate": 8.922520291685705e-06, "loss": 0.1571, "step": 2190 }, { "epoch": 0.7099805573558003, "grad_norm": 0.5092363953590393, "learning_rate": 8.921435321152194e-06, "loss": 0.148, "step": 2191 }, { "epoch": 0.7103046014257939, "grad_norm": 0.5569043755531311, "learning_rate": 8.92034987067296e-06, "loss": 0.1619, "step": 2192 }, { "epoch": 0.7106286454957874, "grad_norm": 0.512306809425354, "learning_rate": 8.919263940380855e-06, "loss": 0.1513, "step": 2193 }, { "epoch": 0.710952689565781, "grad_norm": 0.5137192010879517, "learning_rate": 8.918177530408785e-06, "loss": 0.1606, "step": 2194 }, { "epoch": 0.7112767336357745, "grad_norm": 0.4800979793071747, "learning_rate": 8.917090640889715e-06, "loss": 0.1474, "step": 2195 }, { "epoch": 0.711600777705768, "grad_norm": 0.5407817959785461, "learning_rate": 8.91600327195667e-06, "loss": 0.1655, "step": 2196 }, { "epoch": 0.7119248217757616, "grad_norm": 0.49153977632522583, "learning_rate": 8.914915423742737e-06, "loss": 0.149, "step": 2197 }, { "epoch": 0.712248865845755, "grad_norm": 0.5214946269989014, "learning_rate": 8.913827096381055e-06, "loss": 0.1601, "step": 2198 }, { "epoch": 0.7125729099157485, "grad_norm": 0.4982912242412567, "learning_rate": 8.912738290004824e-06, "loss": 0.1451, "step": 2199 }, { "epoch": 0.712896953985742, "grad_norm": 0.49095019698143005, "learning_rate": 8.911649004747307e-06, "loss": 0.143, "step": 2200 }, { "epoch": 0.7132209980557356, "grad_norm": 0.553778350353241, "learning_rate": 8.910559240741816e-06, "loss": 0.142, "step": 2201 }, { "epoch": 0.7135450421257291, "grad_norm": 0.4920446276664734, "learning_rate": 8.909468998121733e-06, "loss": 0.152, "step": 2202 }, { "epoch": 0.7138690861957226, "grad_norm": 0.4924328029155731, "learning_rate": 8.908378277020491e-06, "loss": 0.1551, "step": 2203 }, { "epoch": 0.7141931302657162, "grad_norm": 0.5034648180007935, "learning_rate": 8.907287077571585e-06, "loss": 0.1514, "step": 2204 }, { "epoch": 0.7145171743357096, "grad_norm": 0.5196908116340637, "learning_rate": 8.906195399908563e-06, "loss": 0.1599, "step": 2205 }, { "epoch": 0.7148412184057031, "grad_norm": 0.5454964637756348, "learning_rate": 8.905103244165044e-06, "loss": 0.1681, "step": 2206 }, { "epoch": 0.7151652624756967, "grad_norm": 0.5191643238067627, "learning_rate": 8.904010610474687e-06, "loss": 0.1444, "step": 2207 }, { "epoch": 0.7154893065456902, "grad_norm": 0.5437571406364441, "learning_rate": 8.902917498971228e-06, "loss": 0.1583, "step": 2208 }, { "epoch": 0.7158133506156837, "grad_norm": 0.5087147951126099, "learning_rate": 8.901823909788449e-06, "loss": 0.1481, "step": 2209 }, { "epoch": 0.7161373946856773, "grad_norm": 0.528440535068512, "learning_rate": 8.900729843060199e-06, "loss": 0.1535, "step": 2210 }, { "epoch": 0.7164614387556708, "grad_norm": 0.48496097326278687, "learning_rate": 8.899635298920374e-06, "loss": 0.1526, "step": 2211 }, { "epoch": 0.7167854828256643, "grad_norm": 0.4830402731895447, "learning_rate": 8.898540277502943e-06, "loss": 0.1442, "step": 2212 }, { "epoch": 0.7171095268956578, "grad_norm": 0.507747232913971, "learning_rate": 8.897444778941921e-06, "loss": 0.1426, "step": 2213 }, { "epoch": 0.7174335709656513, "grad_norm": 0.4960160255432129, "learning_rate": 8.896348803371388e-06, "loss": 0.1377, "step": 2214 }, { "epoch": 0.7177576150356448, "grad_norm": 0.5086054801940918, "learning_rate": 8.895252350925482e-06, "loss": 0.1478, "step": 2215 }, { "epoch": 0.7180816591056384, "grad_norm": 0.5340855121612549, "learning_rate": 8.894155421738398e-06, "loss": 0.1491, "step": 2216 }, { "epoch": 0.7184057031756319, "grad_norm": 0.5328924059867859, "learning_rate": 8.893058015944387e-06, "loss": 0.1587, "step": 2217 }, { "epoch": 0.7187297472456254, "grad_norm": 0.5267184972763062, "learning_rate": 8.891960133677763e-06, "loss": 0.1555, "step": 2218 }, { "epoch": 0.719053791315619, "grad_norm": 0.48961344361305237, "learning_rate": 8.890861775072897e-06, "loss": 0.1493, "step": 2219 }, { "epoch": 0.7193778353856124, "grad_norm": 0.5472536683082581, "learning_rate": 8.889762940264216e-06, "loss": 0.1722, "step": 2220 }, { "epoch": 0.7197018794556059, "grad_norm": 0.49720636010169983, "learning_rate": 8.888663629386206e-06, "loss": 0.1447, "step": 2221 }, { "epoch": 0.7200259235255995, "grad_norm": 0.5057947039604187, "learning_rate": 8.887563842573412e-06, "loss": 0.1484, "step": 2222 }, { "epoch": 0.720349967595593, "grad_norm": 0.4652838110923767, "learning_rate": 8.886463579960441e-06, "loss": 0.1433, "step": 2223 }, { "epoch": 0.7206740116655865, "grad_norm": 0.5056071281433105, "learning_rate": 8.885362841681948e-06, "loss": 0.1527, "step": 2224 }, { "epoch": 0.7209980557355801, "grad_norm": 0.4978449046611786, "learning_rate": 8.88426162787266e-06, "loss": 0.1537, "step": 2225 }, { "epoch": 0.7213220998055736, "grad_norm": 0.46882349252700806, "learning_rate": 8.88315993866735e-06, "loss": 0.1304, "step": 2226 }, { "epoch": 0.721646143875567, "grad_norm": 0.48129644989967346, "learning_rate": 8.882057774200855e-06, "loss": 0.1487, "step": 2227 }, { "epoch": 0.7219701879455606, "grad_norm": 0.5160327553749084, "learning_rate": 8.880955134608069e-06, "loss": 0.1689, "step": 2228 }, { "epoch": 0.7222942320155541, "grad_norm": 0.5019292831420898, "learning_rate": 8.879852020023945e-06, "loss": 0.155, "step": 2229 }, { "epoch": 0.7226182760855476, "grad_norm": 0.5016565322875977, "learning_rate": 8.878748430583496e-06, "loss": 0.155, "step": 2230 }, { "epoch": 0.7229423201555412, "grad_norm": 0.48340004682540894, "learning_rate": 8.877644366421787e-06, "loss": 0.1504, "step": 2231 }, { "epoch": 0.7232663642255347, "grad_norm": 0.44818857312202454, "learning_rate": 8.876539827673944e-06, "loss": 0.1338, "step": 2232 }, { "epoch": 0.7235904082955282, "grad_norm": 0.48734670877456665, "learning_rate": 8.875434814475157e-06, "loss": 0.1517, "step": 2233 }, { "epoch": 0.7239144523655218, "grad_norm": 0.4977285861968994, "learning_rate": 8.874329326960664e-06, "loss": 0.1512, "step": 2234 }, { "epoch": 0.7242384964355152, "grad_norm": 0.5024043321609497, "learning_rate": 8.873223365265768e-06, "loss": 0.1488, "step": 2235 }, { "epoch": 0.7245625405055087, "grad_norm": 0.4885674715042114, "learning_rate": 8.87211692952583e-06, "loss": 0.1509, "step": 2236 }, { "epoch": 0.7248865845755023, "grad_norm": 0.49454137682914734, "learning_rate": 8.871010019876263e-06, "loss": 0.1527, "step": 2237 }, { "epoch": 0.7252106286454958, "grad_norm": 0.5014051198959351, "learning_rate": 8.869902636452544e-06, "loss": 0.1506, "step": 2238 }, { "epoch": 0.7255346727154893, "grad_norm": 0.4825291037559509, "learning_rate": 8.868794779390205e-06, "loss": 0.1364, "step": 2239 }, { "epoch": 0.7258587167854829, "grad_norm": 0.5182274580001831, "learning_rate": 8.867686448824839e-06, "loss": 0.1591, "step": 2240 }, { "epoch": 0.7261827608554764, "grad_norm": 0.4794568121433258, "learning_rate": 8.866577644892093e-06, "loss": 0.1401, "step": 2241 }, { "epoch": 0.7265068049254698, "grad_norm": 0.49534085392951965, "learning_rate": 8.865468367727674e-06, "loss": 0.1454, "step": 2242 }, { "epoch": 0.7268308489954634, "grad_norm": 0.5006011128425598, "learning_rate": 8.864358617467348e-06, "loss": 0.1501, "step": 2243 }, { "epoch": 0.7271548930654569, "grad_norm": 0.48030370473861694, "learning_rate": 8.86324839424694e-06, "loss": 0.1474, "step": 2244 }, { "epoch": 0.7274789371354504, "grad_norm": 0.49514061212539673, "learning_rate": 8.862137698202324e-06, "loss": 0.1553, "step": 2245 }, { "epoch": 0.727802981205444, "grad_norm": 0.48254290223121643, "learning_rate": 8.861026529469443e-06, "loss": 0.1482, "step": 2246 }, { "epoch": 0.7281270252754375, "grad_norm": 0.5079731941223145, "learning_rate": 8.859914888184293e-06, "loss": 0.1585, "step": 2247 }, { "epoch": 0.728451069345431, "grad_norm": 0.5570839047431946, "learning_rate": 8.858802774482928e-06, "loss": 0.1881, "step": 2248 }, { "epoch": 0.7287751134154244, "grad_norm": 0.5489197969436646, "learning_rate": 8.857690188501457e-06, "loss": 0.1667, "step": 2249 }, { "epoch": 0.729099157485418, "grad_norm": 0.5182020664215088, "learning_rate": 8.856577130376056e-06, "loss": 0.1596, "step": 2250 }, { "epoch": 0.7294232015554115, "grad_norm": 0.47524747252464294, "learning_rate": 8.855463600242946e-06, "loss": 0.1479, "step": 2251 }, { "epoch": 0.729747245625405, "grad_norm": 0.4884457588195801, "learning_rate": 8.854349598238417e-06, "loss": 0.1458, "step": 2252 }, { "epoch": 0.7300712896953986, "grad_norm": 0.5070611834526062, "learning_rate": 8.85323512449881e-06, "loss": 0.1521, "step": 2253 }, { "epoch": 0.7303953337653921, "grad_norm": 0.4800879955291748, "learning_rate": 8.852120179160524e-06, "loss": 0.1525, "step": 2254 }, { "epoch": 0.7307193778353857, "grad_norm": 0.5337240695953369, "learning_rate": 8.85100476236002e-06, "loss": 0.1596, "step": 2255 }, { "epoch": 0.7310434219053791, "grad_norm": 0.4707546532154083, "learning_rate": 8.849888874233815e-06, "loss": 0.1422, "step": 2256 }, { "epoch": 0.7313674659753726, "grad_norm": 0.5401502847671509, "learning_rate": 8.848772514918482e-06, "loss": 0.1561, "step": 2257 }, { "epoch": 0.7316915100453661, "grad_norm": 0.5288102626800537, "learning_rate": 8.84765568455065e-06, "loss": 0.1634, "step": 2258 }, { "epoch": 0.7320155541153597, "grad_norm": 0.5149606466293335, "learning_rate": 8.846538383267011e-06, "loss": 0.1564, "step": 2259 }, { "epoch": 0.7323395981853532, "grad_norm": 0.4856564402580261, "learning_rate": 8.845420611204312e-06, "loss": 0.134, "step": 2260 }, { "epoch": 0.7326636422553467, "grad_norm": 0.5287962555885315, "learning_rate": 8.844302368499358e-06, "loss": 0.1557, "step": 2261 }, { "epoch": 0.7329876863253403, "grad_norm": 0.5078102350234985, "learning_rate": 8.843183655289007e-06, "loss": 0.1584, "step": 2262 }, { "epoch": 0.7333117303953338, "grad_norm": 0.4822060465812683, "learning_rate": 8.842064471710183e-06, "loss": 0.1495, "step": 2263 }, { "epoch": 0.7336357744653272, "grad_norm": 0.5514498949050903, "learning_rate": 8.840944817899861e-06, "loss": 0.1614, "step": 2264 }, { "epoch": 0.7339598185353208, "grad_norm": 0.4666975140571594, "learning_rate": 8.839824693995078e-06, "loss": 0.1352, "step": 2265 }, { "epoch": 0.7342838626053143, "grad_norm": 0.44550567865371704, "learning_rate": 8.838704100132925e-06, "loss": 0.1288, "step": 2266 }, { "epoch": 0.7346079066753078, "grad_norm": 0.5307706594467163, "learning_rate": 8.83758303645055e-06, "loss": 0.1632, "step": 2267 }, { "epoch": 0.7349319507453014, "grad_norm": 0.4947357177734375, "learning_rate": 8.836461503085162e-06, "loss": 0.1442, "step": 2268 }, { "epoch": 0.7352559948152949, "grad_norm": 0.5394967198371887, "learning_rate": 8.835339500174028e-06, "loss": 0.1594, "step": 2269 }, { "epoch": 0.7355800388852884, "grad_norm": 0.49313005805015564, "learning_rate": 8.834217027854466e-06, "loss": 0.141, "step": 2270 }, { "epoch": 0.7359040829552819, "grad_norm": 0.5317650437355042, "learning_rate": 8.833094086263859e-06, "loss": 0.1328, "step": 2271 }, { "epoch": 0.7362281270252754, "grad_norm": 0.46917349100112915, "learning_rate": 8.83197067553964e-06, "loss": 0.1365, "step": 2272 }, { "epoch": 0.7365521710952689, "grad_norm": 0.5269829034805298, "learning_rate": 8.83084679581931e-06, "loss": 0.1634, "step": 2273 }, { "epoch": 0.7368762151652625, "grad_norm": 0.4746880531311035, "learning_rate": 8.829722447240418e-06, "loss": 0.1504, "step": 2274 }, { "epoch": 0.737200259235256, "grad_norm": 0.4481014013290405, "learning_rate": 8.828597629940572e-06, "loss": 0.1288, "step": 2275 }, { "epoch": 0.7375243033052495, "grad_norm": 0.47481676936149597, "learning_rate": 8.82747234405744e-06, "loss": 0.1519, "step": 2276 }, { "epoch": 0.7378483473752431, "grad_norm": 0.4851224422454834, "learning_rate": 8.826346589728746e-06, "loss": 0.145, "step": 2277 }, { "epoch": 0.7381723914452365, "grad_norm": 0.5078859925270081, "learning_rate": 8.82522036709227e-06, "loss": 0.1546, "step": 2278 }, { "epoch": 0.73849643551523, "grad_norm": 0.48216864466667175, "learning_rate": 8.824093676285854e-06, "loss": 0.1415, "step": 2279 }, { "epoch": 0.7388204795852236, "grad_norm": 0.47693872451782227, "learning_rate": 8.82296651744739e-06, "loss": 0.1472, "step": 2280 }, { "epoch": 0.7391445236552171, "grad_norm": 0.5153552889823914, "learning_rate": 8.821838890714836e-06, "loss": 0.1522, "step": 2281 }, { "epoch": 0.7394685677252106, "grad_norm": 0.5131588578224182, "learning_rate": 8.820710796226197e-06, "loss": 0.1529, "step": 2282 }, { "epoch": 0.7397926117952042, "grad_norm": 0.5055592656135559, "learning_rate": 8.819582234119546e-06, "loss": 0.1429, "step": 2283 }, { "epoch": 0.7401166558651977, "grad_norm": 0.5131552815437317, "learning_rate": 8.818453204533005e-06, "loss": 0.145, "step": 2284 }, { "epoch": 0.7404406999351912, "grad_norm": 0.5160530209541321, "learning_rate": 8.817323707604759e-06, "loss": 0.1559, "step": 2285 }, { "epoch": 0.7407647440051847, "grad_norm": 0.4976802170276642, "learning_rate": 8.816193743473044e-06, "loss": 0.1552, "step": 2286 }, { "epoch": 0.7410887880751782, "grad_norm": 0.5457192063331604, "learning_rate": 8.815063312276159e-06, "loss": 0.1698, "step": 2287 }, { "epoch": 0.7414128321451717, "grad_norm": 0.5098389983177185, "learning_rate": 8.813932414152458e-06, "loss": 0.1533, "step": 2288 }, { "epoch": 0.7417368762151653, "grad_norm": 0.4771139621734619, "learning_rate": 8.812801049240349e-06, "loss": 0.1444, "step": 2289 }, { "epoch": 0.7420609202851588, "grad_norm": 0.4948260486125946, "learning_rate": 8.811669217678303e-06, "loss": 0.1397, "step": 2290 }, { "epoch": 0.7423849643551523, "grad_norm": 0.4971177577972412, "learning_rate": 8.810536919604846e-06, "loss": 0.1507, "step": 2291 }, { "epoch": 0.7427090084251459, "grad_norm": 0.5016118884086609, "learning_rate": 8.809404155158558e-06, "loss": 0.1555, "step": 2292 }, { "epoch": 0.7430330524951393, "grad_norm": 0.5108845233917236, "learning_rate": 8.808270924478079e-06, "loss": 0.1719, "step": 2293 }, { "epoch": 0.7433570965651328, "grad_norm": 0.46601563692092896, "learning_rate": 8.807137227702108e-06, "loss": 0.1384, "step": 2294 }, { "epoch": 0.7436811406351264, "grad_norm": 0.5223451256752014, "learning_rate": 8.806003064969394e-06, "loss": 0.1611, "step": 2295 }, { "epoch": 0.7440051847051199, "grad_norm": 0.5406107306480408, "learning_rate": 8.804868436418749e-06, "loss": 0.1557, "step": 2296 }, { "epoch": 0.7443292287751134, "grad_norm": 0.49755993485450745, "learning_rate": 8.803733342189044e-06, "loss": 0.1603, "step": 2297 }, { "epoch": 0.744653272845107, "grad_norm": 0.474341481924057, "learning_rate": 8.8025977824192e-06, "loss": 0.1454, "step": 2298 }, { "epoch": 0.7449773169151005, "grad_norm": 0.47189074754714966, "learning_rate": 8.8014617572482e-06, "loss": 0.1355, "step": 2299 }, { "epoch": 0.7453013609850939, "grad_norm": 0.5056459307670593, "learning_rate": 8.80032526681508e-06, "loss": 0.1431, "step": 2300 }, { "epoch": 0.7456254050550875, "grad_norm": 0.5132352113723755, "learning_rate": 8.799188311258939e-06, "loss": 0.138, "step": 2301 }, { "epoch": 0.745949449125081, "grad_norm": 0.5066553950309753, "learning_rate": 8.798050890718927e-06, "loss": 0.1431, "step": 2302 }, { "epoch": 0.7462734931950745, "grad_norm": 0.5290509462356567, "learning_rate": 8.796913005334254e-06, "loss": 0.1438, "step": 2303 }, { "epoch": 0.7465975372650681, "grad_norm": 0.512313187122345, "learning_rate": 8.795774655244187e-06, "loss": 0.1566, "step": 2304 }, { "epoch": 0.7469215813350616, "grad_norm": 0.5207310914993286, "learning_rate": 8.794635840588046e-06, "loss": 0.1627, "step": 2305 }, { "epoch": 0.7472456254050551, "grad_norm": 0.5040437579154968, "learning_rate": 8.793496561505216e-06, "loss": 0.1516, "step": 2306 }, { "epoch": 0.7475696694750487, "grad_norm": 0.5168375372886658, "learning_rate": 8.792356818135128e-06, "loss": 0.1576, "step": 2307 }, { "epoch": 0.7478937135450421, "grad_norm": 0.5095380544662476, "learning_rate": 8.791216610617278e-06, "loss": 0.1626, "step": 2308 }, { "epoch": 0.7482177576150356, "grad_norm": 0.5392544865608215, "learning_rate": 8.790075939091218e-06, "loss": 0.1668, "step": 2309 }, { "epoch": 0.7485418016850292, "grad_norm": 0.5280436873435974, "learning_rate": 8.788934803696554e-06, "loss": 0.1626, "step": 2310 }, { "epoch": 0.7488658457550227, "grad_norm": 0.4907281696796417, "learning_rate": 8.78779320457295e-06, "loss": 0.1477, "step": 2311 }, { "epoch": 0.7491898898250162, "grad_norm": 0.4908786714076996, "learning_rate": 8.786651141860127e-06, "loss": 0.1539, "step": 2312 }, { "epoch": 0.7495139338950098, "grad_norm": 0.528580904006958, "learning_rate": 8.785508615697859e-06, "loss": 0.1633, "step": 2313 }, { "epoch": 0.7498379779650033, "grad_norm": 0.5029335618019104, "learning_rate": 8.784365626225986e-06, "loss": 0.1623, "step": 2314 }, { "epoch": 0.7501620220349967, "grad_norm": 0.48301079869270325, "learning_rate": 8.783222173584396e-06, "loss": 0.1505, "step": 2315 }, { "epoch": 0.7504860661049902, "grad_norm": 0.5065808892250061, "learning_rate": 8.782078257913033e-06, "loss": 0.1486, "step": 2316 }, { "epoch": 0.7508101101749838, "grad_norm": 0.5093285441398621, "learning_rate": 8.780933879351907e-06, "loss": 0.1541, "step": 2317 }, { "epoch": 0.7511341542449773, "grad_norm": 0.5059759616851807, "learning_rate": 8.779789038041078e-06, "loss": 0.1527, "step": 2318 }, { "epoch": 0.7514581983149708, "grad_norm": 0.44241490960121155, "learning_rate": 8.77864373412066e-06, "loss": 0.1367, "step": 2319 }, { "epoch": 0.7517822423849644, "grad_norm": 0.5083364844322205, "learning_rate": 8.777497967730828e-06, "loss": 0.1475, "step": 2320 }, { "epoch": 0.7521062864549579, "grad_norm": 0.5217186212539673, "learning_rate": 8.776351739011815e-06, "loss": 0.1617, "step": 2321 }, { "epoch": 0.7524303305249513, "grad_norm": 0.5198972821235657, "learning_rate": 8.775205048103908e-06, "loss": 0.1489, "step": 2322 }, { "epoch": 0.7527543745949449, "grad_norm": 0.5171259641647339, "learning_rate": 8.774057895147448e-06, "loss": 0.1397, "step": 2323 }, { "epoch": 0.7530784186649384, "grad_norm": 0.4689252972602844, "learning_rate": 8.772910280282839e-06, "loss": 0.128, "step": 2324 }, { "epoch": 0.7534024627349319, "grad_norm": 0.5266039371490479, "learning_rate": 8.771762203650536e-06, "loss": 0.1685, "step": 2325 }, { "epoch": 0.7537265068049255, "grad_norm": 0.5220149755477905, "learning_rate": 8.770613665391053e-06, "loss": 0.1588, "step": 2326 }, { "epoch": 0.754050550874919, "grad_norm": 0.48700717091560364, "learning_rate": 8.769464665644958e-06, "loss": 0.1444, "step": 2327 }, { "epoch": 0.7543745949449125, "grad_norm": 0.48634734749794006, "learning_rate": 8.76831520455288e-06, "loss": 0.1448, "step": 2328 }, { "epoch": 0.7546986390149061, "grad_norm": 0.4782525599002838, "learning_rate": 8.7671652822555e-06, "loss": 0.1524, "step": 2329 }, { "epoch": 0.7550226830848995, "grad_norm": 0.48710310459136963, "learning_rate": 8.766014898893563e-06, "loss": 0.1464, "step": 2330 }, { "epoch": 0.755346727154893, "grad_norm": 0.5369205474853516, "learning_rate": 8.764864054607856e-06, "loss": 0.1651, "step": 2331 }, { "epoch": 0.7556707712248866, "grad_norm": 0.4831956624984741, "learning_rate": 8.763712749539235e-06, "loss": 0.1392, "step": 2332 }, { "epoch": 0.7559948152948801, "grad_norm": 0.4816092550754547, "learning_rate": 8.762560983828611e-06, "loss": 0.1573, "step": 2333 }, { "epoch": 0.7563188593648736, "grad_norm": 0.513157308101654, "learning_rate": 8.761408757616947e-06, "loss": 0.1445, "step": 2334 }, { "epoch": 0.7566429034348672, "grad_norm": 0.5071210265159607, "learning_rate": 8.760256071045264e-06, "loss": 0.1665, "step": 2335 }, { "epoch": 0.7569669475048607, "grad_norm": 0.5065087080001831, "learning_rate": 8.759102924254638e-06, "loss": 0.1595, "step": 2336 }, { "epoch": 0.7572909915748541, "grad_norm": 0.5367597937583923, "learning_rate": 8.757949317386207e-06, "loss": 0.1817, "step": 2337 }, { "epoch": 0.7576150356448477, "grad_norm": 0.5146887302398682, "learning_rate": 8.75679525058116e-06, "loss": 0.1625, "step": 2338 }, { "epoch": 0.7579390797148412, "grad_norm": 0.4877246916294098, "learning_rate": 8.755640723980743e-06, "loss": 0.1344, "step": 2339 }, { "epoch": 0.7582631237848347, "grad_norm": 0.5225220322608948, "learning_rate": 8.754485737726257e-06, "loss": 0.1629, "step": 2340 }, { "epoch": 0.7585871678548283, "grad_norm": 0.49157649278640747, "learning_rate": 8.753330291959064e-06, "loss": 0.1424, "step": 2341 }, { "epoch": 0.7589112119248218, "grad_norm": 0.48859721422195435, "learning_rate": 8.752174386820578e-06, "loss": 0.1458, "step": 2342 }, { "epoch": 0.7592352559948153, "grad_norm": 0.5182003974914551, "learning_rate": 8.75101802245227e-06, "loss": 0.1606, "step": 2343 }, { "epoch": 0.7595593000648088, "grad_norm": 0.5214172601699829, "learning_rate": 8.74986119899567e-06, "loss": 0.1393, "step": 2344 }, { "epoch": 0.7598833441348023, "grad_norm": 0.5092251896858215, "learning_rate": 8.74870391659236e-06, "loss": 0.1473, "step": 2345 }, { "epoch": 0.7602073882047958, "grad_norm": 0.5081760287284851, "learning_rate": 8.747546175383984e-06, "loss": 0.1439, "step": 2346 }, { "epoch": 0.7605314322747894, "grad_norm": 0.5062338709831238, "learning_rate": 8.746387975512232e-06, "loss": 0.1515, "step": 2347 }, { "epoch": 0.7608554763447829, "grad_norm": 0.47075754404067993, "learning_rate": 8.745229317118859e-06, "loss": 0.1474, "step": 2348 }, { "epoch": 0.7611795204147764, "grad_norm": 0.4888347387313843, "learning_rate": 8.744070200345675e-06, "loss": 0.1551, "step": 2349 }, { "epoch": 0.76150356448477, "grad_norm": 0.50382399559021, "learning_rate": 8.742910625334545e-06, "loss": 0.1563, "step": 2350 }, { "epoch": 0.7618276085547635, "grad_norm": 0.5092306137084961, "learning_rate": 8.741750592227388e-06, "loss": 0.1601, "step": 2351 }, { "epoch": 0.7621516526247569, "grad_norm": 0.5268076062202454, "learning_rate": 8.740590101166181e-06, "loss": 0.1753, "step": 2352 }, { "epoch": 0.7624756966947505, "grad_norm": 0.4805704355239868, "learning_rate": 8.739429152292957e-06, "loss": 0.1489, "step": 2353 }, { "epoch": 0.762799740764744, "grad_norm": 0.5004841089248657, "learning_rate": 8.738267745749806e-06, "loss": 0.1554, "step": 2354 }, { "epoch": 0.7631237848347375, "grad_norm": 0.5143179297447205, "learning_rate": 8.737105881678872e-06, "loss": 0.1598, "step": 2355 }, { "epoch": 0.7634478289047311, "grad_norm": 0.49346303939819336, "learning_rate": 8.735943560222358e-06, "loss": 0.1676, "step": 2356 }, { "epoch": 0.7637718729747246, "grad_norm": 0.4914379119873047, "learning_rate": 8.734780781522516e-06, "loss": 0.1452, "step": 2357 }, { "epoch": 0.7640959170447181, "grad_norm": 0.46416085958480835, "learning_rate": 8.733617545721663e-06, "loss": 0.1283, "step": 2358 }, { "epoch": 0.7644199611147116, "grad_norm": 0.487813264131546, "learning_rate": 8.732453852962166e-06, "loss": 0.144, "step": 2359 }, { "epoch": 0.7647440051847051, "grad_norm": 0.5519083142280579, "learning_rate": 8.731289703386451e-06, "loss": 0.1632, "step": 2360 }, { "epoch": 0.7650680492546986, "grad_norm": 0.5048906207084656, "learning_rate": 8.730125097137e-06, "loss": 0.1572, "step": 2361 }, { "epoch": 0.7653920933246922, "grad_norm": 0.4748183488845825, "learning_rate": 8.728960034356344e-06, "loss": 0.1479, "step": 2362 }, { "epoch": 0.7657161373946857, "grad_norm": 0.4857644736766815, "learning_rate": 8.72779451518708e-06, "loss": 0.1481, "step": 2363 }, { "epoch": 0.7660401814646792, "grad_norm": 0.5448926687240601, "learning_rate": 8.726628539771856e-06, "loss": 0.1704, "step": 2364 }, { "epoch": 0.7663642255346728, "grad_norm": 0.4890585243701935, "learning_rate": 8.725462108253375e-06, "loss": 0.1543, "step": 2365 }, { "epoch": 0.7666882696046662, "grad_norm": 0.5503400564193726, "learning_rate": 8.724295220774396e-06, "loss": 0.1727, "step": 2366 }, { "epoch": 0.7670123136746597, "grad_norm": 0.4946596026420593, "learning_rate": 8.723127877477737e-06, "loss": 0.1538, "step": 2367 }, { "epoch": 0.7673363577446533, "grad_norm": 0.49211350083351135, "learning_rate": 8.721960078506269e-06, "loss": 0.1505, "step": 2368 }, { "epoch": 0.7676604018146468, "grad_norm": 0.5130065083503723, "learning_rate": 8.72079182400292e-06, "loss": 0.1631, "step": 2369 }, { "epoch": 0.7679844458846403, "grad_norm": 0.5098603367805481, "learning_rate": 8.71962311411067e-06, "loss": 0.168, "step": 2370 }, { "epoch": 0.7683084899546339, "grad_norm": 0.4712441861629486, "learning_rate": 8.718453948972559e-06, "loss": 0.1397, "step": 2371 }, { "epoch": 0.7686325340246274, "grad_norm": 0.4978718161582947, "learning_rate": 8.717284328731681e-06, "loss": 0.1603, "step": 2372 }, { "epoch": 0.7689565780946209, "grad_norm": 0.48081910610198975, "learning_rate": 8.716114253531189e-06, "loss": 0.1427, "step": 2373 }, { "epoch": 0.7692806221646143, "grad_norm": 0.4605656564235687, "learning_rate": 8.714943723514288e-06, "loss": 0.1457, "step": 2374 }, { "epoch": 0.7696046662346079, "grad_norm": 0.5563753843307495, "learning_rate": 8.713772738824237e-06, "loss": 0.1564, "step": 2375 }, { "epoch": 0.7699287103046014, "grad_norm": 0.48092120885849, "learning_rate": 8.712601299604355e-06, "loss": 0.1508, "step": 2376 }, { "epoch": 0.770252754374595, "grad_norm": 0.4575970768928528, "learning_rate": 8.711429405998017e-06, "loss": 0.1459, "step": 2377 }, { "epoch": 0.7705767984445885, "grad_norm": 0.48775961995124817, "learning_rate": 8.710257058148647e-06, "loss": 0.1477, "step": 2378 }, { "epoch": 0.770900842514582, "grad_norm": 0.4882473051548004, "learning_rate": 8.709084256199732e-06, "loss": 0.1406, "step": 2379 }, { "epoch": 0.7712248865845756, "grad_norm": 0.5059452056884766, "learning_rate": 8.70791100029481e-06, "loss": 0.1517, "step": 2380 }, { "epoch": 0.771548930654569, "grad_norm": 0.512593686580658, "learning_rate": 8.706737290577475e-06, "loss": 0.159, "step": 2381 }, { "epoch": 0.7718729747245625, "grad_norm": 0.4948345124721527, "learning_rate": 8.705563127191383e-06, "loss": 0.1495, "step": 2382 }, { "epoch": 0.772197018794556, "grad_norm": 0.45760348439216614, "learning_rate": 8.704388510280237e-06, "loss": 0.1522, "step": 2383 }, { "epoch": 0.7725210628645496, "grad_norm": 0.5127160549163818, "learning_rate": 8.703213439987797e-06, "loss": 0.1614, "step": 2384 }, { "epoch": 0.7728451069345431, "grad_norm": 0.46613267064094543, "learning_rate": 8.702037916457882e-06, "loss": 0.1437, "step": 2385 }, { "epoch": 0.7731691510045366, "grad_norm": 0.4914171099662781, "learning_rate": 8.700861939834365e-06, "loss": 0.1671, "step": 2386 }, { "epoch": 0.7734931950745302, "grad_norm": 0.508822500705719, "learning_rate": 8.699685510261173e-06, "loss": 0.1543, "step": 2387 }, { "epoch": 0.7738172391445236, "grad_norm": 0.5024411678314209, "learning_rate": 8.698508627882291e-06, "loss": 0.1643, "step": 2388 }, { "epoch": 0.7741412832145171, "grad_norm": 0.5050293803215027, "learning_rate": 8.697331292841757e-06, "loss": 0.1676, "step": 2389 }, { "epoch": 0.7744653272845107, "grad_norm": 0.48795467615127563, "learning_rate": 8.696153505283667e-06, "loss": 0.1637, "step": 2390 }, { "epoch": 0.7747893713545042, "grad_norm": 0.4281572699546814, "learning_rate": 8.694975265352168e-06, "loss": 0.1267, "step": 2391 }, { "epoch": 0.7751134154244977, "grad_norm": 0.5082755088806152, "learning_rate": 8.693796573191467e-06, "loss": 0.1405, "step": 2392 }, { "epoch": 0.7754374594944913, "grad_norm": 0.5042880177497864, "learning_rate": 8.692617428945823e-06, "loss": 0.1524, "step": 2393 }, { "epoch": 0.7757615035644848, "grad_norm": 0.4925474524497986, "learning_rate": 8.691437832759555e-06, "loss": 0.1426, "step": 2394 }, { "epoch": 0.7760855476344782, "grad_norm": 0.4777103066444397, "learning_rate": 8.69025778477703e-06, "loss": 0.1367, "step": 2395 }, { "epoch": 0.7764095917044718, "grad_norm": 0.4859715402126312, "learning_rate": 8.689077285142678e-06, "loss": 0.1537, "step": 2396 }, { "epoch": 0.7767336357744653, "grad_norm": 0.4879585802555084, "learning_rate": 8.687896334000979e-06, "loss": 0.1453, "step": 2397 }, { "epoch": 0.7770576798444588, "grad_norm": 0.4773515462875366, "learning_rate": 8.68671493149647e-06, "loss": 0.1377, "step": 2398 }, { "epoch": 0.7773817239144524, "grad_norm": 0.48462027311325073, "learning_rate": 8.685533077773744e-06, "loss": 0.1487, "step": 2399 }, { "epoch": 0.7777057679844459, "grad_norm": 0.45635169744491577, "learning_rate": 8.684350772977447e-06, "loss": 0.1329, "step": 2400 }, { "epoch": 0.7780298120544394, "grad_norm": 0.530298113822937, "learning_rate": 8.683168017252287e-06, "loss": 0.1706, "step": 2401 }, { "epoch": 0.778353856124433, "grad_norm": 0.5109763741493225, "learning_rate": 8.681984810743012e-06, "loss": 0.1663, "step": 2402 }, { "epoch": 0.7786779001944264, "grad_norm": 0.478502482175827, "learning_rate": 8.680801153594442e-06, "loss": 0.1358, "step": 2403 }, { "epoch": 0.7790019442644199, "grad_norm": 0.4598463475704193, "learning_rate": 8.679617045951445e-06, "loss": 0.1316, "step": 2404 }, { "epoch": 0.7793259883344135, "grad_norm": 0.47924965620040894, "learning_rate": 8.678432487958943e-06, "loss": 0.1466, "step": 2405 }, { "epoch": 0.779650032404407, "grad_norm": 0.49181798100471497, "learning_rate": 8.677247479761915e-06, "loss": 0.1423, "step": 2406 }, { "epoch": 0.7799740764744005, "grad_norm": 0.47612518072128296, "learning_rate": 8.676062021505392e-06, "loss": 0.1378, "step": 2407 }, { "epoch": 0.7802981205443941, "grad_norm": 0.5300808548927307, "learning_rate": 8.674876113334465e-06, "loss": 0.1639, "step": 2408 }, { "epoch": 0.7806221646143876, "grad_norm": 0.5046321153640747, "learning_rate": 8.673689755394278e-06, "loss": 0.15, "step": 2409 }, { "epoch": 0.780946208684381, "grad_norm": 0.47892558574676514, "learning_rate": 8.67250294783003e-06, "loss": 0.1452, "step": 2410 }, { "epoch": 0.7812702527543746, "grad_norm": 0.5028601288795471, "learning_rate": 8.671315690786972e-06, "loss": 0.1539, "step": 2411 }, { "epoch": 0.7815942968243681, "grad_norm": 0.5324373841285706, "learning_rate": 8.670127984410415e-06, "loss": 0.1704, "step": 2412 }, { "epoch": 0.7819183408943616, "grad_norm": 0.48744648694992065, "learning_rate": 8.668939828845721e-06, "loss": 0.1544, "step": 2413 }, { "epoch": 0.7822423849643552, "grad_norm": 0.46258261799812317, "learning_rate": 8.667751224238311e-06, "loss": 0.1341, "step": 2414 }, { "epoch": 0.7825664290343487, "grad_norm": 0.4862961769104004, "learning_rate": 8.666562170733658e-06, "loss": 0.1476, "step": 2415 }, { "epoch": 0.7828904731043422, "grad_norm": 0.5449991226196289, "learning_rate": 8.665372668477293e-06, "loss": 0.1374, "step": 2416 }, { "epoch": 0.7832145171743357, "grad_norm": 0.47787198424339294, "learning_rate": 8.664182717614793e-06, "loss": 0.1438, "step": 2417 }, { "epoch": 0.7835385612443292, "grad_norm": 0.5111783742904663, "learning_rate": 8.662992318291803e-06, "loss": 0.1655, "step": 2418 }, { "epoch": 0.7838626053143227, "grad_norm": 0.45256495475769043, "learning_rate": 8.661801470654011e-06, "loss": 0.1309, "step": 2419 }, { "epoch": 0.7841866493843163, "grad_norm": 0.49635931849479675, "learning_rate": 8.66061017484717e-06, "loss": 0.1412, "step": 2420 }, { "epoch": 0.7845106934543098, "grad_norm": 0.48685595393180847, "learning_rate": 8.659418431017082e-06, "loss": 0.1531, "step": 2421 }, { "epoch": 0.7848347375243033, "grad_norm": 0.5329604148864746, "learning_rate": 8.658226239309602e-06, "loss": 0.157, "step": 2422 }, { "epoch": 0.7851587815942969, "grad_norm": 0.5168407559394836, "learning_rate": 8.657033599870646e-06, "loss": 0.1489, "step": 2423 }, { "epoch": 0.7854828256642904, "grad_norm": 0.49895137548446655, "learning_rate": 8.655840512846178e-06, "loss": 0.1547, "step": 2424 }, { "epoch": 0.7858068697342838, "grad_norm": 0.47160446643829346, "learning_rate": 8.654646978382227e-06, "loss": 0.1534, "step": 2425 }, { "epoch": 0.7861309138042774, "grad_norm": 0.5098379254341125, "learning_rate": 8.653452996624861e-06, "loss": 0.157, "step": 2426 }, { "epoch": 0.7864549578742709, "grad_norm": 0.5164358615875244, "learning_rate": 8.652258567720218e-06, "loss": 0.1516, "step": 2427 }, { "epoch": 0.7867790019442644, "grad_norm": 0.49435731768608093, "learning_rate": 8.651063691814483e-06, "loss": 0.1551, "step": 2428 }, { "epoch": 0.787103046014258, "grad_norm": 0.4760430157184601, "learning_rate": 8.649868369053897e-06, "loss": 0.1366, "step": 2429 }, { "epoch": 0.7874270900842515, "grad_norm": 0.5499979853630066, "learning_rate": 8.648672599584756e-06, "loss": 0.1683, "step": 2430 }, { "epoch": 0.787751134154245, "grad_norm": 0.47031792998313904, "learning_rate": 8.647476383553411e-06, "loss": 0.146, "step": 2431 }, { "epoch": 0.7880751782242384, "grad_norm": 0.4686078727245331, "learning_rate": 8.646279721106266e-06, "loss": 0.1391, "step": 2432 }, { "epoch": 0.788399222294232, "grad_norm": 0.47792428731918335, "learning_rate": 8.645082612389783e-06, "loss": 0.1481, "step": 2433 }, { "epoch": 0.7887232663642255, "grad_norm": 0.4733062982559204, "learning_rate": 8.643885057550476e-06, "loss": 0.1337, "step": 2434 }, { "epoch": 0.789047310434219, "grad_norm": 0.4806113541126251, "learning_rate": 8.642687056734911e-06, "loss": 0.1434, "step": 2435 }, { "epoch": 0.7893713545042126, "grad_norm": 0.5003640651702881, "learning_rate": 8.641488610089716e-06, "loss": 0.1398, "step": 2436 }, { "epoch": 0.7896953985742061, "grad_norm": 0.4911183714866638, "learning_rate": 8.640289717761568e-06, "loss": 0.1487, "step": 2437 }, { "epoch": 0.7900194426441997, "grad_norm": 0.4869093596935272, "learning_rate": 8.639090379897198e-06, "loss": 0.1455, "step": 2438 }, { "epoch": 0.7903434867141931, "grad_norm": 0.44922661781311035, "learning_rate": 8.637890596643396e-06, "loss": 0.1356, "step": 2439 }, { "epoch": 0.7906675307841866, "grad_norm": 0.45907965302467346, "learning_rate": 8.636690368147e-06, "loss": 0.1273, "step": 2440 }, { "epoch": 0.7909915748541801, "grad_norm": 0.5199213624000549, "learning_rate": 8.635489694554913e-06, "loss": 0.1572, "step": 2441 }, { "epoch": 0.7913156189241737, "grad_norm": 0.5016922354698181, "learning_rate": 8.634288576014078e-06, "loss": 0.1571, "step": 2442 }, { "epoch": 0.7916396629941672, "grad_norm": 0.4670245349407196, "learning_rate": 8.633087012671504e-06, "loss": 0.1384, "step": 2443 }, { "epoch": 0.7919637070641607, "grad_norm": 0.5087417960166931, "learning_rate": 8.631885004674251e-06, "loss": 0.1413, "step": 2444 }, { "epoch": 0.7922877511341543, "grad_norm": 0.48342347145080566, "learning_rate": 8.630682552169434e-06, "loss": 0.1443, "step": 2445 }, { "epoch": 0.7926117952041478, "grad_norm": 0.49706462025642395, "learning_rate": 8.629479655304221e-06, "loss": 0.1558, "step": 2446 }, { "epoch": 0.7929358392741412, "grad_norm": 0.47584474086761475, "learning_rate": 8.628276314225833e-06, "loss": 0.1468, "step": 2447 }, { "epoch": 0.7932598833441348, "grad_norm": 0.44449400901794434, "learning_rate": 8.627072529081549e-06, "loss": 0.1298, "step": 2448 }, { "epoch": 0.7935839274141283, "grad_norm": 0.49863201379776, "learning_rate": 8.625868300018701e-06, "loss": 0.1679, "step": 2449 }, { "epoch": 0.7939079714841218, "grad_norm": 0.5134031772613525, "learning_rate": 8.624663627184671e-06, "loss": 0.1618, "step": 2450 }, { "epoch": 0.7942320155541154, "grad_norm": 0.466678649187088, "learning_rate": 8.623458510726906e-06, "loss": 0.1267, "step": 2451 }, { "epoch": 0.7945560596241089, "grad_norm": 0.48051100969314575, "learning_rate": 8.622252950792895e-06, "loss": 0.1432, "step": 2452 }, { "epoch": 0.7948801036941024, "grad_norm": 0.4591250717639923, "learning_rate": 8.62104694753019e-06, "loss": 0.1441, "step": 2453 }, { "epoch": 0.7952041477640959, "grad_norm": 0.49696993827819824, "learning_rate": 8.619840501086392e-06, "loss": 0.1395, "step": 2454 }, { "epoch": 0.7955281918340894, "grad_norm": 0.4945259690284729, "learning_rate": 8.61863361160916e-06, "loss": 0.1527, "step": 2455 }, { "epoch": 0.7958522359040829, "grad_norm": 0.47516417503356934, "learning_rate": 8.617426279246206e-06, "loss": 0.1455, "step": 2456 }, { "epoch": 0.7961762799740765, "grad_norm": 0.4969664216041565, "learning_rate": 8.616218504145294e-06, "loss": 0.1564, "step": 2457 }, { "epoch": 0.79650032404407, "grad_norm": 0.49645090103149414, "learning_rate": 8.615010286454244e-06, "loss": 0.1535, "step": 2458 }, { "epoch": 0.7968243681140635, "grad_norm": 0.5396689176559448, "learning_rate": 8.613801626320932e-06, "loss": 0.1615, "step": 2459 }, { "epoch": 0.7971484121840571, "grad_norm": 0.4874482750892639, "learning_rate": 8.612592523893286e-06, "loss": 0.1466, "step": 2460 }, { "epoch": 0.7974724562540505, "grad_norm": 0.5054929256439209, "learning_rate": 8.611382979319286e-06, "loss": 0.1479, "step": 2461 }, { "epoch": 0.797796500324044, "grad_norm": 0.4731915295124054, "learning_rate": 8.610172992746971e-06, "loss": 0.1472, "step": 2462 }, { "epoch": 0.7981205443940376, "grad_norm": 0.48185446858406067, "learning_rate": 8.60896256432443e-06, "loss": 0.1538, "step": 2463 }, { "epoch": 0.7984445884640311, "grad_norm": 0.5094521045684814, "learning_rate": 8.607751694199811e-06, "loss": 0.1525, "step": 2464 }, { "epoch": 0.7987686325340246, "grad_norm": 0.5127012133598328, "learning_rate": 8.606540382521308e-06, "loss": 0.1722, "step": 2465 }, { "epoch": 0.7990926766040182, "grad_norm": 0.46042630076408386, "learning_rate": 8.605328629437177e-06, "loss": 0.1313, "step": 2466 }, { "epoch": 0.7994167206740117, "grad_norm": 0.46402016282081604, "learning_rate": 8.604116435095724e-06, "loss": 0.1339, "step": 2467 }, { "epoch": 0.7997407647440052, "grad_norm": 0.5011064410209656, "learning_rate": 8.60290379964531e-06, "loss": 0.1581, "step": 2468 }, { "epoch": 0.8000648088139987, "grad_norm": 0.4951988458633423, "learning_rate": 8.601690723234349e-06, "loss": 0.1554, "step": 2469 }, { "epoch": 0.8003888528839922, "grad_norm": 0.5081047415733337, "learning_rate": 8.600477206011312e-06, "loss": 0.1531, "step": 2470 }, { "epoch": 0.8007128969539857, "grad_norm": 0.511314332485199, "learning_rate": 8.599263248124718e-06, "loss": 0.1565, "step": 2471 }, { "epoch": 0.8010369410239793, "grad_norm": 0.4906594753265381, "learning_rate": 8.598048849723149e-06, "loss": 0.1559, "step": 2472 }, { "epoch": 0.8013609850939728, "grad_norm": 0.4825765788555145, "learning_rate": 8.596834010955231e-06, "loss": 0.1519, "step": 2473 }, { "epoch": 0.8016850291639663, "grad_norm": 0.482764333486557, "learning_rate": 8.595618731969651e-06, "loss": 0.1403, "step": 2474 }, { "epoch": 0.8020090732339599, "grad_norm": 0.46174606680870056, "learning_rate": 8.594403012915145e-06, "loss": 0.1434, "step": 2475 }, { "epoch": 0.8023331173039533, "grad_norm": 0.5105165839195251, "learning_rate": 8.593186853940507e-06, "loss": 0.1528, "step": 2476 }, { "epoch": 0.8026571613739468, "grad_norm": 0.5206249356269836, "learning_rate": 8.591970255194582e-06, "loss": 0.1598, "step": 2477 }, { "epoch": 0.8029812054439404, "grad_norm": 0.5320706963539124, "learning_rate": 8.590753216826273e-06, "loss": 0.1538, "step": 2478 }, { "epoch": 0.8033052495139339, "grad_norm": 0.5003678202629089, "learning_rate": 8.58953573898453e-06, "loss": 0.1615, "step": 2479 }, { "epoch": 0.8036292935839274, "grad_norm": 0.4871372580528259, "learning_rate": 8.588317821818362e-06, "loss": 0.151, "step": 2480 }, { "epoch": 0.803953337653921, "grad_norm": 0.45962315797805786, "learning_rate": 8.58709946547683e-06, "loss": 0.1371, "step": 2481 }, { "epoch": 0.8042773817239145, "grad_norm": 0.5067052841186523, "learning_rate": 8.585880670109051e-06, "loss": 0.1608, "step": 2482 }, { "epoch": 0.8046014257939079, "grad_norm": 0.47325399518013, "learning_rate": 8.58466143586419e-06, "loss": 0.1437, "step": 2483 }, { "epoch": 0.8049254698639015, "grad_norm": 0.47463107109069824, "learning_rate": 8.583441762891473e-06, "loss": 0.1458, "step": 2484 }, { "epoch": 0.805249513933895, "grad_norm": 0.47829777002334595, "learning_rate": 8.582221651340174e-06, "loss": 0.1486, "step": 2485 }, { "epoch": 0.8055735580038885, "grad_norm": 0.4528464078903198, "learning_rate": 8.581001101359622e-06, "loss": 0.1298, "step": 2486 }, { "epoch": 0.8058976020738821, "grad_norm": 0.4922964572906494, "learning_rate": 8.579780113099206e-06, "loss": 0.1548, "step": 2487 }, { "epoch": 0.8062216461438756, "grad_norm": 0.49854376912117004, "learning_rate": 8.578558686708356e-06, "loss": 0.1498, "step": 2488 }, { "epoch": 0.8065456902138691, "grad_norm": 0.45197510719299316, "learning_rate": 8.577336822336567e-06, "loss": 0.1354, "step": 2489 }, { "epoch": 0.8068697342838627, "grad_norm": 0.5470073223114014, "learning_rate": 8.57611452013338e-06, "loss": 0.1703, "step": 2490 }, { "epoch": 0.8071937783538561, "grad_norm": 0.5207754373550415, "learning_rate": 8.574891780248396e-06, "loss": 0.1513, "step": 2491 }, { "epoch": 0.8075178224238496, "grad_norm": 0.4938046336174011, "learning_rate": 8.573668602831268e-06, "loss": 0.1487, "step": 2492 }, { "epoch": 0.8078418664938432, "grad_norm": 0.4892713725566864, "learning_rate": 8.572444988031696e-06, "loss": 0.1436, "step": 2493 }, { "epoch": 0.8081659105638367, "grad_norm": 0.48755109310150146, "learning_rate": 8.571220935999443e-06, "loss": 0.1416, "step": 2494 }, { "epoch": 0.8084899546338302, "grad_norm": 0.47816547751426697, "learning_rate": 8.56999644688432e-06, "loss": 0.137, "step": 2495 }, { "epoch": 0.8088139987038238, "grad_norm": 0.5130065083503723, "learning_rate": 8.568771520836191e-06, "loss": 0.1522, "step": 2496 }, { "epoch": 0.8091380427738173, "grad_norm": 0.47157740592956543, "learning_rate": 8.567546158004977e-06, "loss": 0.1484, "step": 2497 }, { "epoch": 0.8094620868438107, "grad_norm": 0.5034700632095337, "learning_rate": 8.56632035854065e-06, "loss": 0.1512, "step": 2498 }, { "epoch": 0.8097861309138042, "grad_norm": 0.45632052421569824, "learning_rate": 8.565094122593236e-06, "loss": 0.1399, "step": 2499 }, { "epoch": 0.8101101749837978, "grad_norm": 0.4864128828048706, "learning_rate": 8.563867450312812e-06, "loss": 0.1457, "step": 2500 }, { "epoch": 0.8104342190537913, "grad_norm": 0.4833630919456482, "learning_rate": 8.562640341849515e-06, "loss": 0.142, "step": 2501 }, { "epoch": 0.8107582631237849, "grad_norm": 0.44969481229782104, "learning_rate": 8.561412797353528e-06, "loss": 0.134, "step": 2502 }, { "epoch": 0.8110823071937784, "grad_norm": 0.47103044390678406, "learning_rate": 8.560184816975093e-06, "loss": 0.1431, "step": 2503 }, { "epoch": 0.8114063512637719, "grad_norm": 0.4690519869327545, "learning_rate": 8.5589564008645e-06, "loss": 0.146, "step": 2504 }, { "epoch": 0.8117303953337653, "grad_norm": 0.48151180148124695, "learning_rate": 8.557727549172099e-06, "loss": 0.1426, "step": 2505 }, { "epoch": 0.8120544394037589, "grad_norm": 0.5109602808952332, "learning_rate": 8.556498262048285e-06, "loss": 0.1595, "step": 2506 }, { "epoch": 0.8123784834737524, "grad_norm": 0.476717472076416, "learning_rate": 8.555268539643515e-06, "loss": 0.1431, "step": 2507 }, { "epoch": 0.812702527543746, "grad_norm": 0.4775809049606323, "learning_rate": 8.554038382108293e-06, "loss": 0.1425, "step": 2508 }, { "epoch": 0.8130265716137395, "grad_norm": 0.49816781282424927, "learning_rate": 8.552807789593178e-06, "loss": 0.1528, "step": 2509 }, { "epoch": 0.813350615683733, "grad_norm": 0.49087977409362793, "learning_rate": 8.551576762248785e-06, "loss": 0.1467, "step": 2510 }, { "epoch": 0.8136746597537265, "grad_norm": 0.526469886302948, "learning_rate": 8.550345300225778e-06, "loss": 0.1555, "step": 2511 }, { "epoch": 0.81399870382372, "grad_norm": 0.49106329679489136, "learning_rate": 8.549113403674876e-06, "loss": 0.1419, "step": 2512 }, { "epoch": 0.8143227478937135, "grad_norm": 0.47168755531311035, "learning_rate": 8.547881072746852e-06, "loss": 0.1381, "step": 2513 }, { "epoch": 0.814646791963707, "grad_norm": 0.5075554847717285, "learning_rate": 8.546648307592529e-06, "loss": 0.1595, "step": 2514 }, { "epoch": 0.8149708360337006, "grad_norm": 0.49591541290283203, "learning_rate": 8.545415108362789e-06, "loss": 0.1503, "step": 2515 }, { "epoch": 0.8152948801036941, "grad_norm": 0.4703139364719391, "learning_rate": 8.54418147520856e-06, "loss": 0.1522, "step": 2516 }, { "epoch": 0.8156189241736876, "grad_norm": 0.46373069286346436, "learning_rate": 8.54294740828083e-06, "loss": 0.1448, "step": 2517 }, { "epoch": 0.8159429682436812, "grad_norm": 0.4968792200088501, "learning_rate": 8.541712907730636e-06, "loss": 0.1493, "step": 2518 }, { "epoch": 0.8162670123136747, "grad_norm": 0.4752669036388397, "learning_rate": 8.540477973709068e-06, "loss": 0.1528, "step": 2519 }, { "epoch": 0.8165910563836681, "grad_norm": 0.4536360204219818, "learning_rate": 8.539242606367271e-06, "loss": 0.1337, "step": 2520 }, { "epoch": 0.8169151004536617, "grad_norm": 0.5199907422065735, "learning_rate": 8.538006805856443e-06, "loss": 0.1601, "step": 2521 }, { "epoch": 0.8172391445236552, "grad_norm": 0.4877196252346039, "learning_rate": 8.53677057232783e-06, "loss": 0.1507, "step": 2522 }, { "epoch": 0.8175631885936487, "grad_norm": 0.5085099935531616, "learning_rate": 8.535533905932739e-06, "loss": 0.1638, "step": 2523 }, { "epoch": 0.8178872326636423, "grad_norm": 0.48997917771339417, "learning_rate": 8.534296806822523e-06, "loss": 0.1459, "step": 2524 }, { "epoch": 0.8182112767336358, "grad_norm": 0.441158264875412, "learning_rate": 8.533059275148594e-06, "loss": 0.1327, "step": 2525 }, { "epoch": 0.8185353208036293, "grad_norm": 0.47758910059928894, "learning_rate": 8.531821311062412e-06, "loss": 0.1415, "step": 2526 }, { "epoch": 0.8188593648736228, "grad_norm": 0.45573076605796814, "learning_rate": 8.530582914715493e-06, "loss": 0.1363, "step": 2527 }, { "epoch": 0.8191834089436163, "grad_norm": 0.46968233585357666, "learning_rate": 8.529344086259401e-06, "loss": 0.1375, "step": 2528 }, { "epoch": 0.8195074530136098, "grad_norm": 0.47364190220832825, "learning_rate": 8.528104825845763e-06, "loss": 0.1396, "step": 2529 }, { "epoch": 0.8198314970836034, "grad_norm": 0.47457167506217957, "learning_rate": 8.526865133626246e-06, "loss": 0.1453, "step": 2530 }, { "epoch": 0.8201555411535969, "grad_norm": 0.49752020835876465, "learning_rate": 8.525625009752582e-06, "loss": 0.1552, "step": 2531 }, { "epoch": 0.8204795852235904, "grad_norm": 0.4616011381149292, "learning_rate": 8.524384454376546e-06, "loss": 0.1317, "step": 2532 }, { "epoch": 0.820803629293584, "grad_norm": 0.5041443705558777, "learning_rate": 8.523143467649972e-06, "loss": 0.1534, "step": 2533 }, { "epoch": 0.8211276733635774, "grad_norm": 0.4988428056240082, "learning_rate": 8.521902049724743e-06, "loss": 0.1517, "step": 2534 }, { "epoch": 0.8214517174335709, "grad_norm": 0.48993542790412903, "learning_rate": 8.520660200752799e-06, "loss": 0.1418, "step": 2535 }, { "epoch": 0.8217757615035645, "grad_norm": 0.458396852016449, "learning_rate": 8.51941792088613e-06, "loss": 0.1432, "step": 2536 }, { "epoch": 0.822099805573558, "grad_norm": 0.5019996166229248, "learning_rate": 8.518175210276775e-06, "loss": 0.1491, "step": 2537 }, { "epoch": 0.8224238496435515, "grad_norm": 0.4727717936038971, "learning_rate": 8.516932069076835e-06, "loss": 0.149, "step": 2538 }, { "epoch": 0.8227478937135451, "grad_norm": 0.4895130395889282, "learning_rate": 8.515688497438458e-06, "loss": 0.1424, "step": 2539 }, { "epoch": 0.8230719377835386, "grad_norm": 0.5439048409461975, "learning_rate": 8.51444449551384e-06, "loss": 0.1718, "step": 2540 }, { "epoch": 0.8233959818535321, "grad_norm": 0.464637815952301, "learning_rate": 8.51320006345524e-06, "loss": 0.1413, "step": 2541 }, { "epoch": 0.8237200259235256, "grad_norm": 0.5023958086967468, "learning_rate": 8.511955201414963e-06, "loss": 0.1552, "step": 2542 }, { "epoch": 0.8240440699935191, "grad_norm": 0.477631539106369, "learning_rate": 8.510709909545367e-06, "loss": 0.1531, "step": 2543 }, { "epoch": 0.8243681140635126, "grad_norm": 0.4549383521080017, "learning_rate": 8.509464187998863e-06, "loss": 0.1422, "step": 2544 }, { "epoch": 0.8246921581335062, "grad_norm": 0.48795077204704285, "learning_rate": 8.50821803692792e-06, "loss": 0.1502, "step": 2545 }, { "epoch": 0.8250162022034997, "grad_norm": 0.473164439201355, "learning_rate": 8.50697145648505e-06, "loss": 0.1362, "step": 2546 }, { "epoch": 0.8253402462734932, "grad_norm": 0.5239213109016418, "learning_rate": 8.505724446822824e-06, "loss": 0.1453, "step": 2547 }, { "epoch": 0.8256642903434868, "grad_norm": 0.4741290509700775, "learning_rate": 8.504477008093862e-06, "loss": 0.1379, "step": 2548 }, { "epoch": 0.8259883344134802, "grad_norm": 0.4502844214439392, "learning_rate": 8.503229140450842e-06, "loss": 0.1208, "step": 2549 }, { "epoch": 0.8263123784834737, "grad_norm": 0.5354313254356384, "learning_rate": 8.501980844046486e-06, "loss": 0.1652, "step": 2550 }, { "epoch": 0.8266364225534673, "grad_norm": 0.49819859862327576, "learning_rate": 8.500732119033581e-06, "loss": 0.1446, "step": 2551 }, { "epoch": 0.8269604666234608, "grad_norm": 0.5132746696472168, "learning_rate": 8.499482965564952e-06, "loss": 0.1471, "step": 2552 }, { "epoch": 0.8272845106934543, "grad_norm": 0.47220292687416077, "learning_rate": 8.498233383793486e-06, "loss": 0.1546, "step": 2553 }, { "epoch": 0.8276085547634479, "grad_norm": 0.5188787579536438, "learning_rate": 8.496983373872119e-06, "loss": 0.1474, "step": 2554 }, { "epoch": 0.8279325988334414, "grad_norm": 0.5114861726760864, "learning_rate": 8.495732935953839e-06, "loss": 0.1532, "step": 2555 }, { "epoch": 0.8282566429034348, "grad_norm": 0.5248055458068848, "learning_rate": 8.494482070191691e-06, "loss": 0.1514, "step": 2556 }, { "epoch": 0.8285806869734283, "grad_norm": 0.4819242060184479, "learning_rate": 8.493230776738768e-06, "loss": 0.1359, "step": 2557 }, { "epoch": 0.8289047310434219, "grad_norm": 0.5227571129798889, "learning_rate": 8.491979055748214e-06, "loss": 0.1592, "step": 2558 }, { "epoch": 0.8292287751134154, "grad_norm": 0.48012658953666687, "learning_rate": 8.490726907373227e-06, "loss": 0.1416, "step": 2559 }, { "epoch": 0.829552819183409, "grad_norm": 0.4852195680141449, "learning_rate": 8.48947433176706e-06, "loss": 0.1503, "step": 2560 }, { "epoch": 0.8298768632534025, "grad_norm": 0.5364710688591003, "learning_rate": 8.488221329083017e-06, "loss": 0.1668, "step": 2561 }, { "epoch": 0.830200907323396, "grad_norm": 0.5360770225524902, "learning_rate": 8.48696789947445e-06, "loss": 0.1487, "step": 2562 }, { "epoch": 0.8305249513933896, "grad_norm": 0.5258188843727112, "learning_rate": 8.48571404309477e-06, "loss": 0.1671, "step": 2563 }, { "epoch": 0.830848995463383, "grad_norm": 0.46062448620796204, "learning_rate": 8.484459760097435e-06, "loss": 0.1457, "step": 2564 }, { "epoch": 0.8311730395333765, "grad_norm": 0.5202407240867615, "learning_rate": 8.483205050635957e-06, "loss": 0.153, "step": 2565 }, { "epoch": 0.83149708360337, "grad_norm": 0.5415773987770081, "learning_rate": 8.481949914863901e-06, "loss": 0.1712, "step": 2566 }, { "epoch": 0.8318211276733636, "grad_norm": 0.4666374623775482, "learning_rate": 8.480694352934884e-06, "loss": 0.1401, "step": 2567 }, { "epoch": 0.8321451717433571, "grad_norm": 0.4689330756664276, "learning_rate": 8.479438365002573e-06, "loss": 0.1363, "step": 2568 }, { "epoch": 0.8324692158133506, "grad_norm": 0.4993961751461029, "learning_rate": 8.478181951220693e-06, "loss": 0.1549, "step": 2569 }, { "epoch": 0.8327932598833442, "grad_norm": 0.47131237387657166, "learning_rate": 8.476925111743009e-06, "loss": 0.1469, "step": 2570 }, { "epoch": 0.8331173039533376, "grad_norm": 0.4705948531627655, "learning_rate": 8.475667846723352e-06, "loss": 0.1448, "step": 2571 }, { "epoch": 0.8334413480233311, "grad_norm": 0.5221664309501648, "learning_rate": 8.474410156315597e-06, "loss": 0.1547, "step": 2572 }, { "epoch": 0.8337653920933247, "grad_norm": 0.496259868144989, "learning_rate": 8.473152040673676e-06, "loss": 0.1557, "step": 2573 }, { "epoch": 0.8340894361633182, "grad_norm": 0.47865161299705505, "learning_rate": 8.471893499951567e-06, "loss": 0.1474, "step": 2574 }, { "epoch": 0.8344134802333117, "grad_norm": 0.4825884997844696, "learning_rate": 8.470634534303304e-06, "loss": 0.1412, "step": 2575 }, { "epoch": 0.8347375243033053, "grad_norm": 0.48204711079597473, "learning_rate": 8.469375143882972e-06, "loss": 0.165, "step": 2576 }, { "epoch": 0.8350615683732988, "grad_norm": 0.4458855986595154, "learning_rate": 8.468115328844708e-06, "loss": 0.1344, "step": 2577 }, { "epoch": 0.8353856124432922, "grad_norm": 0.47697317600250244, "learning_rate": 8.466855089342703e-06, "loss": 0.1482, "step": 2578 }, { "epoch": 0.8357096565132858, "grad_norm": 0.48309290409088135, "learning_rate": 8.465594425531197e-06, "loss": 0.1495, "step": 2579 }, { "epoch": 0.8360337005832793, "grad_norm": 0.47534915804862976, "learning_rate": 8.464333337564481e-06, "loss": 0.1536, "step": 2580 }, { "epoch": 0.8363577446532728, "grad_norm": 0.475146621465683, "learning_rate": 8.463071825596904e-06, "loss": 0.1545, "step": 2581 }, { "epoch": 0.8366817887232664, "grad_norm": 0.48443689942359924, "learning_rate": 8.46180988978286e-06, "loss": 0.1518, "step": 2582 }, { "epoch": 0.8370058327932599, "grad_norm": 0.47789594531059265, "learning_rate": 8.460547530276798e-06, "loss": 0.1354, "step": 2583 }, { "epoch": 0.8373298768632534, "grad_norm": 0.4564815163612366, "learning_rate": 8.459284747233218e-06, "loss": 0.1359, "step": 2584 }, { "epoch": 0.837653920933247, "grad_norm": 0.5119007229804993, "learning_rate": 8.458021540806674e-06, "loss": 0.157, "step": 2585 }, { "epoch": 0.8379779650032404, "grad_norm": 0.47056737542152405, "learning_rate": 8.45675791115177e-06, "loss": 0.1317, "step": 2586 }, { "epoch": 0.8383020090732339, "grad_norm": 0.453413724899292, "learning_rate": 8.455493858423163e-06, "loss": 0.1251, "step": 2587 }, { "epoch": 0.8386260531432275, "grad_norm": 0.49031537771224976, "learning_rate": 8.454229382775558e-06, "loss": 0.1562, "step": 2588 }, { "epoch": 0.838950097213221, "grad_norm": 0.4968595504760742, "learning_rate": 8.452964484363717e-06, "loss": 0.14, "step": 2589 }, { "epoch": 0.8392741412832145, "grad_norm": 0.5026286840438843, "learning_rate": 8.45169916334245e-06, "loss": 0.1506, "step": 2590 }, { "epoch": 0.8395981853532081, "grad_norm": 0.4869048297405243, "learning_rate": 8.450433419866619e-06, "loss": 0.1392, "step": 2591 }, { "epoch": 0.8399222294232016, "grad_norm": 0.4574863314628601, "learning_rate": 8.449167254091141e-06, "loss": 0.1305, "step": 2592 }, { "epoch": 0.840246273493195, "grad_norm": 0.5489205121994019, "learning_rate": 8.447900666170983e-06, "loss": 0.159, "step": 2593 }, { "epoch": 0.8405703175631886, "grad_norm": 0.4784124791622162, "learning_rate": 8.446633656261161e-06, "loss": 0.1539, "step": 2594 }, { "epoch": 0.8408943616331821, "grad_norm": 0.46957382559776306, "learning_rate": 8.445366224516744e-06, "loss": 0.1352, "step": 2595 }, { "epoch": 0.8412184057031756, "grad_norm": 0.49930262565612793, "learning_rate": 8.444098371092856e-06, "loss": 0.1453, "step": 2596 }, { "epoch": 0.8415424497731692, "grad_norm": 0.4878937602043152, "learning_rate": 8.44283009614467e-06, "loss": 0.1386, "step": 2597 }, { "epoch": 0.8418664938431627, "grad_norm": 0.5614111423492432, "learning_rate": 8.441561399827407e-06, "loss": 0.1933, "step": 2598 }, { "epoch": 0.8421905379131562, "grad_norm": 0.5082785487174988, "learning_rate": 8.440292282296348e-06, "loss": 0.1559, "step": 2599 }, { "epoch": 0.8425145819831497, "grad_norm": 0.48195216059684753, "learning_rate": 8.439022743706817e-06, "loss": 0.1442, "step": 2600 }, { "epoch": 0.8428386260531432, "grad_norm": 0.47267797589302063, "learning_rate": 8.437752784214195e-06, "loss": 0.1488, "step": 2601 }, { "epoch": 0.8431626701231367, "grad_norm": 0.5153549909591675, "learning_rate": 8.436482403973911e-06, "loss": 0.1597, "step": 2602 }, { "epoch": 0.8434867141931303, "grad_norm": 0.4659569561481476, "learning_rate": 8.43521160314145e-06, "loss": 0.1335, "step": 2603 }, { "epoch": 0.8438107582631238, "grad_norm": 0.49978286027908325, "learning_rate": 8.433940381872343e-06, "loss": 0.1748, "step": 2604 }, { "epoch": 0.8441348023331173, "grad_norm": 0.5085880756378174, "learning_rate": 8.432668740322177e-06, "loss": 0.1611, "step": 2605 }, { "epoch": 0.8444588464031109, "grad_norm": 0.4894861578941345, "learning_rate": 8.431396678646588e-06, "loss": 0.1496, "step": 2606 }, { "epoch": 0.8447828904731044, "grad_norm": 0.5207383036613464, "learning_rate": 8.430124197001264e-06, "loss": 0.1588, "step": 2607 }, { "epoch": 0.8451069345430978, "grad_norm": 0.5010978579521179, "learning_rate": 8.428851295541944e-06, "loss": 0.158, "step": 2608 }, { "epoch": 0.8454309786130914, "grad_norm": 0.4909263253211975, "learning_rate": 8.427577974424421e-06, "loss": 0.1502, "step": 2609 }, { "epoch": 0.8457550226830849, "grad_norm": 0.5070400238037109, "learning_rate": 8.426304233804534e-06, "loss": 0.1753, "step": 2610 }, { "epoch": 0.8460790667530784, "grad_norm": 0.4728187024593353, "learning_rate": 8.425030073838178e-06, "loss": 0.1451, "step": 2611 }, { "epoch": 0.846403110823072, "grad_norm": 0.46031421422958374, "learning_rate": 8.423755494681298e-06, "loss": 0.139, "step": 2612 }, { "epoch": 0.8467271548930655, "grad_norm": 0.43631961941719055, "learning_rate": 8.42248049648989e-06, "loss": 0.1379, "step": 2613 }, { "epoch": 0.847051198963059, "grad_norm": 0.4628419876098633, "learning_rate": 8.42120507942e-06, "loss": 0.1473, "step": 2614 }, { "epoch": 0.8473752430330524, "grad_norm": 0.4439941346645355, "learning_rate": 8.419929243627731e-06, "loss": 0.1214, "step": 2615 }, { "epoch": 0.847699287103046, "grad_norm": 0.5476211905479431, "learning_rate": 8.418652989269229e-06, "loss": 0.1616, "step": 2616 }, { "epoch": 0.8480233311730395, "grad_norm": 0.41573604941368103, "learning_rate": 8.417376316500696e-06, "loss": 0.1253, "step": 2617 }, { "epoch": 0.848347375243033, "grad_norm": 0.44237151741981506, "learning_rate": 8.416099225478383e-06, "loss": 0.1312, "step": 2618 }, { "epoch": 0.8486714193130266, "grad_norm": 0.4541921317577362, "learning_rate": 8.414821716358596e-06, "loss": 0.1444, "step": 2619 }, { "epoch": 0.8489954633830201, "grad_norm": 0.5047772526741028, "learning_rate": 8.413543789297692e-06, "loss": 0.1541, "step": 2620 }, { "epoch": 0.8493195074530137, "grad_norm": 0.478910893201828, "learning_rate": 8.41226544445207e-06, "loss": 0.1487, "step": 2621 }, { "epoch": 0.8496435515230071, "grad_norm": 0.49247220158576965, "learning_rate": 8.410986681978192e-06, "loss": 0.1678, "step": 2622 }, { "epoch": 0.8499675955930006, "grad_norm": 0.4732974171638489, "learning_rate": 8.409707502032565e-06, "loss": 0.1457, "step": 2623 }, { "epoch": 0.8502916396629941, "grad_norm": 0.5027114152908325, "learning_rate": 8.40842790477175e-06, "loss": 0.1586, "step": 2624 }, { "epoch": 0.8506156837329877, "grad_norm": 0.47344860434532166, "learning_rate": 8.407147890352353e-06, "loss": 0.137, "step": 2625 }, { "epoch": 0.8509397278029812, "grad_norm": 0.495750367641449, "learning_rate": 8.405867458931038e-06, "loss": 0.1442, "step": 2626 }, { "epoch": 0.8512637718729748, "grad_norm": 0.4720262289047241, "learning_rate": 8.40458661066452e-06, "loss": 0.1454, "step": 2627 }, { "epoch": 0.8515878159429683, "grad_norm": 0.5401026606559753, "learning_rate": 8.403305345709559e-06, "loss": 0.1551, "step": 2628 }, { "epoch": 0.8519118600129617, "grad_norm": 0.4968286454677582, "learning_rate": 8.40202366422297e-06, "loss": 0.151, "step": 2629 }, { "epoch": 0.8522359040829552, "grad_norm": 0.45830318331718445, "learning_rate": 8.400741566361617e-06, "loss": 0.1399, "step": 2630 }, { "epoch": 0.8525599481529488, "grad_norm": 0.4789321720600128, "learning_rate": 8.399459052282418e-06, "loss": 0.1383, "step": 2631 }, { "epoch": 0.8528839922229423, "grad_norm": 0.5061787962913513, "learning_rate": 8.398176122142344e-06, "loss": 0.1469, "step": 2632 }, { "epoch": 0.8532080362929358, "grad_norm": 0.510563850402832, "learning_rate": 8.396892776098406e-06, "loss": 0.1663, "step": 2633 }, { "epoch": 0.8535320803629294, "grad_norm": 0.5131493210792542, "learning_rate": 8.395609014307677e-06, "loss": 0.1537, "step": 2634 }, { "epoch": 0.8538561244329229, "grad_norm": 0.44384628534317017, "learning_rate": 8.394324836927278e-06, "loss": 0.1365, "step": 2635 }, { "epoch": 0.8541801685029164, "grad_norm": 0.4685351252555847, "learning_rate": 8.393040244114379e-06, "loss": 0.1442, "step": 2636 }, { "epoch": 0.8545042125729099, "grad_norm": 0.4517896771430969, "learning_rate": 8.3917552360262e-06, "loss": 0.1568, "step": 2637 }, { "epoch": 0.8548282566429034, "grad_norm": 0.4689981937408447, "learning_rate": 8.390469812820015e-06, "loss": 0.1563, "step": 2638 }, { "epoch": 0.8551523007128969, "grad_norm": 0.4492129385471344, "learning_rate": 8.389183974653148e-06, "loss": 0.1418, "step": 2639 }, { "epoch": 0.8554763447828905, "grad_norm": 0.5006503462791443, "learning_rate": 8.38789772168297e-06, "loss": 0.1438, "step": 2640 }, { "epoch": 0.855800388852884, "grad_norm": 0.49266812205314636, "learning_rate": 8.386611054066911e-06, "loss": 0.1435, "step": 2641 }, { "epoch": 0.8561244329228775, "grad_norm": 0.4764671325683594, "learning_rate": 8.385323971962442e-06, "loss": 0.1535, "step": 2642 }, { "epoch": 0.8564484769928711, "grad_norm": 0.5092926621437073, "learning_rate": 8.384036475527093e-06, "loss": 0.1529, "step": 2643 }, { "epoch": 0.8567725210628645, "grad_norm": 0.4808288514614105, "learning_rate": 8.38274856491844e-06, "loss": 0.144, "step": 2644 }, { "epoch": 0.857096565132858, "grad_norm": 0.5002308487892151, "learning_rate": 8.381460240294108e-06, "loss": 0.1483, "step": 2645 }, { "epoch": 0.8574206092028516, "grad_norm": 0.4734160900115967, "learning_rate": 8.380171501811778e-06, "loss": 0.1523, "step": 2646 }, { "epoch": 0.8577446532728451, "grad_norm": 0.4946827292442322, "learning_rate": 8.378882349629178e-06, "loss": 0.1433, "step": 2647 }, { "epoch": 0.8580686973428386, "grad_norm": 0.46848565340042114, "learning_rate": 8.377592783904092e-06, "loss": 0.1416, "step": 2648 }, { "epoch": 0.8583927414128322, "grad_norm": 0.4753412902355194, "learning_rate": 8.376302804794343e-06, "loss": 0.1421, "step": 2649 }, { "epoch": 0.8587167854828257, "grad_norm": 0.47309020161628723, "learning_rate": 8.375012412457818e-06, "loss": 0.143, "step": 2650 }, { "epoch": 0.8590408295528191, "grad_norm": 0.4673686623573303, "learning_rate": 8.373721607052445e-06, "loss": 0.1416, "step": 2651 }, { "epoch": 0.8593648736228127, "grad_norm": 0.46272486448287964, "learning_rate": 8.37243038873621e-06, "loss": 0.1546, "step": 2652 }, { "epoch": 0.8596889176928062, "grad_norm": 0.459953635931015, "learning_rate": 8.37113875766714e-06, "loss": 0.141, "step": 2653 }, { "epoch": 0.8600129617627997, "grad_norm": 0.45387542247772217, "learning_rate": 8.369846714003323e-06, "loss": 0.1357, "step": 2654 }, { "epoch": 0.8603370058327933, "grad_norm": 0.5121089816093445, "learning_rate": 8.36855425790289e-06, "loss": 0.1589, "step": 2655 }, { "epoch": 0.8606610499027868, "grad_norm": 0.490045428276062, "learning_rate": 8.367261389524027e-06, "loss": 0.1486, "step": 2656 }, { "epoch": 0.8609850939727803, "grad_norm": 0.4958958327770233, "learning_rate": 8.365968109024967e-06, "loss": 0.1553, "step": 2657 }, { "epoch": 0.8613091380427739, "grad_norm": 0.5067430138587952, "learning_rate": 8.364674416563995e-06, "loss": 0.1628, "step": 2658 }, { "epoch": 0.8616331821127673, "grad_norm": 0.4974973499774933, "learning_rate": 8.363380312299447e-06, "loss": 0.1578, "step": 2659 }, { "epoch": 0.8619572261827608, "grad_norm": 0.45146653056144714, "learning_rate": 8.362085796389711e-06, "loss": 0.1415, "step": 2660 }, { "epoch": 0.8622812702527544, "grad_norm": 0.46035945415496826, "learning_rate": 8.360790868993219e-06, "loss": 0.1465, "step": 2661 }, { "epoch": 0.8626053143227479, "grad_norm": 0.49235203862190247, "learning_rate": 8.35949553026846e-06, "loss": 0.1471, "step": 2662 }, { "epoch": 0.8629293583927414, "grad_norm": 0.5069705843925476, "learning_rate": 8.35819978037397e-06, "loss": 0.1527, "step": 2663 }, { "epoch": 0.863253402462735, "grad_norm": 0.4901638329029083, "learning_rate": 8.356903619468336e-06, "loss": 0.1554, "step": 2664 }, { "epoch": 0.8635774465327285, "grad_norm": 0.5096777677536011, "learning_rate": 8.355607047710199e-06, "loss": 0.1562, "step": 2665 }, { "epoch": 0.8639014906027219, "grad_norm": 0.5072498917579651, "learning_rate": 8.354310065258244e-06, "loss": 0.1601, "step": 2666 }, { "epoch": 0.8642255346727155, "grad_norm": 0.5249346494674683, "learning_rate": 8.353012672271206e-06, "loss": 0.1609, "step": 2667 }, { "epoch": 0.864549578742709, "grad_norm": 0.4963613450527191, "learning_rate": 8.351714868907878e-06, "loss": 0.1544, "step": 2668 }, { "epoch": 0.8648736228127025, "grad_norm": 0.5151003003120422, "learning_rate": 8.350416655327098e-06, "loss": 0.1494, "step": 2669 }, { "epoch": 0.8651976668826961, "grad_norm": 0.4893086552619934, "learning_rate": 8.349118031687755e-06, "loss": 0.1612, "step": 2670 }, { "epoch": 0.8655217109526896, "grad_norm": 0.49971023201942444, "learning_rate": 8.347818998148784e-06, "loss": 0.1493, "step": 2671 }, { "epoch": 0.8658457550226831, "grad_norm": 0.462802529335022, "learning_rate": 8.346519554869179e-06, "loss": 0.1289, "step": 2672 }, { "epoch": 0.8661697990926766, "grad_norm": 0.47770702838897705, "learning_rate": 8.345219702007979e-06, "loss": 0.141, "step": 2673 }, { "epoch": 0.8664938431626701, "grad_norm": 0.5033993124961853, "learning_rate": 8.343919439724268e-06, "loss": 0.1561, "step": 2674 }, { "epoch": 0.8668178872326636, "grad_norm": 0.4785201847553253, "learning_rate": 8.342618768177192e-06, "loss": 0.1423, "step": 2675 }, { "epoch": 0.8671419313026572, "grad_norm": 0.4748586416244507, "learning_rate": 8.341317687525936e-06, "loss": 0.1555, "step": 2676 }, { "epoch": 0.8674659753726507, "grad_norm": 0.5080292224884033, "learning_rate": 8.340016197929741e-06, "loss": 0.163, "step": 2677 }, { "epoch": 0.8677900194426442, "grad_norm": 0.4762882888317108, "learning_rate": 8.338714299547898e-06, "loss": 0.1454, "step": 2678 }, { "epoch": 0.8681140635126378, "grad_norm": 0.4820559322834015, "learning_rate": 8.337411992539747e-06, "loss": 0.1618, "step": 2679 }, { "epoch": 0.8684381075826313, "grad_norm": 0.4682447612285614, "learning_rate": 8.336109277064676e-06, "loss": 0.1414, "step": 2680 }, { "epoch": 0.8687621516526247, "grad_norm": 0.5108025074005127, "learning_rate": 8.334806153282126e-06, "loss": 0.1549, "step": 2681 }, { "epoch": 0.8690861957226182, "grad_norm": 0.4689571261405945, "learning_rate": 8.333502621351586e-06, "loss": 0.1533, "step": 2682 }, { "epoch": 0.8694102397926118, "grad_norm": 0.4340459108352661, "learning_rate": 8.332198681432596e-06, "loss": 0.1259, "step": 2683 }, { "epoch": 0.8697342838626053, "grad_norm": 0.45238205790519714, "learning_rate": 8.330894333684745e-06, "loss": 0.1426, "step": 2684 }, { "epoch": 0.8700583279325989, "grad_norm": 0.4423817992210388, "learning_rate": 8.329589578267674e-06, "loss": 0.1312, "step": 2685 }, { "epoch": 0.8703823720025924, "grad_norm": 0.4896402060985565, "learning_rate": 8.328284415341072e-06, "loss": 0.1646, "step": 2686 }, { "epoch": 0.8707064160725859, "grad_norm": 0.48064756393432617, "learning_rate": 8.326978845064676e-06, "loss": 0.134, "step": 2687 }, { "epoch": 0.8710304601425793, "grad_norm": 0.49347373843193054, "learning_rate": 8.325672867598278e-06, "loss": 0.1565, "step": 2688 }, { "epoch": 0.8713545042125729, "grad_norm": 0.46822673082351685, "learning_rate": 8.324366483101716e-06, "loss": 0.1442, "step": 2689 }, { "epoch": 0.8716785482825664, "grad_norm": 0.47423186898231506, "learning_rate": 8.323059691734879e-06, "loss": 0.1334, "step": 2690 }, { "epoch": 0.87200259235256, "grad_norm": 0.4813585579395294, "learning_rate": 8.321752493657706e-06, "loss": 0.1503, "step": 2691 }, { "epoch": 0.8723266364225535, "grad_norm": 0.48055943846702576, "learning_rate": 8.320444889030184e-06, "loss": 0.1454, "step": 2692 }, { "epoch": 0.872650680492547, "grad_norm": 0.45660504698753357, "learning_rate": 8.319136878012352e-06, "loss": 0.1449, "step": 2693 }, { "epoch": 0.8729747245625405, "grad_norm": 0.48899611830711365, "learning_rate": 8.317828460764297e-06, "loss": 0.1518, "step": 2694 }, { "epoch": 0.873298768632534, "grad_norm": 0.4620334804058075, "learning_rate": 8.316519637446158e-06, "loss": 0.1463, "step": 2695 }, { "epoch": 0.8736228127025275, "grad_norm": 0.4999637305736542, "learning_rate": 8.315210408218124e-06, "loss": 0.1516, "step": 2696 }, { "epoch": 0.873946856772521, "grad_norm": 0.4712785482406616, "learning_rate": 8.313900773240428e-06, "loss": 0.1384, "step": 2697 }, { "epoch": 0.8742709008425146, "grad_norm": 0.4864901304244995, "learning_rate": 8.312590732673359e-06, "loss": 0.1557, "step": 2698 }, { "epoch": 0.8745949449125081, "grad_norm": 0.46352046728134155, "learning_rate": 8.31128028667725e-06, "loss": 0.1377, "step": 2699 }, { "epoch": 0.8749189889825016, "grad_norm": 0.49007776379585266, "learning_rate": 8.309969435412493e-06, "loss": 0.1488, "step": 2700 }, { "epoch": 0.8752430330524952, "grad_norm": 0.4691650867462158, "learning_rate": 8.30865817903952e-06, "loss": 0.1453, "step": 2701 }, { "epoch": 0.8755670771224887, "grad_norm": 0.48605218529701233, "learning_rate": 8.307346517718813e-06, "loss": 0.1492, "step": 2702 }, { "epoch": 0.8758911211924821, "grad_norm": 0.4926930069923401, "learning_rate": 8.306034451610913e-06, "loss": 0.1441, "step": 2703 }, { "epoch": 0.8762151652624757, "grad_norm": 0.4776822626590729, "learning_rate": 8.3047219808764e-06, "loss": 0.1337, "step": 2704 }, { "epoch": 0.8765392093324692, "grad_norm": 0.44507667422294617, "learning_rate": 8.303409105675909e-06, "loss": 0.1378, "step": 2705 }, { "epoch": 0.8768632534024627, "grad_norm": 0.49557462334632874, "learning_rate": 8.302095826170122e-06, "loss": 0.152, "step": 2706 }, { "epoch": 0.8771872974724563, "grad_norm": 0.4982512295246124, "learning_rate": 8.300782142519772e-06, "loss": 0.1569, "step": 2707 }, { "epoch": 0.8775113415424498, "grad_norm": 0.46075528860092163, "learning_rate": 8.299468054885643e-06, "loss": 0.1374, "step": 2708 }, { "epoch": 0.8778353856124433, "grad_norm": 0.49135658144950867, "learning_rate": 8.298153563428565e-06, "loss": 0.1545, "step": 2709 }, { "epoch": 0.8781594296824368, "grad_norm": 0.5004354119300842, "learning_rate": 8.296838668309421e-06, "loss": 0.1611, "step": 2710 }, { "epoch": 0.8784834737524303, "grad_norm": 0.4572104215621948, "learning_rate": 8.295523369689138e-06, "loss": 0.1397, "step": 2711 }, { "epoch": 0.8788075178224238, "grad_norm": 0.4650300443172455, "learning_rate": 8.294207667728698e-06, "loss": 0.1437, "step": 2712 }, { "epoch": 0.8791315618924174, "grad_norm": 0.49239322543144226, "learning_rate": 8.292891562589131e-06, "loss": 0.1493, "step": 2713 }, { "epoch": 0.8794556059624109, "grad_norm": 0.4775744378566742, "learning_rate": 8.291575054431513e-06, "loss": 0.1407, "step": 2714 }, { "epoch": 0.8797796500324044, "grad_norm": 0.4666978120803833, "learning_rate": 8.290258143416974e-06, "loss": 0.1432, "step": 2715 }, { "epoch": 0.880103694102398, "grad_norm": 0.5041922926902771, "learning_rate": 8.28894082970669e-06, "loss": 0.1723, "step": 2716 }, { "epoch": 0.8804277381723914, "grad_norm": 0.5009000301361084, "learning_rate": 8.287623113461887e-06, "loss": 0.1583, "step": 2717 }, { "epoch": 0.8807517822423849, "grad_norm": 0.529421865940094, "learning_rate": 8.286304994843844e-06, "loss": 0.1664, "step": 2718 }, { "epoch": 0.8810758263123785, "grad_norm": 0.4445277452468872, "learning_rate": 8.284986474013882e-06, "loss": 0.1307, "step": 2719 }, { "epoch": 0.881399870382372, "grad_norm": 0.46141302585601807, "learning_rate": 8.283667551133376e-06, "loss": 0.1472, "step": 2720 }, { "epoch": 0.8817239144523655, "grad_norm": 0.5086813569068909, "learning_rate": 8.282348226363753e-06, "loss": 0.1632, "step": 2721 }, { "epoch": 0.8820479585223591, "grad_norm": 0.4846382737159729, "learning_rate": 8.28102849986648e-06, "loss": 0.1388, "step": 2722 }, { "epoch": 0.8823720025923526, "grad_norm": 0.5189880132675171, "learning_rate": 8.279708371803081e-06, "loss": 0.1465, "step": 2723 }, { "epoch": 0.8826960466623461, "grad_norm": 0.4765397906303406, "learning_rate": 8.27838784233513e-06, "loss": 0.1382, "step": 2724 }, { "epoch": 0.8830200907323396, "grad_norm": 0.5010700225830078, "learning_rate": 8.277066911624242e-06, "loss": 0.156, "step": 2725 }, { "epoch": 0.8833441348023331, "grad_norm": 0.5363819599151611, "learning_rate": 8.275745579832088e-06, "loss": 0.1748, "step": 2726 }, { "epoch": 0.8836681788723266, "grad_norm": 0.463154673576355, "learning_rate": 8.27442384712039e-06, "loss": 0.1403, "step": 2727 }, { "epoch": 0.8839922229423202, "grad_norm": 0.4815209209918976, "learning_rate": 8.27310171365091e-06, "loss": 0.1477, "step": 2728 }, { "epoch": 0.8843162670123137, "grad_norm": 0.4952019453048706, "learning_rate": 8.271779179585466e-06, "loss": 0.1578, "step": 2729 }, { "epoch": 0.8846403110823072, "grad_norm": 0.5037125945091248, "learning_rate": 8.270456245085923e-06, "loss": 0.1622, "step": 2730 }, { "epoch": 0.8849643551523008, "grad_norm": 0.49532702565193176, "learning_rate": 8.269132910314197e-06, "loss": 0.1533, "step": 2731 }, { "epoch": 0.8852883992222942, "grad_norm": 0.4823807179927826, "learning_rate": 8.267809175432252e-06, "loss": 0.1529, "step": 2732 }, { "epoch": 0.8856124432922877, "grad_norm": 0.5192368626594543, "learning_rate": 8.266485040602098e-06, "loss": 0.1459, "step": 2733 }, { "epoch": 0.8859364873622813, "grad_norm": 0.4310244917869568, "learning_rate": 8.265160505985796e-06, "loss": 0.1196, "step": 2734 }, { "epoch": 0.8862605314322748, "grad_norm": 0.49385082721710205, "learning_rate": 8.263835571745457e-06, "loss": 0.151, "step": 2735 }, { "epoch": 0.8865845755022683, "grad_norm": 0.4536881446838379, "learning_rate": 8.26251023804324e-06, "loss": 0.1448, "step": 2736 }, { "epoch": 0.8869086195722619, "grad_norm": 0.4777822494506836, "learning_rate": 8.261184505041354e-06, "loss": 0.1525, "step": 2737 }, { "epoch": 0.8872326636422554, "grad_norm": 0.534416913986206, "learning_rate": 8.259858372902056e-06, "loss": 0.1623, "step": 2738 }, { "epoch": 0.8875567077122488, "grad_norm": 0.4864097535610199, "learning_rate": 8.258531841787652e-06, "loss": 0.1537, "step": 2739 }, { "epoch": 0.8878807517822424, "grad_norm": 0.45313143730163574, "learning_rate": 8.257204911860494e-06, "loss": 0.1394, "step": 2740 }, { "epoch": 0.8882047958522359, "grad_norm": 0.47940826416015625, "learning_rate": 8.255877583282987e-06, "loss": 0.1537, "step": 2741 }, { "epoch": 0.8885288399222294, "grad_norm": 0.48140037059783936, "learning_rate": 8.254549856217584e-06, "loss": 0.1397, "step": 2742 }, { "epoch": 0.888852883992223, "grad_norm": 0.4759960472583771, "learning_rate": 8.253221730826784e-06, "loss": 0.1575, "step": 2743 }, { "epoch": 0.8891769280622165, "grad_norm": 0.5218455195426941, "learning_rate": 8.251893207273139e-06, "loss": 0.1598, "step": 2744 }, { "epoch": 0.88950097213221, "grad_norm": 0.514616847038269, "learning_rate": 8.250564285719245e-06, "loss": 0.1572, "step": 2745 }, { "epoch": 0.8898250162022034, "grad_norm": 0.4813917577266693, "learning_rate": 8.249234966327751e-06, "loss": 0.1356, "step": 2746 }, { "epoch": 0.890149060272197, "grad_norm": 0.4729185700416565, "learning_rate": 8.247905249261352e-06, "loss": 0.1442, "step": 2747 }, { "epoch": 0.8904731043421905, "grad_norm": 0.49564072489738464, "learning_rate": 8.246575134682792e-06, "loss": 0.1403, "step": 2748 }, { "epoch": 0.890797148412184, "grad_norm": 0.47402387857437134, "learning_rate": 8.245244622754866e-06, "loss": 0.143, "step": 2749 }, { "epoch": 0.8911211924821776, "grad_norm": 0.46405506134033203, "learning_rate": 8.243913713640415e-06, "loss": 0.1364, "step": 2750 }, { "epoch": 0.8914452365521711, "grad_norm": 0.5036703944206238, "learning_rate": 8.242582407502327e-06, "loss": 0.1498, "step": 2751 }, { "epoch": 0.8917692806221647, "grad_norm": 0.46614500880241394, "learning_rate": 8.241250704503545e-06, "loss": 0.1373, "step": 2752 }, { "epoch": 0.8920933246921582, "grad_norm": 0.4918436110019684, "learning_rate": 8.239918604807054e-06, "loss": 0.1482, "step": 2753 }, { "epoch": 0.8924173687621516, "grad_norm": 0.47470030188560486, "learning_rate": 8.23858610857589e-06, "loss": 0.1351, "step": 2754 }, { "epoch": 0.8927414128321451, "grad_norm": 0.4631500840187073, "learning_rate": 8.237253215973138e-06, "loss": 0.142, "step": 2755 }, { "epoch": 0.8930654569021387, "grad_norm": 0.442050963640213, "learning_rate": 8.235919927161931e-06, "loss": 0.1281, "step": 2756 }, { "epoch": 0.8933895009721322, "grad_norm": 0.46017080545425415, "learning_rate": 8.234586242305451e-06, "loss": 0.1421, "step": 2757 }, { "epoch": 0.8937135450421257, "grad_norm": 0.5412111878395081, "learning_rate": 8.233252161566928e-06, "loss": 0.1597, "step": 2758 }, { "epoch": 0.8940375891121193, "grad_norm": 0.5096152424812317, "learning_rate": 8.231917685109643e-06, "loss": 0.1626, "step": 2759 }, { "epoch": 0.8943616331821128, "grad_norm": 0.5203890800476074, "learning_rate": 8.23058281309692e-06, "loss": 0.1666, "step": 2760 }, { "epoch": 0.8946856772521062, "grad_norm": 0.4767521321773529, "learning_rate": 8.229247545692134e-06, "loss": 0.1453, "step": 2761 }, { "epoch": 0.8950097213220998, "grad_norm": 0.4585646390914917, "learning_rate": 8.22791188305871e-06, "loss": 0.1268, "step": 2762 }, { "epoch": 0.8953337653920933, "grad_norm": 0.4929313361644745, "learning_rate": 8.226575825360122e-06, "loss": 0.1527, "step": 2763 }, { "epoch": 0.8956578094620868, "grad_norm": 0.5119484066963196, "learning_rate": 8.225239372759888e-06, "loss": 0.1568, "step": 2764 }, { "epoch": 0.8959818535320804, "grad_norm": 0.471207857131958, "learning_rate": 8.223902525421576e-06, "loss": 0.1411, "step": 2765 }, { "epoch": 0.8963058976020739, "grad_norm": 0.46978434920310974, "learning_rate": 8.222565283508806e-06, "loss": 0.155, "step": 2766 }, { "epoch": 0.8966299416720674, "grad_norm": 0.42427974939346313, "learning_rate": 8.221227647185241e-06, "loss": 0.1266, "step": 2767 }, { "epoch": 0.8969539857420609, "grad_norm": 0.504667341709137, "learning_rate": 8.219889616614596e-06, "loss": 0.1687, "step": 2768 }, { "epoch": 0.8972780298120544, "grad_norm": 0.4763314723968506, "learning_rate": 8.218551191960633e-06, "loss": 0.1424, "step": 2769 }, { "epoch": 0.8976020738820479, "grad_norm": 0.46471917629241943, "learning_rate": 8.217212373387164e-06, "loss": 0.1395, "step": 2770 }, { "epoch": 0.8979261179520415, "grad_norm": 0.4854791760444641, "learning_rate": 8.215873161058043e-06, "loss": 0.1556, "step": 2771 }, { "epoch": 0.898250162022035, "grad_norm": 0.500381588935852, "learning_rate": 8.21453355513718e-06, "loss": 0.1554, "step": 2772 }, { "epoch": 0.8985742060920285, "grad_norm": 0.46978870034217834, "learning_rate": 8.21319355578853e-06, "loss": 0.1452, "step": 2773 }, { "epoch": 0.8988982501620221, "grad_norm": 0.4473946690559387, "learning_rate": 8.211853163176093e-06, "loss": 0.1343, "step": 2774 }, { "epoch": 0.8992222942320156, "grad_norm": 0.515302300453186, "learning_rate": 8.210512377463924e-06, "loss": 0.1588, "step": 2775 }, { "epoch": 0.899546338302009, "grad_norm": 0.47159039974212646, "learning_rate": 8.209171198816119e-06, "loss": 0.1461, "step": 2776 }, { "epoch": 0.8998703823720026, "grad_norm": 0.50645512342453, "learning_rate": 8.207829627396827e-06, "loss": 0.1616, "step": 2777 }, { "epoch": 0.9001944264419961, "grad_norm": 0.49735721945762634, "learning_rate": 8.206487663370242e-06, "loss": 0.1536, "step": 2778 }, { "epoch": 0.9005184705119896, "grad_norm": 0.5144430994987488, "learning_rate": 8.205145306900608e-06, "loss": 0.1683, "step": 2779 }, { "epoch": 0.9008425145819832, "grad_norm": 0.5268824696540833, "learning_rate": 8.203802558152216e-06, "loss": 0.1611, "step": 2780 }, { "epoch": 0.9011665586519767, "grad_norm": 0.4668341279029846, "learning_rate": 8.202459417289409e-06, "loss": 0.1575, "step": 2781 }, { "epoch": 0.9014906027219702, "grad_norm": 0.4710846543312073, "learning_rate": 8.201115884476568e-06, "loss": 0.1329, "step": 2782 }, { "epoch": 0.9018146467919637, "grad_norm": 0.45003706216812134, "learning_rate": 8.199771959878135e-06, "loss": 0.1412, "step": 2783 }, { "epoch": 0.9021386908619572, "grad_norm": 0.41275516152381897, "learning_rate": 8.19842764365859e-06, "loss": 0.1176, "step": 2784 }, { "epoch": 0.9024627349319507, "grad_norm": 0.4877631366252899, "learning_rate": 8.197082935982463e-06, "loss": 0.1438, "step": 2785 }, { "epoch": 0.9027867790019443, "grad_norm": 0.5254728198051453, "learning_rate": 8.195737837014336e-06, "loss": 0.162, "step": 2786 }, { "epoch": 0.9031108230719378, "grad_norm": 0.48036709427833557, "learning_rate": 8.194392346918834e-06, "loss": 0.1442, "step": 2787 }, { "epoch": 0.9034348671419313, "grad_norm": 0.4583662450313568, "learning_rate": 8.193046465860635e-06, "loss": 0.1448, "step": 2788 }, { "epoch": 0.9037589112119249, "grad_norm": 0.48229631781578064, "learning_rate": 8.191700194004457e-06, "loss": 0.151, "step": 2789 }, { "epoch": 0.9040829552819183, "grad_norm": 0.4751323163509369, "learning_rate": 8.190353531515074e-06, "loss": 0.1509, "step": 2790 }, { "epoch": 0.9044069993519118, "grad_norm": 0.4783444106578827, "learning_rate": 8.189006478557303e-06, "loss": 0.1534, "step": 2791 }, { "epoch": 0.9047310434219054, "grad_norm": 0.476629376411438, "learning_rate": 8.187659035296011e-06, "loss": 0.1413, "step": 2792 }, { "epoch": 0.9050550874918989, "grad_norm": 0.4860250651836395, "learning_rate": 8.186311201896114e-06, "loss": 0.1569, "step": 2793 }, { "epoch": 0.9053791315618924, "grad_norm": 0.48255497217178345, "learning_rate": 8.18496297852257e-06, "loss": 0.1607, "step": 2794 }, { "epoch": 0.905703175631886, "grad_norm": 0.43398430943489075, "learning_rate": 8.183614365340393e-06, "loss": 0.128, "step": 2795 }, { "epoch": 0.9060272197018795, "grad_norm": 0.4728407561779022, "learning_rate": 8.182265362514633e-06, "loss": 0.1459, "step": 2796 }, { "epoch": 0.906351263771873, "grad_norm": 0.4984278380870819, "learning_rate": 8.180915970210404e-06, "loss": 0.1526, "step": 2797 }, { "epoch": 0.9066753078418665, "grad_norm": 0.44915592670440674, "learning_rate": 8.17956618859285e-06, "loss": 0.1503, "step": 2798 }, { "epoch": 0.90699935191186, "grad_norm": 0.48544642329216003, "learning_rate": 8.178216017827178e-06, "loss": 0.1621, "step": 2799 }, { "epoch": 0.9073233959818535, "grad_norm": 0.479897677898407, "learning_rate": 8.176865458078632e-06, "loss": 0.1427, "step": 2800 }, { "epoch": 0.907647440051847, "grad_norm": 0.47575831413269043, "learning_rate": 8.175514509512508e-06, "loss": 0.1354, "step": 2801 }, { "epoch": 0.9079714841218406, "grad_norm": 0.44075480103492737, "learning_rate": 8.17416317229415e-06, "loss": 0.1398, "step": 2802 }, { "epoch": 0.9082955281918341, "grad_norm": 0.44483470916748047, "learning_rate": 8.172811446588947e-06, "loss": 0.1308, "step": 2803 }, { "epoch": 0.9086195722618277, "grad_norm": 0.43445441126823425, "learning_rate": 8.171459332562339e-06, "loss": 0.1349, "step": 2804 }, { "epoch": 0.9089436163318211, "grad_norm": 0.49809399247169495, "learning_rate": 8.17010683037981e-06, "loss": 0.1493, "step": 2805 }, { "epoch": 0.9092676604018146, "grad_norm": 0.4239096939563751, "learning_rate": 8.168753940206895e-06, "loss": 0.1249, "step": 2806 }, { "epoch": 0.9095917044718081, "grad_norm": 0.45722323656082153, "learning_rate": 8.167400662209173e-06, "loss": 0.1361, "step": 2807 }, { "epoch": 0.9099157485418017, "grad_norm": 0.5097815990447998, "learning_rate": 8.166046996552272e-06, "loss": 0.1507, "step": 2808 }, { "epoch": 0.9102397926117952, "grad_norm": 0.48827484250068665, "learning_rate": 8.16469294340187e-06, "loss": 0.1473, "step": 2809 }, { "epoch": 0.9105638366817888, "grad_norm": 0.4702487587928772, "learning_rate": 8.163338502923687e-06, "loss": 0.1418, "step": 2810 }, { "epoch": 0.9108878807517823, "grad_norm": 0.517118513584137, "learning_rate": 8.161983675283496e-06, "loss": 0.1607, "step": 2811 }, { "epoch": 0.9112119248217757, "grad_norm": 0.4593200087547302, "learning_rate": 8.160628460647113e-06, "loss": 0.1414, "step": 2812 }, { "epoch": 0.9115359688917692, "grad_norm": 0.49310043454170227, "learning_rate": 8.159272859180403e-06, "loss": 0.1355, "step": 2813 }, { "epoch": 0.9118600129617628, "grad_norm": 0.5267544388771057, "learning_rate": 8.15791687104928e-06, "loss": 0.1552, "step": 2814 }, { "epoch": 0.9121840570317563, "grad_norm": 0.46945372223854065, "learning_rate": 8.156560496419701e-06, "loss": 0.151, "step": 2815 }, { "epoch": 0.9125081011017498, "grad_norm": 0.4934663772583008, "learning_rate": 8.155203735457677e-06, "loss": 0.1539, "step": 2816 }, { "epoch": 0.9128321451717434, "grad_norm": 0.42091354727745056, "learning_rate": 8.15384658832926e-06, "loss": 0.1275, "step": 2817 }, { "epoch": 0.9131561892417369, "grad_norm": 0.45683035254478455, "learning_rate": 8.152489055200553e-06, "loss": 0.141, "step": 2818 }, { "epoch": 0.9134802333117304, "grad_norm": 0.45693978667259216, "learning_rate": 8.151131136237705e-06, "loss": 0.1457, "step": 2819 }, { "epoch": 0.9138042773817239, "grad_norm": 0.4520793855190277, "learning_rate": 8.149772831606908e-06, "loss": 0.1531, "step": 2820 }, { "epoch": 0.9141283214517174, "grad_norm": 0.47464117407798767, "learning_rate": 8.14841414147441e-06, "loss": 0.1459, "step": 2821 }, { "epoch": 0.9144523655217109, "grad_norm": 0.48440176248550415, "learning_rate": 8.1470550660065e-06, "loss": 0.1525, "step": 2822 }, { "epoch": 0.9147764095917045, "grad_norm": 0.4686981737613678, "learning_rate": 8.145695605369516e-06, "loss": 0.1466, "step": 2823 }, { "epoch": 0.915100453661698, "grad_norm": 0.48142120242118835, "learning_rate": 8.144335759729844e-06, "loss": 0.1502, "step": 2824 }, { "epoch": 0.9154244977316915, "grad_norm": 0.46992120146751404, "learning_rate": 8.142975529253914e-06, "loss": 0.1429, "step": 2825 }, { "epoch": 0.9157485418016851, "grad_norm": 0.45641136169433594, "learning_rate": 8.141614914108204e-06, "loss": 0.1401, "step": 2826 }, { "epoch": 0.9160725858716785, "grad_norm": 0.4673773944377899, "learning_rate": 8.140253914459244e-06, "loss": 0.1395, "step": 2827 }, { "epoch": 0.916396629941672, "grad_norm": 0.4894830584526062, "learning_rate": 8.138892530473601e-06, "loss": 0.1651, "step": 2828 }, { "epoch": 0.9167206740116656, "grad_norm": 0.49354568123817444, "learning_rate": 8.137530762317902e-06, "loss": 0.1439, "step": 2829 }, { "epoch": 0.9170447180816591, "grad_norm": 0.4844863712787628, "learning_rate": 8.136168610158812e-06, "loss": 0.1432, "step": 2830 }, { "epoch": 0.9173687621516526, "grad_norm": 0.5116393566131592, "learning_rate": 8.134806074163044e-06, "loss": 0.1636, "step": 2831 }, { "epoch": 0.9176928062216462, "grad_norm": 0.4723188579082489, "learning_rate": 8.13344315449736e-06, "loss": 0.1446, "step": 2832 }, { "epoch": 0.9180168502916397, "grad_norm": 0.4761399030685425, "learning_rate": 8.132079851328565e-06, "loss": 0.1335, "step": 2833 }, { "epoch": 0.9183408943616331, "grad_norm": 0.4631648361682892, "learning_rate": 8.13071616482352e-06, "loss": 0.1506, "step": 2834 }, { "epoch": 0.9186649384316267, "grad_norm": 0.47083237767219543, "learning_rate": 8.129352095149123e-06, "loss": 0.145, "step": 2835 }, { "epoch": 0.9189889825016202, "grad_norm": 0.513043999671936, "learning_rate": 8.127987642472324e-06, "loss": 0.1487, "step": 2836 }, { "epoch": 0.9193130265716137, "grad_norm": 0.4924792945384979, "learning_rate": 8.126622806960121e-06, "loss": 0.1544, "step": 2837 }, { "epoch": 0.9196370706416073, "grad_norm": 0.48701411485671997, "learning_rate": 8.125257588779553e-06, "loss": 0.1559, "step": 2838 }, { "epoch": 0.9199611147116008, "grad_norm": 0.4733465909957886, "learning_rate": 8.12389198809771e-06, "loss": 0.1388, "step": 2839 }, { "epoch": 0.9202851587815943, "grad_norm": 0.45928820967674255, "learning_rate": 8.12252600508173e-06, "loss": 0.1444, "step": 2840 }, { "epoch": 0.9206092028515879, "grad_norm": 0.4772232174873352, "learning_rate": 8.121159639898796e-06, "loss": 0.1521, "step": 2841 }, { "epoch": 0.9209332469215813, "grad_norm": 0.4992552697658539, "learning_rate": 8.119792892716136e-06, "loss": 0.1538, "step": 2842 }, { "epoch": 0.9212572909915748, "grad_norm": 0.45725077390670776, "learning_rate": 8.11842576370103e-06, "loss": 0.1342, "step": 2843 }, { "epoch": 0.9215813350615684, "grad_norm": 0.4585983455181122, "learning_rate": 8.117058253020797e-06, "loss": 0.1424, "step": 2844 }, { "epoch": 0.9219053791315619, "grad_norm": 0.5417724847793579, "learning_rate": 8.11569036084281e-06, "loss": 0.1503, "step": 2845 }, { "epoch": 0.9222294232015554, "grad_norm": 0.4953386187553406, "learning_rate": 8.114322087334485e-06, "loss": 0.1365, "step": 2846 }, { "epoch": 0.922553467271549, "grad_norm": 0.4717243015766144, "learning_rate": 8.112953432663286e-06, "loss": 0.1402, "step": 2847 }, { "epoch": 0.9228775113415425, "grad_norm": 0.49322399497032166, "learning_rate": 8.11158439699672e-06, "loss": 0.1508, "step": 2848 }, { "epoch": 0.9232015554115359, "grad_norm": 0.4861105978488922, "learning_rate": 8.11021498050235e-06, "loss": 0.154, "step": 2849 }, { "epoch": 0.9235255994815295, "grad_norm": 0.44113120436668396, "learning_rate": 8.108845183347773e-06, "loss": 0.1416, "step": 2850 }, { "epoch": 0.923849643551523, "grad_norm": 0.45397233963012695, "learning_rate": 8.107475005700645e-06, "loss": 0.137, "step": 2851 }, { "epoch": 0.9241736876215165, "grad_norm": 0.4606253504753113, "learning_rate": 8.106104447728656e-06, "loss": 0.1401, "step": 2852 }, { "epoch": 0.9244977316915101, "grad_norm": 0.5172616243362427, "learning_rate": 8.104733509599552e-06, "loss": 0.147, "step": 2853 }, { "epoch": 0.9248217757615036, "grad_norm": 0.48049119114875793, "learning_rate": 8.103362191481122e-06, "loss": 0.1506, "step": 2854 }, { "epoch": 0.9251458198314971, "grad_norm": 0.4916389286518097, "learning_rate": 8.101990493541205e-06, "loss": 0.1684, "step": 2855 }, { "epoch": 0.9254698639014906, "grad_norm": 0.459881067276001, "learning_rate": 8.10061841594768e-06, "loss": 0.1599, "step": 2856 }, { "epoch": 0.9257939079714841, "grad_norm": 0.5155965685844421, "learning_rate": 8.099245958868478e-06, "loss": 0.1479, "step": 2857 }, { "epoch": 0.9261179520414776, "grad_norm": 0.47220301628112793, "learning_rate": 8.097873122471571e-06, "loss": 0.1543, "step": 2858 }, { "epoch": 0.9264419961114712, "grad_norm": 0.47689226269721985, "learning_rate": 8.096499906924987e-06, "loss": 0.1375, "step": 2859 }, { "epoch": 0.9267660401814647, "grad_norm": 0.5045191645622253, "learning_rate": 8.095126312396789e-06, "loss": 0.1537, "step": 2860 }, { "epoch": 0.9270900842514582, "grad_norm": 0.43550044298171997, "learning_rate": 8.093752339055094e-06, "loss": 0.1357, "step": 2861 }, { "epoch": 0.9274141283214518, "grad_norm": 0.4881744086742401, "learning_rate": 8.09237798706806e-06, "loss": 0.1501, "step": 2862 }, { "epoch": 0.9277381723914452, "grad_norm": 0.48399439454078674, "learning_rate": 8.0910032566039e-06, "loss": 0.1465, "step": 2863 }, { "epoch": 0.9280622164614387, "grad_norm": 0.4915633201599121, "learning_rate": 8.089628147830864e-06, "loss": 0.1465, "step": 2864 }, { "epoch": 0.9283862605314323, "grad_norm": 0.5544672608375549, "learning_rate": 8.088252660917253e-06, "loss": 0.1691, "step": 2865 }, { "epoch": 0.9287103046014258, "grad_norm": 0.4762614667415619, "learning_rate": 8.086876796031411e-06, "loss": 0.1371, "step": 2866 }, { "epoch": 0.9290343486714193, "grad_norm": 0.4570804834365845, "learning_rate": 8.085500553341734e-06, "loss": 0.1379, "step": 2867 }, { "epoch": 0.9293583927414129, "grad_norm": 0.4688291847705841, "learning_rate": 8.08412393301666e-06, "loss": 0.1434, "step": 2868 }, { "epoch": 0.9296824368114064, "grad_norm": 0.4795081913471222, "learning_rate": 8.082746935224673e-06, "loss": 0.1268, "step": 2869 }, { "epoch": 0.9300064808813999, "grad_norm": 0.4637512266635895, "learning_rate": 8.081369560134303e-06, "loss": 0.141, "step": 2870 }, { "epoch": 0.9303305249513933, "grad_norm": 0.4551636576652527, "learning_rate": 8.079991807914129e-06, "loss": 0.1452, "step": 2871 }, { "epoch": 0.9306545690213869, "grad_norm": 0.5067548155784607, "learning_rate": 8.078613678732774e-06, "loss": 0.1649, "step": 2872 }, { "epoch": 0.9309786130913804, "grad_norm": 0.4417652189731598, "learning_rate": 8.07723517275891e-06, "loss": 0.1344, "step": 2873 }, { "epoch": 0.931302657161374, "grad_norm": 0.4837871789932251, "learning_rate": 8.075856290161251e-06, "loss": 0.1475, "step": 2874 }, { "epoch": 0.9316267012313675, "grad_norm": 0.4757400155067444, "learning_rate": 8.074477031108556e-06, "loss": 0.1395, "step": 2875 }, { "epoch": 0.931950745301361, "grad_norm": 0.47112858295440674, "learning_rate": 8.073097395769635e-06, "loss": 0.1409, "step": 2876 }, { "epoch": 0.9322747893713546, "grad_norm": 0.4562358856201172, "learning_rate": 8.071717384313347e-06, "loss": 0.1356, "step": 2877 }, { "epoch": 0.932598833441348, "grad_norm": 0.45477864146232605, "learning_rate": 8.070336996908585e-06, "loss": 0.1267, "step": 2878 }, { "epoch": 0.9329228775113415, "grad_norm": 0.47922149300575256, "learning_rate": 8.068956233724298e-06, "loss": 0.1351, "step": 2879 }, { "epoch": 0.933246921581335, "grad_norm": 0.4852301776409149, "learning_rate": 8.067575094929476e-06, "loss": 0.1384, "step": 2880 }, { "epoch": 0.9335709656513286, "grad_norm": 0.47646522521972656, "learning_rate": 8.066193580693163e-06, "loss": 0.1499, "step": 2881 }, { "epoch": 0.9338950097213221, "grad_norm": 0.44006016850471497, "learning_rate": 8.064811691184436e-06, "loss": 0.126, "step": 2882 }, { "epoch": 0.9342190537913156, "grad_norm": 0.4813414514064789, "learning_rate": 8.063429426572427e-06, "loss": 0.1448, "step": 2883 }, { "epoch": 0.9345430978613092, "grad_norm": 0.46030181646347046, "learning_rate": 8.062046787026314e-06, "loss": 0.1282, "step": 2884 }, { "epoch": 0.9348671419313026, "grad_norm": 0.4716332256793976, "learning_rate": 8.060663772715318e-06, "loss": 0.136, "step": 2885 }, { "epoch": 0.9351911860012961, "grad_norm": 0.49652379751205444, "learning_rate": 8.059280383808704e-06, "loss": 0.1544, "step": 2886 }, { "epoch": 0.9355152300712897, "grad_norm": 0.4331119656562805, "learning_rate": 8.057896620475786e-06, "loss": 0.1243, "step": 2887 }, { "epoch": 0.9358392741412832, "grad_norm": 0.4984351098537445, "learning_rate": 8.056512482885927e-06, "loss": 0.1533, "step": 2888 }, { "epoch": 0.9361633182112767, "grad_norm": 0.4716634452342987, "learning_rate": 8.055127971208529e-06, "loss": 0.1511, "step": 2889 }, { "epoch": 0.9364873622812703, "grad_norm": 0.47842100262641907, "learning_rate": 8.053743085613042e-06, "loss": 0.1507, "step": 2890 }, { "epoch": 0.9368114063512638, "grad_norm": 0.4523334801197052, "learning_rate": 8.052357826268965e-06, "loss": 0.143, "step": 2891 }, { "epoch": 0.9371354504212573, "grad_norm": 0.5262511968612671, "learning_rate": 8.05097219334584e-06, "loss": 0.1683, "step": 2892 }, { "epoch": 0.9374594944912508, "grad_norm": 0.458168625831604, "learning_rate": 8.049586187013252e-06, "loss": 0.133, "step": 2893 }, { "epoch": 0.9377835385612443, "grad_norm": 0.4705049395561218, "learning_rate": 8.048199807440838e-06, "loss": 0.1464, "step": 2894 }, { "epoch": 0.9381075826312378, "grad_norm": 0.4413962662220001, "learning_rate": 8.046813054798274e-06, "loss": 0.1377, "step": 2895 }, { "epoch": 0.9384316267012314, "grad_norm": 0.461868554353714, "learning_rate": 8.04542592925529e-06, "loss": 0.1305, "step": 2896 }, { "epoch": 0.9387556707712249, "grad_norm": 0.45427650213241577, "learning_rate": 8.044038430981655e-06, "loss": 0.1515, "step": 2897 }, { "epoch": 0.9390797148412184, "grad_norm": 0.4581550061702728, "learning_rate": 8.042650560147184e-06, "loss": 0.1444, "step": 2898 }, { "epoch": 0.939403758911212, "grad_norm": 0.4827260673046112, "learning_rate": 8.041262316921741e-06, "loss": 0.151, "step": 2899 }, { "epoch": 0.9397278029812054, "grad_norm": 0.4749002158641815, "learning_rate": 8.03987370147523e-06, "loss": 0.1417, "step": 2900 }, { "epoch": 0.9400518470511989, "grad_norm": 0.4695237874984741, "learning_rate": 8.038484713977606e-06, "loss": 0.146, "step": 2901 }, { "epoch": 0.9403758911211925, "grad_norm": 0.48500707745552063, "learning_rate": 8.037095354598869e-06, "loss": 0.1451, "step": 2902 }, { "epoch": 0.940699935191186, "grad_norm": 0.507603645324707, "learning_rate": 8.03570562350906e-06, "loss": 0.1548, "step": 2903 }, { "epoch": 0.9410239792611795, "grad_norm": 0.5060255527496338, "learning_rate": 8.034315520878272e-06, "loss": 0.1522, "step": 2904 }, { "epoch": 0.9413480233311731, "grad_norm": 0.45632651448249817, "learning_rate": 8.03292504687664e-06, "loss": 0.1385, "step": 2905 }, { "epoch": 0.9416720674011666, "grad_norm": 0.4859046936035156, "learning_rate": 8.031534201674342e-06, "loss": 0.152, "step": 2906 }, { "epoch": 0.94199611147116, "grad_norm": 0.4495350420475006, "learning_rate": 8.030142985441605e-06, "loss": 0.1347, "step": 2907 }, { "epoch": 0.9423201555411536, "grad_norm": 0.4688016176223755, "learning_rate": 8.028751398348702e-06, "loss": 0.1465, "step": 2908 }, { "epoch": 0.9426441996111471, "grad_norm": 0.4817104637622833, "learning_rate": 8.027359440565946e-06, "loss": 0.1512, "step": 2909 }, { "epoch": 0.9429682436811406, "grad_norm": 0.46768316626548767, "learning_rate": 8.025967112263704e-06, "loss": 0.1559, "step": 2910 }, { "epoch": 0.9432922877511342, "grad_norm": 0.45750781893730164, "learning_rate": 8.02457441361238e-06, "loss": 0.1401, "step": 2911 }, { "epoch": 0.9436163318211277, "grad_norm": 0.5045716762542725, "learning_rate": 8.023181344782426e-06, "loss": 0.1468, "step": 2912 }, { "epoch": 0.9439403758911212, "grad_norm": 0.4775066673755646, "learning_rate": 8.021787905944346e-06, "loss": 0.1443, "step": 2913 }, { "epoch": 0.9442644199611148, "grad_norm": 0.48258528113365173, "learning_rate": 8.020394097268677e-06, "loss": 0.1496, "step": 2914 }, { "epoch": 0.9445884640311082, "grad_norm": 0.4880499541759491, "learning_rate": 8.01899991892601e-06, "loss": 0.1441, "step": 2915 }, { "epoch": 0.9449125081011017, "grad_norm": 0.45834702253341675, "learning_rate": 8.01760537108698e-06, "loss": 0.1348, "step": 2916 }, { "epoch": 0.9452365521710953, "grad_norm": 0.465637743473053, "learning_rate": 8.016210453922265e-06, "loss": 0.1525, "step": 2917 }, { "epoch": 0.9455605962410888, "grad_norm": 0.4635922908782959, "learning_rate": 8.01481516760259e-06, "loss": 0.1338, "step": 2918 }, { "epoch": 0.9458846403110823, "grad_norm": 0.4392727315425873, "learning_rate": 8.013419512298724e-06, "loss": 0.1253, "step": 2919 }, { "epoch": 0.9462086843810759, "grad_norm": 0.5187051892280579, "learning_rate": 8.012023488181481e-06, "loss": 0.1575, "step": 2920 }, { "epoch": 0.9465327284510694, "grad_norm": 0.4780518114566803, "learning_rate": 8.010627095421722e-06, "loss": 0.1458, "step": 2921 }, { "epoch": 0.9468567725210628, "grad_norm": 0.4945436120033264, "learning_rate": 8.009230334190352e-06, "loss": 0.1379, "step": 2922 }, { "epoch": 0.9471808165910564, "grad_norm": 0.4752620756626129, "learning_rate": 8.007833204658322e-06, "loss": 0.1457, "step": 2923 }, { "epoch": 0.9475048606610499, "grad_norm": 0.5349672436714172, "learning_rate": 8.006435706996623e-06, "loss": 0.1564, "step": 2924 }, { "epoch": 0.9478289047310434, "grad_norm": 0.5222102999687195, "learning_rate": 8.0050378413763e-06, "loss": 0.1553, "step": 2925 }, { "epoch": 0.948152948801037, "grad_norm": 0.47248953580856323, "learning_rate": 8.003639607968436e-06, "loss": 0.1483, "step": 2926 }, { "epoch": 0.9484769928710305, "grad_norm": 0.46400532126426697, "learning_rate": 8.00224100694416e-06, "loss": 0.1316, "step": 2927 }, { "epoch": 0.948801036941024, "grad_norm": 0.48341265320777893, "learning_rate": 8.000842038474652e-06, "loss": 0.1511, "step": 2928 }, { "epoch": 0.9491250810110174, "grad_norm": 0.508072555065155, "learning_rate": 7.999442702731127e-06, "loss": 0.1748, "step": 2929 }, { "epoch": 0.949449125081011, "grad_norm": 0.5086212158203125, "learning_rate": 7.99804299988485e-06, "loss": 0.15, "step": 2930 }, { "epoch": 0.9497731691510045, "grad_norm": 0.4628218114376068, "learning_rate": 7.996642930107136e-06, "loss": 0.1561, "step": 2931 }, { "epoch": 0.950097213220998, "grad_norm": 0.45513710379600525, "learning_rate": 7.995242493569335e-06, "loss": 0.1385, "step": 2932 }, { "epoch": 0.9504212572909916, "grad_norm": 0.42527732253074646, "learning_rate": 7.99384169044285e-06, "loss": 0.1277, "step": 2933 }, { "epoch": 0.9507453013609851, "grad_norm": 0.4785211682319641, "learning_rate": 7.992440520899126e-06, "loss": 0.1344, "step": 2934 }, { "epoch": 0.9510693454309787, "grad_norm": 0.4923803210258484, "learning_rate": 7.991038985109649e-06, "loss": 0.1598, "step": 2935 }, { "epoch": 0.9513933895009722, "grad_norm": 0.45439448952674866, "learning_rate": 7.989637083245958e-06, "loss": 0.1386, "step": 2936 }, { "epoch": 0.9517174335709656, "grad_norm": 0.5161374807357788, "learning_rate": 7.988234815479629e-06, "loss": 0.169, "step": 2937 }, { "epoch": 0.9520414776409591, "grad_norm": 0.4716404378414154, "learning_rate": 7.986832181982286e-06, "loss": 0.1378, "step": 2938 }, { "epoch": 0.9523655217109527, "grad_norm": 0.4898372292518616, "learning_rate": 7.985429182925599e-06, "loss": 0.1373, "step": 2939 }, { "epoch": 0.9526895657809462, "grad_norm": 0.49842342734336853, "learning_rate": 7.984025818481283e-06, "loss": 0.1486, "step": 2940 }, { "epoch": 0.9530136098509397, "grad_norm": 0.44580867886543274, "learning_rate": 7.982622088821092e-06, "loss": 0.136, "step": 2941 }, { "epoch": 0.9533376539209333, "grad_norm": 0.47177910804748535, "learning_rate": 7.981217994116833e-06, "loss": 0.1461, "step": 2942 }, { "epoch": 0.9536616979909268, "grad_norm": 0.4503575563430786, "learning_rate": 7.97981353454035e-06, "loss": 0.1307, "step": 2943 }, { "epoch": 0.9539857420609202, "grad_norm": 0.5025628209114075, "learning_rate": 7.978408710263538e-06, "loss": 0.1553, "step": 2944 }, { "epoch": 0.9543097861309138, "grad_norm": 0.46699047088623047, "learning_rate": 7.977003521458336e-06, "loss": 0.1358, "step": 2945 }, { "epoch": 0.9546338302009073, "grad_norm": 0.47436779737472534, "learning_rate": 7.97559796829672e-06, "loss": 0.1404, "step": 2946 }, { "epoch": 0.9549578742709008, "grad_norm": 0.5132781267166138, "learning_rate": 7.97419205095072e-06, "loss": 0.1638, "step": 2947 }, { "epoch": 0.9552819183408944, "grad_norm": 0.4697002172470093, "learning_rate": 7.972785769592404e-06, "loss": 0.1535, "step": 2948 }, { "epoch": 0.9556059624108879, "grad_norm": 0.46338951587677, "learning_rate": 7.971379124393887e-06, "loss": 0.1456, "step": 2949 }, { "epoch": 0.9559300064808814, "grad_norm": 0.45680999755859375, "learning_rate": 7.969972115527334e-06, "loss": 0.1406, "step": 2950 }, { "epoch": 0.9562540505508749, "grad_norm": 0.4780776798725128, "learning_rate": 7.968564743164944e-06, "loss": 0.1475, "step": 2951 }, { "epoch": 0.9565780946208684, "grad_norm": 0.4549115300178528, "learning_rate": 7.967157007478967e-06, "loss": 0.1313, "step": 2952 }, { "epoch": 0.9569021386908619, "grad_norm": 0.49157506227493286, "learning_rate": 7.965748908641698e-06, "loss": 0.1502, "step": 2953 }, { "epoch": 0.9572261827608555, "grad_norm": 0.46433138847351074, "learning_rate": 7.96434044682547e-06, "loss": 0.1488, "step": 2954 }, { "epoch": 0.957550226830849, "grad_norm": 0.48217496275901794, "learning_rate": 7.96293162220267e-06, "loss": 0.1609, "step": 2955 }, { "epoch": 0.9578742709008425, "grad_norm": 0.42298516631126404, "learning_rate": 7.961522434945723e-06, "loss": 0.1365, "step": 2956 }, { "epoch": 0.9581983149708361, "grad_norm": 0.5255170464515686, "learning_rate": 7.9601128852271e-06, "loss": 0.1532, "step": 2957 }, { "epoch": 0.9585223590408296, "grad_norm": 0.42979586124420166, "learning_rate": 7.958702973219317e-06, "loss": 0.1408, "step": 2958 }, { "epoch": 0.958846403110823, "grad_norm": 0.4746703803539276, "learning_rate": 7.957292699094932e-06, "loss": 0.152, "step": 2959 }, { "epoch": 0.9591704471808166, "grad_norm": 0.49901002645492554, "learning_rate": 7.95588206302655e-06, "loss": 0.1497, "step": 2960 }, { "epoch": 0.9594944912508101, "grad_norm": 0.4546007215976715, "learning_rate": 7.954471065186816e-06, "loss": 0.1389, "step": 2961 }, { "epoch": 0.9598185353208036, "grad_norm": 0.4283216595649719, "learning_rate": 7.953059705748427e-06, "loss": 0.1305, "step": 2962 }, { "epoch": 0.9601425793907972, "grad_norm": 0.49616873264312744, "learning_rate": 7.951647984884116e-06, "loss": 0.1507, "step": 2963 }, { "epoch": 0.9604666234607907, "grad_norm": 0.46941477060317993, "learning_rate": 7.950235902766668e-06, "loss": 0.1482, "step": 2964 }, { "epoch": 0.9607906675307842, "grad_norm": 0.4654848277568817, "learning_rate": 7.948823459568907e-06, "loss": 0.1465, "step": 2965 }, { "epoch": 0.9611147116007777, "grad_norm": 0.49566254019737244, "learning_rate": 7.947410655463699e-06, "loss": 0.1544, "step": 2966 }, { "epoch": 0.9614387556707712, "grad_norm": 0.4398048222064972, "learning_rate": 7.94599749062396e-06, "loss": 0.1377, "step": 2967 }, { "epoch": 0.9617627997407647, "grad_norm": 0.4460553824901581, "learning_rate": 7.94458396522265e-06, "loss": 0.144, "step": 2968 }, { "epoch": 0.9620868438107583, "grad_norm": 0.4828304052352905, "learning_rate": 7.943170079432764e-06, "loss": 0.1702, "step": 2969 }, { "epoch": 0.9624108878807518, "grad_norm": 0.4819565415382385, "learning_rate": 7.941755833427356e-06, "loss": 0.1525, "step": 2970 }, { "epoch": 0.9627349319507453, "grad_norm": 0.4813225567340851, "learning_rate": 7.940341227379513e-06, "loss": 0.1503, "step": 2971 }, { "epoch": 0.9630589760207389, "grad_norm": 0.46761325001716614, "learning_rate": 7.938926261462366e-06, "loss": 0.153, "step": 2972 }, { "epoch": 0.9633830200907323, "grad_norm": 0.4724077880382538, "learning_rate": 7.937510935849097e-06, "loss": 0.1488, "step": 2973 }, { "epoch": 0.9637070641607258, "grad_norm": 0.45716893672943115, "learning_rate": 7.936095250712926e-06, "loss": 0.1441, "step": 2974 }, { "epoch": 0.9640311082307194, "grad_norm": 0.45643728971481323, "learning_rate": 7.93467920622712e-06, "loss": 0.1294, "step": 2975 }, { "epoch": 0.9643551523007129, "grad_norm": 0.4829012155532837, "learning_rate": 7.93326280256499e-06, "loss": 0.152, "step": 2976 }, { "epoch": 0.9646791963707064, "grad_norm": 0.4712313413619995, "learning_rate": 7.931846039899888e-06, "loss": 0.135, "step": 2977 }, { "epoch": 0.9650032404407, "grad_norm": 0.4900357723236084, "learning_rate": 7.930428918405213e-06, "loss": 0.1479, "step": 2978 }, { "epoch": 0.9653272845106935, "grad_norm": 0.48142194747924805, "learning_rate": 7.92901143825441e-06, "loss": 0.143, "step": 2979 }, { "epoch": 0.9656513285806869, "grad_norm": 0.5146341919898987, "learning_rate": 7.927593599620958e-06, "loss": 0.1517, "step": 2980 }, { "epoch": 0.9659753726506805, "grad_norm": 0.4810130000114441, "learning_rate": 7.926175402678393e-06, "loss": 0.1487, "step": 2981 }, { "epoch": 0.966299416720674, "grad_norm": 0.48700380325317383, "learning_rate": 7.924756847600285e-06, "loss": 0.1543, "step": 2982 }, { "epoch": 0.9666234607906675, "grad_norm": 0.44338053464889526, "learning_rate": 7.923337934560255e-06, "loss": 0.1311, "step": 2983 }, { "epoch": 0.9669475048606611, "grad_norm": 0.46160584688186646, "learning_rate": 7.92191866373196e-06, "loss": 0.1355, "step": 2984 }, { "epoch": 0.9672715489306546, "grad_norm": 0.4628986418247223, "learning_rate": 7.920499035289106e-06, "loss": 0.1343, "step": 2985 }, { "epoch": 0.9675955930006481, "grad_norm": 0.5248939394950867, "learning_rate": 7.919079049405444e-06, "loss": 0.1508, "step": 2986 }, { "epoch": 0.9679196370706417, "grad_norm": 0.4699530005455017, "learning_rate": 7.917658706254766e-06, "loss": 0.134, "step": 2987 }, { "epoch": 0.9682436811406351, "grad_norm": 0.5100036859512329, "learning_rate": 7.916238006010906e-06, "loss": 0.1496, "step": 2988 }, { "epoch": 0.9685677252106286, "grad_norm": 0.5195657014846802, "learning_rate": 7.914816948847747e-06, "loss": 0.1613, "step": 2989 }, { "epoch": 0.9688917692806222, "grad_norm": 0.45567071437835693, "learning_rate": 7.913395534939212e-06, "loss": 0.135, "step": 2990 }, { "epoch": 0.9692158133506157, "grad_norm": 0.49796679615974426, "learning_rate": 7.911973764459264e-06, "loss": 0.159, "step": 2991 }, { "epoch": 0.9695398574206092, "grad_norm": 0.47392913699150085, "learning_rate": 7.91055163758192e-06, "loss": 0.146, "step": 2992 }, { "epoch": 0.9698639014906028, "grad_norm": 0.5466551184654236, "learning_rate": 7.90912915448123e-06, "loss": 0.1613, "step": 2993 }, { "epoch": 0.9701879455605963, "grad_norm": 0.4669461250305176, "learning_rate": 7.907706315331293e-06, "loss": 0.1338, "step": 2994 }, { "epoch": 0.9705119896305897, "grad_norm": 0.479095458984375, "learning_rate": 7.906283120306256e-06, "loss": 0.149, "step": 2995 }, { "epoch": 0.9708360337005832, "grad_norm": 0.5312058925628662, "learning_rate": 7.904859569580296e-06, "loss": 0.1571, "step": 2996 }, { "epoch": 0.9711600777705768, "grad_norm": 0.5019996166229248, "learning_rate": 7.90343566332765e-06, "loss": 0.1458, "step": 2997 }, { "epoch": 0.9714841218405703, "grad_norm": 0.4630244970321655, "learning_rate": 7.902011401722582e-06, "loss": 0.1517, "step": 2998 }, { "epoch": 0.9718081659105638, "grad_norm": 0.45654380321502686, "learning_rate": 7.900586784939415e-06, "loss": 0.1366, "step": 2999 }, { "epoch": 0.9721322099805574, "grad_norm": 0.5140264630317688, "learning_rate": 7.899161813152504e-06, "loss": 0.1395, "step": 3000 }, { "epoch": 0.9724562540505509, "grad_norm": 0.46823394298553467, "learning_rate": 7.897736486536254e-06, "loss": 0.149, "step": 3001 }, { "epoch": 0.9727802981205443, "grad_norm": 0.44872385263442993, "learning_rate": 7.896310805265109e-06, "loss": 0.1349, "step": 3002 }, { "epoch": 0.9731043421905379, "grad_norm": 0.4944060742855072, "learning_rate": 7.89488476951356e-06, "loss": 0.1648, "step": 3003 }, { "epoch": 0.9734283862605314, "grad_norm": 0.4401301145553589, "learning_rate": 7.89345837945614e-06, "loss": 0.13, "step": 3004 }, { "epoch": 0.9737524303305249, "grad_norm": 0.47425198554992676, "learning_rate": 7.892031635267427e-06, "loss": 0.1369, "step": 3005 }, { "epoch": 0.9740764744005185, "grad_norm": 0.42337566614151, "learning_rate": 7.890604537122038e-06, "loss": 0.1317, "step": 3006 }, { "epoch": 0.974400518470512, "grad_norm": 0.4444950819015503, "learning_rate": 7.889177085194638e-06, "loss": 0.1334, "step": 3007 }, { "epoch": 0.9747245625405055, "grad_norm": 0.4752909243106842, "learning_rate": 7.887749279659928e-06, "loss": 0.1539, "step": 3008 }, { "epoch": 0.9750486066104991, "grad_norm": 0.4772418737411499, "learning_rate": 7.886321120692664e-06, "loss": 0.1417, "step": 3009 }, { "epoch": 0.9753726506804925, "grad_norm": 0.4452485740184784, "learning_rate": 7.884892608467638e-06, "loss": 0.1346, "step": 3010 }, { "epoch": 0.975696694750486, "grad_norm": 0.4744517505168915, "learning_rate": 7.883463743159685e-06, "loss": 0.1426, "step": 3011 }, { "epoch": 0.9760207388204796, "grad_norm": 0.5261785387992859, "learning_rate": 7.88203452494368e-06, "loss": 0.1681, "step": 3012 }, { "epoch": 0.9763447828904731, "grad_norm": 0.5469948649406433, "learning_rate": 7.880604953994553e-06, "loss": 0.1513, "step": 3013 }, { "epoch": 0.9766688269604666, "grad_norm": 0.44291892647743225, "learning_rate": 7.879175030487264e-06, "loss": 0.1317, "step": 3014 }, { "epoch": 0.9769928710304602, "grad_norm": 0.47622525691986084, "learning_rate": 7.877744754596826e-06, "loss": 0.1487, "step": 3015 }, { "epoch": 0.9773169151004537, "grad_norm": 0.4686462879180908, "learning_rate": 7.876314126498288e-06, "loss": 0.1419, "step": 3016 }, { "epoch": 0.9776409591704471, "grad_norm": 0.4816147983074188, "learning_rate": 7.874883146366746e-06, "loss": 0.1451, "step": 3017 }, { "epoch": 0.9779650032404407, "grad_norm": 0.44923725724220276, "learning_rate": 7.873451814377336e-06, "loss": 0.1344, "step": 3018 }, { "epoch": 0.9782890473104342, "grad_norm": 0.5001899003982544, "learning_rate": 7.872020130705244e-06, "loss": 0.157, "step": 3019 }, { "epoch": 0.9786130913804277, "grad_norm": 0.45089191198349, "learning_rate": 7.870588095525688e-06, "loss": 0.1354, "step": 3020 }, { "epoch": 0.9789371354504213, "grad_norm": 0.49082210659980774, "learning_rate": 7.86915570901394e-06, "loss": 0.1423, "step": 3021 }, { "epoch": 0.9792611795204148, "grad_norm": 0.46292129158973694, "learning_rate": 7.867722971345311e-06, "loss": 0.1487, "step": 3022 }, { "epoch": 0.9795852235904083, "grad_norm": 0.48895570635795593, "learning_rate": 7.866289882695148e-06, "loss": 0.1564, "step": 3023 }, { "epoch": 0.9799092676604018, "grad_norm": 0.4756009578704834, "learning_rate": 7.864856443238854e-06, "loss": 0.1499, "step": 3024 }, { "epoch": 0.9802333117303953, "grad_norm": 0.46246933937072754, "learning_rate": 7.863422653151866e-06, "loss": 0.1424, "step": 3025 }, { "epoch": 0.9805573558003888, "grad_norm": 0.4539702832698822, "learning_rate": 7.861988512609663e-06, "loss": 0.1429, "step": 3026 }, { "epoch": 0.9808813998703824, "grad_norm": 0.512012243270874, "learning_rate": 7.860554021787774e-06, "loss": 0.1603, "step": 3027 }, { "epoch": 0.9812054439403759, "grad_norm": 0.47591957449913025, "learning_rate": 7.859119180861762e-06, "loss": 0.1554, "step": 3028 }, { "epoch": 0.9815294880103694, "grad_norm": 0.5062115788459778, "learning_rate": 7.857683990007245e-06, "loss": 0.1507, "step": 3029 }, { "epoch": 0.981853532080363, "grad_norm": 0.40852585434913635, "learning_rate": 7.85624844939987e-06, "loss": 0.1163, "step": 3030 }, { "epoch": 0.9821775761503565, "grad_norm": 0.5025544762611389, "learning_rate": 7.854812559215335e-06, "loss": 0.1466, "step": 3031 }, { "epoch": 0.9825016202203499, "grad_norm": 0.495726615190506, "learning_rate": 7.85337631962938e-06, "loss": 0.1519, "step": 3032 }, { "epoch": 0.9828256642903435, "grad_norm": 0.5085958242416382, "learning_rate": 7.851939730817786e-06, "loss": 0.1554, "step": 3033 }, { "epoch": 0.983149708360337, "grad_norm": 0.4771917462348938, "learning_rate": 7.850502792956378e-06, "loss": 0.1487, "step": 3034 }, { "epoch": 0.9834737524303305, "grad_norm": 0.4856005311012268, "learning_rate": 7.849065506221023e-06, "loss": 0.1512, "step": 3035 }, { "epoch": 0.9837977965003241, "grad_norm": 0.4857740104198456, "learning_rate": 7.847627870787632e-06, "loss": 0.1454, "step": 3036 }, { "epoch": 0.9841218405703176, "grad_norm": 0.46327874064445496, "learning_rate": 7.846189886832157e-06, "loss": 0.1298, "step": 3037 }, { "epoch": 0.9844458846403111, "grad_norm": 0.5154156684875488, "learning_rate": 7.844751554530593e-06, "loss": 0.1593, "step": 3038 }, { "epoch": 0.9847699287103046, "grad_norm": 0.45782309770584106, "learning_rate": 7.843312874058976e-06, "loss": 0.1501, "step": 3039 }, { "epoch": 0.9850939727802981, "grad_norm": 0.479270875453949, "learning_rate": 7.841873845593389e-06, "loss": 0.1385, "step": 3040 }, { "epoch": 0.9854180168502916, "grad_norm": 0.4804162085056305, "learning_rate": 7.840434469309956e-06, "loss": 0.14, "step": 3041 }, { "epoch": 0.9857420609202852, "grad_norm": 0.4845614731311798, "learning_rate": 7.838994745384842e-06, "loss": 0.1583, "step": 3042 }, { "epoch": 0.9860661049902787, "grad_norm": 0.48938602209091187, "learning_rate": 7.837554673994254e-06, "loss": 0.1418, "step": 3043 }, { "epoch": 0.9863901490602722, "grad_norm": 0.40282419323921204, "learning_rate": 7.836114255314444e-06, "loss": 0.1175, "step": 3044 }, { "epoch": 0.9867141931302658, "grad_norm": 0.4799858033657074, "learning_rate": 7.834673489521705e-06, "loss": 0.1665, "step": 3045 }, { "epoch": 0.9870382372002592, "grad_norm": 0.4240192770957947, "learning_rate": 7.83323237679237e-06, "loss": 0.1349, "step": 3046 }, { "epoch": 0.9873622812702527, "grad_norm": 0.48984846472740173, "learning_rate": 7.831790917302822e-06, "loss": 0.1513, "step": 3047 }, { "epoch": 0.9876863253402463, "grad_norm": 0.46244630217552185, "learning_rate": 7.830349111229481e-06, "loss": 0.1373, "step": 3048 }, { "epoch": 0.9880103694102398, "grad_norm": 0.472954124212265, "learning_rate": 7.828906958748806e-06, "loss": 0.1471, "step": 3049 }, { "epoch": 0.9883344134802333, "grad_norm": 0.45337915420532227, "learning_rate": 7.827464460037308e-06, "loss": 0.1455, "step": 3050 }, { "epoch": 0.9886584575502269, "grad_norm": 0.44221627712249756, "learning_rate": 7.82602161527153e-06, "loss": 0.1502, "step": 3051 }, { "epoch": 0.9889825016202204, "grad_norm": 0.4312097728252411, "learning_rate": 7.824578424628065e-06, "loss": 0.1238, "step": 3052 }, { "epoch": 0.9893065456902139, "grad_norm": 0.460366815328598, "learning_rate": 7.823134888283543e-06, "loss": 0.141, "step": 3053 }, { "epoch": 0.9896305897602073, "grad_norm": 0.5097335577011108, "learning_rate": 7.821691006414644e-06, "loss": 0.1538, "step": 3054 }, { "epoch": 0.9899546338302009, "grad_norm": 0.4305676817893982, "learning_rate": 7.820246779198079e-06, "loss": 0.1333, "step": 3055 }, { "epoch": 0.9902786779001944, "grad_norm": 0.4182222783565521, "learning_rate": 7.818802206810613e-06, "loss": 0.1268, "step": 3056 }, { "epoch": 0.990602721970188, "grad_norm": 0.4591180682182312, "learning_rate": 7.817357289429044e-06, "loss": 0.1399, "step": 3057 }, { "epoch": 0.9909267660401815, "grad_norm": 0.4615064263343811, "learning_rate": 7.815912027230216e-06, "loss": 0.1357, "step": 3058 }, { "epoch": 0.991250810110175, "grad_norm": 0.4499772787094116, "learning_rate": 7.814466420391017e-06, "loss": 0.1393, "step": 3059 }, { "epoch": 0.9915748541801686, "grad_norm": 0.4455864131450653, "learning_rate": 7.813020469088372e-06, "loss": 0.1386, "step": 3060 }, { "epoch": 0.991898898250162, "grad_norm": 0.4960353672504425, "learning_rate": 7.811574173499257e-06, "loss": 0.1453, "step": 3061 }, { "epoch": 0.9922229423201555, "grad_norm": 0.43458986282348633, "learning_rate": 7.81012753380068e-06, "loss": 0.1415, "step": 3062 }, { "epoch": 0.992546986390149, "grad_norm": 0.4443108141422272, "learning_rate": 7.808680550169696e-06, "loss": 0.1265, "step": 3063 }, { "epoch": 0.9928710304601426, "grad_norm": 0.47840794920921326, "learning_rate": 7.807233222783403e-06, "loss": 0.1466, "step": 3064 }, { "epoch": 0.9931950745301361, "grad_norm": 0.4474855661392212, "learning_rate": 7.80578555181894e-06, "loss": 0.137, "step": 3065 }, { "epoch": 0.9935191186001296, "grad_norm": 0.4584507346153259, "learning_rate": 7.80433753745349e-06, "loss": 0.1394, "step": 3066 }, { "epoch": 0.9938431626701232, "grad_norm": 0.432372510433197, "learning_rate": 7.802889179864271e-06, "loss": 0.1229, "step": 3067 }, { "epoch": 0.9941672067401166, "grad_norm": 0.4571312963962555, "learning_rate": 7.80144047922855e-06, "loss": 0.1348, "step": 3068 }, { "epoch": 0.9944912508101101, "grad_norm": 0.4718877673149109, "learning_rate": 7.799991435723637e-06, "loss": 0.1406, "step": 3069 }, { "epoch": 0.9948152948801037, "grad_norm": 0.4299832284450531, "learning_rate": 7.798542049526875e-06, "loss": 0.1359, "step": 3070 }, { "epoch": 0.9951393389500972, "grad_norm": 0.4358609914779663, "learning_rate": 7.79709232081566e-06, "loss": 0.1338, "step": 3071 }, { "epoch": 0.9954633830200907, "grad_norm": 0.43104878067970276, "learning_rate": 7.795642249767423e-06, "loss": 0.1211, "step": 3072 }, { "epoch": 0.9957874270900843, "grad_norm": 0.4711206555366516, "learning_rate": 7.794191836559637e-06, "loss": 0.139, "step": 3073 }, { "epoch": 0.9961114711600778, "grad_norm": 0.538809061050415, "learning_rate": 7.792741081369822e-06, "loss": 0.1673, "step": 3074 }, { "epoch": 0.9964355152300713, "grad_norm": 0.4683513343334198, "learning_rate": 7.791289984375534e-06, "loss": 0.1347, "step": 3075 }, { "epoch": 0.9967595593000648, "grad_norm": 0.47475746273994446, "learning_rate": 7.789838545754373e-06, "loss": 0.1573, "step": 3076 }, { "epoch": 0.9970836033700583, "grad_norm": 0.45275938510894775, "learning_rate": 7.788386765683982e-06, "loss": 0.1387, "step": 3077 }, { "epoch": 0.9974076474400518, "grad_norm": 0.4447527825832367, "learning_rate": 7.786934644342044e-06, "loss": 0.1334, "step": 3078 }, { "epoch": 0.9977316915100454, "grad_norm": 0.43458884954452515, "learning_rate": 7.785482181906286e-06, "loss": 0.1371, "step": 3079 }, { "epoch": 0.9980557355800389, "grad_norm": 0.4534459412097931, "learning_rate": 7.784029378554475e-06, "loss": 0.1389, "step": 3080 }, { "epoch": 0.9983797796500324, "grad_norm": 0.4595879018306732, "learning_rate": 7.782576234464419e-06, "loss": 0.1341, "step": 3081 }, { "epoch": 0.998703823720026, "grad_norm": 0.5015422701835632, "learning_rate": 7.78112274981397e-06, "loss": 0.1518, "step": 3082 }, { "epoch": 0.9990278677900194, "grad_norm": 0.45415136218070984, "learning_rate": 7.779668924781017e-06, "loss": 0.1281, "step": 3083 }, { "epoch": 0.9993519118600129, "grad_norm": 0.4373987913131714, "learning_rate": 7.778214759543498e-06, "loss": 0.1298, "step": 3084 }, { "epoch": 0.9996759559300065, "grad_norm": 0.44714266061782837, "learning_rate": 7.77676025427939e-06, "loss": 0.1328, "step": 3085 }, { "epoch": 1.0, "grad_norm": 0.5027826428413391, "learning_rate": 7.775305409166707e-06, "loss": 0.1434, "step": 3086 }, { "epoch": 1.0003240440699934, "grad_norm": 0.44878828525543213, "learning_rate": 7.773850224383509e-06, "loss": 0.1197, "step": 3087 }, { "epoch": 1.000648088139987, "grad_norm": 0.40076735615730286, "learning_rate": 7.772394700107895e-06, "loss": 0.1014, "step": 3088 }, { "epoch": 1.0009721322099805, "grad_norm": 0.4214591979980469, "learning_rate": 7.77093883651801e-06, "loss": 0.1088, "step": 3089 }, { "epoch": 1.0012961762799741, "grad_norm": 0.4249264895915985, "learning_rate": 7.769482633792035e-06, "loss": 0.1148, "step": 3090 }, { "epoch": 1.0016202203499676, "grad_norm": 0.4293353259563446, "learning_rate": 7.768026092108196e-06, "loss": 0.1062, "step": 3091 }, { "epoch": 1.0019442644199612, "grad_norm": 0.4549325704574585, "learning_rate": 7.766569211644763e-06, "loss": 0.1124, "step": 3092 }, { "epoch": 1.0022683084899546, "grad_norm": 0.4095253050327301, "learning_rate": 7.765111992580038e-06, "loss": 0.1027, "step": 3093 }, { "epoch": 1.002592352559948, "grad_norm": 0.47369253635406494, "learning_rate": 7.763654435092374e-06, "loss": 0.1194, "step": 3094 }, { "epoch": 1.0029163966299417, "grad_norm": 0.45340731739997864, "learning_rate": 7.762196539360161e-06, "loss": 0.1014, "step": 3095 }, { "epoch": 1.0032404406999351, "grad_norm": 0.5088295340538025, "learning_rate": 7.760738305561832e-06, "loss": 0.1169, "step": 3096 }, { "epoch": 1.0035644847699288, "grad_norm": 0.4732248783111572, "learning_rate": 7.759279733875862e-06, "loss": 0.113, "step": 3097 }, { "epoch": 1.0038885288399222, "grad_norm": 0.5000881552696228, "learning_rate": 7.757820824480763e-06, "loss": 0.111, "step": 3098 }, { "epoch": 1.0042125729099158, "grad_norm": 0.4899469316005707, "learning_rate": 7.756361577555093e-06, "loss": 0.1219, "step": 3099 }, { "epoch": 1.0045366169799093, "grad_norm": 0.4718400537967682, "learning_rate": 7.75490199327745e-06, "loss": 0.1184, "step": 3100 }, { "epoch": 1.0048606610499027, "grad_norm": 0.46882617473602295, "learning_rate": 7.753442071826472e-06, "loss": 0.1115, "step": 3101 }, { "epoch": 1.0051847051198963, "grad_norm": 0.4647389054298401, "learning_rate": 7.75198181338084e-06, "loss": 0.1054, "step": 3102 }, { "epoch": 1.0055087491898898, "grad_norm": 0.4482908844947815, "learning_rate": 7.750521218119275e-06, "loss": 0.1128, "step": 3103 }, { "epoch": 1.0058327932598834, "grad_norm": 0.43499132990837097, "learning_rate": 7.74906028622054e-06, "loss": 0.1029, "step": 3104 }, { "epoch": 1.0061568373298768, "grad_norm": 0.47596755623817444, "learning_rate": 7.74759901786344e-06, "loss": 0.1197, "step": 3105 }, { "epoch": 1.0064808813998705, "grad_norm": 0.46033400297164917, "learning_rate": 7.746137413226817e-06, "loss": 0.1119, "step": 3106 }, { "epoch": 1.0068049254698639, "grad_norm": 0.48392659425735474, "learning_rate": 7.744675472489561e-06, "loss": 0.1264, "step": 3107 }, { "epoch": 1.0071289695398573, "grad_norm": 0.45975932478904724, "learning_rate": 7.743213195830597e-06, "loss": 0.1033, "step": 3108 }, { "epoch": 1.007453013609851, "grad_norm": 0.4883347153663635, "learning_rate": 7.741750583428895e-06, "loss": 0.1193, "step": 3109 }, { "epoch": 1.0077770576798444, "grad_norm": 0.4730301797389984, "learning_rate": 7.740287635463464e-06, "loss": 0.1184, "step": 3110 }, { "epoch": 1.008101101749838, "grad_norm": 0.45033764839172363, "learning_rate": 7.738824352113353e-06, "loss": 0.1151, "step": 3111 }, { "epoch": 1.0084251458198314, "grad_norm": 0.44802072644233704, "learning_rate": 7.737360733557656e-06, "loss": 0.1129, "step": 3112 }, { "epoch": 1.008749189889825, "grad_norm": 0.4515936076641083, "learning_rate": 7.735896779975504e-06, "loss": 0.113, "step": 3113 }, { "epoch": 1.0090732339598185, "grad_norm": 0.47022631764411926, "learning_rate": 7.734432491546073e-06, "loss": 0.115, "step": 3114 }, { "epoch": 1.0093972780298122, "grad_norm": 0.4840375781059265, "learning_rate": 7.732967868448576e-06, "loss": 0.1191, "step": 3115 }, { "epoch": 1.0097213220998056, "grad_norm": 0.431972473859787, "learning_rate": 7.731502910862268e-06, "loss": 0.1107, "step": 3116 }, { "epoch": 1.010045366169799, "grad_norm": 0.5001168251037598, "learning_rate": 7.730037618966448e-06, "loss": 0.1231, "step": 3117 }, { "epoch": 1.0103694102397927, "grad_norm": 0.4558895528316498, "learning_rate": 7.728571992940452e-06, "loss": 0.1173, "step": 3118 }, { "epoch": 1.010693454309786, "grad_norm": 0.4773532450199127, "learning_rate": 7.727106032963658e-06, "loss": 0.1236, "step": 3119 }, { "epoch": 1.0110174983797797, "grad_norm": 0.441366583108902, "learning_rate": 7.725639739215486e-06, "loss": 0.1073, "step": 3120 }, { "epoch": 1.0113415424497731, "grad_norm": 0.4424722194671631, "learning_rate": 7.724173111875398e-06, "loss": 0.1114, "step": 3121 }, { "epoch": 1.0116655865197668, "grad_norm": 0.476109117269516, "learning_rate": 7.722706151122892e-06, "loss": 0.1226, "step": 3122 }, { "epoch": 1.0119896305897602, "grad_norm": 0.47857046127319336, "learning_rate": 7.721238857137512e-06, "loss": 0.1213, "step": 3123 }, { "epoch": 1.0123136746597536, "grad_norm": 0.4299698770046234, "learning_rate": 7.719771230098839e-06, "loss": 0.1099, "step": 3124 }, { "epoch": 1.0126377187297473, "grad_norm": 0.44482967257499695, "learning_rate": 7.718303270186495e-06, "loss": 0.107, "step": 3125 }, { "epoch": 1.0129617627997407, "grad_norm": 0.44184449315071106, "learning_rate": 7.716834977580147e-06, "loss": 0.1096, "step": 3126 }, { "epoch": 1.0132858068697344, "grad_norm": 0.4429199695587158, "learning_rate": 7.715366352459499e-06, "loss": 0.1054, "step": 3127 }, { "epoch": 1.0136098509397278, "grad_norm": 0.49366313219070435, "learning_rate": 7.713897395004295e-06, "loss": 0.1197, "step": 3128 }, { "epoch": 1.0139338950097214, "grad_norm": 0.4341050386428833, "learning_rate": 7.712428105394325e-06, "loss": 0.1048, "step": 3129 }, { "epoch": 1.0142579390797148, "grad_norm": 0.42968085408210754, "learning_rate": 7.71095848380941e-06, "loss": 0.105, "step": 3130 }, { "epoch": 1.0145819831497083, "grad_norm": 0.45736268162727356, "learning_rate": 7.709488530429423e-06, "loss": 0.1075, "step": 3131 }, { "epoch": 1.014906027219702, "grad_norm": 0.5073167085647583, "learning_rate": 7.70801824543427e-06, "loss": 0.1208, "step": 3132 }, { "epoch": 1.0152300712896953, "grad_norm": 0.46384376287460327, "learning_rate": 7.706547629003897e-06, "loss": 0.1131, "step": 3133 }, { "epoch": 1.015554115359689, "grad_norm": 0.4488130509853363, "learning_rate": 7.705076681318298e-06, "loss": 0.1097, "step": 3134 }, { "epoch": 1.0158781594296824, "grad_norm": 0.4590933918952942, "learning_rate": 7.7036054025575e-06, "loss": 0.1117, "step": 3135 }, { "epoch": 1.016202203499676, "grad_norm": 0.4480055570602417, "learning_rate": 7.702133792901574e-06, "loss": 0.1087, "step": 3136 }, { "epoch": 1.0165262475696695, "grad_norm": 0.4935062527656555, "learning_rate": 7.700661852530629e-06, "loss": 0.1215, "step": 3137 }, { "epoch": 1.016850291639663, "grad_norm": 0.4196431040763855, "learning_rate": 7.699189581624818e-06, "loss": 0.0967, "step": 3138 }, { "epoch": 1.0171743357096565, "grad_norm": 0.4557700753211975, "learning_rate": 7.697716980364334e-06, "loss": 0.1156, "step": 3139 }, { "epoch": 1.01749837977965, "grad_norm": 0.4312606751918793, "learning_rate": 7.696244048929405e-06, "loss": 0.102, "step": 3140 }, { "epoch": 1.0178224238496436, "grad_norm": 0.4428192675113678, "learning_rate": 7.694770787500308e-06, "loss": 0.1021, "step": 3141 }, { "epoch": 1.018146467919637, "grad_norm": 0.4538060426712036, "learning_rate": 7.693297196257354e-06, "loss": 0.1104, "step": 3142 }, { "epoch": 1.0184705119896307, "grad_norm": 0.4797634184360504, "learning_rate": 7.691823275380895e-06, "loss": 0.1192, "step": 3143 }, { "epoch": 1.018794556059624, "grad_norm": 0.4432651400566101, "learning_rate": 7.690349025051327e-06, "loss": 0.1107, "step": 3144 }, { "epoch": 1.0191186001296175, "grad_norm": 0.46791303157806396, "learning_rate": 7.688874445449083e-06, "loss": 0.1118, "step": 3145 }, { "epoch": 1.0194426441996112, "grad_norm": 0.5142552852630615, "learning_rate": 7.687399536754636e-06, "loss": 0.1157, "step": 3146 }, { "epoch": 1.0197666882696046, "grad_norm": 0.4536757171154022, "learning_rate": 7.685924299148504e-06, "loss": 0.1085, "step": 3147 }, { "epoch": 1.0200907323395982, "grad_norm": 0.5158340334892273, "learning_rate": 7.684448732811239e-06, "loss": 0.1316, "step": 3148 }, { "epoch": 1.0204147764095917, "grad_norm": 0.49748632311820984, "learning_rate": 7.682972837923434e-06, "loss": 0.1184, "step": 3149 }, { "epoch": 1.0207388204795853, "grad_norm": 0.47120100259780884, "learning_rate": 7.68149661466573e-06, "loss": 0.1251, "step": 3150 }, { "epoch": 1.0210628645495787, "grad_norm": 0.4461970627307892, "learning_rate": 7.680020063218796e-06, "loss": 0.1063, "step": 3151 }, { "epoch": 1.0213869086195722, "grad_norm": 0.5021184086799622, "learning_rate": 7.678543183763351e-06, "loss": 0.1307, "step": 3152 }, { "epoch": 1.0217109526895658, "grad_norm": 0.4431654214859009, "learning_rate": 7.677065976480153e-06, "loss": 0.1067, "step": 3153 }, { "epoch": 1.0220349967595592, "grad_norm": 0.41496190428733826, "learning_rate": 7.675588441549995e-06, "loss": 0.0973, "step": 3154 }, { "epoch": 1.0223590408295529, "grad_norm": 0.4980688989162445, "learning_rate": 7.674110579153713e-06, "loss": 0.1201, "step": 3155 }, { "epoch": 1.0226830848995463, "grad_norm": 0.47226595878601074, "learning_rate": 7.672632389472186e-06, "loss": 0.116, "step": 3156 }, { "epoch": 1.02300712896954, "grad_norm": 0.4931360185146332, "learning_rate": 7.671153872686324e-06, "loss": 0.1166, "step": 3157 }, { "epoch": 1.0233311730395334, "grad_norm": 0.5069724321365356, "learning_rate": 7.669675028977089e-06, "loss": 0.1181, "step": 3158 }, { "epoch": 1.023655217109527, "grad_norm": 0.4590316712856293, "learning_rate": 7.668195858525474e-06, "loss": 0.1099, "step": 3159 }, { "epoch": 1.0239792611795204, "grad_norm": 0.4730747938156128, "learning_rate": 7.666716361512516e-06, "loss": 0.1173, "step": 3160 }, { "epoch": 1.0243033052495139, "grad_norm": 0.43002721667289734, "learning_rate": 7.66523653811929e-06, "loss": 0.0994, "step": 3161 }, { "epoch": 1.0246273493195075, "grad_norm": 0.4950290322303772, "learning_rate": 7.663756388526915e-06, "loss": 0.1188, "step": 3162 }, { "epoch": 1.024951393389501, "grad_norm": 0.4726638197898865, "learning_rate": 7.662275912916543e-06, "loss": 0.1109, "step": 3163 }, { "epoch": 1.0252754374594946, "grad_norm": 0.47714924812316895, "learning_rate": 7.660795111469374e-06, "loss": 0.1183, "step": 3164 }, { "epoch": 1.025599481529488, "grad_norm": 0.5277173519134521, "learning_rate": 7.659313984366643e-06, "loss": 0.1264, "step": 3165 }, { "epoch": 1.0259235255994816, "grad_norm": 0.4854954779148102, "learning_rate": 7.657832531789623e-06, "loss": 0.1099, "step": 3166 }, { "epoch": 1.026247569669475, "grad_norm": 0.48848310112953186, "learning_rate": 7.65635075391963e-06, "loss": 0.1253, "step": 3167 }, { "epoch": 1.0265716137394685, "grad_norm": 0.4541592001914978, "learning_rate": 7.654868650938023e-06, "loss": 0.1078, "step": 3168 }, { "epoch": 1.0268956578094621, "grad_norm": 0.42836257815361023, "learning_rate": 7.653386223026191e-06, "loss": 0.1, "step": 3169 }, { "epoch": 1.0272197018794555, "grad_norm": 0.4688536524772644, "learning_rate": 7.651903470365573e-06, "loss": 0.1146, "step": 3170 }, { "epoch": 1.0275437459494492, "grad_norm": 0.4861224293708801, "learning_rate": 7.650420393137646e-06, "loss": 0.1205, "step": 3171 }, { "epoch": 1.0278677900194426, "grad_norm": 0.4465397596359253, "learning_rate": 7.648936991523916e-06, "loss": 0.1122, "step": 3172 }, { "epoch": 1.0281918340894363, "grad_norm": 0.4481448531150818, "learning_rate": 7.647453265705944e-06, "loss": 0.1094, "step": 3173 }, { "epoch": 1.0285158781594297, "grad_norm": 0.4905964434146881, "learning_rate": 7.645969215865321e-06, "loss": 0.1219, "step": 3174 }, { "epoch": 1.028839922229423, "grad_norm": 0.471800833940506, "learning_rate": 7.644484842183681e-06, "loss": 0.1064, "step": 3175 }, { "epoch": 1.0291639662994168, "grad_norm": 0.4817582368850708, "learning_rate": 7.643000144842698e-06, "loss": 0.1168, "step": 3176 }, { "epoch": 1.0294880103694102, "grad_norm": 0.441641241312027, "learning_rate": 7.641515124024084e-06, "loss": 0.1139, "step": 3177 }, { "epoch": 1.0298120544394038, "grad_norm": 0.4686790704727173, "learning_rate": 7.640029779909588e-06, "loss": 0.1131, "step": 3178 }, { "epoch": 1.0301360985093972, "grad_norm": 0.4923335909843445, "learning_rate": 7.638544112681008e-06, "loss": 0.1245, "step": 3179 }, { "epoch": 1.030460142579391, "grad_norm": 0.4751708507537842, "learning_rate": 7.637058122520168e-06, "loss": 0.1176, "step": 3180 }, { "epoch": 1.0307841866493843, "grad_norm": 0.46796074509620667, "learning_rate": 7.635571809608945e-06, "loss": 0.107, "step": 3181 }, { "epoch": 1.0311082307193777, "grad_norm": 0.4851223826408386, "learning_rate": 7.634085174129246e-06, "loss": 0.1177, "step": 3182 }, { "epoch": 1.0314322747893714, "grad_norm": 0.44485923647880554, "learning_rate": 7.63259821626302e-06, "loss": 0.1077, "step": 3183 }, { "epoch": 1.0317563188593648, "grad_norm": 0.4402599334716797, "learning_rate": 7.631110936192262e-06, "loss": 0.1142, "step": 3184 }, { "epoch": 1.0320803629293585, "grad_norm": 0.45097285509109497, "learning_rate": 7.629623334098994e-06, "loss": 0.1179, "step": 3185 }, { "epoch": 1.0324044069993519, "grad_norm": 0.4726148843765259, "learning_rate": 7.628135410165286e-06, "loss": 0.1109, "step": 3186 }, { "epoch": 1.0327284510693455, "grad_norm": 0.49244487285614014, "learning_rate": 7.626647164573247e-06, "loss": 0.1147, "step": 3187 }, { "epoch": 1.033052495139339, "grad_norm": 0.46729210019111633, "learning_rate": 7.625158597505022e-06, "loss": 0.1064, "step": 3188 }, { "epoch": 1.0333765392093324, "grad_norm": 0.4835570156574249, "learning_rate": 7.6236697091428e-06, "loss": 0.1155, "step": 3189 }, { "epoch": 1.033700583279326, "grad_norm": 0.4817955195903778, "learning_rate": 7.622180499668805e-06, "loss": 0.1146, "step": 3190 }, { "epoch": 1.0340246273493194, "grad_norm": 0.49716344475746155, "learning_rate": 7.620690969265299e-06, "loss": 0.1068, "step": 3191 }, { "epoch": 1.034348671419313, "grad_norm": 0.4979782700538635, "learning_rate": 7.61920111811459e-06, "loss": 0.1107, "step": 3192 }, { "epoch": 1.0346727154893065, "grad_norm": 0.48396703600883484, "learning_rate": 7.61771094639902e-06, "loss": 0.1107, "step": 3193 }, { "epoch": 1.0349967595593002, "grad_norm": 0.48865407705307007, "learning_rate": 7.61622045430097e-06, "loss": 0.1132, "step": 3194 }, { "epoch": 1.0353208036292936, "grad_norm": 0.44049808382987976, "learning_rate": 7.6147296420028645e-06, "loss": 0.1023, "step": 3195 }, { "epoch": 1.035644847699287, "grad_norm": 0.4887087047100067, "learning_rate": 7.613238509687164e-06, "loss": 0.1099, "step": 3196 }, { "epoch": 1.0359688917692806, "grad_norm": 0.4959479868412018, "learning_rate": 7.611747057536367e-06, "loss": 0.1198, "step": 3197 }, { "epoch": 1.036292935839274, "grad_norm": 0.466974675655365, "learning_rate": 7.610255285733015e-06, "loss": 0.1051, "step": 3198 }, { "epoch": 1.0366169799092677, "grad_norm": 0.5238450765609741, "learning_rate": 7.608763194459685e-06, "loss": 0.1248, "step": 3199 }, { "epoch": 1.0369410239792611, "grad_norm": 0.490261048078537, "learning_rate": 7.607270783898995e-06, "loss": 0.1198, "step": 3200 }, { "epoch": 1.0372650680492548, "grad_norm": 0.49088743329048157, "learning_rate": 7.6057780542336e-06, "loss": 0.1127, "step": 3201 }, { "epoch": 1.0375891121192482, "grad_norm": 0.45407554507255554, "learning_rate": 7.604285005646199e-06, "loss": 0.1057, "step": 3202 }, { "epoch": 1.0379131561892416, "grad_norm": 0.4346422553062439, "learning_rate": 7.602791638319522e-06, "loss": 0.1012, "step": 3203 }, { "epoch": 1.0382372002592353, "grad_norm": 0.4368624985218048, "learning_rate": 7.601297952436349e-06, "loss": 0.1046, "step": 3204 }, { "epoch": 1.0385612443292287, "grad_norm": 0.4649512767791748, "learning_rate": 7.5998039481794846e-06, "loss": 0.1101, "step": 3205 }, { "epoch": 1.0388852883992223, "grad_norm": 0.46254339814186096, "learning_rate": 7.598309625731788e-06, "loss": 0.1095, "step": 3206 }, { "epoch": 1.0392093324692158, "grad_norm": 0.4886171817779541, "learning_rate": 7.596814985276147e-06, "loss": 0.1209, "step": 3207 }, { "epoch": 1.0395333765392094, "grad_norm": 0.47876039147377014, "learning_rate": 7.595320026995491e-06, "loss": 0.1197, "step": 3208 }, { "epoch": 1.0398574206092028, "grad_norm": 0.46234801411628723, "learning_rate": 7.593824751072787e-06, "loss": 0.1117, "step": 3209 }, { "epoch": 1.0401814646791965, "grad_norm": 0.462101548910141, "learning_rate": 7.592329157691045e-06, "loss": 0.1108, "step": 3210 }, { "epoch": 1.04050550874919, "grad_norm": 0.49281927943229675, "learning_rate": 7.590833247033309e-06, "loss": 0.1194, "step": 3211 }, { "epoch": 1.0408295528191833, "grad_norm": 0.43509066104888916, "learning_rate": 7.589337019282664e-06, "loss": 0.1026, "step": 3212 }, { "epoch": 1.041153596889177, "grad_norm": 0.505856454372406, "learning_rate": 7.587840474622236e-06, "loss": 0.1222, "step": 3213 }, { "epoch": 1.0414776409591704, "grad_norm": 0.484377920627594, "learning_rate": 7.586343613235186e-06, "loss": 0.1131, "step": 3214 }, { "epoch": 1.041801685029164, "grad_norm": 0.47142449021339417, "learning_rate": 7.584846435304715e-06, "loss": 0.114, "step": 3215 }, { "epoch": 1.0421257290991575, "grad_norm": 0.45768871903419495, "learning_rate": 7.5833489410140636e-06, "loss": 0.1034, "step": 3216 }, { "epoch": 1.042449773169151, "grad_norm": 0.4345049560070038, "learning_rate": 7.58185113054651e-06, "loss": 0.1042, "step": 3217 }, { "epoch": 1.0427738172391445, "grad_norm": 0.4796913266181946, "learning_rate": 7.580353004085372e-06, "loss": 0.1162, "step": 3218 }, { "epoch": 1.043097861309138, "grad_norm": 0.5288426876068115, "learning_rate": 7.578854561814008e-06, "loss": 0.1207, "step": 3219 }, { "epoch": 1.0434219053791316, "grad_norm": 0.4750249981880188, "learning_rate": 7.577355803915809e-06, "loss": 0.1187, "step": 3220 }, { "epoch": 1.043745949449125, "grad_norm": 0.4727710485458374, "learning_rate": 7.575856730574212e-06, "loss": 0.1155, "step": 3221 }, { "epoch": 1.0440699935191187, "grad_norm": 0.515414834022522, "learning_rate": 7.574357341972687e-06, "loss": 0.1128, "step": 3222 }, { "epoch": 1.044394037589112, "grad_norm": 0.51041179895401, "learning_rate": 7.5728576382947436e-06, "loss": 0.1237, "step": 3223 }, { "epoch": 1.0447180816591057, "grad_norm": 0.4669690430164337, "learning_rate": 7.571357619723933e-06, "loss": 0.1153, "step": 3224 }, { "epoch": 1.0450421257290992, "grad_norm": 0.47048744559288025, "learning_rate": 7.569857286443843e-06, "loss": 0.1114, "step": 3225 }, { "epoch": 1.0453661697990926, "grad_norm": 0.4638916850090027, "learning_rate": 7.5683566386381e-06, "loss": 0.1111, "step": 3226 }, { "epoch": 1.0456902138690862, "grad_norm": 0.47223541140556335, "learning_rate": 7.566855676490368e-06, "loss": 0.1118, "step": 3227 }, { "epoch": 1.0460142579390797, "grad_norm": 0.45786023139953613, "learning_rate": 7.5653544001843485e-06, "loss": 0.1088, "step": 3228 }, { "epoch": 1.0463383020090733, "grad_norm": 0.45595699548721313, "learning_rate": 7.563852809903786e-06, "loss": 0.1196, "step": 3229 }, { "epoch": 1.0466623460790667, "grad_norm": 0.5083061456680298, "learning_rate": 7.562350905832459e-06, "loss": 0.1151, "step": 3230 }, { "epoch": 1.0469863901490604, "grad_norm": 0.4339052438735962, "learning_rate": 7.560848688154187e-06, "loss": 0.1083, "step": 3231 }, { "epoch": 1.0473104342190538, "grad_norm": 0.46849381923675537, "learning_rate": 7.559346157052828e-06, "loss": 0.1118, "step": 3232 }, { "epoch": 1.0476344782890472, "grad_norm": 0.4972139596939087, "learning_rate": 7.5578433127122745e-06, "loss": 0.1143, "step": 3233 }, { "epoch": 1.0479585223590409, "grad_norm": 0.47988584637641907, "learning_rate": 7.556340155316462e-06, "loss": 0.1296, "step": 3234 }, { "epoch": 1.0482825664290343, "grad_norm": 0.48681557178497314, "learning_rate": 7.55483668504936e-06, "loss": 0.1082, "step": 3235 }, { "epoch": 1.048606610499028, "grad_norm": 0.444217711687088, "learning_rate": 7.553332902094981e-06, "loss": 0.1092, "step": 3236 }, { "epoch": 1.0489306545690213, "grad_norm": 0.46810221672058105, "learning_rate": 7.551828806637374e-06, "loss": 0.1163, "step": 3237 }, { "epoch": 1.049254698639015, "grad_norm": 0.49482589960098267, "learning_rate": 7.550324398860625e-06, "loss": 0.1038, "step": 3238 }, { "epoch": 1.0495787427090084, "grad_norm": 0.4945639371871948, "learning_rate": 7.548819678948857e-06, "loss": 0.1134, "step": 3239 }, { "epoch": 1.0499027867790018, "grad_norm": 0.4766770303249359, "learning_rate": 7.547314647086235e-06, "loss": 0.1166, "step": 3240 }, { "epoch": 1.0502268308489955, "grad_norm": 0.4779733717441559, "learning_rate": 7.545809303456961e-06, "loss": 0.1229, "step": 3241 }, { "epoch": 1.050550874918989, "grad_norm": 0.48405709862709045, "learning_rate": 7.544303648245275e-06, "loss": 0.119, "step": 3242 }, { "epoch": 1.0508749189889826, "grad_norm": 0.4582526683807373, "learning_rate": 7.54279768163545e-06, "loss": 0.1081, "step": 3243 }, { "epoch": 1.051198963058976, "grad_norm": 0.4700034260749817, "learning_rate": 7.541291403811808e-06, "loss": 0.1013, "step": 3244 }, { "epoch": 1.0515230071289696, "grad_norm": 0.48437923192977905, "learning_rate": 7.539784814958697e-06, "loss": 0.126, "step": 3245 }, { "epoch": 1.051847051198963, "grad_norm": 0.4672539234161377, "learning_rate": 7.538277915260513e-06, "loss": 0.1057, "step": 3246 }, { "epoch": 1.0521710952689567, "grad_norm": 0.4580419659614563, "learning_rate": 7.536770704901684e-06, "loss": 0.1097, "step": 3247 }, { "epoch": 1.0524951393389501, "grad_norm": 0.4356781244277954, "learning_rate": 7.535263184066679e-06, "loss": 0.1052, "step": 3248 }, { "epoch": 1.0528191834089435, "grad_norm": 0.4719600975513458, "learning_rate": 7.5337553529400046e-06, "loss": 0.1181, "step": 3249 }, { "epoch": 1.0531432274789372, "grad_norm": 0.4823813736438751, "learning_rate": 7.532247211706202e-06, "loss": 0.1207, "step": 3250 }, { "epoch": 1.0534672715489306, "grad_norm": 0.471195250749588, "learning_rate": 7.530738760549856e-06, "loss": 0.1168, "step": 3251 }, { "epoch": 1.0537913156189243, "grad_norm": 0.4789368212223053, "learning_rate": 7.529229999655585e-06, "loss": 0.1173, "step": 3252 }, { "epoch": 1.0541153596889177, "grad_norm": 0.5182376503944397, "learning_rate": 7.5277209292080465e-06, "loss": 0.1169, "step": 3253 }, { "epoch": 1.054439403758911, "grad_norm": 0.5064989924430847, "learning_rate": 7.526211549391936e-06, "loss": 0.1243, "step": 3254 }, { "epoch": 1.0547634478289047, "grad_norm": 0.49826836585998535, "learning_rate": 7.524701860391987e-06, "loss": 0.1096, "step": 3255 }, { "epoch": 1.0550874918988982, "grad_norm": 0.4568706750869751, "learning_rate": 7.523191862392973e-06, "loss": 0.1086, "step": 3256 }, { "epoch": 1.0554115359688918, "grad_norm": 0.44507694244384766, "learning_rate": 7.521681555579702e-06, "loss": 0.1078, "step": 3257 }, { "epoch": 1.0557355800388852, "grad_norm": 0.4660944640636444, "learning_rate": 7.520170940137019e-06, "loss": 0.1096, "step": 3258 }, { "epoch": 1.0560596241088789, "grad_norm": 0.4876995384693146, "learning_rate": 7.51866001624981e-06, "loss": 0.1154, "step": 3259 }, { "epoch": 1.0563836681788723, "grad_norm": 0.45799127221107483, "learning_rate": 7.517148784102997e-06, "loss": 0.1141, "step": 3260 }, { "epoch": 1.056707712248866, "grad_norm": 0.4525223970413208, "learning_rate": 7.515637243881542e-06, "loss": 0.1119, "step": 3261 }, { "epoch": 1.0570317563188594, "grad_norm": 0.43269988894462585, "learning_rate": 7.5141253957704405e-06, "loss": 0.1106, "step": 3262 }, { "epoch": 1.0573558003888528, "grad_norm": 0.5266064405441284, "learning_rate": 7.512613239954729e-06, "loss": 0.1319, "step": 3263 }, { "epoch": 1.0576798444588464, "grad_norm": 0.4743003845214844, "learning_rate": 7.511100776619483e-06, "loss": 0.1108, "step": 3264 }, { "epoch": 1.0580038885288399, "grad_norm": 0.48685547709465027, "learning_rate": 7.509588005949811e-06, "loss": 0.1174, "step": 3265 }, { "epoch": 1.0583279325988335, "grad_norm": 0.4517221748828888, "learning_rate": 7.50807492813086e-06, "loss": 0.1123, "step": 3266 }, { "epoch": 1.058651976668827, "grad_norm": 0.4700964391231537, "learning_rate": 7.5065615433478165e-06, "loss": 0.1185, "step": 3267 }, { "epoch": 1.0589760207388206, "grad_norm": 0.4660481810569763, "learning_rate": 7.505047851785908e-06, "loss": 0.1174, "step": 3268 }, { "epoch": 1.059300064808814, "grad_norm": 0.46513262391090393, "learning_rate": 7.503533853630393e-06, "loss": 0.1159, "step": 3269 }, { "epoch": 1.0596241088788074, "grad_norm": 0.4085860848426819, "learning_rate": 7.50201954906657e-06, "loss": 0.0909, "step": 3270 }, { "epoch": 1.059948152948801, "grad_norm": 0.48166197538375854, "learning_rate": 7.500504938279775e-06, "loss": 0.1246, "step": 3271 }, { "epoch": 1.0602721970187945, "grad_norm": 0.4505249857902527, "learning_rate": 7.498990021455383e-06, "loss": 0.1074, "step": 3272 }, { "epoch": 1.0605962410887881, "grad_norm": 0.459302693605423, "learning_rate": 7.497474798778805e-06, "loss": 0.1092, "step": 3273 }, { "epoch": 1.0609202851587816, "grad_norm": 0.4699563980102539, "learning_rate": 7.495959270435489e-06, "loss": 0.1097, "step": 3274 }, { "epoch": 1.0612443292287752, "grad_norm": 0.4632161259651184, "learning_rate": 7.494443436610921e-06, "loss": 0.1089, "step": 3275 }, { "epoch": 1.0615683732987686, "grad_norm": 0.5196097493171692, "learning_rate": 7.4929272974906245e-06, "loss": 0.1289, "step": 3276 }, { "epoch": 1.061892417368762, "grad_norm": 0.4618408977985382, "learning_rate": 7.49141085326016e-06, "loss": 0.1045, "step": 3277 }, { "epoch": 1.0622164614387557, "grad_norm": 0.518011212348938, "learning_rate": 7.489894104105124e-06, "loss": 0.1209, "step": 3278 }, { "epoch": 1.0625405055087491, "grad_norm": 0.4100458025932312, "learning_rate": 7.488377050211155e-06, "loss": 0.0966, "step": 3279 }, { "epoch": 1.0628645495787428, "grad_norm": 0.4721738398075104, "learning_rate": 7.4868596917639245e-06, "loss": 0.1159, "step": 3280 }, { "epoch": 1.0631885936487362, "grad_norm": 0.4702790081501007, "learning_rate": 7.485342028949143e-06, "loss": 0.1071, "step": 3281 }, { "epoch": 1.0635126377187298, "grad_norm": 0.48280274868011475, "learning_rate": 7.483824061952557e-06, "loss": 0.1156, "step": 3282 }, { "epoch": 1.0638366817887233, "grad_norm": 0.4804169237613678, "learning_rate": 7.4823057909599504e-06, "loss": 0.1135, "step": 3283 }, { "epoch": 1.0641607258587167, "grad_norm": 0.5231265425682068, "learning_rate": 7.480787216157146e-06, "loss": 0.1166, "step": 3284 }, { "epoch": 1.0644847699287103, "grad_norm": 0.4714162349700928, "learning_rate": 7.479268337730002e-06, "loss": 0.1128, "step": 3285 }, { "epoch": 1.0648088139987038, "grad_norm": 0.518753707408905, "learning_rate": 7.477749155864416e-06, "loss": 0.1171, "step": 3286 }, { "epoch": 1.0651328580686974, "grad_norm": 0.4315536320209503, "learning_rate": 7.476229670746318e-06, "loss": 0.1053, "step": 3287 }, { "epoch": 1.0654569021386908, "grad_norm": 0.4754064679145813, "learning_rate": 7.47470988256168e-06, "loss": 0.1206, "step": 3288 }, { "epoch": 1.0657809462086845, "grad_norm": 0.4845450818538666, "learning_rate": 7.47318979149651e-06, "loss": 0.1162, "step": 3289 }, { "epoch": 1.0661049902786779, "grad_norm": 0.46454256772994995, "learning_rate": 7.4716693977368516e-06, "loss": 0.1094, "step": 3290 }, { "epoch": 1.0664290343486713, "grad_norm": 0.519045352935791, "learning_rate": 7.470148701468786e-06, "loss": 0.1288, "step": 3291 }, { "epoch": 1.066753078418665, "grad_norm": 0.47300589084625244, "learning_rate": 7.468627702878434e-06, "loss": 0.1163, "step": 3292 }, { "epoch": 1.0670771224886584, "grad_norm": 0.4752744138240814, "learning_rate": 7.4671064021519494e-06, "loss": 0.117, "step": 3293 }, { "epoch": 1.067401166558652, "grad_norm": 0.48252978920936584, "learning_rate": 7.465584799475522e-06, "loss": 0.1169, "step": 3294 }, { "epoch": 1.0677252106286454, "grad_norm": 0.4439176917076111, "learning_rate": 7.4640628950353865e-06, "loss": 0.0988, "step": 3295 }, { "epoch": 1.068049254698639, "grad_norm": 0.510744035243988, "learning_rate": 7.462540689017806e-06, "loss": 0.1058, "step": 3296 }, { "epoch": 1.0683732987686325, "grad_norm": 0.4444325864315033, "learning_rate": 7.4610181816090845e-06, "loss": 0.1072, "step": 3297 }, { "epoch": 1.0686973428386262, "grad_norm": 0.4627124071121216, "learning_rate": 7.459495372995561e-06, "loss": 0.1071, "step": 3298 }, { "epoch": 1.0690213869086196, "grad_norm": 0.43973034620285034, "learning_rate": 7.4579722633636154e-06, "loss": 0.0997, "step": 3299 }, { "epoch": 1.069345430978613, "grad_norm": 0.4386976659297943, "learning_rate": 7.456448852899658e-06, "loss": 0.1097, "step": 3300 }, { "epoch": 1.0696694750486067, "grad_norm": 0.4828323721885681, "learning_rate": 7.454925141790141e-06, "loss": 0.115, "step": 3301 }, { "epoch": 1.0699935191186, "grad_norm": 0.5100405216217041, "learning_rate": 7.453401130221553e-06, "loss": 0.1211, "step": 3302 }, { "epoch": 1.0703175631885937, "grad_norm": 0.4685654938220978, "learning_rate": 7.451876818380417e-06, "loss": 0.1161, "step": 3303 }, { "epoch": 1.0706416072585871, "grad_norm": 0.45673686265945435, "learning_rate": 7.450352206453295e-06, "loss": 0.1011, "step": 3304 }, { "epoch": 1.0709656513285806, "grad_norm": 0.4703875184059143, "learning_rate": 7.448827294626785e-06, "loss": 0.1161, "step": 3305 }, { "epoch": 1.0712896953985742, "grad_norm": 0.4594164192676544, "learning_rate": 7.4473020830875195e-06, "loss": 0.1131, "step": 3306 }, { "epoch": 1.0716137394685676, "grad_norm": 0.4367749094963074, "learning_rate": 7.445776572022171e-06, "loss": 0.1005, "step": 3307 }, { "epoch": 1.0719377835385613, "grad_norm": 0.47489625215530396, "learning_rate": 7.444250761617447e-06, "loss": 0.1175, "step": 3308 }, { "epoch": 1.0722618276085547, "grad_norm": 0.4518619477748871, "learning_rate": 7.442724652060092e-06, "loss": 0.1073, "step": 3309 }, { "epoch": 1.0725858716785484, "grad_norm": 0.45332789421081543, "learning_rate": 7.441198243536886e-06, "loss": 0.1051, "step": 3310 }, { "epoch": 1.0729099157485418, "grad_norm": 0.4794146418571472, "learning_rate": 7.4396715362346495e-06, "loss": 0.1184, "step": 3311 }, { "epoch": 1.0732339598185354, "grad_norm": 0.4581579566001892, "learning_rate": 7.438144530340233e-06, "loss": 0.1097, "step": 3312 }, { "epoch": 1.0735580038885288, "grad_norm": 0.46173423528671265, "learning_rate": 7.43661722604053e-06, "loss": 0.1197, "step": 3313 }, { "epoch": 1.0738820479585223, "grad_norm": 0.437900185585022, "learning_rate": 7.435089623522466e-06, "loss": 0.1041, "step": 3314 }, { "epoch": 1.074206092028516, "grad_norm": 0.43803054094314575, "learning_rate": 7.433561722973006e-06, "loss": 0.1063, "step": 3315 }, { "epoch": 1.0745301360985093, "grad_norm": 0.49713608622550964, "learning_rate": 7.432033524579152e-06, "loss": 0.1148, "step": 3316 }, { "epoch": 1.074854180168503, "grad_norm": 0.44355711340904236, "learning_rate": 7.4305050285279364e-06, "loss": 0.1078, "step": 3317 }, { "epoch": 1.0751782242384964, "grad_norm": 0.528022825717926, "learning_rate": 7.4289762350064356e-06, "loss": 0.1287, "step": 3318 }, { "epoch": 1.07550226830849, "grad_norm": 0.5100262761116028, "learning_rate": 7.427447144201756e-06, "loss": 0.1249, "step": 3319 }, { "epoch": 1.0758263123784835, "grad_norm": 0.4281594157218933, "learning_rate": 7.4259177563010465e-06, "loss": 0.1026, "step": 3320 }, { "epoch": 1.076150356448477, "grad_norm": 0.5049169659614563, "learning_rate": 7.4243880714914875e-06, "loss": 0.1189, "step": 3321 }, { "epoch": 1.0764744005184705, "grad_norm": 0.4304545819759369, "learning_rate": 7.422858089960299e-06, "loss": 0.1029, "step": 3322 }, { "epoch": 1.076798444588464, "grad_norm": 0.5015721917152405, "learning_rate": 7.421327811894735e-06, "loss": 0.1188, "step": 3323 }, { "epoch": 1.0771224886584576, "grad_norm": 0.45259350538253784, "learning_rate": 7.419797237482089e-06, "loss": 0.1052, "step": 3324 }, { "epoch": 1.077446532728451, "grad_norm": 0.5052790641784668, "learning_rate": 7.418266366909685e-06, "loss": 0.127, "step": 3325 }, { "epoch": 1.0777705767984447, "grad_norm": 0.485783189535141, "learning_rate": 7.4167352003648885e-06, "loss": 0.1267, "step": 3326 }, { "epoch": 1.078094620868438, "grad_norm": 0.47802627086639404, "learning_rate": 7.4152037380351e-06, "loss": 0.1117, "step": 3327 }, { "epoch": 1.0784186649384315, "grad_norm": 0.47302913665771484, "learning_rate": 7.413671980107754e-06, "loss": 0.1192, "step": 3328 }, { "epoch": 1.0787427090084252, "grad_norm": 0.48858019709587097, "learning_rate": 7.412139926770323e-06, "loss": 0.1117, "step": 3329 }, { "epoch": 1.0790667530784186, "grad_norm": 0.44832563400268555, "learning_rate": 7.410607578210319e-06, "loss": 0.1048, "step": 3330 }, { "epoch": 1.0793907971484122, "grad_norm": 0.4587792754173279, "learning_rate": 7.409074934615282e-06, "loss": 0.1148, "step": 3331 }, { "epoch": 1.0797148412184057, "grad_norm": 0.4639880657196045, "learning_rate": 7.407541996172795e-06, "loss": 0.1077, "step": 3332 }, { "epoch": 1.0800388852883993, "grad_norm": 0.4755636155605316, "learning_rate": 7.406008763070474e-06, "loss": 0.1246, "step": 3333 }, { "epoch": 1.0803629293583927, "grad_norm": 0.48292648792266846, "learning_rate": 7.404475235495973e-06, "loss": 0.1127, "step": 3334 }, { "epoch": 1.0806869734283864, "grad_norm": 0.46788838505744934, "learning_rate": 7.402941413636979e-06, "loss": 0.1207, "step": 3335 }, { "epoch": 1.0810110174983798, "grad_norm": 0.425082802772522, "learning_rate": 7.40140729768122e-06, "loss": 0.0978, "step": 3336 }, { "epoch": 1.0813350615683732, "grad_norm": 0.4816244840621948, "learning_rate": 7.399872887816455e-06, "loss": 0.1168, "step": 3337 }, { "epoch": 1.0816591056383669, "grad_norm": 0.4528196454048157, "learning_rate": 7.3983381842304815e-06, "loss": 0.1148, "step": 3338 }, { "epoch": 1.0819831497083603, "grad_norm": 0.45505785942077637, "learning_rate": 7.39680318711113e-06, "loss": 0.1067, "step": 3339 }, { "epoch": 1.082307193778354, "grad_norm": 0.47752875089645386, "learning_rate": 7.395267896646272e-06, "loss": 0.1203, "step": 3340 }, { "epoch": 1.0826312378483474, "grad_norm": 0.43200385570526123, "learning_rate": 7.3937323130238125e-06, "loss": 0.1079, "step": 3341 }, { "epoch": 1.0829552819183408, "grad_norm": 0.4563518762588501, "learning_rate": 7.3921964364316915e-06, "loss": 0.1092, "step": 3342 }, { "epoch": 1.0832793259883344, "grad_norm": 0.5140635967254639, "learning_rate": 7.390660267057883e-06, "loss": 0.127, "step": 3343 }, { "epoch": 1.0836033700583279, "grad_norm": 0.48420506715774536, "learning_rate": 7.389123805090401e-06, "loss": 0.122, "step": 3344 }, { "epoch": 1.0839274141283215, "grad_norm": 0.46503114700317383, "learning_rate": 7.387587050717295e-06, "loss": 0.1143, "step": 3345 }, { "epoch": 1.084251458198315, "grad_norm": 0.444766640663147, "learning_rate": 7.386050004126647e-06, "loss": 0.1011, "step": 3346 }, { "epoch": 1.0845755022683086, "grad_norm": 0.49079981446266174, "learning_rate": 7.384512665506578e-06, "loss": 0.1177, "step": 3347 }, { "epoch": 1.084899546338302, "grad_norm": 0.4863133430480957, "learning_rate": 7.382975035045242e-06, "loss": 0.117, "step": 3348 }, { "epoch": 1.0852235904082956, "grad_norm": 0.4846367835998535, "learning_rate": 7.381437112930832e-06, "loss": 0.1211, "step": 3349 }, { "epoch": 1.085547634478289, "grad_norm": 0.49157649278640747, "learning_rate": 7.379898899351572e-06, "loss": 0.1145, "step": 3350 }, { "epoch": 1.0858716785482825, "grad_norm": 0.4918596148490906, "learning_rate": 7.378360394495726e-06, "loss": 0.128, "step": 3351 }, { "epoch": 1.0861957226182761, "grad_norm": 0.4473949372768402, "learning_rate": 7.376821598551592e-06, "loss": 0.1041, "step": 3352 }, { "epoch": 1.0865197666882696, "grad_norm": 0.47902122139930725, "learning_rate": 7.375282511707505e-06, "loss": 0.1171, "step": 3353 }, { "epoch": 1.0868438107582632, "grad_norm": 0.45830297470092773, "learning_rate": 7.373743134151832e-06, "loss": 0.1138, "step": 3354 }, { "epoch": 1.0871678548282566, "grad_norm": 0.48079654574394226, "learning_rate": 7.3722034660729795e-06, "loss": 0.1181, "step": 3355 }, { "epoch": 1.0874918988982503, "grad_norm": 0.4816071689128876, "learning_rate": 7.370663507659386e-06, "loss": 0.1164, "step": 3356 }, { "epoch": 1.0878159429682437, "grad_norm": 0.4607110023498535, "learning_rate": 7.36912325909953e-06, "loss": 0.1108, "step": 3357 }, { "epoch": 1.088139987038237, "grad_norm": 0.4666801393032074, "learning_rate": 7.367582720581923e-06, "loss": 0.1028, "step": 3358 }, { "epoch": 1.0884640311082308, "grad_norm": 0.4456480145454407, "learning_rate": 7.366041892295111e-06, "loss": 0.1066, "step": 3359 }, { "epoch": 1.0887880751782242, "grad_norm": 0.4845721423625946, "learning_rate": 7.364500774427675e-06, "loss": 0.116, "step": 3360 }, { "epoch": 1.0891121192482178, "grad_norm": 0.4765332043170929, "learning_rate": 7.3629593671682345e-06, "loss": 0.118, "step": 3361 }, { "epoch": 1.0894361633182112, "grad_norm": 0.4184534251689911, "learning_rate": 7.361417670705443e-06, "loss": 0.0986, "step": 3362 }, { "epoch": 1.089760207388205, "grad_norm": 0.492906391620636, "learning_rate": 7.3598756852279885e-06, "loss": 0.1205, "step": 3363 }, { "epoch": 1.0900842514581983, "grad_norm": 0.42441263794898987, "learning_rate": 7.358333410924596e-06, "loss": 0.0995, "step": 3364 }, { "epoch": 1.0904082955281917, "grad_norm": 0.44085580110549927, "learning_rate": 7.3567908479840235e-06, "loss": 0.1068, "step": 3365 }, { "epoch": 1.0907323395981854, "grad_norm": 0.4744189381599426, "learning_rate": 7.355247996595068e-06, "loss": 0.1091, "step": 3366 }, { "epoch": 1.0910563836681788, "grad_norm": 0.4566498398780823, "learning_rate": 7.353704856946559e-06, "loss": 0.1082, "step": 3367 }, { "epoch": 1.0913804277381725, "grad_norm": 0.4317505955696106, "learning_rate": 7.352161429227359e-06, "loss": 0.1037, "step": 3368 }, { "epoch": 1.0917044718081659, "grad_norm": 0.47662752866744995, "learning_rate": 7.350617713626372e-06, "loss": 0.1223, "step": 3369 }, { "epoch": 1.0920285158781595, "grad_norm": 0.49168187379837036, "learning_rate": 7.349073710332533e-06, "loss": 0.1173, "step": 3370 }, { "epoch": 1.092352559948153, "grad_norm": 0.47964388132095337, "learning_rate": 7.347529419534811e-06, "loss": 0.1139, "step": 3371 }, { "epoch": 1.0926766040181464, "grad_norm": 0.47219377756118774, "learning_rate": 7.3459848414222154e-06, "loss": 0.1184, "step": 3372 }, { "epoch": 1.09300064808814, "grad_norm": 0.4608166217803955, "learning_rate": 7.3444399761837855e-06, "loss": 0.1107, "step": 3373 }, { "epoch": 1.0933246921581334, "grad_norm": 0.49217841029167175, "learning_rate": 7.3428948240085985e-06, "loss": 0.1232, "step": 3374 }, { "epoch": 1.093648736228127, "grad_norm": 0.47193947434425354, "learning_rate": 7.3413493850857665e-06, "loss": 0.1133, "step": 3375 }, { "epoch": 1.0939727802981205, "grad_norm": 0.4649535119533539, "learning_rate": 7.3398036596044345e-06, "loss": 0.1183, "step": 3376 }, { "epoch": 1.0942968243681142, "grad_norm": 0.45862525701522827, "learning_rate": 7.3382576477537855e-06, "loss": 0.108, "step": 3377 }, { "epoch": 1.0946208684381076, "grad_norm": 0.4705563187599182, "learning_rate": 7.336711349723039e-06, "loss": 0.1168, "step": 3378 }, { "epoch": 1.094944912508101, "grad_norm": 0.44781872630119324, "learning_rate": 7.335164765701442e-06, "loss": 0.1085, "step": 3379 }, { "epoch": 1.0952689565780946, "grad_norm": 0.4419195353984833, "learning_rate": 7.3336178958782865e-06, "loss": 0.1008, "step": 3380 }, { "epoch": 1.095593000648088, "grad_norm": 0.4678310453891754, "learning_rate": 7.33207074044289e-06, "loss": 0.1114, "step": 3381 }, { "epoch": 1.0959170447180817, "grad_norm": 0.4491790235042572, "learning_rate": 7.33052329958461e-06, "loss": 0.1055, "step": 3382 }, { "epoch": 1.0962410887880751, "grad_norm": 0.5071040987968445, "learning_rate": 7.32897557349284e-06, "loss": 0.1171, "step": 3383 }, { "epoch": 1.0965651328580688, "grad_norm": 0.4346998333930969, "learning_rate": 7.327427562357008e-06, "loss": 0.0983, "step": 3384 }, { "epoch": 1.0968891769280622, "grad_norm": 0.4447813630104065, "learning_rate": 7.325879266366571e-06, "loss": 0.1013, "step": 3385 }, { "epoch": 1.0972132209980558, "grad_norm": 0.48892831802368164, "learning_rate": 7.324330685711029e-06, "loss": 0.1191, "step": 3386 }, { "epoch": 1.0975372650680493, "grad_norm": 0.4909844696521759, "learning_rate": 7.322781820579912e-06, "loss": 0.1222, "step": 3387 }, { "epoch": 1.0978613091380427, "grad_norm": 0.4649695158004761, "learning_rate": 7.321232671162787e-06, "loss": 0.1084, "step": 3388 }, { "epoch": 1.0981853532080363, "grad_norm": 0.4527525007724762, "learning_rate": 7.319683237649253e-06, "loss": 0.1119, "step": 3389 }, { "epoch": 1.0985093972780298, "grad_norm": 0.5357375144958496, "learning_rate": 7.3181335202289495e-06, "loss": 0.1129, "step": 3390 }, { "epoch": 1.0988334413480234, "grad_norm": 0.5059093236923218, "learning_rate": 7.3165835190915435e-06, "loss": 0.1173, "step": 3391 }, { "epoch": 1.0991574854180168, "grad_norm": 0.43152016401290894, "learning_rate": 7.315033234426741e-06, "loss": 0.11, "step": 3392 }, { "epoch": 1.0994815294880103, "grad_norm": 0.49721404910087585, "learning_rate": 7.3134826664242805e-06, "loss": 0.1113, "step": 3393 }, { "epoch": 1.099805573558004, "grad_norm": 0.4575223922729492, "learning_rate": 7.311931815273938e-06, "loss": 0.1135, "step": 3394 }, { "epoch": 1.1001296176279973, "grad_norm": 0.49471038579940796, "learning_rate": 7.310380681165523e-06, "loss": 0.124, "step": 3395 }, { "epoch": 1.100453661697991, "grad_norm": 0.4235450327396393, "learning_rate": 7.308829264288879e-06, "loss": 0.1042, "step": 3396 }, { "epoch": 1.1007777057679844, "grad_norm": 0.4568019211292267, "learning_rate": 7.307277564833886e-06, "loss": 0.108, "step": 3397 }, { "epoch": 1.101101749837978, "grad_norm": 0.4586898684501648, "learning_rate": 7.305725582990453e-06, "loss": 0.1065, "step": 3398 }, { "epoch": 1.1014257939079715, "grad_norm": 0.48752400279045105, "learning_rate": 7.30417331894853e-06, "loss": 0.1146, "step": 3399 }, { "epoch": 1.101749837977965, "grad_norm": 0.4247609078884125, "learning_rate": 7.3026207728980994e-06, "loss": 0.1105, "step": 3400 }, { "epoch": 1.1020738820479585, "grad_norm": 0.4526832401752472, "learning_rate": 7.301067945029178e-06, "loss": 0.116, "step": 3401 }, { "epoch": 1.102397926117952, "grad_norm": 0.480944961309433, "learning_rate": 7.299514835531815e-06, "loss": 0.1222, "step": 3402 }, { "epoch": 1.1027219701879456, "grad_norm": 0.4303266406059265, "learning_rate": 7.2979614445960975e-06, "loss": 0.1036, "step": 3403 }, { "epoch": 1.103046014257939, "grad_norm": 0.47771838307380676, "learning_rate": 7.296407772412146e-06, "loss": 0.1242, "step": 3404 }, { "epoch": 1.1033700583279327, "grad_norm": 0.47071340680122375, "learning_rate": 7.2948538191701136e-06, "loss": 0.1113, "step": 3405 }, { "epoch": 1.103694102397926, "grad_norm": 0.4870220720767975, "learning_rate": 7.293299585060188e-06, "loss": 0.1197, "step": 3406 }, { "epoch": 1.1040181464679197, "grad_norm": 0.4674378037452698, "learning_rate": 7.291745070272596e-06, "loss": 0.1096, "step": 3407 }, { "epoch": 1.1043421905379132, "grad_norm": 0.509381115436554, "learning_rate": 7.2901902749975915e-06, "loss": 0.1213, "step": 3408 }, { "epoch": 1.1046662346079066, "grad_norm": 0.48851171135902405, "learning_rate": 7.288635199425471e-06, "loss": 0.1002, "step": 3409 }, { "epoch": 1.1049902786779002, "grad_norm": 0.4779938757419586, "learning_rate": 7.287079843746555e-06, "loss": 0.1147, "step": 3410 }, { "epoch": 1.1053143227478937, "grad_norm": 0.47918787598609924, "learning_rate": 7.285524208151208e-06, "loss": 0.12, "step": 3411 }, { "epoch": 1.1056383668178873, "grad_norm": 0.49099570512771606, "learning_rate": 7.283968292829824e-06, "loss": 0.1121, "step": 3412 }, { "epoch": 1.1059624108878807, "grad_norm": 0.4889676868915558, "learning_rate": 7.282412097972831e-06, "loss": 0.1152, "step": 3413 }, { "epoch": 1.1062864549578744, "grad_norm": 0.459026038646698, "learning_rate": 7.280855623770692e-06, "loss": 0.1167, "step": 3414 }, { "epoch": 1.1066104990278678, "grad_norm": 0.479397177696228, "learning_rate": 7.279298870413906e-06, "loss": 0.1096, "step": 3415 }, { "epoch": 1.1069345430978612, "grad_norm": 0.503694474697113, "learning_rate": 7.2777418380930035e-06, "loss": 0.1159, "step": 3416 }, { "epoch": 1.1072585871678549, "grad_norm": 0.5161811113357544, "learning_rate": 7.276184526998548e-06, "loss": 0.1187, "step": 3417 }, { "epoch": 1.1075826312378483, "grad_norm": 0.46992769837379456, "learning_rate": 7.2746269373211445e-06, "loss": 0.1165, "step": 3418 }, { "epoch": 1.107906675307842, "grad_norm": 0.4351085126399994, "learning_rate": 7.2730690692514225e-06, "loss": 0.1007, "step": 3419 }, { "epoch": 1.1082307193778353, "grad_norm": 0.4792276918888092, "learning_rate": 7.271510922980052e-06, "loss": 0.1049, "step": 3420 }, { "epoch": 1.108554763447829, "grad_norm": 0.5022870898246765, "learning_rate": 7.269952498697734e-06, "loss": 0.1251, "step": 3421 }, { "epoch": 1.1088788075178224, "grad_norm": 0.48089098930358887, "learning_rate": 7.2683937965952055e-06, "loss": 0.1124, "step": 3422 }, { "epoch": 1.1092028515878158, "grad_norm": 0.49522295594215393, "learning_rate": 7.266834816863237e-06, "loss": 0.1177, "step": 3423 }, { "epoch": 1.1095268956578095, "grad_norm": 0.45647338032722473, "learning_rate": 7.26527555969263e-06, "loss": 0.105, "step": 3424 }, { "epoch": 1.109850939727803, "grad_norm": 0.4819112718105316, "learning_rate": 7.263716025274225e-06, "loss": 0.1091, "step": 3425 }, { "epoch": 1.1101749837977966, "grad_norm": 0.5295591354370117, "learning_rate": 7.262156213798892e-06, "loss": 0.1332, "step": 3426 }, { "epoch": 1.11049902786779, "grad_norm": 0.438809871673584, "learning_rate": 7.260596125457538e-06, "loss": 0.1014, "step": 3427 }, { "epoch": 1.1108230719377836, "grad_norm": 0.48159369826316833, "learning_rate": 7.259035760441103e-06, "loss": 0.1192, "step": 3428 }, { "epoch": 1.111147116007777, "grad_norm": 0.47701340913772583, "learning_rate": 7.25747511894056e-06, "loss": 0.113, "step": 3429 }, { "epoch": 1.1114711600777705, "grad_norm": 0.47457194328308105, "learning_rate": 7.255914201146917e-06, "loss": 0.1134, "step": 3430 }, { "epoch": 1.1117952041477641, "grad_norm": 0.5145271420478821, "learning_rate": 7.254353007251213e-06, "loss": 0.123, "step": 3431 }, { "epoch": 1.1121192482177575, "grad_norm": 0.41805732250213623, "learning_rate": 7.252791537444527e-06, "loss": 0.0951, "step": 3432 }, { "epoch": 1.1124432922877512, "grad_norm": 0.48133549094200134, "learning_rate": 7.251229791917964e-06, "loss": 0.109, "step": 3433 }, { "epoch": 1.1127673363577446, "grad_norm": 0.4440436065196991, "learning_rate": 7.249667770862668e-06, "loss": 0.109, "step": 3434 }, { "epoch": 1.1130913804277383, "grad_norm": 0.4566704332828522, "learning_rate": 7.248105474469816e-06, "loss": 0.1154, "step": 3435 }, { "epoch": 1.1134154244977317, "grad_norm": 0.4194703996181488, "learning_rate": 7.2465429029306164e-06, "loss": 0.1001, "step": 3436 }, { "epoch": 1.1137394685677253, "grad_norm": 0.49655959010124207, "learning_rate": 7.244980056436315e-06, "loss": 0.1145, "step": 3437 }, { "epoch": 1.1140635126377187, "grad_norm": 0.4804117977619171, "learning_rate": 7.243416935178187e-06, "loss": 0.1122, "step": 3438 }, { "epoch": 1.1143875567077122, "grad_norm": 0.4633403420448303, "learning_rate": 7.241853539347545e-06, "loss": 0.1121, "step": 3439 }, { "epoch": 1.1147116007777058, "grad_norm": 0.5017057061195374, "learning_rate": 7.2402898691357315e-06, "loss": 0.1262, "step": 3440 }, { "epoch": 1.1150356448476992, "grad_norm": 0.4831858277320862, "learning_rate": 7.238725924734125e-06, "loss": 0.1127, "step": 3441 }, { "epoch": 1.1153596889176929, "grad_norm": 0.47501975297927856, "learning_rate": 7.237161706334139e-06, "loss": 0.1098, "step": 3442 }, { "epoch": 1.1156837329876863, "grad_norm": 0.4766427278518677, "learning_rate": 7.235597214127218e-06, "loss": 0.1097, "step": 3443 }, { "epoch": 1.1160077770576797, "grad_norm": 0.44049620628356934, "learning_rate": 7.23403244830484e-06, "loss": 0.107, "step": 3444 }, { "epoch": 1.1163318211276734, "grad_norm": 0.47011467814445496, "learning_rate": 7.232467409058518e-06, "loss": 0.1056, "step": 3445 }, { "epoch": 1.1166558651976668, "grad_norm": 0.49452170729637146, "learning_rate": 7.2309020965797945e-06, "loss": 0.1176, "step": 3446 }, { "epoch": 1.1169799092676604, "grad_norm": 0.42975515127182007, "learning_rate": 7.229336511060253e-06, "loss": 0.1035, "step": 3447 }, { "epoch": 1.1173039533376539, "grad_norm": 0.4905330240726471, "learning_rate": 7.227770652691504e-06, "loss": 0.1291, "step": 3448 }, { "epoch": 1.1176279974076475, "grad_norm": 0.4783221483230591, "learning_rate": 7.226204521665195e-06, "loss": 0.1185, "step": 3449 }, { "epoch": 1.117952041477641, "grad_norm": 0.4664043188095093, "learning_rate": 7.224638118173e-06, "loss": 0.1117, "step": 3450 }, { "epoch": 1.1182760855476346, "grad_norm": 0.472312867641449, "learning_rate": 7.223071442406639e-06, "loss": 0.1147, "step": 3451 }, { "epoch": 1.118600129617628, "grad_norm": 0.46934542059898376, "learning_rate": 7.221504494557854e-06, "loss": 0.1078, "step": 3452 }, { "epoch": 1.1189241736876214, "grad_norm": 0.4561583399772644, "learning_rate": 7.219937274818424e-06, "loss": 0.1108, "step": 3453 }, { "epoch": 1.119248217757615, "grad_norm": 0.4605262875556946, "learning_rate": 7.218369783380163e-06, "loss": 0.1109, "step": 3454 }, { "epoch": 1.1195722618276085, "grad_norm": 0.490578293800354, "learning_rate": 7.216802020434915e-06, "loss": 0.1095, "step": 3455 }, { "epoch": 1.1198963058976021, "grad_norm": 0.5183651447296143, "learning_rate": 7.215233986174561e-06, "loss": 0.1224, "step": 3456 }, { "epoch": 1.1202203499675956, "grad_norm": 0.49729302525520325, "learning_rate": 7.213665680791012e-06, "loss": 0.1186, "step": 3457 }, { "epoch": 1.1205443940375892, "grad_norm": 0.460462749004364, "learning_rate": 7.212097104476213e-06, "loss": 0.1115, "step": 3458 }, { "epoch": 1.1208684381075826, "grad_norm": 0.495995432138443, "learning_rate": 7.210528257422144e-06, "loss": 0.119, "step": 3459 }, { "epoch": 1.121192482177576, "grad_norm": 0.4926115870475769, "learning_rate": 7.208959139820815e-06, "loss": 0.1193, "step": 3460 }, { "epoch": 1.1215165262475697, "grad_norm": 0.4380375146865845, "learning_rate": 7.207389751864271e-06, "loss": 0.1044, "step": 3461 }, { "epoch": 1.1218405703175631, "grad_norm": 0.45447611808776855, "learning_rate": 7.205820093744591e-06, "loss": 0.1045, "step": 3462 }, { "epoch": 1.1221646143875568, "grad_norm": 0.4510899484157562, "learning_rate": 7.204250165653888e-06, "loss": 0.1016, "step": 3463 }, { "epoch": 1.1224886584575502, "grad_norm": 0.509792149066925, "learning_rate": 7.2026799677843e-06, "loss": 0.1183, "step": 3464 }, { "epoch": 1.1228127025275438, "grad_norm": 0.4836459755897522, "learning_rate": 7.20110950032801e-06, "loss": 0.1133, "step": 3465 }, { "epoch": 1.1231367465975373, "grad_norm": 0.49302640557289124, "learning_rate": 7.1995387634772255e-06, "loss": 0.1183, "step": 3466 }, { "epoch": 1.1234607906675307, "grad_norm": 0.47388386726379395, "learning_rate": 7.197967757424188e-06, "loss": 0.1119, "step": 3467 }, { "epoch": 1.1237848347375243, "grad_norm": 0.49097520112991333, "learning_rate": 7.196396482361176e-06, "loss": 0.1159, "step": 3468 }, { "epoch": 1.1241088788075178, "grad_norm": 0.4219222664833069, "learning_rate": 7.194824938480496e-06, "loss": 0.0995, "step": 3469 }, { "epoch": 1.1244329228775114, "grad_norm": 0.47584277391433716, "learning_rate": 7.193253125974493e-06, "loss": 0.1108, "step": 3470 }, { "epoch": 1.1247569669475048, "grad_norm": 0.4976772665977478, "learning_rate": 7.191681045035538e-06, "loss": 0.1159, "step": 3471 }, { "epoch": 1.1250810110174985, "grad_norm": 0.4697946310043335, "learning_rate": 7.190108695856041e-06, "loss": 0.1168, "step": 3472 }, { "epoch": 1.125405055087492, "grad_norm": 0.44162654876708984, "learning_rate": 7.1885360786284405e-06, "loss": 0.1047, "step": 3473 }, { "epoch": 1.1257290991574855, "grad_norm": 0.4678237736225128, "learning_rate": 7.186963193545212e-06, "loss": 0.1118, "step": 3474 }, { "epoch": 1.126053143227479, "grad_norm": 0.44769546389579773, "learning_rate": 7.185390040798861e-06, "loss": 0.1098, "step": 3475 }, { "epoch": 1.1263771872974724, "grad_norm": 0.46062126755714417, "learning_rate": 7.183816620581923e-06, "loss": 0.1003, "step": 3476 }, { "epoch": 1.126701231367466, "grad_norm": 0.4380654990673065, "learning_rate": 7.182242933086974e-06, "loss": 0.1113, "step": 3477 }, { "epoch": 1.1270252754374595, "grad_norm": 0.4305112659931183, "learning_rate": 7.180668978506613e-06, "loss": 0.1088, "step": 3478 }, { "epoch": 1.127349319507453, "grad_norm": 0.4602273106575012, "learning_rate": 7.1790947570334815e-06, "loss": 0.1047, "step": 3479 }, { "epoch": 1.1276733635774465, "grad_norm": 0.4375208914279938, "learning_rate": 7.1775202688602455e-06, "loss": 0.1094, "step": 3480 }, { "epoch": 1.12799740764744, "grad_norm": 0.48319289088249207, "learning_rate": 7.17594551417961e-06, "loss": 0.1177, "step": 3481 }, { "epoch": 1.1283214517174336, "grad_norm": 0.49129560589790344, "learning_rate": 7.174370493184308e-06, "loss": 0.1195, "step": 3482 }, { "epoch": 1.128645495787427, "grad_norm": 0.767615020275116, "learning_rate": 7.172795206067107e-06, "loss": 0.1144, "step": 3483 }, { "epoch": 1.1289695398574207, "grad_norm": 0.4652465879917145, "learning_rate": 7.171219653020807e-06, "loss": 0.1144, "step": 3484 }, { "epoch": 1.129293583927414, "grad_norm": 0.45197775959968567, "learning_rate": 7.16964383423824e-06, "loss": 0.1013, "step": 3485 }, { "epoch": 1.1296176279974077, "grad_norm": 0.45165348052978516, "learning_rate": 7.168067749912273e-06, "loss": 0.1131, "step": 3486 }, { "epoch": 1.1299416720674011, "grad_norm": 0.47169649600982666, "learning_rate": 7.1664914002358e-06, "loss": 0.1198, "step": 3487 }, { "epoch": 1.1302657161373948, "grad_norm": 0.44688108563423157, "learning_rate": 7.164914785401756e-06, "loss": 0.107, "step": 3488 }, { "epoch": 1.1305897602073882, "grad_norm": 0.4839881360530853, "learning_rate": 7.163337905603097e-06, "loss": 0.1247, "step": 3489 }, { "epoch": 1.1309138042773816, "grad_norm": 0.48105567693710327, "learning_rate": 7.161760761032822e-06, "loss": 0.1136, "step": 3490 }, { "epoch": 1.1312378483473753, "grad_norm": 0.47501346468925476, "learning_rate": 7.160183351883957e-06, "loss": 0.1111, "step": 3491 }, { "epoch": 1.1315618924173687, "grad_norm": 0.513714075088501, "learning_rate": 7.158605678349562e-06, "loss": 0.1227, "step": 3492 }, { "epoch": 1.1318859364873624, "grad_norm": 0.4716878831386566, "learning_rate": 7.15702774062273e-06, "loss": 0.1235, "step": 3493 }, { "epoch": 1.1322099805573558, "grad_norm": 0.47698792815208435, "learning_rate": 7.155449538896584e-06, "loss": 0.1162, "step": 3494 }, { "epoch": 1.1325340246273492, "grad_norm": 0.5115220546722412, "learning_rate": 7.15387107336428e-06, "loss": 0.1233, "step": 3495 }, { "epoch": 1.1328580686973428, "grad_norm": 0.45470064878463745, "learning_rate": 7.1522923442190074e-06, "loss": 0.1107, "step": 3496 }, { "epoch": 1.1331821127673363, "grad_norm": 0.4847749173641205, "learning_rate": 7.15071335165399e-06, "loss": 0.1266, "step": 3497 }, { "epoch": 1.13350615683733, "grad_norm": 0.47126853466033936, "learning_rate": 7.149134095862476e-06, "loss": 0.1163, "step": 3498 }, { "epoch": 1.1338302009073233, "grad_norm": 0.4674606919288635, "learning_rate": 7.1475545770377555e-06, "loss": 0.1203, "step": 3499 }, { "epoch": 1.134154244977317, "grad_norm": 0.4372369349002838, "learning_rate": 7.145974795373145e-06, "loss": 0.1114, "step": 3500 }, { "epoch": 1.1344782890473104, "grad_norm": 0.4782916009426117, "learning_rate": 7.1443947510619925e-06, "loss": 0.1193, "step": 3501 }, { "epoch": 1.134802333117304, "grad_norm": 0.48257872462272644, "learning_rate": 7.142814444297683e-06, "loss": 0.1158, "step": 3502 }, { "epoch": 1.1351263771872975, "grad_norm": 0.4267340302467346, "learning_rate": 7.1412338752736286e-06, "loss": 0.1069, "step": 3503 }, { "epoch": 1.135450421257291, "grad_norm": 0.4908585548400879, "learning_rate": 7.1396530441832775e-06, "loss": 0.1215, "step": 3504 }, { "epoch": 1.1357744653272845, "grad_norm": 0.48388153314590454, "learning_rate": 7.1380719512201065e-06, "loss": 0.1204, "step": 3505 }, { "epoch": 1.136098509397278, "grad_norm": 0.4557008445262909, "learning_rate": 7.136490596577629e-06, "loss": 0.1185, "step": 3506 }, { "epoch": 1.1364225534672716, "grad_norm": 0.4656080901622772, "learning_rate": 7.134908980449383e-06, "loss": 0.1215, "step": 3507 }, { "epoch": 1.136746597537265, "grad_norm": 0.4793829619884491, "learning_rate": 7.133327103028946e-06, "loss": 0.1217, "step": 3508 }, { "epoch": 1.1370706416072587, "grad_norm": 0.44613632559776306, "learning_rate": 7.131744964509925e-06, "loss": 0.1105, "step": 3509 }, { "epoch": 1.137394685677252, "grad_norm": 0.43979090452194214, "learning_rate": 7.130162565085955e-06, "loss": 0.1045, "step": 3510 }, { "epoch": 1.1377187297472457, "grad_norm": 0.4457186162471771, "learning_rate": 7.1285799049507095e-06, "loss": 0.106, "step": 3511 }, { "epoch": 1.1380427738172392, "grad_norm": 0.45643284916877747, "learning_rate": 7.126996984297891e-06, "loss": 0.1118, "step": 3512 }, { "epoch": 1.1383668178872326, "grad_norm": 0.4419260323047638, "learning_rate": 7.125413803321232e-06, "loss": 0.107, "step": 3513 }, { "epoch": 1.1386908619572262, "grad_norm": 0.4509630799293518, "learning_rate": 7.123830362214498e-06, "loss": 0.1141, "step": 3514 }, { "epoch": 1.1390149060272197, "grad_norm": 0.44466331601142883, "learning_rate": 7.122246661171488e-06, "loss": 0.0995, "step": 3515 }, { "epoch": 1.1393389500972133, "grad_norm": 0.4958738088607788, "learning_rate": 7.120662700386032e-06, "loss": 0.1174, "step": 3516 }, { "epoch": 1.1396629941672067, "grad_norm": 0.4959253966808319, "learning_rate": 7.119078480051993e-06, "loss": 0.1139, "step": 3517 }, { "epoch": 1.1399870382372002, "grad_norm": 0.4844566583633423, "learning_rate": 7.11749400036326e-06, "loss": 0.1157, "step": 3518 }, { "epoch": 1.1403110823071938, "grad_norm": 0.46060386300086975, "learning_rate": 7.1159092615137614e-06, "loss": 0.1074, "step": 3519 }, { "epoch": 1.1406351263771872, "grad_norm": 0.49883654713630676, "learning_rate": 7.114324263697452e-06, "loss": 0.1204, "step": 3520 }, { "epoch": 1.1409591704471809, "grad_norm": 0.47432583570480347, "learning_rate": 7.112739007108321e-06, "loss": 0.1183, "step": 3521 }, { "epoch": 1.1412832145171743, "grad_norm": 0.4798056483268738, "learning_rate": 7.111153491940389e-06, "loss": 0.1055, "step": 3522 }, { "epoch": 1.141607258587168, "grad_norm": 0.4634266793727875, "learning_rate": 7.109567718387706e-06, "loss": 0.111, "step": 3523 }, { "epoch": 1.1419313026571614, "grad_norm": 0.4673268795013428, "learning_rate": 7.1079816866443585e-06, "loss": 0.113, "step": 3524 }, { "epoch": 1.142255346727155, "grad_norm": 0.4723830223083496, "learning_rate": 7.106395396904458e-06, "loss": 0.1117, "step": 3525 }, { "epoch": 1.1425793907971484, "grad_norm": 0.45182153582572937, "learning_rate": 7.104808849362153e-06, "loss": 0.11, "step": 3526 }, { "epoch": 1.1429034348671419, "grad_norm": 0.5160151124000549, "learning_rate": 7.103222044211619e-06, "loss": 0.1198, "step": 3527 }, { "epoch": 1.1432274789371355, "grad_norm": 0.4806891679763794, "learning_rate": 7.10163498164707e-06, "loss": 0.1147, "step": 3528 }, { "epoch": 1.143551523007129, "grad_norm": 0.464277058839798, "learning_rate": 7.1000476618627435e-06, "loss": 0.1066, "step": 3529 }, { "epoch": 1.1438755670771226, "grad_norm": 0.4657345116138458, "learning_rate": 7.098460085052915e-06, "loss": 0.1089, "step": 3530 }, { "epoch": 1.144199611147116, "grad_norm": 0.4736658036708832, "learning_rate": 7.096872251411885e-06, "loss": 0.1138, "step": 3531 }, { "epoch": 1.1445236552171094, "grad_norm": 0.5042957663536072, "learning_rate": 7.0952841611339906e-06, "loss": 0.1247, "step": 3532 }, { "epoch": 1.144847699287103, "grad_norm": 0.47397053241729736, "learning_rate": 7.093695814413599e-06, "loss": 0.1165, "step": 3533 }, { "epoch": 1.1451717433570965, "grad_norm": 0.4877525568008423, "learning_rate": 7.0921072114451084e-06, "loss": 0.1149, "step": 3534 }, { "epoch": 1.1454957874270901, "grad_norm": 0.4706099331378937, "learning_rate": 7.090518352422948e-06, "loss": 0.109, "step": 3535 }, { "epoch": 1.1458198314970836, "grad_norm": 0.4718784689903259, "learning_rate": 7.088929237541579e-06, "loss": 0.1044, "step": 3536 }, { "epoch": 1.1461438755670772, "grad_norm": 0.46416759490966797, "learning_rate": 7.087339866995495e-06, "loss": 0.117, "step": 3537 }, { "epoch": 1.1464679196370706, "grad_norm": 0.49240201711654663, "learning_rate": 7.0857502409792166e-06, "loss": 0.1254, "step": 3538 }, { "epoch": 1.1467919637070643, "grad_norm": 0.45724916458129883, "learning_rate": 7.084160359687302e-06, "loss": 0.1103, "step": 3539 }, { "epoch": 1.1471160077770577, "grad_norm": 0.4559352993965149, "learning_rate": 7.082570223314335e-06, "loss": 0.1116, "step": 3540 }, { "epoch": 1.1474400518470511, "grad_norm": 0.4867151081562042, "learning_rate": 7.080979832054933e-06, "loss": 0.1095, "step": 3541 }, { "epoch": 1.1477640959170448, "grad_norm": 0.4994567632675171, "learning_rate": 7.0793891861037445e-06, "loss": 0.1159, "step": 3542 }, { "epoch": 1.1480881399870382, "grad_norm": 0.4985491931438446, "learning_rate": 7.077798285655452e-06, "loss": 0.125, "step": 3543 }, { "epoch": 1.1484121840570318, "grad_norm": 0.46385741233825684, "learning_rate": 7.076207130904762e-06, "loss": 0.105, "step": 3544 }, { "epoch": 1.1487362281270252, "grad_norm": 0.46539029479026794, "learning_rate": 7.074615722046418e-06, "loss": 0.1147, "step": 3545 }, { "epoch": 1.1490602721970187, "grad_norm": 0.45837581157684326, "learning_rate": 7.073024059275194e-06, "loss": 0.1118, "step": 3546 }, { "epoch": 1.1493843162670123, "grad_norm": 0.42826589941978455, "learning_rate": 7.071432142785895e-06, "loss": 0.1077, "step": 3547 }, { "epoch": 1.1497083603370057, "grad_norm": 0.49138933420181274, "learning_rate": 7.069839972773352e-06, "loss": 0.1179, "step": 3548 }, { "epoch": 1.1500324044069994, "grad_norm": 0.49313196539878845, "learning_rate": 7.0682475494324365e-06, "loss": 0.1172, "step": 3549 }, { "epoch": 1.1503564484769928, "grad_norm": 0.45699644088745117, "learning_rate": 7.066654872958042e-06, "loss": 0.1172, "step": 3550 }, { "epoch": 1.1506804925469865, "grad_norm": 0.46840643882751465, "learning_rate": 7.0650619435451e-06, "loss": 0.116, "step": 3551 }, { "epoch": 1.1510045366169799, "grad_norm": 0.5004029870033264, "learning_rate": 7.063468761388564e-06, "loss": 0.1254, "step": 3552 }, { "epoch": 1.1513285806869735, "grad_norm": 0.528785765171051, "learning_rate": 7.061875326683429e-06, "loss": 0.1204, "step": 3553 }, { "epoch": 1.151652624756967, "grad_norm": 0.5159833431243896, "learning_rate": 7.060281639624714e-06, "loss": 0.1272, "step": 3554 }, { "epoch": 1.1519766688269604, "grad_norm": 0.4606555998325348, "learning_rate": 7.0586877004074725e-06, "loss": 0.1124, "step": 3555 }, { "epoch": 1.152300712896954, "grad_norm": 0.45181626081466675, "learning_rate": 7.057093509226785e-06, "loss": 0.1106, "step": 3556 }, { "epoch": 1.1526247569669474, "grad_norm": 0.4718472957611084, "learning_rate": 7.055499066277767e-06, "loss": 0.1121, "step": 3557 }, { "epoch": 1.152948801036941, "grad_norm": 0.45812535285949707, "learning_rate": 7.053904371755562e-06, "loss": 0.111, "step": 3558 }, { "epoch": 1.1532728451069345, "grad_norm": 0.5077002048492432, "learning_rate": 7.052309425855344e-06, "loss": 0.1267, "step": 3559 }, { "epoch": 1.1535968891769282, "grad_norm": 0.4858371317386627, "learning_rate": 7.050714228772322e-06, "loss": 0.1177, "step": 3560 }, { "epoch": 1.1539209332469216, "grad_norm": 0.45799070596694946, "learning_rate": 7.04911878070173e-06, "loss": 0.1075, "step": 3561 }, { "epoch": 1.1542449773169152, "grad_norm": 0.48219260573387146, "learning_rate": 7.047523081838836e-06, "loss": 0.1229, "step": 3562 }, { "epoch": 1.1545690213869086, "grad_norm": 0.43742167949676514, "learning_rate": 7.045927132378939e-06, "loss": 0.1022, "step": 3563 }, { "epoch": 1.154893065456902, "grad_norm": 0.4254918396472931, "learning_rate": 7.044330932517367e-06, "loss": 0.0991, "step": 3564 }, { "epoch": 1.1552171095268957, "grad_norm": 0.45003291964530945, "learning_rate": 7.042734482449478e-06, "loss": 0.11, "step": 3565 }, { "epoch": 1.1555411535968891, "grad_norm": 0.43879011273384094, "learning_rate": 7.041137782370665e-06, "loss": 0.1061, "step": 3566 }, { "epoch": 1.1558651976668828, "grad_norm": 0.42425820231437683, "learning_rate": 7.0395408324763485e-06, "loss": 0.0993, "step": 3567 }, { "epoch": 1.1561892417368762, "grad_norm": 0.4832261800765991, "learning_rate": 7.037943632961977e-06, "loss": 0.1188, "step": 3568 }, { "epoch": 1.1565132858068696, "grad_norm": 0.5043490529060364, "learning_rate": 7.036346184023033e-06, "loss": 0.1264, "step": 3569 }, { "epoch": 1.1568373298768633, "grad_norm": 0.4804173409938812, "learning_rate": 7.034748485855028e-06, "loss": 0.1139, "step": 3570 }, { "epoch": 1.1571613739468567, "grad_norm": 0.4613834023475647, "learning_rate": 7.03315053865351e-06, "loss": 0.1094, "step": 3571 }, { "epoch": 1.1574854180168503, "grad_norm": 0.46034568548202515, "learning_rate": 7.031552342614046e-06, "loss": 0.1118, "step": 3572 }, { "epoch": 1.1578094620868438, "grad_norm": 0.41124221682548523, "learning_rate": 7.029953897932243e-06, "loss": 0.0983, "step": 3573 }, { "epoch": 1.1581335061568374, "grad_norm": 0.43085286021232605, "learning_rate": 7.028355204803735e-06, "loss": 0.1025, "step": 3574 }, { "epoch": 1.1584575502268308, "grad_norm": 0.4392501413822174, "learning_rate": 7.026756263424184e-06, "loss": 0.1021, "step": 3575 }, { "epoch": 1.1587815942968245, "grad_norm": 0.4602506756782532, "learning_rate": 7.0251570739892884e-06, "loss": 0.1139, "step": 3576 }, { "epoch": 1.159105638366818, "grad_norm": 0.5223574638366699, "learning_rate": 7.023557636694771e-06, "loss": 0.1237, "step": 3577 }, { "epoch": 1.1594296824368113, "grad_norm": 0.42616182565689087, "learning_rate": 7.021957951736389e-06, "loss": 0.1033, "step": 3578 }, { "epoch": 1.159753726506805, "grad_norm": 0.4668428599834442, "learning_rate": 7.0203580193099285e-06, "loss": 0.1081, "step": 3579 }, { "epoch": 1.1600777705767984, "grad_norm": 0.4539085328578949, "learning_rate": 7.018757839611204e-06, "loss": 0.1078, "step": 3580 }, { "epoch": 1.160401814646792, "grad_norm": 0.46340179443359375, "learning_rate": 7.0171574128360635e-06, "loss": 0.1133, "step": 3581 }, { "epoch": 1.1607258587167855, "grad_norm": 0.4713670611381531, "learning_rate": 7.015556739180383e-06, "loss": 0.1156, "step": 3582 }, { "epoch": 1.1610499027867789, "grad_norm": 0.4762658476829529, "learning_rate": 7.01395581884007e-06, "loss": 0.1081, "step": 3583 }, { "epoch": 1.1613739468567725, "grad_norm": 0.4581030011177063, "learning_rate": 7.012354652011062e-06, "loss": 0.1085, "step": 3584 }, { "epoch": 1.161697990926766, "grad_norm": 0.4777476489543915, "learning_rate": 7.010753238889325e-06, "loss": 0.1276, "step": 3585 }, { "epoch": 1.1620220349967596, "grad_norm": 0.4817984700202942, "learning_rate": 7.009151579670856e-06, "loss": 0.1115, "step": 3586 }, { "epoch": 1.162346079066753, "grad_norm": 0.5016462206840515, "learning_rate": 7.007549674551686e-06, "loss": 0.1153, "step": 3587 }, { "epoch": 1.1626701231367467, "grad_norm": 0.462336927652359, "learning_rate": 7.005947523727869e-06, "loss": 0.111, "step": 3588 }, { "epoch": 1.16299416720674, "grad_norm": 0.45126065611839294, "learning_rate": 7.004345127395493e-06, "loss": 0.098, "step": 3589 }, { "epoch": 1.1633182112767337, "grad_norm": 0.4864955246448517, "learning_rate": 7.0027424857506784e-06, "loss": 0.1118, "step": 3590 }, { "epoch": 1.1636422553467272, "grad_norm": 0.5310540795326233, "learning_rate": 7.001139598989572e-06, "loss": 0.124, "step": 3591 }, { "epoch": 1.1639662994167206, "grad_norm": 0.4483162462711334, "learning_rate": 6.999536467308351e-06, "loss": 0.1049, "step": 3592 }, { "epoch": 1.1642903434867142, "grad_norm": 0.4710056483745575, "learning_rate": 6.997933090903224e-06, "loss": 0.1131, "step": 3593 }, { "epoch": 1.1646143875567077, "grad_norm": 0.4988642632961273, "learning_rate": 6.996329469970427e-06, "loss": 0.1224, "step": 3594 }, { "epoch": 1.1649384316267013, "grad_norm": 0.4584542512893677, "learning_rate": 6.994725604706229e-06, "loss": 0.1102, "step": 3595 }, { "epoch": 1.1652624756966947, "grad_norm": 0.4387478828430176, "learning_rate": 6.993121495306928e-06, "loss": 0.0976, "step": 3596 }, { "epoch": 1.1655865197666881, "grad_norm": 0.4995095431804657, "learning_rate": 6.991517141968851e-06, "loss": 0.1174, "step": 3597 }, { "epoch": 1.1659105638366818, "grad_norm": 0.45613548159599304, "learning_rate": 6.989912544888354e-06, "loss": 0.107, "step": 3598 }, { "epoch": 1.1662346079066752, "grad_norm": 0.46213752031326294, "learning_rate": 6.988307704261826e-06, "loss": 0.11, "step": 3599 }, { "epoch": 1.1665586519766689, "grad_norm": 0.4745365381240845, "learning_rate": 6.986702620285683e-06, "loss": 0.1132, "step": 3600 }, { "epoch": 1.1668826960466623, "grad_norm": 0.42137643694877625, "learning_rate": 6.985097293156373e-06, "loss": 0.0969, "step": 3601 }, { "epoch": 1.167206740116656, "grad_norm": 0.49307501316070557, "learning_rate": 6.9834917230703705e-06, "loss": 0.1141, "step": 3602 }, { "epoch": 1.1675307841866494, "grad_norm": 0.4686894714832306, "learning_rate": 6.981885910224184e-06, "loss": 0.1186, "step": 3603 }, { "epoch": 1.167854828256643, "grad_norm": 0.4602605104446411, "learning_rate": 6.980279854814348e-06, "loss": 0.1129, "step": 3604 }, { "epoch": 1.1681788723266364, "grad_norm": 0.547022819519043, "learning_rate": 6.978673557037427e-06, "loss": 0.1353, "step": 3605 }, { "epoch": 1.1685029163966298, "grad_norm": 0.4729814827442169, "learning_rate": 6.977067017090019e-06, "loss": 0.1147, "step": 3606 }, { "epoch": 1.1688269604666235, "grad_norm": 0.48199722170829773, "learning_rate": 6.975460235168747e-06, "loss": 0.1119, "step": 3607 }, { "epoch": 1.169151004536617, "grad_norm": 0.48165759444236755, "learning_rate": 6.973853211470266e-06, "loss": 0.111, "step": 3608 }, { "epoch": 1.1694750486066106, "grad_norm": 0.5283550024032593, "learning_rate": 6.972245946191262e-06, "loss": 0.1238, "step": 3609 }, { "epoch": 1.169799092676604, "grad_norm": 0.4986545145511627, "learning_rate": 6.970638439528445e-06, "loss": 0.1251, "step": 3610 }, { "epoch": 1.1701231367465976, "grad_norm": 0.4904198944568634, "learning_rate": 6.96903069167856e-06, "loss": 0.1149, "step": 3611 }, { "epoch": 1.170447180816591, "grad_norm": 0.49144813418388367, "learning_rate": 6.967422702838381e-06, "loss": 0.1258, "step": 3612 }, { "epoch": 1.1707712248865847, "grad_norm": 0.5225583910942078, "learning_rate": 6.965814473204708e-06, "loss": 0.1329, "step": 3613 }, { "epoch": 1.1710952689565781, "grad_norm": 0.4927222430706024, "learning_rate": 6.964206002974377e-06, "loss": 0.1173, "step": 3614 }, { "epoch": 1.1714193130265715, "grad_norm": 0.48234739899635315, "learning_rate": 6.962597292344244e-06, "loss": 0.1142, "step": 3615 }, { "epoch": 1.1717433570965652, "grad_norm": 0.48802363872528076, "learning_rate": 6.960988341511204e-06, "loss": 0.1123, "step": 3616 }, { "epoch": 1.1720674011665586, "grad_norm": 0.45166656374931335, "learning_rate": 6.959379150672172e-06, "loss": 0.1048, "step": 3617 }, { "epoch": 1.1723914452365523, "grad_norm": 0.4444221258163452, "learning_rate": 6.9577697200241014e-06, "loss": 0.1023, "step": 3618 }, { "epoch": 1.1727154893065457, "grad_norm": 0.49823105335235596, "learning_rate": 6.956160049763969e-06, "loss": 0.1055, "step": 3619 }, { "epoch": 1.173039533376539, "grad_norm": 0.4852776825428009, "learning_rate": 6.9545501400887846e-06, "loss": 0.115, "step": 3620 }, { "epoch": 1.1733635774465327, "grad_norm": 0.48619014024734497, "learning_rate": 6.952939991195584e-06, "loss": 0.1169, "step": 3621 }, { "epoch": 1.1736876215165262, "grad_norm": 0.4564978778362274, "learning_rate": 6.951329603281435e-06, "loss": 0.104, "step": 3622 }, { "epoch": 1.1740116655865198, "grad_norm": 0.47127726674079895, "learning_rate": 6.9497189765434326e-06, "loss": 0.109, "step": 3623 }, { "epoch": 1.1743357096565132, "grad_norm": 0.487866073846817, "learning_rate": 6.948108111178702e-06, "loss": 0.1156, "step": 3624 }, { "epoch": 1.1746597537265069, "grad_norm": 0.4357970952987671, "learning_rate": 6.946497007384398e-06, "loss": 0.1012, "step": 3625 }, { "epoch": 1.1749837977965003, "grad_norm": 0.5288655757904053, "learning_rate": 6.944885665357704e-06, "loss": 0.1231, "step": 3626 }, { "epoch": 1.175307841866494, "grad_norm": 0.49479320645332336, "learning_rate": 6.943274085295832e-06, "loss": 0.118, "step": 3627 }, { "epoch": 1.1756318859364874, "grad_norm": 0.52422696352005, "learning_rate": 6.941662267396026e-06, "loss": 0.126, "step": 3628 }, { "epoch": 1.1759559300064808, "grad_norm": 0.47797513008117676, "learning_rate": 6.940050211855554e-06, "loss": 0.1113, "step": 3629 }, { "epoch": 1.1762799740764744, "grad_norm": 0.4675103425979614, "learning_rate": 6.9384379188717155e-06, "loss": 0.1052, "step": 3630 }, { "epoch": 1.1766040181464679, "grad_norm": 0.4948538839817047, "learning_rate": 6.936825388641842e-06, "loss": 0.1209, "step": 3631 }, { "epoch": 1.1769280622164615, "grad_norm": 0.4761638045310974, "learning_rate": 6.935212621363292e-06, "loss": 0.1129, "step": 3632 }, { "epoch": 1.177252106286455, "grad_norm": 0.4795760214328766, "learning_rate": 6.9335996172334505e-06, "loss": 0.1083, "step": 3633 }, { "epoch": 1.1775761503564484, "grad_norm": 0.45007088780403137, "learning_rate": 6.931986376449736e-06, "loss": 0.1116, "step": 3634 }, { "epoch": 1.177900194426442, "grad_norm": 0.44401949644088745, "learning_rate": 6.9303728992095905e-06, "loss": 0.1059, "step": 3635 }, { "epoch": 1.1782242384964354, "grad_norm": 0.4538906216621399, "learning_rate": 6.928759185710492e-06, "loss": 0.1122, "step": 3636 }, { "epoch": 1.178548282566429, "grad_norm": 0.4766751229763031, "learning_rate": 6.9271452361499396e-06, "loss": 0.1173, "step": 3637 }, { "epoch": 1.1788723266364225, "grad_norm": 0.4567437469959259, "learning_rate": 6.925531050725465e-06, "loss": 0.114, "step": 3638 }, { "epoch": 1.1791963707064161, "grad_norm": 0.48373913764953613, "learning_rate": 6.923916629634632e-06, "loss": 0.1203, "step": 3639 }, { "epoch": 1.1795204147764096, "grad_norm": 0.4734843671321869, "learning_rate": 6.9223019730750285e-06, "loss": 0.1188, "step": 3640 }, { "epoch": 1.1798444588464032, "grad_norm": 0.4800986647605896, "learning_rate": 6.920687081244271e-06, "loss": 0.1188, "step": 3641 }, { "epoch": 1.1801685029163966, "grad_norm": 0.4790305197238922, "learning_rate": 6.919071954340011e-06, "loss": 0.1167, "step": 3642 }, { "epoch": 1.18049254698639, "grad_norm": 0.48875489830970764, "learning_rate": 6.9174565925599205e-06, "loss": 0.1199, "step": 3643 }, { "epoch": 1.1808165910563837, "grad_norm": 0.44894662499427795, "learning_rate": 6.915840996101705e-06, "loss": 0.1008, "step": 3644 }, { "epoch": 1.1811406351263771, "grad_norm": 0.45716553926467896, "learning_rate": 6.9142251651631e-06, "loss": 0.1105, "step": 3645 }, { "epoch": 1.1814646791963708, "grad_norm": 0.47267618775367737, "learning_rate": 6.912609099941865e-06, "loss": 0.1153, "step": 3646 }, { "epoch": 1.1817887232663642, "grad_norm": 0.5003800988197327, "learning_rate": 6.910992800635792e-06, "loss": 0.1265, "step": 3647 }, { "epoch": 1.1821127673363578, "grad_norm": 0.481633722782135, "learning_rate": 6.9093762674427e-06, "loss": 0.1159, "step": 3648 }, { "epoch": 1.1824368114063513, "grad_norm": 0.49060332775115967, "learning_rate": 6.907759500560436e-06, "loss": 0.1116, "step": 3649 }, { "epoch": 1.1827608554763447, "grad_norm": 0.4521649479866028, "learning_rate": 6.906142500186879e-06, "loss": 0.1038, "step": 3650 }, { "epoch": 1.1830848995463383, "grad_norm": 0.44412970542907715, "learning_rate": 6.904525266519931e-06, "loss": 0.1127, "step": 3651 }, { "epoch": 1.1834089436163318, "grad_norm": 0.48646020889282227, "learning_rate": 6.90290779975753e-06, "loss": 0.1235, "step": 3652 }, { "epoch": 1.1837329876863254, "grad_norm": 0.4747726619243622, "learning_rate": 6.901290100097634e-06, "loss": 0.1188, "step": 3653 }, { "epoch": 1.1840570317563188, "grad_norm": 0.45250165462493896, "learning_rate": 6.899672167738236e-06, "loss": 0.1147, "step": 3654 }, { "epoch": 1.1843810758263125, "grad_norm": 0.4335748553276062, "learning_rate": 6.898054002877356e-06, "loss": 0.105, "step": 3655 }, { "epoch": 1.184705119896306, "grad_norm": 0.47117671370506287, "learning_rate": 6.89643560571304e-06, "loss": 0.1113, "step": 3656 }, { "epoch": 1.1850291639662993, "grad_norm": 0.5246734619140625, "learning_rate": 6.894816976443365e-06, "loss": 0.1219, "step": 3657 }, { "epoch": 1.185353208036293, "grad_norm": 0.4772835075855255, "learning_rate": 6.8931981152664354e-06, "loss": 0.1297, "step": 3658 }, { "epoch": 1.1856772521062864, "grad_norm": 0.4772994816303253, "learning_rate": 6.891579022380384e-06, "loss": 0.1206, "step": 3659 }, { "epoch": 1.18600129617628, "grad_norm": 0.5017443299293518, "learning_rate": 6.889959697983371e-06, "loss": 0.1171, "step": 3660 }, { "epoch": 1.1863253402462735, "grad_norm": 0.49202096462249756, "learning_rate": 6.888340142273588e-06, "loss": 0.1229, "step": 3661 }, { "epoch": 1.186649384316267, "grad_norm": 0.5096995234489441, "learning_rate": 6.886720355449253e-06, "loss": 0.1266, "step": 3662 }, { "epoch": 1.1869734283862605, "grad_norm": 0.45919767022132874, "learning_rate": 6.88510033770861e-06, "loss": 0.1153, "step": 3663 }, { "epoch": 1.1872974724562542, "grad_norm": 0.45726320147514343, "learning_rate": 6.883480089249937e-06, "loss": 0.1128, "step": 3664 }, { "epoch": 1.1876215165262476, "grad_norm": 0.4446745216846466, "learning_rate": 6.881859610271532e-06, "loss": 0.1037, "step": 3665 }, { "epoch": 1.187945560596241, "grad_norm": 0.48562106490135193, "learning_rate": 6.88023890097173e-06, "loss": 0.1249, "step": 3666 }, { "epoch": 1.1882696046662347, "grad_norm": 0.487153559923172, "learning_rate": 6.878617961548888e-06, "loss": 0.1117, "step": 3667 }, { "epoch": 1.188593648736228, "grad_norm": 0.4806040823459625, "learning_rate": 6.876996792201394e-06, "loss": 0.1239, "step": 3668 }, { "epoch": 1.1889176928062217, "grad_norm": 0.44205746054649353, "learning_rate": 6.875375393127663e-06, "loss": 0.0955, "step": 3669 }, { "epoch": 1.1892417368762151, "grad_norm": 0.49507611989974976, "learning_rate": 6.873753764526141e-06, "loss": 0.1193, "step": 3670 }, { "epoch": 1.1895657809462086, "grad_norm": 0.44034644961357117, "learning_rate": 6.872131906595295e-06, "loss": 0.104, "step": 3671 }, { "epoch": 1.1898898250162022, "grad_norm": 0.5001266598701477, "learning_rate": 6.870509819533628e-06, "loss": 0.119, "step": 3672 }, { "epoch": 1.1902138690861956, "grad_norm": 0.4320615530014038, "learning_rate": 6.868887503539667e-06, "loss": 0.1052, "step": 3673 }, { "epoch": 1.1905379131561893, "grad_norm": 0.44837626814842224, "learning_rate": 6.867264958811968e-06, "loss": 0.1115, "step": 3674 }, { "epoch": 1.1908619572261827, "grad_norm": 0.43587976694107056, "learning_rate": 6.865642185549115e-06, "loss": 0.1012, "step": 3675 }, { "epoch": 1.1911860012961764, "grad_norm": 0.515214204788208, "learning_rate": 6.864019183949719e-06, "loss": 0.1261, "step": 3676 }, { "epoch": 1.1915100453661698, "grad_norm": 0.4589107930660248, "learning_rate": 6.86239595421242e-06, "loss": 0.1051, "step": 3677 }, { "epoch": 1.1918340894361634, "grad_norm": 0.503598690032959, "learning_rate": 6.860772496535887e-06, "loss": 0.1284, "step": 3678 }, { "epoch": 1.1921581335061568, "grad_norm": 0.48150861263275146, "learning_rate": 6.859148811118812e-06, "loss": 0.1233, "step": 3679 }, { "epoch": 1.1924821775761503, "grad_norm": 0.4901348054409027, "learning_rate": 6.857524898159921e-06, "loss": 0.12, "step": 3680 }, { "epoch": 1.192806221646144, "grad_norm": 0.4648503065109253, "learning_rate": 6.855900757857965e-06, "loss": 0.108, "step": 3681 }, { "epoch": 1.1931302657161373, "grad_norm": 0.440851628780365, "learning_rate": 6.854276390411721e-06, "loss": 0.1062, "step": 3682 }, { "epoch": 1.193454309786131, "grad_norm": 0.49569249153137207, "learning_rate": 6.85265179602e-06, "loss": 0.11, "step": 3683 }, { "epoch": 1.1937783538561244, "grad_norm": 0.47996777296066284, "learning_rate": 6.851026974881634e-06, "loss": 0.1153, "step": 3684 }, { "epoch": 1.1941023979261178, "grad_norm": 0.5012381076812744, "learning_rate": 6.849401927195485e-06, "loss": 0.1184, "step": 3685 }, { "epoch": 1.1944264419961115, "grad_norm": 0.5340484380722046, "learning_rate": 6.847776653160443e-06, "loss": 0.1163, "step": 3686 }, { "epoch": 1.194750486066105, "grad_norm": 0.4254695475101471, "learning_rate": 6.846151152975427e-06, "loss": 0.1032, "step": 3687 }, { "epoch": 1.1950745301360985, "grad_norm": 0.48035746812820435, "learning_rate": 6.844525426839383e-06, "loss": 0.1201, "step": 3688 }, { "epoch": 1.195398574206092, "grad_norm": 0.502835214138031, "learning_rate": 6.842899474951283e-06, "loss": 0.1194, "step": 3689 }, { "epoch": 1.1957226182760856, "grad_norm": 0.44748517870903015, "learning_rate": 6.841273297510127e-06, "loss": 0.1031, "step": 3690 }, { "epoch": 1.196046662346079, "grad_norm": 0.43392542004585266, "learning_rate": 6.839646894714944e-06, "loss": 0.1025, "step": 3691 }, { "epoch": 1.1963707064160727, "grad_norm": 0.5193026065826416, "learning_rate": 6.838020266764791e-06, "loss": 0.1255, "step": 3692 }, { "epoch": 1.196694750486066, "grad_norm": 0.4965396523475647, "learning_rate": 6.836393413858751e-06, "loss": 0.1206, "step": 3693 }, { "epoch": 1.1970187945560595, "grad_norm": 0.5123834013938904, "learning_rate": 6.834766336195934e-06, "loss": 0.1095, "step": 3694 }, { "epoch": 1.1973428386260532, "grad_norm": 0.46777281165122986, "learning_rate": 6.83313903397548e-06, "loss": 0.1107, "step": 3695 }, { "epoch": 1.1976668826960466, "grad_norm": 0.48866865038871765, "learning_rate": 6.831511507396555e-06, "loss": 0.1117, "step": 3696 }, { "epoch": 1.1979909267660402, "grad_norm": 0.5388202667236328, "learning_rate": 6.82988375665835e-06, "loss": 0.1098, "step": 3697 }, { "epoch": 1.1983149708360337, "grad_norm": 0.4483700096607208, "learning_rate": 6.828255781960089e-06, "loss": 0.1088, "step": 3698 }, { "epoch": 1.1986390149060273, "grad_norm": 0.48592525720596313, "learning_rate": 6.82662758350102e-06, "loss": 0.1126, "step": 3699 }, { "epoch": 1.1989630589760207, "grad_norm": 0.45029422640800476, "learning_rate": 6.8249991614804165e-06, "loss": 0.1062, "step": 3700 }, { "epoch": 1.1992871030460144, "grad_norm": 0.4915621876716614, "learning_rate": 6.823370516097585e-06, "loss": 0.118, "step": 3701 }, { "epoch": 1.1996111471160078, "grad_norm": 0.4654853045940399, "learning_rate": 6.8217416475518515e-06, "loss": 0.1078, "step": 3702 }, { "epoch": 1.1999351911860012, "grad_norm": 0.4795008897781372, "learning_rate": 6.820112556042577e-06, "loss": 0.1096, "step": 3703 }, { "epoch": 1.2002592352559949, "grad_norm": 0.4701564908027649, "learning_rate": 6.8184832417691446e-06, "loss": 0.1114, "step": 3704 }, { "epoch": 1.2005832793259883, "grad_norm": 0.4656620919704437, "learning_rate": 6.816853704930969e-06, "loss": 0.1094, "step": 3705 }, { "epoch": 1.200907323395982, "grad_norm": 0.4720616340637207, "learning_rate": 6.815223945727488e-06, "loss": 0.1163, "step": 3706 }, { "epoch": 1.2012313674659754, "grad_norm": 0.48066261410713196, "learning_rate": 6.81359396435817e-06, "loss": 0.1177, "step": 3707 }, { "epoch": 1.2015554115359688, "grad_norm": 0.5046426057815552, "learning_rate": 6.811963761022507e-06, "loss": 0.1255, "step": 3708 }, { "epoch": 1.2018794556059624, "grad_norm": 0.4512089788913727, "learning_rate": 6.810333335920021e-06, "loss": 0.1056, "step": 3709 }, { "epoch": 1.2022034996759559, "grad_norm": 0.41402915120124817, "learning_rate": 6.80870268925026e-06, "loss": 0.0983, "step": 3710 }, { "epoch": 1.2025275437459495, "grad_norm": 0.48315009474754333, "learning_rate": 6.807071821212798e-06, "loss": 0.112, "step": 3711 }, { "epoch": 1.202851587815943, "grad_norm": 0.4571656286716461, "learning_rate": 6.8054407320072405e-06, "loss": 0.1072, "step": 3712 }, { "epoch": 1.2031756318859366, "grad_norm": 0.4467449486255646, "learning_rate": 6.8038094218332155e-06, "loss": 0.1075, "step": 3713 }, { "epoch": 1.20349967595593, "grad_norm": 0.42994919419288635, "learning_rate": 6.802177890890378e-06, "loss": 0.1039, "step": 3714 }, { "epoch": 1.2038237200259236, "grad_norm": 0.4829837679862976, "learning_rate": 6.800546139378415e-06, "loss": 0.1132, "step": 3715 }, { "epoch": 1.204147764095917, "grad_norm": 0.4595179855823517, "learning_rate": 6.798914167497033e-06, "loss": 0.1108, "step": 3716 }, { "epoch": 1.2044718081659105, "grad_norm": 0.5180400609970093, "learning_rate": 6.797281975445973e-06, "loss": 0.128, "step": 3717 }, { "epoch": 1.2047958522359041, "grad_norm": 0.4271709620952606, "learning_rate": 6.795649563424997e-06, "loss": 0.1041, "step": 3718 }, { "epoch": 1.2051198963058976, "grad_norm": 0.4888884127140045, "learning_rate": 6.7940169316339e-06, "loss": 0.1222, "step": 3719 }, { "epoch": 1.2054439403758912, "grad_norm": 0.42299413681030273, "learning_rate": 6.7923840802724975e-06, "loss": 0.0963, "step": 3720 }, { "epoch": 1.2057679844458846, "grad_norm": 0.47027137875556946, "learning_rate": 6.790751009540635e-06, "loss": 0.1104, "step": 3721 }, { "epoch": 1.206092028515878, "grad_norm": 0.4526819884777069, "learning_rate": 6.789117719638184e-06, "loss": 0.1118, "step": 3722 }, { "epoch": 1.2064160725858717, "grad_norm": 0.4731588065624237, "learning_rate": 6.787484210765044e-06, "loss": 0.125, "step": 3723 }, { "epoch": 1.2067401166558651, "grad_norm": 0.5060051679611206, "learning_rate": 6.7858504831211416e-06, "loss": 0.1199, "step": 3724 }, { "epoch": 1.2070641607258588, "grad_norm": 0.4772077798843384, "learning_rate": 6.784216536906429e-06, "loss": 0.1195, "step": 3725 }, { "epoch": 1.2073882047958522, "grad_norm": 0.43619367480278015, "learning_rate": 6.782582372320882e-06, "loss": 0.1056, "step": 3726 }, { "epoch": 1.2077122488658458, "grad_norm": 0.42430293560028076, "learning_rate": 6.780947989564511e-06, "loss": 0.0981, "step": 3727 }, { "epoch": 1.2080362929358393, "grad_norm": 0.4979735314846039, "learning_rate": 6.7793133888373475e-06, "loss": 0.127, "step": 3728 }, { "epoch": 1.208360337005833, "grad_norm": 0.4383695423603058, "learning_rate": 6.77767857033945e-06, "loss": 0.0991, "step": 3729 }, { "epoch": 1.2086843810758263, "grad_norm": 0.49072328209877014, "learning_rate": 6.7760435342709054e-06, "loss": 0.1166, "step": 3730 }, { "epoch": 1.2090084251458197, "grad_norm": 0.46386846899986267, "learning_rate": 6.774408280831825e-06, "loss": 0.1138, "step": 3731 }, { "epoch": 1.2093324692158134, "grad_norm": 0.48256877064704895, "learning_rate": 6.772772810222349e-06, "loss": 0.119, "step": 3732 }, { "epoch": 1.2096565132858068, "grad_norm": 0.5180603861808777, "learning_rate": 6.771137122642642e-06, "loss": 0.1215, "step": 3733 }, { "epoch": 1.2099805573558005, "grad_norm": 0.45415544509887695, "learning_rate": 6.769501218292897e-06, "loss": 0.114, "step": 3734 }, { "epoch": 1.2103046014257939, "grad_norm": 0.4832041263580322, "learning_rate": 6.767865097373334e-06, "loss": 0.1198, "step": 3735 }, { "epoch": 1.2106286454957873, "grad_norm": 0.4709514379501343, "learning_rate": 6.766228760084197e-06, "loss": 0.1147, "step": 3736 }, { "epoch": 1.210952689565781, "grad_norm": 0.46234336495399475, "learning_rate": 6.764592206625759e-06, "loss": 0.1135, "step": 3737 }, { "epoch": 1.2112767336357744, "grad_norm": 0.48604175448417664, "learning_rate": 6.762955437198315e-06, "loss": 0.1181, "step": 3738 }, { "epoch": 1.211600777705768, "grad_norm": 0.4512622058391571, "learning_rate": 6.761318452002194e-06, "loss": 0.1043, "step": 3739 }, { "epoch": 1.2119248217757614, "grad_norm": 0.4739454388618469, "learning_rate": 6.759681251237745e-06, "loss": 0.1134, "step": 3740 }, { "epoch": 1.212248865845755, "grad_norm": 0.473141610622406, "learning_rate": 6.7580438351053466e-06, "loss": 0.1151, "step": 3741 }, { "epoch": 1.2125729099157485, "grad_norm": 0.4593276381492615, "learning_rate": 6.756406203805401e-06, "loss": 0.1097, "step": 3742 }, { "epoch": 1.2128969539857422, "grad_norm": 0.44628623127937317, "learning_rate": 6.7547683575383415e-06, "loss": 0.1099, "step": 3743 }, { "epoch": 1.2132209980557356, "grad_norm": 0.4702170789241791, "learning_rate": 6.7531302965046194e-06, "loss": 0.1142, "step": 3744 }, { "epoch": 1.213545042125729, "grad_norm": 0.5326228737831116, "learning_rate": 6.751492020904722e-06, "loss": 0.1116, "step": 3745 }, { "epoch": 1.2138690861957226, "grad_norm": 0.4977304935455322, "learning_rate": 6.749853530939156e-06, "loss": 0.1268, "step": 3746 }, { "epoch": 1.214193130265716, "grad_norm": 0.46702513098716736, "learning_rate": 6.748214826808459e-06, "loss": 0.1094, "step": 3747 }, { "epoch": 1.2145171743357097, "grad_norm": 0.5220220685005188, "learning_rate": 6.746575908713191e-06, "loss": 0.1327, "step": 3748 }, { "epoch": 1.2148412184057031, "grad_norm": 0.4549591839313507, "learning_rate": 6.7449367768539405e-06, "loss": 0.1107, "step": 3749 }, { "epoch": 1.2151652624756968, "grad_norm": 0.526520311832428, "learning_rate": 6.74329743143132e-06, "loss": 0.1404, "step": 3750 }, { "epoch": 1.2154893065456902, "grad_norm": 0.465791791677475, "learning_rate": 6.74165787264597e-06, "loss": 0.1125, "step": 3751 }, { "epoch": 1.2158133506156839, "grad_norm": 0.4611221253871918, "learning_rate": 6.740018100698559e-06, "loss": 0.1038, "step": 3752 }, { "epoch": 1.2161373946856773, "grad_norm": 0.4689771234989166, "learning_rate": 6.738378115789775e-06, "loss": 0.1121, "step": 3753 }, { "epoch": 1.2164614387556707, "grad_norm": 0.4977121949195862, "learning_rate": 6.73673791812034e-06, "loss": 0.1183, "step": 3754 }, { "epoch": 1.2167854828256643, "grad_norm": 0.4518553912639618, "learning_rate": 6.7350975078909944e-06, "loss": 0.1076, "step": 3755 }, { "epoch": 1.2171095268956578, "grad_norm": 0.4840216636657715, "learning_rate": 6.733456885302513e-06, "loss": 0.1234, "step": 3756 }, { "epoch": 1.2174335709656514, "grad_norm": 0.4555285573005676, "learning_rate": 6.731816050555689e-06, "loss": 0.1101, "step": 3757 }, { "epoch": 1.2177576150356448, "grad_norm": 0.434114009141922, "learning_rate": 6.730175003851346e-06, "loss": 0.1088, "step": 3758 }, { "epoch": 1.2180816591056383, "grad_norm": 0.4861230254173279, "learning_rate": 6.728533745390331e-06, "loss": 0.1243, "step": 3759 }, { "epoch": 1.218405703175632, "grad_norm": 0.4449920952320099, "learning_rate": 6.72689227537352e-06, "loss": 0.1172, "step": 3760 }, { "epoch": 1.2187297472456253, "grad_norm": 0.4476756751537323, "learning_rate": 6.725250594001812e-06, "loss": 0.1135, "step": 3761 }, { "epoch": 1.219053791315619, "grad_norm": 0.4657207131385803, "learning_rate": 6.723608701476135e-06, "loss": 0.1137, "step": 3762 }, { "epoch": 1.2193778353856124, "grad_norm": 0.4610616862773895, "learning_rate": 6.7219665979974355e-06, "loss": 0.1124, "step": 3763 }, { "epoch": 1.219701879455606, "grad_norm": 0.47316476702690125, "learning_rate": 6.720324283766696e-06, "loss": 0.1131, "step": 3764 }, { "epoch": 1.2200259235255995, "grad_norm": 0.4340543746948242, "learning_rate": 6.718681758984917e-06, "loss": 0.1024, "step": 3765 }, { "epoch": 1.220349967595593, "grad_norm": 0.45548132061958313, "learning_rate": 6.71703902385313e-06, "loss": 0.1142, "step": 3766 }, { "epoch": 1.2206740116655865, "grad_norm": 0.4511728584766388, "learning_rate": 6.715396078572388e-06, "loss": 0.1074, "step": 3767 }, { "epoch": 1.22099805573558, "grad_norm": 0.48144659399986267, "learning_rate": 6.713752923343774e-06, "loss": 0.1179, "step": 3768 }, { "epoch": 1.2213220998055736, "grad_norm": 0.4838949739933014, "learning_rate": 6.712109558368391e-06, "loss": 0.1173, "step": 3769 }, { "epoch": 1.221646143875567, "grad_norm": 0.4324999153614044, "learning_rate": 6.710465983847373e-06, "loss": 0.1041, "step": 3770 }, { "epoch": 1.2219701879455607, "grad_norm": 0.47770074009895325, "learning_rate": 6.708822199981877e-06, "loss": 0.1125, "step": 3771 }, { "epoch": 1.222294232015554, "grad_norm": 0.4648069441318512, "learning_rate": 6.7071782069730865e-06, "loss": 0.11, "step": 3772 }, { "epoch": 1.2226182760855475, "grad_norm": 0.45485344529151917, "learning_rate": 6.705534005022209e-06, "loss": 0.1143, "step": 3773 }, { "epoch": 1.2229423201555412, "grad_norm": 0.4402785003185272, "learning_rate": 6.703889594330481e-06, "loss": 0.0989, "step": 3774 }, { "epoch": 1.2232663642255346, "grad_norm": 0.44086694717407227, "learning_rate": 6.702244975099164e-06, "loss": 0.1043, "step": 3775 }, { "epoch": 1.2235904082955282, "grad_norm": 0.4607573449611664, "learning_rate": 6.700600147529539e-06, "loss": 0.1106, "step": 3776 }, { "epoch": 1.2239144523655217, "grad_norm": 0.4859803020954132, "learning_rate": 6.698955111822918e-06, "loss": 0.12, "step": 3777 }, { "epoch": 1.2242384964355153, "grad_norm": 0.44567012786865234, "learning_rate": 6.697309868180639e-06, "loss": 0.1043, "step": 3778 }, { "epoch": 1.2245625405055087, "grad_norm": 0.4929608702659607, "learning_rate": 6.6956644168040644e-06, "loss": 0.1242, "step": 3779 }, { "epoch": 1.2248865845755024, "grad_norm": 0.49041947722435, "learning_rate": 6.69401875789458e-06, "loss": 0.1122, "step": 3780 }, { "epoch": 1.2252106286454958, "grad_norm": 0.4901500940322876, "learning_rate": 6.692372891653599e-06, "loss": 0.1153, "step": 3781 }, { "epoch": 1.2255346727154892, "grad_norm": 0.4653424024581909, "learning_rate": 6.690726818282559e-06, "loss": 0.1054, "step": 3782 }, { "epoch": 1.2258587167854829, "grad_norm": 0.4732823669910431, "learning_rate": 6.689080537982924e-06, "loss": 0.1204, "step": 3783 }, { "epoch": 1.2261827608554763, "grad_norm": 0.5188104510307312, "learning_rate": 6.687434050956184e-06, "loss": 0.129, "step": 3784 }, { "epoch": 1.22650680492547, "grad_norm": 0.46234825253486633, "learning_rate": 6.68578735740385e-06, "loss": 0.11, "step": 3785 }, { "epoch": 1.2268308489954634, "grad_norm": 0.47152841091156006, "learning_rate": 6.684140457527465e-06, "loss": 0.1119, "step": 3786 }, { "epoch": 1.227154893065457, "grad_norm": 0.5100438594818115, "learning_rate": 6.68249335152859e-06, "loss": 0.13, "step": 3787 }, { "epoch": 1.2274789371354504, "grad_norm": 0.46005114912986755, "learning_rate": 6.680846039608817e-06, "loss": 0.1128, "step": 3788 }, { "epoch": 1.2278029812054438, "grad_norm": 0.4707135856151581, "learning_rate": 6.679198521969761e-06, "loss": 0.1178, "step": 3789 }, { "epoch": 1.2281270252754375, "grad_norm": 0.46408483386039734, "learning_rate": 6.677550798813062e-06, "loss": 0.1116, "step": 3790 }, { "epoch": 1.228451069345431, "grad_norm": 0.4784737229347229, "learning_rate": 6.675902870340385e-06, "loss": 0.1125, "step": 3791 }, { "epoch": 1.2287751134154246, "grad_norm": 0.4646395444869995, "learning_rate": 6.6742547367534205e-06, "loss": 0.1088, "step": 3792 }, { "epoch": 1.229099157485418, "grad_norm": 0.44539207220077515, "learning_rate": 6.6726063982538846e-06, "loss": 0.1038, "step": 3793 }, { "epoch": 1.2294232015554116, "grad_norm": 0.4744553565979004, "learning_rate": 6.6709578550435174e-06, "loss": 0.1141, "step": 3794 }, { "epoch": 1.229747245625405, "grad_norm": 0.47019147872924805, "learning_rate": 6.669309107324085e-06, "loss": 0.1138, "step": 3795 }, { "epoch": 1.2300712896953985, "grad_norm": 0.4724312126636505, "learning_rate": 6.667660155297377e-06, "loss": 0.1139, "step": 3796 }, { "epoch": 1.2303953337653921, "grad_norm": 0.4590516984462738, "learning_rate": 6.666010999165211e-06, "loss": 0.1082, "step": 3797 }, { "epoch": 1.2307193778353855, "grad_norm": 0.47386661171913147, "learning_rate": 6.664361639129429e-06, "loss": 0.1096, "step": 3798 }, { "epoch": 1.2310434219053792, "grad_norm": 0.46582818031311035, "learning_rate": 6.662712075391891e-06, "loss": 0.1118, "step": 3799 }, { "epoch": 1.2313674659753726, "grad_norm": 0.4459727704524994, "learning_rate": 6.6610623081544934e-06, "loss": 0.1031, "step": 3800 }, { "epoch": 1.2316915100453663, "grad_norm": 0.48455187678337097, "learning_rate": 6.659412337619149e-06, "loss": 0.1101, "step": 3801 }, { "epoch": 1.2320155541153597, "grad_norm": 0.4238426089286804, "learning_rate": 6.657762163987799e-06, "loss": 0.1047, "step": 3802 }, { "epoch": 1.2323395981853533, "grad_norm": 0.4523986876010895, "learning_rate": 6.656111787462407e-06, "loss": 0.1106, "step": 3803 }, { "epoch": 1.2326636422553467, "grad_norm": 0.45181649923324585, "learning_rate": 6.654461208244968e-06, "loss": 0.1015, "step": 3804 }, { "epoch": 1.2329876863253402, "grad_norm": 0.459146648645401, "learning_rate": 6.65281042653749e-06, "loss": 0.1094, "step": 3805 }, { "epoch": 1.2333117303953338, "grad_norm": 0.4525477886199951, "learning_rate": 6.651159442542019e-06, "loss": 0.1084, "step": 3806 }, { "epoch": 1.2336357744653272, "grad_norm": 0.45423898100852966, "learning_rate": 6.6495082564606164e-06, "loss": 0.1111, "step": 3807 }, { "epoch": 1.2339598185353209, "grad_norm": 0.48852837085723877, "learning_rate": 6.6478568684953704e-06, "loss": 0.1191, "step": 3808 }, { "epoch": 1.2342838626053143, "grad_norm": 0.4974091649055481, "learning_rate": 6.6462052788483965e-06, "loss": 0.1265, "step": 3809 }, { "epoch": 1.2346079066753077, "grad_norm": 0.5248463153839111, "learning_rate": 6.644553487721833e-06, "loss": 0.1283, "step": 3810 }, { "epoch": 1.2349319507453014, "grad_norm": 0.4278448820114136, "learning_rate": 6.642901495317844e-06, "loss": 0.1005, "step": 3811 }, { "epoch": 1.2352559948152948, "grad_norm": 0.4968985617160797, "learning_rate": 6.641249301838615e-06, "loss": 0.1252, "step": 3812 }, { "epoch": 1.2355800388852884, "grad_norm": 0.47982725501060486, "learning_rate": 6.639596907486359e-06, "loss": 0.1171, "step": 3813 }, { "epoch": 1.2359040829552819, "grad_norm": 0.4500291347503662, "learning_rate": 6.637944312463317e-06, "loss": 0.1055, "step": 3814 }, { "epoch": 1.2362281270252755, "grad_norm": 0.4736287593841553, "learning_rate": 6.636291516971747e-06, "loss": 0.1188, "step": 3815 }, { "epoch": 1.236552171095269, "grad_norm": 0.4399452209472656, "learning_rate": 6.6346385212139345e-06, "loss": 0.1083, "step": 3816 }, { "epoch": 1.2368762151652626, "grad_norm": 0.4650920331478119, "learning_rate": 6.632985325392194e-06, "loss": 0.114, "step": 3817 }, { "epoch": 1.237200259235256, "grad_norm": 0.500536322593689, "learning_rate": 6.631331929708855e-06, "loss": 0.1222, "step": 3818 }, { "epoch": 1.2375243033052494, "grad_norm": 0.472375750541687, "learning_rate": 6.629678334366282e-06, "loss": 0.1167, "step": 3819 }, { "epoch": 1.237848347375243, "grad_norm": 0.4487501084804535, "learning_rate": 6.628024539566857e-06, "loss": 0.1046, "step": 3820 }, { "epoch": 1.2381723914452365, "grad_norm": 0.4511720538139343, "learning_rate": 6.626370545512989e-06, "loss": 0.1032, "step": 3821 }, { "epoch": 1.2384964355152301, "grad_norm": 0.46332141757011414, "learning_rate": 6.6247163524071115e-06, "loss": 0.1132, "step": 3822 }, { "epoch": 1.2388204795852236, "grad_norm": 0.4876859486103058, "learning_rate": 6.62306196045168e-06, "loss": 0.1216, "step": 3823 }, { "epoch": 1.239144523655217, "grad_norm": 0.48191460967063904, "learning_rate": 6.6214073698491766e-06, "loss": 0.1128, "step": 3824 }, { "epoch": 1.2394685677252106, "grad_norm": 0.46776261925697327, "learning_rate": 6.619752580802108e-06, "loss": 0.1081, "step": 3825 }, { "epoch": 1.239792611795204, "grad_norm": 0.4756350517272949, "learning_rate": 6.618097593513006e-06, "loss": 0.1161, "step": 3826 }, { "epoch": 1.2401166558651977, "grad_norm": 0.48851534724235535, "learning_rate": 6.616442408184421e-06, "loss": 0.1203, "step": 3827 }, { "epoch": 1.2404406999351911, "grad_norm": 0.4545134902000427, "learning_rate": 6.614787025018932e-06, "loss": 0.1147, "step": 3828 }, { "epoch": 1.2407647440051848, "grad_norm": 0.44553035497665405, "learning_rate": 6.6131314442191465e-06, "loss": 0.1094, "step": 3829 }, { "epoch": 1.2410887880751782, "grad_norm": 0.402369886636734, "learning_rate": 6.6114756659876864e-06, "loss": 0.0925, "step": 3830 }, { "epoch": 1.2414128321451718, "grad_norm": 0.4575660824775696, "learning_rate": 6.609819690527206e-06, "loss": 0.1123, "step": 3831 }, { "epoch": 1.2417368762151653, "grad_norm": 0.46121862530708313, "learning_rate": 6.6081635180403794e-06, "loss": 0.1124, "step": 3832 }, { "epoch": 1.2420609202851587, "grad_norm": 0.44581833481788635, "learning_rate": 6.606507148729906e-06, "loss": 0.1005, "step": 3833 }, { "epoch": 1.2423849643551523, "grad_norm": 0.5141975283622742, "learning_rate": 6.6048505827985096e-06, "loss": 0.118, "step": 3834 }, { "epoch": 1.2427090084251458, "grad_norm": 0.4516439437866211, "learning_rate": 6.603193820448941e-06, "loss": 0.1083, "step": 3835 }, { "epoch": 1.2430330524951394, "grad_norm": 0.5207176804542542, "learning_rate": 6.601536861883966e-06, "loss": 0.1321, "step": 3836 }, { "epoch": 1.2433570965651328, "grad_norm": 0.4359625279903412, "learning_rate": 6.599879707306384e-06, "loss": 0.1008, "step": 3837 }, { "epoch": 1.2436811406351265, "grad_norm": 0.5066201090812683, "learning_rate": 6.598222356919014e-06, "loss": 0.1093, "step": 3838 }, { "epoch": 1.24400518470512, "grad_norm": 0.49363771080970764, "learning_rate": 6.5965648109247e-06, "loss": 0.1225, "step": 3839 }, { "epoch": 1.2443292287751135, "grad_norm": 0.4841170012950897, "learning_rate": 6.594907069526308e-06, "loss": 0.12, "step": 3840 }, { "epoch": 1.244653272845107, "grad_norm": 0.45232200622558594, "learning_rate": 6.593249132926731e-06, "loss": 0.1099, "step": 3841 }, { "epoch": 1.2449773169151004, "grad_norm": 0.5103926658630371, "learning_rate": 6.591591001328884e-06, "loss": 0.1165, "step": 3842 }, { "epoch": 1.245301360985094, "grad_norm": 0.506633460521698, "learning_rate": 6.589932674935706e-06, "loss": 0.1205, "step": 3843 }, { "epoch": 1.2456254050550875, "grad_norm": 0.4852462708950043, "learning_rate": 6.588274153950161e-06, "loss": 0.1134, "step": 3844 }, { "epoch": 1.245949449125081, "grad_norm": 0.4266104996204376, "learning_rate": 6.586615438575234e-06, "loss": 0.108, "step": 3845 }, { "epoch": 1.2462734931950745, "grad_norm": 0.4661915600299835, "learning_rate": 6.584956529013937e-06, "loss": 0.1102, "step": 3846 }, { "epoch": 1.246597537265068, "grad_norm": 0.4633307456970215, "learning_rate": 6.583297425469306e-06, "loss": 0.1084, "step": 3847 }, { "epoch": 1.2469215813350616, "grad_norm": 0.4619586765766144, "learning_rate": 6.581638128144396e-06, "loss": 0.1035, "step": 3848 }, { "epoch": 1.247245625405055, "grad_norm": 0.49360668659210205, "learning_rate": 6.579978637242292e-06, "loss": 0.1222, "step": 3849 }, { "epoch": 1.2475696694750487, "grad_norm": 0.4530634582042694, "learning_rate": 6.578318952966098e-06, "loss": 0.1112, "step": 3850 }, { "epoch": 1.247893713545042, "grad_norm": 0.47121280431747437, "learning_rate": 6.576659075518943e-06, "loss": 0.1127, "step": 3851 }, { "epoch": 1.2482177576150357, "grad_norm": 0.5079575777053833, "learning_rate": 6.5749990051039795e-06, "loss": 0.1196, "step": 3852 }, { "epoch": 1.2485418016850292, "grad_norm": 0.46687185764312744, "learning_rate": 6.573338741924387e-06, "loss": 0.1112, "step": 3853 }, { "epoch": 1.2488658457550228, "grad_norm": 0.4628097414970398, "learning_rate": 6.571678286183362e-06, "loss": 0.107, "step": 3854 }, { "epoch": 1.2491898898250162, "grad_norm": 0.42948663234710693, "learning_rate": 6.5700176380841294e-06, "loss": 0.1013, "step": 3855 }, { "epoch": 1.2495139338950096, "grad_norm": 0.4683387279510498, "learning_rate": 6.568356797829938e-06, "loss": 0.1136, "step": 3856 }, { "epoch": 1.2498379779650033, "grad_norm": 0.48012471199035645, "learning_rate": 6.566695765624056e-06, "loss": 0.112, "step": 3857 }, { "epoch": 1.2501620220349967, "grad_norm": 0.5040128827095032, "learning_rate": 6.565034541669782e-06, "loss": 0.1204, "step": 3858 }, { "epoch": 1.2504860661049904, "grad_norm": 0.4954575300216675, "learning_rate": 6.563373126170428e-06, "loss": 0.1213, "step": 3859 }, { "epoch": 1.2508101101749838, "grad_norm": 0.47964587807655334, "learning_rate": 6.56171151932934e-06, "loss": 0.1207, "step": 3860 }, { "epoch": 1.2511341542449772, "grad_norm": 0.4623505473136902, "learning_rate": 6.560049721349879e-06, "loss": 0.1086, "step": 3861 }, { "epoch": 1.2514581983149708, "grad_norm": 0.4356464743614197, "learning_rate": 6.558387732435435e-06, "loss": 0.1077, "step": 3862 }, { "epoch": 1.2517822423849643, "grad_norm": 0.47153621912002563, "learning_rate": 6.556725552789418e-06, "loss": 0.11, "step": 3863 }, { "epoch": 1.252106286454958, "grad_norm": 0.5097867250442505, "learning_rate": 6.555063182615264e-06, "loss": 0.1285, "step": 3864 }, { "epoch": 1.2524303305249513, "grad_norm": 0.4765617847442627, "learning_rate": 6.553400622116433e-06, "loss": 0.1107, "step": 3865 }, { "epoch": 1.252754374594945, "grad_norm": 0.4168030619621277, "learning_rate": 6.551737871496402e-06, "loss": 0.1001, "step": 3866 }, { "epoch": 1.2530784186649384, "grad_norm": 0.47275760769844055, "learning_rate": 6.5500749309586775e-06, "loss": 0.1255, "step": 3867 }, { "epoch": 1.253402462734932, "grad_norm": 0.4696555733680725, "learning_rate": 6.548411800706787e-06, "loss": 0.1105, "step": 3868 }, { "epoch": 1.2537265068049255, "grad_norm": 0.6402665376663208, "learning_rate": 6.5467484809442825e-06, "loss": 0.119, "step": 3869 }, { "epoch": 1.254050550874919, "grad_norm": 0.4428213834762573, "learning_rate": 6.545084971874738e-06, "loss": 0.101, "step": 3870 }, { "epoch": 1.2543745949449125, "grad_norm": 0.4355408847332001, "learning_rate": 6.543421273701751e-06, "loss": 0.1008, "step": 3871 }, { "epoch": 1.254698639014906, "grad_norm": 0.4726782441139221, "learning_rate": 6.54175738662894e-06, "loss": 0.1105, "step": 3872 }, { "epoch": 1.2550226830848996, "grad_norm": 0.4533577859401703, "learning_rate": 6.540093310859951e-06, "loss": 0.1066, "step": 3873 }, { "epoch": 1.255346727154893, "grad_norm": 0.47577226161956787, "learning_rate": 6.538429046598449e-06, "loss": 0.1092, "step": 3874 }, { "epoch": 1.2556707712248865, "grad_norm": 0.48334789276123047, "learning_rate": 6.536764594048126e-06, "loss": 0.1174, "step": 3875 }, { "epoch": 1.25599481529488, "grad_norm": 0.46139639616012573, "learning_rate": 6.535099953412693e-06, "loss": 0.1075, "step": 3876 }, { "epoch": 1.2563188593648738, "grad_norm": 0.47558170557022095, "learning_rate": 6.533435124895888e-06, "loss": 0.1092, "step": 3877 }, { "epoch": 1.2566429034348672, "grad_norm": 0.49053213000297546, "learning_rate": 6.531770108701467e-06, "loss": 0.1186, "step": 3878 }, { "epoch": 1.2569669475048606, "grad_norm": 0.48654669523239136, "learning_rate": 6.530104905033213e-06, "loss": 0.1194, "step": 3879 }, { "epoch": 1.2572909915748542, "grad_norm": 0.4468207359313965, "learning_rate": 6.528439514094933e-06, "loss": 0.1064, "step": 3880 }, { "epoch": 1.2576150356448477, "grad_norm": 0.4944261610507965, "learning_rate": 6.526773936090452e-06, "loss": 0.1199, "step": 3881 }, { "epoch": 1.2579390797148413, "grad_norm": 0.46873268485069275, "learning_rate": 6.525108171223622e-06, "loss": 0.1064, "step": 3882 }, { "epoch": 1.2582631237848347, "grad_norm": 0.5125519633293152, "learning_rate": 6.523442219698316e-06, "loss": 0.1279, "step": 3883 }, { "epoch": 1.2585871678548282, "grad_norm": 0.42340168356895447, "learning_rate": 6.5217760817184295e-06, "loss": 0.1018, "step": 3884 }, { "epoch": 1.2589112119248218, "grad_norm": 0.45173561573028564, "learning_rate": 6.520109757487883e-06, "loss": 0.1014, "step": 3885 }, { "epoch": 1.2592352559948152, "grad_norm": 0.45571208000183105, "learning_rate": 6.518443247210617e-06, "loss": 0.1119, "step": 3886 }, { "epoch": 1.2595593000648089, "grad_norm": 0.4843176305294037, "learning_rate": 6.516776551090599e-06, "loss": 0.1142, "step": 3887 }, { "epoch": 1.2598833441348023, "grad_norm": 0.4941485822200775, "learning_rate": 6.515109669331814e-06, "loss": 0.115, "step": 3888 }, { "epoch": 1.2602073882047957, "grad_norm": 0.43944495916366577, "learning_rate": 6.513442602138273e-06, "loss": 0.1092, "step": 3889 }, { "epoch": 1.2605314322747894, "grad_norm": 0.4794701635837555, "learning_rate": 6.511775349714008e-06, "loss": 0.116, "step": 3890 }, { "epoch": 1.260855476344783, "grad_norm": 0.45469602942466736, "learning_rate": 6.510107912263077e-06, "loss": 0.113, "step": 3891 }, { "epoch": 1.2611795204147764, "grad_norm": 0.42920494079589844, "learning_rate": 6.508440289989555e-06, "loss": 0.1055, "step": 3892 }, { "epoch": 1.2615035644847699, "grad_norm": 0.47261765599250793, "learning_rate": 6.506772483097545e-06, "loss": 0.121, "step": 3893 }, { "epoch": 1.2618276085547635, "grad_norm": 0.45626595616340637, "learning_rate": 6.505104491791169e-06, "loss": 0.1076, "step": 3894 }, { "epoch": 1.262151652624757, "grad_norm": 0.49716857075691223, "learning_rate": 6.503436316274574e-06, "loss": 0.1264, "step": 3895 }, { "epoch": 1.2624756966947506, "grad_norm": 0.4248673617839813, "learning_rate": 6.5017679567519285e-06, "loss": 0.1026, "step": 3896 }, { "epoch": 1.262799740764744, "grad_norm": 0.4387178122997284, "learning_rate": 6.500099413427423e-06, "loss": 0.0995, "step": 3897 }, { "epoch": 1.2631237848347374, "grad_norm": 0.4541378319263458, "learning_rate": 6.498430686505271e-06, "loss": 0.1055, "step": 3898 }, { "epoch": 1.263447828904731, "grad_norm": 0.4681780934333801, "learning_rate": 6.4967617761897104e-06, "loss": 0.1127, "step": 3899 }, { "epoch": 1.2637718729747245, "grad_norm": 0.490855872631073, "learning_rate": 6.495092682684997e-06, "loss": 0.1323, "step": 3900 }, { "epoch": 1.2640959170447181, "grad_norm": 0.43128740787506104, "learning_rate": 6.493423406195414e-06, "loss": 0.1045, "step": 3901 }, { "epoch": 1.2644199611147116, "grad_norm": 0.4609931707382202, "learning_rate": 6.491753946925263e-06, "loss": 0.1182, "step": 3902 }, { "epoch": 1.2647440051847052, "grad_norm": 0.4781551957130432, "learning_rate": 6.49008430507887e-06, "loss": 0.1216, "step": 3903 }, { "epoch": 1.2650680492546986, "grad_norm": 0.46152034401893616, "learning_rate": 6.488414480860583e-06, "loss": 0.1139, "step": 3904 }, { "epoch": 1.2653920933246923, "grad_norm": 0.4464397132396698, "learning_rate": 6.486744474474772e-06, "loss": 0.0979, "step": 3905 }, { "epoch": 1.2657161373946857, "grad_norm": 0.4776097536087036, "learning_rate": 6.485074286125831e-06, "loss": 0.111, "step": 3906 }, { "epoch": 1.2660401814646791, "grad_norm": 0.44906583428382874, "learning_rate": 6.483403916018174e-06, "loss": 0.1113, "step": 3907 }, { "epoch": 1.2663642255346728, "grad_norm": 0.4759385585784912, "learning_rate": 6.481733364356237e-06, "loss": 0.1141, "step": 3908 }, { "epoch": 1.2666882696046662, "grad_norm": 0.4835518002510071, "learning_rate": 6.480062631344483e-06, "loss": 0.1186, "step": 3909 }, { "epoch": 1.2670123136746598, "grad_norm": 0.48663994669914246, "learning_rate": 6.478391717187389e-06, "loss": 0.1115, "step": 3910 }, { "epoch": 1.2673363577446533, "grad_norm": 0.44438567757606506, "learning_rate": 6.476720622089461e-06, "loss": 0.1009, "step": 3911 }, { "epoch": 1.2676604018146467, "grad_norm": 0.48949071764945984, "learning_rate": 6.4750493462552245e-06, "loss": 0.1203, "step": 3912 }, { "epoch": 1.2679844458846403, "grad_norm": 0.46416011452674866, "learning_rate": 6.473377889889228e-06, "loss": 0.1061, "step": 3913 }, { "epoch": 1.268308489954634, "grad_norm": 0.46517306566238403, "learning_rate": 6.4717062531960415e-06, "loss": 0.1068, "step": 3914 }, { "epoch": 1.2686325340246274, "grad_norm": 0.4801362454891205, "learning_rate": 6.470034436380257e-06, "loss": 0.1062, "step": 3915 }, { "epoch": 1.2689565780946208, "grad_norm": 0.5002215504646301, "learning_rate": 6.468362439646487e-06, "loss": 0.1129, "step": 3916 }, { "epoch": 1.2692806221646145, "grad_norm": 0.45998987555503845, "learning_rate": 6.4666902631993714e-06, "loss": 0.1165, "step": 3917 }, { "epoch": 1.2696046662346079, "grad_norm": 0.5007704496383667, "learning_rate": 6.465017907243564e-06, "loss": 0.1168, "step": 3918 }, { "epoch": 1.2699287103046015, "grad_norm": 0.47280606627464294, "learning_rate": 6.463345371983748e-06, "loss": 0.1143, "step": 3919 }, { "epoch": 1.270252754374595, "grad_norm": 0.5030761957168579, "learning_rate": 6.4616726576246255e-06, "loss": 0.1172, "step": 3920 }, { "epoch": 1.2705767984445884, "grad_norm": 0.4938787519931793, "learning_rate": 6.459999764370919e-06, "loss": 0.121, "step": 3921 }, { "epoch": 1.270900842514582, "grad_norm": 0.42462942004203796, "learning_rate": 6.458326692427376e-06, "loss": 0.1052, "step": 3922 }, { "epoch": 1.2712248865845754, "grad_norm": 0.4278605580329895, "learning_rate": 6.456653441998764e-06, "loss": 0.1029, "step": 3923 }, { "epoch": 1.271548930654569, "grad_norm": 0.4416208565235138, "learning_rate": 6.454980013289871e-06, "loss": 0.1019, "step": 3924 }, { "epoch": 1.2718729747245625, "grad_norm": 0.492037832736969, "learning_rate": 6.4533064065055095e-06, "loss": 0.1232, "step": 3925 }, { "epoch": 1.272197018794556, "grad_norm": 0.46979275345802307, "learning_rate": 6.451632621850514e-06, "loss": 0.1085, "step": 3926 }, { "epoch": 1.2725210628645496, "grad_norm": 0.4623831808567047, "learning_rate": 6.449958659529738e-06, "loss": 0.1064, "step": 3927 }, { "epoch": 1.2728451069345432, "grad_norm": 0.44108131527900696, "learning_rate": 6.448284519748058e-06, "loss": 0.101, "step": 3928 }, { "epoch": 1.2731691510045366, "grad_norm": 0.5097630620002747, "learning_rate": 6.446610202710374e-06, "loss": 0.1199, "step": 3929 }, { "epoch": 1.27349319507453, "grad_norm": 0.5080080032348633, "learning_rate": 6.444935708621605e-06, "loss": 0.1262, "step": 3930 }, { "epoch": 1.2738172391445237, "grad_norm": 0.45362842082977295, "learning_rate": 6.443261037686694e-06, "loss": 0.1085, "step": 3931 }, { "epoch": 1.2741412832145171, "grad_norm": 0.4672083258628845, "learning_rate": 6.441586190110603e-06, "loss": 0.1137, "step": 3932 }, { "epoch": 1.2744653272845108, "grad_norm": 0.45072510838508606, "learning_rate": 6.439911166098319e-06, "loss": 0.1058, "step": 3933 }, { "epoch": 1.2747893713545042, "grad_norm": 0.5025404095649719, "learning_rate": 6.438235965854849e-06, "loss": 0.1159, "step": 3934 }, { "epoch": 1.2751134154244976, "grad_norm": 0.491618812084198, "learning_rate": 6.436560589585217e-06, "loss": 0.1191, "step": 3935 }, { "epoch": 1.2754374594944913, "grad_norm": 0.4758274555206299, "learning_rate": 6.434885037494477e-06, "loss": 0.1122, "step": 3936 }, { "epoch": 1.2757615035644847, "grad_norm": 0.4895617663860321, "learning_rate": 6.4332093097876994e-06, "loss": 0.1211, "step": 3937 }, { "epoch": 1.2760855476344783, "grad_norm": 0.45443156361579895, "learning_rate": 6.431533406669976e-06, "loss": 0.1105, "step": 3938 }, { "epoch": 1.2764095917044718, "grad_norm": 0.47179800271987915, "learning_rate": 6.429857328346422e-06, "loss": 0.1098, "step": 3939 }, { "epoch": 1.2767336357744652, "grad_norm": 0.4958186447620392, "learning_rate": 6.428181075022175e-06, "loss": 0.1209, "step": 3940 }, { "epoch": 1.2770576798444588, "grad_norm": 0.485847145318985, "learning_rate": 6.426504646902389e-06, "loss": 0.1223, "step": 3941 }, { "epoch": 1.2773817239144525, "grad_norm": 0.45189395546913147, "learning_rate": 6.424828044192243e-06, "loss": 0.1005, "step": 3942 }, { "epoch": 1.277705767984446, "grad_norm": 0.48433998227119446, "learning_rate": 6.423151267096939e-06, "loss": 0.1216, "step": 3943 }, { "epoch": 1.2780298120544393, "grad_norm": 0.4740990102291107, "learning_rate": 6.421474315821696e-06, "loss": 0.1217, "step": 3944 }, { "epoch": 1.278353856124433, "grad_norm": 0.5032677054405212, "learning_rate": 6.419797190571759e-06, "loss": 0.1259, "step": 3945 }, { "epoch": 1.2786779001944264, "grad_norm": 0.5017115473747253, "learning_rate": 6.41811989155239e-06, "loss": 0.1282, "step": 3946 }, { "epoch": 1.27900194426442, "grad_norm": 0.49619826674461365, "learning_rate": 6.416442418968875e-06, "loss": 0.1242, "step": 3947 }, { "epoch": 1.2793259883344135, "grad_norm": 0.4738031029701233, "learning_rate": 6.41476477302652e-06, "loss": 0.1141, "step": 3948 }, { "epoch": 1.279650032404407, "grad_norm": 0.4560187757015228, "learning_rate": 6.413086953930652e-06, "loss": 0.1117, "step": 3949 }, { "epoch": 1.2799740764744005, "grad_norm": 0.4704577326774597, "learning_rate": 6.4114089618866215e-06, "loss": 0.1204, "step": 3950 }, { "epoch": 1.280298120544394, "grad_norm": 0.46816858649253845, "learning_rate": 6.409730797099797e-06, "loss": 0.1161, "step": 3951 }, { "epoch": 1.2806221646143876, "grad_norm": 0.46960848569869995, "learning_rate": 6.40805245977557e-06, "loss": 0.1159, "step": 3952 }, { "epoch": 1.280946208684381, "grad_norm": 0.44595637917518616, "learning_rate": 6.406373950119354e-06, "loss": 0.1072, "step": 3953 }, { "epoch": 1.2812702527543747, "grad_norm": 0.471149742603302, "learning_rate": 6.4046952683365805e-06, "loss": 0.1164, "step": 3954 }, { "epoch": 1.281594296824368, "grad_norm": 0.4557897448539734, "learning_rate": 6.403016414632705e-06, "loss": 0.1093, "step": 3955 }, { "epoch": 1.2819183408943617, "grad_norm": 0.45571160316467285, "learning_rate": 6.4013373892132024e-06, "loss": 0.1186, "step": 3956 }, { "epoch": 1.2822423849643552, "grad_norm": 0.45447635650634766, "learning_rate": 6.399658192283569e-06, "loss": 0.0979, "step": 3957 }, { "epoch": 1.2825664290343486, "grad_norm": 0.4660548269748688, "learning_rate": 6.3979788240493226e-06, "loss": 0.109, "step": 3958 }, { "epoch": 1.2828904731043422, "grad_norm": 0.45442885160446167, "learning_rate": 6.3962992847160025e-06, "loss": 0.1102, "step": 3959 }, { "epoch": 1.2832145171743357, "grad_norm": 0.48392462730407715, "learning_rate": 6.3946195744891656e-06, "loss": 0.1213, "step": 3960 }, { "epoch": 1.2835385612443293, "grad_norm": 0.4479372501373291, "learning_rate": 6.392939693574395e-06, "loss": 0.0962, "step": 3961 }, { "epoch": 1.2838626053143227, "grad_norm": 0.5068491101264954, "learning_rate": 6.391259642177291e-06, "loss": 0.1287, "step": 3962 }, { "epoch": 1.2841866493843161, "grad_norm": 0.43103277683258057, "learning_rate": 6.389579420503475e-06, "loss": 0.0999, "step": 3963 }, { "epoch": 1.2845106934543098, "grad_norm": 0.47594237327575684, "learning_rate": 6.387899028758589e-06, "loss": 0.1131, "step": 3964 }, { "epoch": 1.2848347375243034, "grad_norm": 0.4817759692668915, "learning_rate": 6.3862184671482995e-06, "loss": 0.1174, "step": 3965 }, { "epoch": 1.2851587815942969, "grad_norm": 0.4870638847351074, "learning_rate": 6.384537735878288e-06, "loss": 0.1177, "step": 3966 }, { "epoch": 1.2854828256642903, "grad_norm": 0.5051252245903015, "learning_rate": 6.3828568351542605e-06, "loss": 0.1216, "step": 3967 }, { "epoch": 1.285806869734284, "grad_norm": 0.4776197075843811, "learning_rate": 6.381175765181945e-06, "loss": 0.1129, "step": 3968 }, { "epoch": 1.2861309138042774, "grad_norm": 0.44561052322387695, "learning_rate": 6.379494526167086e-06, "loss": 0.1089, "step": 3969 }, { "epoch": 1.286454957874271, "grad_norm": 0.5017491579055786, "learning_rate": 6.37781311831545e-06, "loss": 0.115, "step": 3970 }, { "epoch": 1.2867790019442644, "grad_norm": 0.4586908221244812, "learning_rate": 6.376131541832829e-06, "loss": 0.1057, "step": 3971 }, { "epoch": 1.2871030460142578, "grad_norm": 0.4588594138622284, "learning_rate": 6.374449796925027e-06, "loss": 0.1186, "step": 3972 }, { "epoch": 1.2874270900842515, "grad_norm": 0.47810885310173035, "learning_rate": 6.372767883797877e-06, "loss": 0.1162, "step": 3973 }, { "epoch": 1.287751134154245, "grad_norm": 0.44631102681159973, "learning_rate": 6.371085802657226e-06, "loss": 0.1074, "step": 3974 }, { "epoch": 1.2880751782242386, "grad_norm": 0.5198760628700256, "learning_rate": 6.369403553708948e-06, "loss": 0.1318, "step": 3975 }, { "epoch": 1.288399222294232, "grad_norm": 0.4657326638698578, "learning_rate": 6.367721137158933e-06, "loss": 0.1075, "step": 3976 }, { "epoch": 1.2887232663642254, "grad_norm": 0.4796975255012512, "learning_rate": 6.366038553213089e-06, "loss": 0.1125, "step": 3977 }, { "epoch": 1.289047310434219, "grad_norm": 0.4714679718017578, "learning_rate": 6.364355802077351e-06, "loss": 0.1147, "step": 3978 }, { "epoch": 1.2893713545042127, "grad_norm": 0.41647621989250183, "learning_rate": 6.36267288395767e-06, "loss": 0.1075, "step": 3979 }, { "epoch": 1.2896953985742061, "grad_norm": 0.4634070098400116, "learning_rate": 6.36098979906002e-06, "loss": 0.1095, "step": 3980 }, { "epoch": 1.2900194426441995, "grad_norm": 0.41540664434432983, "learning_rate": 6.359306547590395e-06, "loss": 0.1038, "step": 3981 }, { "epoch": 1.2903434867141932, "grad_norm": 0.4272691607475281, "learning_rate": 6.357623129754807e-06, "loss": 0.1059, "step": 3982 }, { "epoch": 1.2906675307841866, "grad_norm": 0.49106115102767944, "learning_rate": 6.35593954575929e-06, "loss": 0.1188, "step": 3983 }, { "epoch": 1.2909915748541803, "grad_norm": 0.463112473487854, "learning_rate": 6.354255795809899e-06, "loss": 0.1115, "step": 3984 }, { "epoch": 1.2913156189241737, "grad_norm": 0.631113588809967, "learning_rate": 6.35257188011271e-06, "loss": 0.1172, "step": 3985 }, { "epoch": 1.291639662994167, "grad_norm": 0.4728262722492218, "learning_rate": 6.3508877988738174e-06, "loss": 0.1176, "step": 3986 }, { "epoch": 1.2919637070641607, "grad_norm": 0.4452451765537262, "learning_rate": 6.349203552299336e-06, "loss": 0.1103, "step": 3987 }, { "epoch": 1.2922877511341542, "grad_norm": 0.4820338487625122, "learning_rate": 6.347519140595399e-06, "loss": 0.1155, "step": 3988 }, { "epoch": 1.2926117952041478, "grad_norm": 0.46228158473968506, "learning_rate": 6.345834563968165e-06, "loss": 0.1117, "step": 3989 }, { "epoch": 1.2929358392741412, "grad_norm": 0.4567924439907074, "learning_rate": 6.344149822623809e-06, "loss": 0.1139, "step": 3990 }, { "epoch": 1.2932598833441347, "grad_norm": 0.47757378220558167, "learning_rate": 6.3424649167685274e-06, "loss": 0.1027, "step": 3991 }, { "epoch": 1.2935839274141283, "grad_norm": 0.4581971764564514, "learning_rate": 6.340779846608535e-06, "loss": 0.1105, "step": 3992 }, { "epoch": 1.293907971484122, "grad_norm": 0.4690464437007904, "learning_rate": 6.339094612350071e-06, "loss": 0.1111, "step": 3993 }, { "epoch": 1.2942320155541154, "grad_norm": 0.4487064778804779, "learning_rate": 6.3374092141993884e-06, "loss": 0.1042, "step": 3994 }, { "epoch": 1.2945560596241088, "grad_norm": 0.5062069296836853, "learning_rate": 6.3357236523627656e-06, "loss": 0.1211, "step": 3995 }, { "epoch": 1.2948801036941024, "grad_norm": 0.46276944875717163, "learning_rate": 6.334037927046498e-06, "loss": 0.1071, "step": 3996 }, { "epoch": 1.2952041477640959, "grad_norm": 0.5312107801437378, "learning_rate": 6.3323520384569036e-06, "loss": 0.124, "step": 3997 }, { "epoch": 1.2955281918340895, "grad_norm": 0.4688504934310913, "learning_rate": 6.330665986800318e-06, "loss": 0.1158, "step": 3998 }, { "epoch": 1.295852235904083, "grad_norm": 0.4981670379638672, "learning_rate": 6.328979772283097e-06, "loss": 0.125, "step": 3999 }, { "epoch": 1.2961762799740764, "grad_norm": 0.48476776480674744, "learning_rate": 6.327293395111618e-06, "loss": 0.1245, "step": 4000 }, { "epoch": 1.29650032404407, "grad_norm": 0.472931444644928, "learning_rate": 6.325606855492275e-06, "loss": 0.1167, "step": 4001 }, { "epoch": 1.2968243681140634, "grad_norm": 0.4795880913734436, "learning_rate": 6.323920153631486e-06, "loss": 0.1083, "step": 4002 }, { "epoch": 1.297148412184057, "grad_norm": 0.5015627145767212, "learning_rate": 6.322233289735689e-06, "loss": 0.1194, "step": 4003 }, { "epoch": 1.2974724562540505, "grad_norm": 0.4274068772792816, "learning_rate": 6.320546264011335e-06, "loss": 0.1032, "step": 4004 }, { "epoch": 1.2977965003240441, "grad_norm": 0.4991876184940338, "learning_rate": 6.318859076664904e-06, "loss": 0.1206, "step": 4005 }, { "epoch": 1.2981205443940376, "grad_norm": 0.47470569610595703, "learning_rate": 6.317171727902889e-06, "loss": 0.1241, "step": 4006 }, { "epoch": 1.2984445884640312, "grad_norm": 0.48465102910995483, "learning_rate": 6.315484217931805e-06, "loss": 0.1119, "step": 4007 }, { "epoch": 1.2987686325340246, "grad_norm": 0.4615066945552826, "learning_rate": 6.313796546958189e-06, "loss": 0.1082, "step": 4008 }, { "epoch": 1.299092676604018, "grad_norm": 0.45659682154655457, "learning_rate": 6.3121087151885915e-06, "loss": 0.1126, "step": 4009 }, { "epoch": 1.2994167206740117, "grad_norm": 0.502086877822876, "learning_rate": 6.310420722829591e-06, "loss": 0.1197, "step": 4010 }, { "epoch": 1.2997407647440051, "grad_norm": 0.46333470940589905, "learning_rate": 6.308732570087781e-06, "loss": 0.1091, "step": 4011 }, { "epoch": 1.3000648088139988, "grad_norm": 0.4794006943702698, "learning_rate": 6.307044257169773e-06, "loss": 0.1234, "step": 4012 }, { "epoch": 1.3003888528839922, "grad_norm": 0.4782438576221466, "learning_rate": 6.305355784282201e-06, "loss": 0.1106, "step": 4013 }, { "epoch": 1.3007128969539856, "grad_norm": 0.4819827675819397, "learning_rate": 6.303667151631718e-06, "loss": 0.1154, "step": 4014 }, { "epoch": 1.3010369410239793, "grad_norm": 0.44112661480903625, "learning_rate": 6.301978359424995e-06, "loss": 0.1057, "step": 4015 }, { "epoch": 1.301360985093973, "grad_norm": 0.4780457019805908, "learning_rate": 6.300289407868726e-06, "loss": 0.1271, "step": 4016 }, { "epoch": 1.3016850291639663, "grad_norm": 0.4852437376976013, "learning_rate": 6.298600297169622e-06, "loss": 0.1228, "step": 4017 }, { "epoch": 1.3020090732339598, "grad_norm": 0.4688003361225128, "learning_rate": 6.296911027534413e-06, "loss": 0.1024, "step": 4018 }, { "epoch": 1.3023331173039534, "grad_norm": 0.4358528256416321, "learning_rate": 6.295221599169848e-06, "loss": 0.1057, "step": 4019 }, { "epoch": 1.3026571613739468, "grad_norm": 0.46564021706581116, "learning_rate": 6.293532012282699e-06, "loss": 0.1106, "step": 4020 }, { "epoch": 1.3029812054439405, "grad_norm": 0.4571780562400818, "learning_rate": 6.291842267079753e-06, "loss": 0.1135, "step": 4021 }, { "epoch": 1.303305249513934, "grad_norm": 0.4707525968551636, "learning_rate": 6.29015236376782e-06, "loss": 0.1149, "step": 4022 }, { "epoch": 1.3036292935839273, "grad_norm": 0.45767292380332947, "learning_rate": 6.288462302553728e-06, "loss": 0.118, "step": 4023 }, { "epoch": 1.303953337653921, "grad_norm": 0.46272486448287964, "learning_rate": 6.286772083644324e-06, "loss": 0.1077, "step": 4024 }, { "epoch": 1.3042773817239144, "grad_norm": 0.4843446910381317, "learning_rate": 6.285081707246472e-06, "loss": 0.1152, "step": 4025 }, { "epoch": 1.304601425793908, "grad_norm": 0.4920760989189148, "learning_rate": 6.28339117356706e-06, "loss": 0.1192, "step": 4026 }, { "epoch": 1.3049254698639015, "grad_norm": 0.4871255159378052, "learning_rate": 6.281700482812993e-06, "loss": 0.111, "step": 4027 }, { "epoch": 1.3052495139338949, "grad_norm": 0.4424149990081787, "learning_rate": 6.280009635191194e-06, "loss": 0.1014, "step": 4028 }, { "epoch": 1.3055735580038885, "grad_norm": 0.4454768896102905, "learning_rate": 6.2783186309086086e-06, "loss": 0.1079, "step": 4029 }, { "epoch": 1.3058976020738822, "grad_norm": 0.47427868843078613, "learning_rate": 6.276627470172198e-06, "loss": 0.1132, "step": 4030 }, { "epoch": 1.3062216461438756, "grad_norm": 0.49968528747558594, "learning_rate": 6.274936153188942e-06, "loss": 0.1237, "step": 4031 }, { "epoch": 1.306545690213869, "grad_norm": 0.4610420763492584, "learning_rate": 6.273244680165843e-06, "loss": 0.1139, "step": 4032 }, { "epoch": 1.3068697342838627, "grad_norm": 0.4568065106868744, "learning_rate": 6.271553051309922e-06, "loss": 0.1063, "step": 4033 }, { "epoch": 1.307193778353856, "grad_norm": 0.4630391299724579, "learning_rate": 6.269861266828217e-06, "loss": 0.1086, "step": 4034 }, { "epoch": 1.3075178224238497, "grad_norm": 0.46840715408325195, "learning_rate": 6.268169326927788e-06, "loss": 0.1118, "step": 4035 }, { "epoch": 1.3078418664938432, "grad_norm": 0.48442018032073975, "learning_rate": 6.266477231815707e-06, "loss": 0.1148, "step": 4036 }, { "epoch": 1.3081659105638366, "grad_norm": 0.4597585201263428, "learning_rate": 6.264784981699074e-06, "loss": 0.1095, "step": 4037 }, { "epoch": 1.3084899546338302, "grad_norm": 0.44043147563934326, "learning_rate": 6.263092576785005e-06, "loss": 0.1008, "step": 4038 }, { "epoch": 1.3088139987038236, "grad_norm": 0.4840199649333954, "learning_rate": 6.2614000172806324e-06, "loss": 0.1236, "step": 4039 }, { "epoch": 1.3091380427738173, "grad_norm": 0.4641066789627075, "learning_rate": 6.2597073033931075e-06, "loss": 0.1124, "step": 4040 }, { "epoch": 1.3094620868438107, "grad_norm": 0.47489872574806213, "learning_rate": 6.258014435329604e-06, "loss": 0.1153, "step": 4041 }, { "epoch": 1.3097861309138044, "grad_norm": 0.47044837474823, "learning_rate": 6.256321413297313e-06, "loss": 0.1152, "step": 4042 }, { "epoch": 1.3101101749837978, "grad_norm": 0.4591846168041229, "learning_rate": 6.254628237503442e-06, "loss": 0.1081, "step": 4043 }, { "epoch": 1.3104342190537914, "grad_norm": 0.4056306779384613, "learning_rate": 6.25293490815522e-06, "loss": 0.0999, "step": 4044 }, { "epoch": 1.3107582631237849, "grad_norm": 0.49313053488731384, "learning_rate": 6.251241425459895e-06, "loss": 0.1214, "step": 4045 }, { "epoch": 1.3110823071937783, "grad_norm": 0.46979349851608276, "learning_rate": 6.249547789624734e-06, "loss": 0.1125, "step": 4046 }, { "epoch": 1.311406351263772, "grad_norm": 0.4617290496826172, "learning_rate": 6.247854000857018e-06, "loss": 0.1103, "step": 4047 }, { "epoch": 1.3117303953337653, "grad_norm": 0.4825688600540161, "learning_rate": 6.246160059364054e-06, "loss": 0.122, "step": 4048 }, { "epoch": 1.312054439403759, "grad_norm": 0.48396843671798706, "learning_rate": 6.244465965353161e-06, "loss": 0.1287, "step": 4049 }, { "epoch": 1.3123784834737524, "grad_norm": 0.4837722182273865, "learning_rate": 6.242771719031684e-06, "loss": 0.1166, "step": 4050 }, { "epoch": 1.3127025275437458, "grad_norm": 0.5088424682617188, "learning_rate": 6.241077320606977e-06, "loss": 0.1257, "step": 4051 }, { "epoch": 1.3130265716137395, "grad_norm": 0.49504292011260986, "learning_rate": 6.2393827702864215e-06, "loss": 0.1137, "step": 4052 }, { "epoch": 1.3133506156837331, "grad_norm": 0.43925389647483826, "learning_rate": 6.2376880682774125e-06, "loss": 0.1062, "step": 4053 }, { "epoch": 1.3136746597537265, "grad_norm": 0.4258055090904236, "learning_rate": 6.235993214787367e-06, "loss": 0.0944, "step": 4054 }, { "epoch": 1.31399870382372, "grad_norm": 0.4500040113925934, "learning_rate": 6.234298210023716e-06, "loss": 0.1132, "step": 4055 }, { "epoch": 1.3143227478937136, "grad_norm": 0.44641897082328796, "learning_rate": 6.2326030541939135e-06, "loss": 0.1068, "step": 4056 }, { "epoch": 1.314646791963707, "grad_norm": 0.512460470199585, "learning_rate": 6.230907747505428e-06, "loss": 0.1285, "step": 4057 }, { "epoch": 1.3149708360337007, "grad_norm": 0.48335960507392883, "learning_rate": 6.229212290165752e-06, "loss": 0.1192, "step": 4058 }, { "epoch": 1.315294880103694, "grad_norm": 0.4757685661315918, "learning_rate": 6.227516682382391e-06, "loss": 0.1143, "step": 4059 }, { "epoch": 1.3156189241736875, "grad_norm": 0.48404660820961, "learning_rate": 6.225820924362873e-06, "loss": 0.1132, "step": 4060 }, { "epoch": 1.3159429682436812, "grad_norm": 0.46162092685699463, "learning_rate": 6.224125016314739e-06, "loss": 0.112, "step": 4061 }, { "epoch": 1.3162670123136746, "grad_norm": 0.439531534910202, "learning_rate": 6.222428958445555e-06, "loss": 0.1028, "step": 4062 }, { "epoch": 1.3165910563836682, "grad_norm": 0.48746368288993835, "learning_rate": 6.220732750962899e-06, "loss": 0.1137, "step": 4063 }, { "epoch": 1.3169151004536617, "grad_norm": 0.5479814410209656, "learning_rate": 6.219036394074372e-06, "loss": 0.1339, "step": 4064 }, { "epoch": 1.317239144523655, "grad_norm": 0.44706690311431885, "learning_rate": 6.217339887987591e-06, "loss": 0.1028, "step": 4065 }, { "epoch": 1.3175631885936487, "grad_norm": 0.448042094707489, "learning_rate": 6.215643232910193e-06, "loss": 0.107, "step": 4066 }, { "epoch": 1.3178872326636424, "grad_norm": 0.5104048252105713, "learning_rate": 6.213946429049833e-06, "loss": 0.1276, "step": 4067 }, { "epoch": 1.3182112767336358, "grad_norm": 0.43246206641197205, "learning_rate": 6.212249476614181e-06, "loss": 0.0954, "step": 4068 }, { "epoch": 1.3185353208036292, "grad_norm": 0.4300832450389862, "learning_rate": 6.210552375810927e-06, "loss": 0.105, "step": 4069 }, { "epoch": 1.3188593648736229, "grad_norm": 0.4607033431529999, "learning_rate": 6.208855126847783e-06, "loss": 0.1086, "step": 4070 }, { "epoch": 1.3191834089436163, "grad_norm": 0.45668113231658936, "learning_rate": 6.207157729932474e-06, "loss": 0.1104, "step": 4071 }, { "epoch": 1.31950745301361, "grad_norm": 0.4810584485530853, "learning_rate": 6.205460185272745e-06, "loss": 0.1146, "step": 4072 }, { "epoch": 1.3198314970836034, "grad_norm": 0.5209870338439941, "learning_rate": 6.203762493076359e-06, "loss": 0.1243, "step": 4073 }, { "epoch": 1.3201555411535968, "grad_norm": 0.4863770306110382, "learning_rate": 6.202064653551097e-06, "loss": 0.1171, "step": 4074 }, { "epoch": 1.3204795852235904, "grad_norm": 0.45841556787490845, "learning_rate": 6.200366666904758e-06, "loss": 0.11, "step": 4075 }, { "epoch": 1.3208036292935839, "grad_norm": 0.43122103810310364, "learning_rate": 6.1986685333451606e-06, "loss": 0.106, "step": 4076 }, { "epoch": 1.3211276733635775, "grad_norm": 0.474511057138443, "learning_rate": 6.196970253080137e-06, "loss": 0.111, "step": 4077 }, { "epoch": 1.321451717433571, "grad_norm": 0.47046247124671936, "learning_rate": 6.195271826317544e-06, "loss": 0.1099, "step": 4078 }, { "epoch": 1.3217757615035644, "grad_norm": 0.46735498309135437, "learning_rate": 6.193573253265248e-06, "loss": 0.1095, "step": 4079 }, { "epoch": 1.322099805573558, "grad_norm": 0.48384442925453186, "learning_rate": 6.191874534131143e-06, "loss": 0.121, "step": 4080 }, { "epoch": 1.3224238496435516, "grad_norm": 0.4530140161514282, "learning_rate": 6.190175669123131e-06, "loss": 0.1108, "step": 4081 }, { "epoch": 1.322747893713545, "grad_norm": 0.4611378312110901, "learning_rate": 6.188476658449141e-06, "loss": 0.1204, "step": 4082 }, { "epoch": 1.3230719377835385, "grad_norm": 0.4774719774723053, "learning_rate": 6.186777502317113e-06, "loss": 0.1174, "step": 4083 }, { "epoch": 1.3233959818535321, "grad_norm": 0.458342581987381, "learning_rate": 6.1850782009350075e-06, "loss": 0.1153, "step": 4084 }, { "epoch": 1.3237200259235256, "grad_norm": 0.4817325472831726, "learning_rate": 6.183378754510801e-06, "loss": 0.1121, "step": 4085 }, { "epoch": 1.3240440699935192, "grad_norm": 0.4727479815483093, "learning_rate": 6.181679163252493e-06, "loss": 0.11, "step": 4086 }, { "epoch": 1.3243681140635126, "grad_norm": 0.5003477931022644, "learning_rate": 6.1799794273680936e-06, "loss": 0.1195, "step": 4087 }, { "epoch": 1.324692158133506, "grad_norm": 0.4953130781650543, "learning_rate": 6.178279547065635e-06, "loss": 0.1163, "step": 4088 }, { "epoch": 1.3250162022034997, "grad_norm": 0.4566429853439331, "learning_rate": 6.176579522553168e-06, "loss": 0.1094, "step": 4089 }, { "epoch": 1.3253402462734931, "grad_norm": 0.4813137948513031, "learning_rate": 6.174879354038757e-06, "loss": 0.1124, "step": 4090 }, { "epoch": 1.3256642903434868, "grad_norm": 0.4739673435688019, "learning_rate": 6.173179041730487e-06, "loss": 0.1277, "step": 4091 }, { "epoch": 1.3259883344134802, "grad_norm": 0.5202913880348206, "learning_rate": 6.171478585836459e-06, "loss": 0.1322, "step": 4092 }, { "epoch": 1.3263123784834738, "grad_norm": 0.45613497495651245, "learning_rate": 6.169777986564794e-06, "loss": 0.1105, "step": 4093 }, { "epoch": 1.3266364225534673, "grad_norm": 0.4877902865409851, "learning_rate": 6.168077244123627e-06, "loss": 0.1218, "step": 4094 }, { "epoch": 1.326960466623461, "grad_norm": 0.4779025912284851, "learning_rate": 6.166376358721112e-06, "loss": 0.1196, "step": 4095 }, { "epoch": 1.3272845106934543, "grad_norm": 0.4462127387523651, "learning_rate": 6.164675330565425e-06, "loss": 0.1114, "step": 4096 }, { "epoch": 1.3276085547634477, "grad_norm": 0.4394828677177429, "learning_rate": 6.1629741598647496e-06, "loss": 0.1106, "step": 4097 }, { "epoch": 1.3279325988334414, "grad_norm": 0.4651632308959961, "learning_rate": 6.161272846827298e-06, "loss": 0.1071, "step": 4098 }, { "epoch": 1.3282566429034348, "grad_norm": 0.4370875358581543, "learning_rate": 6.15957139166129e-06, "loss": 0.116, "step": 4099 }, { "epoch": 1.3285806869734285, "grad_norm": 0.47040608525276184, "learning_rate": 6.157869794574969e-06, "loss": 0.1177, "step": 4100 }, { "epoch": 1.3289047310434219, "grad_norm": 0.4459068179130554, "learning_rate": 6.156168055776595e-06, "loss": 0.107, "step": 4101 }, { "epoch": 1.3292287751134153, "grad_norm": 0.42454037070274353, "learning_rate": 6.154466175474444e-06, "loss": 0.1014, "step": 4102 }, { "epoch": 1.329552819183409, "grad_norm": 0.4620989263057709, "learning_rate": 6.1527641538768075e-06, "loss": 0.1132, "step": 4103 }, { "epoch": 1.3298768632534026, "grad_norm": 0.4761102795600891, "learning_rate": 6.151061991192001e-06, "loss": 0.1108, "step": 4104 }, { "epoch": 1.330200907323396, "grad_norm": 0.4630895256996155, "learning_rate": 6.149359687628348e-06, "loss": 0.1133, "step": 4105 }, { "epoch": 1.3305249513933894, "grad_norm": 0.4673987627029419, "learning_rate": 6.147657243394196e-06, "loss": 0.1132, "step": 4106 }, { "epoch": 1.330848995463383, "grad_norm": 0.4772607088088989, "learning_rate": 6.145954658697908e-06, "loss": 0.1165, "step": 4107 }, { "epoch": 1.3311730395333765, "grad_norm": 0.4496089518070221, "learning_rate": 6.144251933747864e-06, "loss": 0.1164, "step": 4108 }, { "epoch": 1.3314970836033702, "grad_norm": 0.5247877836227417, "learning_rate": 6.142549068752459e-06, "loss": 0.1283, "step": 4109 }, { "epoch": 1.3318211276733636, "grad_norm": 0.47814130783081055, "learning_rate": 6.1408460639201095e-06, "loss": 0.1111, "step": 4110 }, { "epoch": 1.332145171743357, "grad_norm": 0.4383772313594818, "learning_rate": 6.139142919459246e-06, "loss": 0.102, "step": 4111 }, { "epoch": 1.3324692158133506, "grad_norm": 0.4361100196838379, "learning_rate": 6.137439635578316e-06, "loss": 0.1123, "step": 4112 }, { "epoch": 1.332793259883344, "grad_norm": 0.4610941708087921, "learning_rate": 6.135736212485788e-06, "loss": 0.1088, "step": 4113 }, { "epoch": 1.3331173039533377, "grad_norm": 0.48235994577407837, "learning_rate": 6.1340326503901405e-06, "loss": 0.1077, "step": 4114 }, { "epoch": 1.3334413480233311, "grad_norm": 0.49216118454933167, "learning_rate": 6.132328949499877e-06, "loss": 0.1067, "step": 4115 }, { "epoch": 1.3337653920933246, "grad_norm": 0.46302199363708496, "learning_rate": 6.1306251100235094e-06, "loss": 0.1104, "step": 4116 }, { "epoch": 1.3340894361633182, "grad_norm": 0.4512770473957062, "learning_rate": 6.128921132169575e-06, "loss": 0.1066, "step": 4117 }, { "epoch": 1.3344134802333119, "grad_norm": 0.47927504777908325, "learning_rate": 6.1272170161466225e-06, "loss": 0.1171, "step": 4118 }, { "epoch": 1.3347375243033053, "grad_norm": 0.4398737847805023, "learning_rate": 6.125512762163219e-06, "loss": 0.1039, "step": 4119 }, { "epoch": 1.3350615683732987, "grad_norm": 0.45653656125068665, "learning_rate": 6.123808370427949e-06, "loss": 0.1155, "step": 4120 }, { "epoch": 1.3353856124432923, "grad_norm": 0.4693288207054138, "learning_rate": 6.122103841149416e-06, "loss": 0.115, "step": 4121 }, { "epoch": 1.3357096565132858, "grad_norm": 0.43127191066741943, "learning_rate": 6.120399174536233e-06, "loss": 0.1096, "step": 4122 }, { "epoch": 1.3360337005832794, "grad_norm": 0.47410160303115845, "learning_rate": 6.1186943707970395e-06, "loss": 0.1134, "step": 4123 }, { "epoch": 1.3363577446532728, "grad_norm": 0.45060470700263977, "learning_rate": 6.116989430140484e-06, "loss": 0.1021, "step": 4124 }, { "epoch": 1.3366817887232663, "grad_norm": 0.4330093562602997, "learning_rate": 6.115284352775235e-06, "loss": 0.1, "step": 4125 }, { "epoch": 1.33700583279326, "grad_norm": 0.4936785399913788, "learning_rate": 6.113579138909978e-06, "loss": 0.1234, "step": 4126 }, { "epoch": 1.3373298768632533, "grad_norm": 0.44440215826034546, "learning_rate": 6.111873788753416e-06, "loss": 0.1064, "step": 4127 }, { "epoch": 1.337653920933247, "grad_norm": 0.48851221799850464, "learning_rate": 6.110168302514266e-06, "loss": 0.1146, "step": 4128 }, { "epoch": 1.3379779650032404, "grad_norm": 0.468937486410141, "learning_rate": 6.108462680401262e-06, "loss": 0.1142, "step": 4129 }, { "epoch": 1.3383020090732338, "grad_norm": 0.4295579493045807, "learning_rate": 6.106756922623156e-06, "loss": 0.0968, "step": 4130 }, { "epoch": 1.3386260531432275, "grad_norm": 0.4338614344596863, "learning_rate": 6.1050510293887165e-06, "loss": 0.1022, "step": 4131 }, { "epoch": 1.3389500972132211, "grad_norm": 0.43360260128974915, "learning_rate": 6.1033450009067295e-06, "loss": 0.1013, "step": 4132 }, { "epoch": 1.3392741412832145, "grad_norm": 0.4844948649406433, "learning_rate": 6.101638837385997e-06, "loss": 0.1129, "step": 4133 }, { "epoch": 1.339598185353208, "grad_norm": 0.4562806487083435, "learning_rate": 6.099932539035335e-06, "loss": 0.1071, "step": 4134 }, { "epoch": 1.3399222294232016, "grad_norm": 0.44041526317596436, "learning_rate": 6.098226106063577e-06, "loss": 0.1031, "step": 4135 }, { "epoch": 1.340246273493195, "grad_norm": 0.4702534079551697, "learning_rate": 6.0965195386795774e-06, "loss": 0.1141, "step": 4136 }, { "epoch": 1.3405703175631887, "grad_norm": 0.4721149802207947, "learning_rate": 6.0948128370922e-06, "loss": 0.1113, "step": 4137 }, { "epoch": 1.340894361633182, "grad_norm": 0.46534913778305054, "learning_rate": 6.093106001510329e-06, "loss": 0.1143, "step": 4138 }, { "epoch": 1.3412184057031755, "grad_norm": 0.49070435762405396, "learning_rate": 6.091399032142869e-06, "loss": 0.1139, "step": 4139 }, { "epoch": 1.3415424497731692, "grad_norm": 0.4650188684463501, "learning_rate": 6.08969192919873e-06, "loss": 0.1108, "step": 4140 }, { "epoch": 1.3418664938431626, "grad_norm": 0.42567306756973267, "learning_rate": 6.087984692886848e-06, "loss": 0.1031, "step": 4141 }, { "epoch": 1.3421905379131562, "grad_norm": 0.4322057068347931, "learning_rate": 6.086277323416172e-06, "loss": 0.101, "step": 4142 }, { "epoch": 1.3425145819831497, "grad_norm": 0.4325510263442993, "learning_rate": 6.084569820995668e-06, "loss": 0.1026, "step": 4143 }, { "epoch": 1.3428386260531433, "grad_norm": 0.46976298093795776, "learning_rate": 6.0828621858343175e-06, "loss": 0.1216, "step": 4144 }, { "epoch": 1.3431626701231367, "grad_norm": 0.4701208472251892, "learning_rate": 6.08115441814112e-06, "loss": 0.1137, "step": 4145 }, { "epoch": 1.3434867141931304, "grad_norm": 0.45568281412124634, "learning_rate": 6.079446518125086e-06, "loss": 0.1119, "step": 4146 }, { "epoch": 1.3438107582631238, "grad_norm": 0.4867906868457794, "learning_rate": 6.077738485995249e-06, "loss": 0.1181, "step": 4147 }, { "epoch": 1.3441348023331172, "grad_norm": 0.4914509654045105, "learning_rate": 6.076030321960654e-06, "loss": 0.1129, "step": 4148 }, { "epoch": 1.3444588464031109, "grad_norm": 0.4761381149291992, "learning_rate": 6.074322026230365e-06, "loss": 0.1193, "step": 4149 }, { "epoch": 1.3447828904731043, "grad_norm": 0.4466744363307953, "learning_rate": 6.072613599013459e-06, "loss": 0.1099, "step": 4150 }, { "epoch": 1.345106934543098, "grad_norm": 0.4828190207481384, "learning_rate": 6.070905040519034e-06, "loss": 0.1176, "step": 4151 }, { "epoch": 1.3454309786130914, "grad_norm": 0.4406268298625946, "learning_rate": 6.069196350956198e-06, "loss": 0.1056, "step": 4152 }, { "epoch": 1.3457550226830848, "grad_norm": 0.4638471305370331, "learning_rate": 6.06748753053408e-06, "loss": 0.1106, "step": 4153 }, { "epoch": 1.3460790667530784, "grad_norm": 0.4701295495033264, "learning_rate": 6.065778579461821e-06, "loss": 0.1175, "step": 4154 }, { "epoch": 1.346403110823072, "grad_norm": 0.4829196035861969, "learning_rate": 6.064069497948581e-06, "loss": 0.1202, "step": 4155 }, { "epoch": 1.3467271548930655, "grad_norm": 0.4684045910835266, "learning_rate": 6.062360286203538e-06, "loss": 0.1084, "step": 4156 }, { "epoch": 1.347051198963059, "grad_norm": 0.5000907182693481, "learning_rate": 6.06065094443588e-06, "loss": 0.1151, "step": 4157 }, { "epoch": 1.3473752430330526, "grad_norm": 0.4776161015033722, "learning_rate": 6.058941472854813e-06, "loss": 0.117, "step": 4158 }, { "epoch": 1.347699287103046, "grad_norm": 0.44210824370384216, "learning_rate": 6.057231871669562e-06, "loss": 0.1053, "step": 4159 }, { "epoch": 1.3480233311730396, "grad_norm": 0.47822654247283936, "learning_rate": 6.055522141089364e-06, "loss": 0.1114, "step": 4160 }, { "epoch": 1.348347375243033, "grad_norm": 0.4372398853302002, "learning_rate": 6.053812281323474e-06, "loss": 0.1023, "step": 4161 }, { "epoch": 1.3486714193130265, "grad_norm": 0.45535725355148315, "learning_rate": 6.052102292581162e-06, "loss": 0.1065, "step": 4162 }, { "epoch": 1.3489954633830201, "grad_norm": 0.4853324592113495, "learning_rate": 6.050392175071716e-06, "loss": 0.1165, "step": 4163 }, { "epoch": 1.3493195074530135, "grad_norm": 0.4817340672016144, "learning_rate": 6.048681929004436e-06, "loss": 0.1182, "step": 4164 }, { "epoch": 1.3496435515230072, "grad_norm": 0.43401870131492615, "learning_rate": 6.0469715545886394e-06, "loss": 0.1052, "step": 4165 }, { "epoch": 1.3499675955930006, "grad_norm": 0.43376854062080383, "learning_rate": 6.0452610520336595e-06, "loss": 0.0973, "step": 4166 }, { "epoch": 1.350291639662994, "grad_norm": 0.4899052083492279, "learning_rate": 6.043550421548847e-06, "loss": 0.1259, "step": 4167 }, { "epoch": 1.3506156837329877, "grad_norm": 0.4612118601799011, "learning_rate": 6.041839663343565e-06, "loss": 0.1069, "step": 4168 }, { "epoch": 1.3509397278029813, "grad_norm": 0.4711647033691406, "learning_rate": 6.0401287776271945e-06, "loss": 0.116, "step": 4169 }, { "epoch": 1.3512637718729748, "grad_norm": 0.5112835764884949, "learning_rate": 6.03841776460913e-06, "loss": 0.124, "step": 4170 }, { "epoch": 1.3515878159429682, "grad_norm": 0.42946645617485046, "learning_rate": 6.0367066244987834e-06, "loss": 0.099, "step": 4171 }, { "epoch": 1.3519118600129618, "grad_norm": 0.49457547068595886, "learning_rate": 6.034995357505582e-06, "loss": 0.1281, "step": 4172 }, { "epoch": 1.3522359040829552, "grad_norm": 0.49168887734413147, "learning_rate": 6.03328396383897e-06, "loss": 0.1239, "step": 4173 }, { "epoch": 1.3525599481529489, "grad_norm": 0.5071669816970825, "learning_rate": 6.031572443708401e-06, "loss": 0.1101, "step": 4174 }, { "epoch": 1.3528839922229423, "grad_norm": 0.49065500497817993, "learning_rate": 6.0298607973233545e-06, "loss": 0.1242, "step": 4175 }, { "epoch": 1.3532080362929357, "grad_norm": 0.5139132738113403, "learning_rate": 6.028149024893314e-06, "loss": 0.1134, "step": 4176 }, { "epoch": 1.3535320803629294, "grad_norm": 0.4649846851825714, "learning_rate": 6.026437126627787e-06, "loss": 0.1193, "step": 4177 }, { "epoch": 1.3538561244329228, "grad_norm": 0.46105092763900757, "learning_rate": 6.024725102736293e-06, "loss": 0.1113, "step": 4178 }, { "epoch": 1.3541801685029164, "grad_norm": 0.4748861491680145, "learning_rate": 6.023012953428365e-06, "loss": 0.1117, "step": 4179 }, { "epoch": 1.3545042125729099, "grad_norm": 0.4985915720462799, "learning_rate": 6.021300678913555e-06, "loss": 0.1227, "step": 4180 }, { "epoch": 1.3548282566429035, "grad_norm": 0.463781476020813, "learning_rate": 6.019588279401431e-06, "loss": 0.1107, "step": 4181 }, { "epoch": 1.355152300712897, "grad_norm": 0.43629616498947144, "learning_rate": 6.01787575510157e-06, "loss": 0.1096, "step": 4182 }, { "epoch": 1.3554763447828906, "grad_norm": 0.5077922940254211, "learning_rate": 6.016163106223572e-06, "loss": 0.122, "step": 4183 }, { "epoch": 1.355800388852884, "grad_norm": 0.4964093565940857, "learning_rate": 6.0144503329770445e-06, "loss": 0.1262, "step": 4184 }, { "epoch": 1.3561244329228774, "grad_norm": 0.45850878953933716, "learning_rate": 6.012737435571618e-06, "loss": 0.111, "step": 4185 }, { "epoch": 1.356448476992871, "grad_norm": 0.44368118047714233, "learning_rate": 6.011024414216934e-06, "loss": 0.1042, "step": 4186 }, { "epoch": 1.3567725210628645, "grad_norm": 0.44517815113067627, "learning_rate": 6.00931126912265e-06, "loss": 0.1011, "step": 4187 }, { "epoch": 1.3570965651328581, "grad_norm": 0.5106947422027588, "learning_rate": 6.007598000498436e-06, "loss": 0.1249, "step": 4188 }, { "epoch": 1.3574206092028516, "grad_norm": 0.5051454305648804, "learning_rate": 6.005884608553982e-06, "loss": 0.1248, "step": 4189 }, { "epoch": 1.357744653272845, "grad_norm": 0.4792226552963257, "learning_rate": 6.00417109349899e-06, "loss": 0.1113, "step": 4190 }, { "epoch": 1.3580686973428386, "grad_norm": 0.4766109585762024, "learning_rate": 6.002457455543176e-06, "loss": 0.1121, "step": 4191 }, { "epoch": 1.3583927414128323, "grad_norm": 0.4882993996143341, "learning_rate": 6.000743694896274e-06, "loss": 0.1245, "step": 4192 }, { "epoch": 1.3587167854828257, "grad_norm": 0.44108298420906067, "learning_rate": 5.999029811768031e-06, "loss": 0.1008, "step": 4193 }, { "epoch": 1.3590408295528191, "grad_norm": 0.4641225039958954, "learning_rate": 5.997315806368214e-06, "loss": 0.1079, "step": 4194 }, { "epoch": 1.3593648736228128, "grad_norm": 0.47970643639564514, "learning_rate": 5.995601678906593e-06, "loss": 0.1135, "step": 4195 }, { "epoch": 1.3596889176928062, "grad_norm": 0.48470956087112427, "learning_rate": 5.993887429592966e-06, "loss": 0.1158, "step": 4196 }, { "epoch": 1.3600129617627998, "grad_norm": 0.4878312647342682, "learning_rate": 5.992173058637139e-06, "loss": 0.1164, "step": 4197 }, { "epoch": 1.3603370058327933, "grad_norm": 0.4864386022090912, "learning_rate": 5.990458566248936e-06, "loss": 0.1118, "step": 4198 }, { "epoch": 1.3606610499027867, "grad_norm": 0.4517388939857483, "learning_rate": 5.988743952638192e-06, "loss": 0.1105, "step": 4199 }, { "epoch": 1.3609850939727803, "grad_norm": 0.46198770403862, "learning_rate": 5.987029218014762e-06, "loss": 0.1156, "step": 4200 }, { "epoch": 1.3613091380427738, "grad_norm": 0.5112099051475525, "learning_rate": 5.985314362588508e-06, "loss": 0.1263, "step": 4201 }, { "epoch": 1.3616331821127674, "grad_norm": 0.44200339913368225, "learning_rate": 5.9835993865693144e-06, "loss": 0.1051, "step": 4202 }, { "epoch": 1.3619572261827608, "grad_norm": 0.45443880558013916, "learning_rate": 5.98188429016708e-06, "loss": 0.1037, "step": 4203 }, { "epoch": 1.3622812702527543, "grad_norm": 0.4965524673461914, "learning_rate": 5.980169073591712e-06, "loss": 0.1255, "step": 4204 }, { "epoch": 1.362605314322748, "grad_norm": 0.42498093843460083, "learning_rate": 5.978453737053138e-06, "loss": 0.095, "step": 4205 }, { "epoch": 1.3629293583927415, "grad_norm": 0.4135143458843231, "learning_rate": 5.9767382807613e-06, "loss": 0.0998, "step": 4206 }, { "epoch": 1.363253402462735, "grad_norm": 0.4564655125141144, "learning_rate": 5.975022704926152e-06, "loss": 0.1113, "step": 4207 }, { "epoch": 1.3635774465327284, "grad_norm": 0.4875675439834595, "learning_rate": 5.973307009757663e-06, "loss": 0.1176, "step": 4208 }, { "epoch": 1.363901490602722, "grad_norm": 0.43339845538139343, "learning_rate": 5.971591195465819e-06, "loss": 0.1048, "step": 4209 }, { "epoch": 1.3642255346727155, "grad_norm": 0.4639114439487457, "learning_rate": 5.969875262260619e-06, "loss": 0.113, "step": 4210 }, { "epoch": 1.364549578742709, "grad_norm": 0.4624967873096466, "learning_rate": 5.968159210352076e-06, "loss": 0.1068, "step": 4211 }, { "epoch": 1.3648736228127025, "grad_norm": 0.4855850636959076, "learning_rate": 5.966443039950217e-06, "loss": 0.1197, "step": 4212 }, { "epoch": 1.365197666882696, "grad_norm": 0.46072232723236084, "learning_rate": 5.9647267512650866e-06, "loss": 0.1115, "step": 4213 }, { "epoch": 1.3655217109526896, "grad_norm": 0.4450376629829407, "learning_rate": 5.9630103445067414e-06, "loss": 0.1014, "step": 4214 }, { "epoch": 1.365845755022683, "grad_norm": 0.476151704788208, "learning_rate": 5.961293819885251e-06, "loss": 0.1167, "step": 4215 }, { "epoch": 1.3661697990926767, "grad_norm": 0.4346964955329895, "learning_rate": 5.959577177610703e-06, "loss": 0.0987, "step": 4216 }, { "epoch": 1.36649384316267, "grad_norm": 0.4830533564090729, "learning_rate": 5.9578604178932e-06, "loss": 0.1138, "step": 4217 }, { "epoch": 1.3668178872326635, "grad_norm": 0.4955235421657562, "learning_rate": 5.956143540942854e-06, "loss": 0.1223, "step": 4218 }, { "epoch": 1.3671419313026572, "grad_norm": 0.5037327408790588, "learning_rate": 5.954426546969795e-06, "loss": 0.1212, "step": 4219 }, { "epoch": 1.3674659753726508, "grad_norm": 0.4872628450393677, "learning_rate": 5.952709436184165e-06, "loss": 0.1212, "step": 4220 }, { "epoch": 1.3677900194426442, "grad_norm": 0.485176146030426, "learning_rate": 5.9509922087961245e-06, "loss": 0.1156, "step": 4221 }, { "epoch": 1.3681140635126376, "grad_norm": 0.4762813150882721, "learning_rate": 5.949274865015843e-06, "loss": 0.1181, "step": 4222 }, { "epoch": 1.3684381075826313, "grad_norm": 0.4957357943058014, "learning_rate": 5.947557405053508e-06, "loss": 0.1157, "step": 4223 }, { "epoch": 1.3687621516526247, "grad_norm": 0.4911295771598816, "learning_rate": 5.94583982911932e-06, "loss": 0.1254, "step": 4224 }, { "epoch": 1.3690861957226184, "grad_norm": 0.4450444281101227, "learning_rate": 5.9441221374234925e-06, "loss": 0.1111, "step": 4225 }, { "epoch": 1.3694102397926118, "grad_norm": 0.4533950090408325, "learning_rate": 5.942404330176256e-06, "loss": 0.1036, "step": 4226 }, { "epoch": 1.3697342838626052, "grad_norm": 0.47625434398651123, "learning_rate": 5.940686407587851e-06, "loss": 0.1149, "step": 4227 }, { "epoch": 1.3700583279325989, "grad_norm": 0.4599977433681488, "learning_rate": 5.9389683698685376e-06, "loss": 0.1104, "step": 4228 }, { "epoch": 1.3703823720025923, "grad_norm": 0.4753097593784332, "learning_rate": 5.9372502172285854e-06, "loss": 0.1192, "step": 4229 }, { "epoch": 1.370706416072586, "grad_norm": 0.48577627539634705, "learning_rate": 5.935531949878281e-06, "loss": 0.1172, "step": 4230 }, { "epoch": 1.3710304601425793, "grad_norm": 0.44800129532814026, "learning_rate": 5.933813568027921e-06, "loss": 0.1073, "step": 4231 }, { "epoch": 1.371354504212573, "grad_norm": 0.4549189805984497, "learning_rate": 5.932095071887823e-06, "loss": 0.1082, "step": 4232 }, { "epoch": 1.3716785482825664, "grad_norm": 0.4787933826446533, "learning_rate": 5.930376461668308e-06, "loss": 0.1216, "step": 4233 }, { "epoch": 1.37200259235256, "grad_norm": 0.4628862738609314, "learning_rate": 5.928657737579723e-06, "loss": 0.1084, "step": 4234 }, { "epoch": 1.3723266364225535, "grad_norm": 0.47287628054618835, "learning_rate": 5.92693889983242e-06, "loss": 0.1191, "step": 4235 }, { "epoch": 1.372650680492547, "grad_norm": 0.5101609230041504, "learning_rate": 5.92521994863677e-06, "loss": 0.1291, "step": 4236 }, { "epoch": 1.3729747245625405, "grad_norm": 0.47439146041870117, "learning_rate": 5.923500884203154e-06, "loss": 0.1114, "step": 4237 }, { "epoch": 1.373298768632534, "grad_norm": 0.4542114734649658, "learning_rate": 5.9217817067419705e-06, "loss": 0.1052, "step": 4238 }, { "epoch": 1.3736228127025276, "grad_norm": 0.43908876180648804, "learning_rate": 5.920062416463629e-06, "loss": 0.1047, "step": 4239 }, { "epoch": 1.373946856772521, "grad_norm": 0.4801899790763855, "learning_rate": 5.9183430135785555e-06, "loss": 0.115, "step": 4240 }, { "epoch": 1.3742709008425145, "grad_norm": 0.4846722185611725, "learning_rate": 5.916623498297188e-06, "loss": 0.1144, "step": 4241 }, { "epoch": 1.374594944912508, "grad_norm": 0.4870109260082245, "learning_rate": 5.914903870829977e-06, "loss": 0.1134, "step": 4242 }, { "epoch": 1.3749189889825018, "grad_norm": 0.4847196936607361, "learning_rate": 5.913184131387389e-06, "loss": 0.1197, "step": 4243 }, { "epoch": 1.3752430330524952, "grad_norm": 0.4836478531360626, "learning_rate": 5.911464280179905e-06, "loss": 0.1128, "step": 4244 }, { "epoch": 1.3755670771224886, "grad_norm": 0.4619148075580597, "learning_rate": 5.909744317418015e-06, "loss": 0.1162, "step": 4245 }, { "epoch": 1.3758911211924822, "grad_norm": 0.44892606139183044, "learning_rate": 5.908024243312228e-06, "loss": 0.1017, "step": 4246 }, { "epoch": 1.3762151652624757, "grad_norm": 0.44396424293518066, "learning_rate": 5.906304058073063e-06, "loss": 0.0963, "step": 4247 }, { "epoch": 1.3765392093324693, "grad_norm": 0.4212004244327545, "learning_rate": 5.904583761911058e-06, "loss": 0.1004, "step": 4248 }, { "epoch": 1.3768632534024627, "grad_norm": 0.4835984706878662, "learning_rate": 5.902863355036755e-06, "loss": 0.1171, "step": 4249 }, { "epoch": 1.3771872974724562, "grad_norm": 0.4739507734775543, "learning_rate": 5.901142837660718e-06, "loss": 0.1204, "step": 4250 }, { "epoch": 1.3775113415424498, "grad_norm": 0.44449394941329956, "learning_rate": 5.899422209993522e-06, "loss": 0.1051, "step": 4251 }, { "epoch": 1.3778353856124432, "grad_norm": 0.47911691665649414, "learning_rate": 5.897701472245756e-06, "loss": 0.1217, "step": 4252 }, { "epoch": 1.3781594296824369, "grad_norm": 0.4616232216358185, "learning_rate": 5.895980624628018e-06, "loss": 0.1148, "step": 4253 }, { "epoch": 1.3784834737524303, "grad_norm": 0.49407100677490234, "learning_rate": 5.8942596673509266e-06, "loss": 0.1327, "step": 4254 }, { "epoch": 1.3788075178224237, "grad_norm": 0.4562760889530182, "learning_rate": 5.892538600625109e-06, "loss": 0.1092, "step": 4255 }, { "epoch": 1.3791315618924174, "grad_norm": 0.4495685398578644, "learning_rate": 5.890817424661206e-06, "loss": 0.1072, "step": 4256 }, { "epoch": 1.379455605962411, "grad_norm": 0.4521036446094513, "learning_rate": 5.889096139669874e-06, "loss": 0.1142, "step": 4257 }, { "epoch": 1.3797796500324044, "grad_norm": 0.44457265734672546, "learning_rate": 5.887374745861782e-06, "loss": 0.1162, "step": 4258 }, { "epoch": 1.3801036941023979, "grad_norm": 0.47161737084388733, "learning_rate": 5.885653243447612e-06, "loss": 0.1086, "step": 4259 }, { "epoch": 1.3804277381723915, "grad_norm": 0.47300124168395996, "learning_rate": 5.883931632638059e-06, "loss": 0.1149, "step": 4260 }, { "epoch": 1.380751782242385, "grad_norm": 0.45929595828056335, "learning_rate": 5.882209913643831e-06, "loss": 0.113, "step": 4261 }, { "epoch": 1.3810758263123786, "grad_norm": 0.46925094723701477, "learning_rate": 5.8804880866756494e-06, "loss": 0.1085, "step": 4262 }, { "epoch": 1.381399870382372, "grad_norm": 0.45652511715888977, "learning_rate": 5.87876615194425e-06, "loss": 0.112, "step": 4263 }, { "epoch": 1.3817239144523654, "grad_norm": 0.44846397638320923, "learning_rate": 5.877044109660381e-06, "loss": 0.1075, "step": 4264 }, { "epoch": 1.382047958522359, "grad_norm": 0.4708314538002014, "learning_rate": 5.875321960034804e-06, "loss": 0.1127, "step": 4265 }, { "epoch": 1.3823720025923525, "grad_norm": 0.4565647840499878, "learning_rate": 5.873599703278292e-06, "loss": 0.1057, "step": 4266 }, { "epoch": 1.3826960466623461, "grad_norm": 0.4954982101917267, "learning_rate": 5.8718773396016345e-06, "loss": 0.1198, "step": 4267 }, { "epoch": 1.3830200907323396, "grad_norm": 0.47357866168022156, "learning_rate": 5.87015486921563e-06, "loss": 0.1092, "step": 4268 }, { "epoch": 1.383344134802333, "grad_norm": 0.46112143993377686, "learning_rate": 5.8684322923310936e-06, "loss": 0.105, "step": 4269 }, { "epoch": 1.3836681788723266, "grad_norm": 0.4896453320980072, "learning_rate": 5.8667096091588506e-06, "loss": 0.1144, "step": 4270 }, { "epoch": 1.3839922229423203, "grad_norm": 0.4607629179954529, "learning_rate": 5.8649868199097425e-06, "loss": 0.1092, "step": 4271 }, { "epoch": 1.3843162670123137, "grad_norm": 0.44088199734687805, "learning_rate": 5.863263924794622e-06, "loss": 0.1042, "step": 4272 }, { "epoch": 1.3846403110823071, "grad_norm": 0.5327633619308472, "learning_rate": 5.861540924024355e-06, "loss": 0.1257, "step": 4273 }, { "epoch": 1.3849643551523008, "grad_norm": 0.46473363041877747, "learning_rate": 5.8598178178098185e-06, "loss": 0.1123, "step": 4274 }, { "epoch": 1.3852883992222942, "grad_norm": 0.4873182475566864, "learning_rate": 5.858094606361904e-06, "loss": 0.1163, "step": 4275 }, { "epoch": 1.3856124432922878, "grad_norm": 0.4518771469593048, "learning_rate": 5.856371289891517e-06, "loss": 0.1086, "step": 4276 }, { "epoch": 1.3859364873622813, "grad_norm": 0.47316327691078186, "learning_rate": 5.854647868609574e-06, "loss": 0.1123, "step": 4277 }, { "epoch": 1.3862605314322747, "grad_norm": 0.5178737044334412, "learning_rate": 5.852924342727006e-06, "loss": 0.1369, "step": 4278 }, { "epoch": 1.3865845755022683, "grad_norm": 0.43328502774238586, "learning_rate": 5.851200712454757e-06, "loss": 0.1043, "step": 4279 }, { "epoch": 1.3869086195722617, "grad_norm": 0.46997931599617004, "learning_rate": 5.849476978003778e-06, "loss": 0.1149, "step": 4280 }, { "epoch": 1.3872326636422554, "grad_norm": 0.45517292618751526, "learning_rate": 5.847753139585042e-06, "loss": 0.1089, "step": 4281 }, { "epoch": 1.3875567077122488, "grad_norm": 0.49720102548599243, "learning_rate": 5.846029197409528e-06, "loss": 0.1292, "step": 4282 }, { "epoch": 1.3878807517822425, "grad_norm": 0.46744126081466675, "learning_rate": 5.844305151688231e-06, "loss": 0.117, "step": 4283 }, { "epoch": 1.3882047958522359, "grad_norm": 0.46075981855392456, "learning_rate": 5.842581002632157e-06, "loss": 0.117, "step": 4284 }, { "epoch": 1.3885288399222295, "grad_norm": 0.43533214926719666, "learning_rate": 5.840856750452326e-06, "loss": 0.1102, "step": 4285 }, { "epoch": 1.388852883992223, "grad_norm": 0.41412171721458435, "learning_rate": 5.8391323953597675e-06, "loss": 0.0969, "step": 4286 }, { "epoch": 1.3891769280622164, "grad_norm": 0.43589508533477783, "learning_rate": 5.837407937565528e-06, "loss": 0.1016, "step": 4287 }, { "epoch": 1.38950097213221, "grad_norm": 0.45945459604263306, "learning_rate": 5.8356833772806636e-06, "loss": 0.1125, "step": 4288 }, { "epoch": 1.3898250162022034, "grad_norm": 0.4774591326713562, "learning_rate": 5.833958714716242e-06, "loss": 0.1183, "step": 4289 }, { "epoch": 1.390149060272197, "grad_norm": 0.4431980550289154, "learning_rate": 5.832233950083349e-06, "loss": 0.1108, "step": 4290 }, { "epoch": 1.3904731043421905, "grad_norm": 0.4475851058959961, "learning_rate": 5.830509083593078e-06, "loss": 0.1157, "step": 4291 }, { "epoch": 1.390797148412184, "grad_norm": 0.4317694306373596, "learning_rate": 5.828784115456534e-06, "loss": 0.1031, "step": 4292 }, { "epoch": 1.3911211924821776, "grad_norm": 0.4566093981266022, "learning_rate": 5.827059045884836e-06, "loss": 0.1175, "step": 4293 }, { "epoch": 1.3914452365521712, "grad_norm": 0.45999404788017273, "learning_rate": 5.825333875089119e-06, "loss": 0.1115, "step": 4294 }, { "epoch": 1.3917692806221647, "grad_norm": 0.4236088693141937, "learning_rate": 5.823608603280526e-06, "loss": 0.1107, "step": 4295 }, { "epoch": 1.392093324692158, "grad_norm": 0.4458802342414856, "learning_rate": 5.821883230670212e-06, "loss": 0.1157, "step": 4296 }, { "epoch": 1.3924173687621517, "grad_norm": 0.4616601765155792, "learning_rate": 5.820157757469349e-06, "loss": 0.1205, "step": 4297 }, { "epoch": 1.3927414128321451, "grad_norm": 0.4825879633426666, "learning_rate": 5.818432183889113e-06, "loss": 0.1148, "step": 4298 }, { "epoch": 1.3930654569021388, "grad_norm": 0.4698737859725952, "learning_rate": 5.816706510140703e-06, "loss": 0.1155, "step": 4299 }, { "epoch": 1.3933895009721322, "grad_norm": 0.4210644066333771, "learning_rate": 5.814980736435321e-06, "loss": 0.1053, "step": 4300 }, { "epoch": 1.3937135450421256, "grad_norm": 0.4978773891925812, "learning_rate": 5.813254862984188e-06, "loss": 0.1183, "step": 4301 }, { "epoch": 1.3940375891121193, "grad_norm": 0.4545080363750458, "learning_rate": 5.811528889998531e-06, "loss": 0.1083, "step": 4302 }, { "epoch": 1.3943616331821127, "grad_norm": 0.4733128249645233, "learning_rate": 5.809802817689596e-06, "loss": 0.1115, "step": 4303 }, { "epoch": 1.3946856772521063, "grad_norm": 0.45566970109939575, "learning_rate": 5.8080766462686345e-06, "loss": 0.11, "step": 4304 }, { "epoch": 1.3950097213220998, "grad_norm": 0.4669945538043976, "learning_rate": 5.806350375946914e-06, "loss": 0.1083, "step": 4305 }, { "epoch": 1.3953337653920932, "grad_norm": 0.46048641204833984, "learning_rate": 5.804624006935715e-06, "loss": 0.1123, "step": 4306 }, { "epoch": 1.3956578094620868, "grad_norm": 0.4888690412044525, "learning_rate": 5.802897539446326e-06, "loss": 0.1292, "step": 4307 }, { "epoch": 1.3959818535320805, "grad_norm": 0.44061604142189026, "learning_rate": 5.801170973690052e-06, "loss": 0.102, "step": 4308 }, { "epoch": 1.396305897602074, "grad_norm": 0.5046406388282776, "learning_rate": 5.799444309878205e-06, "loss": 0.1234, "step": 4309 }, { "epoch": 1.3966299416720673, "grad_norm": 0.4761616885662079, "learning_rate": 5.797717548222115e-06, "loss": 0.1188, "step": 4310 }, { "epoch": 1.396953985742061, "grad_norm": 0.4426950216293335, "learning_rate": 5.795990688933117e-06, "loss": 0.0986, "step": 4311 }, { "epoch": 1.3972780298120544, "grad_norm": 0.42711836099624634, "learning_rate": 5.794263732222567e-06, "loss": 0.0988, "step": 4312 }, { "epoch": 1.397602073882048, "grad_norm": 0.46538108587265015, "learning_rate": 5.792536678301824e-06, "loss": 0.1112, "step": 4313 }, { "epoch": 1.3979261179520415, "grad_norm": 0.4521970748901367, "learning_rate": 5.790809527382264e-06, "loss": 0.1084, "step": 4314 }, { "epoch": 1.398250162022035, "grad_norm": 0.48829540610313416, "learning_rate": 5.789082279675276e-06, "loss": 0.1184, "step": 4315 }, { "epoch": 1.3985742060920285, "grad_norm": 0.4372498691082001, "learning_rate": 5.787354935392253e-06, "loss": 0.1032, "step": 4316 }, { "epoch": 1.398898250162022, "grad_norm": 0.4990762174129486, "learning_rate": 5.78562749474461e-06, "loss": 0.118, "step": 4317 }, { "epoch": 1.3992222942320156, "grad_norm": 0.4872603118419647, "learning_rate": 5.783899957943766e-06, "loss": 0.1246, "step": 4318 }, { "epoch": 1.399546338302009, "grad_norm": 0.4665011465549469, "learning_rate": 5.782172325201155e-06, "loss": 0.1106, "step": 4319 }, { "epoch": 1.3998703823720027, "grad_norm": 0.41253137588500977, "learning_rate": 5.780444596728224e-06, "loss": 0.0922, "step": 4320 }, { "epoch": 1.400194426441996, "grad_norm": 0.491841197013855, "learning_rate": 5.778716772736431e-06, "loss": 0.1269, "step": 4321 }, { "epoch": 1.4005184705119897, "grad_norm": 0.4381025433540344, "learning_rate": 5.776988853437242e-06, "loss": 0.1131, "step": 4322 }, { "epoch": 1.4008425145819832, "grad_norm": 0.5174381732940674, "learning_rate": 5.775260839042139e-06, "loss": 0.1291, "step": 4323 }, { "epoch": 1.4011665586519766, "grad_norm": 0.46297118067741394, "learning_rate": 5.7735327297626154e-06, "loss": 0.1072, "step": 4324 }, { "epoch": 1.4014906027219702, "grad_norm": 0.4963762164115906, "learning_rate": 5.771804525810174e-06, "loss": 0.1232, "step": 4325 }, { "epoch": 1.4018146467919637, "grad_norm": 0.49227485060691833, "learning_rate": 5.77007622739633e-06, "loss": 0.1058, "step": 4326 }, { "epoch": 1.4021386908619573, "grad_norm": 0.4902411997318268, "learning_rate": 5.7683478347326115e-06, "loss": 0.1237, "step": 4327 }, { "epoch": 1.4024627349319507, "grad_norm": 0.49123913049697876, "learning_rate": 5.766619348030556e-06, "loss": 0.1098, "step": 4328 }, { "epoch": 1.4027867790019442, "grad_norm": 0.46614375710487366, "learning_rate": 5.7648907675017126e-06, "loss": 0.106, "step": 4329 }, { "epoch": 1.4031108230719378, "grad_norm": 0.4563212990760803, "learning_rate": 5.763162093357645e-06, "loss": 0.1072, "step": 4330 }, { "epoch": 1.4034348671419314, "grad_norm": 0.4761507511138916, "learning_rate": 5.7614333258099245e-06, "loss": 0.1175, "step": 4331 }, { "epoch": 1.4037589112119249, "grad_norm": 0.5162764191627502, "learning_rate": 5.7597044650701365e-06, "loss": 0.1259, "step": 4332 }, { "epoch": 1.4040829552819183, "grad_norm": 0.4567275643348694, "learning_rate": 5.757975511349877e-06, "loss": 0.1058, "step": 4333 }, { "epoch": 1.404406999351912, "grad_norm": 0.4625505805015564, "learning_rate": 5.7562464648607515e-06, "loss": 0.1028, "step": 4334 }, { "epoch": 1.4047310434219054, "grad_norm": 0.5034233331680298, "learning_rate": 5.7545173258143804e-06, "loss": 0.1161, "step": 4335 }, { "epoch": 1.405055087491899, "grad_norm": 0.4404418468475342, "learning_rate": 5.752788094422392e-06, "loss": 0.1109, "step": 4336 }, { "epoch": 1.4053791315618924, "grad_norm": 0.43947386741638184, "learning_rate": 5.75105877089643e-06, "loss": 0.1106, "step": 4337 }, { "epoch": 1.4057031756318858, "grad_norm": 0.45304930210113525, "learning_rate": 5.749329355448145e-06, "loss": 0.1055, "step": 4338 }, { "epoch": 1.4060272197018795, "grad_norm": 0.5092594027519226, "learning_rate": 5.7475998482892e-06, "loss": 0.1227, "step": 4339 }, { "epoch": 1.406351263771873, "grad_norm": 0.4667177200317383, "learning_rate": 5.745870249631273e-06, "loss": 0.1194, "step": 4340 }, { "epoch": 1.4066753078418666, "grad_norm": 0.42807644605636597, "learning_rate": 5.744140559686046e-06, "loss": 0.0996, "step": 4341 }, { "epoch": 1.40699935191186, "grad_norm": 0.49830734729766846, "learning_rate": 5.7424107786652175e-06, "loss": 0.124, "step": 4342 }, { "epoch": 1.4073233959818534, "grad_norm": 0.49827465415000916, "learning_rate": 5.7406809067804984e-06, "loss": 0.1226, "step": 4343 }, { "epoch": 1.407647440051847, "grad_norm": 0.4859582185745239, "learning_rate": 5.738950944243605e-06, "loss": 0.1247, "step": 4344 }, { "epoch": 1.4079714841218407, "grad_norm": 0.4473390281200409, "learning_rate": 5.737220891266271e-06, "loss": 0.1104, "step": 4345 }, { "epoch": 1.4082955281918341, "grad_norm": 0.4510458707809448, "learning_rate": 5.735490748060237e-06, "loss": 0.1062, "step": 4346 }, { "epoch": 1.4086195722618275, "grad_norm": 0.46265891194343567, "learning_rate": 5.733760514837255e-06, "loss": 0.1091, "step": 4347 }, { "epoch": 1.4089436163318212, "grad_norm": 0.46770209074020386, "learning_rate": 5.732030191809091e-06, "loss": 0.1184, "step": 4348 }, { "epoch": 1.4092676604018146, "grad_norm": 0.5292964577674866, "learning_rate": 5.730299779187516e-06, "loss": 0.1177, "step": 4349 }, { "epoch": 1.4095917044718083, "grad_norm": 0.45356395840644836, "learning_rate": 5.7285692771843185e-06, "loss": 0.1078, "step": 4350 }, { "epoch": 1.4099157485418017, "grad_norm": 0.48939910531044006, "learning_rate": 5.726838686011294e-06, "loss": 0.1131, "step": 4351 }, { "epoch": 1.410239792611795, "grad_norm": 0.4604235589504242, "learning_rate": 5.7251080058802525e-06, "loss": 0.1143, "step": 4352 }, { "epoch": 1.4105638366817888, "grad_norm": 0.48968636989593506, "learning_rate": 5.723377237003009e-06, "loss": 0.1204, "step": 4353 }, { "epoch": 1.4108878807517822, "grad_norm": 0.5055221915245056, "learning_rate": 5.721646379591394e-06, "loss": 0.1252, "step": 4354 }, { "epoch": 1.4112119248217758, "grad_norm": 0.47648561000823975, "learning_rate": 5.71991543385725e-06, "loss": 0.1163, "step": 4355 }, { "epoch": 1.4115359688917692, "grad_norm": 0.4697307348251343, "learning_rate": 5.718184400012425e-06, "loss": 0.1159, "step": 4356 }, { "epoch": 1.4118600129617627, "grad_norm": 0.4544907808303833, "learning_rate": 5.716453278268782e-06, "loss": 0.107, "step": 4357 }, { "epoch": 1.4121840570317563, "grad_norm": 0.4428936839103699, "learning_rate": 5.7147220688381955e-06, "loss": 0.106, "step": 4358 }, { "epoch": 1.41250810110175, "grad_norm": 0.4969765245914459, "learning_rate": 5.712990771932545e-06, "loss": 0.1279, "step": 4359 }, { "epoch": 1.4128321451717434, "grad_norm": 0.44887062907218933, "learning_rate": 5.7112593877637264e-06, "loss": 0.1104, "step": 4360 }, { "epoch": 1.4131561892417368, "grad_norm": 0.4952261447906494, "learning_rate": 5.7095279165436446e-06, "loss": 0.1241, "step": 4361 }, { "epoch": 1.4134802333117304, "grad_norm": 0.42035895586013794, "learning_rate": 5.707796358484214e-06, "loss": 0.0978, "step": 4362 }, { "epoch": 1.4138042773817239, "grad_norm": 0.4624359607696533, "learning_rate": 5.706064713797361e-06, "loss": 0.112, "step": 4363 }, { "epoch": 1.4141283214517175, "grad_norm": 0.44356340169906616, "learning_rate": 5.704332982695025e-06, "loss": 0.1032, "step": 4364 }, { "epoch": 1.414452365521711, "grad_norm": 0.4767822325229645, "learning_rate": 5.7026011653891466e-06, "loss": 0.1115, "step": 4365 }, { "epoch": 1.4147764095917044, "grad_norm": 0.42960530519485474, "learning_rate": 5.700869262091689e-06, "loss": 0.1027, "step": 4366 }, { "epoch": 1.415100453661698, "grad_norm": 0.4716300368309021, "learning_rate": 5.699137273014619e-06, "loss": 0.1124, "step": 4367 }, { "epoch": 1.4154244977316914, "grad_norm": 0.4724923074245453, "learning_rate": 5.697405198369914e-06, "loss": 0.1139, "step": 4368 }, { "epoch": 1.415748541801685, "grad_norm": 0.4250900149345398, "learning_rate": 5.695673038369565e-06, "loss": 0.1003, "step": 4369 }, { "epoch": 1.4160725858716785, "grad_norm": 0.45592403411865234, "learning_rate": 5.693940793225571e-06, "loss": 0.1135, "step": 4370 }, { "epoch": 1.4163966299416721, "grad_norm": 0.47380363941192627, "learning_rate": 5.692208463149941e-06, "loss": 0.1128, "step": 4371 }, { "epoch": 1.4167206740116656, "grad_norm": 0.4276685416698456, "learning_rate": 5.690476048354696e-06, "loss": 0.1023, "step": 4372 }, { "epoch": 1.4170447180816592, "grad_norm": 0.46587416529655457, "learning_rate": 5.688743549051867e-06, "loss": 0.1156, "step": 4373 }, { "epoch": 1.4173687621516526, "grad_norm": 0.4692516326904297, "learning_rate": 5.687010965453495e-06, "loss": 0.1156, "step": 4374 }, { "epoch": 1.417692806221646, "grad_norm": 0.48215970396995544, "learning_rate": 5.6852782977716326e-06, "loss": 0.121, "step": 4375 }, { "epoch": 1.4180168502916397, "grad_norm": 0.46034133434295654, "learning_rate": 5.683545546218341e-06, "loss": 0.1179, "step": 4376 }, { "epoch": 1.4183408943616331, "grad_norm": 0.5115047097206116, "learning_rate": 5.68181271100569e-06, "loss": 0.1221, "step": 4377 }, { "epoch": 1.4186649384316268, "grad_norm": 0.49292752146720886, "learning_rate": 5.680079792345764e-06, "loss": 0.1227, "step": 4378 }, { "epoch": 1.4189889825016202, "grad_norm": 0.4294084310531616, "learning_rate": 5.6783467904506554e-06, "loss": 0.1006, "step": 4379 }, { "epoch": 1.4193130265716136, "grad_norm": 0.4855675995349884, "learning_rate": 5.676613705532468e-06, "loss": 0.1139, "step": 4380 }, { "epoch": 1.4196370706416073, "grad_norm": 0.4840838313102722, "learning_rate": 5.674880537803311e-06, "loss": 0.1182, "step": 4381 }, { "epoch": 1.419961114711601, "grad_norm": 0.442710816860199, "learning_rate": 5.673147287475311e-06, "loss": 0.1043, "step": 4382 }, { "epoch": 1.4202851587815943, "grad_norm": 0.46457353234291077, "learning_rate": 5.671413954760599e-06, "loss": 0.1145, "step": 4383 }, { "epoch": 1.4206092028515878, "grad_norm": 0.47664782404899597, "learning_rate": 5.669680539871318e-06, "loss": 0.119, "step": 4384 }, { "epoch": 1.4209332469215814, "grad_norm": 0.4621475040912628, "learning_rate": 5.667947043019624e-06, "loss": 0.1155, "step": 4385 }, { "epoch": 1.4212572909915748, "grad_norm": 0.48262637853622437, "learning_rate": 5.666213464417676e-06, "loss": 0.1173, "step": 4386 }, { "epoch": 1.4215813350615685, "grad_norm": 0.46006882190704346, "learning_rate": 5.6644798042776495e-06, "loss": 0.1153, "step": 4387 }, { "epoch": 1.421905379131562, "grad_norm": 0.4948621094226837, "learning_rate": 5.662746062811729e-06, "loss": 0.1153, "step": 4388 }, { "epoch": 1.4222294232015553, "grad_norm": 0.5037163496017456, "learning_rate": 5.661012240232106e-06, "loss": 0.1208, "step": 4389 }, { "epoch": 1.422553467271549, "grad_norm": 0.484428346157074, "learning_rate": 5.659278336750983e-06, "loss": 0.1188, "step": 4390 }, { "epoch": 1.4228775113415424, "grad_norm": 0.46746310591697693, "learning_rate": 5.6575443525805754e-06, "loss": 0.1162, "step": 4391 }, { "epoch": 1.423201555411536, "grad_norm": 0.44541990756988525, "learning_rate": 5.655810287933103e-06, "loss": 0.1101, "step": 4392 }, { "epoch": 1.4235255994815295, "grad_norm": 0.4760904312133789, "learning_rate": 5.6540761430208e-06, "loss": 0.1185, "step": 4393 }, { "epoch": 1.4238496435515229, "grad_norm": 0.4710126221179962, "learning_rate": 5.652341918055912e-06, "loss": 0.114, "step": 4394 }, { "epoch": 1.4241736876215165, "grad_norm": 0.4768831133842468, "learning_rate": 5.650607613250685e-06, "loss": 0.11, "step": 4395 }, { "epoch": 1.4244977316915102, "grad_norm": 0.5086276531219482, "learning_rate": 5.648873228817385e-06, "loss": 0.1232, "step": 4396 }, { "epoch": 1.4248217757615036, "grad_norm": 0.4469287097454071, "learning_rate": 5.647138764968284e-06, "loss": 0.1076, "step": 4397 }, { "epoch": 1.425145819831497, "grad_norm": 0.478959321975708, "learning_rate": 5.645404221915662e-06, "loss": 0.1221, "step": 4398 }, { "epoch": 1.4254698639014907, "grad_norm": 0.4652095139026642, "learning_rate": 5.643669599871813e-06, "loss": 0.1128, "step": 4399 }, { "epoch": 1.425793907971484, "grad_norm": 0.485418438911438, "learning_rate": 5.641934899049035e-06, "loss": 0.1172, "step": 4400 }, { "epoch": 1.4261179520414777, "grad_norm": 0.4753665626049042, "learning_rate": 5.64020011965964e-06, "loss": 0.1138, "step": 4401 }, { "epoch": 1.4264419961114712, "grad_norm": 0.46920743584632874, "learning_rate": 5.63846526191595e-06, "loss": 0.1159, "step": 4402 }, { "epoch": 1.4267660401814646, "grad_norm": 0.5033137798309326, "learning_rate": 5.63673032603029e-06, "loss": 0.1249, "step": 4403 }, { "epoch": 1.4270900842514582, "grad_norm": 0.4642874300479889, "learning_rate": 5.6349953122150026e-06, "loss": 0.1109, "step": 4404 }, { "epoch": 1.4274141283214516, "grad_norm": 0.45898953080177307, "learning_rate": 5.633260220682436e-06, "loss": 0.1151, "step": 4405 }, { "epoch": 1.4277381723914453, "grad_norm": 0.44028547406196594, "learning_rate": 5.631525051644949e-06, "loss": 0.1085, "step": 4406 }, { "epoch": 1.4280622164614387, "grad_norm": 0.4695223271846771, "learning_rate": 5.629789805314912e-06, "loss": 0.114, "step": 4407 }, { "epoch": 1.4283862605314321, "grad_norm": 0.4158531725406647, "learning_rate": 5.628054481904696e-06, "loss": 0.0991, "step": 4408 }, { "epoch": 1.4287103046014258, "grad_norm": 0.5092782378196716, "learning_rate": 5.626319081626693e-06, "loss": 0.1201, "step": 4409 }, { "epoch": 1.4290343486714194, "grad_norm": 0.47505107522010803, "learning_rate": 5.624583604693297e-06, "loss": 0.1108, "step": 4410 }, { "epoch": 1.4293583927414129, "grad_norm": 0.45775267481803894, "learning_rate": 5.622848051316916e-06, "loss": 0.1118, "step": 4411 }, { "epoch": 1.4296824368114063, "grad_norm": 0.4317185580730438, "learning_rate": 5.621112421709961e-06, "loss": 0.0998, "step": 4412 }, { "epoch": 1.4300064808814, "grad_norm": 0.50062096118927, "learning_rate": 5.61937671608486e-06, "loss": 0.1268, "step": 4413 }, { "epoch": 1.4303305249513933, "grad_norm": 0.46965810656547546, "learning_rate": 5.617640934654044e-06, "loss": 0.1085, "step": 4414 }, { "epoch": 1.430654569021387, "grad_norm": 0.48559674620628357, "learning_rate": 5.6159050776299574e-06, "loss": 0.1201, "step": 4415 }, { "epoch": 1.4309786130913804, "grad_norm": 0.4981156885623932, "learning_rate": 5.614169145225051e-06, "loss": 0.1211, "step": 4416 }, { "epoch": 1.4313026571613738, "grad_norm": 0.47100457549095154, "learning_rate": 5.612433137651787e-06, "loss": 0.1025, "step": 4417 }, { "epoch": 1.4316267012313675, "grad_norm": 0.5128635168075562, "learning_rate": 5.6106970551226355e-06, "loss": 0.1257, "step": 4418 }, { "epoch": 1.431950745301361, "grad_norm": 0.4389077425003052, "learning_rate": 5.608960897850078e-06, "loss": 0.0965, "step": 4419 }, { "epoch": 1.4322747893713546, "grad_norm": 0.4553956687450409, "learning_rate": 5.6072246660466e-06, "loss": 0.1088, "step": 4420 }, { "epoch": 1.432598833441348, "grad_norm": 0.4483269155025482, "learning_rate": 5.605488359924702e-06, "loss": 0.105, "step": 4421 }, { "epoch": 1.4329228775113416, "grad_norm": 0.4663432538509369, "learning_rate": 5.603751979696892e-06, "loss": 0.1081, "step": 4422 }, { "epoch": 1.433246921581335, "grad_norm": 0.4651101529598236, "learning_rate": 5.602015525575683e-06, "loss": 0.1182, "step": 4423 }, { "epoch": 1.4335709656513287, "grad_norm": 0.47833356261253357, "learning_rate": 5.600278997773601e-06, "loss": 0.1066, "step": 4424 }, { "epoch": 1.4338950097213221, "grad_norm": 0.46498045325279236, "learning_rate": 5.5985423965031815e-06, "loss": 0.1114, "step": 4425 }, { "epoch": 1.4342190537913155, "grad_norm": 0.4368334412574768, "learning_rate": 5.596805721976966e-06, "loss": 0.1048, "step": 4426 }, { "epoch": 1.4345430978613092, "grad_norm": 0.46312999725341797, "learning_rate": 5.59506897440751e-06, "loss": 0.1022, "step": 4427 }, { "epoch": 1.4348671419313026, "grad_norm": 0.459268718957901, "learning_rate": 5.593332154007369e-06, "loss": 0.1083, "step": 4428 }, { "epoch": 1.4351911860012962, "grad_norm": 0.4764942228794098, "learning_rate": 5.591595260989118e-06, "loss": 0.1226, "step": 4429 }, { "epoch": 1.4355152300712897, "grad_norm": 0.44204190373420715, "learning_rate": 5.589858295565333e-06, "loss": 0.1053, "step": 4430 }, { "epoch": 1.435839274141283, "grad_norm": 0.5024438500404358, "learning_rate": 5.588121257948605e-06, "loss": 0.111, "step": 4431 }, { "epoch": 1.4361633182112767, "grad_norm": 0.4540453553199768, "learning_rate": 5.586384148351528e-06, "loss": 0.1003, "step": 4432 }, { "epoch": 1.4364873622812704, "grad_norm": 0.4889206290245056, "learning_rate": 5.584646966986708e-06, "loss": 0.1197, "step": 4433 }, { "epoch": 1.4368114063512638, "grad_norm": 0.46186313033103943, "learning_rate": 5.582909714066758e-06, "loss": 0.1103, "step": 4434 }, { "epoch": 1.4371354504212572, "grad_norm": 0.4445458948612213, "learning_rate": 5.581172389804302e-06, "loss": 0.1042, "step": 4435 }, { "epoch": 1.4374594944912509, "grad_norm": 0.433281272649765, "learning_rate": 5.579434994411972e-06, "loss": 0.11, "step": 4436 }, { "epoch": 1.4377835385612443, "grad_norm": 0.4515359699726105, "learning_rate": 5.577697528102409e-06, "loss": 0.1037, "step": 4437 }, { "epoch": 1.438107582631238, "grad_norm": 0.48822012543678284, "learning_rate": 5.57595999108826e-06, "loss": 0.1224, "step": 4438 }, { "epoch": 1.4384316267012314, "grad_norm": 0.442730575799942, "learning_rate": 5.574222383582184e-06, "loss": 0.1062, "step": 4439 }, { "epoch": 1.4387556707712248, "grad_norm": 0.4672057330608368, "learning_rate": 5.5724847057968466e-06, "loss": 0.1131, "step": 4440 }, { "epoch": 1.4390797148412184, "grad_norm": 0.46711841225624084, "learning_rate": 5.5707469579449235e-06, "loss": 0.1127, "step": 4441 }, { "epoch": 1.4394037589112119, "grad_norm": 0.4616106152534485, "learning_rate": 5.569009140239099e-06, "loss": 0.1012, "step": 4442 }, { "epoch": 1.4397278029812055, "grad_norm": 0.5364953279495239, "learning_rate": 5.567271252892063e-06, "loss": 0.1344, "step": 4443 }, { "epoch": 1.440051847051199, "grad_norm": 0.44760236144065857, "learning_rate": 5.565533296116519e-06, "loss": 0.11, "step": 4444 }, { "epoch": 1.4403758911211924, "grad_norm": 0.4851779043674469, "learning_rate": 5.563795270125173e-06, "loss": 0.1181, "step": 4445 }, { "epoch": 1.440699935191186, "grad_norm": 0.4202938973903656, "learning_rate": 5.562057175130744e-06, "loss": 0.091, "step": 4446 }, { "epoch": 1.4410239792611796, "grad_norm": 0.4636053442955017, "learning_rate": 5.560319011345958e-06, "loss": 0.1153, "step": 4447 }, { "epoch": 1.441348023331173, "grad_norm": 0.49078604578971863, "learning_rate": 5.558580778983549e-06, "loss": 0.1194, "step": 4448 }, { "epoch": 1.4416720674011665, "grad_norm": 0.466388463973999, "learning_rate": 5.556842478256261e-06, "loss": 0.1175, "step": 4449 }, { "epoch": 1.4419961114711601, "grad_norm": 0.4817126393318176, "learning_rate": 5.555104109376843e-06, "loss": 0.1111, "step": 4450 }, { "epoch": 1.4423201555411536, "grad_norm": 0.4500816762447357, "learning_rate": 5.553365672558057e-06, "loss": 0.1113, "step": 4451 }, { "epoch": 1.4426441996111472, "grad_norm": 0.46708181500434875, "learning_rate": 5.551627168012669e-06, "loss": 0.1191, "step": 4452 }, { "epoch": 1.4429682436811406, "grad_norm": 0.4912620782852173, "learning_rate": 5.549888595953455e-06, "loss": 0.1225, "step": 4453 }, { "epoch": 1.443292287751134, "grad_norm": 0.46872448921203613, "learning_rate": 5.548149956593203e-06, "loss": 0.1189, "step": 4454 }, { "epoch": 1.4436163318211277, "grad_norm": 0.4642205834388733, "learning_rate": 5.546411250144701e-06, "loss": 0.1085, "step": 4455 }, { "epoch": 1.4439403758911211, "grad_norm": 0.45991188287734985, "learning_rate": 5.544672476820751e-06, "loss": 0.1147, "step": 4456 }, { "epoch": 1.4442644199611148, "grad_norm": 0.4836006462574005, "learning_rate": 5.542933636834164e-06, "loss": 0.1153, "step": 4457 }, { "epoch": 1.4445884640311082, "grad_norm": 0.5125424861907959, "learning_rate": 5.541194730397755e-06, "loss": 0.1242, "step": 4458 }, { "epoch": 1.4449125081011016, "grad_norm": 0.4503360688686371, "learning_rate": 5.53945575772435e-06, "loss": 0.1066, "step": 4459 }, { "epoch": 1.4452365521710953, "grad_norm": 0.4397493004798889, "learning_rate": 5.537716719026784e-06, "loss": 0.1121, "step": 4460 }, { "epoch": 1.445560596241089, "grad_norm": 0.46807533502578735, "learning_rate": 5.535977614517896e-06, "loss": 0.1107, "step": 4461 }, { "epoch": 1.4458846403110823, "grad_norm": 0.4748874306678772, "learning_rate": 5.534238444410537e-06, "loss": 0.1214, "step": 4462 }, { "epoch": 1.4462086843810757, "grad_norm": 0.46195435523986816, "learning_rate": 5.532499208917563e-06, "loss": 0.113, "step": 4463 }, { "epoch": 1.4465327284510694, "grad_norm": 0.43683934211730957, "learning_rate": 5.530759908251842e-06, "loss": 0.1029, "step": 4464 }, { "epoch": 1.4468567725210628, "grad_norm": 0.4722435772418976, "learning_rate": 5.529020542626246e-06, "loss": 0.1173, "step": 4465 }, { "epoch": 1.4471808165910565, "grad_norm": 0.4351162016391754, "learning_rate": 5.527281112253657e-06, "loss": 0.1048, "step": 4466 }, { "epoch": 1.4475048606610499, "grad_norm": 0.45564013719558716, "learning_rate": 5.525541617346965e-06, "loss": 0.1147, "step": 4467 }, { "epoch": 1.4478289047310433, "grad_norm": 0.48730671405792236, "learning_rate": 5.523802058119067e-06, "loss": 0.1217, "step": 4468 }, { "epoch": 1.448152948801037, "grad_norm": 0.40554869174957275, "learning_rate": 5.522062434782867e-06, "loss": 0.0939, "step": 4469 }, { "epoch": 1.4484769928710304, "grad_norm": 0.45881539583206177, "learning_rate": 5.520322747551278e-06, "loss": 0.1095, "step": 4470 }, { "epoch": 1.448801036941024, "grad_norm": 0.4682929813861847, "learning_rate": 5.518582996637223e-06, "loss": 0.1145, "step": 4471 }, { "epoch": 1.4491250810110174, "grad_norm": 0.45971962809562683, "learning_rate": 5.516843182253628e-06, "loss": 0.1088, "step": 4472 }, { "epoch": 1.449449125081011, "grad_norm": 0.4667341113090515, "learning_rate": 5.515103304613434e-06, "loss": 0.1163, "step": 4473 }, { "epoch": 1.4497731691510045, "grad_norm": 0.440285325050354, "learning_rate": 5.5133633639295795e-06, "loss": 0.103, "step": 4474 }, { "epoch": 1.4500972132209982, "grad_norm": 0.4824848473072052, "learning_rate": 5.511623360415019e-06, "loss": 0.1233, "step": 4475 }, { "epoch": 1.4504212572909916, "grad_norm": 0.42888009548187256, "learning_rate": 5.509883294282714e-06, "loss": 0.1049, "step": 4476 }, { "epoch": 1.450745301360985, "grad_norm": 0.4593934714794159, "learning_rate": 5.508143165745628e-06, "loss": 0.1139, "step": 4477 }, { "epoch": 1.4510693454309787, "grad_norm": 0.49577581882476807, "learning_rate": 5.506402975016738e-06, "loss": 0.1189, "step": 4478 }, { "epoch": 1.451393389500972, "grad_norm": 0.49611929059028625, "learning_rate": 5.5046627223090265e-06, "loss": 0.1206, "step": 4479 }, { "epoch": 1.4517174335709657, "grad_norm": 0.4758347272872925, "learning_rate": 5.5029224078354844e-06, "loss": 0.1104, "step": 4480 }, { "epoch": 1.4520414776409591, "grad_norm": 0.48814454674720764, "learning_rate": 5.501182031809107e-06, "loss": 0.1205, "step": 4481 }, { "epoch": 1.4523655217109526, "grad_norm": 0.47150522470474243, "learning_rate": 5.4994415944429e-06, "loss": 0.1059, "step": 4482 }, { "epoch": 1.4526895657809462, "grad_norm": 0.47519728541374207, "learning_rate": 5.497701095949879e-06, "loss": 0.1186, "step": 4483 }, { "epoch": 1.4530136098509399, "grad_norm": 0.4623326361179352, "learning_rate": 5.49596053654306e-06, "loss": 0.1091, "step": 4484 }, { "epoch": 1.4533376539209333, "grad_norm": 0.4716527462005615, "learning_rate": 5.494219916435474e-06, "loss": 0.116, "step": 4485 }, { "epoch": 1.4536616979909267, "grad_norm": 0.46047505736351013, "learning_rate": 5.492479235840154e-06, "loss": 0.1041, "step": 4486 }, { "epoch": 1.4539857420609203, "grad_norm": 0.49836093187332153, "learning_rate": 5.490738494970144e-06, "loss": 0.1063, "step": 4487 }, { "epoch": 1.4543097861309138, "grad_norm": 0.43920251727104187, "learning_rate": 5.4889976940384915e-06, "loss": 0.0945, "step": 4488 }, { "epoch": 1.4546338302009074, "grad_norm": 0.43778035044670105, "learning_rate": 5.487256833258256e-06, "loss": 0.1046, "step": 4489 }, { "epoch": 1.4549578742709008, "grad_norm": 0.469485342502594, "learning_rate": 5.485515912842499e-06, "loss": 0.1098, "step": 4490 }, { "epoch": 1.4552819183408943, "grad_norm": 0.4534398317337036, "learning_rate": 5.4837749330042965e-06, "loss": 0.1093, "step": 4491 }, { "epoch": 1.455605962410888, "grad_norm": 0.47168147563934326, "learning_rate": 5.4820338939567265e-06, "loss": 0.1121, "step": 4492 }, { "epoch": 1.4559300064808813, "grad_norm": 0.47512897849082947, "learning_rate": 5.480292795912873e-06, "loss": 0.1136, "step": 4493 }, { "epoch": 1.456254050550875, "grad_norm": 0.4722960293292999, "learning_rate": 5.478551639085831e-06, "loss": 0.1065, "step": 4494 }, { "epoch": 1.4565780946208684, "grad_norm": 0.44116073846817017, "learning_rate": 5.4768104236887e-06, "loss": 0.1021, "step": 4495 }, { "epoch": 1.4569021386908618, "grad_norm": 0.457285612821579, "learning_rate": 5.47506914993459e-06, "loss": 0.1123, "step": 4496 }, { "epoch": 1.4572261827608555, "grad_norm": 0.47741320729255676, "learning_rate": 5.473327818036615e-06, "loss": 0.1175, "step": 4497 }, { "epoch": 1.4575502268308491, "grad_norm": 0.5050625801086426, "learning_rate": 5.471586428207899e-06, "loss": 0.133, "step": 4498 }, { "epoch": 1.4578742709008425, "grad_norm": 0.46975886821746826, "learning_rate": 5.469844980661567e-06, "loss": 0.1158, "step": 4499 }, { "epoch": 1.458198314970836, "grad_norm": 0.46380752325057983, "learning_rate": 5.468103475610758e-06, "loss": 0.1148, "step": 4500 }, { "epoch": 1.4585223590408296, "grad_norm": 0.4460957646369934, "learning_rate": 5.466361913268616e-06, "loss": 0.1074, "step": 4501 }, { "epoch": 1.458846403110823, "grad_norm": 0.49102339148521423, "learning_rate": 5.464620293848291e-06, "loss": 0.1167, "step": 4502 }, { "epoch": 1.4591704471808167, "grad_norm": 0.45689043402671814, "learning_rate": 5.462878617562939e-06, "loss": 0.1102, "step": 4503 }, { "epoch": 1.45949449125081, "grad_norm": 0.44406747817993164, "learning_rate": 5.461136884625727e-06, "loss": 0.104, "step": 4504 }, { "epoch": 1.4598185353208035, "grad_norm": 0.40898412466049194, "learning_rate": 5.459395095249822e-06, "loss": 0.0999, "step": 4505 }, { "epoch": 1.4601425793907972, "grad_norm": 0.430336058139801, "learning_rate": 5.457653249648405e-06, "loss": 0.0998, "step": 4506 }, { "epoch": 1.4604666234607906, "grad_norm": 0.5189803242683411, "learning_rate": 5.455911348034661e-06, "loss": 0.1157, "step": 4507 }, { "epoch": 1.4607906675307842, "grad_norm": 0.4046836197376251, "learning_rate": 5.454169390621783e-06, "loss": 0.0932, "step": 4508 }, { "epoch": 1.4611147116007777, "grad_norm": 0.45686987042427063, "learning_rate": 5.452427377622967e-06, "loss": 0.113, "step": 4509 }, { "epoch": 1.4614387556707713, "grad_norm": 0.5095760822296143, "learning_rate": 5.45068530925142e-06, "loss": 0.1104, "step": 4510 }, { "epoch": 1.4617627997407647, "grad_norm": 0.5702762007713318, "learning_rate": 5.448943185720355e-06, "loss": 0.102, "step": 4511 }, { "epoch": 1.4620868438107584, "grad_norm": 0.45763012766838074, "learning_rate": 5.447201007242988e-06, "loss": 0.1131, "step": 4512 }, { "epoch": 1.4624108878807518, "grad_norm": 0.4436397850513458, "learning_rate": 5.4454587740325485e-06, "loss": 0.1121, "step": 4513 }, { "epoch": 1.4627349319507452, "grad_norm": 0.47292545437812805, "learning_rate": 5.443716486302266e-06, "loss": 0.1138, "step": 4514 }, { "epoch": 1.4630589760207389, "grad_norm": 0.5149725675582886, "learning_rate": 5.441974144265383e-06, "loss": 0.1249, "step": 4515 }, { "epoch": 1.4633830200907323, "grad_norm": 0.46080783009529114, "learning_rate": 5.440231748135143e-06, "loss": 0.1067, "step": 4516 }, { "epoch": 1.463707064160726, "grad_norm": 0.48276081681251526, "learning_rate": 5.438489298124798e-06, "loss": 0.113, "step": 4517 }, { "epoch": 1.4640311082307194, "grad_norm": 0.4716761112213135, "learning_rate": 5.436746794447608e-06, "loss": 0.1095, "step": 4518 }, { "epoch": 1.4643551523007128, "grad_norm": 0.4616851806640625, "learning_rate": 5.43500423731684e-06, "loss": 0.1043, "step": 4519 }, { "epoch": 1.4646791963707064, "grad_norm": 0.46129775047302246, "learning_rate": 5.433261626945763e-06, "loss": 0.1055, "step": 4520 }, { "epoch": 1.4650032404407, "grad_norm": 0.4611496329307556, "learning_rate": 5.431518963547656e-06, "loss": 0.1093, "step": 4521 }, { "epoch": 1.4653272845106935, "grad_norm": 0.45577365159988403, "learning_rate": 5.429776247335807e-06, "loss": 0.112, "step": 4522 }, { "epoch": 1.465651328580687, "grad_norm": 0.4398552477359772, "learning_rate": 5.428033478523505e-06, "loss": 0.1071, "step": 4523 }, { "epoch": 1.4659753726506806, "grad_norm": 0.4762032628059387, "learning_rate": 5.426290657324051e-06, "loss": 0.1153, "step": 4524 }, { "epoch": 1.466299416720674, "grad_norm": 0.4628520607948303, "learning_rate": 5.424547783950744e-06, "loss": 0.113, "step": 4525 }, { "epoch": 1.4666234607906676, "grad_norm": 0.47359782457351685, "learning_rate": 5.4228048586169005e-06, "loss": 0.121, "step": 4526 }, { "epoch": 1.466947504860661, "grad_norm": 0.43229299783706665, "learning_rate": 5.421061881535834e-06, "loss": 0.0964, "step": 4527 }, { "epoch": 1.4672715489306545, "grad_norm": 0.4732006788253784, "learning_rate": 5.4193188529208715e-06, "loss": 0.1118, "step": 4528 }, { "epoch": 1.4675955930006481, "grad_norm": 0.4338208734989166, "learning_rate": 5.417575772985339e-06, "loss": 0.1034, "step": 4529 }, { "epoch": 1.4679196370706415, "grad_norm": 0.45097798109054565, "learning_rate": 5.415832641942576e-06, "loss": 0.1042, "step": 4530 }, { "epoch": 1.4682436811406352, "grad_norm": 0.42899981141090393, "learning_rate": 5.4140894600059215e-06, "loss": 0.1067, "step": 4531 }, { "epoch": 1.4685677252106286, "grad_norm": 0.4907374978065491, "learning_rate": 5.412346227388726e-06, "loss": 0.1165, "step": 4532 }, { "epoch": 1.468891769280622, "grad_norm": 0.43755388259887695, "learning_rate": 5.410602944304344e-06, "loss": 0.1038, "step": 4533 }, { "epoch": 1.4692158133506157, "grad_norm": 0.49814197421073914, "learning_rate": 5.4088596109661374e-06, "loss": 0.1186, "step": 4534 }, { "epoch": 1.4695398574206093, "grad_norm": 0.5008993148803711, "learning_rate": 5.407116227587472e-06, "loss": 0.1151, "step": 4535 }, { "epoch": 1.4698639014906028, "grad_norm": 0.49627819657325745, "learning_rate": 5.4053727943817215e-06, "loss": 0.1146, "step": 4536 }, { "epoch": 1.4701879455605962, "grad_norm": 0.45009931921958923, "learning_rate": 5.403629311562265e-06, "loss": 0.1108, "step": 4537 }, { "epoch": 1.4705119896305898, "grad_norm": 0.4893830716609955, "learning_rate": 5.4018857793424885e-06, "loss": 0.1161, "step": 4538 }, { "epoch": 1.4708360337005832, "grad_norm": 0.46369659900665283, "learning_rate": 5.400142197935784e-06, "loss": 0.1138, "step": 4539 }, { "epoch": 1.471160077770577, "grad_norm": 0.4574586749076843, "learning_rate": 5.398398567555546e-06, "loss": 0.1101, "step": 4540 }, { "epoch": 1.4714841218405703, "grad_norm": 0.42343491315841675, "learning_rate": 5.396654888415183e-06, "loss": 0.0933, "step": 4541 }, { "epoch": 1.4718081659105637, "grad_norm": 0.46502062678337097, "learning_rate": 5.3949111607281e-06, "loss": 0.1127, "step": 4542 }, { "epoch": 1.4721322099805574, "grad_norm": 0.47962379455566406, "learning_rate": 5.3931673847077135e-06, "loss": 0.1211, "step": 4543 }, { "epoch": 1.4724562540505508, "grad_norm": 0.4389127790927887, "learning_rate": 5.391423560567446e-06, "loss": 0.1075, "step": 4544 }, { "epoch": 1.4727802981205445, "grad_norm": 0.43341097235679626, "learning_rate": 5.389679688520722e-06, "loss": 0.1065, "step": 4545 }, { "epoch": 1.4731043421905379, "grad_norm": 0.47744542360305786, "learning_rate": 5.3879357687809795e-06, "loss": 0.1156, "step": 4546 }, { "epoch": 1.4734283862605313, "grad_norm": 0.5202251672744751, "learning_rate": 5.3861918015616536e-06, "loss": 0.1354, "step": 4547 }, { "epoch": 1.473752430330525, "grad_norm": 0.46379417181015015, "learning_rate": 5.384447787076189e-06, "loss": 0.1186, "step": 4548 }, { "epoch": 1.4740764744005186, "grad_norm": 0.44243863224983215, "learning_rate": 5.382703725538036e-06, "loss": 0.1085, "step": 4549 }, { "epoch": 1.474400518470512, "grad_norm": 0.47953954339027405, "learning_rate": 5.380959617160655e-06, "loss": 0.1106, "step": 4550 }, { "epoch": 1.4747245625405054, "grad_norm": 0.43394193053245544, "learning_rate": 5.379215462157502e-06, "loss": 0.1, "step": 4551 }, { "epoch": 1.475048606610499, "grad_norm": 0.4366772174835205, "learning_rate": 5.377471260742048e-06, "loss": 0.1038, "step": 4552 }, { "epoch": 1.4753726506804925, "grad_norm": 0.45473575592041016, "learning_rate": 5.375727013127769e-06, "loss": 0.1128, "step": 4553 }, { "epoch": 1.4756966947504861, "grad_norm": 0.4511753022670746, "learning_rate": 5.373982719528137e-06, "loss": 0.11, "step": 4554 }, { "epoch": 1.4760207388204796, "grad_norm": 0.4673917889595032, "learning_rate": 5.3722383801566425e-06, "loss": 0.111, "step": 4555 }, { "epoch": 1.476344782890473, "grad_norm": 0.459878534078598, "learning_rate": 5.370493995226772e-06, "loss": 0.1158, "step": 4556 }, { "epoch": 1.4766688269604666, "grad_norm": 0.45323413610458374, "learning_rate": 5.368749564952025e-06, "loss": 0.11, "step": 4557 }, { "epoch": 1.47699287103046, "grad_norm": 0.4526347517967224, "learning_rate": 5.367005089545899e-06, "loss": 0.1029, "step": 4558 }, { "epoch": 1.4773169151004537, "grad_norm": 0.44370052218437195, "learning_rate": 5.365260569221906e-06, "loss": 0.1088, "step": 4559 }, { "epoch": 1.4776409591704471, "grad_norm": 0.4556959271430969, "learning_rate": 5.363516004193553e-06, "loss": 0.1042, "step": 4560 }, { "epoch": 1.4779650032404408, "grad_norm": 0.44470837712287903, "learning_rate": 5.361771394674362e-06, "loss": 0.1057, "step": 4561 }, { "epoch": 1.4782890473104342, "grad_norm": 0.40587565302848816, "learning_rate": 5.360026740877853e-06, "loss": 0.0937, "step": 4562 }, { "epoch": 1.4786130913804278, "grad_norm": 0.45987752079963684, "learning_rate": 5.358282043017557e-06, "loss": 0.1148, "step": 4563 }, { "epoch": 1.4789371354504213, "grad_norm": 0.44257667660713196, "learning_rate": 5.356537301307006e-06, "loss": 0.1055, "step": 4564 }, { "epoch": 1.4792611795204147, "grad_norm": 0.4302980899810791, "learning_rate": 5.3547925159597426e-06, "loss": 0.1047, "step": 4565 }, { "epoch": 1.4795852235904083, "grad_norm": 0.4797317087650299, "learning_rate": 5.353047687189309e-06, "loss": 0.1154, "step": 4566 }, { "epoch": 1.4799092676604018, "grad_norm": 0.46265506744384766, "learning_rate": 5.351302815209256e-06, "loss": 0.116, "step": 4567 }, { "epoch": 1.4802333117303954, "grad_norm": 0.4362986385822296, "learning_rate": 5.34955790023314e-06, "loss": 0.0969, "step": 4568 }, { "epoch": 1.4805573558003888, "grad_norm": 0.43595537543296814, "learning_rate": 5.347812942474519e-06, "loss": 0.1098, "step": 4569 }, { "epoch": 1.4808813998703823, "grad_norm": 0.49564412236213684, "learning_rate": 5.346067942146963e-06, "loss": 0.1273, "step": 4570 }, { "epoch": 1.481205443940376, "grad_norm": 0.4789816737174988, "learning_rate": 5.344322899464042e-06, "loss": 0.114, "step": 4571 }, { "epoch": 1.4815294880103695, "grad_norm": 0.4422919750213623, "learning_rate": 5.342577814639332e-06, "loss": 0.1061, "step": 4572 }, { "epoch": 1.481853532080363, "grad_norm": 0.4468146562576294, "learning_rate": 5.34083268788641e-06, "loss": 0.1108, "step": 4573 }, { "epoch": 1.4821775761503564, "grad_norm": 0.47361573576927185, "learning_rate": 5.339087519418868e-06, "loss": 0.1089, "step": 4574 }, { "epoch": 1.48250162022035, "grad_norm": 0.4521580636501312, "learning_rate": 5.337342309450298e-06, "loss": 0.1075, "step": 4575 }, { "epoch": 1.4828256642903435, "grad_norm": 0.44739869236946106, "learning_rate": 5.335597058194293e-06, "loss": 0.1047, "step": 4576 }, { "epoch": 1.483149708360337, "grad_norm": 0.5015192627906799, "learning_rate": 5.333851765864458e-06, "loss": 0.1117, "step": 4577 }, { "epoch": 1.4834737524303305, "grad_norm": 0.4476906657218933, "learning_rate": 5.332106432674399e-06, "loss": 0.1036, "step": 4578 }, { "epoch": 1.483797796500324, "grad_norm": 0.42498645186424255, "learning_rate": 5.330361058837726e-06, "loss": 0.1037, "step": 4579 }, { "epoch": 1.4841218405703176, "grad_norm": 0.42732563614845276, "learning_rate": 5.328615644568059e-06, "loss": 0.1, "step": 4580 }, { "epoch": 1.484445884640311, "grad_norm": 0.5075024366378784, "learning_rate": 5.326870190079019e-06, "loss": 0.1221, "step": 4581 }, { "epoch": 1.4847699287103047, "grad_norm": 0.45853862166404724, "learning_rate": 5.325124695584232e-06, "loss": 0.1117, "step": 4582 }, { "epoch": 1.485093972780298, "grad_norm": 0.4977118968963623, "learning_rate": 5.323379161297329e-06, "loss": 0.1177, "step": 4583 }, { "epoch": 1.4854180168502915, "grad_norm": 0.47412171959877014, "learning_rate": 5.321633587431947e-06, "loss": 0.1134, "step": 4584 }, { "epoch": 1.4857420609202852, "grad_norm": 0.4670596718788147, "learning_rate": 5.319887974201727e-06, "loss": 0.1119, "step": 4585 }, { "epoch": 1.4860661049902788, "grad_norm": 0.46831014752388, "learning_rate": 5.318142321820316e-06, "loss": 0.1042, "step": 4586 }, { "epoch": 1.4863901490602722, "grad_norm": 0.42625847458839417, "learning_rate": 5.316396630501365e-06, "loss": 0.1005, "step": 4587 }, { "epoch": 1.4867141931302656, "grad_norm": 0.46382203698158264, "learning_rate": 5.314650900458529e-06, "loss": 0.1105, "step": 4588 }, { "epoch": 1.4870382372002593, "grad_norm": 0.44759857654571533, "learning_rate": 5.31290513190547e-06, "loss": 0.1043, "step": 4589 }, { "epoch": 1.4873622812702527, "grad_norm": 0.44344985485076904, "learning_rate": 5.3111593250558515e-06, "loss": 0.1048, "step": 4590 }, { "epoch": 1.4876863253402464, "grad_norm": 0.44920921325683594, "learning_rate": 5.309413480123343e-06, "loss": 0.1056, "step": 4591 }, { "epoch": 1.4880103694102398, "grad_norm": 0.46111756563186646, "learning_rate": 5.307667597321621e-06, "loss": 0.1053, "step": 4592 }, { "epoch": 1.4883344134802332, "grad_norm": 0.45845603942871094, "learning_rate": 5.305921676864363e-06, "loss": 0.1153, "step": 4593 }, { "epoch": 1.4886584575502269, "grad_norm": 0.45684415102005005, "learning_rate": 5.3041757189652535e-06, "loss": 0.109, "step": 4594 }, { "epoch": 1.4889825016202203, "grad_norm": 0.47713303565979004, "learning_rate": 5.302429723837982e-06, "loss": 0.1178, "step": 4595 }, { "epoch": 1.489306545690214, "grad_norm": 0.598270833492279, "learning_rate": 5.3006836916962375e-06, "loss": 0.1202, "step": 4596 }, { "epoch": 1.4896305897602073, "grad_norm": 0.4470941722393036, "learning_rate": 5.298937622753722e-06, "loss": 0.1056, "step": 4597 }, { "epoch": 1.4899546338302008, "grad_norm": 0.45994114875793457, "learning_rate": 5.297191517224133e-06, "loss": 0.1155, "step": 4598 }, { "epoch": 1.4902786779001944, "grad_norm": 0.45040905475616455, "learning_rate": 5.295445375321181e-06, "loss": 0.1097, "step": 4599 }, { "epoch": 1.490602721970188, "grad_norm": 0.4630436897277832, "learning_rate": 5.293699197258574e-06, "loss": 0.1134, "step": 4600 }, { "epoch": 1.4909267660401815, "grad_norm": 0.4739874005317688, "learning_rate": 5.29195298325003e-06, "loss": 0.1127, "step": 4601 }, { "epoch": 1.491250810110175, "grad_norm": 0.45205971598625183, "learning_rate": 5.290206733509266e-06, "loss": 0.1093, "step": 4602 }, { "epoch": 1.4915748541801686, "grad_norm": 0.4669693112373352, "learning_rate": 5.288460448250009e-06, "loss": 0.1118, "step": 4603 }, { "epoch": 1.491898898250162, "grad_norm": 0.4435705542564392, "learning_rate": 5.286714127685985e-06, "loss": 0.1039, "step": 4604 }, { "epoch": 1.4922229423201556, "grad_norm": 0.43490707874298096, "learning_rate": 5.284967772030927e-06, "loss": 0.1034, "step": 4605 }, { "epoch": 1.492546986390149, "grad_norm": 0.4700656235218048, "learning_rate": 5.283221381498572e-06, "loss": 0.1149, "step": 4606 }, { "epoch": 1.4928710304601425, "grad_norm": 0.4963572323322296, "learning_rate": 5.281474956302662e-06, "loss": 0.1316, "step": 4607 }, { "epoch": 1.4931950745301361, "grad_norm": 0.4392409324645996, "learning_rate": 5.279728496656943e-06, "loss": 0.1002, "step": 4608 }, { "epoch": 1.4935191186001295, "grad_norm": 0.48771601915359497, "learning_rate": 5.277982002775163e-06, "loss": 0.1186, "step": 4609 }, { "epoch": 1.4938431626701232, "grad_norm": 0.43994951248168945, "learning_rate": 5.276235474871076e-06, "loss": 0.1029, "step": 4610 }, { "epoch": 1.4941672067401166, "grad_norm": 0.43616870045661926, "learning_rate": 5.274488913158442e-06, "loss": 0.1043, "step": 4611 }, { "epoch": 1.4944912508101102, "grad_norm": 0.46900299191474915, "learning_rate": 5.272742317851023e-06, "loss": 0.1123, "step": 4612 }, { "epoch": 1.4948152948801037, "grad_norm": 0.4342197775840759, "learning_rate": 5.2709956891625845e-06, "loss": 0.1063, "step": 4613 }, { "epoch": 1.4951393389500973, "grad_norm": 0.44443580508232117, "learning_rate": 5.2692490273068965e-06, "loss": 0.1068, "step": 4614 }, { "epoch": 1.4954633830200907, "grad_norm": 0.4577859938144684, "learning_rate": 5.2675023324977356e-06, "loss": 0.1069, "step": 4615 }, { "epoch": 1.4957874270900842, "grad_norm": 0.4948973059654236, "learning_rate": 5.265755604948877e-06, "loss": 0.1189, "step": 4616 }, { "epoch": 1.4961114711600778, "grad_norm": 0.4664367735385895, "learning_rate": 5.264008844874105e-06, "loss": 0.1085, "step": 4617 }, { "epoch": 1.4964355152300712, "grad_norm": 0.4518917202949524, "learning_rate": 5.262262052487207e-06, "loss": 0.1018, "step": 4618 }, { "epoch": 1.4967595593000649, "grad_norm": 0.49077895283699036, "learning_rate": 5.260515228001973e-06, "loss": 0.123, "step": 4619 }, { "epoch": 1.4970836033700583, "grad_norm": 0.49957966804504395, "learning_rate": 5.258768371632197e-06, "loss": 0.1155, "step": 4620 }, { "epoch": 1.4974076474400517, "grad_norm": 0.44145315885543823, "learning_rate": 5.257021483591677e-06, "loss": 0.1046, "step": 4621 }, { "epoch": 1.4977316915100454, "grad_norm": 0.42506128549575806, "learning_rate": 5.255274564094215e-06, "loss": 0.0977, "step": 4622 }, { "epoch": 1.498055735580039, "grad_norm": 0.473082572221756, "learning_rate": 5.2535276133536175e-06, "loss": 0.108, "step": 4623 }, { "epoch": 1.4983797796500324, "grad_norm": 0.4737880229949951, "learning_rate": 5.251780631583696e-06, "loss": 0.1133, "step": 4624 }, { "epoch": 1.4987038237200259, "grad_norm": 0.4546349346637726, "learning_rate": 5.250033618998262e-06, "loss": 0.1072, "step": 4625 }, { "epoch": 1.4990278677900195, "grad_norm": 0.4377857744693756, "learning_rate": 5.2482865758111335e-06, "loss": 0.1056, "step": 4626 }, { "epoch": 1.499351911860013, "grad_norm": 0.43855395913124084, "learning_rate": 5.246539502236131e-06, "loss": 0.1028, "step": 4627 }, { "epoch": 1.4996759559300066, "grad_norm": 0.4623560607433319, "learning_rate": 5.244792398487081e-06, "loss": 0.1086, "step": 4628 }, { "epoch": 1.5, "grad_norm": 0.4703526198863983, "learning_rate": 5.2430452647778095e-06, "loss": 0.1064, "step": 4629 }, { "epoch": 1.5003240440699934, "grad_norm": 0.45636919140815735, "learning_rate": 5.241298101322152e-06, "loss": 0.1064, "step": 4630 }, { "epoch": 1.500648088139987, "grad_norm": 0.4634525179862976, "learning_rate": 5.239550908333943e-06, "loss": 0.1094, "step": 4631 }, { "epoch": 1.5009721322099807, "grad_norm": 0.4673732817173004, "learning_rate": 5.237803686027021e-06, "loss": 0.1123, "step": 4632 }, { "epoch": 1.5012961762799741, "grad_norm": 0.4772822856903076, "learning_rate": 5.236056434615231e-06, "loss": 0.1199, "step": 4633 }, { "epoch": 1.5016202203499676, "grad_norm": 0.43391144275665283, "learning_rate": 5.234309154312417e-06, "loss": 0.1037, "step": 4634 }, { "epoch": 1.501944264419961, "grad_norm": 0.47034046053886414, "learning_rate": 5.232561845332433e-06, "loss": 0.1058, "step": 4635 }, { "epoch": 1.5022683084899546, "grad_norm": 0.45668259263038635, "learning_rate": 5.230814507889129e-06, "loss": 0.1049, "step": 4636 }, { "epoch": 1.5025923525599483, "grad_norm": 0.4736466705799103, "learning_rate": 5.2290671421963635e-06, "loss": 0.1201, "step": 4637 }, { "epoch": 1.5029163966299417, "grad_norm": 0.47415265440940857, "learning_rate": 5.227319748467998e-06, "loss": 0.1045, "step": 4638 }, { "epoch": 1.5032404406999351, "grad_norm": 0.4864136576652527, "learning_rate": 5.225572326917896e-06, "loss": 0.1155, "step": 4639 }, { "epoch": 1.5035644847699285, "grad_norm": 0.46752864122390747, "learning_rate": 5.223824877759924e-06, "loss": 0.1135, "step": 4640 }, { "epoch": 1.5038885288399222, "grad_norm": 0.5391199588775635, "learning_rate": 5.222077401207954e-06, "loss": 0.1318, "step": 4641 }, { "epoch": 1.5042125729099158, "grad_norm": 0.4489925503730774, "learning_rate": 5.220329897475861e-06, "loss": 0.1032, "step": 4642 }, { "epoch": 1.5045366169799093, "grad_norm": 0.46553587913513184, "learning_rate": 5.2185823667775204e-06, "loss": 0.1149, "step": 4643 }, { "epoch": 1.5048606610499027, "grad_norm": 0.5243228077888489, "learning_rate": 5.216834809326815e-06, "loss": 0.1236, "step": 4644 }, { "epoch": 1.5051847051198963, "grad_norm": 0.4822893440723419, "learning_rate": 5.215087225337628e-06, "loss": 0.1119, "step": 4645 }, { "epoch": 1.50550874918989, "grad_norm": 0.4582579433917999, "learning_rate": 5.213339615023847e-06, "loss": 0.1078, "step": 4646 }, { "epoch": 1.5058327932598834, "grad_norm": 0.43378040194511414, "learning_rate": 5.211591978599362e-06, "loss": 0.1034, "step": 4647 }, { "epoch": 1.5061568373298768, "grad_norm": 0.451219767332077, "learning_rate": 5.209844316278066e-06, "loss": 0.1084, "step": 4648 }, { "epoch": 1.5064808813998702, "grad_norm": 0.4611717760562897, "learning_rate": 5.208096628273859e-06, "loss": 0.1011, "step": 4649 }, { "epoch": 1.5068049254698639, "grad_norm": 0.4647367298603058, "learning_rate": 5.206348914800638e-06, "loss": 0.1104, "step": 4650 }, { "epoch": 1.5071289695398575, "grad_norm": 0.4628082811832428, "learning_rate": 5.204601176072308e-06, "loss": 0.1116, "step": 4651 }, { "epoch": 1.507453013609851, "grad_norm": 0.45257583260536194, "learning_rate": 5.202853412302775e-06, "loss": 0.1009, "step": 4652 }, { "epoch": 1.5077770576798444, "grad_norm": 0.46580031514167786, "learning_rate": 5.2011056237059476e-06, "loss": 0.1084, "step": 4653 }, { "epoch": 1.508101101749838, "grad_norm": 0.4912484288215637, "learning_rate": 5.1993578104957385e-06, "loss": 0.1147, "step": 4654 }, { "epoch": 1.5084251458198314, "grad_norm": 0.4861374795436859, "learning_rate": 5.197609972886063e-06, "loss": 0.1194, "step": 4655 }, { "epoch": 1.508749189889825, "grad_norm": 0.4274054765701294, "learning_rate": 5.195862111090842e-06, "loss": 0.0995, "step": 4656 }, { "epoch": 1.5090732339598185, "grad_norm": 0.47026312351226807, "learning_rate": 5.194114225323994e-06, "loss": 0.1109, "step": 4657 }, { "epoch": 1.509397278029812, "grad_norm": 0.46407759189605713, "learning_rate": 5.192366315799443e-06, "loss": 0.1135, "step": 4658 }, { "epoch": 1.5097213220998056, "grad_norm": 0.45349907875061035, "learning_rate": 5.190618382731117e-06, "loss": 0.1095, "step": 4659 }, { "epoch": 1.5100453661697992, "grad_norm": 0.43043872714042664, "learning_rate": 5.188870426332946e-06, "loss": 0.1075, "step": 4660 }, { "epoch": 1.5103694102397927, "grad_norm": 0.47293248772621155, "learning_rate": 5.187122446818864e-06, "loss": 0.1172, "step": 4661 }, { "epoch": 1.510693454309786, "grad_norm": 0.476509690284729, "learning_rate": 5.185374444402806e-06, "loss": 0.1167, "step": 4662 }, { "epoch": 1.5110174983797795, "grad_norm": 0.44831207394599915, "learning_rate": 5.1836264192987104e-06, "loss": 0.1054, "step": 4663 }, { "epoch": 1.5113415424497731, "grad_norm": 0.4845344126224518, "learning_rate": 5.181878371720519e-06, "loss": 0.1199, "step": 4664 }, { "epoch": 1.5116655865197668, "grad_norm": 0.44630926847457886, "learning_rate": 5.180130301882175e-06, "loss": 0.1093, "step": 4665 }, { "epoch": 1.5119896305897602, "grad_norm": 0.4507032036781311, "learning_rate": 5.1783822099976265e-06, "loss": 0.1091, "step": 4666 }, { "epoch": 1.5123136746597536, "grad_norm": 0.4598129987716675, "learning_rate": 5.1766340962808225e-06, "loss": 0.1125, "step": 4667 }, { "epoch": 1.5126377187297473, "grad_norm": 0.43906885385513306, "learning_rate": 5.174885960945716e-06, "loss": 0.1064, "step": 4668 }, { "epoch": 1.512961762799741, "grad_norm": 0.4334750771522522, "learning_rate": 5.1731378042062584e-06, "loss": 0.1025, "step": 4669 }, { "epoch": 1.5132858068697344, "grad_norm": 0.46838998794555664, "learning_rate": 5.171389626276411e-06, "loss": 0.1048, "step": 4670 }, { "epoch": 1.5136098509397278, "grad_norm": 0.48773279786109924, "learning_rate": 5.169641427370132e-06, "loss": 0.1182, "step": 4671 }, { "epoch": 1.5139338950097212, "grad_norm": 0.44825443625450134, "learning_rate": 5.167893207701385e-06, "loss": 0.1055, "step": 4672 }, { "epoch": 1.5142579390797148, "grad_norm": 0.43601194024086, "learning_rate": 5.166144967484135e-06, "loss": 0.1058, "step": 4673 }, { "epoch": 1.5145819831497085, "grad_norm": 0.49638938903808594, "learning_rate": 5.16439670693235e-06, "loss": 0.1201, "step": 4674 }, { "epoch": 1.514906027219702, "grad_norm": 0.4662867784500122, "learning_rate": 5.162648426259997e-06, "loss": 0.1033, "step": 4675 }, { "epoch": 1.5152300712896953, "grad_norm": 0.4531441926956177, "learning_rate": 5.160900125681053e-06, "loss": 0.1096, "step": 4676 }, { "epoch": 1.5155541153596888, "grad_norm": 0.4930696487426758, "learning_rate": 5.159151805409491e-06, "loss": 0.1169, "step": 4677 }, { "epoch": 1.5158781594296824, "grad_norm": 0.4691605865955353, "learning_rate": 5.15740346565929e-06, "loss": 0.1078, "step": 4678 }, { "epoch": 1.516202203499676, "grad_norm": 0.492055207490921, "learning_rate": 5.155655106644427e-06, "loss": 0.116, "step": 4679 }, { "epoch": 1.5165262475696695, "grad_norm": 0.4335920512676239, "learning_rate": 5.153906728578887e-06, "loss": 0.1053, "step": 4680 }, { "epoch": 1.516850291639663, "grad_norm": 0.4806187152862549, "learning_rate": 5.152158331676652e-06, "loss": 0.1084, "step": 4681 }, { "epoch": 1.5171743357096565, "grad_norm": 0.5146906971931458, "learning_rate": 5.150409916151711e-06, "loss": 0.1173, "step": 4682 }, { "epoch": 1.5174983797796502, "grad_norm": 0.4676697552204132, "learning_rate": 5.148661482218051e-06, "loss": 0.1226, "step": 4683 }, { "epoch": 1.5178224238496436, "grad_norm": 0.45594385266304016, "learning_rate": 5.146913030089665e-06, "loss": 0.1118, "step": 4684 }, { "epoch": 1.518146467919637, "grad_norm": 0.423243910074234, "learning_rate": 5.1451645599805475e-06, "loss": 0.0956, "step": 4685 }, { "epoch": 1.5184705119896305, "grad_norm": 0.46194830536842346, "learning_rate": 5.143416072104693e-06, "loss": 0.1094, "step": 4686 }, { "epoch": 1.518794556059624, "grad_norm": 0.4480394124984741, "learning_rate": 5.1416675666761e-06, "loss": 0.1093, "step": 4687 }, { "epoch": 1.5191186001296177, "grad_norm": 0.4406319856643677, "learning_rate": 5.1399190439087675e-06, "loss": 0.1108, "step": 4688 }, { "epoch": 1.5194426441996112, "grad_norm": 0.4938640892505646, "learning_rate": 5.138170504016699e-06, "loss": 0.1198, "step": 4689 }, { "epoch": 1.5197666882696046, "grad_norm": 0.4330286383628845, "learning_rate": 5.1364219472138984e-06, "loss": 0.1036, "step": 4690 }, { "epoch": 1.5200907323395982, "grad_norm": 0.5021396279335022, "learning_rate": 5.1346733737143715e-06, "loss": 0.1202, "step": 4691 }, { "epoch": 1.5204147764095917, "grad_norm": 0.43403375148773193, "learning_rate": 5.132924783732128e-06, "loss": 0.1043, "step": 4692 }, { "epoch": 1.5207388204795853, "grad_norm": 0.4966108202934265, "learning_rate": 5.131176177481179e-06, "loss": 0.1176, "step": 4693 }, { "epoch": 1.5210628645495787, "grad_norm": 0.468465119600296, "learning_rate": 5.129427555175534e-06, "loss": 0.1166, "step": 4694 }, { "epoch": 1.5213869086195722, "grad_norm": 0.4209233820438385, "learning_rate": 5.127678917029209e-06, "loss": 0.0997, "step": 4695 }, { "epoch": 1.5217109526895658, "grad_norm": 0.49298617243766785, "learning_rate": 5.12593026325622e-06, "loss": 0.1115, "step": 4696 }, { "epoch": 1.5220349967595594, "grad_norm": 0.4441665709018707, "learning_rate": 5.1241815940705874e-06, "loss": 0.1085, "step": 4697 }, { "epoch": 1.5223590408295529, "grad_norm": 0.5018707513809204, "learning_rate": 5.12243290968633e-06, "loss": 0.1186, "step": 4698 }, { "epoch": 1.5226830848995463, "grad_norm": 0.4586740732192993, "learning_rate": 5.120684210317469e-06, "loss": 0.1071, "step": 4699 }, { "epoch": 1.5230071289695397, "grad_norm": 0.4645669758319855, "learning_rate": 5.1189354961780305e-06, "loss": 0.1162, "step": 4700 }, { "epoch": 1.5233311730395334, "grad_norm": 0.47067075967788696, "learning_rate": 5.117186767482036e-06, "loss": 0.1156, "step": 4701 }, { "epoch": 1.523655217109527, "grad_norm": 0.4710082709789276, "learning_rate": 5.115438024443517e-06, "loss": 0.1132, "step": 4702 }, { "epoch": 1.5239792611795204, "grad_norm": 0.453998327255249, "learning_rate": 5.1136892672765e-06, "loss": 0.1106, "step": 4703 }, { "epoch": 1.5243033052495139, "grad_norm": 0.4350145161151886, "learning_rate": 5.111940496195019e-06, "loss": 0.1048, "step": 4704 }, { "epoch": 1.5246273493195075, "grad_norm": 0.45774731040000916, "learning_rate": 5.110191711413105e-06, "loss": 0.1111, "step": 4705 }, { "epoch": 1.524951393389501, "grad_norm": 0.5091844797134399, "learning_rate": 5.108442913144792e-06, "loss": 0.1282, "step": 4706 }, { "epoch": 1.5252754374594946, "grad_norm": 0.462661474943161, "learning_rate": 5.106694101604116e-06, "loss": 0.1123, "step": 4707 }, { "epoch": 1.525599481529488, "grad_norm": 0.45566895604133606, "learning_rate": 5.104945277005114e-06, "loss": 0.1112, "step": 4708 }, { "epoch": 1.5259235255994814, "grad_norm": 0.47541677951812744, "learning_rate": 5.1031964395618285e-06, "loss": 0.1155, "step": 4709 }, { "epoch": 1.526247569669475, "grad_norm": 0.4878292679786682, "learning_rate": 5.1014475894882956e-06, "loss": 0.1143, "step": 4710 }, { "epoch": 1.5265716137394687, "grad_norm": 0.4552808701992035, "learning_rate": 5.099698726998561e-06, "loss": 0.1123, "step": 4711 }, { "epoch": 1.5268956578094621, "grad_norm": 0.5104338526725769, "learning_rate": 5.0979498523066665e-06, "loss": 0.1199, "step": 4712 }, { "epoch": 1.5272197018794555, "grad_norm": 0.4424220323562622, "learning_rate": 5.096200965626658e-06, "loss": 0.1057, "step": 4713 }, { "epoch": 1.527543745949449, "grad_norm": 0.463344931602478, "learning_rate": 5.094452067172583e-06, "loss": 0.107, "step": 4714 }, { "epoch": 1.5278677900194426, "grad_norm": 0.48906680941581726, "learning_rate": 5.09270315715849e-06, "loss": 0.1132, "step": 4715 }, { "epoch": 1.5281918340894363, "grad_norm": 0.44374555349349976, "learning_rate": 5.090954235798426e-06, "loss": 0.1064, "step": 4716 }, { "epoch": 1.5285158781594297, "grad_norm": 0.4655635356903076, "learning_rate": 5.089205303306447e-06, "loss": 0.1166, "step": 4717 }, { "epoch": 1.528839922229423, "grad_norm": 0.5116678476333618, "learning_rate": 5.087456359896601e-06, "loss": 0.1198, "step": 4718 }, { "epoch": 1.5291639662994168, "grad_norm": 0.46683815121650696, "learning_rate": 5.085707405782942e-06, "loss": 0.1147, "step": 4719 }, { "epoch": 1.5294880103694104, "grad_norm": 0.4789724051952362, "learning_rate": 5.08395844117953e-06, "loss": 0.1187, "step": 4720 }, { "epoch": 1.5298120544394038, "grad_norm": 0.4618801474571228, "learning_rate": 5.082209466300414e-06, "loss": 0.1106, "step": 4721 }, { "epoch": 1.5301360985093972, "grad_norm": 0.43246573209762573, "learning_rate": 5.080460481359656e-06, "loss": 0.1056, "step": 4722 }, { "epoch": 1.5304601425793907, "grad_norm": 0.452864408493042, "learning_rate": 5.078711486571315e-06, "loss": 0.1083, "step": 4723 }, { "epoch": 1.5307841866493843, "grad_norm": 0.4218159317970276, "learning_rate": 5.076962482149449e-06, "loss": 0.103, "step": 4724 }, { "epoch": 1.531108230719378, "grad_norm": 0.4770122766494751, "learning_rate": 5.07521346830812e-06, "loss": 0.1159, "step": 4725 }, { "epoch": 1.5314322747893714, "grad_norm": 0.46023043990135193, "learning_rate": 5.073464445261391e-06, "loss": 0.1115, "step": 4726 }, { "epoch": 1.5317563188593648, "grad_norm": 0.4554358422756195, "learning_rate": 5.0717154132233245e-06, "loss": 0.1088, "step": 4727 }, { "epoch": 1.5320803629293582, "grad_norm": 0.45043107867240906, "learning_rate": 5.069966372407986e-06, "loss": 0.1079, "step": 4728 }, { "epoch": 1.5324044069993519, "grad_norm": 0.46434617042541504, "learning_rate": 5.0682173230294415e-06, "loss": 0.111, "step": 4729 }, { "epoch": 1.5327284510693455, "grad_norm": 0.4614694118499756, "learning_rate": 5.066468265301757e-06, "loss": 0.1109, "step": 4730 }, { "epoch": 1.533052495139339, "grad_norm": 0.4331463575363159, "learning_rate": 5.064719199439001e-06, "loss": 0.0982, "step": 4731 }, { "epoch": 1.5333765392093324, "grad_norm": 0.44229406118392944, "learning_rate": 5.06297012565524e-06, "loss": 0.1013, "step": 4732 }, { "epoch": 1.533700583279326, "grad_norm": 0.4934309720993042, "learning_rate": 5.061221044164546e-06, "loss": 0.1173, "step": 4733 }, { "epoch": 1.5340246273493197, "grad_norm": 0.49272412061691284, "learning_rate": 5.059471955180988e-06, "loss": 0.1126, "step": 4734 }, { "epoch": 1.534348671419313, "grad_norm": 0.4679400324821472, "learning_rate": 5.05772285891864e-06, "loss": 0.1106, "step": 4735 }, { "epoch": 1.5346727154893065, "grad_norm": 0.49218079447746277, "learning_rate": 5.055973755591572e-06, "loss": 0.1196, "step": 4736 }, { "epoch": 1.5349967595593, "grad_norm": 0.45480284094810486, "learning_rate": 5.054224645413858e-06, "loss": 0.1019, "step": 4737 }, { "epoch": 1.5353208036292936, "grad_norm": 0.48111364245414734, "learning_rate": 5.052475528599572e-06, "loss": 0.1137, "step": 4738 }, { "epoch": 1.5356448476992872, "grad_norm": 0.4530400037765503, "learning_rate": 5.050726405362789e-06, "loss": 0.1006, "step": 4739 }, { "epoch": 1.5359688917692806, "grad_norm": 0.4552764296531677, "learning_rate": 5.048977275917586e-06, "loss": 0.109, "step": 4740 }, { "epoch": 1.536292935839274, "grad_norm": 0.5146282315254211, "learning_rate": 5.047228140478039e-06, "loss": 0.1191, "step": 4741 }, { "epoch": 1.5366169799092677, "grad_norm": 0.436003178358078, "learning_rate": 5.045478999258224e-06, "loss": 0.0997, "step": 4742 }, { "epoch": 1.5369410239792611, "grad_norm": 0.45526957511901855, "learning_rate": 5.043729852472221e-06, "loss": 0.1102, "step": 4743 }, { "epoch": 1.5372650680492548, "grad_norm": 0.46070560812950134, "learning_rate": 5.041980700334106e-06, "loss": 0.1093, "step": 4744 }, { "epoch": 1.5375891121192482, "grad_norm": 0.45329949259757996, "learning_rate": 5.040231543057959e-06, "loss": 0.1115, "step": 4745 }, { "epoch": 1.5379131561892416, "grad_norm": 0.45783063769340515, "learning_rate": 5.038482380857862e-06, "loss": 0.1086, "step": 4746 }, { "epoch": 1.5382372002592353, "grad_norm": 0.4675714373588562, "learning_rate": 5.036733213947894e-06, "loss": 0.1166, "step": 4747 }, { "epoch": 1.538561244329229, "grad_norm": 0.5060395002365112, "learning_rate": 5.034984042542136e-06, "loss": 0.125, "step": 4748 }, { "epoch": 1.5388852883992223, "grad_norm": 0.45864853262901306, "learning_rate": 5.033234866854669e-06, "loss": 0.1071, "step": 4749 }, { "epoch": 1.5392093324692158, "grad_norm": 0.42704102396965027, "learning_rate": 5.0314856870995775e-06, "loss": 0.1058, "step": 4750 }, { "epoch": 1.5395333765392092, "grad_norm": 0.424561470746994, "learning_rate": 5.029736503490941e-06, "loss": 0.1093, "step": 4751 }, { "epoch": 1.5398574206092028, "grad_norm": 0.4763147234916687, "learning_rate": 5.027987316242847e-06, "loss": 0.117, "step": 4752 }, { "epoch": 1.5401814646791965, "grad_norm": 0.41369420289993286, "learning_rate": 5.026238125569375e-06, "loss": 0.0943, "step": 4753 }, { "epoch": 1.54050550874919, "grad_norm": 0.4703889489173889, "learning_rate": 5.02448893168461e-06, "loss": 0.1138, "step": 4754 }, { "epoch": 1.5408295528191833, "grad_norm": 0.4704046845436096, "learning_rate": 5.022739734802637e-06, "loss": 0.1218, "step": 4755 }, { "epoch": 1.541153596889177, "grad_norm": 0.4563504755496979, "learning_rate": 5.020990535137541e-06, "loss": 0.1103, "step": 4756 }, { "epoch": 1.5414776409591704, "grad_norm": 0.47579801082611084, "learning_rate": 5.019241332903406e-06, "loss": 0.1245, "step": 4757 }, { "epoch": 1.541801685029164, "grad_norm": 0.45955783128738403, "learning_rate": 5.017492128314319e-06, "loss": 0.1129, "step": 4758 }, { "epoch": 1.5421257290991575, "grad_norm": 0.47311556339263916, "learning_rate": 5.015742921584365e-06, "loss": 0.1158, "step": 4759 }, { "epoch": 1.5424497731691509, "grad_norm": 0.4540937840938568, "learning_rate": 5.013993712927628e-06, "loss": 0.1121, "step": 4760 }, { "epoch": 1.5427738172391445, "grad_norm": 0.4233587682247162, "learning_rate": 5.012244502558198e-06, "loss": 0.108, "step": 4761 }, { "epoch": 1.5430978613091382, "grad_norm": 0.5149884819984436, "learning_rate": 5.0104952906901576e-06, "loss": 0.1231, "step": 4762 }, { "epoch": 1.5434219053791316, "grad_norm": 0.44611334800720215, "learning_rate": 5.008746077537598e-06, "loss": 0.1074, "step": 4763 }, { "epoch": 1.543745949449125, "grad_norm": 0.5051946640014648, "learning_rate": 5.0069968633146006e-06, "loss": 0.1243, "step": 4764 }, { "epoch": 1.5440699935191184, "grad_norm": 0.516586184501648, "learning_rate": 5.005247648235257e-06, "loss": 0.1251, "step": 4765 }, { "epoch": 1.544394037589112, "grad_norm": 0.47310107946395874, "learning_rate": 5.003498432513649e-06, "loss": 0.1074, "step": 4766 }, { "epoch": 1.5447180816591057, "grad_norm": 0.4739862084388733, "learning_rate": 5.001749216363869e-06, "loss": 0.1146, "step": 4767 }, { "epoch": 1.5450421257290992, "grad_norm": 0.4782755374908447, "learning_rate": 5e-06, "loss": 0.1127, "step": 4768 }, { "epoch": 1.5453661697990926, "grad_norm": 0.4727858304977417, "learning_rate": 4.9982507836361335e-06, "loss": 0.1141, "step": 4769 }, { "epoch": 1.5456902138690862, "grad_norm": 0.4504522681236267, "learning_rate": 4.996501567486352e-06, "loss": 0.1149, "step": 4770 }, { "epoch": 1.5460142579390799, "grad_norm": 0.5180319547653198, "learning_rate": 4.994752351764747e-06, "loss": 0.1215, "step": 4771 }, { "epoch": 1.5463383020090733, "grad_norm": 0.46624067425727844, "learning_rate": 4.993003136685401e-06, "loss": 0.1155, "step": 4772 }, { "epoch": 1.5466623460790667, "grad_norm": 0.47008898854255676, "learning_rate": 4.991253922462405e-06, "loss": 0.1049, "step": 4773 }, { "epoch": 1.5469863901490601, "grad_norm": 0.4565297067165375, "learning_rate": 4.989504709309842e-06, "loss": 0.1124, "step": 4774 }, { "epoch": 1.5473104342190538, "grad_norm": 0.4473397135734558, "learning_rate": 4.9877554974418045e-06, "loss": 0.1072, "step": 4775 }, { "epoch": 1.5476344782890474, "grad_norm": 0.48428457975387573, "learning_rate": 4.986006287072374e-06, "loss": 0.1249, "step": 4776 }, { "epoch": 1.5479585223590409, "grad_norm": 0.44468632340431213, "learning_rate": 4.984257078415637e-06, "loss": 0.1122, "step": 4777 }, { "epoch": 1.5482825664290343, "grad_norm": 0.4435969591140747, "learning_rate": 4.982507871685684e-06, "loss": 0.1036, "step": 4778 }, { "epoch": 1.5486066104990277, "grad_norm": 0.49106940627098083, "learning_rate": 4.980758667096594e-06, "loss": 0.1201, "step": 4779 }, { "epoch": 1.5489306545690213, "grad_norm": 0.5072797536849976, "learning_rate": 4.9790094648624605e-06, "loss": 0.114, "step": 4780 }, { "epoch": 1.549254698639015, "grad_norm": 0.44148191809654236, "learning_rate": 4.977260265197365e-06, "loss": 0.1056, "step": 4781 }, { "epoch": 1.5495787427090084, "grad_norm": 0.43735694885253906, "learning_rate": 4.975511068315391e-06, "loss": 0.1013, "step": 4782 }, { "epoch": 1.5499027867790018, "grad_norm": 0.43392884731292725, "learning_rate": 4.9737618744306274e-06, "loss": 0.0987, "step": 4783 }, { "epoch": 1.5502268308489955, "grad_norm": 0.48035189509391785, "learning_rate": 4.972012683757155e-06, "loss": 0.1149, "step": 4784 }, { "epoch": 1.5505508749189891, "grad_norm": 0.41511499881744385, "learning_rate": 4.970263496509059e-06, "loss": 0.0949, "step": 4785 }, { "epoch": 1.5508749189889826, "grad_norm": 0.4699539542198181, "learning_rate": 4.968514312900423e-06, "loss": 0.1172, "step": 4786 }, { "epoch": 1.551198963058976, "grad_norm": 0.460025817155838, "learning_rate": 4.9667651331453315e-06, "loss": 0.1053, "step": 4787 }, { "epoch": 1.5515230071289694, "grad_norm": 0.4553222358226776, "learning_rate": 4.965015957457866e-06, "loss": 0.1137, "step": 4788 }, { "epoch": 1.551847051198963, "grad_norm": 0.47193190455436707, "learning_rate": 4.963266786052107e-06, "loss": 0.1175, "step": 4789 }, { "epoch": 1.5521710952689567, "grad_norm": 0.4407771825790405, "learning_rate": 4.961517619142139e-06, "loss": 0.1051, "step": 4790 }, { "epoch": 1.5524951393389501, "grad_norm": 0.4660543203353882, "learning_rate": 4.959768456942041e-06, "loss": 0.1074, "step": 4791 }, { "epoch": 1.5528191834089435, "grad_norm": 0.4992606043815613, "learning_rate": 4.958019299665895e-06, "loss": 0.1146, "step": 4792 }, { "epoch": 1.5531432274789372, "grad_norm": 0.47650009393692017, "learning_rate": 4.956270147527782e-06, "loss": 0.1147, "step": 4793 }, { "epoch": 1.5534672715489306, "grad_norm": 0.4672715961933136, "learning_rate": 4.954521000741777e-06, "loss": 0.1098, "step": 4794 }, { "epoch": 1.5537913156189243, "grad_norm": 0.4578569829463959, "learning_rate": 4.952771859521962e-06, "loss": 0.1177, "step": 4795 }, { "epoch": 1.5541153596889177, "grad_norm": 0.4631955325603485, "learning_rate": 4.951022724082414e-06, "loss": 0.1125, "step": 4796 }, { "epoch": 1.554439403758911, "grad_norm": 0.41857489943504333, "learning_rate": 4.949273594637213e-06, "loss": 0.0922, "step": 4797 }, { "epoch": 1.5547634478289047, "grad_norm": 0.43116259574890137, "learning_rate": 4.947524471400428e-06, "loss": 0.1115, "step": 4798 }, { "epoch": 1.5550874918988984, "grad_norm": 0.4329448938369751, "learning_rate": 4.945775354586144e-06, "loss": 0.1026, "step": 4799 }, { "epoch": 1.5554115359688918, "grad_norm": 0.4677078127861023, "learning_rate": 4.944026244408431e-06, "loss": 0.1103, "step": 4800 }, { "epoch": 1.5557355800388852, "grad_norm": 0.4555562436580658, "learning_rate": 4.942277141081361e-06, "loss": 0.1044, "step": 4801 }, { "epoch": 1.5560596241088787, "grad_norm": 0.4801924228668213, "learning_rate": 4.940528044819013e-06, "loss": 0.1152, "step": 4802 }, { "epoch": 1.5563836681788723, "grad_norm": 0.4613364338874817, "learning_rate": 4.938778955835454e-06, "loss": 0.1128, "step": 4803 }, { "epoch": 1.556707712248866, "grad_norm": 0.43879398703575134, "learning_rate": 4.937029874344761e-06, "loss": 0.1068, "step": 4804 }, { "epoch": 1.5570317563188594, "grad_norm": 0.4469672739505768, "learning_rate": 4.935280800561002e-06, "loss": 0.1102, "step": 4805 }, { "epoch": 1.5573558003888528, "grad_norm": 0.42352110147476196, "learning_rate": 4.933531734698244e-06, "loss": 0.0962, "step": 4806 }, { "epoch": 1.5576798444588464, "grad_norm": 0.4701560437679291, "learning_rate": 4.93178267697056e-06, "loss": 0.1156, "step": 4807 }, { "epoch": 1.55800388852884, "grad_norm": 0.4738878905773163, "learning_rate": 4.930033627592014e-06, "loss": 0.117, "step": 4808 }, { "epoch": 1.5583279325988335, "grad_norm": 0.46682971715927124, "learning_rate": 4.928284586776676e-06, "loss": 0.1149, "step": 4809 }, { "epoch": 1.558651976668827, "grad_norm": 0.4412629306316376, "learning_rate": 4.9265355547386095e-06, "loss": 0.1019, "step": 4810 }, { "epoch": 1.5589760207388204, "grad_norm": 0.4332481026649475, "learning_rate": 4.924786531691881e-06, "loss": 0.0998, "step": 4811 }, { "epoch": 1.559300064808814, "grad_norm": 0.46273818612098694, "learning_rate": 4.923037517850554e-06, "loss": 0.1141, "step": 4812 }, { "epoch": 1.5596241088788076, "grad_norm": 0.4398345947265625, "learning_rate": 4.921288513428687e-06, "loss": 0.1053, "step": 4813 }, { "epoch": 1.559948152948801, "grad_norm": 0.46956223249435425, "learning_rate": 4.9195395186403455e-06, "loss": 0.1148, "step": 4814 }, { "epoch": 1.5602721970187945, "grad_norm": 0.4423372745513916, "learning_rate": 4.917790533699587e-06, "loss": 0.0994, "step": 4815 }, { "epoch": 1.560596241088788, "grad_norm": 0.4394659996032715, "learning_rate": 4.916041558820473e-06, "loss": 0.1058, "step": 4816 }, { "epoch": 1.5609202851587816, "grad_norm": 0.4328305721282959, "learning_rate": 4.914292594217059e-06, "loss": 0.1032, "step": 4817 }, { "epoch": 1.5612443292287752, "grad_norm": 0.4785172939300537, "learning_rate": 4.912543640103401e-06, "loss": 0.1105, "step": 4818 }, { "epoch": 1.5615683732987686, "grad_norm": 0.45546668767929077, "learning_rate": 4.9107946966935555e-06, "loss": 0.1097, "step": 4819 }, { "epoch": 1.561892417368762, "grad_norm": 0.42712464928627014, "learning_rate": 4.909045764201574e-06, "loss": 0.1036, "step": 4820 }, { "epoch": 1.5622164614387557, "grad_norm": 0.4540826380252838, "learning_rate": 4.907296842841512e-06, "loss": 0.1063, "step": 4821 }, { "epoch": 1.5625405055087493, "grad_norm": 0.4345269799232483, "learning_rate": 4.905547932827417e-06, "loss": 0.1072, "step": 4822 }, { "epoch": 1.5628645495787428, "grad_norm": 0.42495936155319214, "learning_rate": 4.903799034373343e-06, "loss": 0.0992, "step": 4823 }, { "epoch": 1.5631885936487362, "grad_norm": 0.45917809009552, "learning_rate": 4.902050147693336e-06, "loss": 0.1129, "step": 4824 }, { "epoch": 1.5635126377187296, "grad_norm": 0.4842449724674225, "learning_rate": 4.90030127300144e-06, "loss": 0.1156, "step": 4825 }, { "epoch": 1.5638366817887233, "grad_norm": 0.44549980759620667, "learning_rate": 4.898552410511706e-06, "loss": 0.1087, "step": 4826 }, { "epoch": 1.564160725858717, "grad_norm": 0.5054349899291992, "learning_rate": 4.896803560438174e-06, "loss": 0.1139, "step": 4827 }, { "epoch": 1.5644847699287103, "grad_norm": 0.45134031772613525, "learning_rate": 4.8950547229948874e-06, "loss": 0.1026, "step": 4828 }, { "epoch": 1.5648088139987038, "grad_norm": 0.46557843685150146, "learning_rate": 4.893305898395887e-06, "loss": 0.1127, "step": 4829 }, { "epoch": 1.5651328580686974, "grad_norm": 0.44793131947517395, "learning_rate": 4.89155708685521e-06, "loss": 0.1105, "step": 4830 }, { "epoch": 1.5654569021386908, "grad_norm": 0.4677371382713318, "learning_rate": 4.889808288586897e-06, "loss": 0.1005, "step": 4831 }, { "epoch": 1.5657809462086845, "grad_norm": 0.4556576609611511, "learning_rate": 4.888059503804981e-06, "loss": 0.1122, "step": 4832 }, { "epoch": 1.5661049902786779, "grad_norm": 0.4515218138694763, "learning_rate": 4.8863107327235005e-06, "loss": 0.1084, "step": 4833 }, { "epoch": 1.5664290343486713, "grad_norm": 0.44593796133995056, "learning_rate": 4.884561975556483e-06, "loss": 0.1058, "step": 4834 }, { "epoch": 1.566753078418665, "grad_norm": 0.4894297122955322, "learning_rate": 4.882813232517965e-06, "loss": 0.1187, "step": 4835 }, { "epoch": 1.5670771224886586, "grad_norm": 0.441567599773407, "learning_rate": 4.881064503821973e-06, "loss": 0.1093, "step": 4836 }, { "epoch": 1.567401166558652, "grad_norm": 0.4360569417476654, "learning_rate": 4.879315789682533e-06, "loss": 0.1079, "step": 4837 }, { "epoch": 1.5677252106286454, "grad_norm": 0.4840516746044159, "learning_rate": 4.877567090313671e-06, "loss": 0.1228, "step": 4838 }, { "epoch": 1.5680492546986389, "grad_norm": 0.4710215926170349, "learning_rate": 4.875818405929413e-06, "loss": 0.1118, "step": 4839 }, { "epoch": 1.5683732987686325, "grad_norm": 0.43163785338401794, "learning_rate": 4.874069736743781e-06, "loss": 0.1013, "step": 4840 }, { "epoch": 1.5686973428386262, "grad_norm": 0.43744251132011414, "learning_rate": 4.872321082970792e-06, "loss": 0.107, "step": 4841 }, { "epoch": 1.5690213869086196, "grad_norm": 0.457549124956131, "learning_rate": 4.870572444824469e-06, "loss": 0.1083, "step": 4842 }, { "epoch": 1.569345430978613, "grad_norm": 0.451315701007843, "learning_rate": 4.868823822518825e-06, "loss": 0.1123, "step": 4843 }, { "epoch": 1.5696694750486067, "grad_norm": 0.3946089446544647, "learning_rate": 4.867075216267873e-06, "loss": 0.0902, "step": 4844 }, { "epoch": 1.5699935191186, "grad_norm": 0.45311471819877625, "learning_rate": 4.865326626285629e-06, "loss": 0.1133, "step": 4845 }, { "epoch": 1.5703175631885937, "grad_norm": 0.42419302463531494, "learning_rate": 4.863578052786103e-06, "loss": 0.0989, "step": 4846 }, { "epoch": 1.5706416072585871, "grad_norm": 0.4588811993598938, "learning_rate": 4.861829495983302e-06, "loss": 0.1053, "step": 4847 }, { "epoch": 1.5709656513285806, "grad_norm": 0.44163978099823, "learning_rate": 4.860080956091234e-06, "loss": 0.1155, "step": 4848 }, { "epoch": 1.5712896953985742, "grad_norm": 0.44390761852264404, "learning_rate": 4.858332433323902e-06, "loss": 0.1041, "step": 4849 }, { "epoch": 1.5716137394685679, "grad_norm": 0.47635310888290405, "learning_rate": 4.856583927895309e-06, "loss": 0.1112, "step": 4850 }, { "epoch": 1.5719377835385613, "grad_norm": 0.45071783661842346, "learning_rate": 4.854835440019453e-06, "loss": 0.1029, "step": 4851 }, { "epoch": 1.5722618276085547, "grad_norm": 0.47139331698417664, "learning_rate": 4.853086969910336e-06, "loss": 0.1149, "step": 4852 }, { "epoch": 1.5725858716785481, "grad_norm": 0.5124130249023438, "learning_rate": 4.85133851778195e-06, "loss": 0.1167, "step": 4853 }, { "epoch": 1.5729099157485418, "grad_norm": 0.45137298107147217, "learning_rate": 4.8495900838482915e-06, "loss": 0.116, "step": 4854 }, { "epoch": 1.5732339598185354, "grad_norm": 0.45909252762794495, "learning_rate": 4.847841668323351e-06, "loss": 0.1119, "step": 4855 }, { "epoch": 1.5735580038885288, "grad_norm": 0.4754926860332489, "learning_rate": 4.846093271421115e-06, "loss": 0.1099, "step": 4856 }, { "epoch": 1.5738820479585223, "grad_norm": 0.4783075451850891, "learning_rate": 4.844344893355575e-06, "loss": 0.118, "step": 4857 }, { "epoch": 1.574206092028516, "grad_norm": 0.45256945490837097, "learning_rate": 4.842596534340712e-06, "loss": 0.1053, "step": 4858 }, { "epoch": 1.5745301360985096, "grad_norm": 0.4893447458744049, "learning_rate": 4.84084819459051e-06, "loss": 0.1132, "step": 4859 }, { "epoch": 1.574854180168503, "grad_norm": 0.47987404465675354, "learning_rate": 4.839099874318948e-06, "loss": 0.1169, "step": 4860 }, { "epoch": 1.5751782242384964, "grad_norm": 0.4620630145072937, "learning_rate": 4.837351573740004e-06, "loss": 0.1033, "step": 4861 }, { "epoch": 1.5755022683084898, "grad_norm": 0.48919492959976196, "learning_rate": 4.835603293067653e-06, "loss": 0.1187, "step": 4862 }, { "epoch": 1.5758263123784835, "grad_norm": 0.45602983236312866, "learning_rate": 4.833855032515866e-06, "loss": 0.1127, "step": 4863 }, { "epoch": 1.5761503564484771, "grad_norm": 0.4560927152633667, "learning_rate": 4.832106792298616e-06, "loss": 0.1142, "step": 4864 }, { "epoch": 1.5764744005184705, "grad_norm": 0.48238566517829895, "learning_rate": 4.830358572629868e-06, "loss": 0.1113, "step": 4865 }, { "epoch": 1.576798444588464, "grad_norm": 0.46003594994544983, "learning_rate": 4.82861037372359e-06, "loss": 0.1095, "step": 4866 }, { "epoch": 1.5771224886584574, "grad_norm": 0.46108075976371765, "learning_rate": 4.826862195793743e-06, "loss": 0.1192, "step": 4867 }, { "epoch": 1.577446532728451, "grad_norm": 0.45616206526756287, "learning_rate": 4.825114039054286e-06, "loss": 0.1084, "step": 4868 }, { "epoch": 1.5777705767984447, "grad_norm": 0.45565494894981384, "learning_rate": 4.823365903719179e-06, "loss": 0.1098, "step": 4869 }, { "epoch": 1.578094620868438, "grad_norm": 0.42677366733551025, "learning_rate": 4.821617790002374e-06, "loss": 0.1021, "step": 4870 }, { "epoch": 1.5784186649384315, "grad_norm": 0.4569999873638153, "learning_rate": 4.819869698117826e-06, "loss": 0.1168, "step": 4871 }, { "epoch": 1.5787427090084252, "grad_norm": 0.4749670922756195, "learning_rate": 4.818121628279483e-06, "loss": 0.1223, "step": 4872 }, { "epoch": 1.5790667530784188, "grad_norm": 0.46919116377830505, "learning_rate": 4.81637358070129e-06, "loss": 0.1075, "step": 4873 }, { "epoch": 1.5793907971484122, "grad_norm": 0.4429967403411865, "learning_rate": 4.8146255555971964e-06, "loss": 0.0993, "step": 4874 }, { "epoch": 1.5797148412184057, "grad_norm": 0.41248396039009094, "learning_rate": 4.812877553181136e-06, "loss": 0.0934, "step": 4875 }, { "epoch": 1.580038885288399, "grad_norm": 0.43637144565582275, "learning_rate": 4.8111295736670545e-06, "loss": 0.1069, "step": 4876 }, { "epoch": 1.5803629293583927, "grad_norm": 0.4072212278842926, "learning_rate": 4.809381617268884e-06, "loss": 0.101, "step": 4877 }, { "epoch": 1.5806869734283864, "grad_norm": 0.48271292448043823, "learning_rate": 4.807633684200559e-06, "loss": 0.1169, "step": 4878 }, { "epoch": 1.5810110174983798, "grad_norm": 0.4623858332633972, "learning_rate": 4.805885774676009e-06, "loss": 0.111, "step": 4879 }, { "epoch": 1.5813350615683732, "grad_norm": 0.4412733316421509, "learning_rate": 4.804137888909159e-06, "loss": 0.1021, "step": 4880 }, { "epoch": 1.5816591056383669, "grad_norm": 0.48238319158554077, "learning_rate": 4.802390027113938e-06, "loss": 0.1137, "step": 4881 }, { "epoch": 1.5819831497083603, "grad_norm": 0.4745473265647888, "learning_rate": 4.800642189504262e-06, "loss": 0.1126, "step": 4882 }, { "epoch": 1.582307193778354, "grad_norm": 0.46913549304008484, "learning_rate": 4.798894376294054e-06, "loss": 0.1094, "step": 4883 }, { "epoch": 1.5826312378483474, "grad_norm": 0.4792521297931671, "learning_rate": 4.7971465876972274e-06, "loss": 0.1053, "step": 4884 }, { "epoch": 1.5829552819183408, "grad_norm": 0.4615633189678192, "learning_rate": 4.795398823927693e-06, "loss": 0.1135, "step": 4885 }, { "epoch": 1.5832793259883344, "grad_norm": 0.4534716010093689, "learning_rate": 4.7936510851993635e-06, "loss": 0.1097, "step": 4886 }, { "epoch": 1.583603370058328, "grad_norm": 0.4541871249675751, "learning_rate": 4.791903371726141e-06, "loss": 0.1051, "step": 4887 }, { "epoch": 1.5839274141283215, "grad_norm": 0.47825732827186584, "learning_rate": 4.790155683721935e-06, "loss": 0.1121, "step": 4888 }, { "epoch": 1.584251458198315, "grad_norm": 0.4308681786060333, "learning_rate": 4.78840802140064e-06, "loss": 0.0917, "step": 4889 }, { "epoch": 1.5845755022683083, "grad_norm": 0.4852618873119354, "learning_rate": 4.7866603849761535e-06, "loss": 0.1168, "step": 4890 }, { "epoch": 1.584899546338302, "grad_norm": 0.41038641333580017, "learning_rate": 4.7849127746623735e-06, "loss": 0.1029, "step": 4891 }, { "epoch": 1.5852235904082956, "grad_norm": 0.4608546495437622, "learning_rate": 4.783165190673186e-06, "loss": 0.1084, "step": 4892 }, { "epoch": 1.585547634478289, "grad_norm": 0.513335645198822, "learning_rate": 4.781417633222481e-06, "loss": 0.1186, "step": 4893 }, { "epoch": 1.5858716785482825, "grad_norm": 0.46274006366729736, "learning_rate": 4.779670102524139e-06, "loss": 0.1118, "step": 4894 }, { "epoch": 1.5861957226182761, "grad_norm": 0.48566991090774536, "learning_rate": 4.777922598792047e-06, "loss": 0.1218, "step": 4895 }, { "epoch": 1.5865197666882696, "grad_norm": 0.45120298862457275, "learning_rate": 4.776175122240077e-06, "loss": 0.1126, "step": 4896 }, { "epoch": 1.5868438107582632, "grad_norm": 0.46900275349617004, "learning_rate": 4.774427673082105e-06, "loss": 0.1143, "step": 4897 }, { "epoch": 1.5871678548282566, "grad_norm": 0.477458655834198, "learning_rate": 4.772680251532003e-06, "loss": 0.1144, "step": 4898 }, { "epoch": 1.58749189889825, "grad_norm": 0.42421650886535645, "learning_rate": 4.7709328578036365e-06, "loss": 0.1071, "step": 4899 }, { "epoch": 1.5878159429682437, "grad_norm": 0.4880410432815552, "learning_rate": 4.769185492110873e-06, "loss": 0.1218, "step": 4900 }, { "epoch": 1.5881399870382373, "grad_norm": 0.4839230477809906, "learning_rate": 4.767438154667568e-06, "loss": 0.1096, "step": 4901 }, { "epoch": 1.5884640311082308, "grad_norm": 0.45489901304244995, "learning_rate": 4.765690845687584e-06, "loss": 0.1192, "step": 4902 }, { "epoch": 1.5887880751782242, "grad_norm": 0.4356914460659027, "learning_rate": 4.763943565384772e-06, "loss": 0.1093, "step": 4903 }, { "epoch": 1.5891121192482176, "grad_norm": 0.4305530786514282, "learning_rate": 4.7621963139729795e-06, "loss": 0.1021, "step": 4904 }, { "epoch": 1.5894361633182112, "grad_norm": 0.43889176845550537, "learning_rate": 4.76044909166606e-06, "loss": 0.1029, "step": 4905 }, { "epoch": 1.589760207388205, "grad_norm": 0.4710550904273987, "learning_rate": 4.758701898677848e-06, "loss": 0.1161, "step": 4906 }, { "epoch": 1.5900842514581983, "grad_norm": 0.44871771335601807, "learning_rate": 4.756954735222192e-06, "loss": 0.1169, "step": 4907 }, { "epoch": 1.5904082955281917, "grad_norm": 0.4578435719013214, "learning_rate": 4.755207601512922e-06, "loss": 0.1081, "step": 4908 }, { "epoch": 1.5907323395981854, "grad_norm": 0.4500477612018585, "learning_rate": 4.75346049776387e-06, "loss": 0.1108, "step": 4909 }, { "epoch": 1.591056383668179, "grad_norm": 0.44578132033348083, "learning_rate": 4.751713424188869e-06, "loss": 0.1028, "step": 4910 }, { "epoch": 1.5913804277381725, "grad_norm": 0.4344480037689209, "learning_rate": 4.749966381001741e-06, "loss": 0.1079, "step": 4911 }, { "epoch": 1.5917044718081659, "grad_norm": 0.42421379685401917, "learning_rate": 4.748219368416306e-06, "loss": 0.1009, "step": 4912 }, { "epoch": 1.5920285158781593, "grad_norm": 0.45676690340042114, "learning_rate": 4.746472386646383e-06, "loss": 0.1078, "step": 4913 }, { "epoch": 1.592352559948153, "grad_norm": 0.4680733382701874, "learning_rate": 4.744725435905787e-06, "loss": 0.1063, "step": 4914 }, { "epoch": 1.5926766040181466, "grad_norm": 0.46036866307258606, "learning_rate": 4.742978516408326e-06, "loss": 0.1012, "step": 4915 }, { "epoch": 1.59300064808814, "grad_norm": 0.4334714412689209, "learning_rate": 4.741231628367805e-06, "loss": 0.1049, "step": 4916 }, { "epoch": 1.5933246921581334, "grad_norm": 0.4619475305080414, "learning_rate": 4.739484771998029e-06, "loss": 0.1124, "step": 4917 }, { "epoch": 1.5936487362281269, "grad_norm": 0.45115354657173157, "learning_rate": 4.737737947512793e-06, "loss": 0.1089, "step": 4918 }, { "epoch": 1.5939727802981205, "grad_norm": 0.46323361992836, "learning_rate": 4.735991155125896e-06, "loss": 0.1125, "step": 4919 }, { "epoch": 1.5942968243681142, "grad_norm": 0.4536522328853607, "learning_rate": 4.734244395051123e-06, "loss": 0.1078, "step": 4920 }, { "epoch": 1.5946208684381076, "grad_norm": 0.4844619631767273, "learning_rate": 4.732497667502266e-06, "loss": 0.1117, "step": 4921 }, { "epoch": 1.594944912508101, "grad_norm": 0.47589054703712463, "learning_rate": 4.730750972693104e-06, "loss": 0.1229, "step": 4922 }, { "epoch": 1.5952689565780946, "grad_norm": 0.4797200858592987, "learning_rate": 4.729004310837417e-06, "loss": 0.1115, "step": 4923 }, { "epoch": 1.5955930006480883, "grad_norm": 0.49576741456985474, "learning_rate": 4.727257682148979e-06, "loss": 0.1177, "step": 4924 }, { "epoch": 1.5959170447180817, "grad_norm": 0.4299625754356384, "learning_rate": 4.725511086841557e-06, "loss": 0.1033, "step": 4925 }, { "epoch": 1.5962410887880751, "grad_norm": 0.5060962438583374, "learning_rate": 4.723764525128925e-06, "loss": 0.127, "step": 4926 }, { "epoch": 1.5965651328580686, "grad_norm": 0.46263906359672546, "learning_rate": 4.72201799722484e-06, "loss": 0.115, "step": 4927 }, { "epoch": 1.5968891769280622, "grad_norm": 0.42641258239746094, "learning_rate": 4.720271503343059e-06, "loss": 0.1085, "step": 4928 }, { "epoch": 1.5972132209980558, "grad_norm": 0.4287305772304535, "learning_rate": 4.71852504369734e-06, "loss": 0.1099, "step": 4929 }, { "epoch": 1.5975372650680493, "grad_norm": 0.4682078957557678, "learning_rate": 4.716778618501429e-06, "loss": 0.1144, "step": 4930 }, { "epoch": 1.5978613091380427, "grad_norm": 0.443132609128952, "learning_rate": 4.715032227969075e-06, "loss": 0.1045, "step": 4931 }, { "epoch": 1.5981853532080363, "grad_norm": 0.3979075253009796, "learning_rate": 4.713285872314016e-06, "loss": 0.0959, "step": 4932 }, { "epoch": 1.5985093972780298, "grad_norm": 0.43410465121269226, "learning_rate": 4.711539551749993e-06, "loss": 0.0988, "step": 4933 }, { "epoch": 1.5988334413480234, "grad_norm": 0.4905455708503723, "learning_rate": 4.709793266490735e-06, "loss": 0.1202, "step": 4934 }, { "epoch": 1.5991574854180168, "grad_norm": 0.45668309926986694, "learning_rate": 4.7080470167499705e-06, "loss": 0.11, "step": 4935 }, { "epoch": 1.5994815294880103, "grad_norm": 0.45235005021095276, "learning_rate": 4.706300802741427e-06, "loss": 0.1109, "step": 4936 }, { "epoch": 1.599805573558004, "grad_norm": 0.4434433877468109, "learning_rate": 4.70455462467882e-06, "loss": 0.1076, "step": 4937 }, { "epoch": 1.6001296176279975, "grad_norm": 0.43349820375442505, "learning_rate": 4.702808482775869e-06, "loss": 0.1068, "step": 4938 }, { "epoch": 1.600453661697991, "grad_norm": 0.45358890295028687, "learning_rate": 4.701062377246282e-06, "loss": 0.1063, "step": 4939 }, { "epoch": 1.6007777057679844, "grad_norm": 0.4753774404525757, "learning_rate": 4.699316308303764e-06, "loss": 0.1064, "step": 4940 }, { "epoch": 1.6011017498379778, "grad_norm": 0.45095357298851013, "learning_rate": 4.697570276162021e-06, "loss": 0.1153, "step": 4941 }, { "epoch": 1.6014257939079715, "grad_norm": 0.47847557067871094, "learning_rate": 4.695824281034747e-06, "loss": 0.1183, "step": 4942 }, { "epoch": 1.601749837977965, "grad_norm": 0.44354525208473206, "learning_rate": 4.694078323135638e-06, "loss": 0.1026, "step": 4943 }, { "epoch": 1.6020738820479585, "grad_norm": 0.4864445924758911, "learning_rate": 4.69233240267838e-06, "loss": 0.1131, "step": 4944 }, { "epoch": 1.602397926117952, "grad_norm": 0.4234994649887085, "learning_rate": 4.690586519876658e-06, "loss": 0.103, "step": 4945 }, { "epoch": 1.6027219701879456, "grad_norm": 0.43492475152015686, "learning_rate": 4.688840674944151e-06, "loss": 0.1085, "step": 4946 }, { "epoch": 1.6030460142579392, "grad_norm": 0.4122197926044464, "learning_rate": 4.687094868094531e-06, "loss": 0.1008, "step": 4947 }, { "epoch": 1.6033700583279327, "grad_norm": 0.46981731057167053, "learning_rate": 4.685349099541473e-06, "loss": 0.1127, "step": 4948 }, { "epoch": 1.603694102397926, "grad_norm": 0.4796302914619446, "learning_rate": 4.683603369498636e-06, "loss": 0.1127, "step": 4949 }, { "epoch": 1.6040181464679195, "grad_norm": 0.44304171204566956, "learning_rate": 4.681857678179685e-06, "loss": 0.1103, "step": 4950 }, { "epoch": 1.6043421905379132, "grad_norm": 0.4452519714832306, "learning_rate": 4.680112025798275e-06, "loss": 0.1099, "step": 4951 }, { "epoch": 1.6046662346079068, "grad_norm": 0.464995801448822, "learning_rate": 4.678366412568055e-06, "loss": 0.1123, "step": 4952 }, { "epoch": 1.6049902786779002, "grad_norm": 0.46111589670181274, "learning_rate": 4.676620838702674e-06, "loss": 0.1055, "step": 4953 }, { "epoch": 1.6053143227478937, "grad_norm": 0.43135613203048706, "learning_rate": 4.6748753044157705e-06, "loss": 0.0967, "step": 4954 }, { "epoch": 1.605638366817887, "grad_norm": 0.4677044153213501, "learning_rate": 4.673129809920983e-06, "loss": 0.1084, "step": 4955 }, { "epoch": 1.6059624108878807, "grad_norm": 0.5228461623191833, "learning_rate": 4.671384355431941e-06, "loss": 0.131, "step": 4956 }, { "epoch": 1.6062864549578744, "grad_norm": 0.44885966181755066, "learning_rate": 4.669638941162274e-06, "loss": 0.104, "step": 4957 }, { "epoch": 1.6066104990278678, "grad_norm": 0.4723048210144043, "learning_rate": 4.6678935673256036e-06, "loss": 0.1108, "step": 4958 }, { "epoch": 1.6069345430978612, "grad_norm": 0.5116240382194519, "learning_rate": 4.666148234135543e-06, "loss": 0.1297, "step": 4959 }, { "epoch": 1.6072585871678549, "grad_norm": 0.4954541027545929, "learning_rate": 4.664402941805709e-06, "loss": 0.1221, "step": 4960 }, { "epoch": 1.6075826312378485, "grad_norm": 0.45687317848205566, "learning_rate": 4.662657690549703e-06, "loss": 0.1065, "step": 4961 }, { "epoch": 1.607906675307842, "grad_norm": 0.45222848653793335, "learning_rate": 4.6609124805811325e-06, "loss": 0.1117, "step": 4962 }, { "epoch": 1.6082307193778353, "grad_norm": 0.48122653365135193, "learning_rate": 4.659167312113592e-06, "loss": 0.107, "step": 4963 }, { "epoch": 1.6085547634478288, "grad_norm": 0.47544240951538086, "learning_rate": 4.657422185360671e-06, "loss": 0.1191, "step": 4964 }, { "epoch": 1.6088788075178224, "grad_norm": 0.47073894739151, "learning_rate": 4.65567710053596e-06, "loss": 0.1115, "step": 4965 }, { "epoch": 1.609202851587816, "grad_norm": 0.49683570861816406, "learning_rate": 4.653932057853037e-06, "loss": 0.1252, "step": 4966 }, { "epoch": 1.6095268956578095, "grad_norm": 0.5067100524902344, "learning_rate": 4.6521870575254815e-06, "loss": 0.1291, "step": 4967 }, { "epoch": 1.609850939727803, "grad_norm": 0.4912336766719818, "learning_rate": 4.650442099766861e-06, "loss": 0.119, "step": 4968 }, { "epoch": 1.6101749837977966, "grad_norm": 0.4592455327510834, "learning_rate": 4.648697184790745e-06, "loss": 0.1073, "step": 4969 }, { "epoch": 1.61049902786779, "grad_norm": 0.49731016159057617, "learning_rate": 4.646952312810694e-06, "loss": 0.1223, "step": 4970 }, { "epoch": 1.6108230719377836, "grad_norm": 0.4806722402572632, "learning_rate": 4.645207484040259e-06, "loss": 0.1132, "step": 4971 }, { "epoch": 1.611147116007777, "grad_norm": 0.474865198135376, "learning_rate": 4.6434626986929954e-06, "loss": 0.1147, "step": 4972 }, { "epoch": 1.6114711600777705, "grad_norm": 0.4066011905670166, "learning_rate": 4.641717956982444e-06, "loss": 0.0925, "step": 4973 }, { "epoch": 1.6117952041477641, "grad_norm": 0.4577323794364929, "learning_rate": 4.639973259122148e-06, "loss": 0.1128, "step": 4974 }, { "epoch": 1.6121192482177578, "grad_norm": 0.47177231311798096, "learning_rate": 4.638228605325641e-06, "loss": 0.11, "step": 4975 }, { "epoch": 1.6124432922877512, "grad_norm": 0.45513543486595154, "learning_rate": 4.636483995806448e-06, "loss": 0.1096, "step": 4976 }, { "epoch": 1.6127673363577446, "grad_norm": 0.45789656043052673, "learning_rate": 4.634739430778097e-06, "loss": 0.1162, "step": 4977 }, { "epoch": 1.613091380427738, "grad_norm": 0.4321471154689789, "learning_rate": 4.6329949104541e-06, "loss": 0.1006, "step": 4978 }, { "epoch": 1.6134154244977317, "grad_norm": 0.45868197083473206, "learning_rate": 4.631250435047977e-06, "loss": 0.1118, "step": 4979 }, { "epoch": 1.6137394685677253, "grad_norm": 0.4248989522457123, "learning_rate": 4.629506004773227e-06, "loss": 0.1028, "step": 4980 }, { "epoch": 1.6140635126377187, "grad_norm": 0.5093784928321838, "learning_rate": 4.627761619843359e-06, "loss": 0.1277, "step": 4981 }, { "epoch": 1.6143875567077122, "grad_norm": 0.43014755845069885, "learning_rate": 4.626017280471865e-06, "loss": 0.0985, "step": 4982 }, { "epoch": 1.6147116007777058, "grad_norm": 0.47510039806365967, "learning_rate": 4.624272986872234e-06, "loss": 0.1129, "step": 4983 }, { "epoch": 1.6150356448476992, "grad_norm": 0.4764617383480072, "learning_rate": 4.622528739257952e-06, "loss": 0.1209, "step": 4984 }, { "epoch": 1.6153596889176929, "grad_norm": 0.4343688189983368, "learning_rate": 4.620784537842499e-06, "loss": 0.1084, "step": 4985 }, { "epoch": 1.6156837329876863, "grad_norm": 0.4666691720485687, "learning_rate": 4.6190403828393464e-06, "loss": 0.1077, "step": 4986 }, { "epoch": 1.6160077770576797, "grad_norm": 0.44340160489082336, "learning_rate": 4.617296274461964e-06, "loss": 0.0994, "step": 4987 }, { "epoch": 1.6163318211276734, "grad_norm": 0.4468873143196106, "learning_rate": 4.6155522129238124e-06, "loss": 0.1084, "step": 4988 }, { "epoch": 1.616655865197667, "grad_norm": 0.44237035512924194, "learning_rate": 4.613808198438349e-06, "loss": 0.1041, "step": 4989 }, { "epoch": 1.6169799092676604, "grad_norm": 0.44018039107322693, "learning_rate": 4.612064231219021e-06, "loss": 0.101, "step": 4990 }, { "epoch": 1.6173039533376539, "grad_norm": 0.41502729058265686, "learning_rate": 4.610320311479279e-06, "loss": 0.0933, "step": 4991 }, { "epoch": 1.6176279974076473, "grad_norm": 0.4824730157852173, "learning_rate": 4.608576439432555e-06, "loss": 0.1183, "step": 4992 }, { "epoch": 1.617952041477641, "grad_norm": 0.4967108368873596, "learning_rate": 4.606832615292288e-06, "loss": 0.1192, "step": 4993 }, { "epoch": 1.6182760855476346, "grad_norm": 0.4692974090576172, "learning_rate": 4.605088839271903e-06, "loss": 0.1091, "step": 4994 }, { "epoch": 1.618600129617628, "grad_norm": 0.43657752871513367, "learning_rate": 4.603345111584819e-06, "loss": 0.1037, "step": 4995 }, { "epoch": 1.6189241736876214, "grad_norm": 0.48579537868499756, "learning_rate": 4.6016014324444545e-06, "loss": 0.1194, "step": 4996 }, { "epoch": 1.619248217757615, "grad_norm": 0.44366809725761414, "learning_rate": 4.5998578020642185e-06, "loss": 0.1083, "step": 4997 }, { "epoch": 1.6195722618276087, "grad_norm": 0.43966636061668396, "learning_rate": 4.598114220657514e-06, "loss": 0.101, "step": 4998 }, { "epoch": 1.6198963058976021, "grad_norm": 0.4372827708721161, "learning_rate": 4.596370688437736e-06, "loss": 0.1036, "step": 4999 }, { "epoch": 1.6202203499675956, "grad_norm": 0.47269198298454285, "learning_rate": 4.59462720561828e-06, "loss": 0.1055, "step": 5000 }, { "epoch": 1.620544394037589, "grad_norm": 0.5441954135894775, "learning_rate": 4.592883772412531e-06, "loss": 0.1284, "step": 5001 }, { "epoch": 1.6208684381075826, "grad_norm": 0.44606369733810425, "learning_rate": 4.591140389033863e-06, "loss": 0.1076, "step": 5002 }, { "epoch": 1.6211924821775763, "grad_norm": 0.4738067388534546, "learning_rate": 4.589397055695658e-06, "loss": 0.1144, "step": 5003 }, { "epoch": 1.6215165262475697, "grad_norm": 0.44347885251045227, "learning_rate": 4.587653772611275e-06, "loss": 0.1049, "step": 5004 }, { "epoch": 1.6218405703175631, "grad_norm": 0.49658679962158203, "learning_rate": 4.58591053999408e-06, "loss": 0.1182, "step": 5005 }, { "epoch": 1.6221646143875565, "grad_norm": 0.5013179183006287, "learning_rate": 4.584167358057427e-06, "loss": 0.1141, "step": 5006 }, { "epoch": 1.6224886584575502, "grad_norm": 0.429353266954422, "learning_rate": 4.582424227014662e-06, "loss": 0.1069, "step": 5007 }, { "epoch": 1.6228127025275438, "grad_norm": 0.4228212833404541, "learning_rate": 4.58068114707913e-06, "loss": 0.1019, "step": 5008 }, { "epoch": 1.6231367465975373, "grad_norm": 0.49328097701072693, "learning_rate": 4.5789381184641655e-06, "loss": 0.1215, "step": 5009 }, { "epoch": 1.6234607906675307, "grad_norm": 0.4080681800842285, "learning_rate": 4.577195141383101e-06, "loss": 0.0916, "step": 5010 }, { "epoch": 1.6237848347375243, "grad_norm": 0.45990294218063354, "learning_rate": 4.575452216049256e-06, "loss": 0.1064, "step": 5011 }, { "epoch": 1.624108878807518, "grad_norm": 0.4920955002307892, "learning_rate": 4.573709342675951e-06, "loss": 0.1205, "step": 5012 }, { "epoch": 1.6244329228775114, "grad_norm": 0.4758051931858063, "learning_rate": 4.571966521476496e-06, "loss": 0.112, "step": 5013 }, { "epoch": 1.6247569669475048, "grad_norm": 0.4831903278827667, "learning_rate": 4.570223752664194e-06, "loss": 0.1207, "step": 5014 }, { "epoch": 1.6250810110174982, "grad_norm": 0.49597156047821045, "learning_rate": 4.568481036452345e-06, "loss": 0.1099, "step": 5015 }, { "epoch": 1.625405055087492, "grad_norm": 0.4464362561702728, "learning_rate": 4.566738373054238e-06, "loss": 0.1136, "step": 5016 }, { "epoch": 1.6257290991574855, "grad_norm": 0.47042131423950195, "learning_rate": 4.564995762683162e-06, "loss": 0.1163, "step": 5017 }, { "epoch": 1.626053143227479, "grad_norm": 0.4361095726490021, "learning_rate": 4.563253205552393e-06, "loss": 0.1104, "step": 5018 }, { "epoch": 1.6263771872974724, "grad_norm": 0.4437979757785797, "learning_rate": 4.561510701875204e-06, "loss": 0.109, "step": 5019 }, { "epoch": 1.626701231367466, "grad_norm": 0.427032470703125, "learning_rate": 4.55976825186486e-06, "loss": 0.104, "step": 5020 }, { "epoch": 1.6270252754374595, "grad_norm": 0.4684242904186249, "learning_rate": 4.558025855734618e-06, "loss": 0.113, "step": 5021 }, { "epoch": 1.627349319507453, "grad_norm": 0.4585596024990082, "learning_rate": 4.5562835136977355e-06, "loss": 0.1179, "step": 5022 }, { "epoch": 1.6276733635774465, "grad_norm": 0.4794493019580841, "learning_rate": 4.554541225967452e-06, "loss": 0.1186, "step": 5023 }, { "epoch": 1.62799740764744, "grad_norm": 0.46028104424476624, "learning_rate": 4.552798992757013e-06, "loss": 0.1122, "step": 5024 }, { "epoch": 1.6283214517174336, "grad_norm": 0.4441133439540863, "learning_rate": 4.5510568142796485e-06, "loss": 0.1011, "step": 5025 }, { "epoch": 1.6286454957874272, "grad_norm": 0.41523277759552, "learning_rate": 4.549314690748581e-06, "loss": 0.0969, "step": 5026 }, { "epoch": 1.6289695398574207, "grad_norm": 0.4720069169998169, "learning_rate": 4.547572622377035e-06, "loss": 0.1141, "step": 5027 }, { "epoch": 1.629293583927414, "grad_norm": 0.45224857330322266, "learning_rate": 4.545830609378219e-06, "loss": 0.1108, "step": 5028 }, { "epoch": 1.6296176279974075, "grad_norm": 0.4049452543258667, "learning_rate": 4.5440886519653404e-06, "loss": 0.0942, "step": 5029 }, { "epoch": 1.6299416720674011, "grad_norm": 0.48412343859672546, "learning_rate": 4.542346750351597e-06, "loss": 0.1161, "step": 5030 }, { "epoch": 1.6302657161373948, "grad_norm": 0.4523051381111145, "learning_rate": 4.54060490475018e-06, "loss": 0.1043, "step": 5031 }, { "epoch": 1.6305897602073882, "grad_norm": 0.44593173265457153, "learning_rate": 4.538863115374277e-06, "loss": 0.1093, "step": 5032 }, { "epoch": 1.6309138042773816, "grad_norm": 0.4349612295627594, "learning_rate": 4.537121382437062e-06, "loss": 0.0989, "step": 5033 }, { "epoch": 1.6312378483473753, "grad_norm": 0.5067662596702576, "learning_rate": 4.535379706151711e-06, "loss": 0.121, "step": 5034 }, { "epoch": 1.6315618924173687, "grad_norm": 0.46852073073387146, "learning_rate": 4.533638086731384e-06, "loss": 0.1083, "step": 5035 }, { "epoch": 1.6318859364873624, "grad_norm": 0.4713311195373535, "learning_rate": 4.531896524389242e-06, "loss": 0.1061, "step": 5036 }, { "epoch": 1.6322099805573558, "grad_norm": 0.48059168457984924, "learning_rate": 4.530155019338435e-06, "loss": 0.1173, "step": 5037 }, { "epoch": 1.6325340246273492, "grad_norm": 0.4491584300994873, "learning_rate": 4.528413571792103e-06, "loss": 0.1095, "step": 5038 }, { "epoch": 1.6328580686973428, "grad_norm": 0.47590669989585876, "learning_rate": 4.526672181963386e-06, "loss": 0.1081, "step": 5039 }, { "epoch": 1.6331821127673365, "grad_norm": 0.5039756298065186, "learning_rate": 4.524930850065411e-06, "loss": 0.1202, "step": 5040 }, { "epoch": 1.63350615683733, "grad_norm": 0.4588005542755127, "learning_rate": 4.523189576311301e-06, "loss": 0.1117, "step": 5041 }, { "epoch": 1.6338302009073233, "grad_norm": 0.4382549524307251, "learning_rate": 4.521448360914173e-06, "loss": 0.1044, "step": 5042 }, { "epoch": 1.6341542449773168, "grad_norm": 0.42880919575691223, "learning_rate": 4.519707204087129e-06, "loss": 0.1076, "step": 5043 }, { "epoch": 1.6344782890473104, "grad_norm": 0.4497835636138916, "learning_rate": 4.517966106043276e-06, "loss": 0.1062, "step": 5044 }, { "epoch": 1.634802333117304, "grad_norm": 0.6321564316749573, "learning_rate": 4.5162250669957035e-06, "loss": 0.1222, "step": 5045 }, { "epoch": 1.6351263771872975, "grad_norm": 0.44169488549232483, "learning_rate": 4.514484087157502e-06, "loss": 0.1056, "step": 5046 }, { "epoch": 1.635450421257291, "grad_norm": 0.4321480691432953, "learning_rate": 4.512743166741745e-06, "loss": 0.1039, "step": 5047 }, { "epoch": 1.6357744653272845, "grad_norm": 0.4385624825954437, "learning_rate": 4.51100230596151e-06, "loss": 0.1078, "step": 5048 }, { "epoch": 1.6360985093972782, "grad_norm": 0.5026413202285767, "learning_rate": 4.5092615050298585e-06, "loss": 0.1207, "step": 5049 }, { "epoch": 1.6364225534672716, "grad_norm": 0.46124643087387085, "learning_rate": 4.507520764159848e-06, "loss": 0.1135, "step": 5050 }, { "epoch": 1.636746597537265, "grad_norm": 0.4504324793815613, "learning_rate": 4.505780083564527e-06, "loss": 0.1038, "step": 5051 }, { "epoch": 1.6370706416072585, "grad_norm": 0.4859355092048645, "learning_rate": 4.5040394634569405e-06, "loss": 0.1212, "step": 5052 }, { "epoch": 1.637394685677252, "grad_norm": 0.4548870921134949, "learning_rate": 4.502298904050123e-06, "loss": 0.113, "step": 5053 }, { "epoch": 1.6377187297472457, "grad_norm": 0.41375717520713806, "learning_rate": 4.5005584055571016e-06, "loss": 0.0959, "step": 5054 }, { "epoch": 1.6380427738172392, "grad_norm": 0.5188780426979065, "learning_rate": 4.498817968190894e-06, "loss": 0.1205, "step": 5055 }, { "epoch": 1.6383668178872326, "grad_norm": 0.4686286747455597, "learning_rate": 4.497077592164518e-06, "loss": 0.1135, "step": 5056 }, { "epoch": 1.638690861957226, "grad_norm": 0.44579342007637024, "learning_rate": 4.4953372776909735e-06, "loss": 0.1066, "step": 5057 }, { "epoch": 1.6390149060272197, "grad_norm": 0.47754591703414917, "learning_rate": 4.493597024983263e-06, "loss": 0.1068, "step": 5058 }, { "epoch": 1.6393389500972133, "grad_norm": 0.4362635910511017, "learning_rate": 4.4918568342543725e-06, "loss": 0.1094, "step": 5059 }, { "epoch": 1.6396629941672067, "grad_norm": 0.48197826743125916, "learning_rate": 4.490116705717287e-06, "loss": 0.12, "step": 5060 }, { "epoch": 1.6399870382372002, "grad_norm": 0.46622657775878906, "learning_rate": 4.488376639584982e-06, "loss": 0.1114, "step": 5061 }, { "epoch": 1.6403110823071938, "grad_norm": 0.45262354612350464, "learning_rate": 4.486636636070422e-06, "loss": 0.1059, "step": 5062 }, { "epoch": 1.6406351263771874, "grad_norm": 0.47442132234573364, "learning_rate": 4.484896695386569e-06, "loss": 0.1075, "step": 5063 }, { "epoch": 1.6409591704471809, "grad_norm": 0.43097731471061707, "learning_rate": 4.483156817746372e-06, "loss": 0.1012, "step": 5064 }, { "epoch": 1.6412832145171743, "grad_norm": 0.43258407711982727, "learning_rate": 4.481417003362779e-06, "loss": 0.104, "step": 5065 }, { "epoch": 1.6416072585871677, "grad_norm": 0.45446422696113586, "learning_rate": 4.479677252448722e-06, "loss": 0.1121, "step": 5066 }, { "epoch": 1.6419313026571614, "grad_norm": 0.4537167549133301, "learning_rate": 4.477937565217135e-06, "loss": 0.1056, "step": 5067 }, { "epoch": 1.642255346727155, "grad_norm": 0.46545058488845825, "learning_rate": 4.476197941880936e-06, "loss": 0.1112, "step": 5068 }, { "epoch": 1.6425793907971484, "grad_norm": 0.42365697026252747, "learning_rate": 4.474458382653035e-06, "loss": 0.0989, "step": 5069 }, { "epoch": 1.6429034348671419, "grad_norm": 0.4858678877353668, "learning_rate": 4.472718887746344e-06, "loss": 0.1191, "step": 5070 }, { "epoch": 1.6432274789371355, "grad_norm": 0.45485833287239075, "learning_rate": 4.4709794573737545e-06, "loss": 0.1081, "step": 5071 }, { "epoch": 1.643551523007129, "grad_norm": 0.43576475977897644, "learning_rate": 4.46924009174816e-06, "loss": 0.1043, "step": 5072 }, { "epoch": 1.6438755670771226, "grad_norm": 0.4274187684059143, "learning_rate": 4.467500791082438e-06, "loss": 0.0974, "step": 5073 }, { "epoch": 1.644199611147116, "grad_norm": 0.42950570583343506, "learning_rate": 4.465761555589465e-06, "loss": 0.1044, "step": 5074 }, { "epoch": 1.6445236552171094, "grad_norm": 0.43484798073768616, "learning_rate": 4.464022385482106e-06, "loss": 0.0985, "step": 5075 }, { "epoch": 1.644847699287103, "grad_norm": 0.5140442848205566, "learning_rate": 4.462283280973217e-06, "loss": 0.1241, "step": 5076 }, { "epoch": 1.6451717433570967, "grad_norm": 0.46632710099220276, "learning_rate": 4.460544242275651e-06, "loss": 0.1084, "step": 5077 }, { "epoch": 1.6454957874270901, "grad_norm": 0.4264677166938782, "learning_rate": 4.458805269602245e-06, "loss": 0.1006, "step": 5078 }, { "epoch": 1.6458198314970836, "grad_norm": 0.47796615958213806, "learning_rate": 4.457066363165837e-06, "loss": 0.1131, "step": 5079 }, { "epoch": 1.646143875567077, "grad_norm": 0.45501673221588135, "learning_rate": 4.45532752317925e-06, "loss": 0.1039, "step": 5080 }, { "epoch": 1.6464679196370706, "grad_norm": 0.43680480122566223, "learning_rate": 4.453588749855301e-06, "loss": 0.1012, "step": 5081 }, { "epoch": 1.6467919637070643, "grad_norm": 0.4651895761489868, "learning_rate": 4.451850043406798e-06, "loss": 0.112, "step": 5082 }, { "epoch": 1.6471160077770577, "grad_norm": 0.37965553998947144, "learning_rate": 4.450111404046545e-06, "loss": 0.0892, "step": 5083 }, { "epoch": 1.6474400518470511, "grad_norm": 0.48205476999282837, "learning_rate": 4.448372831987333e-06, "loss": 0.1164, "step": 5084 }, { "epoch": 1.6477640959170448, "grad_norm": 0.4473625123500824, "learning_rate": 4.446634327441946e-06, "loss": 0.1093, "step": 5085 }, { "epoch": 1.6480881399870384, "grad_norm": 0.4586276710033417, "learning_rate": 4.444895890623158e-06, "loss": 0.1135, "step": 5086 }, { "epoch": 1.6484121840570318, "grad_norm": 0.5061674118041992, "learning_rate": 4.443157521743741e-06, "loss": 0.132, "step": 5087 }, { "epoch": 1.6487362281270252, "grad_norm": 0.47010958194732666, "learning_rate": 4.441419221016452e-06, "loss": 0.1141, "step": 5088 }, { "epoch": 1.6490602721970187, "grad_norm": 0.45167699456214905, "learning_rate": 4.439680988654043e-06, "loss": 0.107, "step": 5089 }, { "epoch": 1.6493843162670123, "grad_norm": 0.44836100935935974, "learning_rate": 4.437942824869256e-06, "loss": 0.1039, "step": 5090 }, { "epoch": 1.649708360337006, "grad_norm": 0.48193061351776123, "learning_rate": 4.436204729874828e-06, "loss": 0.1143, "step": 5091 }, { "epoch": 1.6500324044069994, "grad_norm": 0.47236403822898865, "learning_rate": 4.434466703883483e-06, "loss": 0.1068, "step": 5092 }, { "epoch": 1.6503564484769928, "grad_norm": 0.46453502774238586, "learning_rate": 4.4327287471079375e-06, "loss": 0.1144, "step": 5093 }, { "epoch": 1.6506804925469862, "grad_norm": 0.44503045082092285, "learning_rate": 4.430990859760903e-06, "loss": 0.1104, "step": 5094 }, { "epoch": 1.6510045366169799, "grad_norm": 0.4143137037754059, "learning_rate": 4.429253042055076e-06, "loss": 0.0875, "step": 5095 }, { "epoch": 1.6513285806869735, "grad_norm": 0.47698596119880676, "learning_rate": 4.427515294203154e-06, "loss": 0.1157, "step": 5096 }, { "epoch": 1.651652624756967, "grad_norm": 0.46756431460380554, "learning_rate": 4.425777616417819e-06, "loss": 0.109, "step": 5097 }, { "epoch": 1.6519766688269604, "grad_norm": 0.44503191113471985, "learning_rate": 4.424040008911741e-06, "loss": 0.1044, "step": 5098 }, { "epoch": 1.652300712896954, "grad_norm": 0.4633592367172241, "learning_rate": 4.422302471897593e-06, "loss": 0.1098, "step": 5099 }, { "epoch": 1.6526247569669477, "grad_norm": 0.48274174332618713, "learning_rate": 4.4205650055880286e-06, "loss": 0.1058, "step": 5100 }, { "epoch": 1.652948801036941, "grad_norm": 0.45976313948631287, "learning_rate": 4.418827610195699e-06, "loss": 0.1147, "step": 5101 }, { "epoch": 1.6532728451069345, "grad_norm": 0.43249958753585815, "learning_rate": 4.417090285933243e-06, "loss": 0.0984, "step": 5102 }, { "epoch": 1.653596889176928, "grad_norm": 0.48664548993110657, "learning_rate": 4.415353033013294e-06, "loss": 0.1146, "step": 5103 }, { "epoch": 1.6539209332469216, "grad_norm": 0.45588254928588867, "learning_rate": 4.413615851648474e-06, "loss": 0.101, "step": 5104 }, { "epoch": 1.6542449773169152, "grad_norm": 0.46738970279693604, "learning_rate": 4.411878742051396e-06, "loss": 0.1131, "step": 5105 }, { "epoch": 1.6545690213869086, "grad_norm": 0.4817322790622711, "learning_rate": 4.410141704434668e-06, "loss": 0.1147, "step": 5106 }, { "epoch": 1.654893065456902, "grad_norm": 0.46531572937965393, "learning_rate": 4.408404739010882e-06, "loss": 0.1076, "step": 5107 }, { "epoch": 1.6552171095268955, "grad_norm": 0.48336824774742126, "learning_rate": 4.406667845992632e-06, "loss": 0.1162, "step": 5108 }, { "epoch": 1.6555411535968891, "grad_norm": 0.40397506952285767, "learning_rate": 4.404931025592494e-06, "loss": 0.091, "step": 5109 }, { "epoch": 1.6558651976668828, "grad_norm": 0.47702574729919434, "learning_rate": 4.4031942780230345e-06, "loss": 0.114, "step": 5110 }, { "epoch": 1.6561892417368762, "grad_norm": 0.4436090290546417, "learning_rate": 4.401457603496821e-06, "loss": 0.1002, "step": 5111 }, { "epoch": 1.6565132858068696, "grad_norm": 0.43865570425987244, "learning_rate": 4.399721002226399e-06, "loss": 0.1031, "step": 5112 }, { "epoch": 1.6568373298768633, "grad_norm": 0.4540642201900482, "learning_rate": 4.39798447442432e-06, "loss": 0.1109, "step": 5113 }, { "epoch": 1.657161373946857, "grad_norm": 0.4558948576450348, "learning_rate": 4.3962480203031095e-06, "loss": 0.1121, "step": 5114 }, { "epoch": 1.6574854180168503, "grad_norm": 0.44171616435050964, "learning_rate": 4.3945116400752994e-06, "loss": 0.1074, "step": 5115 }, { "epoch": 1.6578094620868438, "grad_norm": 0.4583776891231537, "learning_rate": 4.3927753339534015e-06, "loss": 0.1027, "step": 5116 }, { "epoch": 1.6581335061568372, "grad_norm": 0.506412923336029, "learning_rate": 4.391039102149923e-06, "loss": 0.1148, "step": 5117 }, { "epoch": 1.6584575502268308, "grad_norm": 0.48240935802459717, "learning_rate": 4.389302944877365e-06, "loss": 0.1188, "step": 5118 }, { "epoch": 1.6587815942968245, "grad_norm": 0.4528309106826782, "learning_rate": 4.387566862348213e-06, "loss": 0.1084, "step": 5119 }, { "epoch": 1.659105638366818, "grad_norm": 0.42765727639198303, "learning_rate": 4.38583085477495e-06, "loss": 0.1012, "step": 5120 }, { "epoch": 1.6594296824368113, "grad_norm": 0.42442506551742554, "learning_rate": 4.384094922370045e-06, "loss": 0.0994, "step": 5121 }, { "epoch": 1.659753726506805, "grad_norm": 0.4268704950809479, "learning_rate": 4.382359065345957e-06, "loss": 0.1012, "step": 5122 }, { "epoch": 1.6600777705767984, "grad_norm": 0.4246152937412262, "learning_rate": 4.380623283915142e-06, "loss": 0.0955, "step": 5123 }, { "epoch": 1.660401814646792, "grad_norm": 0.45979657769203186, "learning_rate": 4.37888757829004e-06, "loss": 0.1028, "step": 5124 }, { "epoch": 1.6607258587167855, "grad_norm": 0.49330389499664307, "learning_rate": 4.377151948683086e-06, "loss": 0.1235, "step": 5125 }, { "epoch": 1.6610499027867789, "grad_norm": 0.4123222231864929, "learning_rate": 4.375416395306703e-06, "loss": 0.0947, "step": 5126 }, { "epoch": 1.6613739468567725, "grad_norm": 0.46925610303878784, "learning_rate": 4.373680918373308e-06, "loss": 0.1161, "step": 5127 }, { "epoch": 1.6616979909267662, "grad_norm": 0.4626232385635376, "learning_rate": 4.371945518095306e-06, "loss": 0.1041, "step": 5128 }, { "epoch": 1.6620220349967596, "grad_norm": 0.5107243061065674, "learning_rate": 4.370210194685091e-06, "loss": 0.1278, "step": 5129 }, { "epoch": 1.662346079066753, "grad_norm": 0.4590868651866913, "learning_rate": 4.3684749483550524e-06, "loss": 0.1068, "step": 5130 }, { "epoch": 1.6626701231367464, "grad_norm": 0.4151780605316162, "learning_rate": 4.366739779317563e-06, "loss": 0.098, "step": 5131 }, { "epoch": 1.66299416720674, "grad_norm": 0.49163949489593506, "learning_rate": 4.365004687784999e-06, "loss": 0.1074, "step": 5132 }, { "epoch": 1.6633182112767337, "grad_norm": 0.4786001741886139, "learning_rate": 4.363269673969711e-06, "loss": 0.1213, "step": 5133 }, { "epoch": 1.6636422553467272, "grad_norm": 0.43519163131713867, "learning_rate": 4.361534738084052e-06, "loss": 0.1035, "step": 5134 }, { "epoch": 1.6639662994167206, "grad_norm": 0.4144386947154999, "learning_rate": 4.3597998803403604e-06, "loss": 0.095, "step": 5135 }, { "epoch": 1.6642903434867142, "grad_norm": 0.48504820466041565, "learning_rate": 4.3580651009509654e-06, "loss": 0.109, "step": 5136 }, { "epoch": 1.6646143875567079, "grad_norm": 0.490996390581131, "learning_rate": 4.356330400128189e-06, "loss": 0.1197, "step": 5137 }, { "epoch": 1.6649384316267013, "grad_norm": 0.41804537177085876, "learning_rate": 4.354595778084338e-06, "loss": 0.0998, "step": 5138 }, { "epoch": 1.6652624756966947, "grad_norm": 0.4619435667991638, "learning_rate": 4.3528612350317175e-06, "loss": 0.1113, "step": 5139 }, { "epoch": 1.6655865197666881, "grad_norm": 0.4360075294971466, "learning_rate": 4.351126771182617e-06, "loss": 0.1011, "step": 5140 }, { "epoch": 1.6659105638366818, "grad_norm": 0.4541635811328888, "learning_rate": 4.349392386749316e-06, "loss": 0.1111, "step": 5141 }, { "epoch": 1.6662346079066754, "grad_norm": 0.45814111828804016, "learning_rate": 4.347658081944092e-06, "loss": 0.1076, "step": 5142 }, { "epoch": 1.6665586519766689, "grad_norm": 0.4763301610946655, "learning_rate": 4.3459238569792e-06, "loss": 0.1128, "step": 5143 }, { "epoch": 1.6668826960466623, "grad_norm": 0.4556414783000946, "learning_rate": 4.3441897120668985e-06, "loss": 0.1139, "step": 5144 }, { "epoch": 1.6672067401166557, "grad_norm": 0.40329378843307495, "learning_rate": 4.342455647419426e-06, "loss": 0.0921, "step": 5145 }, { "epoch": 1.6675307841866494, "grad_norm": 0.5186251997947693, "learning_rate": 4.3407216632490185e-06, "loss": 0.1307, "step": 5146 }, { "epoch": 1.667854828256643, "grad_norm": 0.47811368107795715, "learning_rate": 4.338987759767896e-06, "loss": 0.1164, "step": 5147 }, { "epoch": 1.6681788723266364, "grad_norm": 0.4457603693008423, "learning_rate": 4.337253937188272e-06, "loss": 0.1048, "step": 5148 }, { "epoch": 1.6685029163966298, "grad_norm": 0.46842989325523376, "learning_rate": 4.335520195722352e-06, "loss": 0.1043, "step": 5149 }, { "epoch": 1.6688269604666235, "grad_norm": 0.4409325122833252, "learning_rate": 4.333786535582325e-06, "loss": 0.1064, "step": 5150 }, { "epoch": 1.6691510045366171, "grad_norm": 0.4449619650840759, "learning_rate": 4.332052956980378e-06, "loss": 0.0991, "step": 5151 }, { "epoch": 1.6694750486066106, "grad_norm": 0.447503924369812, "learning_rate": 4.3303194601286835e-06, "loss": 0.1009, "step": 5152 }, { "epoch": 1.669799092676604, "grad_norm": 0.4481426775455475, "learning_rate": 4.3285860452394025e-06, "loss": 0.0965, "step": 5153 }, { "epoch": 1.6701231367465974, "grad_norm": 0.4478634297847748, "learning_rate": 4.326852712524691e-06, "loss": 0.1114, "step": 5154 }, { "epoch": 1.670447180816591, "grad_norm": 0.43121814727783203, "learning_rate": 4.32511946219669e-06, "loss": 0.0996, "step": 5155 }, { "epoch": 1.6707712248865847, "grad_norm": 0.3942187428474426, "learning_rate": 4.323386294467534e-06, "loss": 0.0916, "step": 5156 }, { "epoch": 1.6710952689565781, "grad_norm": 0.49547019600868225, "learning_rate": 4.3216532095493445e-06, "loss": 0.1248, "step": 5157 }, { "epoch": 1.6714193130265715, "grad_norm": 0.48921433091163635, "learning_rate": 4.319920207654237e-06, "loss": 0.1204, "step": 5158 }, { "epoch": 1.6717433570965652, "grad_norm": 0.4274780750274658, "learning_rate": 4.3181872889943126e-06, "loss": 0.0996, "step": 5159 }, { "epoch": 1.6720674011665586, "grad_norm": 0.4929424822330475, "learning_rate": 4.316454453781661e-06, "loss": 0.1219, "step": 5160 }, { "epoch": 1.6723914452365523, "grad_norm": 0.47485384345054626, "learning_rate": 4.314721702228369e-06, "loss": 0.1192, "step": 5161 }, { "epoch": 1.6727154893065457, "grad_norm": 0.4370744228363037, "learning_rate": 4.312989034546505e-06, "loss": 0.1121, "step": 5162 }, { "epoch": 1.673039533376539, "grad_norm": 0.4904261827468872, "learning_rate": 4.311256450948134e-06, "loss": 0.1207, "step": 5163 }, { "epoch": 1.6733635774465327, "grad_norm": 0.4401930868625641, "learning_rate": 4.309523951645306e-06, "loss": 0.1111, "step": 5164 }, { "epoch": 1.6736876215165264, "grad_norm": 0.44211092591285706, "learning_rate": 4.3077915368500605e-06, "loss": 0.1053, "step": 5165 }, { "epoch": 1.6740116655865198, "grad_norm": 0.4240233898162842, "learning_rate": 4.306059206774431e-06, "loss": 0.1056, "step": 5166 }, { "epoch": 1.6743357096565132, "grad_norm": 0.4609757661819458, "learning_rate": 4.304326961630436e-06, "loss": 0.112, "step": 5167 }, { "epoch": 1.6746597537265067, "grad_norm": 0.46120592951774597, "learning_rate": 4.302594801630088e-06, "loss": 0.1118, "step": 5168 }, { "epoch": 1.6749837977965003, "grad_norm": 0.45138785243034363, "learning_rate": 4.300862726985382e-06, "loss": 0.1101, "step": 5169 }, { "epoch": 1.675307841866494, "grad_norm": 0.45417898893356323, "learning_rate": 4.2991307379083125e-06, "loss": 0.1132, "step": 5170 }, { "epoch": 1.6756318859364874, "grad_norm": 0.4745250344276428, "learning_rate": 4.297398834610855e-06, "loss": 0.1173, "step": 5171 }, { "epoch": 1.6759559300064808, "grad_norm": 0.4576803743839264, "learning_rate": 4.295667017304977e-06, "loss": 0.108, "step": 5172 }, { "epoch": 1.6762799740764744, "grad_norm": 0.43262675404548645, "learning_rate": 4.29393528620264e-06, "loss": 0.1031, "step": 5173 }, { "epoch": 1.6766040181464679, "grad_norm": 0.47831442952156067, "learning_rate": 4.2922036415157865e-06, "loss": 0.1143, "step": 5174 }, { "epoch": 1.6769280622164615, "grad_norm": 0.4054790735244751, "learning_rate": 4.290472083456357e-06, "loss": 0.0972, "step": 5175 }, { "epoch": 1.677252106286455, "grad_norm": 0.4552663266658783, "learning_rate": 4.288740612236276e-06, "loss": 0.1054, "step": 5176 }, { "epoch": 1.6775761503564484, "grad_norm": 0.43283385038375854, "learning_rate": 4.287009228067456e-06, "loss": 0.101, "step": 5177 }, { "epoch": 1.677900194426442, "grad_norm": 0.4280630350112915, "learning_rate": 4.285277931161806e-06, "loss": 0.0966, "step": 5178 }, { "epoch": 1.6782242384964356, "grad_norm": 0.46848851442337036, "learning_rate": 4.283546721731218e-06, "loss": 0.1077, "step": 5179 }, { "epoch": 1.678548282566429, "grad_norm": 0.4811386168003082, "learning_rate": 4.281815599987577e-06, "loss": 0.1122, "step": 5180 }, { "epoch": 1.6788723266364225, "grad_norm": 0.4960719645023346, "learning_rate": 4.2800845661427505e-06, "loss": 0.1196, "step": 5181 }, { "epoch": 1.679196370706416, "grad_norm": 0.48916101455688477, "learning_rate": 4.2783536204086065e-06, "loss": 0.1116, "step": 5182 }, { "epoch": 1.6795204147764096, "grad_norm": 0.5164931416511536, "learning_rate": 4.276622762996993e-06, "loss": 0.1246, "step": 5183 }, { "epoch": 1.6798444588464032, "grad_norm": 0.4530620276927948, "learning_rate": 4.274891994119748e-06, "loss": 0.1049, "step": 5184 }, { "epoch": 1.6801685029163966, "grad_norm": 0.4720141589641571, "learning_rate": 4.273161313988707e-06, "loss": 0.1067, "step": 5185 }, { "epoch": 1.68049254698639, "grad_norm": 0.4903331995010376, "learning_rate": 4.2714307228156814e-06, "loss": 0.1164, "step": 5186 }, { "epoch": 1.6808165910563837, "grad_norm": 0.49225887656211853, "learning_rate": 4.2697002208124845e-06, "loss": 0.114, "step": 5187 }, { "epoch": 1.6811406351263773, "grad_norm": 0.47676360607147217, "learning_rate": 4.267969808190911e-06, "loss": 0.1179, "step": 5188 }, { "epoch": 1.6814646791963708, "grad_norm": 0.4683300852775574, "learning_rate": 4.266239485162746e-06, "loss": 0.1087, "step": 5189 }, { "epoch": 1.6817887232663642, "grad_norm": 0.461770236492157, "learning_rate": 4.264509251939765e-06, "loss": 0.1103, "step": 5190 }, { "epoch": 1.6821127673363576, "grad_norm": 0.47698649764060974, "learning_rate": 4.262779108733729e-06, "loss": 0.1087, "step": 5191 }, { "epoch": 1.6824368114063513, "grad_norm": 0.4547366499900818, "learning_rate": 4.2610490557563955e-06, "loss": 0.1079, "step": 5192 }, { "epoch": 1.682760855476345, "grad_norm": 0.4503345191478729, "learning_rate": 4.259319093219502e-06, "loss": 0.1066, "step": 5193 }, { "epoch": 1.6830848995463383, "grad_norm": 0.487965852022171, "learning_rate": 4.257589221334783e-06, "loss": 0.1096, "step": 5194 }, { "epoch": 1.6834089436163318, "grad_norm": 0.4674997627735138, "learning_rate": 4.2558594403139574e-06, "loss": 0.1077, "step": 5195 }, { "epoch": 1.6837329876863252, "grad_norm": 0.48449698090553284, "learning_rate": 4.25412975036873e-06, "loss": 0.1161, "step": 5196 }, { "epoch": 1.6840570317563188, "grad_norm": 0.43596965074539185, "learning_rate": 4.252400151710802e-06, "loss": 0.1014, "step": 5197 }, { "epoch": 1.6843810758263125, "grad_norm": 0.4734188914299011, "learning_rate": 4.250670644551856e-06, "loss": 0.1156, "step": 5198 }, { "epoch": 1.684705119896306, "grad_norm": 0.41340628266334534, "learning_rate": 4.2489412291035706e-06, "loss": 0.0975, "step": 5199 }, { "epoch": 1.6850291639662993, "grad_norm": 0.43714478611946106, "learning_rate": 4.247211905577609e-06, "loss": 0.0997, "step": 5200 }, { "epoch": 1.685353208036293, "grad_norm": 0.4615105986595154, "learning_rate": 4.245482674185621e-06, "loss": 0.1094, "step": 5201 }, { "epoch": 1.6856772521062866, "grad_norm": 0.46344971656799316, "learning_rate": 4.243753535139251e-06, "loss": 0.1085, "step": 5202 }, { "epoch": 1.68600129617628, "grad_norm": 0.4436193108558655, "learning_rate": 4.242024488650125e-06, "loss": 0.1017, "step": 5203 }, { "epoch": 1.6863253402462735, "grad_norm": 0.4480687975883484, "learning_rate": 4.240295534929865e-06, "loss": 0.1088, "step": 5204 }, { "epoch": 1.6866493843162669, "grad_norm": 0.46114522218704224, "learning_rate": 4.238566674190076e-06, "loss": 0.1077, "step": 5205 }, { "epoch": 1.6869734283862605, "grad_norm": 0.44118571281433105, "learning_rate": 4.236837906642357e-06, "loss": 0.1053, "step": 5206 }, { "epoch": 1.6872974724562542, "grad_norm": 0.4613795280456543, "learning_rate": 4.23510923249829e-06, "loss": 0.1129, "step": 5207 }, { "epoch": 1.6876215165262476, "grad_norm": 0.47433406114578247, "learning_rate": 4.2333806519694455e-06, "loss": 0.1176, "step": 5208 }, { "epoch": 1.687945560596241, "grad_norm": 0.4397332966327667, "learning_rate": 4.23165216526739e-06, "loss": 0.0993, "step": 5209 }, { "epoch": 1.6882696046662347, "grad_norm": 0.4279300570487976, "learning_rate": 4.22992377260367e-06, "loss": 0.1059, "step": 5210 }, { "epoch": 1.688593648736228, "grad_norm": 0.46599432826042175, "learning_rate": 4.228195474189828e-06, "loss": 0.1102, "step": 5211 }, { "epoch": 1.6889176928062217, "grad_norm": 0.48240232467651367, "learning_rate": 4.2264672702373845e-06, "loss": 0.1187, "step": 5212 }, { "epoch": 1.6892417368762151, "grad_norm": 0.49114811420440674, "learning_rate": 4.2247391609578614e-06, "loss": 0.1126, "step": 5213 }, { "epoch": 1.6895657809462086, "grad_norm": 0.43978747725486755, "learning_rate": 4.22301114656276e-06, "loss": 0.1026, "step": 5214 }, { "epoch": 1.6898898250162022, "grad_norm": 0.5041015148162842, "learning_rate": 4.22128322726357e-06, "loss": 0.1214, "step": 5215 }, { "epoch": 1.6902138690861959, "grad_norm": 0.44519177079200745, "learning_rate": 4.219555403271778e-06, "loss": 0.1081, "step": 5216 }, { "epoch": 1.6905379131561893, "grad_norm": 0.5372322797775269, "learning_rate": 4.217827674798845e-06, "loss": 0.1136, "step": 5217 }, { "epoch": 1.6908619572261827, "grad_norm": 0.4537216126918793, "learning_rate": 4.216100042056236e-06, "loss": 0.1021, "step": 5218 }, { "epoch": 1.6911860012961761, "grad_norm": 0.4697504937648773, "learning_rate": 4.214372505255393e-06, "loss": 0.1059, "step": 5219 }, { "epoch": 1.6915100453661698, "grad_norm": 0.43239736557006836, "learning_rate": 4.212645064607749e-06, "loss": 0.095, "step": 5220 }, { "epoch": 1.6918340894361634, "grad_norm": 0.4243602454662323, "learning_rate": 4.210917720324727e-06, "loss": 0.1018, "step": 5221 }, { "epoch": 1.6921581335061568, "grad_norm": 0.4734940528869629, "learning_rate": 4.209190472617736e-06, "loss": 0.112, "step": 5222 }, { "epoch": 1.6924821775761503, "grad_norm": 0.45138147473335266, "learning_rate": 4.207463321698177e-06, "loss": 0.1067, "step": 5223 }, { "epoch": 1.692806221646144, "grad_norm": 0.4541546106338501, "learning_rate": 4.205736267777433e-06, "loss": 0.114, "step": 5224 }, { "epoch": 1.6931302657161373, "grad_norm": 0.4667559564113617, "learning_rate": 4.204009311066884e-06, "loss": 0.1143, "step": 5225 }, { "epoch": 1.693454309786131, "grad_norm": 0.41926097869873047, "learning_rate": 4.202282451777888e-06, "loss": 0.0961, "step": 5226 }, { "epoch": 1.6937783538561244, "grad_norm": 0.4348689615726471, "learning_rate": 4.200555690121796e-06, "loss": 0.1045, "step": 5227 }, { "epoch": 1.6941023979261178, "grad_norm": 0.4243723154067993, "learning_rate": 4.198829026309951e-06, "loss": 0.1014, "step": 5228 }, { "epoch": 1.6944264419961115, "grad_norm": 0.4645223021507263, "learning_rate": 4.197102460553673e-06, "loss": 0.1095, "step": 5229 }, { "epoch": 1.6947504860661051, "grad_norm": 0.4362965226173401, "learning_rate": 4.195375993064286e-06, "loss": 0.0962, "step": 5230 }, { "epoch": 1.6950745301360985, "grad_norm": 0.43382829427719116, "learning_rate": 4.1936496240530865e-06, "loss": 0.0943, "step": 5231 }, { "epoch": 1.695398574206092, "grad_norm": 0.47366800904273987, "learning_rate": 4.191923353731366e-06, "loss": 0.1124, "step": 5232 }, { "epoch": 1.6957226182760854, "grad_norm": 0.4507708251476288, "learning_rate": 4.190197182310406e-06, "loss": 0.1107, "step": 5233 }, { "epoch": 1.696046662346079, "grad_norm": 0.43914100527763367, "learning_rate": 4.188471110001468e-06, "loss": 0.1068, "step": 5234 }, { "epoch": 1.6963707064160727, "grad_norm": 0.48613178730010986, "learning_rate": 4.186745137015814e-06, "loss": 0.1119, "step": 5235 }, { "epoch": 1.696694750486066, "grad_norm": 0.44728830456733704, "learning_rate": 4.185019263564679e-06, "loss": 0.1021, "step": 5236 }, { "epoch": 1.6970187945560595, "grad_norm": 0.4880698621273041, "learning_rate": 4.183293489859298e-06, "loss": 0.1093, "step": 5237 }, { "epoch": 1.6973428386260532, "grad_norm": 0.4449664354324341, "learning_rate": 4.1815678161108885e-06, "loss": 0.0999, "step": 5238 }, { "epoch": 1.6976668826960468, "grad_norm": 0.47024253010749817, "learning_rate": 4.1798422425306525e-06, "loss": 0.1105, "step": 5239 }, { "epoch": 1.6979909267660402, "grad_norm": 0.4396377503871918, "learning_rate": 4.178116769329789e-06, "loss": 0.1078, "step": 5240 }, { "epoch": 1.6983149708360337, "grad_norm": 0.43690598011016846, "learning_rate": 4.176391396719475e-06, "loss": 0.1035, "step": 5241 }, { "epoch": 1.698639014906027, "grad_norm": 0.46832552552223206, "learning_rate": 4.174666124910882e-06, "loss": 0.108, "step": 5242 }, { "epoch": 1.6989630589760207, "grad_norm": 0.4596247673034668, "learning_rate": 4.172940954115165e-06, "loss": 0.1153, "step": 5243 }, { "epoch": 1.6992871030460144, "grad_norm": 0.4955204725265503, "learning_rate": 4.171215884543468e-06, "loss": 0.1173, "step": 5244 }, { "epoch": 1.6996111471160078, "grad_norm": 0.47185131907463074, "learning_rate": 4.169490916406925e-06, "loss": 0.1095, "step": 5245 }, { "epoch": 1.6999351911860012, "grad_norm": 0.47953593730926514, "learning_rate": 4.167766049916651e-06, "loss": 0.1138, "step": 5246 }, { "epoch": 1.7002592352559946, "grad_norm": 0.44474267959594727, "learning_rate": 4.166041285283759e-06, "loss": 0.0999, "step": 5247 }, { "epoch": 1.7005832793259883, "grad_norm": 0.46185240149497986, "learning_rate": 4.164316622719337e-06, "loss": 0.1108, "step": 5248 }, { "epoch": 1.700907323395982, "grad_norm": 0.44396278262138367, "learning_rate": 4.162592062434474e-06, "loss": 0.1078, "step": 5249 }, { "epoch": 1.7012313674659754, "grad_norm": 0.4664950370788574, "learning_rate": 4.160867604640234e-06, "loss": 0.1098, "step": 5250 }, { "epoch": 1.7015554115359688, "grad_norm": 0.44771596789360046, "learning_rate": 4.159143249547675e-06, "loss": 0.1067, "step": 5251 }, { "epoch": 1.7018794556059624, "grad_norm": 0.45071953535079956, "learning_rate": 4.157418997367844e-06, "loss": 0.1075, "step": 5252 }, { "epoch": 1.702203499675956, "grad_norm": 0.44093945622444153, "learning_rate": 4.155694848311769e-06, "loss": 0.0984, "step": 5253 }, { "epoch": 1.7025275437459495, "grad_norm": 0.4549207091331482, "learning_rate": 4.1539708025904736e-06, "loss": 0.1078, "step": 5254 }, { "epoch": 1.702851587815943, "grad_norm": 0.4288365840911865, "learning_rate": 4.1522468604149606e-06, "loss": 0.1011, "step": 5255 }, { "epoch": 1.7031756318859363, "grad_norm": 0.4736907184123993, "learning_rate": 4.150523021996223e-06, "loss": 0.1148, "step": 5256 }, { "epoch": 1.70349967595593, "grad_norm": 0.4149734675884247, "learning_rate": 4.148799287545247e-06, "loss": 0.0958, "step": 5257 }, { "epoch": 1.7038237200259236, "grad_norm": 0.4175003170967102, "learning_rate": 4.147075657272994e-06, "loss": 0.0931, "step": 5258 }, { "epoch": 1.704147764095917, "grad_norm": 0.4388391077518463, "learning_rate": 4.145352131390427e-06, "loss": 0.094, "step": 5259 }, { "epoch": 1.7044718081659105, "grad_norm": 0.4352574646472931, "learning_rate": 4.1436287101084835e-06, "loss": 0.0998, "step": 5260 }, { "epoch": 1.7047958522359041, "grad_norm": 0.4395085275173187, "learning_rate": 4.141905393638097e-06, "loss": 0.1044, "step": 5261 }, { "epoch": 1.7051198963058976, "grad_norm": 0.5015331506729126, "learning_rate": 4.140182182190184e-06, "loss": 0.1087, "step": 5262 }, { "epoch": 1.7054439403758912, "grad_norm": 0.4683492183685303, "learning_rate": 4.1384590759756474e-06, "loss": 0.112, "step": 5263 }, { "epoch": 1.7057679844458846, "grad_norm": 0.4542158842086792, "learning_rate": 4.1367360752053795e-06, "loss": 0.1148, "step": 5264 }, { "epoch": 1.706092028515878, "grad_norm": 0.48041650652885437, "learning_rate": 4.1350131800902575e-06, "loss": 0.1222, "step": 5265 }, { "epoch": 1.7064160725858717, "grad_norm": 0.4712051451206207, "learning_rate": 4.13329039084115e-06, "loss": 0.1076, "step": 5266 }, { "epoch": 1.7067401166558653, "grad_norm": 0.4659982919692993, "learning_rate": 4.131567707668909e-06, "loss": 0.1101, "step": 5267 }, { "epoch": 1.7070641607258588, "grad_norm": 0.48965689539909363, "learning_rate": 4.129845130784371e-06, "loss": 0.118, "step": 5268 }, { "epoch": 1.7073882047958522, "grad_norm": 0.4107588827610016, "learning_rate": 4.128122660398368e-06, "loss": 0.1018, "step": 5269 }, { "epoch": 1.7077122488658456, "grad_norm": 0.4468505382537842, "learning_rate": 4.126400296721709e-06, "loss": 0.1065, "step": 5270 }, { "epoch": 1.7080362929358393, "grad_norm": 0.4403822124004364, "learning_rate": 4.124678039965198e-06, "loss": 0.1032, "step": 5271 }, { "epoch": 1.708360337005833, "grad_norm": 0.46318909525871277, "learning_rate": 4.12295589033962e-06, "loss": 0.1145, "step": 5272 }, { "epoch": 1.7086843810758263, "grad_norm": 0.4636130630970001, "learning_rate": 4.1212338480557504e-06, "loss": 0.1105, "step": 5273 }, { "epoch": 1.7090084251458197, "grad_norm": 0.44539788365364075, "learning_rate": 4.119511913324352e-06, "loss": 0.1092, "step": 5274 }, { "epoch": 1.7093324692158134, "grad_norm": 0.45345407724380493, "learning_rate": 4.11779008635617e-06, "loss": 0.1108, "step": 5275 }, { "epoch": 1.709656513285807, "grad_norm": 0.43254977464675903, "learning_rate": 4.1160683673619435e-06, "loss": 0.1018, "step": 5276 }, { "epoch": 1.7099805573558005, "grad_norm": 0.43789052963256836, "learning_rate": 4.114346756552389e-06, "loss": 0.1095, "step": 5277 }, { "epoch": 1.7103046014257939, "grad_norm": 0.4524228572845459, "learning_rate": 4.112625254138219e-06, "loss": 0.1042, "step": 5278 }, { "epoch": 1.7106286454957873, "grad_norm": 0.5019272565841675, "learning_rate": 4.110903860330126e-06, "loss": 0.1185, "step": 5279 }, { "epoch": 1.710952689565781, "grad_norm": 0.4378442168235779, "learning_rate": 4.109182575338796e-06, "loss": 0.0988, "step": 5280 }, { "epoch": 1.7112767336357746, "grad_norm": 0.49997493624687195, "learning_rate": 4.107461399374894e-06, "loss": 0.1151, "step": 5281 }, { "epoch": 1.711600777705768, "grad_norm": 0.47978028655052185, "learning_rate": 4.105740332649074e-06, "loss": 0.1064, "step": 5282 }, { "epoch": 1.7119248217757614, "grad_norm": 0.4691540598869324, "learning_rate": 4.1040193753719835e-06, "loss": 0.1062, "step": 5283 }, { "epoch": 1.7122488658457549, "grad_norm": 0.46821358799934387, "learning_rate": 4.102298527754246e-06, "loss": 0.111, "step": 5284 }, { "epoch": 1.7125729099157485, "grad_norm": 0.4568684697151184, "learning_rate": 4.100577790006479e-06, "loss": 0.1151, "step": 5285 }, { "epoch": 1.7128969539857422, "grad_norm": 0.5008982419967651, "learning_rate": 4.098857162339283e-06, "loss": 0.1277, "step": 5286 }, { "epoch": 1.7132209980557356, "grad_norm": 0.4188055694103241, "learning_rate": 4.097136644963246e-06, "loss": 0.1024, "step": 5287 }, { "epoch": 1.713545042125729, "grad_norm": 0.46634042263031006, "learning_rate": 4.095416238088945e-06, "loss": 0.1025, "step": 5288 }, { "epoch": 1.7138690861957226, "grad_norm": 0.42526668310165405, "learning_rate": 4.093695941926936e-06, "loss": 0.0974, "step": 5289 }, { "epoch": 1.7141931302657163, "grad_norm": 0.4504096806049347, "learning_rate": 4.0919757566877735e-06, "loss": 0.1112, "step": 5290 }, { "epoch": 1.7145171743357097, "grad_norm": 0.4655960202217102, "learning_rate": 4.090255682581986e-06, "loss": 0.1071, "step": 5291 }, { "epoch": 1.7148412184057031, "grad_norm": 0.47816991806030273, "learning_rate": 4.088535719820097e-06, "loss": 0.1067, "step": 5292 }, { "epoch": 1.7151652624756966, "grad_norm": 0.4879434108734131, "learning_rate": 4.086815868612612e-06, "loss": 0.1111, "step": 5293 }, { "epoch": 1.7154893065456902, "grad_norm": 0.48248669505119324, "learning_rate": 4.085096129170025e-06, "loss": 0.1134, "step": 5294 }, { "epoch": 1.7158133506156839, "grad_norm": 0.4520329236984253, "learning_rate": 4.083376501702814e-06, "loss": 0.1091, "step": 5295 }, { "epoch": 1.7161373946856773, "grad_norm": 0.48300623893737793, "learning_rate": 4.081656986421445e-06, "loss": 0.1101, "step": 5296 }, { "epoch": 1.7164614387556707, "grad_norm": 0.4790801405906677, "learning_rate": 4.079937583536372e-06, "loss": 0.1192, "step": 5297 }, { "epoch": 1.7167854828256643, "grad_norm": 0.44851386547088623, "learning_rate": 4.078218293258032e-06, "loss": 0.1074, "step": 5298 }, { "epoch": 1.7171095268956578, "grad_norm": 0.45854687690734863, "learning_rate": 4.076499115796847e-06, "loss": 0.1024, "step": 5299 }, { "epoch": 1.7174335709656514, "grad_norm": 0.44694340229034424, "learning_rate": 4.074780051363233e-06, "loss": 0.1045, "step": 5300 }, { "epoch": 1.7177576150356448, "grad_norm": 0.4249899685382843, "learning_rate": 4.073061100167581e-06, "loss": 0.103, "step": 5301 }, { "epoch": 1.7180816591056383, "grad_norm": 0.4468053877353668, "learning_rate": 4.071342262420279e-06, "loss": 0.1078, "step": 5302 }, { "epoch": 1.718405703175632, "grad_norm": 0.45494428277015686, "learning_rate": 4.069623538331693e-06, "loss": 0.1093, "step": 5303 }, { "epoch": 1.7187297472456255, "grad_norm": 0.5028614401817322, "learning_rate": 4.06790492811218e-06, "loss": 0.1087, "step": 5304 }, { "epoch": 1.719053791315619, "grad_norm": 0.42624178528785706, "learning_rate": 4.06618643197208e-06, "loss": 0.0988, "step": 5305 }, { "epoch": 1.7193778353856124, "grad_norm": 0.4529658854007721, "learning_rate": 4.06446805012172e-06, "loss": 0.1068, "step": 5306 }, { "epoch": 1.7197018794556058, "grad_norm": 0.42587921023368835, "learning_rate": 4.062749782771416e-06, "loss": 0.0994, "step": 5307 }, { "epoch": 1.7200259235255995, "grad_norm": 0.45960429310798645, "learning_rate": 4.061031630131463e-06, "loss": 0.1078, "step": 5308 }, { "epoch": 1.720349967595593, "grad_norm": 0.45776546001434326, "learning_rate": 4.0593135924121506e-06, "loss": 0.1062, "step": 5309 }, { "epoch": 1.7206740116655865, "grad_norm": 0.4353899359703064, "learning_rate": 4.057595669823747e-06, "loss": 0.1047, "step": 5310 }, { "epoch": 1.72099805573558, "grad_norm": 0.4940280616283417, "learning_rate": 4.055877862576509e-06, "loss": 0.1187, "step": 5311 }, { "epoch": 1.7213220998055736, "grad_norm": 0.4680522680282593, "learning_rate": 4.054160170880682e-06, "loss": 0.1069, "step": 5312 }, { "epoch": 1.721646143875567, "grad_norm": 0.4386557638645172, "learning_rate": 4.052442594946493e-06, "loss": 0.0983, "step": 5313 }, { "epoch": 1.7219701879455607, "grad_norm": 0.4579738974571228, "learning_rate": 4.050725134984159e-06, "loss": 0.1005, "step": 5314 }, { "epoch": 1.722294232015554, "grad_norm": 0.46672308444976807, "learning_rate": 4.049007791203877e-06, "loss": 0.1084, "step": 5315 }, { "epoch": 1.7226182760855475, "grad_norm": 0.4470560550689697, "learning_rate": 4.047290563815837e-06, "loss": 0.1079, "step": 5316 }, { "epoch": 1.7229423201555412, "grad_norm": 0.4670076072216034, "learning_rate": 4.045573453030207e-06, "loss": 0.105, "step": 5317 }, { "epoch": 1.7232663642255348, "grad_norm": 0.4503187835216522, "learning_rate": 4.043856459057147e-06, "loss": 0.1136, "step": 5318 }, { "epoch": 1.7235904082955282, "grad_norm": 0.4329877197742462, "learning_rate": 4.042139582106802e-06, "loss": 0.1009, "step": 5319 }, { "epoch": 1.7239144523655217, "grad_norm": 0.43727365136146545, "learning_rate": 4.040422822389297e-06, "loss": 0.1031, "step": 5320 }, { "epoch": 1.724238496435515, "grad_norm": 0.496261864900589, "learning_rate": 4.03870618011475e-06, "loss": 0.122, "step": 5321 }, { "epoch": 1.7245625405055087, "grad_norm": 0.4283820688724518, "learning_rate": 4.036989655493262e-06, "loss": 0.0971, "step": 5322 }, { "epoch": 1.7248865845755024, "grad_norm": 0.48006966710090637, "learning_rate": 4.035273248734915e-06, "loss": 0.1208, "step": 5323 }, { "epoch": 1.7252106286454958, "grad_norm": 0.4690515100955963, "learning_rate": 4.033556960049785e-06, "loss": 0.1091, "step": 5324 }, { "epoch": 1.7255346727154892, "grad_norm": 0.4697633981704712, "learning_rate": 4.031840789647925e-06, "loss": 0.1099, "step": 5325 }, { "epoch": 1.7258587167854829, "grad_norm": 0.4803153872489929, "learning_rate": 4.030124737739382e-06, "loss": 0.1157, "step": 5326 }, { "epoch": 1.7261827608554765, "grad_norm": 0.4614790380001068, "learning_rate": 4.028408804534181e-06, "loss": 0.1129, "step": 5327 }, { "epoch": 1.72650680492547, "grad_norm": 0.45441630482673645, "learning_rate": 4.026692990242338e-06, "loss": 0.1099, "step": 5328 }, { "epoch": 1.7268308489954634, "grad_norm": 0.4581306278705597, "learning_rate": 4.02497729507385e-06, "loss": 0.1103, "step": 5329 }, { "epoch": 1.7271548930654568, "grad_norm": 0.4611577093601227, "learning_rate": 4.0232617192387005e-06, "loss": 0.1098, "step": 5330 }, { "epoch": 1.7274789371354504, "grad_norm": 0.45392879843711853, "learning_rate": 4.021546262946863e-06, "loss": 0.1028, "step": 5331 }, { "epoch": 1.727802981205444, "grad_norm": 0.4982890784740448, "learning_rate": 4.019830926408289e-06, "loss": 0.1239, "step": 5332 }, { "epoch": 1.7281270252754375, "grad_norm": 0.4632214605808258, "learning_rate": 4.018115709832923e-06, "loss": 0.1077, "step": 5333 }, { "epoch": 1.728451069345431, "grad_norm": 0.46273502707481384, "learning_rate": 4.016400613430687e-06, "loss": 0.11, "step": 5334 }, { "epoch": 1.7287751134154243, "grad_norm": 0.4509614109992981, "learning_rate": 4.014685637411494e-06, "loss": 0.109, "step": 5335 }, { "epoch": 1.729099157485418, "grad_norm": 0.4570104777812958, "learning_rate": 4.012970781985242e-06, "loss": 0.1065, "step": 5336 }, { "epoch": 1.7294232015554116, "grad_norm": 0.4149811863899231, "learning_rate": 4.011256047361809e-06, "loss": 0.0989, "step": 5337 }, { "epoch": 1.729747245625405, "grad_norm": 0.46206188201904297, "learning_rate": 4.009541433751066e-06, "loss": 0.1128, "step": 5338 }, { "epoch": 1.7300712896953985, "grad_norm": 0.4685775935649872, "learning_rate": 4.007826941362861e-06, "loss": 0.1088, "step": 5339 }, { "epoch": 1.7303953337653921, "grad_norm": 0.47735917568206787, "learning_rate": 4.006112570407035e-06, "loss": 0.1154, "step": 5340 }, { "epoch": 1.7307193778353858, "grad_norm": 0.44871029257774353, "learning_rate": 4.0043983210934086e-06, "loss": 0.1049, "step": 5341 }, { "epoch": 1.7310434219053792, "grad_norm": 0.4410346746444702, "learning_rate": 4.002684193631789e-06, "loss": 0.1031, "step": 5342 }, { "epoch": 1.7313674659753726, "grad_norm": 0.47302764654159546, "learning_rate": 4.0009701882319695e-06, "loss": 0.1095, "step": 5343 }, { "epoch": 1.731691510045366, "grad_norm": 0.4517134130001068, "learning_rate": 3.999256305103726e-06, "loss": 0.1041, "step": 5344 }, { "epoch": 1.7320155541153597, "grad_norm": 0.43479084968566895, "learning_rate": 3.9975425444568265e-06, "loss": 0.1009, "step": 5345 }, { "epoch": 1.7323395981853533, "grad_norm": 0.42933210730552673, "learning_rate": 3.995828906501013e-06, "loss": 0.0967, "step": 5346 }, { "epoch": 1.7326636422553467, "grad_norm": 0.4475741982460022, "learning_rate": 3.994115391446019e-06, "loss": 0.1052, "step": 5347 }, { "epoch": 1.7329876863253402, "grad_norm": 0.4910019338130951, "learning_rate": 3.992401999501566e-06, "loss": 0.1153, "step": 5348 }, { "epoch": 1.7333117303953338, "grad_norm": 0.42681607604026794, "learning_rate": 3.990688730877352e-06, "loss": 0.1019, "step": 5349 }, { "epoch": 1.7336357744653272, "grad_norm": 0.4786801040172577, "learning_rate": 3.988975585783068e-06, "loss": 0.1184, "step": 5350 }, { "epoch": 1.7339598185353209, "grad_norm": 0.45410358905792236, "learning_rate": 3.987262564428382e-06, "loss": 0.1123, "step": 5351 }, { "epoch": 1.7342838626053143, "grad_norm": 0.4435284733772278, "learning_rate": 3.985549667022956e-06, "loss": 0.1112, "step": 5352 }, { "epoch": 1.7346079066753077, "grad_norm": 0.4696560502052307, "learning_rate": 3.983836893776432e-06, "loss": 0.1286, "step": 5353 }, { "epoch": 1.7349319507453014, "grad_norm": 0.46720215678215027, "learning_rate": 3.982124244898431e-06, "loss": 0.1182, "step": 5354 }, { "epoch": 1.735255994815295, "grad_norm": 0.46293580532073975, "learning_rate": 3.980411720598571e-06, "loss": 0.1093, "step": 5355 }, { "epoch": 1.7355800388852884, "grad_norm": 0.44470348954200745, "learning_rate": 3.978699321086444e-06, "loss": 0.1007, "step": 5356 }, { "epoch": 1.7359040829552819, "grad_norm": 0.4292317032814026, "learning_rate": 3.976987046571636e-06, "loss": 0.0904, "step": 5357 }, { "epoch": 1.7362281270252753, "grad_norm": 0.45817989110946655, "learning_rate": 3.975274897263709e-06, "loss": 0.1143, "step": 5358 }, { "epoch": 1.736552171095269, "grad_norm": 0.4869634807109833, "learning_rate": 3.973562873372215e-06, "loss": 0.1208, "step": 5359 }, { "epoch": 1.7368762151652626, "grad_norm": 0.4258679747581482, "learning_rate": 3.971850975106687e-06, "loss": 0.0977, "step": 5360 }, { "epoch": 1.737200259235256, "grad_norm": 0.45118412375450134, "learning_rate": 3.970139202676647e-06, "loss": 0.1053, "step": 5361 }, { "epoch": 1.7375243033052494, "grad_norm": 0.46550124883651733, "learning_rate": 3.9684275562916e-06, "loss": 0.1155, "step": 5362 }, { "epoch": 1.737848347375243, "grad_norm": 0.43546655774116516, "learning_rate": 3.966716036161031e-06, "loss": 0.1002, "step": 5363 }, { "epoch": 1.7381723914452365, "grad_norm": 0.45683375000953674, "learning_rate": 3.965004642494419e-06, "loss": 0.1097, "step": 5364 }, { "epoch": 1.7384964355152301, "grad_norm": 0.49274900555610657, "learning_rate": 3.963293375501219e-06, "loss": 0.1109, "step": 5365 }, { "epoch": 1.7388204795852236, "grad_norm": 0.43467313051223755, "learning_rate": 3.961582235390871e-06, "loss": 0.1012, "step": 5366 }, { "epoch": 1.739144523655217, "grad_norm": 0.471316933631897, "learning_rate": 3.959871222372807e-06, "loss": 0.1102, "step": 5367 }, { "epoch": 1.7394685677252106, "grad_norm": 0.4421023428440094, "learning_rate": 3.958160336656436e-06, "loss": 0.1073, "step": 5368 }, { "epoch": 1.7397926117952043, "grad_norm": 0.43588271737098694, "learning_rate": 3.9564495784511535e-06, "loss": 0.1093, "step": 5369 }, { "epoch": 1.7401166558651977, "grad_norm": 0.4536246657371521, "learning_rate": 3.9547389479663404e-06, "loss": 0.1084, "step": 5370 }, { "epoch": 1.7404406999351911, "grad_norm": 0.46806734800338745, "learning_rate": 3.953028445411362e-06, "loss": 0.1078, "step": 5371 }, { "epoch": 1.7407647440051845, "grad_norm": 0.45266732573509216, "learning_rate": 3.951318070995566e-06, "loss": 0.1079, "step": 5372 }, { "epoch": 1.7410887880751782, "grad_norm": 0.46891167759895325, "learning_rate": 3.949607824928285e-06, "loss": 0.1067, "step": 5373 }, { "epoch": 1.7414128321451718, "grad_norm": 0.43091535568237305, "learning_rate": 3.947897707418839e-06, "loss": 0.1016, "step": 5374 }, { "epoch": 1.7417368762151653, "grad_norm": 0.47220826148986816, "learning_rate": 3.946187718676526e-06, "loss": 0.1145, "step": 5375 }, { "epoch": 1.7420609202851587, "grad_norm": 0.46615976095199585, "learning_rate": 3.9444778589106375e-06, "loss": 0.1033, "step": 5376 }, { "epoch": 1.7423849643551523, "grad_norm": 0.4272662103176117, "learning_rate": 3.942768128330441e-06, "loss": 0.1008, "step": 5377 }, { "epoch": 1.742709008425146, "grad_norm": 0.46686840057373047, "learning_rate": 3.941058527145188e-06, "loss": 0.1155, "step": 5378 }, { "epoch": 1.7430330524951394, "grad_norm": 0.4431476891040802, "learning_rate": 3.9393490555641226e-06, "loss": 0.1115, "step": 5379 }, { "epoch": 1.7433570965651328, "grad_norm": 0.48964086174964905, "learning_rate": 3.937639713796463e-06, "loss": 0.1101, "step": 5380 }, { "epoch": 1.7436811406351262, "grad_norm": 0.4731682538986206, "learning_rate": 3.93593050205142e-06, "loss": 0.1205, "step": 5381 }, { "epoch": 1.74400518470512, "grad_norm": 0.4271281063556671, "learning_rate": 3.93422142053818e-06, "loss": 0.0992, "step": 5382 }, { "epoch": 1.7443292287751135, "grad_norm": 0.43892544507980347, "learning_rate": 3.932512469465923e-06, "loss": 0.102, "step": 5383 }, { "epoch": 1.744653272845107, "grad_norm": 0.4575996696949005, "learning_rate": 3.930803649043805e-06, "loss": 0.1127, "step": 5384 }, { "epoch": 1.7449773169151004, "grad_norm": 0.4452469050884247, "learning_rate": 3.9290949594809676e-06, "loss": 0.1051, "step": 5385 }, { "epoch": 1.7453013609850938, "grad_norm": 0.44725051522254944, "learning_rate": 3.927386400986542e-06, "loss": 0.1077, "step": 5386 }, { "epoch": 1.7456254050550875, "grad_norm": 0.4135163724422455, "learning_rate": 3.925677973769636e-06, "loss": 0.0981, "step": 5387 }, { "epoch": 1.745949449125081, "grad_norm": 0.48988252878189087, "learning_rate": 3.923969678039347e-06, "loss": 0.1179, "step": 5388 }, { "epoch": 1.7462734931950745, "grad_norm": 0.4563140571117401, "learning_rate": 3.922261514004753e-06, "loss": 0.1075, "step": 5389 }, { "epoch": 1.746597537265068, "grad_norm": 0.43328210711479187, "learning_rate": 3.920553481874916e-06, "loss": 0.1017, "step": 5390 }, { "epoch": 1.7469215813350616, "grad_norm": 0.481424480676651, "learning_rate": 3.918845581858882e-06, "loss": 0.116, "step": 5391 }, { "epoch": 1.7472456254050552, "grad_norm": 0.4085712134838104, "learning_rate": 3.9171378141656825e-06, "loss": 0.0948, "step": 5392 }, { "epoch": 1.7475696694750487, "grad_norm": 0.4701805114746094, "learning_rate": 3.915430179004333e-06, "loss": 0.1113, "step": 5393 }, { "epoch": 1.747893713545042, "grad_norm": 0.5021188855171204, "learning_rate": 3.9137226765838285e-06, "loss": 0.1189, "step": 5394 }, { "epoch": 1.7482177576150355, "grad_norm": 0.44054004549980164, "learning_rate": 3.912015307113153e-06, "loss": 0.1072, "step": 5395 }, { "epoch": 1.7485418016850292, "grad_norm": 0.465827614068985, "learning_rate": 3.910308070801273e-06, "loss": 0.1104, "step": 5396 }, { "epoch": 1.7488658457550228, "grad_norm": 0.46348854899406433, "learning_rate": 3.908600967857134e-06, "loss": 0.1047, "step": 5397 }, { "epoch": 1.7491898898250162, "grad_norm": 0.4622736871242523, "learning_rate": 3.906893998489672e-06, "loss": 0.1125, "step": 5398 }, { "epoch": 1.7495139338950096, "grad_norm": 0.44009384512901306, "learning_rate": 3.9051871629078e-06, "loss": 0.106, "step": 5399 }, { "epoch": 1.7498379779650033, "grad_norm": 0.42945173382759094, "learning_rate": 3.903480461320424e-06, "loss": 0.0989, "step": 5400 }, { "epoch": 1.7501620220349967, "grad_norm": 0.4769463837146759, "learning_rate": 3.901773893936424e-06, "loss": 0.117, "step": 5401 }, { "epoch": 1.7504860661049904, "grad_norm": 0.4809808135032654, "learning_rate": 3.900067460964667e-06, "loss": 0.1139, "step": 5402 }, { "epoch": 1.7508101101749838, "grad_norm": 0.40791723132133484, "learning_rate": 3.898361162614005e-06, "loss": 0.0923, "step": 5403 }, { "epoch": 1.7511341542449772, "grad_norm": 0.4657929241657257, "learning_rate": 3.89665499909327e-06, "loss": 0.1111, "step": 5404 }, { "epoch": 1.7514581983149708, "grad_norm": 0.483063280582428, "learning_rate": 3.894948970611284e-06, "loss": 0.1108, "step": 5405 }, { "epoch": 1.7517822423849645, "grad_norm": 0.43636569380760193, "learning_rate": 3.893243077376845e-06, "loss": 0.1023, "step": 5406 }, { "epoch": 1.752106286454958, "grad_norm": 0.49354323744773865, "learning_rate": 3.891537319598741e-06, "loss": 0.1215, "step": 5407 }, { "epoch": 1.7524303305249513, "grad_norm": 0.44748640060424805, "learning_rate": 3.8898316974857375e-06, "loss": 0.1058, "step": 5408 }, { "epoch": 1.7527543745949448, "grad_norm": 0.5163024663925171, "learning_rate": 3.888126211246585e-06, "loss": 0.1163, "step": 5409 }, { "epoch": 1.7530784186649384, "grad_norm": 0.48390907049179077, "learning_rate": 3.8864208610900234e-06, "loss": 0.1088, "step": 5410 }, { "epoch": 1.753402462734932, "grad_norm": 0.47634682059288025, "learning_rate": 3.884715647224766e-06, "loss": 0.1169, "step": 5411 }, { "epoch": 1.7537265068049255, "grad_norm": 0.46805477142333984, "learning_rate": 3.883010569859517e-06, "loss": 0.1158, "step": 5412 }, { "epoch": 1.754050550874919, "grad_norm": 0.4899493455886841, "learning_rate": 3.881305629202963e-06, "loss": 0.1252, "step": 5413 }, { "epoch": 1.7543745949449125, "grad_norm": 0.470268189907074, "learning_rate": 3.879600825463768e-06, "loss": 0.1117, "step": 5414 }, { "epoch": 1.7546986390149062, "grad_norm": 0.4910193681716919, "learning_rate": 3.877896158850587e-06, "loss": 0.1216, "step": 5415 }, { "epoch": 1.7550226830848996, "grad_norm": 0.49009987711906433, "learning_rate": 3.876191629572051e-06, "loss": 0.1167, "step": 5416 }, { "epoch": 1.755346727154893, "grad_norm": 0.44104844331741333, "learning_rate": 3.874487237836782e-06, "loss": 0.1074, "step": 5417 }, { "epoch": 1.7556707712248865, "grad_norm": 0.48006612062454224, "learning_rate": 3.872782983853378e-06, "loss": 0.1114, "step": 5418 }, { "epoch": 1.75599481529488, "grad_norm": 0.4347625970840454, "learning_rate": 3.871078867830427e-06, "loss": 0.0971, "step": 5419 }, { "epoch": 1.7563188593648738, "grad_norm": 0.4484483301639557, "learning_rate": 3.869374889976493e-06, "loss": 0.1034, "step": 5420 }, { "epoch": 1.7566429034348672, "grad_norm": 0.44795218110084534, "learning_rate": 3.867671050500125e-06, "loss": 0.1111, "step": 5421 }, { "epoch": 1.7569669475048606, "grad_norm": 0.4586280882358551, "learning_rate": 3.86596734960986e-06, "loss": 0.1099, "step": 5422 }, { "epoch": 1.757290991574854, "grad_norm": 0.458940327167511, "learning_rate": 3.864263787514214e-06, "loss": 0.0964, "step": 5423 }, { "epoch": 1.7576150356448477, "grad_norm": 0.3986370265483856, "learning_rate": 3.862560364421685e-06, "loss": 0.0921, "step": 5424 }, { "epoch": 1.7579390797148413, "grad_norm": 0.46346795558929443, "learning_rate": 3.860857080540755e-06, "loss": 0.1125, "step": 5425 }, { "epoch": 1.7582631237848347, "grad_norm": 0.45013266801834106, "learning_rate": 3.859153936079892e-06, "loss": 0.1057, "step": 5426 }, { "epoch": 1.7585871678548282, "grad_norm": 0.48645591735839844, "learning_rate": 3.857450931247544e-06, "loss": 0.1183, "step": 5427 }, { "epoch": 1.7589112119248218, "grad_norm": 0.49759355187416077, "learning_rate": 3.855748066252138e-06, "loss": 0.1227, "step": 5428 }, { "epoch": 1.7592352559948155, "grad_norm": 0.47369781136512756, "learning_rate": 3.854045341302094e-06, "loss": 0.1138, "step": 5429 }, { "epoch": 1.7595593000648089, "grad_norm": 0.48496073484420776, "learning_rate": 3.852342756605805e-06, "loss": 0.1213, "step": 5430 }, { "epoch": 1.7598833441348023, "grad_norm": 0.43131017684936523, "learning_rate": 3.850640312371653e-06, "loss": 0.1004, "step": 5431 }, { "epoch": 1.7602073882047957, "grad_norm": 0.44668450951576233, "learning_rate": 3.848938008808002e-06, "loss": 0.1016, "step": 5432 }, { "epoch": 1.7605314322747894, "grad_norm": 0.3990452289581299, "learning_rate": 3.847235846123193e-06, "loss": 0.0964, "step": 5433 }, { "epoch": 1.760855476344783, "grad_norm": 0.41115233302116394, "learning_rate": 3.845533824525558e-06, "loss": 0.094, "step": 5434 }, { "epoch": 1.7611795204147764, "grad_norm": 0.4076218903064728, "learning_rate": 3.843831944223406e-06, "loss": 0.0929, "step": 5435 }, { "epoch": 1.7615035644847699, "grad_norm": 0.4631011486053467, "learning_rate": 3.842130205425033e-06, "loss": 0.1141, "step": 5436 }, { "epoch": 1.7618276085547635, "grad_norm": 0.4253556728363037, "learning_rate": 3.840428608338711e-06, "loss": 0.1029, "step": 5437 }, { "epoch": 1.762151652624757, "grad_norm": 0.45234552025794983, "learning_rate": 3.838727153172704e-06, "loss": 0.1103, "step": 5438 }, { "epoch": 1.7624756966947506, "grad_norm": 0.4516621232032776, "learning_rate": 3.837025840135252e-06, "loss": 0.1077, "step": 5439 }, { "epoch": 1.762799740764744, "grad_norm": 0.45661982893943787, "learning_rate": 3.835324669434577e-06, "loss": 0.1097, "step": 5440 }, { "epoch": 1.7631237848347374, "grad_norm": 0.4486618638038635, "learning_rate": 3.833623641278889e-06, "loss": 0.1057, "step": 5441 }, { "epoch": 1.763447828904731, "grad_norm": 0.4842875599861145, "learning_rate": 3.831922755876374e-06, "loss": 0.1144, "step": 5442 }, { "epoch": 1.7637718729747247, "grad_norm": 0.4536530673503876, "learning_rate": 3.8302220134352075e-06, "loss": 0.1105, "step": 5443 }, { "epoch": 1.7640959170447181, "grad_norm": 0.5151732563972473, "learning_rate": 3.828521414163542e-06, "loss": 0.1336, "step": 5444 }, { "epoch": 1.7644199611147116, "grad_norm": 0.4194684624671936, "learning_rate": 3.826820958269514e-06, "loss": 0.0976, "step": 5445 }, { "epoch": 1.764744005184705, "grad_norm": 0.4612726867198944, "learning_rate": 3.825120645961245e-06, "loss": 0.1127, "step": 5446 }, { "epoch": 1.7650680492546986, "grad_norm": 0.47306978702545166, "learning_rate": 3.823420477446833e-06, "loss": 0.114, "step": 5447 }, { "epoch": 1.7653920933246923, "grad_norm": 0.4526808261871338, "learning_rate": 3.821720452934366e-06, "loss": 0.1076, "step": 5448 }, { "epoch": 1.7657161373946857, "grad_norm": 0.48292243480682373, "learning_rate": 3.820020572631906e-06, "loss": 0.1149, "step": 5449 }, { "epoch": 1.7660401814646791, "grad_norm": 0.4273426830768585, "learning_rate": 3.818320836747509e-06, "loss": 0.0982, "step": 5450 }, { "epoch": 1.7663642255346728, "grad_norm": 0.4303819537162781, "learning_rate": 3.8166212454892e-06, "loss": 0.1053, "step": 5451 }, { "epoch": 1.7666882696046662, "grad_norm": 0.436567097902298, "learning_rate": 3.814921799064994e-06, "loss": 0.1023, "step": 5452 }, { "epoch": 1.7670123136746598, "grad_norm": 0.4349686801433563, "learning_rate": 3.813222497682889e-06, "loss": 0.0964, "step": 5453 }, { "epoch": 1.7673363577446533, "grad_norm": 0.4685845971107483, "learning_rate": 3.81152334155086e-06, "loss": 0.1088, "step": 5454 }, { "epoch": 1.7676604018146467, "grad_norm": 0.45242762565612793, "learning_rate": 3.80982433087687e-06, "loss": 0.1006, "step": 5455 }, { "epoch": 1.7679844458846403, "grad_norm": 0.4908858835697174, "learning_rate": 3.8081254658688592e-06, "loss": 0.1139, "step": 5456 }, { "epoch": 1.768308489954634, "grad_norm": 0.46974053978919983, "learning_rate": 3.8064267467347527e-06, "loss": 0.1075, "step": 5457 }, { "epoch": 1.7686325340246274, "grad_norm": 0.4594678282737732, "learning_rate": 3.8047281736824593e-06, "loss": 0.1128, "step": 5458 }, { "epoch": 1.7689565780946208, "grad_norm": 0.4492245018482208, "learning_rate": 3.8030297469198633e-06, "loss": 0.1017, "step": 5459 }, { "epoch": 1.7692806221646142, "grad_norm": 0.4470076262950897, "learning_rate": 3.801331466654842e-06, "loss": 0.1044, "step": 5460 }, { "epoch": 1.7696046662346079, "grad_norm": 0.4670352637767792, "learning_rate": 3.799633333095242e-06, "loss": 0.1059, "step": 5461 }, { "epoch": 1.7699287103046015, "grad_norm": 0.49116596579551697, "learning_rate": 3.7979353464489044e-06, "loss": 0.1194, "step": 5462 }, { "epoch": 1.770252754374595, "grad_norm": 0.5399103164672852, "learning_rate": 3.7962375069236433e-06, "loss": 0.1262, "step": 5463 }, { "epoch": 1.7705767984445884, "grad_norm": 0.5004605054855347, "learning_rate": 3.7945398147272566e-06, "loss": 0.1139, "step": 5464 }, { "epoch": 1.770900842514582, "grad_norm": 0.4163171350955963, "learning_rate": 3.7928422700675273e-06, "loss": 0.0984, "step": 5465 }, { "epoch": 1.7712248865845757, "grad_norm": 0.4578644037246704, "learning_rate": 3.791144873152218e-06, "loss": 0.107, "step": 5466 }, { "epoch": 1.771548930654569, "grad_norm": 0.45029592514038086, "learning_rate": 3.7894476241890743e-06, "loss": 0.1016, "step": 5467 }, { "epoch": 1.7718729747245625, "grad_norm": 0.483422189950943, "learning_rate": 3.7877505233858224e-06, "loss": 0.1181, "step": 5468 }, { "epoch": 1.772197018794556, "grad_norm": 0.485103577375412, "learning_rate": 3.786053570950169e-06, "loss": 0.115, "step": 5469 }, { "epoch": 1.7725210628645496, "grad_norm": 0.49042415618896484, "learning_rate": 3.7843567670898085e-06, "loss": 0.1233, "step": 5470 }, { "epoch": 1.7728451069345432, "grad_norm": 0.42680788040161133, "learning_rate": 3.7826601120124094e-06, "loss": 0.1013, "step": 5471 }, { "epoch": 1.7731691510045366, "grad_norm": 0.45376521348953247, "learning_rate": 3.78096360592563e-06, "loss": 0.1052, "step": 5472 }, { "epoch": 1.77349319507453, "grad_norm": 0.42488327622413635, "learning_rate": 3.779267249037102e-06, "loss": 0.0959, "step": 5473 }, { "epoch": 1.7738172391445235, "grad_norm": 0.46989819407463074, "learning_rate": 3.777571041554447e-06, "loss": 0.1114, "step": 5474 }, { "epoch": 1.7741412832145171, "grad_norm": 0.47452855110168457, "learning_rate": 3.7758749836852625e-06, "loss": 0.1262, "step": 5475 }, { "epoch": 1.7744653272845108, "grad_norm": 0.4259603023529053, "learning_rate": 3.7741790756371287e-06, "loss": 0.1007, "step": 5476 }, { "epoch": 1.7747893713545042, "grad_norm": 0.4658740162849426, "learning_rate": 3.77248331761761e-06, "loss": 0.1079, "step": 5477 }, { "epoch": 1.7751134154244976, "grad_norm": 0.39768102765083313, "learning_rate": 3.7707877098342484e-06, "loss": 0.0927, "step": 5478 }, { "epoch": 1.7754374594944913, "grad_norm": 0.47288915514945984, "learning_rate": 3.7690922524945727e-06, "loss": 0.1129, "step": 5479 }, { "epoch": 1.775761503564485, "grad_norm": 0.4316980838775635, "learning_rate": 3.76739694580609e-06, "loss": 0.1091, "step": 5480 }, { "epoch": 1.7760855476344783, "grad_norm": 0.4133599102497101, "learning_rate": 3.765701789976286e-06, "loss": 0.1014, "step": 5481 }, { "epoch": 1.7764095917044718, "grad_norm": 0.43327245116233826, "learning_rate": 3.764006785212636e-06, "loss": 0.1054, "step": 5482 }, { "epoch": 1.7767336357744652, "grad_norm": 0.4340040683746338, "learning_rate": 3.7623119317225883e-06, "loss": 0.1028, "step": 5483 }, { "epoch": 1.7770576798444588, "grad_norm": 0.4386005997657776, "learning_rate": 3.76061722971358e-06, "loss": 0.1046, "step": 5484 }, { "epoch": 1.7773817239144525, "grad_norm": 0.4821830987930298, "learning_rate": 3.758922679393024e-06, "loss": 0.1166, "step": 5485 }, { "epoch": 1.777705767984446, "grad_norm": 0.4418981671333313, "learning_rate": 3.7572282809683174e-06, "loss": 0.1039, "step": 5486 }, { "epoch": 1.7780298120544393, "grad_norm": 0.44899412989616394, "learning_rate": 3.7555340346468396e-06, "loss": 0.1077, "step": 5487 }, { "epoch": 1.778353856124433, "grad_norm": 0.47012320160865784, "learning_rate": 3.7538399406359473e-06, "loss": 0.1132, "step": 5488 }, { "epoch": 1.7786779001944264, "grad_norm": 0.41314542293548584, "learning_rate": 3.752145999142983e-06, "loss": 0.0979, "step": 5489 }, { "epoch": 1.77900194426442, "grad_norm": 0.4720423221588135, "learning_rate": 3.7504522103752674e-06, "loss": 0.1134, "step": 5490 }, { "epoch": 1.7793259883344135, "grad_norm": 0.43733474612236023, "learning_rate": 3.7487585745401058e-06, "loss": 0.1095, "step": 5491 }, { "epoch": 1.779650032404407, "grad_norm": 0.44804707169532776, "learning_rate": 3.747065091844782e-06, "loss": 0.1034, "step": 5492 }, { "epoch": 1.7799740764744005, "grad_norm": 0.44590461254119873, "learning_rate": 3.7453717624965595e-06, "loss": 0.1028, "step": 5493 }, { "epoch": 1.7802981205443942, "grad_norm": 0.44284960627555847, "learning_rate": 3.74367858670269e-06, "loss": 0.106, "step": 5494 }, { "epoch": 1.7806221646143876, "grad_norm": 0.43234145641326904, "learning_rate": 3.741985564670396e-06, "loss": 0.1013, "step": 5495 }, { "epoch": 1.780946208684381, "grad_norm": 0.46835172176361084, "learning_rate": 3.7402926966068942e-06, "loss": 0.1147, "step": 5496 }, { "epoch": 1.7812702527543745, "grad_norm": 0.4646758735179901, "learning_rate": 3.7385999827193692e-06, "loss": 0.1094, "step": 5497 }, { "epoch": 1.781594296824368, "grad_norm": 0.4845737814903259, "learning_rate": 3.7369074232149965e-06, "loss": 0.1239, "step": 5498 }, { "epoch": 1.7819183408943617, "grad_norm": 0.46913474798202515, "learning_rate": 3.7352150183009274e-06, "loss": 0.1126, "step": 5499 }, { "epoch": 1.7822423849643552, "grad_norm": 0.43723833560943604, "learning_rate": 3.733522768184294e-06, "loss": 0.0965, "step": 5500 }, { "epoch": 1.7825664290343486, "grad_norm": 0.4722057282924652, "learning_rate": 3.7318306730722153e-06, "loss": 0.1103, "step": 5501 }, { "epoch": 1.7828904731043422, "grad_norm": 0.455968976020813, "learning_rate": 3.7301387331717832e-06, "loss": 0.108, "step": 5502 }, { "epoch": 1.7832145171743357, "grad_norm": 0.5014598369598389, "learning_rate": 3.728446948690079e-06, "loss": 0.1178, "step": 5503 }, { "epoch": 1.7835385612443293, "grad_norm": 0.43590158224105835, "learning_rate": 3.7267553198341566e-06, "loss": 0.0998, "step": 5504 }, { "epoch": 1.7838626053143227, "grad_norm": 0.4979320466518402, "learning_rate": 3.72506384681106e-06, "loss": 0.1187, "step": 5505 }, { "epoch": 1.7841866493843161, "grad_norm": 0.4761984050273895, "learning_rate": 3.723372529827805e-06, "loss": 0.1162, "step": 5506 }, { "epoch": 1.7845106934543098, "grad_norm": 0.4369703531265259, "learning_rate": 3.7216813690913935e-06, "loss": 0.0991, "step": 5507 }, { "epoch": 1.7848347375243034, "grad_norm": 0.44826263189315796, "learning_rate": 3.7199903648088065e-06, "loss": 0.0997, "step": 5508 }, { "epoch": 1.7851587815942969, "grad_norm": 0.43109968304634094, "learning_rate": 3.7182995171870082e-06, "loss": 0.1003, "step": 5509 }, { "epoch": 1.7854828256642903, "grad_norm": 0.4809877276420593, "learning_rate": 3.716608826432942e-06, "loss": 0.1218, "step": 5510 }, { "epoch": 1.7858068697342837, "grad_norm": 0.46688663959503174, "learning_rate": 3.714918292753531e-06, "loss": 0.1139, "step": 5511 }, { "epoch": 1.7861309138042774, "grad_norm": 0.4388737082481384, "learning_rate": 3.7132279163556784e-06, "loss": 0.104, "step": 5512 }, { "epoch": 1.786454957874271, "grad_norm": 0.4492657780647278, "learning_rate": 3.711537697446274e-06, "loss": 0.111, "step": 5513 }, { "epoch": 1.7867790019442644, "grad_norm": 0.4479140639305115, "learning_rate": 3.70984763623218e-06, "loss": 0.1065, "step": 5514 }, { "epoch": 1.7871030460142578, "grad_norm": 0.4573601186275482, "learning_rate": 3.708157732920248e-06, "loss": 0.1086, "step": 5515 }, { "epoch": 1.7874270900842515, "grad_norm": 0.4481222331523895, "learning_rate": 3.7064679877173027e-06, "loss": 0.1009, "step": 5516 }, { "epoch": 1.7877511341542451, "grad_norm": 0.4937301576137543, "learning_rate": 3.704778400830153e-06, "loss": 0.1161, "step": 5517 }, { "epoch": 1.7880751782242386, "grad_norm": 0.4630590081214905, "learning_rate": 3.7030889724655894e-06, "loss": 0.1092, "step": 5518 }, { "epoch": 1.788399222294232, "grad_norm": 0.453305184841156, "learning_rate": 3.7013997028303792e-06, "loss": 0.1042, "step": 5519 }, { "epoch": 1.7887232663642254, "grad_norm": 0.4361039698123932, "learning_rate": 3.6997105921312755e-06, "loss": 0.1023, "step": 5520 }, { "epoch": 1.789047310434219, "grad_norm": 0.44130071997642517, "learning_rate": 3.6980216405750047e-06, "loss": 0.1049, "step": 5521 }, { "epoch": 1.7893713545042127, "grad_norm": 0.511313796043396, "learning_rate": 3.696332848368284e-06, "loss": 0.1254, "step": 5522 }, { "epoch": 1.7896953985742061, "grad_norm": 0.4271393120288849, "learning_rate": 3.6946442157178013e-06, "loss": 0.1025, "step": 5523 }, { "epoch": 1.7900194426441995, "grad_norm": 0.4805537164211273, "learning_rate": 3.692955742830228e-06, "loss": 0.1182, "step": 5524 }, { "epoch": 1.790343486714193, "grad_norm": 0.44407039880752563, "learning_rate": 3.691267429912221e-06, "loss": 0.1049, "step": 5525 }, { "epoch": 1.7906675307841866, "grad_norm": 0.44100818037986755, "learning_rate": 3.6895792771704085e-06, "loss": 0.1103, "step": 5526 }, { "epoch": 1.7909915748541803, "grad_norm": 0.4450782835483551, "learning_rate": 3.687891284811409e-06, "loss": 0.1084, "step": 5527 }, { "epoch": 1.7913156189241737, "grad_norm": 0.4650527834892273, "learning_rate": 3.6862034530418135e-06, "loss": 0.1056, "step": 5528 }, { "epoch": 1.791639662994167, "grad_norm": 0.447388231754303, "learning_rate": 3.684515782068197e-06, "loss": 0.1096, "step": 5529 }, { "epoch": 1.7919637070641607, "grad_norm": 0.44772782921791077, "learning_rate": 3.682828272097113e-06, "loss": 0.1051, "step": 5530 }, { "epoch": 1.7922877511341544, "grad_norm": 0.4599211812019348, "learning_rate": 3.681140923335098e-06, "loss": 0.1106, "step": 5531 }, { "epoch": 1.7926117952041478, "grad_norm": 0.4325055181980133, "learning_rate": 3.6794537359886667e-06, "loss": 0.0994, "step": 5532 }, { "epoch": 1.7929358392741412, "grad_norm": 0.46257203817367554, "learning_rate": 3.6777667102643123e-06, "loss": 0.1054, "step": 5533 }, { "epoch": 1.7932598833441347, "grad_norm": 0.4851507842540741, "learning_rate": 3.676079846368514e-06, "loss": 0.11, "step": 5534 }, { "epoch": 1.7935839274141283, "grad_norm": 0.4444712698459625, "learning_rate": 3.6743931445077273e-06, "loss": 0.1056, "step": 5535 }, { "epoch": 1.793907971484122, "grad_norm": 0.44603532552719116, "learning_rate": 3.672706604888384e-06, "loss": 0.1091, "step": 5536 }, { "epoch": 1.7942320155541154, "grad_norm": 0.41911235451698303, "learning_rate": 3.671020227716905e-06, "loss": 0.0951, "step": 5537 }, { "epoch": 1.7945560596241088, "grad_norm": 0.48308685421943665, "learning_rate": 3.6693340131996823e-06, "loss": 0.1204, "step": 5538 }, { "epoch": 1.7948801036941024, "grad_norm": 0.43123897910118103, "learning_rate": 3.6676479615430973e-06, "loss": 0.0991, "step": 5539 }, { "epoch": 1.7952041477640959, "grad_norm": 0.5153533816337585, "learning_rate": 3.6659620729535022e-06, "loss": 0.1234, "step": 5540 }, { "epoch": 1.7955281918340895, "grad_norm": 0.4650425910949707, "learning_rate": 3.6642763476372357e-06, "loss": 0.1082, "step": 5541 }, { "epoch": 1.795852235904083, "grad_norm": 0.4416308104991913, "learning_rate": 3.6625907858006137e-06, "loss": 0.1003, "step": 5542 }, { "epoch": 1.7961762799740764, "grad_norm": 0.41461002826690674, "learning_rate": 3.6609053876499306e-06, "loss": 0.0952, "step": 5543 }, { "epoch": 1.79650032404407, "grad_norm": 0.42781490087509155, "learning_rate": 3.6592201533914662e-06, "loss": 0.0933, "step": 5544 }, { "epoch": 1.7968243681140637, "grad_norm": 0.498722106218338, "learning_rate": 3.657535083231474e-06, "loss": 0.1121, "step": 5545 }, { "epoch": 1.797148412184057, "grad_norm": 0.44308096170425415, "learning_rate": 3.6558501773761923e-06, "loss": 0.1028, "step": 5546 }, { "epoch": 1.7974724562540505, "grad_norm": 0.4877070486545563, "learning_rate": 3.654165436031838e-06, "loss": 0.1228, "step": 5547 }, { "epoch": 1.797796500324044, "grad_norm": 0.46494802832603455, "learning_rate": 3.6524808594046025e-06, "loss": 0.1128, "step": 5548 }, { "epoch": 1.7981205443940376, "grad_norm": 0.4507131576538086, "learning_rate": 3.6507964477006675e-06, "loss": 0.1131, "step": 5549 }, { "epoch": 1.7984445884640312, "grad_norm": 0.41261738538742065, "learning_rate": 3.6491122011261842e-06, "loss": 0.0928, "step": 5550 }, { "epoch": 1.7987686325340246, "grad_norm": 0.45822086930274963, "learning_rate": 3.647428119887292e-06, "loss": 0.1066, "step": 5551 }, { "epoch": 1.799092676604018, "grad_norm": 0.4836665987968445, "learning_rate": 3.645744204190101e-06, "loss": 0.1135, "step": 5552 }, { "epoch": 1.7994167206740117, "grad_norm": 0.43096283078193665, "learning_rate": 3.6440604542407114e-06, "loss": 0.0993, "step": 5553 }, { "epoch": 1.7997407647440054, "grad_norm": 0.47783106565475464, "learning_rate": 3.6423768702451955e-06, "loss": 0.1168, "step": 5554 }, { "epoch": 1.8000648088139988, "grad_norm": 0.4445684552192688, "learning_rate": 3.6406934524096066e-06, "loss": 0.0983, "step": 5555 }, { "epoch": 1.8003888528839922, "grad_norm": 0.49606752395629883, "learning_rate": 3.639010200939982e-06, "loss": 0.1198, "step": 5556 }, { "epoch": 1.8007128969539856, "grad_norm": 0.420172780752182, "learning_rate": 3.637327116042331e-06, "loss": 0.0952, "step": 5557 }, { "epoch": 1.8010369410239793, "grad_norm": 0.4633721709251404, "learning_rate": 3.635644197922651e-06, "loss": 0.1064, "step": 5558 }, { "epoch": 1.801360985093973, "grad_norm": 0.4439217150211334, "learning_rate": 3.6339614467869135e-06, "loss": 0.1008, "step": 5559 }, { "epoch": 1.8016850291639663, "grad_norm": 0.46251246333122253, "learning_rate": 3.6322788628410687e-06, "loss": 0.1099, "step": 5560 }, { "epoch": 1.8020090732339598, "grad_norm": 0.46781501173973083, "learning_rate": 3.6305964462910524e-06, "loss": 0.1144, "step": 5561 }, { "epoch": 1.8023331173039532, "grad_norm": 0.4599486291408539, "learning_rate": 3.6289141973427733e-06, "loss": 0.1003, "step": 5562 }, { "epoch": 1.8026571613739468, "grad_norm": 0.47066158056259155, "learning_rate": 3.6272321162021247e-06, "loss": 0.1154, "step": 5563 }, { "epoch": 1.8029812054439405, "grad_norm": 0.48277753591537476, "learning_rate": 3.625550203074973e-06, "loss": 0.1131, "step": 5564 }, { "epoch": 1.803305249513934, "grad_norm": 0.5130548477172852, "learning_rate": 3.623868458167173e-06, "loss": 0.1204, "step": 5565 }, { "epoch": 1.8036292935839273, "grad_norm": 0.4539277255535126, "learning_rate": 3.6221868816845517e-06, "loss": 0.1028, "step": 5566 }, { "epoch": 1.803953337653921, "grad_norm": 0.4764772355556488, "learning_rate": 3.620505473832916e-06, "loss": 0.1112, "step": 5567 }, { "epoch": 1.8042773817239146, "grad_norm": 0.49271273612976074, "learning_rate": 3.6188242348180577e-06, "loss": 0.1194, "step": 5568 }, { "epoch": 1.804601425793908, "grad_norm": 0.4314182698726654, "learning_rate": 3.61714316484574e-06, "loss": 0.1058, "step": 5569 }, { "epoch": 1.8049254698639015, "grad_norm": 0.46068280935287476, "learning_rate": 3.6154622641217143e-06, "loss": 0.1167, "step": 5570 }, { "epoch": 1.8052495139338949, "grad_norm": 0.47679510712623596, "learning_rate": 3.613781532851702e-06, "loss": 0.1109, "step": 5571 }, { "epoch": 1.8055735580038885, "grad_norm": 0.47514277696609497, "learning_rate": 3.6121009712414124e-06, "loss": 0.1124, "step": 5572 }, { "epoch": 1.8058976020738822, "grad_norm": 0.48157599568367004, "learning_rate": 3.6104205794965286e-06, "loss": 0.1187, "step": 5573 }, { "epoch": 1.8062216461438756, "grad_norm": 0.4681515395641327, "learning_rate": 3.6087403578227104e-06, "loss": 0.108, "step": 5574 }, { "epoch": 1.806545690213869, "grad_norm": 0.44759202003479004, "learning_rate": 3.6070603064256065e-06, "loss": 0.107, "step": 5575 }, { "epoch": 1.8068697342838627, "grad_norm": 0.47079500555992126, "learning_rate": 3.6053804255108344e-06, "loss": 0.1064, "step": 5576 }, { "epoch": 1.807193778353856, "grad_norm": 0.47174084186553955, "learning_rate": 3.603700715283999e-06, "loss": 0.1108, "step": 5577 }, { "epoch": 1.8075178224238497, "grad_norm": 0.4588591456413269, "learning_rate": 3.6020211759506795e-06, "loss": 0.1108, "step": 5578 }, { "epoch": 1.8078418664938432, "grad_norm": 0.45068198442459106, "learning_rate": 3.600341807716432e-06, "loss": 0.1127, "step": 5579 }, { "epoch": 1.8081659105638366, "grad_norm": 0.42220938205718994, "learning_rate": 3.5986626107867996e-06, "loss": 0.0989, "step": 5580 }, { "epoch": 1.8084899546338302, "grad_norm": 0.43438515067100525, "learning_rate": 3.596983585367297e-06, "loss": 0.0997, "step": 5581 }, { "epoch": 1.8088139987038239, "grad_norm": 0.44364234805107117, "learning_rate": 3.595304731663421e-06, "loss": 0.1031, "step": 5582 }, { "epoch": 1.8091380427738173, "grad_norm": 0.48173975944519043, "learning_rate": 3.5936260498806476e-06, "loss": 0.1232, "step": 5583 }, { "epoch": 1.8094620868438107, "grad_norm": 0.47106197476387024, "learning_rate": 3.5919475402244315e-06, "loss": 0.1097, "step": 5584 }, { "epoch": 1.8097861309138041, "grad_norm": 0.46400800347328186, "learning_rate": 3.5902692029002055e-06, "loss": 0.1185, "step": 5585 }, { "epoch": 1.8101101749837978, "grad_norm": 0.4439902901649475, "learning_rate": 3.5885910381133797e-06, "loss": 0.1044, "step": 5586 }, { "epoch": 1.8104342190537914, "grad_norm": 0.4391462802886963, "learning_rate": 3.5869130460693504e-06, "loss": 0.0995, "step": 5587 }, { "epoch": 1.8107582631237849, "grad_norm": 0.46092846989631653, "learning_rate": 3.5852352269734815e-06, "loss": 0.1063, "step": 5588 }, { "epoch": 1.8110823071937783, "grad_norm": 0.44837290048599243, "learning_rate": 3.583557581031127e-06, "loss": 0.1019, "step": 5589 }, { "epoch": 1.811406351263772, "grad_norm": 0.48929664492607117, "learning_rate": 3.581880108447612e-06, "loss": 0.1208, "step": 5590 }, { "epoch": 1.8117303953337653, "grad_norm": 0.4306938648223877, "learning_rate": 3.5802028094282416e-06, "loss": 0.1057, "step": 5591 }, { "epoch": 1.812054439403759, "grad_norm": 0.4768986403942108, "learning_rate": 3.5785256841783052e-06, "loss": 0.1156, "step": 5592 }, { "epoch": 1.8123784834737524, "grad_norm": 0.44823887944221497, "learning_rate": 3.576848732903062e-06, "loss": 0.1018, "step": 5593 }, { "epoch": 1.8127025275437458, "grad_norm": 0.4879656136035919, "learning_rate": 3.575171955807759e-06, "loss": 0.1113, "step": 5594 }, { "epoch": 1.8130265716137395, "grad_norm": 0.45889967679977417, "learning_rate": 3.5734953530976122e-06, "loss": 0.1064, "step": 5595 }, { "epoch": 1.8133506156837331, "grad_norm": 0.4404192566871643, "learning_rate": 3.571818924977827e-06, "loss": 0.1063, "step": 5596 }, { "epoch": 1.8136746597537265, "grad_norm": 0.4716794788837433, "learning_rate": 3.5701426716535793e-06, "loss": 0.1105, "step": 5597 }, { "epoch": 1.81399870382372, "grad_norm": 0.4661045968532562, "learning_rate": 3.5684665933300244e-06, "loss": 0.1094, "step": 5598 }, { "epoch": 1.8143227478937134, "grad_norm": 0.4793951213359833, "learning_rate": 3.5667906902123027e-06, "loss": 0.1089, "step": 5599 }, { "epoch": 1.814646791963707, "grad_norm": 0.4394589960575104, "learning_rate": 3.5651149625055235e-06, "loss": 0.0979, "step": 5600 }, { "epoch": 1.8149708360337007, "grad_norm": 0.44929248094558716, "learning_rate": 3.563439410414784e-06, "loss": 0.1023, "step": 5601 }, { "epoch": 1.815294880103694, "grad_norm": 0.445188045501709, "learning_rate": 3.5617640341451545e-06, "loss": 0.1044, "step": 5602 }, { "epoch": 1.8156189241736875, "grad_norm": 0.42353734374046326, "learning_rate": 3.5600888339016827e-06, "loss": 0.0996, "step": 5603 }, { "epoch": 1.8159429682436812, "grad_norm": 0.4926955997943878, "learning_rate": 3.5584138098893974e-06, "loss": 0.1165, "step": 5604 }, { "epoch": 1.8162670123136748, "grad_norm": 0.46474507451057434, "learning_rate": 3.5567389623133068e-06, "loss": 0.1109, "step": 5605 }, { "epoch": 1.8165910563836682, "grad_norm": 0.43861955404281616, "learning_rate": 3.555064291378396e-06, "loss": 0.1047, "step": 5606 }, { "epoch": 1.8169151004536617, "grad_norm": 0.4347488582134247, "learning_rate": 3.5533897972896263e-06, "loss": 0.1049, "step": 5607 }, { "epoch": 1.817239144523655, "grad_norm": 0.47579696774482727, "learning_rate": 3.5517154802519432e-06, "loss": 0.1033, "step": 5608 }, { "epoch": 1.8175631885936487, "grad_norm": 0.4459049105644226, "learning_rate": 3.550041340470265e-06, "loss": 0.1096, "step": 5609 }, { "epoch": 1.8178872326636424, "grad_norm": 0.4174961447715759, "learning_rate": 3.5483673781494876e-06, "loss": 0.1017, "step": 5610 }, { "epoch": 1.8182112767336358, "grad_norm": 0.47640395164489746, "learning_rate": 3.5466935934944917e-06, "loss": 0.1145, "step": 5611 }, { "epoch": 1.8185353208036292, "grad_norm": 0.4541381895542145, "learning_rate": 3.5450199867101298e-06, "loss": 0.1034, "step": 5612 }, { "epoch": 1.8188593648736227, "grad_norm": 0.4641683101654053, "learning_rate": 3.5433465580012377e-06, "loss": 0.1022, "step": 5613 }, { "epoch": 1.8191834089436163, "grad_norm": 0.4748760163784027, "learning_rate": 3.5416733075726258e-06, "loss": 0.1105, "step": 5614 }, { "epoch": 1.81950745301361, "grad_norm": 0.47564688324928284, "learning_rate": 3.5400002356290817e-06, "loss": 0.1144, "step": 5615 }, { "epoch": 1.8198314970836034, "grad_norm": 0.44213637709617615, "learning_rate": 3.5383273423753766e-06, "loss": 0.1012, "step": 5616 }, { "epoch": 1.8201555411535968, "grad_norm": 0.44395461678504944, "learning_rate": 3.536654628016252e-06, "loss": 0.1014, "step": 5617 }, { "epoch": 1.8204795852235904, "grad_norm": 0.4228231906890869, "learning_rate": 3.534982092756437e-06, "loss": 0.0976, "step": 5618 }, { "epoch": 1.820803629293584, "grad_norm": 0.4690253436565399, "learning_rate": 3.53330973680063e-06, "loss": 0.1181, "step": 5619 }, { "epoch": 1.8211276733635775, "grad_norm": 0.42680132389068604, "learning_rate": 3.5316375603535135e-06, "loss": 0.1057, "step": 5620 }, { "epoch": 1.821451717433571, "grad_norm": 0.42568618059158325, "learning_rate": 3.5299655636197454e-06, "loss": 0.0944, "step": 5621 }, { "epoch": 1.8217757615035644, "grad_norm": 0.44182854890823364, "learning_rate": 3.528293746803959e-06, "loss": 0.1015, "step": 5622 }, { "epoch": 1.822099805573558, "grad_norm": 0.4019891917705536, "learning_rate": 3.5266221101107735e-06, "loss": 0.0953, "step": 5623 }, { "epoch": 1.8224238496435516, "grad_norm": 0.4538567364215851, "learning_rate": 3.5249506537447763e-06, "loss": 0.1021, "step": 5624 }, { "epoch": 1.822747893713545, "grad_norm": 0.4837633967399597, "learning_rate": 3.523279377910541e-06, "loss": 0.1178, "step": 5625 }, { "epoch": 1.8230719377835385, "grad_norm": 0.4749487340450287, "learning_rate": 3.521608282812613e-06, "loss": 0.1084, "step": 5626 }, { "epoch": 1.8233959818535321, "grad_norm": 0.5325826406478882, "learning_rate": 3.519937368655519e-06, "loss": 0.1255, "step": 5627 }, { "epoch": 1.8237200259235256, "grad_norm": 0.526648759841919, "learning_rate": 3.5182666356437646e-06, "loss": 0.1211, "step": 5628 }, { "epoch": 1.8240440699935192, "grad_norm": 0.4531908929347992, "learning_rate": 3.516596083981827e-06, "loss": 0.0992, "step": 5629 }, { "epoch": 1.8243681140635126, "grad_norm": 0.492156058549881, "learning_rate": 3.514925713874171e-06, "loss": 0.1204, "step": 5630 }, { "epoch": 1.824692158133506, "grad_norm": 0.47805356979370117, "learning_rate": 3.513255525525228e-06, "loss": 0.1151, "step": 5631 }, { "epoch": 1.8250162022034997, "grad_norm": 0.47491201758384705, "learning_rate": 3.5115855191394187e-06, "loss": 0.1097, "step": 5632 }, { "epoch": 1.8253402462734933, "grad_norm": 0.47285890579223633, "learning_rate": 3.5099156949211323e-06, "loss": 0.1123, "step": 5633 }, { "epoch": 1.8256642903434868, "grad_norm": 0.46936270594596863, "learning_rate": 3.508246053074738e-06, "loss": 0.104, "step": 5634 }, { "epoch": 1.8259883344134802, "grad_norm": 0.43193650245666504, "learning_rate": 3.5065765938045883e-06, "loss": 0.0975, "step": 5635 }, { "epoch": 1.8263123784834736, "grad_norm": 0.47375643253326416, "learning_rate": 3.504907317315004e-06, "loss": 0.1088, "step": 5636 }, { "epoch": 1.8266364225534673, "grad_norm": 0.4534396827220917, "learning_rate": 3.5032382238102912e-06, "loss": 0.0999, "step": 5637 }, { "epoch": 1.826960466623461, "grad_norm": 0.46225836873054504, "learning_rate": 3.5015693134947287e-06, "loss": 0.104, "step": 5638 }, { "epoch": 1.8272845106934543, "grad_norm": 0.45612695813179016, "learning_rate": 3.499900586572578e-06, "loss": 0.1095, "step": 5639 }, { "epoch": 1.8276085547634477, "grad_norm": 0.455872118473053, "learning_rate": 3.4982320432480736e-06, "loss": 0.1048, "step": 5640 }, { "epoch": 1.8279325988334414, "grad_norm": 0.4655687212944031, "learning_rate": 3.4965636837254267e-06, "loss": 0.1144, "step": 5641 }, { "epoch": 1.8282566429034348, "grad_norm": 0.4484519362449646, "learning_rate": 3.494895508208833e-06, "loss": 0.1062, "step": 5642 }, { "epoch": 1.8285806869734285, "grad_norm": 0.4629919230937958, "learning_rate": 3.493227516902456e-06, "loss": 0.1033, "step": 5643 }, { "epoch": 1.8289047310434219, "grad_norm": 0.4088801145553589, "learning_rate": 3.4915597100104464e-06, "loss": 0.0981, "step": 5644 }, { "epoch": 1.8292287751134153, "grad_norm": 0.4668869376182556, "learning_rate": 3.489892087736926e-06, "loss": 0.1155, "step": 5645 }, { "epoch": 1.829552819183409, "grad_norm": 0.47804510593414307, "learning_rate": 3.4882246502859937e-06, "loss": 0.1154, "step": 5646 }, { "epoch": 1.8298768632534026, "grad_norm": 0.48258739709854126, "learning_rate": 3.4865573978617295e-06, "loss": 0.1073, "step": 5647 }, { "epoch": 1.830200907323396, "grad_norm": 0.44823774695396423, "learning_rate": 3.4848903306681868e-06, "loss": 0.1016, "step": 5648 }, { "epoch": 1.8305249513933894, "grad_norm": 0.4402213990688324, "learning_rate": 3.483223448909403e-06, "loss": 0.1098, "step": 5649 }, { "epoch": 1.8308489954633829, "grad_norm": 0.4777219891548157, "learning_rate": 3.4815567527893823e-06, "loss": 0.1184, "step": 5650 }, { "epoch": 1.8311730395333765, "grad_norm": 0.47285157442092896, "learning_rate": 3.4798902425121185e-06, "loss": 0.1162, "step": 5651 }, { "epoch": 1.8314970836033702, "grad_norm": 0.45721814036369324, "learning_rate": 3.4782239182815725e-06, "loss": 0.1114, "step": 5652 }, { "epoch": 1.8318211276733636, "grad_norm": 0.48380038142204285, "learning_rate": 3.4765577803016852e-06, "loss": 0.1142, "step": 5653 }, { "epoch": 1.832145171743357, "grad_norm": 0.48291128873825073, "learning_rate": 3.4748918287763798e-06, "loss": 0.1152, "step": 5654 }, { "epoch": 1.8324692158133506, "grad_norm": 0.43343526124954224, "learning_rate": 3.4732260639095493e-06, "loss": 0.097, "step": 5655 }, { "epoch": 1.8327932598833443, "grad_norm": 0.4707466661930084, "learning_rate": 3.471560485905068e-06, "loss": 0.1117, "step": 5656 }, { "epoch": 1.8331173039533377, "grad_norm": 0.4821437895298004, "learning_rate": 3.4698950949667875e-06, "loss": 0.1071, "step": 5657 }, { "epoch": 1.8334413480233311, "grad_norm": 0.46275627613067627, "learning_rate": 3.4682298912985344e-06, "loss": 0.113, "step": 5658 }, { "epoch": 1.8337653920933246, "grad_norm": 0.4579031467437744, "learning_rate": 3.466564875104115e-06, "loss": 0.1102, "step": 5659 }, { "epoch": 1.8340894361633182, "grad_norm": 0.4628545045852661, "learning_rate": 3.4649000465873073e-06, "loss": 0.0974, "step": 5660 }, { "epoch": 1.8344134802333119, "grad_norm": 0.4802587628364563, "learning_rate": 3.463235405951876e-06, "loss": 0.1185, "step": 5661 }, { "epoch": 1.8347375243033053, "grad_norm": 0.4990658760070801, "learning_rate": 3.4615709534015512e-06, "loss": 0.1284, "step": 5662 }, { "epoch": 1.8350615683732987, "grad_norm": 0.498654842376709, "learning_rate": 3.4599066891400507e-06, "loss": 0.111, "step": 5663 }, { "epoch": 1.8353856124432921, "grad_norm": 0.4472542107105255, "learning_rate": 3.4582426133710623e-06, "loss": 0.1071, "step": 5664 }, { "epoch": 1.8357096565132858, "grad_norm": 0.48664551973342896, "learning_rate": 3.4565787262982507e-06, "loss": 0.1136, "step": 5665 }, { "epoch": 1.8360337005832794, "grad_norm": 0.4686160385608673, "learning_rate": 3.4549150281252635e-06, "loss": 0.1135, "step": 5666 }, { "epoch": 1.8363577446532728, "grad_norm": 0.45993703603744507, "learning_rate": 3.4532515190557183e-06, "loss": 0.1071, "step": 5667 }, { "epoch": 1.8366817887232663, "grad_norm": 0.475545197725296, "learning_rate": 3.451588199293214e-06, "loss": 0.108, "step": 5668 }, { "epoch": 1.83700583279326, "grad_norm": 0.4416792392730713, "learning_rate": 3.4499250690413246e-06, "loss": 0.1078, "step": 5669 }, { "epoch": 1.8373298768632536, "grad_norm": 0.45210230350494385, "learning_rate": 3.4482621285035996e-06, "loss": 0.1058, "step": 5670 }, { "epoch": 1.837653920933247, "grad_norm": 0.4606115221977234, "learning_rate": 3.4465993778835692e-06, "loss": 0.1112, "step": 5671 }, { "epoch": 1.8379779650032404, "grad_norm": 0.4382837414741516, "learning_rate": 3.4449368173847354e-06, "loss": 0.0989, "step": 5672 }, { "epoch": 1.8383020090732338, "grad_norm": 0.42500272393226624, "learning_rate": 3.443274447210583e-06, "loss": 0.1037, "step": 5673 }, { "epoch": 1.8386260531432275, "grad_norm": 0.45148107409477234, "learning_rate": 3.4416122675645656e-06, "loss": 0.1102, "step": 5674 }, { "epoch": 1.8389500972132211, "grad_norm": 0.426503986120224, "learning_rate": 3.4399502786501227e-06, "loss": 0.1035, "step": 5675 }, { "epoch": 1.8392741412832145, "grad_norm": 0.4459628760814667, "learning_rate": 3.438288480670663e-06, "loss": 0.1039, "step": 5676 }, { "epoch": 1.839598185353208, "grad_norm": 0.4436597526073456, "learning_rate": 3.4366268738295733e-06, "loss": 0.1022, "step": 5677 }, { "epoch": 1.8399222294232016, "grad_norm": 0.5080084204673767, "learning_rate": 3.43496545833022e-06, "loss": 0.1139, "step": 5678 }, { "epoch": 1.840246273493195, "grad_norm": 0.44031599164009094, "learning_rate": 3.433304234375944e-06, "loss": 0.108, "step": 5679 }, { "epoch": 1.8405703175631887, "grad_norm": 0.4491998553276062, "learning_rate": 3.4316432021700636e-06, "loss": 0.1006, "step": 5680 }, { "epoch": 1.840894361633182, "grad_norm": 0.4462529122829437, "learning_rate": 3.4299823619158722e-06, "loss": 0.1104, "step": 5681 }, { "epoch": 1.8412184057031755, "grad_norm": 0.4552091360092163, "learning_rate": 3.428321713816639e-06, "loss": 0.1062, "step": 5682 }, { "epoch": 1.8415424497731692, "grad_norm": 0.4409257173538208, "learning_rate": 3.4266612580756155e-06, "loss": 0.1011, "step": 5683 }, { "epoch": 1.8418664938431628, "grad_norm": 0.4279926121234894, "learning_rate": 3.4250009948960205e-06, "loss": 0.0936, "step": 5684 }, { "epoch": 1.8421905379131562, "grad_norm": 0.4621392786502838, "learning_rate": 3.423340924481059e-06, "loss": 0.1024, "step": 5685 }, { "epoch": 1.8425145819831497, "grad_norm": 0.46478646993637085, "learning_rate": 3.4216810470339022e-06, "loss": 0.1095, "step": 5686 }, { "epoch": 1.842838626053143, "grad_norm": 0.40607044100761414, "learning_rate": 3.4200213627577087e-06, "loss": 0.0902, "step": 5687 }, { "epoch": 1.8431626701231367, "grad_norm": 0.4548855423927307, "learning_rate": 3.418361871855605e-06, "loss": 0.1119, "step": 5688 }, { "epoch": 1.8434867141931304, "grad_norm": 0.4773532748222351, "learning_rate": 3.4167025745306954e-06, "loss": 0.1181, "step": 5689 }, { "epoch": 1.8438107582631238, "grad_norm": 0.4710024893283844, "learning_rate": 3.4150434709860648e-06, "loss": 0.1134, "step": 5690 }, { "epoch": 1.8441348023331172, "grad_norm": 0.45383182168006897, "learning_rate": 3.4133845614247667e-06, "loss": 0.1053, "step": 5691 }, { "epoch": 1.8444588464031109, "grad_norm": 0.4348435699939728, "learning_rate": 3.4117258460498414e-06, "loss": 0.1008, "step": 5692 }, { "epoch": 1.8447828904731045, "grad_norm": 0.4294411242008209, "learning_rate": 3.4100673250642967e-06, "loss": 0.0993, "step": 5693 }, { "epoch": 1.845106934543098, "grad_norm": 0.44455474615097046, "learning_rate": 3.408408998671118e-06, "loss": 0.1039, "step": 5694 }, { "epoch": 1.8454309786130914, "grad_norm": 0.46732982993125916, "learning_rate": 3.4067508670732712e-06, "loss": 0.1103, "step": 5695 }, { "epoch": 1.8457550226830848, "grad_norm": 0.47106361389160156, "learning_rate": 3.405092930473693e-06, "loss": 0.1151, "step": 5696 }, { "epoch": 1.8460790667530784, "grad_norm": 0.45745667815208435, "learning_rate": 3.403435189075302e-06, "loss": 0.1103, "step": 5697 }, { "epoch": 1.846403110823072, "grad_norm": 0.43495872616767883, "learning_rate": 3.4017776430809866e-06, "loss": 0.1061, "step": 5698 }, { "epoch": 1.8467271548930655, "grad_norm": 0.4372204542160034, "learning_rate": 3.4001202926936177e-06, "loss": 0.1018, "step": 5699 }, { "epoch": 1.847051198963059, "grad_norm": 0.4519484341144562, "learning_rate": 3.3984631381160355e-06, "loss": 0.1047, "step": 5700 }, { "epoch": 1.8473752430330523, "grad_norm": 0.4064551889896393, "learning_rate": 3.396806179551061e-06, "loss": 0.0904, "step": 5701 }, { "epoch": 1.847699287103046, "grad_norm": 0.45493823289871216, "learning_rate": 3.395149417201491e-06, "loss": 0.1089, "step": 5702 }, { "epoch": 1.8480233311730396, "grad_norm": 0.45996221899986267, "learning_rate": 3.3934928512700936e-06, "loss": 0.1034, "step": 5703 }, { "epoch": 1.848347375243033, "grad_norm": 0.46049776673316956, "learning_rate": 3.3918364819596222e-06, "loss": 0.1139, "step": 5704 }, { "epoch": 1.8486714193130265, "grad_norm": 0.47365984320640564, "learning_rate": 3.390180309472796e-06, "loss": 0.1114, "step": 5705 }, { "epoch": 1.8489954633830201, "grad_norm": 0.44574666023254395, "learning_rate": 3.388524334012315e-06, "loss": 0.1061, "step": 5706 }, { "epoch": 1.8493195074530138, "grad_norm": 0.4225917160511017, "learning_rate": 3.386868555780856e-06, "loss": 0.1057, "step": 5707 }, { "epoch": 1.8496435515230072, "grad_norm": 0.4410113990306854, "learning_rate": 3.385212974981068e-06, "loss": 0.108, "step": 5708 }, { "epoch": 1.8499675955930006, "grad_norm": 0.4146822392940521, "learning_rate": 3.3835575918155814e-06, "loss": 0.1014, "step": 5709 }, { "epoch": 1.850291639662994, "grad_norm": 0.4648820161819458, "learning_rate": 3.3819024064869967e-06, "loss": 0.1209, "step": 5710 }, { "epoch": 1.8506156837329877, "grad_norm": 0.48059096932411194, "learning_rate": 3.3802474191978927e-06, "loss": 0.1197, "step": 5711 }, { "epoch": 1.8509397278029813, "grad_norm": 0.42399024963378906, "learning_rate": 3.3785926301508255e-06, "loss": 0.0955, "step": 5712 }, { "epoch": 1.8512637718729748, "grad_norm": 0.4546625316143036, "learning_rate": 3.3769380395483215e-06, "loss": 0.1038, "step": 5713 }, { "epoch": 1.8515878159429682, "grad_norm": 0.5044828057289124, "learning_rate": 3.3752836475928906e-06, "loss": 0.1213, "step": 5714 }, { "epoch": 1.8519118600129616, "grad_norm": 0.48981040716171265, "learning_rate": 3.3736294544870114e-06, "loss": 0.1122, "step": 5715 }, { "epoch": 1.8522359040829552, "grad_norm": 0.4367123544216156, "learning_rate": 3.3719754604331447e-06, "loss": 0.102, "step": 5716 }, { "epoch": 1.8525599481529489, "grad_norm": 0.45596742630004883, "learning_rate": 3.3703216656337177e-06, "loss": 0.1072, "step": 5717 }, { "epoch": 1.8528839922229423, "grad_norm": 0.4703064262866974, "learning_rate": 3.3686680702911456e-06, "loss": 0.1086, "step": 5718 }, { "epoch": 1.8532080362929357, "grad_norm": 0.45928290486335754, "learning_rate": 3.367014674607809e-06, "loss": 0.1013, "step": 5719 }, { "epoch": 1.8535320803629294, "grad_norm": 0.4606897234916687, "learning_rate": 3.3653614787860667e-06, "loss": 0.1059, "step": 5720 }, { "epoch": 1.853856124432923, "grad_norm": 0.4154520332813263, "learning_rate": 3.3637084830282545e-06, "loss": 0.0918, "step": 5721 }, { "epoch": 1.8541801685029164, "grad_norm": 0.47586536407470703, "learning_rate": 3.3620556875366837e-06, "loss": 0.1091, "step": 5722 }, { "epoch": 1.8545042125729099, "grad_norm": 0.47497645020484924, "learning_rate": 3.360403092513641e-06, "loss": 0.1187, "step": 5723 }, { "epoch": 1.8548282566429033, "grad_norm": 0.4510015547275543, "learning_rate": 3.3587506981613877e-06, "loss": 0.1117, "step": 5724 }, { "epoch": 1.855152300712897, "grad_norm": 0.4139556586742401, "learning_rate": 3.357098504682158e-06, "loss": 0.0932, "step": 5725 }, { "epoch": 1.8554763447828906, "grad_norm": 0.43587085604667664, "learning_rate": 3.355446512278169e-06, "loss": 0.1044, "step": 5726 }, { "epoch": 1.855800388852884, "grad_norm": 0.44422632455825806, "learning_rate": 3.3537947211516043e-06, "loss": 0.1004, "step": 5727 }, { "epoch": 1.8561244329228774, "grad_norm": 0.43391063809394836, "learning_rate": 3.3521431315046317e-06, "loss": 0.0964, "step": 5728 }, { "epoch": 1.856448476992871, "grad_norm": 0.42529645562171936, "learning_rate": 3.3504917435393857e-06, "loss": 0.1007, "step": 5729 }, { "epoch": 1.8567725210628645, "grad_norm": 0.45272016525268555, "learning_rate": 3.348840557457982e-06, "loss": 0.1075, "step": 5730 }, { "epoch": 1.8570965651328581, "grad_norm": 0.43974587321281433, "learning_rate": 3.3471895734625106e-06, "loss": 0.1019, "step": 5731 }, { "epoch": 1.8574206092028516, "grad_norm": 0.48083487153053284, "learning_rate": 3.3455387917550344e-06, "loss": 0.1127, "step": 5732 }, { "epoch": 1.857744653272845, "grad_norm": 0.47117093205451965, "learning_rate": 3.343888212537594e-06, "loss": 0.1122, "step": 5733 }, { "epoch": 1.8580686973428386, "grad_norm": 0.5059354901313782, "learning_rate": 3.342237836012202e-06, "loss": 0.1219, "step": 5734 }, { "epoch": 1.8583927414128323, "grad_norm": 0.4689786434173584, "learning_rate": 3.3405876623808525e-06, "loss": 0.1059, "step": 5735 }, { "epoch": 1.8587167854828257, "grad_norm": 0.4725983440876007, "learning_rate": 3.338937691845509e-06, "loss": 0.1063, "step": 5736 }, { "epoch": 1.8590408295528191, "grad_norm": 0.4368756413459778, "learning_rate": 3.3372879246081096e-06, "loss": 0.1061, "step": 5737 }, { "epoch": 1.8593648736228126, "grad_norm": 0.42094147205352783, "learning_rate": 3.3356383608705746e-06, "loss": 0.0944, "step": 5738 }, { "epoch": 1.8596889176928062, "grad_norm": 0.4665633738040924, "learning_rate": 3.3339890008347888e-06, "loss": 0.1064, "step": 5739 }, { "epoch": 1.8600129617627998, "grad_norm": 0.457518070936203, "learning_rate": 3.3323398447026235e-06, "loss": 0.1102, "step": 5740 }, { "epoch": 1.8603370058327933, "grad_norm": 0.4493125379085541, "learning_rate": 3.3306908926759163e-06, "loss": 0.11, "step": 5741 }, { "epoch": 1.8606610499027867, "grad_norm": 0.4822162985801697, "learning_rate": 3.3290421449564846e-06, "loss": 0.1196, "step": 5742 }, { "epoch": 1.8609850939727803, "grad_norm": 0.47358089685440063, "learning_rate": 3.327393601746117e-06, "loss": 0.115, "step": 5743 }, { "epoch": 1.861309138042774, "grad_norm": 0.39975741505622864, "learning_rate": 3.3257452632465804e-06, "loss": 0.0873, "step": 5744 }, { "epoch": 1.8616331821127674, "grad_norm": 0.43184319138526917, "learning_rate": 3.324097129659617e-06, "loss": 0.093, "step": 5745 }, { "epoch": 1.8619572261827608, "grad_norm": 0.45764079689979553, "learning_rate": 3.3224492011869387e-06, "loss": 0.1067, "step": 5746 }, { "epoch": 1.8622812702527543, "grad_norm": 0.41477134823799133, "learning_rate": 3.32080147803024e-06, "loss": 0.0948, "step": 5747 }, { "epoch": 1.862605314322748, "grad_norm": 0.4603864848613739, "learning_rate": 3.319153960391185e-06, "loss": 0.1203, "step": 5748 }, { "epoch": 1.8629293583927415, "grad_norm": 0.4607555568218231, "learning_rate": 3.3175066484714107e-06, "loss": 0.1018, "step": 5749 }, { "epoch": 1.863253402462735, "grad_norm": 0.439416766166687, "learning_rate": 3.315859542472537e-06, "loss": 0.0984, "step": 5750 }, { "epoch": 1.8635774465327284, "grad_norm": 0.5201691389083862, "learning_rate": 3.3142126425961506e-06, "loss": 0.1164, "step": 5751 }, { "epoch": 1.8639014906027218, "grad_norm": 0.47248104214668274, "learning_rate": 3.3125659490438177e-06, "loss": 0.1102, "step": 5752 }, { "epoch": 1.8642255346727155, "grad_norm": 0.535966157913208, "learning_rate": 3.3109194620170766e-06, "loss": 0.1067, "step": 5753 }, { "epoch": 1.864549578742709, "grad_norm": 0.503506064414978, "learning_rate": 3.3092731817174427e-06, "loss": 0.0935, "step": 5754 }, { "epoch": 1.8648736228127025, "grad_norm": 0.4545847773551941, "learning_rate": 3.307627108346404e-06, "loss": 0.1075, "step": 5755 }, { "epoch": 1.865197666882696, "grad_norm": 0.486439049243927, "learning_rate": 3.3059812421054214e-06, "loss": 0.116, "step": 5756 }, { "epoch": 1.8655217109526896, "grad_norm": 0.46297916769981384, "learning_rate": 3.3043355831959376e-06, "loss": 0.1089, "step": 5757 }, { "epoch": 1.8658457550226832, "grad_norm": 0.46269655227661133, "learning_rate": 3.302690131819361e-06, "loss": 0.1092, "step": 5758 }, { "epoch": 1.8661697990926767, "grad_norm": 0.4384731948375702, "learning_rate": 3.301044888177083e-06, "loss": 0.1047, "step": 5759 }, { "epoch": 1.86649384316267, "grad_norm": 0.48540347814559937, "learning_rate": 3.299399852470464e-06, "loss": 0.1175, "step": 5760 }, { "epoch": 1.8668178872326635, "grad_norm": 0.44267207384109497, "learning_rate": 3.2977550249008377e-06, "loss": 0.1014, "step": 5761 }, { "epoch": 1.8671419313026572, "grad_norm": 0.4874940514564514, "learning_rate": 3.2961104056695194e-06, "loss": 0.124, "step": 5762 }, { "epoch": 1.8674659753726508, "grad_norm": 0.4731050431728363, "learning_rate": 3.294465994977791e-06, "loss": 0.1135, "step": 5763 }, { "epoch": 1.8677900194426442, "grad_norm": 0.42587292194366455, "learning_rate": 3.2928217930269155e-06, "loss": 0.1033, "step": 5764 }, { "epoch": 1.8681140635126376, "grad_norm": 0.46378660202026367, "learning_rate": 3.291177800018124e-06, "loss": 0.1159, "step": 5765 }, { "epoch": 1.8684381075826313, "grad_norm": 0.4911424219608307, "learning_rate": 3.289534016152629e-06, "loss": 0.1149, "step": 5766 }, { "epoch": 1.8687621516526247, "grad_norm": 0.464822381734848, "learning_rate": 3.2878904416316116e-06, "loss": 0.1121, "step": 5767 }, { "epoch": 1.8690861957226184, "grad_norm": 0.4543420672416687, "learning_rate": 3.286247076656227e-06, "loss": 0.1095, "step": 5768 }, { "epoch": 1.8694102397926118, "grad_norm": 0.44206535816192627, "learning_rate": 3.2846039214276127e-06, "loss": 0.1103, "step": 5769 }, { "epoch": 1.8697342838626052, "grad_norm": 0.434787392616272, "learning_rate": 3.28296097614687e-06, "loss": 0.1026, "step": 5770 }, { "epoch": 1.8700583279325989, "grad_norm": 0.43137770891189575, "learning_rate": 3.2813182410150834e-06, "loss": 0.1095, "step": 5771 }, { "epoch": 1.8703823720025925, "grad_norm": 0.4367537200450897, "learning_rate": 3.279675716233306e-06, "loss": 0.1023, "step": 5772 }, { "epoch": 1.870706416072586, "grad_norm": 0.4092714190483093, "learning_rate": 3.278033402002565e-06, "loss": 0.0964, "step": 5773 }, { "epoch": 1.8710304601425793, "grad_norm": 0.4678616523742676, "learning_rate": 3.276391298523868e-06, "loss": 0.1116, "step": 5774 }, { "epoch": 1.8713545042125728, "grad_norm": 0.4475710093975067, "learning_rate": 3.2747494059981887e-06, "loss": 0.1023, "step": 5775 }, { "epoch": 1.8716785482825664, "grad_norm": 0.44051387906074524, "learning_rate": 3.273107724626481e-06, "loss": 0.1134, "step": 5776 }, { "epoch": 1.87200259235256, "grad_norm": 0.43110376596450806, "learning_rate": 3.2714662546096686e-06, "loss": 0.0995, "step": 5777 }, { "epoch": 1.8723266364225535, "grad_norm": 0.4657581150531769, "learning_rate": 3.2698249961486556e-06, "loss": 0.1134, "step": 5778 }, { "epoch": 1.872650680492547, "grad_norm": 0.4725774824619293, "learning_rate": 3.2681839494443137e-06, "loss": 0.1033, "step": 5779 }, { "epoch": 1.8729747245625405, "grad_norm": 0.45880869030952454, "learning_rate": 3.266543114697488e-06, "loss": 0.1111, "step": 5780 }, { "epoch": 1.873298768632534, "grad_norm": 0.4175031781196594, "learning_rate": 3.264902492109007e-06, "loss": 0.0936, "step": 5781 }, { "epoch": 1.8736228127025276, "grad_norm": 0.42836299538612366, "learning_rate": 3.2632620818796612e-06, "loss": 0.1009, "step": 5782 }, { "epoch": 1.873946856772521, "grad_norm": 0.47268831729888916, "learning_rate": 3.2616218842102264e-06, "loss": 0.1138, "step": 5783 }, { "epoch": 1.8742709008425145, "grad_norm": 0.41969698667526245, "learning_rate": 3.2599818993014427e-06, "loss": 0.1052, "step": 5784 }, { "epoch": 1.874594944912508, "grad_norm": 0.504946768283844, "learning_rate": 3.2583421273540304e-06, "loss": 0.1163, "step": 5785 }, { "epoch": 1.8749189889825018, "grad_norm": 0.42377039790153503, "learning_rate": 3.256702568568682e-06, "loss": 0.1013, "step": 5786 }, { "epoch": 1.8752430330524952, "grad_norm": 0.46529340744018555, "learning_rate": 3.2550632231460603e-06, "loss": 0.1076, "step": 5787 }, { "epoch": 1.8755670771224886, "grad_norm": 0.46467939019203186, "learning_rate": 3.25342409128681e-06, "loss": 0.1119, "step": 5788 }, { "epoch": 1.875891121192482, "grad_norm": 0.4557555615901947, "learning_rate": 3.2517851731915407e-06, "loss": 0.1064, "step": 5789 }, { "epoch": 1.8762151652624757, "grad_norm": 0.4496322572231293, "learning_rate": 3.250146469060844e-06, "loss": 0.1057, "step": 5790 }, { "epoch": 1.8765392093324693, "grad_norm": 0.45844563841819763, "learning_rate": 3.24850797909528e-06, "loss": 0.1047, "step": 5791 }, { "epoch": 1.8768632534024627, "grad_norm": 0.42063984274864197, "learning_rate": 3.246869703495381e-06, "loss": 0.1013, "step": 5792 }, { "epoch": 1.8771872974724562, "grad_norm": 0.4641461968421936, "learning_rate": 3.2452316424616614e-06, "loss": 0.1174, "step": 5793 }, { "epoch": 1.8775113415424498, "grad_norm": 0.4542684853076935, "learning_rate": 3.2435937961945996e-06, "loss": 0.109, "step": 5794 }, { "epoch": 1.8778353856124435, "grad_norm": 0.44021913409233093, "learning_rate": 3.241956164894654e-06, "loss": 0.0934, "step": 5795 }, { "epoch": 1.8781594296824369, "grad_norm": 0.4576353430747986, "learning_rate": 3.240318748762255e-06, "loss": 0.108, "step": 5796 }, { "epoch": 1.8784834737524303, "grad_norm": 0.4571791887283325, "learning_rate": 3.2386815479978074e-06, "loss": 0.1062, "step": 5797 }, { "epoch": 1.8788075178224237, "grad_norm": 0.46839702129364014, "learning_rate": 3.2370445628016868e-06, "loss": 0.1057, "step": 5798 }, { "epoch": 1.8791315618924174, "grad_norm": 0.44016075134277344, "learning_rate": 3.2354077933742426e-06, "loss": 0.1063, "step": 5799 }, { "epoch": 1.879455605962411, "grad_norm": 0.4617091119289398, "learning_rate": 3.233771239915805e-06, "loss": 0.1066, "step": 5800 }, { "epoch": 1.8797796500324044, "grad_norm": 0.4427884519100189, "learning_rate": 3.2321349026266664e-06, "loss": 0.0998, "step": 5801 }, { "epoch": 1.8801036941023979, "grad_norm": 0.47309133410453796, "learning_rate": 3.230498781707104e-06, "loss": 0.1159, "step": 5802 }, { "epoch": 1.8804277381723913, "grad_norm": 0.45895934104919434, "learning_rate": 3.22886287735736e-06, "loss": 0.1003, "step": 5803 }, { "epoch": 1.880751782242385, "grad_norm": 0.45034587383270264, "learning_rate": 3.227227189777652e-06, "loss": 0.1086, "step": 5804 }, { "epoch": 1.8810758263123786, "grad_norm": 0.4406733512878418, "learning_rate": 3.225591719168176e-06, "loss": 0.1075, "step": 5805 }, { "epoch": 1.881399870382372, "grad_norm": 0.44266191124916077, "learning_rate": 3.223956465729096e-06, "loss": 0.1008, "step": 5806 }, { "epoch": 1.8817239144523654, "grad_norm": 0.46553224325180054, "learning_rate": 3.2223214296605516e-06, "loss": 0.1112, "step": 5807 }, { "epoch": 1.882047958522359, "grad_norm": 0.43898141384124756, "learning_rate": 3.220686611162653e-06, "loss": 0.1035, "step": 5808 }, { "epoch": 1.8823720025923527, "grad_norm": 0.4390580952167511, "learning_rate": 3.2190520104354893e-06, "loss": 0.0977, "step": 5809 }, { "epoch": 1.8826960466623461, "grad_norm": 0.4468221366405487, "learning_rate": 3.2174176276791197e-06, "loss": 0.0996, "step": 5810 }, { "epoch": 1.8830200907323396, "grad_norm": 0.4968999922275543, "learning_rate": 3.2157834630935735e-06, "loss": 0.1196, "step": 5811 }, { "epoch": 1.883344134802333, "grad_norm": 0.4182228446006775, "learning_rate": 3.2141495168788605e-06, "loss": 0.0969, "step": 5812 }, { "epoch": 1.8836681788723266, "grad_norm": 0.4635598659515381, "learning_rate": 3.212515789234957e-06, "loss": 0.1138, "step": 5813 }, { "epoch": 1.8839922229423203, "grad_norm": 0.5124261379241943, "learning_rate": 3.210882280361818e-06, "loss": 0.1231, "step": 5814 }, { "epoch": 1.8843162670123137, "grad_norm": 0.4198930263519287, "learning_rate": 3.2092489904593677e-06, "loss": 0.1012, "step": 5815 }, { "epoch": 1.8846403110823071, "grad_norm": 0.4284537136554718, "learning_rate": 3.2076159197275046e-06, "loss": 0.098, "step": 5816 }, { "epoch": 1.8849643551523008, "grad_norm": 0.469592422246933, "learning_rate": 3.2059830683661006e-06, "loss": 0.1099, "step": 5817 }, { "epoch": 1.8852883992222942, "grad_norm": 0.4751318693161011, "learning_rate": 3.2043504365750024e-06, "loss": 0.119, "step": 5818 }, { "epoch": 1.8856124432922878, "grad_norm": 0.43352800607681274, "learning_rate": 3.2027180245540286e-06, "loss": 0.0981, "step": 5819 }, { "epoch": 1.8859364873622813, "grad_norm": 0.47290316224098206, "learning_rate": 3.201085832502967e-06, "loss": 0.1019, "step": 5820 }, { "epoch": 1.8862605314322747, "grad_norm": 0.4248226583003998, "learning_rate": 3.1994538606215875e-06, "loss": 0.0956, "step": 5821 }, { "epoch": 1.8865845755022683, "grad_norm": 0.44013458490371704, "learning_rate": 3.197822109109624e-06, "loss": 0.1088, "step": 5822 }, { "epoch": 1.886908619572262, "grad_norm": 0.44821250438690186, "learning_rate": 3.1961905781667858e-06, "loss": 0.1011, "step": 5823 }, { "epoch": 1.8872326636422554, "grad_norm": 0.45587074756622314, "learning_rate": 3.194559267992761e-06, "loss": 0.0994, "step": 5824 }, { "epoch": 1.8875567077122488, "grad_norm": 0.4564206302165985, "learning_rate": 3.192928178787203e-06, "loss": 0.1084, "step": 5825 }, { "epoch": 1.8878807517822422, "grad_norm": 0.47661349177360535, "learning_rate": 3.191297310749742e-06, "loss": 0.1159, "step": 5826 }, { "epoch": 1.8882047958522359, "grad_norm": 0.4797954261302948, "learning_rate": 3.189666664079981e-06, "loss": 0.116, "step": 5827 }, { "epoch": 1.8885288399222295, "grad_norm": 0.4409724473953247, "learning_rate": 3.1880362389774944e-06, "loss": 0.1036, "step": 5828 }, { "epoch": 1.888852883992223, "grad_norm": 0.3927188217639923, "learning_rate": 3.1864060356418325e-06, "loss": 0.0926, "step": 5829 }, { "epoch": 1.8891769280622164, "grad_norm": 0.42764827609062195, "learning_rate": 3.184776054272512e-06, "loss": 0.1003, "step": 5830 }, { "epoch": 1.88950097213221, "grad_norm": 0.4615615904331207, "learning_rate": 3.183146295069032e-06, "loss": 0.113, "step": 5831 }, { "epoch": 1.8898250162022034, "grad_norm": 0.46609097719192505, "learning_rate": 3.181516758230855e-06, "loss": 0.1112, "step": 5832 }, { "epoch": 1.890149060272197, "grad_norm": 0.4488867223262787, "learning_rate": 3.1798874439574248e-06, "loss": 0.1038, "step": 5833 }, { "epoch": 1.8904731043421905, "grad_norm": 0.4500874876976013, "learning_rate": 3.1782583524481514e-06, "loss": 0.0997, "step": 5834 }, { "epoch": 1.890797148412184, "grad_norm": 0.4450390934944153, "learning_rate": 3.176629483902417e-06, "loss": 0.1015, "step": 5835 }, { "epoch": 1.8911211924821776, "grad_norm": 0.43396297097206116, "learning_rate": 3.1750008385195852e-06, "loss": 0.1025, "step": 5836 }, { "epoch": 1.8914452365521712, "grad_norm": 0.46059921383857727, "learning_rate": 3.1733724164989815e-06, "loss": 0.1083, "step": 5837 }, { "epoch": 1.8917692806221647, "grad_norm": 0.4415331482887268, "learning_rate": 3.1717442180399128e-06, "loss": 0.0962, "step": 5838 }, { "epoch": 1.892093324692158, "grad_norm": 0.4565853178501129, "learning_rate": 3.170116243341651e-06, "loss": 0.1067, "step": 5839 }, { "epoch": 1.8924173687621515, "grad_norm": 0.4561675488948822, "learning_rate": 3.168488492603447e-06, "loss": 0.111, "step": 5840 }, { "epoch": 1.8927414128321451, "grad_norm": 0.47068914771080017, "learning_rate": 3.166860966024522e-06, "loss": 0.0999, "step": 5841 }, { "epoch": 1.8930654569021388, "grad_norm": 0.45309674739837646, "learning_rate": 3.1652336638040664e-06, "loss": 0.1049, "step": 5842 }, { "epoch": 1.8933895009721322, "grad_norm": 0.45928430557250977, "learning_rate": 3.163606586141251e-06, "loss": 0.1031, "step": 5843 }, { "epoch": 1.8937135450421256, "grad_norm": 0.4930223822593689, "learning_rate": 3.161979733235209e-06, "loss": 0.1124, "step": 5844 }, { "epoch": 1.8940375891121193, "grad_norm": 0.45104873180389404, "learning_rate": 3.1603531052850565e-06, "loss": 0.1077, "step": 5845 }, { "epoch": 1.894361633182113, "grad_norm": 0.45227307081222534, "learning_rate": 3.1587267024898747e-06, "loss": 0.1077, "step": 5846 }, { "epoch": 1.8946856772521063, "grad_norm": 0.4638083875179291, "learning_rate": 3.157100525048718e-06, "loss": 0.1122, "step": 5847 }, { "epoch": 1.8950097213220998, "grad_norm": 0.45469921827316284, "learning_rate": 3.1554745731606183e-06, "loss": 0.0987, "step": 5848 }, { "epoch": 1.8953337653920932, "grad_norm": 0.4775233268737793, "learning_rate": 3.1538488470245733e-06, "loss": 0.1042, "step": 5849 }, { "epoch": 1.8956578094620868, "grad_norm": 0.4605369567871094, "learning_rate": 3.152223346839558e-06, "loss": 0.1094, "step": 5850 }, { "epoch": 1.8959818535320805, "grad_norm": 0.4589992165565491, "learning_rate": 3.1505980728045176e-06, "loss": 0.1087, "step": 5851 }, { "epoch": 1.896305897602074, "grad_norm": 0.42988982796669006, "learning_rate": 3.1489730251183675e-06, "loss": 0.0984, "step": 5852 }, { "epoch": 1.8966299416720673, "grad_norm": 0.4849797189235687, "learning_rate": 3.147348203980002e-06, "loss": 0.1115, "step": 5853 }, { "epoch": 1.8969539857420608, "grad_norm": 0.45637208223342896, "learning_rate": 3.1457236095882786e-06, "loss": 0.111, "step": 5854 }, { "epoch": 1.8972780298120544, "grad_norm": 0.45666077733039856, "learning_rate": 3.144099242142037e-06, "loss": 0.0968, "step": 5855 }, { "epoch": 1.897602073882048, "grad_norm": 0.46886733174324036, "learning_rate": 3.1424751018400794e-06, "loss": 0.1042, "step": 5856 }, { "epoch": 1.8979261179520415, "grad_norm": 0.4961826205253601, "learning_rate": 3.1408511888811894e-06, "loss": 0.1097, "step": 5857 }, { "epoch": 1.898250162022035, "grad_norm": 0.4678135812282562, "learning_rate": 3.1392275034641163e-06, "loss": 0.1074, "step": 5858 }, { "epoch": 1.8985742060920285, "grad_norm": 0.46530935168266296, "learning_rate": 3.137604045787581e-06, "loss": 0.1111, "step": 5859 }, { "epoch": 1.8988982501620222, "grad_norm": 0.4346042275428772, "learning_rate": 3.135980816050283e-06, "loss": 0.1053, "step": 5860 }, { "epoch": 1.8992222942320156, "grad_norm": 0.5243309736251831, "learning_rate": 3.134357814450886e-06, "loss": 0.1208, "step": 5861 }, { "epoch": 1.899546338302009, "grad_norm": 0.48006880283355713, "learning_rate": 3.132735041188033e-06, "loss": 0.1138, "step": 5862 }, { "epoch": 1.8998703823720025, "grad_norm": 0.46991440653800964, "learning_rate": 3.1311124964603327e-06, "loss": 0.1027, "step": 5863 }, { "epoch": 1.900194426441996, "grad_norm": 0.5111856460571289, "learning_rate": 3.129490180466373e-06, "loss": 0.114, "step": 5864 }, { "epoch": 1.9005184705119897, "grad_norm": 0.4719022214412689, "learning_rate": 3.1278680934047068e-06, "loss": 0.1115, "step": 5865 }, { "epoch": 1.9008425145819832, "grad_norm": 0.44093480706214905, "learning_rate": 3.126246235473861e-06, "loss": 0.0983, "step": 5866 }, { "epoch": 1.9011665586519766, "grad_norm": 0.46212008595466614, "learning_rate": 3.124624606872338e-06, "loss": 0.108, "step": 5867 }, { "epoch": 1.9014906027219702, "grad_norm": 0.46517205238342285, "learning_rate": 3.123003207798607e-06, "loss": 0.1114, "step": 5868 }, { "epoch": 1.9018146467919637, "grad_norm": 0.4682121276855469, "learning_rate": 3.121382038451113e-06, "loss": 0.1076, "step": 5869 }, { "epoch": 1.9021386908619573, "grad_norm": 0.47169503569602966, "learning_rate": 3.1197610990282725e-06, "loss": 0.1095, "step": 5870 }, { "epoch": 1.9024627349319507, "grad_norm": 0.4562494456768036, "learning_rate": 3.1181403897284696e-06, "loss": 0.1077, "step": 5871 }, { "epoch": 1.9027867790019442, "grad_norm": 0.45159628987312317, "learning_rate": 3.1165199107500665e-06, "loss": 0.1066, "step": 5872 }, { "epoch": 1.9031108230719378, "grad_norm": 0.4589924216270447, "learning_rate": 3.1148996622913906e-06, "loss": 0.1151, "step": 5873 }, { "epoch": 1.9034348671419314, "grad_norm": 0.44332441687583923, "learning_rate": 3.113279644550749e-06, "loss": 0.1004, "step": 5874 }, { "epoch": 1.9037589112119249, "grad_norm": 0.44975969195365906, "learning_rate": 3.1116598577264122e-06, "loss": 0.1005, "step": 5875 }, { "epoch": 1.9040829552819183, "grad_norm": 0.4652109444141388, "learning_rate": 3.11004030201663e-06, "loss": 0.0988, "step": 5876 }, { "epoch": 1.9044069993519117, "grad_norm": 0.4715868830680847, "learning_rate": 3.1084209776196185e-06, "loss": 0.1137, "step": 5877 }, { "epoch": 1.9047310434219054, "grad_norm": 0.4277364909648895, "learning_rate": 3.106801884733566e-06, "loss": 0.0979, "step": 5878 }, { "epoch": 1.905055087491899, "grad_norm": 0.45353928208351135, "learning_rate": 3.1051830235566365e-06, "loss": 0.1082, "step": 5879 }, { "epoch": 1.9053791315618924, "grad_norm": 0.492728054523468, "learning_rate": 3.103564394286961e-06, "loss": 0.1139, "step": 5880 }, { "epoch": 1.9057031756318858, "grad_norm": 0.45384249091148376, "learning_rate": 3.1019459971226463e-06, "loss": 0.1027, "step": 5881 }, { "epoch": 1.9060272197018795, "grad_norm": 0.46857234835624695, "learning_rate": 3.1003278322617657e-06, "loss": 0.1133, "step": 5882 }, { "epoch": 1.9063512637718731, "grad_norm": 0.48681899905204773, "learning_rate": 3.0987098999023667e-06, "loss": 0.108, "step": 5883 }, { "epoch": 1.9066753078418666, "grad_norm": 0.4567790627479553, "learning_rate": 3.097092200242473e-06, "loss": 0.1162, "step": 5884 }, { "epoch": 1.90699935191186, "grad_norm": 0.46391937136650085, "learning_rate": 3.0954747334800695e-06, "loss": 0.1116, "step": 5885 }, { "epoch": 1.9073233959818534, "grad_norm": 0.43967387080192566, "learning_rate": 3.093857499813123e-06, "loss": 0.1021, "step": 5886 }, { "epoch": 1.907647440051847, "grad_norm": 0.4478691816329956, "learning_rate": 3.0922404994395642e-06, "loss": 0.102, "step": 5887 }, { "epoch": 1.9079714841218407, "grad_norm": 0.4580485224723816, "learning_rate": 3.0906237325573017e-06, "loss": 0.1056, "step": 5888 }, { "epoch": 1.9082955281918341, "grad_norm": 0.489286333322525, "learning_rate": 3.08900719936421e-06, "loss": 0.1196, "step": 5889 }, { "epoch": 1.9086195722618275, "grad_norm": 0.45502734184265137, "learning_rate": 3.087390900058137e-06, "loss": 0.1039, "step": 5890 }, { "epoch": 1.908943616331821, "grad_norm": 0.4502509534358978, "learning_rate": 3.0857748348369017e-06, "loss": 0.1044, "step": 5891 }, { "epoch": 1.9092676604018146, "grad_norm": 0.47804975509643555, "learning_rate": 3.084159003898295e-06, "loss": 0.1149, "step": 5892 }, { "epoch": 1.9095917044718083, "grad_norm": 0.44914719462394714, "learning_rate": 3.082543407440081e-06, "loss": 0.1027, "step": 5893 }, { "epoch": 1.9099157485418017, "grad_norm": 0.42086276412010193, "learning_rate": 3.080928045659992e-06, "loss": 0.0942, "step": 5894 }, { "epoch": 1.910239792611795, "grad_norm": 0.46406880021095276, "learning_rate": 3.079312918755729e-06, "loss": 0.104, "step": 5895 }, { "epoch": 1.9105638366817888, "grad_norm": 0.4733922779560089, "learning_rate": 3.077698026924974e-06, "loss": 0.1115, "step": 5896 }, { "epoch": 1.9108878807517824, "grad_norm": 0.4340214133262634, "learning_rate": 3.076083370365369e-06, "loss": 0.1028, "step": 5897 }, { "epoch": 1.9112119248217758, "grad_norm": 0.4886152446269989, "learning_rate": 3.074468949274536e-06, "loss": 0.1166, "step": 5898 }, { "epoch": 1.9115359688917692, "grad_norm": 0.4482693672180176, "learning_rate": 3.0728547638500617e-06, "loss": 0.1127, "step": 5899 }, { "epoch": 1.9118600129617627, "grad_norm": 0.4709647595882416, "learning_rate": 3.07124081428951e-06, "loss": 0.112, "step": 5900 }, { "epoch": 1.9121840570317563, "grad_norm": 0.4490049183368683, "learning_rate": 3.06962710079041e-06, "loss": 0.1075, "step": 5901 }, { "epoch": 1.91250810110175, "grad_norm": 0.46207594871520996, "learning_rate": 3.0680136235502657e-06, "loss": 0.1114, "step": 5902 }, { "epoch": 1.9128321451717434, "grad_norm": 0.42192792892456055, "learning_rate": 3.0664003827665507e-06, "loss": 0.0984, "step": 5903 }, { "epoch": 1.9131561892417368, "grad_norm": 0.43207815289497375, "learning_rate": 3.0647873786367083e-06, "loss": 0.1044, "step": 5904 }, { "epoch": 1.9134802333117304, "grad_norm": 0.45980560779571533, "learning_rate": 3.0631746113581582e-06, "loss": 0.1097, "step": 5905 }, { "epoch": 1.9138042773817239, "grad_norm": 0.41908833384513855, "learning_rate": 3.0615620811282866e-06, "loss": 0.096, "step": 5906 }, { "epoch": 1.9141283214517175, "grad_norm": 0.46487948298454285, "learning_rate": 3.0599497881444482e-06, "loss": 0.108, "step": 5907 }, { "epoch": 1.914452365521711, "grad_norm": 0.44004693627357483, "learning_rate": 3.058337732603977e-06, "loss": 0.1016, "step": 5908 }, { "epoch": 1.9147764095917044, "grad_norm": 0.45447227358818054, "learning_rate": 3.0567259147041682e-06, "loss": 0.1036, "step": 5909 }, { "epoch": 1.915100453661698, "grad_norm": 0.45923227071762085, "learning_rate": 3.0551143346422973e-06, "loss": 0.1015, "step": 5910 }, { "epoch": 1.9154244977316917, "grad_norm": 0.5280866026878357, "learning_rate": 3.0535029926156027e-06, "loss": 0.1208, "step": 5911 }, { "epoch": 1.915748541801685, "grad_norm": 0.42134809494018555, "learning_rate": 3.0518918888212994e-06, "loss": 0.0936, "step": 5912 }, { "epoch": 1.9160725858716785, "grad_norm": 0.4500383138656616, "learning_rate": 3.0502810234565687e-06, "loss": 0.1044, "step": 5913 }, { "epoch": 1.916396629941672, "grad_norm": 0.49136853218078613, "learning_rate": 3.048670396718566e-06, "loss": 0.1126, "step": 5914 }, { "epoch": 1.9167206740116656, "grad_norm": 0.478859543800354, "learning_rate": 3.0470600088044177e-06, "loss": 0.1122, "step": 5915 }, { "epoch": 1.9170447180816592, "grad_norm": 0.4460492730140686, "learning_rate": 3.045449859911216e-06, "loss": 0.1054, "step": 5916 }, { "epoch": 1.9173687621516526, "grad_norm": 0.44223952293395996, "learning_rate": 3.0438399502360323e-06, "loss": 0.1033, "step": 5917 }, { "epoch": 1.917692806221646, "grad_norm": 0.4340362250804901, "learning_rate": 3.042230279975901e-06, "loss": 0.0968, "step": 5918 }, { "epoch": 1.9180168502916397, "grad_norm": 0.44690605998039246, "learning_rate": 3.0406208493278287e-06, "loss": 0.1071, "step": 5919 }, { "epoch": 1.9183408943616331, "grad_norm": 0.47108444571495056, "learning_rate": 3.039011658488799e-06, "loss": 0.1199, "step": 5920 }, { "epoch": 1.9186649384316268, "grad_norm": 0.44304531812667847, "learning_rate": 3.037402707655756e-06, "loss": 0.1078, "step": 5921 }, { "epoch": 1.9189889825016202, "grad_norm": 0.4661952257156372, "learning_rate": 3.0357939970256244e-06, "loss": 0.1055, "step": 5922 }, { "epoch": 1.9193130265716136, "grad_norm": 0.5036419034004211, "learning_rate": 3.0341855267952914e-06, "loss": 0.1165, "step": 5923 }, { "epoch": 1.9196370706416073, "grad_norm": 0.4615981876850128, "learning_rate": 3.0325772971616203e-06, "loss": 0.1088, "step": 5924 }, { "epoch": 1.919961114711601, "grad_norm": 0.46507641673088074, "learning_rate": 3.030969308321442e-06, "loss": 0.1108, "step": 5925 }, { "epoch": 1.9202851587815943, "grad_norm": 0.46919676661491394, "learning_rate": 3.0293615604715564e-06, "loss": 0.1177, "step": 5926 }, { "epoch": 1.9206092028515878, "grad_norm": 0.4771016836166382, "learning_rate": 3.027754053808741e-06, "loss": 0.1141, "step": 5927 }, { "epoch": 1.9209332469215812, "grad_norm": 0.48941096663475037, "learning_rate": 3.026146788529734e-06, "loss": 0.1125, "step": 5928 }, { "epoch": 1.9212572909915748, "grad_norm": 0.44794967770576477, "learning_rate": 3.0245397648312543e-06, "loss": 0.1, "step": 5929 }, { "epoch": 1.9215813350615685, "grad_norm": 0.47503042221069336, "learning_rate": 3.0229329829099805e-06, "loss": 0.1061, "step": 5930 }, { "epoch": 1.921905379131562, "grad_norm": 0.4251140356063843, "learning_rate": 3.021326442962573e-06, "loss": 0.0978, "step": 5931 }, { "epoch": 1.9222294232015553, "grad_norm": 0.4348359704017639, "learning_rate": 3.0197201451856537e-06, "loss": 0.1039, "step": 5932 }, { "epoch": 1.922553467271549, "grad_norm": 0.4612886607646942, "learning_rate": 3.0181140897758175e-06, "loss": 0.1131, "step": 5933 }, { "epoch": 1.9228775113415426, "grad_norm": 0.43259257078170776, "learning_rate": 3.0165082769296307e-06, "loss": 0.1057, "step": 5934 }, { "epoch": 1.923201555411536, "grad_norm": 0.44745153188705444, "learning_rate": 3.0149027068436275e-06, "loss": 0.1044, "step": 5935 }, { "epoch": 1.9235255994815295, "grad_norm": 0.43348148465156555, "learning_rate": 3.0132973797143176e-06, "loss": 0.1002, "step": 5936 }, { "epoch": 1.9238496435515229, "grad_norm": 0.44396522641181946, "learning_rate": 3.0116922957381757e-06, "loss": 0.1069, "step": 5937 }, { "epoch": 1.9241736876215165, "grad_norm": 0.41942650079727173, "learning_rate": 3.0100874551116467e-06, "loss": 0.0939, "step": 5938 }, { "epoch": 1.9244977316915102, "grad_norm": 0.47876089811325073, "learning_rate": 3.008482858031151e-06, "loss": 0.1094, "step": 5939 }, { "epoch": 1.9248217757615036, "grad_norm": 0.4715111553668976, "learning_rate": 3.0068785046930728e-06, "loss": 0.1057, "step": 5940 }, { "epoch": 1.925145819831497, "grad_norm": 0.4314781427383423, "learning_rate": 3.005274395293772e-06, "loss": 0.091, "step": 5941 }, { "epoch": 1.9254698639014904, "grad_norm": 0.46073848009109497, "learning_rate": 3.0036705300295743e-06, "loss": 0.1075, "step": 5942 }, { "epoch": 1.925793907971484, "grad_norm": 0.43426087498664856, "learning_rate": 3.002066909096777e-06, "loss": 0.0993, "step": 5943 }, { "epoch": 1.9261179520414777, "grad_norm": 0.48200058937072754, "learning_rate": 3.00046353269165e-06, "loss": 0.1192, "step": 5944 }, { "epoch": 1.9264419961114712, "grad_norm": 0.45754122734069824, "learning_rate": 2.9988604010104283e-06, "loss": 0.1174, "step": 5945 }, { "epoch": 1.9267660401814646, "grad_norm": 0.42181843519210815, "learning_rate": 2.997257514249323e-06, "loss": 0.098, "step": 5946 }, { "epoch": 1.9270900842514582, "grad_norm": 0.4637039601802826, "learning_rate": 2.9956548726045064e-06, "loss": 0.1126, "step": 5947 }, { "epoch": 1.9274141283214519, "grad_norm": 0.47404715418815613, "learning_rate": 2.994052476272133e-06, "loss": 0.1118, "step": 5948 }, { "epoch": 1.9277381723914453, "grad_norm": 0.4384154677391052, "learning_rate": 2.9924503254483166e-06, "loss": 0.1003, "step": 5949 }, { "epoch": 1.9280622164614387, "grad_norm": 0.44925567507743835, "learning_rate": 2.9908484203291444e-06, "loss": 0.1052, "step": 5950 }, { "epoch": 1.9283862605314321, "grad_norm": 0.4423099458217621, "learning_rate": 2.9892467611106774e-06, "loss": 0.0996, "step": 5951 }, { "epoch": 1.9287103046014258, "grad_norm": 0.45022711157798767, "learning_rate": 2.9876453479889388e-06, "loss": 0.1052, "step": 5952 }, { "epoch": 1.9290343486714194, "grad_norm": 0.4542100131511688, "learning_rate": 2.9860441811599304e-06, "loss": 0.1076, "step": 5953 }, { "epoch": 1.9293583927414129, "grad_norm": 0.44740357995033264, "learning_rate": 2.984443260819617e-06, "loss": 0.112, "step": 5954 }, { "epoch": 1.9296824368114063, "grad_norm": 0.49884316325187683, "learning_rate": 2.9828425871639378e-06, "loss": 0.1171, "step": 5955 }, { "epoch": 1.9300064808814, "grad_norm": 0.5064646005630493, "learning_rate": 2.981242160388797e-06, "loss": 0.1199, "step": 5956 }, { "epoch": 1.9303305249513933, "grad_norm": 0.47748908400535583, "learning_rate": 2.9796419806900723e-06, "loss": 0.1107, "step": 5957 }, { "epoch": 1.930654569021387, "grad_norm": 0.4773690104484558, "learning_rate": 2.978042048263612e-06, "loss": 0.1149, "step": 5958 }, { "epoch": 1.9309786130913804, "grad_norm": 0.4412952959537506, "learning_rate": 2.9764423633052288e-06, "loss": 0.0995, "step": 5959 }, { "epoch": 1.9313026571613738, "grad_norm": 0.44443589448928833, "learning_rate": 2.9748429260107124e-06, "loss": 0.1, "step": 5960 }, { "epoch": 1.9316267012313675, "grad_norm": 0.47857704758644104, "learning_rate": 2.9732437365758177e-06, "loss": 0.1111, "step": 5961 }, { "epoch": 1.9319507453013611, "grad_norm": 0.4563223123550415, "learning_rate": 2.971644795196267e-06, "loss": 0.1025, "step": 5962 }, { "epoch": 1.9322747893713546, "grad_norm": 0.4517981708049774, "learning_rate": 2.9700461020677585e-06, "loss": 0.1061, "step": 5963 }, { "epoch": 1.932598833441348, "grad_norm": 0.4651930034160614, "learning_rate": 2.9684476573859554e-06, "loss": 0.1075, "step": 5964 }, { "epoch": 1.9329228775113414, "grad_norm": 0.4484946131706238, "learning_rate": 2.9668494613464914e-06, "loss": 0.107, "step": 5965 }, { "epoch": 1.933246921581335, "grad_norm": 0.479286789894104, "learning_rate": 2.9652515141449713e-06, "loss": 0.1096, "step": 5966 }, { "epoch": 1.9335709656513287, "grad_norm": 0.44580888748168945, "learning_rate": 2.963653815976969e-06, "loss": 0.0991, "step": 5967 }, { "epoch": 1.9338950097213221, "grad_norm": 0.4828713834285736, "learning_rate": 2.9620563670380265e-06, "loss": 0.1135, "step": 5968 }, { "epoch": 1.9342190537913155, "grad_norm": 0.48791760206222534, "learning_rate": 2.9604591675236536e-06, "loss": 0.1092, "step": 5969 }, { "epoch": 1.9345430978613092, "grad_norm": 0.4554043412208557, "learning_rate": 2.958862217629336e-06, "loss": 0.1073, "step": 5970 }, { "epoch": 1.9348671419313026, "grad_norm": 0.4587395489215851, "learning_rate": 2.9572655175505217e-06, "loss": 0.0978, "step": 5971 }, { "epoch": 1.9351911860012962, "grad_norm": 0.45185279846191406, "learning_rate": 2.9556690674826348e-06, "loss": 0.103, "step": 5972 }, { "epoch": 1.9355152300712897, "grad_norm": 0.4833644926548004, "learning_rate": 2.954072867621063e-06, "loss": 0.1161, "step": 5973 }, { "epoch": 1.935839274141283, "grad_norm": 0.44658008217811584, "learning_rate": 2.9524769181611646e-06, "loss": 0.1062, "step": 5974 }, { "epoch": 1.9361633182112767, "grad_norm": 0.4688814878463745, "learning_rate": 2.950881219298272e-06, "loss": 0.1084, "step": 5975 }, { "epoch": 1.9364873622812704, "grad_norm": 0.47974029183387756, "learning_rate": 2.949285771227679e-06, "loss": 0.1162, "step": 5976 }, { "epoch": 1.9368114063512638, "grad_norm": 0.42677879333496094, "learning_rate": 2.947690574144657e-06, "loss": 0.1038, "step": 5977 }, { "epoch": 1.9371354504212572, "grad_norm": 0.4472271800041199, "learning_rate": 2.9460956282444387e-06, "loss": 0.1058, "step": 5978 }, { "epoch": 1.9374594944912507, "grad_norm": 0.48130524158477783, "learning_rate": 2.9445009337222343e-06, "loss": 0.1174, "step": 5979 }, { "epoch": 1.9377835385612443, "grad_norm": 0.4491761326789856, "learning_rate": 2.942906490773217e-06, "loss": 0.1086, "step": 5980 }, { "epoch": 1.938107582631238, "grad_norm": 0.4414316415786743, "learning_rate": 2.9413122995925287e-06, "loss": 0.1031, "step": 5981 }, { "epoch": 1.9384316267012314, "grad_norm": 0.46407943964004517, "learning_rate": 2.939718360375287e-06, "loss": 0.1066, "step": 5982 }, { "epoch": 1.9387556707712248, "grad_norm": 0.47318655252456665, "learning_rate": 2.9381246733165713e-06, "loss": 0.117, "step": 5983 }, { "epoch": 1.9390797148412184, "grad_norm": 0.4662487804889679, "learning_rate": 2.9365312386114377e-06, "loss": 0.109, "step": 5984 }, { "epoch": 1.939403758911212, "grad_norm": 0.4455025792121887, "learning_rate": 2.9349380564549033e-06, "loss": 0.0995, "step": 5985 }, { "epoch": 1.9397278029812055, "grad_norm": 0.49028605222702026, "learning_rate": 2.933345127041959e-06, "loss": 0.1138, "step": 5986 }, { "epoch": 1.940051847051199, "grad_norm": 0.4964102506637573, "learning_rate": 2.9317524505675643e-06, "loss": 0.1068, "step": 5987 }, { "epoch": 1.9403758911211924, "grad_norm": 0.49319037795066833, "learning_rate": 2.9301600272266477e-06, "loss": 0.1159, "step": 5988 }, { "epoch": 1.940699935191186, "grad_norm": 0.4496191740036011, "learning_rate": 2.9285678572141075e-06, "loss": 0.1021, "step": 5989 }, { "epoch": 1.9410239792611796, "grad_norm": 0.4779997766017914, "learning_rate": 2.9269759407248053e-06, "loss": 0.1137, "step": 5990 }, { "epoch": 1.941348023331173, "grad_norm": 0.4509337246417999, "learning_rate": 2.925384277953583e-06, "loss": 0.1001, "step": 5991 }, { "epoch": 1.9416720674011665, "grad_norm": 0.43564677238464355, "learning_rate": 2.9237928690952405e-06, "loss": 0.1031, "step": 5992 }, { "epoch": 1.94199611147116, "grad_norm": 0.45528966188430786, "learning_rate": 2.9222017143445495e-06, "loss": 0.1063, "step": 5993 }, { "epoch": 1.9423201555411536, "grad_norm": 0.4465389549732208, "learning_rate": 2.9206108138962563e-06, "loss": 0.1057, "step": 5994 }, { "epoch": 1.9426441996111472, "grad_norm": 0.4120558798313141, "learning_rate": 2.9190201679450676e-06, "loss": 0.0918, "step": 5995 }, { "epoch": 1.9429682436811406, "grad_norm": 0.44868025183677673, "learning_rate": 2.9174297766856675e-06, "loss": 0.1052, "step": 5996 }, { "epoch": 1.943292287751134, "grad_norm": 0.41865020990371704, "learning_rate": 2.9158396403127e-06, "loss": 0.0996, "step": 5997 }, { "epoch": 1.9436163318211277, "grad_norm": 0.43086937069892883, "learning_rate": 2.9142497590207842e-06, "loss": 0.1026, "step": 5998 }, { "epoch": 1.9439403758911213, "grad_norm": 0.4507567286491394, "learning_rate": 2.912660133004507e-06, "loss": 0.0957, "step": 5999 }, { "epoch": 1.9442644199611148, "grad_norm": 0.4489535093307495, "learning_rate": 2.911070762458421e-06, "loss": 0.1018, "step": 6000 }, { "epoch": 1.9445884640311082, "grad_norm": 0.48330259323120117, "learning_rate": 2.9094816475770525e-06, "loss": 0.1149, "step": 6001 }, { "epoch": 1.9449125081011016, "grad_norm": 0.39563846588134766, "learning_rate": 2.9078927885548924e-06, "loss": 0.0866, "step": 6002 }, { "epoch": 1.9452365521710953, "grad_norm": 0.4709007740020752, "learning_rate": 2.9063041855864033e-06, "loss": 0.1099, "step": 6003 }, { "epoch": 1.945560596241089, "grad_norm": 0.44115516543388367, "learning_rate": 2.904715838866012e-06, "loss": 0.1023, "step": 6004 }, { "epoch": 1.9458846403110823, "grad_norm": 0.4623752534389496, "learning_rate": 2.903127748588117e-06, "loss": 0.1129, "step": 6005 }, { "epoch": 1.9462086843810757, "grad_norm": 0.44601622223854065, "learning_rate": 2.9015399149470873e-06, "loss": 0.1016, "step": 6006 }, { "epoch": 1.9465327284510694, "grad_norm": 0.452635794878006, "learning_rate": 2.8999523381372573e-06, "loss": 0.1, "step": 6007 }, { "epoch": 1.9468567725210628, "grad_norm": 0.4444234073162079, "learning_rate": 2.898365018352931e-06, "loss": 0.1016, "step": 6008 }, { "epoch": 1.9471808165910565, "grad_norm": 0.46949273347854614, "learning_rate": 2.8967779557883807e-06, "loss": 0.1122, "step": 6009 }, { "epoch": 1.9475048606610499, "grad_norm": 0.42208442091941833, "learning_rate": 2.895191150637848e-06, "loss": 0.094, "step": 6010 }, { "epoch": 1.9478289047310433, "grad_norm": 0.48604580760002136, "learning_rate": 2.8936046030955445e-06, "loss": 0.1214, "step": 6011 }, { "epoch": 1.948152948801037, "grad_norm": 0.44420698285102844, "learning_rate": 2.892018313355644e-06, "loss": 0.1021, "step": 6012 }, { "epoch": 1.9484769928710306, "grad_norm": 0.4467221796512604, "learning_rate": 2.8904322816122955e-06, "loss": 0.1022, "step": 6013 }, { "epoch": 1.948801036941024, "grad_norm": 0.4772205948829651, "learning_rate": 2.888846508059613e-06, "loss": 0.1177, "step": 6014 }, { "epoch": 1.9491250810110174, "grad_norm": 0.4955427646636963, "learning_rate": 2.88726099289168e-06, "loss": 0.1174, "step": 6015 }, { "epoch": 1.9494491250810109, "grad_norm": 0.45310765504837036, "learning_rate": 2.885675736302551e-06, "loss": 0.1051, "step": 6016 }, { "epoch": 1.9497731691510045, "grad_norm": 0.4388439357280731, "learning_rate": 2.8840907384862394e-06, "loss": 0.0971, "step": 6017 }, { "epoch": 1.9500972132209982, "grad_norm": 0.42228397727012634, "learning_rate": 2.882505999636742e-06, "loss": 0.1025, "step": 6018 }, { "epoch": 1.9504212572909916, "grad_norm": 0.4494069814682007, "learning_rate": 2.880921519948008e-06, "loss": 0.1012, "step": 6019 }, { "epoch": 1.950745301360985, "grad_norm": 0.44469448924064636, "learning_rate": 2.879337299613969e-06, "loss": 0.0992, "step": 6020 }, { "epoch": 1.9510693454309787, "grad_norm": 0.45649954676628113, "learning_rate": 2.8777533388285106e-06, "loss": 0.1121, "step": 6021 }, { "epoch": 1.9513933895009723, "grad_norm": 0.4270290434360504, "learning_rate": 2.876169637785503e-06, "loss": 0.0975, "step": 6022 }, { "epoch": 1.9517174335709657, "grad_norm": 0.4835495948791504, "learning_rate": 2.8745861966787697e-06, "loss": 0.1131, "step": 6023 }, { "epoch": 1.9520414776409591, "grad_norm": 0.4646935760974884, "learning_rate": 2.8730030157021106e-06, "loss": 0.1078, "step": 6024 }, { "epoch": 1.9523655217109526, "grad_norm": 0.4147730767726898, "learning_rate": 2.8714200950492925e-06, "loss": 0.0939, "step": 6025 }, { "epoch": 1.9526895657809462, "grad_norm": 0.4704440236091614, "learning_rate": 2.869837434914045e-06, "loss": 0.1048, "step": 6026 }, { "epoch": 1.9530136098509399, "grad_norm": 0.42922016978263855, "learning_rate": 2.8682550354900778e-06, "loss": 0.098, "step": 6027 }, { "epoch": 1.9533376539209333, "grad_norm": 0.43618518114089966, "learning_rate": 2.8666728969710555e-06, "loss": 0.1032, "step": 6028 }, { "epoch": 1.9536616979909267, "grad_norm": 0.4387398958206177, "learning_rate": 2.865091019550618e-06, "loss": 0.1027, "step": 6029 }, { "epoch": 1.9539857420609201, "grad_norm": 0.4201517403125763, "learning_rate": 2.863509403422373e-06, "loss": 0.0977, "step": 6030 }, { "epoch": 1.9543097861309138, "grad_norm": 0.47074347734451294, "learning_rate": 2.8619280487798935e-06, "loss": 0.1092, "step": 6031 }, { "epoch": 1.9546338302009074, "grad_norm": 0.45103365182876587, "learning_rate": 2.860346955816723e-06, "loss": 0.1096, "step": 6032 }, { "epoch": 1.9549578742709008, "grad_norm": 0.46009188890457153, "learning_rate": 2.8587661247263714e-06, "loss": 0.1035, "step": 6033 }, { "epoch": 1.9552819183408943, "grad_norm": 0.47794514894485474, "learning_rate": 2.8571855557023196e-06, "loss": 0.1082, "step": 6034 }, { "epoch": 1.955605962410888, "grad_norm": 0.4807623624801636, "learning_rate": 2.855605248938009e-06, "loss": 0.112, "step": 6035 }, { "epoch": 1.9559300064808816, "grad_norm": 0.45439618825912476, "learning_rate": 2.854025204626858e-06, "loss": 0.1027, "step": 6036 }, { "epoch": 1.956254050550875, "grad_norm": 0.47287946939468384, "learning_rate": 2.8524454229622466e-06, "loss": 0.1107, "step": 6037 }, { "epoch": 1.9565780946208684, "grad_norm": 0.4567338526248932, "learning_rate": 2.850865904137525e-06, "loss": 0.1075, "step": 6038 }, { "epoch": 1.9569021386908618, "grad_norm": 0.4594586193561554, "learning_rate": 2.8492866483460124e-06, "loss": 0.1077, "step": 6039 }, { "epoch": 1.9572261827608555, "grad_norm": 0.4383203983306885, "learning_rate": 2.8477076557809946e-06, "loss": 0.1063, "step": 6040 }, { "epoch": 1.9575502268308491, "grad_norm": 0.43546706438064575, "learning_rate": 2.8461289266357205e-06, "loss": 0.0968, "step": 6041 }, { "epoch": 1.9578742709008425, "grad_norm": 0.4216805100440979, "learning_rate": 2.8445504611034185e-06, "loss": 0.102, "step": 6042 }, { "epoch": 1.958198314970836, "grad_norm": 0.45357009768486023, "learning_rate": 2.84297225937727e-06, "loss": 0.1057, "step": 6043 }, { "epoch": 1.9585223590408296, "grad_norm": 0.4711031913757324, "learning_rate": 2.841394321650439e-06, "loss": 0.1173, "step": 6044 }, { "epoch": 1.958846403110823, "grad_norm": 0.4412153363227844, "learning_rate": 2.8398166481160437e-06, "loss": 0.1015, "step": 6045 }, { "epoch": 1.9591704471808167, "grad_norm": 0.489576518535614, "learning_rate": 2.83823923896718e-06, "loss": 0.1137, "step": 6046 }, { "epoch": 1.95949449125081, "grad_norm": 0.44831836223602295, "learning_rate": 2.8366620943969063e-06, "loss": 0.1029, "step": 6047 }, { "epoch": 1.9598185353208035, "grad_norm": 0.4388909339904785, "learning_rate": 2.8350852145982468e-06, "loss": 0.1024, "step": 6048 }, { "epoch": 1.9601425793907972, "grad_norm": 0.47623389959335327, "learning_rate": 2.833508599764202e-06, "loss": 0.1065, "step": 6049 }, { "epoch": 1.9604666234607908, "grad_norm": 0.4384922385215759, "learning_rate": 2.831932250087728e-06, "loss": 0.1003, "step": 6050 }, { "epoch": 1.9607906675307842, "grad_norm": 0.44177010655403137, "learning_rate": 2.830356165761762e-06, "loss": 0.1014, "step": 6051 }, { "epoch": 1.9611147116007777, "grad_norm": 0.4402632415294647, "learning_rate": 2.8287803469791946e-06, "loss": 0.0958, "step": 6052 }, { "epoch": 1.961438755670771, "grad_norm": 0.477048397064209, "learning_rate": 2.8272047939328943e-06, "loss": 0.1034, "step": 6053 }, { "epoch": 1.9617627997407647, "grad_norm": 0.4629821181297302, "learning_rate": 2.8256295068156938e-06, "loss": 0.1077, "step": 6054 }, { "epoch": 1.9620868438107584, "grad_norm": 0.43252432346343994, "learning_rate": 2.824054485820391e-06, "loss": 0.1023, "step": 6055 }, { "epoch": 1.9624108878807518, "grad_norm": 0.45714935660362244, "learning_rate": 2.8224797311397544e-06, "loss": 0.1075, "step": 6056 }, { "epoch": 1.9627349319507452, "grad_norm": 0.46026867628097534, "learning_rate": 2.820905242966519e-06, "loss": 0.1057, "step": 6057 }, { "epoch": 1.9630589760207389, "grad_norm": 0.44352877140045166, "learning_rate": 2.8193310214933887e-06, "loss": 0.1031, "step": 6058 }, { "epoch": 1.9633830200907323, "grad_norm": 0.4600834846496582, "learning_rate": 2.817757066913029e-06, "loss": 0.1107, "step": 6059 }, { "epoch": 1.963707064160726, "grad_norm": 0.4361191391944885, "learning_rate": 2.8161833794180783e-06, "loss": 0.1051, "step": 6060 }, { "epoch": 1.9640311082307194, "grad_norm": 0.44225600361824036, "learning_rate": 2.814609959201141e-06, "loss": 0.1049, "step": 6061 }, { "epoch": 1.9643551523007128, "grad_norm": 0.4521048069000244, "learning_rate": 2.8130368064547884e-06, "loss": 0.1102, "step": 6062 }, { "epoch": 1.9646791963707064, "grad_norm": 0.45793232321739197, "learning_rate": 2.811463921371559e-06, "loss": 0.1072, "step": 6063 }, { "epoch": 1.9650032404407, "grad_norm": 0.46674272418022156, "learning_rate": 2.809891304143961e-06, "loss": 0.1106, "step": 6064 }, { "epoch": 1.9653272845106935, "grad_norm": 0.43551090359687805, "learning_rate": 2.808318954964462e-06, "loss": 0.1027, "step": 6065 }, { "epoch": 1.965651328580687, "grad_norm": 0.4202669858932495, "learning_rate": 2.80674687402551e-06, "loss": 0.0938, "step": 6066 }, { "epoch": 1.9659753726506803, "grad_norm": 0.41163575649261475, "learning_rate": 2.8051750615195055e-06, "loss": 0.0948, "step": 6067 }, { "epoch": 1.966299416720674, "grad_norm": 0.4615885019302368, "learning_rate": 2.8036035176388264e-06, "loss": 0.1025, "step": 6068 }, { "epoch": 1.9666234607906676, "grad_norm": 0.42400792241096497, "learning_rate": 2.802032242575814e-06, "loss": 0.0936, "step": 6069 }, { "epoch": 1.966947504860661, "grad_norm": 0.44435328245162964, "learning_rate": 2.800461236522777e-06, "loss": 0.1079, "step": 6070 }, { "epoch": 1.9672715489306545, "grad_norm": 0.4586362838745117, "learning_rate": 2.7988904996719927e-06, "loss": 0.1123, "step": 6071 }, { "epoch": 1.9675955930006481, "grad_norm": 0.4277156591415405, "learning_rate": 2.7973200322157e-06, "loss": 0.0987, "step": 6072 }, { "epoch": 1.9679196370706418, "grad_norm": 0.46500012278556824, "learning_rate": 2.7957498343461154e-06, "loss": 0.1096, "step": 6073 }, { "epoch": 1.9682436811406352, "grad_norm": 0.4366726279258728, "learning_rate": 2.794179906255408e-06, "loss": 0.1029, "step": 6074 }, { "epoch": 1.9685677252106286, "grad_norm": 0.4122413992881775, "learning_rate": 2.7926102481357303e-06, "loss": 0.0965, "step": 6075 }, { "epoch": 1.968891769280622, "grad_norm": 0.4758448898792267, "learning_rate": 2.7910408601791873e-06, "loss": 0.1059, "step": 6076 }, { "epoch": 1.9692158133506157, "grad_norm": 0.4578683376312256, "learning_rate": 2.7894717425778585e-06, "loss": 0.1038, "step": 6077 }, { "epoch": 1.9695398574206093, "grad_norm": 0.43178144097328186, "learning_rate": 2.7879028955237887e-06, "loss": 0.1034, "step": 6078 }, { "epoch": 1.9698639014906028, "grad_norm": 0.4421790838241577, "learning_rate": 2.7863343192089893e-06, "loss": 0.1025, "step": 6079 }, { "epoch": 1.9701879455605962, "grad_norm": 0.4443241059780121, "learning_rate": 2.784766013825442e-06, "loss": 0.101, "step": 6080 }, { "epoch": 1.9705119896305896, "grad_norm": 0.4201869070529938, "learning_rate": 2.7831979795650848e-06, "loss": 0.0965, "step": 6081 }, { "epoch": 1.9708360337005832, "grad_norm": 0.43988552689552307, "learning_rate": 2.781630216619839e-06, "loss": 0.1052, "step": 6082 }, { "epoch": 1.971160077770577, "grad_norm": 0.4632330536842346, "learning_rate": 2.7800627251815772e-06, "loss": 0.1058, "step": 6083 }, { "epoch": 1.9714841218405703, "grad_norm": 0.4753658175468445, "learning_rate": 2.778495505442147e-06, "loss": 0.111, "step": 6084 }, { "epoch": 1.9718081659105637, "grad_norm": 0.45654088258743286, "learning_rate": 2.776928557593361e-06, "loss": 0.1087, "step": 6085 }, { "epoch": 1.9721322099805574, "grad_norm": 0.47004789113998413, "learning_rate": 2.7753618818269988e-06, "loss": 0.1097, "step": 6086 }, { "epoch": 1.972456254050551, "grad_norm": 0.4864579141139984, "learning_rate": 2.7737954783348066e-06, "loss": 0.1196, "step": 6087 }, { "epoch": 1.9727802981205445, "grad_norm": 0.4297422766685486, "learning_rate": 2.772229347308496e-06, "loss": 0.1035, "step": 6088 }, { "epoch": 1.9731043421905379, "grad_norm": 0.4059481918811798, "learning_rate": 2.770663488939749e-06, "loss": 0.0959, "step": 6089 }, { "epoch": 1.9734283862605313, "grad_norm": 0.49687182903289795, "learning_rate": 2.769097903420207e-06, "loss": 0.1258, "step": 6090 }, { "epoch": 1.973752430330525, "grad_norm": 0.4761006832122803, "learning_rate": 2.767532590941485e-06, "loss": 0.1155, "step": 6091 }, { "epoch": 1.9740764744005186, "grad_norm": 0.46504855155944824, "learning_rate": 2.7659675516951616e-06, "loss": 0.1086, "step": 6092 }, { "epoch": 1.974400518470512, "grad_norm": 0.451185017824173, "learning_rate": 2.7644027858727827e-06, "loss": 0.1068, "step": 6093 }, { "epoch": 1.9747245625405054, "grad_norm": 0.4202289581298828, "learning_rate": 2.7628382936658614e-06, "loss": 0.0974, "step": 6094 }, { "epoch": 1.975048606610499, "grad_norm": 0.49244967103004456, "learning_rate": 2.7612740752658775e-06, "loss": 0.1062, "step": 6095 }, { "epoch": 1.9753726506804925, "grad_norm": 0.45967769622802734, "learning_rate": 2.7597101308642694e-06, "loss": 0.1043, "step": 6096 }, { "epoch": 1.9756966947504861, "grad_norm": 0.4675883948802948, "learning_rate": 2.758146460652458e-06, "loss": 0.1126, "step": 6097 }, { "epoch": 1.9760207388204796, "grad_norm": 0.4642001986503601, "learning_rate": 2.756583064821815e-06, "loss": 0.118, "step": 6098 }, { "epoch": 1.976344782890473, "grad_norm": 0.4494239389896393, "learning_rate": 2.7550199435636864e-06, "loss": 0.1098, "step": 6099 }, { "epoch": 1.9766688269604666, "grad_norm": 0.47027140855789185, "learning_rate": 2.753457097069384e-06, "loss": 0.1068, "step": 6100 }, { "epoch": 1.9769928710304603, "grad_norm": 0.453249990940094, "learning_rate": 2.7518945255301852e-06, "loss": 0.1017, "step": 6101 }, { "epoch": 1.9773169151004537, "grad_norm": 0.4894610643386841, "learning_rate": 2.7503322291373346e-06, "loss": 0.1118, "step": 6102 }, { "epoch": 1.9776409591704471, "grad_norm": 0.4645687937736511, "learning_rate": 2.7487702080820366e-06, "loss": 0.1088, "step": 6103 }, { "epoch": 1.9779650032404406, "grad_norm": 0.44122567772865295, "learning_rate": 2.7472084625554763e-06, "loss": 0.1032, "step": 6104 }, { "epoch": 1.9782890473104342, "grad_norm": 0.47128430008888245, "learning_rate": 2.7456469927487863e-06, "loss": 0.1003, "step": 6105 }, { "epoch": 1.9786130913804278, "grad_norm": 0.44808393716812134, "learning_rate": 2.7440857988530855e-06, "loss": 0.102, "step": 6106 }, { "epoch": 1.9789371354504213, "grad_norm": 0.4369852542877197, "learning_rate": 2.7425248810594417e-06, "loss": 0.098, "step": 6107 }, { "epoch": 1.9792611795204147, "grad_norm": 0.4913753569126129, "learning_rate": 2.7409642395588983e-06, "loss": 0.1188, "step": 6108 }, { "epoch": 1.9795852235904083, "grad_norm": 0.41489270329475403, "learning_rate": 2.739403874542462e-06, "loss": 0.0952, "step": 6109 }, { "epoch": 1.9799092676604018, "grad_norm": 0.4610608220100403, "learning_rate": 2.7378437862011086e-06, "loss": 0.1057, "step": 6110 }, { "epoch": 1.9802333117303954, "grad_norm": 0.4758901000022888, "learning_rate": 2.736283974725778e-06, "loss": 0.1124, "step": 6111 }, { "epoch": 1.9805573558003888, "grad_norm": 0.446199506521225, "learning_rate": 2.7347244403073704e-06, "loss": 0.1026, "step": 6112 }, { "epoch": 1.9808813998703823, "grad_norm": 0.45175614953041077, "learning_rate": 2.7331651831367657e-06, "loss": 0.1018, "step": 6113 }, { "epoch": 1.981205443940376, "grad_norm": 0.48578208684921265, "learning_rate": 2.7316062034047953e-06, "loss": 0.1174, "step": 6114 }, { "epoch": 1.9815294880103695, "grad_norm": 0.4611910879611969, "learning_rate": 2.7300475013022666e-06, "loss": 0.1017, "step": 6115 }, { "epoch": 1.981853532080363, "grad_norm": 0.467991441488266, "learning_rate": 2.728489077019949e-06, "loss": 0.104, "step": 6116 }, { "epoch": 1.9821775761503564, "grad_norm": 0.458097368478775, "learning_rate": 2.726930930748578e-06, "loss": 0.0999, "step": 6117 }, { "epoch": 1.9825016202203498, "grad_norm": 0.4219837784767151, "learning_rate": 2.725373062678856e-06, "loss": 0.0932, "step": 6118 }, { "epoch": 1.9828256642903435, "grad_norm": 0.46422630548477173, "learning_rate": 2.7238154730014533e-06, "loss": 0.1109, "step": 6119 }, { "epoch": 1.983149708360337, "grad_norm": 0.45828792452812195, "learning_rate": 2.7222581619069994e-06, "loss": 0.1028, "step": 6120 }, { "epoch": 1.9834737524303305, "grad_norm": 0.45479437708854675, "learning_rate": 2.7207011295860962e-06, "loss": 0.105, "step": 6121 }, { "epoch": 1.983797796500324, "grad_norm": 0.4284836947917938, "learning_rate": 2.7191443762293096e-06, "loss": 0.0948, "step": 6122 }, { "epoch": 1.9841218405703176, "grad_norm": 0.4283240735530853, "learning_rate": 2.717587902027171e-06, "loss": 0.0998, "step": 6123 }, { "epoch": 1.9844458846403112, "grad_norm": 0.44493427872657776, "learning_rate": 2.716031707170177e-06, "loss": 0.1057, "step": 6124 }, { "epoch": 1.9847699287103047, "grad_norm": 0.4590212404727936, "learning_rate": 2.714475791848792e-06, "loss": 0.11, "step": 6125 }, { "epoch": 1.985093972780298, "grad_norm": 0.45337870717048645, "learning_rate": 2.712920156253447e-06, "loss": 0.1053, "step": 6126 }, { "epoch": 1.9854180168502915, "grad_norm": 0.4287721812725067, "learning_rate": 2.7113648005745295e-06, "loss": 0.0961, "step": 6127 }, { "epoch": 1.9857420609202852, "grad_norm": 0.4195595383644104, "learning_rate": 2.7098097250024093e-06, "loss": 0.1022, "step": 6128 }, { "epoch": 1.9860661049902788, "grad_norm": 0.5380899906158447, "learning_rate": 2.708254929727406e-06, "loss": 0.1222, "step": 6129 }, { "epoch": 1.9863901490602722, "grad_norm": 0.42683035135269165, "learning_rate": 2.706700414939813e-06, "loss": 0.0998, "step": 6130 }, { "epoch": 1.9867141931302656, "grad_norm": 0.43054234981536865, "learning_rate": 2.7051461808298885e-06, "loss": 0.0953, "step": 6131 }, { "epoch": 1.987038237200259, "grad_norm": 0.4568624198436737, "learning_rate": 2.703592227587856e-06, "loss": 0.1085, "step": 6132 }, { "epoch": 1.9873622812702527, "grad_norm": 0.4255097806453705, "learning_rate": 2.7020385554039055e-06, "loss": 0.0993, "step": 6133 }, { "epoch": 1.9876863253402464, "grad_norm": 0.4464300274848938, "learning_rate": 2.700485164468185e-06, "loss": 0.096, "step": 6134 }, { "epoch": 1.9880103694102398, "grad_norm": 0.4486854672431946, "learning_rate": 2.6989320549708244e-06, "loss": 0.1043, "step": 6135 }, { "epoch": 1.9883344134802332, "grad_norm": 0.4583606421947479, "learning_rate": 2.6973792271019005e-06, "loss": 0.1089, "step": 6136 }, { "epoch": 1.9886584575502269, "grad_norm": 0.45357078313827515, "learning_rate": 2.695826681051471e-06, "loss": 0.1059, "step": 6137 }, { "epoch": 1.9889825016202205, "grad_norm": 0.41597336530685425, "learning_rate": 2.6942744170095486e-06, "loss": 0.0982, "step": 6138 }, { "epoch": 1.989306545690214, "grad_norm": 0.42208510637283325, "learning_rate": 2.6927224351661157e-06, "loss": 0.1004, "step": 6139 }, { "epoch": 1.9896305897602073, "grad_norm": 0.45657432079315186, "learning_rate": 2.691170735711121e-06, "loss": 0.1065, "step": 6140 }, { "epoch": 1.9899546338302008, "grad_norm": 0.43733304738998413, "learning_rate": 2.6896193188344766e-06, "loss": 0.0963, "step": 6141 }, { "epoch": 1.9902786779001944, "grad_norm": 0.41876494884490967, "learning_rate": 2.688068184726064e-06, "loss": 0.0967, "step": 6142 }, { "epoch": 1.990602721970188, "grad_norm": 0.47125309705734253, "learning_rate": 2.686517333575722e-06, "loss": 0.1037, "step": 6143 }, { "epoch": 1.9909267660401815, "grad_norm": 0.41714105010032654, "learning_rate": 2.6849667655732623e-06, "loss": 0.0979, "step": 6144 }, { "epoch": 1.991250810110175, "grad_norm": 0.4522852599620819, "learning_rate": 2.683416480908459e-06, "loss": 0.1059, "step": 6145 }, { "epoch": 1.9915748541801686, "grad_norm": 0.4258798658847809, "learning_rate": 2.6818664797710526e-06, "loss": 0.1001, "step": 6146 }, { "epoch": 1.991898898250162, "grad_norm": 0.44886261224746704, "learning_rate": 2.680316762350747e-06, "loss": 0.1011, "step": 6147 }, { "epoch": 1.9922229423201556, "grad_norm": 0.4444274604320526, "learning_rate": 2.678767328837214e-06, "loss": 0.1019, "step": 6148 }, { "epoch": 1.992546986390149, "grad_norm": 0.45031675696372986, "learning_rate": 2.6772181794200885e-06, "loss": 0.094, "step": 6149 }, { "epoch": 1.9928710304601425, "grad_norm": 0.4273860454559326, "learning_rate": 2.6756693142889733e-06, "loss": 0.0907, "step": 6150 }, { "epoch": 1.9931950745301361, "grad_norm": 0.41163426637649536, "learning_rate": 2.6741207336334312e-06, "loss": 0.0935, "step": 6151 }, { "epoch": 1.9935191186001298, "grad_norm": 0.4350992441177368, "learning_rate": 2.6725724376429953e-06, "loss": 0.0981, "step": 6152 }, { "epoch": 1.9938431626701232, "grad_norm": 0.4312846064567566, "learning_rate": 2.671024426507161e-06, "loss": 0.1033, "step": 6153 }, { "epoch": 1.9941672067401166, "grad_norm": 0.4249867796897888, "learning_rate": 2.669476700415391e-06, "loss": 0.0983, "step": 6154 }, { "epoch": 1.99449125081011, "grad_norm": 0.4431942403316498, "learning_rate": 2.667929259557112e-06, "loss": 0.1071, "step": 6155 }, { "epoch": 1.9948152948801037, "grad_norm": 0.47021448612213135, "learning_rate": 2.666382104121715e-06, "loss": 0.1103, "step": 6156 }, { "epoch": 1.9951393389500973, "grad_norm": 0.4780392348766327, "learning_rate": 2.6648352342985596e-06, "loss": 0.112, "step": 6157 }, { "epoch": 1.9954633830200907, "grad_norm": 0.4610086977481842, "learning_rate": 2.6632886502769617e-06, "loss": 0.1091, "step": 6158 }, { "epoch": 1.9957874270900842, "grad_norm": 0.4752705991268158, "learning_rate": 2.661742352246215e-06, "loss": 0.1137, "step": 6159 }, { "epoch": 1.9961114711600778, "grad_norm": 0.4885751008987427, "learning_rate": 2.6601963403955667e-06, "loss": 0.1137, "step": 6160 }, { "epoch": 1.9964355152300715, "grad_norm": 0.4374214708805084, "learning_rate": 2.6586506149142355e-06, "loss": 0.1007, "step": 6161 }, { "epoch": 1.9967595593000649, "grad_norm": 0.45261216163635254, "learning_rate": 2.6571051759914023e-06, "loss": 0.1001, "step": 6162 }, { "epoch": 1.9970836033700583, "grad_norm": 0.4949720799922943, "learning_rate": 2.6555600238162153e-06, "loss": 0.1148, "step": 6163 }, { "epoch": 1.9974076474400517, "grad_norm": 0.46937358379364014, "learning_rate": 2.6540151585777875e-06, "loss": 0.109, "step": 6164 }, { "epoch": 1.9977316915100454, "grad_norm": 0.4581141471862793, "learning_rate": 2.652470580465189e-06, "loss": 0.1087, "step": 6165 }, { "epoch": 1.998055735580039, "grad_norm": 0.48938125371932983, "learning_rate": 2.65092628966747e-06, "loss": 0.1101, "step": 6166 }, { "epoch": 1.9983797796500324, "grad_norm": 0.4437830448150635, "learning_rate": 2.649382286373628e-06, "loss": 0.1016, "step": 6167 }, { "epoch": 1.9987038237200259, "grad_norm": 0.45508936047554016, "learning_rate": 2.647838570772642e-06, "loss": 0.0992, "step": 6168 }, { "epoch": 1.9990278677900193, "grad_norm": 0.46347787976264954, "learning_rate": 2.6462951430534434e-06, "loss": 0.1094, "step": 6169 }, { "epoch": 1.999351911860013, "grad_norm": 0.43078288435935974, "learning_rate": 2.6447520034049323e-06, "loss": 0.1063, "step": 6170 }, { "epoch": 1.9996759559300066, "grad_norm": 0.43163982033729553, "learning_rate": 2.6432091520159764e-06, "loss": 0.0985, "step": 6171 }, { "epoch": 2.0, "grad_norm": 0.4776883125305176, "learning_rate": 2.6416665890754044e-06, "loss": 0.1241, "step": 6172 }, { "epoch": 2.0003240440699934, "grad_norm": 0.39515194296836853, "learning_rate": 2.6401243147720136e-06, "loss": 0.0775, "step": 6173 }, { "epoch": 2.000648088139987, "grad_norm": 0.3835315704345703, "learning_rate": 2.6385823292945593e-06, "loss": 0.0777, "step": 6174 }, { "epoch": 2.0009721322099807, "grad_norm": 0.3732658922672272, "learning_rate": 2.6370406328317676e-06, "loss": 0.0741, "step": 6175 }, { "epoch": 2.001296176279974, "grad_norm": 0.36997994780540466, "learning_rate": 2.635499225572327e-06, "loss": 0.0743, "step": 6176 }, { "epoch": 2.0016202203499676, "grad_norm": 0.3874099552631378, "learning_rate": 2.6339581077048914e-06, "loss": 0.0782, "step": 6177 }, { "epoch": 2.001944264419961, "grad_norm": 0.3987525999546051, "learning_rate": 2.632417279418078e-06, "loss": 0.0803, "step": 6178 }, { "epoch": 2.002268308489955, "grad_norm": 0.36920928955078125, "learning_rate": 2.6308767409004697e-06, "loss": 0.0759, "step": 6179 }, { "epoch": 2.0025923525599483, "grad_norm": 0.38145503401756287, "learning_rate": 2.6293364923406138e-06, "loss": 0.0764, "step": 6180 }, { "epoch": 2.0029163966299417, "grad_norm": 0.37188780307769775, "learning_rate": 2.6277965339270234e-06, "loss": 0.0701, "step": 6181 }, { "epoch": 2.003240440699935, "grad_norm": 0.3832031488418579, "learning_rate": 2.626256865848168e-06, "loss": 0.0705, "step": 6182 }, { "epoch": 2.0035644847699285, "grad_norm": 0.3867064118385315, "learning_rate": 2.6247174882924974e-06, "loss": 0.0735, "step": 6183 }, { "epoch": 2.0038885288399224, "grad_norm": 0.4124574065208435, "learning_rate": 2.623178401448409e-06, "loss": 0.0758, "step": 6184 }, { "epoch": 2.004212572909916, "grad_norm": 0.41653987765312195, "learning_rate": 2.6216396055042747e-06, "loss": 0.0709, "step": 6185 }, { "epoch": 2.0045366169799093, "grad_norm": 0.38831084966659546, "learning_rate": 2.620101100648431e-06, "loss": 0.0668, "step": 6186 }, { "epoch": 2.0048606610499027, "grad_norm": 0.41165921092033386, "learning_rate": 2.618562887069169e-06, "loss": 0.0748, "step": 6187 }, { "epoch": 2.005184705119896, "grad_norm": 0.3913106322288513, "learning_rate": 2.6170249649547595e-06, "loss": 0.0678, "step": 6188 }, { "epoch": 2.00550874918989, "grad_norm": 0.4491140842437744, "learning_rate": 2.615487334493422e-06, "loss": 0.0778, "step": 6189 }, { "epoch": 2.0058327932598834, "grad_norm": 0.4365970194339752, "learning_rate": 2.613949995873354e-06, "loss": 0.0789, "step": 6190 }, { "epoch": 2.006156837329877, "grad_norm": 0.4461490511894226, "learning_rate": 2.6124129492827045e-06, "loss": 0.0764, "step": 6191 }, { "epoch": 2.0064808813998702, "grad_norm": 0.4379573166370392, "learning_rate": 2.6108761949095996e-06, "loss": 0.0725, "step": 6192 }, { "epoch": 2.006804925469864, "grad_norm": 0.46649301052093506, "learning_rate": 2.609339732942119e-06, "loss": 0.0774, "step": 6193 }, { "epoch": 2.0071289695398575, "grad_norm": 0.42365261912345886, "learning_rate": 2.6078035635683106e-06, "loss": 0.068, "step": 6194 }, { "epoch": 2.007453013609851, "grad_norm": 0.4476017355918884, "learning_rate": 2.6062676869761905e-06, "loss": 0.0751, "step": 6195 }, { "epoch": 2.0077770576798444, "grad_norm": 0.48353251814842224, "learning_rate": 2.6047321033537276e-06, "loss": 0.0825, "step": 6196 }, { "epoch": 2.008101101749838, "grad_norm": 0.46962183713912964, "learning_rate": 2.603196812888872e-06, "loss": 0.0828, "step": 6197 }, { "epoch": 2.0084251458198317, "grad_norm": 0.46140119433403015, "learning_rate": 2.601661815769521e-06, "loss": 0.0777, "step": 6198 }, { "epoch": 2.008749189889825, "grad_norm": 0.47431838512420654, "learning_rate": 2.600127112183547e-06, "loss": 0.0718, "step": 6199 }, { "epoch": 2.0090732339598185, "grad_norm": 0.4523472487926483, "learning_rate": 2.598592702318781e-06, "loss": 0.0751, "step": 6200 }, { "epoch": 2.009397278029812, "grad_norm": 0.4582405686378479, "learning_rate": 2.597058586363021e-06, "loss": 0.0745, "step": 6201 }, { "epoch": 2.0097213220998054, "grad_norm": 0.4842430651187897, "learning_rate": 2.5955247645040282e-06, "loss": 0.0776, "step": 6202 }, { "epoch": 2.0100453661697992, "grad_norm": 0.5084421634674072, "learning_rate": 2.593991236929526e-06, "loss": 0.0839, "step": 6203 }, { "epoch": 2.0103694102397927, "grad_norm": 0.4512103199958801, "learning_rate": 2.5924580038272056e-06, "loss": 0.0704, "step": 6204 }, { "epoch": 2.010693454309786, "grad_norm": 0.41768187284469604, "learning_rate": 2.5909250653847205e-06, "loss": 0.0707, "step": 6205 }, { "epoch": 2.0110174983797795, "grad_norm": 0.43947720527648926, "learning_rate": 2.589392421789684e-06, "loss": 0.0699, "step": 6206 }, { "epoch": 2.0113415424497734, "grad_norm": 0.453520804643631, "learning_rate": 2.5878600732296778e-06, "loss": 0.0733, "step": 6207 }, { "epoch": 2.011665586519767, "grad_norm": 0.4499374032020569, "learning_rate": 2.5863280198922474e-06, "loss": 0.0722, "step": 6208 }, { "epoch": 2.01198963058976, "grad_norm": 0.4869464039802551, "learning_rate": 2.5847962619649015e-06, "loss": 0.0813, "step": 6209 }, { "epoch": 2.0123136746597536, "grad_norm": 0.4863876700401306, "learning_rate": 2.583264799635114e-06, "loss": 0.0797, "step": 6210 }, { "epoch": 2.012637718729747, "grad_norm": 0.43555524945259094, "learning_rate": 2.5817336330903154e-06, "loss": 0.0701, "step": 6211 }, { "epoch": 2.012961762799741, "grad_norm": 0.4633532762527466, "learning_rate": 2.580202762517914e-06, "loss": 0.0726, "step": 6212 }, { "epoch": 2.0132858068697344, "grad_norm": 0.4807261526584625, "learning_rate": 2.578672188105264e-06, "loss": 0.0772, "step": 6213 }, { "epoch": 2.0136098509397278, "grad_norm": 0.5036196112632751, "learning_rate": 2.577141910039702e-06, "loss": 0.0775, "step": 6214 }, { "epoch": 2.013933895009721, "grad_norm": 0.47016188502311707, "learning_rate": 2.5756119285085133e-06, "loss": 0.0789, "step": 6215 }, { "epoch": 2.0142579390797146, "grad_norm": 0.43279916048049927, "learning_rate": 2.5740822436989556e-06, "loss": 0.0735, "step": 6216 }, { "epoch": 2.0145819831497085, "grad_norm": 0.41868194937705994, "learning_rate": 2.572552855798247e-06, "loss": 0.0695, "step": 6217 }, { "epoch": 2.014906027219702, "grad_norm": 0.4771308898925781, "learning_rate": 2.5710237649935665e-06, "loss": 0.0761, "step": 6218 }, { "epoch": 2.0152300712896953, "grad_norm": 0.4490995407104492, "learning_rate": 2.5694949714720665e-06, "loss": 0.0737, "step": 6219 }, { "epoch": 2.0155541153596888, "grad_norm": 0.48034578561782837, "learning_rate": 2.567966475420849e-06, "loss": 0.0759, "step": 6220 }, { "epoch": 2.0158781594296826, "grad_norm": 0.49473416805267334, "learning_rate": 2.5664382770269945e-06, "loss": 0.0853, "step": 6221 }, { "epoch": 2.016202203499676, "grad_norm": 0.47211727499961853, "learning_rate": 2.5649103764775328e-06, "loss": 0.0663, "step": 6222 }, { "epoch": 2.0165262475696695, "grad_norm": 0.4685417413711548, "learning_rate": 2.5633827739594706e-06, "loss": 0.0789, "step": 6223 }, { "epoch": 2.016850291639663, "grad_norm": 0.44643479585647583, "learning_rate": 2.5618554696597676e-06, "loss": 0.0772, "step": 6224 }, { "epoch": 2.0171743357096563, "grad_norm": 0.42165714502334595, "learning_rate": 2.5603284637653517e-06, "loss": 0.0696, "step": 6225 }, { "epoch": 2.01749837977965, "grad_norm": 0.4879651665687561, "learning_rate": 2.558801756463114e-06, "loss": 0.0845, "step": 6226 }, { "epoch": 2.0178224238496436, "grad_norm": 0.4335845112800598, "learning_rate": 2.5572753479399094e-06, "loss": 0.0725, "step": 6227 }, { "epoch": 2.018146467919637, "grad_norm": 0.4804597795009613, "learning_rate": 2.5557492383825557e-06, "loss": 0.0852, "step": 6228 }, { "epoch": 2.0184705119896305, "grad_norm": 0.41719064116477966, "learning_rate": 2.554223427977831e-06, "loss": 0.0672, "step": 6229 }, { "epoch": 2.0187945560596243, "grad_norm": 0.4422290027141571, "learning_rate": 2.552697916912482e-06, "loss": 0.0738, "step": 6230 }, { "epoch": 2.0191186001296177, "grad_norm": 0.4339218735694885, "learning_rate": 2.5511727053732173e-06, "loss": 0.0734, "step": 6231 }, { "epoch": 2.019442644199611, "grad_norm": 0.4676489233970642, "learning_rate": 2.5496477935467057e-06, "loss": 0.0764, "step": 6232 }, { "epoch": 2.0197666882696046, "grad_norm": 0.45664387941360474, "learning_rate": 2.548123181619583e-06, "loss": 0.0817, "step": 6233 }, { "epoch": 2.020090732339598, "grad_norm": 0.47742414474487305, "learning_rate": 2.5465988697784473e-06, "loss": 0.0734, "step": 6234 }, { "epoch": 2.020414776409592, "grad_norm": 0.464341938495636, "learning_rate": 2.5450748582098592e-06, "loss": 0.0734, "step": 6235 }, { "epoch": 2.0207388204795853, "grad_norm": 0.4464026987552643, "learning_rate": 2.543551147100345e-06, "loss": 0.0749, "step": 6236 }, { "epoch": 2.0210628645495787, "grad_norm": 0.4652882218360901, "learning_rate": 2.5420277366363875e-06, "loss": 0.0795, "step": 6237 }, { "epoch": 2.021386908619572, "grad_norm": 0.4414404332637787, "learning_rate": 2.540504627004441e-06, "loss": 0.0731, "step": 6238 }, { "epoch": 2.0217109526895656, "grad_norm": 0.41924309730529785, "learning_rate": 2.5389818183909176e-06, "loss": 0.0667, "step": 6239 }, { "epoch": 2.0220349967595594, "grad_norm": 0.4610547125339508, "learning_rate": 2.5374593109821955e-06, "loss": 0.0796, "step": 6240 }, { "epoch": 2.022359040829553, "grad_norm": 0.45951277017593384, "learning_rate": 2.5359371049646164e-06, "loss": 0.0722, "step": 6241 }, { "epoch": 2.0226830848995463, "grad_norm": 0.46303337812423706, "learning_rate": 2.534415200524477e-06, "loss": 0.0734, "step": 6242 }, { "epoch": 2.0230071289695397, "grad_norm": 0.4553036689758301, "learning_rate": 2.532893597848053e-06, "loss": 0.0741, "step": 6243 }, { "epoch": 2.0233311730395336, "grad_norm": 0.4600156247615814, "learning_rate": 2.5313722971215655e-06, "loss": 0.0743, "step": 6244 }, { "epoch": 2.023655217109527, "grad_norm": 0.4609796404838562, "learning_rate": 2.529851298531214e-06, "loss": 0.074, "step": 6245 }, { "epoch": 2.0239792611795204, "grad_norm": 0.4632244408130646, "learning_rate": 2.5283306022631493e-06, "loss": 0.0763, "step": 6246 }, { "epoch": 2.024303305249514, "grad_norm": 0.4586898684501648, "learning_rate": 2.5268102085034906e-06, "loss": 0.0738, "step": 6247 }, { "epoch": 2.0246273493195073, "grad_norm": 0.4696769118309021, "learning_rate": 2.5252901174383203e-06, "loss": 0.0742, "step": 6248 }, { "epoch": 2.024951393389501, "grad_norm": 0.7212674617767334, "learning_rate": 2.523770329253683e-06, "loss": 0.0723, "step": 6249 }, { "epoch": 2.0252754374594946, "grad_norm": 0.4639633893966675, "learning_rate": 2.5222508441355875e-06, "loss": 0.0738, "step": 6250 }, { "epoch": 2.025599481529488, "grad_norm": 0.41529393196105957, "learning_rate": 2.520731662269998e-06, "loss": 0.0631, "step": 6251 }, { "epoch": 2.0259235255994814, "grad_norm": 0.44837164878845215, "learning_rate": 2.519212783842856e-06, "loss": 0.0691, "step": 6252 }, { "epoch": 2.026247569669475, "grad_norm": 0.46720263361930847, "learning_rate": 2.5176942090400512e-06, "loss": 0.0754, "step": 6253 }, { "epoch": 2.0265716137394687, "grad_norm": 0.48482298851013184, "learning_rate": 2.5161759380474448e-06, "loss": 0.0709, "step": 6254 }, { "epoch": 2.026895657809462, "grad_norm": 0.46289435029029846, "learning_rate": 2.514657971050858e-06, "loss": 0.0767, "step": 6255 }, { "epoch": 2.0272197018794555, "grad_norm": 0.4710228741168976, "learning_rate": 2.513140308236076e-06, "loss": 0.0734, "step": 6256 }, { "epoch": 2.027543745949449, "grad_norm": 0.48888158798217773, "learning_rate": 2.511622949788845e-06, "loss": 0.077, "step": 6257 }, { "epoch": 2.027867790019443, "grad_norm": 0.4653327763080597, "learning_rate": 2.5101058958948766e-06, "loss": 0.0776, "step": 6258 }, { "epoch": 2.0281918340894363, "grad_norm": 0.4562969207763672, "learning_rate": 2.5085891467398433e-06, "loss": 0.0721, "step": 6259 }, { "epoch": 2.0285158781594297, "grad_norm": 0.4664032459259033, "learning_rate": 2.5070727025093785e-06, "loss": 0.0683, "step": 6260 }, { "epoch": 2.028839922229423, "grad_norm": 0.49336710572242737, "learning_rate": 2.505556563389081e-06, "loss": 0.0775, "step": 6261 }, { "epoch": 2.0291639662994165, "grad_norm": 0.4181060791015625, "learning_rate": 2.5040407295645126e-06, "loss": 0.0662, "step": 6262 }, { "epoch": 2.0294880103694104, "grad_norm": 0.4688979685306549, "learning_rate": 2.5025252012211955e-06, "loss": 0.0757, "step": 6263 }, { "epoch": 2.029812054439404, "grad_norm": 0.5041515827178955, "learning_rate": 2.5010099785446166e-06, "loss": 0.0806, "step": 6264 }, { "epoch": 2.0301360985093972, "grad_norm": 0.4415440559387207, "learning_rate": 2.4994950617202268e-06, "loss": 0.0715, "step": 6265 }, { "epoch": 2.0304601425793907, "grad_norm": 0.4888792037963867, "learning_rate": 2.49798045093343e-06, "loss": 0.0786, "step": 6266 }, { "epoch": 2.0307841866493845, "grad_norm": 0.43033257126808167, "learning_rate": 2.4964661463696087e-06, "loss": 0.0708, "step": 6267 }, { "epoch": 2.031108230719378, "grad_norm": 0.4612560272216797, "learning_rate": 2.494952148214093e-06, "loss": 0.0763, "step": 6268 }, { "epoch": 2.0314322747893714, "grad_norm": 0.4671456515789032, "learning_rate": 2.493438456652184e-06, "loss": 0.0794, "step": 6269 }, { "epoch": 2.031756318859365, "grad_norm": 0.5146381855010986, "learning_rate": 2.4919250718691424e-06, "loss": 0.0784, "step": 6270 }, { "epoch": 2.0320803629293582, "grad_norm": 0.44912979006767273, "learning_rate": 2.4904119940501914e-06, "loss": 0.0678, "step": 6271 }, { "epoch": 2.032404406999352, "grad_norm": 0.506953775882721, "learning_rate": 2.4888992233805205e-06, "loss": 0.0795, "step": 6272 }, { "epoch": 2.0327284510693455, "grad_norm": 0.5138766169548035, "learning_rate": 2.4873867600452705e-06, "loss": 0.0826, "step": 6273 }, { "epoch": 2.033052495139339, "grad_norm": 0.4463346600532532, "learning_rate": 2.4858746042295616e-06, "loss": 0.0708, "step": 6274 }, { "epoch": 2.0333765392093324, "grad_norm": 0.5110496878623962, "learning_rate": 2.4843627561184587e-06, "loss": 0.0803, "step": 6275 }, { "epoch": 2.033700583279326, "grad_norm": 0.4695765972137451, "learning_rate": 2.4828512158970042e-06, "loss": 0.0727, "step": 6276 }, { "epoch": 2.0340246273493197, "grad_norm": 0.47844061255455017, "learning_rate": 2.481339983750192e-06, "loss": 0.0775, "step": 6277 }, { "epoch": 2.034348671419313, "grad_norm": 0.4744821786880493, "learning_rate": 2.4798290598629826e-06, "loss": 0.0765, "step": 6278 }, { "epoch": 2.0346727154893065, "grad_norm": 0.45612332224845886, "learning_rate": 2.4783184444202995e-06, "loss": 0.0782, "step": 6279 }, { "epoch": 2.0349967595593, "grad_norm": 0.4476688504219055, "learning_rate": 2.476808137607027e-06, "loss": 0.0703, "step": 6280 }, { "epoch": 2.035320803629294, "grad_norm": 0.4483274817466736, "learning_rate": 2.4752981396080138e-06, "loss": 0.0729, "step": 6281 }, { "epoch": 2.035644847699287, "grad_norm": 0.4268377423286438, "learning_rate": 2.473788450608064e-06, "loss": 0.0659, "step": 6282 }, { "epoch": 2.0359688917692806, "grad_norm": 0.4591849446296692, "learning_rate": 2.472279070791955e-06, "loss": 0.0711, "step": 6283 }, { "epoch": 2.036292935839274, "grad_norm": 0.4683074355125427, "learning_rate": 2.470770000344417e-06, "loss": 0.0763, "step": 6284 }, { "epoch": 2.0366169799092675, "grad_norm": 0.46519917249679565, "learning_rate": 2.4692612394501454e-06, "loss": 0.0749, "step": 6285 }, { "epoch": 2.0369410239792614, "grad_norm": 0.4236977994441986, "learning_rate": 2.4677527882937986e-06, "loss": 0.068, "step": 6286 }, { "epoch": 2.037265068049255, "grad_norm": 0.45990490913391113, "learning_rate": 2.4662446470599967e-06, "loss": 0.0759, "step": 6287 }, { "epoch": 2.037589112119248, "grad_norm": 0.4591306149959564, "learning_rate": 2.4647368159333207e-06, "loss": 0.0733, "step": 6288 }, { "epoch": 2.0379131561892416, "grad_norm": 0.4575057625770569, "learning_rate": 2.4632292950983156e-06, "loss": 0.0678, "step": 6289 }, { "epoch": 2.038237200259235, "grad_norm": 0.42534708976745605, "learning_rate": 2.461722084739489e-06, "loss": 0.07, "step": 6290 }, { "epoch": 2.038561244329229, "grad_norm": 0.4384348690509796, "learning_rate": 2.460215185041305e-06, "loss": 0.0706, "step": 6291 }, { "epoch": 2.0388852883992223, "grad_norm": 0.5067781805992126, "learning_rate": 2.4587085961881947e-06, "loss": 0.0728, "step": 6292 }, { "epoch": 2.0392093324692158, "grad_norm": 0.44250717759132385, "learning_rate": 2.4572023183645512e-06, "loss": 0.0666, "step": 6293 }, { "epoch": 2.039533376539209, "grad_norm": 0.4823816120624542, "learning_rate": 2.455696351754727e-06, "loss": 0.0797, "step": 6294 }, { "epoch": 2.039857420609203, "grad_norm": 0.4811875820159912, "learning_rate": 2.4541906965430395e-06, "loss": 0.0763, "step": 6295 }, { "epoch": 2.0401814646791965, "grad_norm": 0.475002646446228, "learning_rate": 2.452685352913767e-06, "loss": 0.0724, "step": 6296 }, { "epoch": 2.04050550874919, "grad_norm": 0.5020356774330139, "learning_rate": 2.451180321051143e-06, "loss": 0.0807, "step": 6297 }, { "epoch": 2.0408295528191833, "grad_norm": 0.4758034944534302, "learning_rate": 2.449675601139378e-06, "loss": 0.0749, "step": 6298 }, { "epoch": 2.0411535968891767, "grad_norm": 0.45018815994262695, "learning_rate": 2.448171193362628e-06, "loss": 0.0684, "step": 6299 }, { "epoch": 2.0414776409591706, "grad_norm": 0.46852201223373413, "learning_rate": 2.44666709790502e-06, "loss": 0.0732, "step": 6300 }, { "epoch": 2.041801685029164, "grad_norm": 0.4619670510292053, "learning_rate": 2.4451633149506416e-06, "loss": 0.0728, "step": 6301 }, { "epoch": 2.0421257290991575, "grad_norm": 0.46022775769233704, "learning_rate": 2.4436598446835404e-06, "loss": 0.0722, "step": 6302 }, { "epoch": 2.042449773169151, "grad_norm": 0.45504963397979736, "learning_rate": 2.442156687287729e-06, "loss": 0.0704, "step": 6303 }, { "epoch": 2.0427738172391443, "grad_norm": 0.5202746391296387, "learning_rate": 2.4406538429471733e-06, "loss": 0.0739, "step": 6304 }, { "epoch": 2.043097861309138, "grad_norm": 0.4696272611618042, "learning_rate": 2.4391513118458145e-06, "loss": 0.072, "step": 6305 }, { "epoch": 2.0434219053791316, "grad_norm": 0.46362751722335815, "learning_rate": 2.4376490941675403e-06, "loss": 0.0702, "step": 6306 }, { "epoch": 2.043745949449125, "grad_norm": 0.5203388333320618, "learning_rate": 2.436147190096216e-06, "loss": 0.0815, "step": 6307 }, { "epoch": 2.0440699935191184, "grad_norm": 0.4749559164047241, "learning_rate": 2.434645599815653e-06, "loss": 0.0705, "step": 6308 }, { "epoch": 2.0443940375891123, "grad_norm": 0.4685365855693817, "learning_rate": 2.4331443235096343e-06, "loss": 0.0757, "step": 6309 }, { "epoch": 2.0447180816591057, "grad_norm": 0.4908275902271271, "learning_rate": 2.431643361361901e-06, "loss": 0.0748, "step": 6310 }, { "epoch": 2.045042125729099, "grad_norm": 0.48974791169166565, "learning_rate": 2.4301427135561572e-06, "loss": 0.0761, "step": 6311 }, { "epoch": 2.0453661697990926, "grad_norm": 0.4484266936779022, "learning_rate": 2.4286423802760683e-06, "loss": 0.0713, "step": 6312 }, { "epoch": 2.045690213869086, "grad_norm": 0.4263099431991577, "learning_rate": 2.4271423617052564e-06, "loss": 0.0691, "step": 6313 }, { "epoch": 2.04601425793908, "grad_norm": 0.4288794696331024, "learning_rate": 2.4256426580273156e-06, "loss": 0.07, "step": 6314 }, { "epoch": 2.0463383020090733, "grad_norm": 0.48074692487716675, "learning_rate": 2.42414326942579e-06, "loss": 0.075, "step": 6315 }, { "epoch": 2.0466623460790667, "grad_norm": 0.46625542640686035, "learning_rate": 2.422644196084192e-06, "loss": 0.0682, "step": 6316 }, { "epoch": 2.04698639014906, "grad_norm": 0.47445952892303467, "learning_rate": 2.4211454381859935e-06, "loss": 0.0764, "step": 6317 }, { "epoch": 2.047310434219054, "grad_norm": 0.48692288994789124, "learning_rate": 2.419646995914628e-06, "loss": 0.0768, "step": 6318 }, { "epoch": 2.0476344782890474, "grad_norm": 0.5131134986877441, "learning_rate": 2.4181488694534903e-06, "loss": 0.0823, "step": 6319 }, { "epoch": 2.047958522359041, "grad_norm": 0.4429638087749481, "learning_rate": 2.4166510589859394e-06, "loss": 0.0712, "step": 6320 }, { "epoch": 2.0482825664290343, "grad_norm": 0.49024632573127747, "learning_rate": 2.4151535646952877e-06, "loss": 0.0748, "step": 6321 }, { "epoch": 2.0486066104990277, "grad_norm": 0.4927096664905548, "learning_rate": 2.413656386764817e-06, "loss": 0.0782, "step": 6322 }, { "epoch": 2.0489306545690216, "grad_norm": 0.5299798250198364, "learning_rate": 2.4121595253777657e-06, "loss": 0.0866, "step": 6323 }, { "epoch": 2.049254698639015, "grad_norm": 0.436034232378006, "learning_rate": 2.410662980717337e-06, "loss": 0.0638, "step": 6324 }, { "epoch": 2.0495787427090084, "grad_norm": 0.461628794670105, "learning_rate": 2.4091667529666923e-06, "loss": 0.0738, "step": 6325 }, { "epoch": 2.049902786779002, "grad_norm": 0.4576832354068756, "learning_rate": 2.4076708423089563e-06, "loss": 0.0756, "step": 6326 }, { "epoch": 2.0502268308489953, "grad_norm": 0.4655950963497162, "learning_rate": 2.4061752489272156e-06, "loss": 0.0734, "step": 6327 }, { "epoch": 2.050550874918989, "grad_norm": 0.446172833442688, "learning_rate": 2.40467997300451e-06, "loss": 0.0706, "step": 6328 }, { "epoch": 2.0508749189889826, "grad_norm": 0.43468722701072693, "learning_rate": 2.403185014723855e-06, "loss": 0.068, "step": 6329 }, { "epoch": 2.051198963058976, "grad_norm": 0.4308585524559021, "learning_rate": 2.401690374268211e-06, "loss": 0.068, "step": 6330 }, { "epoch": 2.0515230071289694, "grad_norm": 0.42982977628707886, "learning_rate": 2.400196051820516e-06, "loss": 0.0718, "step": 6331 }, { "epoch": 2.0518470511989633, "grad_norm": 0.47812700271606445, "learning_rate": 2.3987020475636538e-06, "loss": 0.0784, "step": 6332 }, { "epoch": 2.0521710952689567, "grad_norm": 0.447527676820755, "learning_rate": 2.3972083616804786e-06, "loss": 0.0688, "step": 6333 }, { "epoch": 2.05249513933895, "grad_norm": 0.5247117877006531, "learning_rate": 2.395714994353805e-06, "loss": 0.0834, "step": 6334 }, { "epoch": 2.0528191834089435, "grad_norm": 0.46339911222457886, "learning_rate": 2.3942219457664007e-06, "loss": 0.0761, "step": 6335 }, { "epoch": 2.053143227478937, "grad_norm": 0.41818615794181824, "learning_rate": 2.392729216101008e-06, "loss": 0.0674, "step": 6336 }, { "epoch": 2.053467271548931, "grad_norm": 0.479206919670105, "learning_rate": 2.391236805540315e-06, "loss": 0.0777, "step": 6337 }, { "epoch": 2.0537913156189243, "grad_norm": 0.4307020306587219, "learning_rate": 2.3897447142669864e-06, "loss": 0.0706, "step": 6338 }, { "epoch": 2.0541153596889177, "grad_norm": 0.43551212549209595, "learning_rate": 2.3882529424636335e-06, "loss": 0.0713, "step": 6339 }, { "epoch": 2.054439403758911, "grad_norm": 0.4901053011417389, "learning_rate": 2.3867614903128372e-06, "loss": 0.0739, "step": 6340 }, { "epoch": 2.0547634478289045, "grad_norm": 0.5215500593185425, "learning_rate": 2.385270357997136e-06, "loss": 0.0743, "step": 6341 }, { "epoch": 2.0550874918988984, "grad_norm": 0.4421338737010956, "learning_rate": 2.38377954569903e-06, "loss": 0.0676, "step": 6342 }, { "epoch": 2.055411535968892, "grad_norm": 0.4533500373363495, "learning_rate": 2.3822890536009835e-06, "loss": 0.0712, "step": 6343 }, { "epoch": 2.0557355800388852, "grad_norm": 0.4607000946998596, "learning_rate": 2.3807988818854126e-06, "loss": 0.072, "step": 6344 }, { "epoch": 2.0560596241088787, "grad_norm": 0.4839719533920288, "learning_rate": 2.3793090307347034e-06, "loss": 0.0761, "step": 6345 }, { "epoch": 2.0563836681788725, "grad_norm": 0.44080618023872375, "learning_rate": 2.3778195003311983e-06, "loss": 0.0706, "step": 6346 }, { "epoch": 2.056707712248866, "grad_norm": 0.4685359001159668, "learning_rate": 2.3763302908572016e-06, "loss": 0.0746, "step": 6347 }, { "epoch": 2.0570317563188594, "grad_norm": 0.45527467131614685, "learning_rate": 2.3748414024949783e-06, "loss": 0.0708, "step": 6348 }, { "epoch": 2.057355800388853, "grad_norm": 0.46099653840065, "learning_rate": 2.373352835426754e-06, "loss": 0.0712, "step": 6349 }, { "epoch": 2.057679844458846, "grad_norm": 0.45064616203308105, "learning_rate": 2.3718645898347144e-06, "loss": 0.0703, "step": 6350 }, { "epoch": 2.05800388852884, "grad_norm": 0.47433632612228394, "learning_rate": 2.3703766659010086e-06, "loss": 0.073, "step": 6351 }, { "epoch": 2.0583279325988335, "grad_norm": 0.5127187371253967, "learning_rate": 2.368889063807739e-06, "loss": 0.0785, "step": 6352 }, { "epoch": 2.058651976668827, "grad_norm": 0.4757230281829834, "learning_rate": 2.36740178373698e-06, "loss": 0.0739, "step": 6353 }, { "epoch": 2.0589760207388204, "grad_norm": 0.47789400815963745, "learning_rate": 2.3659148258707553e-06, "loss": 0.0795, "step": 6354 }, { "epoch": 2.059300064808814, "grad_norm": 0.489640474319458, "learning_rate": 2.364428190391056e-06, "loss": 0.0692, "step": 6355 }, { "epoch": 2.0596241088788076, "grad_norm": 0.4995042383670807, "learning_rate": 2.362941877479834e-06, "loss": 0.0708, "step": 6356 }, { "epoch": 2.059948152948801, "grad_norm": 0.47687268257141113, "learning_rate": 2.3614558873189934e-06, "loss": 0.0795, "step": 6357 }, { "epoch": 2.0602721970187945, "grad_norm": 0.5026583075523376, "learning_rate": 2.3599702200904134e-06, "loss": 0.0769, "step": 6358 }, { "epoch": 2.060596241088788, "grad_norm": 0.5032001733779907, "learning_rate": 2.3584848759759165e-06, "loss": 0.0763, "step": 6359 }, { "epoch": 2.060920285158782, "grad_norm": 0.48778533935546875, "learning_rate": 2.356999855157303e-06, "loss": 0.0764, "step": 6360 }, { "epoch": 2.061244329228775, "grad_norm": 0.47545087337493896, "learning_rate": 2.355515157816318e-06, "loss": 0.0723, "step": 6361 }, { "epoch": 2.0615683732987686, "grad_norm": 0.47968313097953796, "learning_rate": 2.35403078413468e-06, "loss": 0.0769, "step": 6362 }, { "epoch": 2.061892417368762, "grad_norm": 0.5082609057426453, "learning_rate": 2.352546734294057e-06, "loss": 0.08, "step": 6363 }, { "epoch": 2.0622164614387555, "grad_norm": 0.4632527232170105, "learning_rate": 2.351063008476085e-06, "loss": 0.0682, "step": 6364 }, { "epoch": 2.0625405055087493, "grad_norm": 0.4676029086112976, "learning_rate": 2.3495796068623565e-06, "loss": 0.0748, "step": 6365 }, { "epoch": 2.0628645495787428, "grad_norm": 0.49084705114364624, "learning_rate": 2.3480965296344264e-06, "loss": 0.0789, "step": 6366 }, { "epoch": 2.063188593648736, "grad_norm": 0.47968021035194397, "learning_rate": 2.3466137769738106e-06, "loss": 0.0794, "step": 6367 }, { "epoch": 2.0635126377187296, "grad_norm": 0.4509599804878235, "learning_rate": 2.345131349061978e-06, "loss": 0.064, "step": 6368 }, { "epoch": 2.0638366817887235, "grad_norm": 0.4971236288547516, "learning_rate": 2.343649246080371e-06, "loss": 0.0829, "step": 6369 }, { "epoch": 2.064160725858717, "grad_norm": 0.49975186586380005, "learning_rate": 2.3421674682103784e-06, "loss": 0.0799, "step": 6370 }, { "epoch": 2.0644847699287103, "grad_norm": 0.4948911666870117, "learning_rate": 2.3406860156333584e-06, "loss": 0.0817, "step": 6371 }, { "epoch": 2.0648088139987038, "grad_norm": 0.4723863899707794, "learning_rate": 2.339204888530626e-06, "loss": 0.0748, "step": 6372 }, { "epoch": 2.065132858068697, "grad_norm": 0.4942839741706848, "learning_rate": 2.337724087083456e-06, "loss": 0.0806, "step": 6373 }, { "epoch": 2.065456902138691, "grad_norm": 0.4573349952697754, "learning_rate": 2.3362436114730858e-06, "loss": 0.0724, "step": 6374 }, { "epoch": 2.0657809462086845, "grad_norm": 0.49293631315231323, "learning_rate": 2.334763461880712e-06, "loss": 0.0791, "step": 6375 }, { "epoch": 2.066104990278678, "grad_norm": 0.5131592750549316, "learning_rate": 2.333283638487487e-06, "loss": 0.0731, "step": 6376 }, { "epoch": 2.0664290343486713, "grad_norm": 0.4619980752468109, "learning_rate": 2.3318041414745286e-06, "loss": 0.072, "step": 6377 }, { "epoch": 2.0667530784186647, "grad_norm": 0.4428941309452057, "learning_rate": 2.330324971022913e-06, "loss": 0.0698, "step": 6378 }, { "epoch": 2.0670771224886586, "grad_norm": 0.4406226873397827, "learning_rate": 2.328846127313677e-06, "loss": 0.0728, "step": 6379 }, { "epoch": 2.067401166558652, "grad_norm": 0.44868943095207214, "learning_rate": 2.3273676105278163e-06, "loss": 0.071, "step": 6380 }, { "epoch": 2.0677252106286454, "grad_norm": 0.4691688120365143, "learning_rate": 2.325889420846287e-06, "loss": 0.0757, "step": 6381 }, { "epoch": 2.068049254698639, "grad_norm": 0.47268572449684143, "learning_rate": 2.3244115584500065e-06, "loss": 0.0759, "step": 6382 }, { "epoch": 2.0683732987686327, "grad_norm": 0.4854334890842438, "learning_rate": 2.3229340235198462e-06, "loss": 0.0765, "step": 6383 }, { "epoch": 2.068697342838626, "grad_norm": 0.44808229804039, "learning_rate": 2.321456816236649e-06, "loss": 0.0697, "step": 6384 }, { "epoch": 2.0690213869086196, "grad_norm": 0.48902401328086853, "learning_rate": 2.3199799367812052e-06, "loss": 0.0746, "step": 6385 }, { "epoch": 2.069345430978613, "grad_norm": 0.4742581844329834, "learning_rate": 2.3185033853342733e-06, "loss": 0.0726, "step": 6386 }, { "epoch": 2.0696694750486064, "grad_norm": 0.46202272176742554, "learning_rate": 2.317027162076567e-06, "loss": 0.0732, "step": 6387 }, { "epoch": 2.0699935191186003, "grad_norm": 0.49215349555015564, "learning_rate": 2.3155512671887637e-06, "loss": 0.0764, "step": 6388 }, { "epoch": 2.0703175631885937, "grad_norm": 0.47210338711738586, "learning_rate": 2.3140757008514997e-06, "loss": 0.0663, "step": 6389 }, { "epoch": 2.070641607258587, "grad_norm": 0.47308242321014404, "learning_rate": 2.312600463245364e-06, "loss": 0.071, "step": 6390 }, { "epoch": 2.0709656513285806, "grad_norm": 0.47955775260925293, "learning_rate": 2.311125554550919e-06, "loss": 0.0782, "step": 6391 }, { "epoch": 2.071289695398574, "grad_norm": 0.4697352945804596, "learning_rate": 2.309650974948673e-06, "loss": 0.0719, "step": 6392 }, { "epoch": 2.071613739468568, "grad_norm": 0.5027076601982117, "learning_rate": 2.308176724619106e-06, "loss": 0.071, "step": 6393 }, { "epoch": 2.0719377835385613, "grad_norm": 0.47702887654304504, "learning_rate": 2.3067028037426476e-06, "loss": 0.0752, "step": 6394 }, { "epoch": 2.0722618276085547, "grad_norm": 0.4764552116394043, "learning_rate": 2.3052292124996927e-06, "loss": 0.0755, "step": 6395 }, { "epoch": 2.072585871678548, "grad_norm": 0.47523608803749084, "learning_rate": 2.3037559510705954e-06, "loss": 0.0767, "step": 6396 }, { "epoch": 2.072909915748542, "grad_norm": 0.4495546519756317, "learning_rate": 2.302283019635667e-06, "loss": 0.0715, "step": 6397 }, { "epoch": 2.0732339598185354, "grad_norm": 0.4740990996360779, "learning_rate": 2.3008104183751835e-06, "loss": 0.0721, "step": 6398 }, { "epoch": 2.073558003888529, "grad_norm": 0.4597020447254181, "learning_rate": 2.299338147469373e-06, "loss": 0.071, "step": 6399 }, { "epoch": 2.0738820479585223, "grad_norm": 0.4866323471069336, "learning_rate": 2.297866207098428e-06, "loss": 0.0784, "step": 6400 }, { "epoch": 2.0742060920285157, "grad_norm": 0.4788459837436676, "learning_rate": 2.296394597442501e-06, "loss": 0.0733, "step": 6401 }, { "epoch": 2.0745301360985096, "grad_norm": 0.47980794310569763, "learning_rate": 2.2949233186817026e-06, "loss": 0.0755, "step": 6402 }, { "epoch": 2.074854180168503, "grad_norm": 0.4675251245498657, "learning_rate": 2.2934523709961027e-06, "loss": 0.0759, "step": 6403 }, { "epoch": 2.0751782242384964, "grad_norm": 0.47605273127555847, "learning_rate": 2.2919817545657315e-06, "loss": 0.0747, "step": 6404 }, { "epoch": 2.07550226830849, "grad_norm": 0.49417728185653687, "learning_rate": 2.290511469570577e-06, "loss": 0.0737, "step": 6405 }, { "epoch": 2.0758263123784833, "grad_norm": 0.5057334899902344, "learning_rate": 2.289041516190591e-06, "loss": 0.0759, "step": 6406 }, { "epoch": 2.076150356448477, "grad_norm": 0.49573424458503723, "learning_rate": 2.287571894605678e-06, "loss": 0.0835, "step": 6407 }, { "epoch": 2.0764744005184705, "grad_norm": 0.5077083706855774, "learning_rate": 2.286102604995706e-06, "loss": 0.0842, "step": 6408 }, { "epoch": 2.076798444588464, "grad_norm": 0.47334715723991394, "learning_rate": 2.284633647540503e-06, "loss": 0.068, "step": 6409 }, { "epoch": 2.0771224886584574, "grad_norm": 0.46106183528900146, "learning_rate": 2.2831650224198547e-06, "loss": 0.063, "step": 6410 }, { "epoch": 2.0774465327284513, "grad_norm": 0.44112372398376465, "learning_rate": 2.2816967298135082e-06, "loss": 0.0688, "step": 6411 }, { "epoch": 2.0777705767984447, "grad_norm": 0.43434029817581177, "learning_rate": 2.2802287699011633e-06, "loss": 0.0685, "step": 6412 }, { "epoch": 2.078094620868438, "grad_norm": 0.49972984194755554, "learning_rate": 2.2787611428624913e-06, "loss": 0.0787, "step": 6413 }, { "epoch": 2.0784186649384315, "grad_norm": 0.49321141839027405, "learning_rate": 2.277293848877108e-06, "loss": 0.0788, "step": 6414 }, { "epoch": 2.078742709008425, "grad_norm": 0.48927661776542664, "learning_rate": 2.2758268881246036e-06, "loss": 0.0791, "step": 6415 }, { "epoch": 2.079066753078419, "grad_norm": 0.4573841989040375, "learning_rate": 2.274360260784514e-06, "loss": 0.0677, "step": 6416 }, { "epoch": 2.0793907971484122, "grad_norm": 0.5282468795776367, "learning_rate": 2.2728939670363425e-06, "loss": 0.0816, "step": 6417 }, { "epoch": 2.0797148412184057, "grad_norm": 0.43157610297203064, "learning_rate": 2.271428007059549e-06, "loss": 0.0675, "step": 6418 }, { "epoch": 2.080038885288399, "grad_norm": 0.48366230726242065, "learning_rate": 2.2699623810335523e-06, "loss": 0.0728, "step": 6419 }, { "epoch": 2.080362929358393, "grad_norm": 0.46891725063323975, "learning_rate": 2.268497089137734e-06, "loss": 0.069, "step": 6420 }, { "epoch": 2.0806869734283864, "grad_norm": 0.4607992470264435, "learning_rate": 2.2670321315514242e-06, "loss": 0.0746, "step": 6421 }, { "epoch": 2.08101101749838, "grad_norm": 0.4852626621723175, "learning_rate": 2.265567508453929e-06, "loss": 0.077, "step": 6422 }, { "epoch": 2.0813350615683732, "grad_norm": 0.4649800956249237, "learning_rate": 2.2641032200244973e-06, "loss": 0.0758, "step": 6423 }, { "epoch": 2.0816591056383666, "grad_norm": 0.48343998193740845, "learning_rate": 2.2626392664423457e-06, "loss": 0.0768, "step": 6424 }, { "epoch": 2.0819831497083605, "grad_norm": 0.5083569288253784, "learning_rate": 2.261175647886648e-06, "loss": 0.0757, "step": 6425 }, { "epoch": 2.082307193778354, "grad_norm": 0.4693621098995209, "learning_rate": 2.2597123645365375e-06, "loss": 0.0749, "step": 6426 }, { "epoch": 2.0826312378483474, "grad_norm": 0.5031575560569763, "learning_rate": 2.2582494165711055e-06, "loss": 0.0762, "step": 6427 }, { "epoch": 2.082955281918341, "grad_norm": 0.49334973096847534, "learning_rate": 2.256786804169403e-06, "loss": 0.0696, "step": 6428 }, { "epoch": 2.083279325988334, "grad_norm": 0.4514414370059967, "learning_rate": 2.2553245275104406e-06, "loss": 0.07, "step": 6429 }, { "epoch": 2.083603370058328, "grad_norm": 0.4750472903251648, "learning_rate": 2.253862586773184e-06, "loss": 0.0748, "step": 6430 }, { "epoch": 2.0839274141283215, "grad_norm": 0.4798753261566162, "learning_rate": 2.2524009821365622e-06, "loss": 0.0786, "step": 6431 }, { "epoch": 2.084251458198315, "grad_norm": 0.4874429702758789, "learning_rate": 2.2509397137794616e-06, "loss": 0.0813, "step": 6432 }, { "epoch": 2.0845755022683083, "grad_norm": 0.5213260054588318, "learning_rate": 2.249478781880726e-06, "loss": 0.0819, "step": 6433 }, { "epoch": 2.084899546338302, "grad_norm": 0.45666658878326416, "learning_rate": 2.248018186619161e-06, "loss": 0.0707, "step": 6434 }, { "epoch": 2.0852235904082956, "grad_norm": 0.4775700569152832, "learning_rate": 2.2465579281735288e-06, "loss": 0.0679, "step": 6435 }, { "epoch": 2.085547634478289, "grad_norm": 0.4781835675239563, "learning_rate": 2.245098006722551e-06, "loss": 0.073, "step": 6436 }, { "epoch": 2.0858716785482825, "grad_norm": 0.45587706565856934, "learning_rate": 2.2436384224449094e-06, "loss": 0.0739, "step": 6437 }, { "epoch": 2.086195722618276, "grad_norm": 0.44620490074157715, "learning_rate": 2.242179175519239e-06, "loss": 0.0697, "step": 6438 }, { "epoch": 2.0865197666882698, "grad_norm": 0.5067679286003113, "learning_rate": 2.24072026612414e-06, "loss": 0.0744, "step": 6439 }, { "epoch": 2.086843810758263, "grad_norm": 0.47220391035079956, "learning_rate": 2.239261694438169e-06, "loss": 0.0733, "step": 6440 }, { "epoch": 2.0871678548282566, "grad_norm": 0.48890239000320435, "learning_rate": 2.2378034606398396e-06, "loss": 0.0768, "step": 6441 }, { "epoch": 2.08749189889825, "grad_norm": 0.48164820671081543, "learning_rate": 2.2363455649076295e-06, "loss": 0.0744, "step": 6442 }, { "epoch": 2.087815942968244, "grad_norm": 0.5287774801254272, "learning_rate": 2.234888007419963e-06, "loss": 0.078, "step": 6443 }, { "epoch": 2.0881399870382373, "grad_norm": 0.5205512642860413, "learning_rate": 2.233430788355241e-06, "loss": 0.0795, "step": 6444 }, { "epoch": 2.0884640311082308, "grad_norm": 0.479859858751297, "learning_rate": 2.2319739078918036e-06, "loss": 0.0703, "step": 6445 }, { "epoch": 2.088788075178224, "grad_norm": 0.5163217782974243, "learning_rate": 2.230517366207967e-06, "loss": 0.08, "step": 6446 }, { "epoch": 2.0891121192482176, "grad_norm": 0.4633115530014038, "learning_rate": 2.2290611634819925e-06, "loss": 0.0738, "step": 6447 }, { "epoch": 2.0894361633182115, "grad_norm": 0.4752098619937897, "learning_rate": 2.2276052998921064e-06, "loss": 0.0697, "step": 6448 }, { "epoch": 2.089760207388205, "grad_norm": 0.4927937686443329, "learning_rate": 2.2261497756164934e-06, "loss": 0.0757, "step": 6449 }, { "epoch": 2.0900842514581983, "grad_norm": 0.44520872831344604, "learning_rate": 2.2246945908332946e-06, "loss": 0.0657, "step": 6450 }, { "epoch": 2.0904082955281917, "grad_norm": 0.4533376693725586, "learning_rate": 2.2232397457206122e-06, "loss": 0.071, "step": 6451 }, { "epoch": 2.090732339598185, "grad_norm": 0.4914572238922119, "learning_rate": 2.221785240456501e-06, "loss": 0.0733, "step": 6452 }, { "epoch": 2.091056383668179, "grad_norm": 0.46928560733795166, "learning_rate": 2.220331075218984e-06, "loss": 0.0747, "step": 6453 }, { "epoch": 2.0913804277381725, "grad_norm": 0.46369004249572754, "learning_rate": 2.218877250186033e-06, "loss": 0.0691, "step": 6454 }, { "epoch": 2.091704471808166, "grad_norm": 0.48942112922668457, "learning_rate": 2.217423765535583e-06, "loss": 0.0759, "step": 6455 }, { "epoch": 2.0920285158781593, "grad_norm": 0.44329163432121277, "learning_rate": 2.2159706214455267e-06, "loss": 0.0666, "step": 6456 }, { "epoch": 2.0923525599481527, "grad_norm": 0.47483310103416443, "learning_rate": 2.2145178180937142e-06, "loss": 0.0794, "step": 6457 }, { "epoch": 2.0926766040181466, "grad_norm": 0.49389538168907166, "learning_rate": 2.2130653556579564e-06, "loss": 0.0774, "step": 6458 }, { "epoch": 2.09300064808814, "grad_norm": 0.5197028517723083, "learning_rate": 2.2116132343160183e-06, "loss": 0.078, "step": 6459 }, { "epoch": 2.0933246921581334, "grad_norm": 0.4676690399646759, "learning_rate": 2.210161454245629e-06, "loss": 0.0702, "step": 6460 }, { "epoch": 2.093648736228127, "grad_norm": 0.464379221200943, "learning_rate": 2.2087100156244684e-06, "loss": 0.0703, "step": 6461 }, { "epoch": 2.0939727802981207, "grad_norm": 0.46508681774139404, "learning_rate": 2.2072589186301797e-06, "loss": 0.0721, "step": 6462 }, { "epoch": 2.094296824368114, "grad_norm": 0.46484997868537903, "learning_rate": 2.2058081634403637e-06, "loss": 0.0727, "step": 6463 }, { "epoch": 2.0946208684381076, "grad_norm": 0.47296300530433655, "learning_rate": 2.2043577502325786e-06, "loss": 0.0705, "step": 6464 }, { "epoch": 2.094944912508101, "grad_norm": 0.4883367121219635, "learning_rate": 2.2029076791843408e-06, "loss": 0.0761, "step": 6465 }, { "epoch": 2.0952689565780944, "grad_norm": 0.47738826274871826, "learning_rate": 2.201457950473127e-06, "loss": 0.0717, "step": 6466 }, { "epoch": 2.0955930006480883, "grad_norm": 0.44886377453804016, "learning_rate": 2.2000085642763647e-06, "loss": 0.0655, "step": 6467 }, { "epoch": 2.0959170447180817, "grad_norm": 0.4837161898612976, "learning_rate": 2.1985595207714515e-06, "loss": 0.0685, "step": 6468 }, { "epoch": 2.096241088788075, "grad_norm": 0.4646311104297638, "learning_rate": 2.1971108201357294e-06, "loss": 0.0728, "step": 6469 }, { "epoch": 2.0965651328580686, "grad_norm": 0.49209609627723694, "learning_rate": 2.1956624625465124e-06, "loss": 0.0762, "step": 6470 }, { "epoch": 2.0968891769280624, "grad_norm": 0.46310967206954956, "learning_rate": 2.19421444818106e-06, "loss": 0.0719, "step": 6471 }, { "epoch": 2.097213220998056, "grad_norm": 0.494526743888855, "learning_rate": 2.1927667772165974e-06, "loss": 0.0779, "step": 6472 }, { "epoch": 2.0975372650680493, "grad_norm": 0.4947710633277893, "learning_rate": 2.1913194498303064e-06, "loss": 0.0739, "step": 6473 }, { "epoch": 2.0978613091380427, "grad_norm": 0.45872384309768677, "learning_rate": 2.189872466199321e-06, "loss": 0.0715, "step": 6474 }, { "epoch": 2.098185353208036, "grad_norm": 0.45841169357299805, "learning_rate": 2.188425826500746e-06, "loss": 0.0682, "step": 6475 }, { "epoch": 2.09850939727803, "grad_norm": 0.485802561044693, "learning_rate": 2.186979530911627e-06, "loss": 0.072, "step": 6476 }, { "epoch": 2.0988334413480234, "grad_norm": 0.4475204348564148, "learning_rate": 2.1855335796089854e-06, "loss": 0.0691, "step": 6477 }, { "epoch": 2.099157485418017, "grad_norm": 0.45817169547080994, "learning_rate": 2.184087972769786e-06, "loss": 0.0727, "step": 6478 }, { "epoch": 2.0994815294880103, "grad_norm": 0.4263944923877716, "learning_rate": 2.182642710570958e-06, "loss": 0.0633, "step": 6479 }, { "epoch": 2.0998055735580037, "grad_norm": 0.5023675560951233, "learning_rate": 2.1811977931893884e-06, "loss": 0.0774, "step": 6480 }, { "epoch": 2.1001296176279975, "grad_norm": 0.45618337392807007, "learning_rate": 2.1797532208019212e-06, "loss": 0.0711, "step": 6481 }, { "epoch": 2.100453661697991, "grad_norm": 0.5437365770339966, "learning_rate": 2.178308993585359e-06, "loss": 0.0856, "step": 6482 }, { "epoch": 2.1007777057679844, "grad_norm": 0.4399570822715759, "learning_rate": 2.1768651117164565e-06, "loss": 0.0685, "step": 6483 }, { "epoch": 2.101101749837978, "grad_norm": 0.46941646933555603, "learning_rate": 2.1754215753719376e-06, "loss": 0.0711, "step": 6484 }, { "epoch": 2.1014257939079717, "grad_norm": 0.5223105549812317, "learning_rate": 2.173978384728472e-06, "loss": 0.0776, "step": 6485 }, { "epoch": 2.101749837977965, "grad_norm": 0.5064131617546082, "learning_rate": 2.1725355399626943e-06, "loss": 0.0734, "step": 6486 }, { "epoch": 2.1020738820479585, "grad_norm": 0.46504563093185425, "learning_rate": 2.1710930412511943e-06, "loss": 0.0689, "step": 6487 }, { "epoch": 2.102397926117952, "grad_norm": 0.46737441420555115, "learning_rate": 2.1696508887705203e-06, "loss": 0.0722, "step": 6488 }, { "epoch": 2.1027219701879454, "grad_norm": 0.4769153594970703, "learning_rate": 2.168209082697178e-06, "loss": 0.0759, "step": 6489 }, { "epoch": 2.1030460142579392, "grad_norm": 0.4670775830745697, "learning_rate": 2.166767623207631e-06, "loss": 0.0686, "step": 6490 }, { "epoch": 2.1033700583279327, "grad_norm": 0.49240654706954956, "learning_rate": 2.1653265104782967e-06, "loss": 0.0737, "step": 6491 }, { "epoch": 2.103694102397926, "grad_norm": 0.4757388234138489, "learning_rate": 2.163885744685558e-06, "loss": 0.074, "step": 6492 }, { "epoch": 2.1040181464679195, "grad_norm": 0.48718348145484924, "learning_rate": 2.1624453260057477e-06, "loss": 0.0753, "step": 6493 }, { "epoch": 2.1043421905379134, "grad_norm": 0.4722293019294739, "learning_rate": 2.1610052546151593e-06, "loss": 0.0741, "step": 6494 }, { "epoch": 2.104666234607907, "grad_norm": 0.5052745342254639, "learning_rate": 2.1595655306900444e-06, "loss": 0.0769, "step": 6495 }, { "epoch": 2.1049902786779002, "grad_norm": 0.4688330590724945, "learning_rate": 2.1581261544066113e-06, "loss": 0.0775, "step": 6496 }, { "epoch": 2.1053143227478937, "grad_norm": 0.49035385251045227, "learning_rate": 2.1566871259410267e-06, "loss": 0.08, "step": 6497 }, { "epoch": 2.105638366817887, "grad_norm": 0.4556289315223694, "learning_rate": 2.1552484454694087e-06, "loss": 0.0667, "step": 6498 }, { "epoch": 2.105962410887881, "grad_norm": 0.4465049207210541, "learning_rate": 2.1538101131678457e-06, "loss": 0.0718, "step": 6499 }, { "epoch": 2.1062864549578744, "grad_norm": 0.48459509015083313, "learning_rate": 2.1523721292123676e-06, "loss": 0.0742, "step": 6500 }, { "epoch": 2.106610499027868, "grad_norm": 0.45342642068862915, "learning_rate": 2.1509344937789778e-06, "loss": 0.0697, "step": 6501 }, { "epoch": 2.106934543097861, "grad_norm": 0.4800603687763214, "learning_rate": 2.149497207043623e-06, "loss": 0.0757, "step": 6502 }, { "epoch": 2.1072585871678546, "grad_norm": 0.4755440354347229, "learning_rate": 2.148060269182215e-06, "loss": 0.0745, "step": 6503 }, { "epoch": 2.1075826312378485, "grad_norm": 0.4882161617279053, "learning_rate": 2.1466236803706225e-06, "loss": 0.0767, "step": 6504 }, { "epoch": 2.107906675307842, "grad_norm": 0.508690357208252, "learning_rate": 2.1451874407846655e-06, "loss": 0.0794, "step": 6505 }, { "epoch": 2.1082307193778353, "grad_norm": 0.4412241578102112, "learning_rate": 2.143751550600133e-06, "loss": 0.0666, "step": 6506 }, { "epoch": 2.1085547634478288, "grad_norm": 0.5109474062919617, "learning_rate": 2.142316009992756e-06, "loss": 0.0719, "step": 6507 }, { "epoch": 2.108878807517822, "grad_norm": 0.4810276925563812, "learning_rate": 2.140880819138238e-06, "loss": 0.0726, "step": 6508 }, { "epoch": 2.109202851587816, "grad_norm": 0.4703182578086853, "learning_rate": 2.1394459782122283e-06, "loss": 0.0721, "step": 6509 }, { "epoch": 2.1095268956578095, "grad_norm": 0.47323593497276306, "learning_rate": 2.1380114873903377e-06, "loss": 0.0746, "step": 6510 }, { "epoch": 2.109850939727803, "grad_norm": 0.48979589343070984, "learning_rate": 2.136577346848136e-06, "loss": 0.0657, "step": 6511 }, { "epoch": 2.1101749837977963, "grad_norm": 0.43888792395591736, "learning_rate": 2.135143556761146e-06, "loss": 0.0657, "step": 6512 }, { "epoch": 2.11049902786779, "grad_norm": 0.47470882534980774, "learning_rate": 2.133710117304851e-06, "loss": 0.0721, "step": 6513 }, { "epoch": 2.1108230719377836, "grad_norm": 0.5063864588737488, "learning_rate": 2.13227702865469e-06, "loss": 0.0658, "step": 6514 }, { "epoch": 2.111147116007777, "grad_norm": 0.4682093858718872, "learning_rate": 2.130844290986061e-06, "loss": 0.0707, "step": 6515 }, { "epoch": 2.1114711600777705, "grad_norm": 0.4421197474002838, "learning_rate": 2.129411904474313e-06, "loss": 0.0688, "step": 6516 }, { "epoch": 2.111795204147764, "grad_norm": 0.5191793441772461, "learning_rate": 2.1279798692947585e-06, "loss": 0.0859, "step": 6517 }, { "epoch": 2.1121192482177578, "grad_norm": 0.47814905643463135, "learning_rate": 2.1265481856226646e-06, "loss": 0.0669, "step": 6518 }, { "epoch": 2.112443292287751, "grad_norm": 0.4759092926979065, "learning_rate": 2.1251168536332556e-06, "loss": 0.0739, "step": 6519 }, { "epoch": 2.1127673363577446, "grad_norm": 0.4706220328807831, "learning_rate": 2.123685873501713e-06, "loss": 0.0708, "step": 6520 }, { "epoch": 2.113091380427738, "grad_norm": 0.48442214727401733, "learning_rate": 2.122255245403176e-06, "loss": 0.073, "step": 6521 }, { "epoch": 2.113415424497732, "grad_norm": 0.4349236488342285, "learning_rate": 2.120824969512735e-06, "loss": 0.0649, "step": 6522 }, { "epoch": 2.1137394685677253, "grad_norm": 0.5289400815963745, "learning_rate": 2.1193950460054486e-06, "loss": 0.0795, "step": 6523 }, { "epoch": 2.1140635126377187, "grad_norm": 0.45300814509391785, "learning_rate": 2.11796547505632e-06, "loss": 0.0729, "step": 6524 }, { "epoch": 2.114387556707712, "grad_norm": 0.4735052287578583, "learning_rate": 2.1165362568403175e-06, "loss": 0.0719, "step": 6525 }, { "epoch": 2.1147116007777056, "grad_norm": 0.4929650127887726, "learning_rate": 2.115107391532363e-06, "loss": 0.0702, "step": 6526 }, { "epoch": 2.1150356448476995, "grad_norm": 0.44897279143333435, "learning_rate": 2.113678879307336e-06, "loss": 0.0648, "step": 6527 }, { "epoch": 2.115359688917693, "grad_norm": 0.5100932121276855, "learning_rate": 2.112250720340074e-06, "loss": 0.0781, "step": 6528 }, { "epoch": 2.1156837329876863, "grad_norm": 0.47918304800987244, "learning_rate": 2.1108229148053645e-06, "loss": 0.0726, "step": 6529 }, { "epoch": 2.1160077770576797, "grad_norm": 0.5191142559051514, "learning_rate": 2.1093954628779644e-06, "loss": 0.0802, "step": 6530 }, { "epoch": 2.116331821127673, "grad_norm": 0.4737345278263092, "learning_rate": 2.1079683647325734e-06, "loss": 0.0744, "step": 6531 }, { "epoch": 2.116655865197667, "grad_norm": 0.48412197828292847, "learning_rate": 2.1065416205438607e-06, "loss": 0.0755, "step": 6532 }, { "epoch": 2.1169799092676604, "grad_norm": 0.499695748090744, "learning_rate": 2.105115230486441e-06, "loss": 0.0771, "step": 6533 }, { "epoch": 2.117303953337654, "grad_norm": 0.5066558718681335, "learning_rate": 2.103689194734892e-06, "loss": 0.0766, "step": 6534 }, { "epoch": 2.1176279974076473, "grad_norm": 0.4975617527961731, "learning_rate": 2.1022635134637474e-06, "loss": 0.0736, "step": 6535 }, { "epoch": 2.117952041477641, "grad_norm": 0.4812944233417511, "learning_rate": 2.100838186847497e-06, "loss": 0.0734, "step": 6536 }, { "epoch": 2.1182760855476346, "grad_norm": 0.46450215578079224, "learning_rate": 2.0994132150605878e-06, "loss": 0.0708, "step": 6537 }, { "epoch": 2.118600129617628, "grad_norm": 0.4873053729534149, "learning_rate": 2.0979885982774177e-06, "loss": 0.0753, "step": 6538 }, { "epoch": 2.1189241736876214, "grad_norm": 0.46467098593711853, "learning_rate": 2.0965643366723533e-06, "loss": 0.071, "step": 6539 }, { "epoch": 2.119248217757615, "grad_norm": 0.4787776470184326, "learning_rate": 2.0951404304197044e-06, "loss": 0.0726, "step": 6540 }, { "epoch": 2.1195722618276087, "grad_norm": 0.46391019225120544, "learning_rate": 2.0937168796937457e-06, "loss": 0.0725, "step": 6541 }, { "epoch": 2.119896305897602, "grad_norm": 0.4304513931274414, "learning_rate": 2.0922936846687065e-06, "loss": 0.0667, "step": 6542 }, { "epoch": 2.1202203499675956, "grad_norm": 0.4738362729549408, "learning_rate": 2.090870845518771e-06, "loss": 0.0736, "step": 6543 }, { "epoch": 2.120544394037589, "grad_norm": 0.42504286766052246, "learning_rate": 2.0894483624180816e-06, "loss": 0.0666, "step": 6544 }, { "epoch": 2.120868438107583, "grad_norm": 0.4533616304397583, "learning_rate": 2.0880262355407384e-06, "loss": 0.0711, "step": 6545 }, { "epoch": 2.1211924821775763, "grad_norm": 0.4556693732738495, "learning_rate": 2.086604465060791e-06, "loss": 0.0711, "step": 6546 }, { "epoch": 2.1215165262475697, "grad_norm": 0.46734166145324707, "learning_rate": 2.085183051152254e-06, "loss": 0.0699, "step": 6547 }, { "epoch": 2.121840570317563, "grad_norm": 0.4497268497943878, "learning_rate": 2.083761993989094e-06, "loss": 0.0722, "step": 6548 }, { "epoch": 2.1221646143875565, "grad_norm": 0.4714108407497406, "learning_rate": 2.0823412937452345e-06, "loss": 0.0746, "step": 6549 }, { "epoch": 2.1224886584575504, "grad_norm": 0.488873690366745, "learning_rate": 2.080920950594556e-06, "loss": 0.0767, "step": 6550 }, { "epoch": 2.122812702527544, "grad_norm": 0.46497318148612976, "learning_rate": 2.079500964710894e-06, "loss": 0.0723, "step": 6551 }, { "epoch": 2.1231367465975373, "grad_norm": 0.45600226521492004, "learning_rate": 2.0780813362680424e-06, "loss": 0.0738, "step": 6552 }, { "epoch": 2.1234607906675307, "grad_norm": 0.4736258089542389, "learning_rate": 2.0766620654397455e-06, "loss": 0.0712, "step": 6553 }, { "epoch": 2.123784834737524, "grad_norm": 0.4751882553100586, "learning_rate": 2.0752431523997156e-06, "loss": 0.0718, "step": 6554 }, { "epoch": 2.124108878807518, "grad_norm": 0.49826884269714355, "learning_rate": 2.073824597321609e-06, "loss": 0.0714, "step": 6555 }, { "epoch": 2.1244329228775114, "grad_norm": 0.47052130103111267, "learning_rate": 2.0724064003790428e-06, "loss": 0.073, "step": 6556 }, { "epoch": 2.124756966947505, "grad_norm": 0.4417573809623718, "learning_rate": 2.0709885617455923e-06, "loss": 0.0693, "step": 6557 }, { "epoch": 2.1250810110174982, "grad_norm": 0.48403653502464294, "learning_rate": 2.0695710815947866e-06, "loss": 0.0792, "step": 6558 }, { "epoch": 2.1254050550874917, "grad_norm": 0.4700932502746582, "learning_rate": 2.068153960100114e-06, "loss": 0.0737, "step": 6559 }, { "epoch": 2.1257290991574855, "grad_norm": 0.44414281845092773, "learning_rate": 2.0667371974350105e-06, "loss": 0.0691, "step": 6560 }, { "epoch": 2.126053143227479, "grad_norm": 0.49838581681251526, "learning_rate": 2.0653207937728814e-06, "loss": 0.0757, "step": 6561 }, { "epoch": 2.1263771872974724, "grad_norm": 0.46537691354751587, "learning_rate": 2.0639047492870735e-06, "loss": 0.0708, "step": 6562 }, { "epoch": 2.126701231367466, "grad_norm": 0.49669939279556274, "learning_rate": 2.0624890641509043e-06, "loss": 0.0763, "step": 6563 }, { "epoch": 2.1270252754374597, "grad_norm": 0.4871048331260681, "learning_rate": 2.061073738537635e-06, "loss": 0.076, "step": 6564 }, { "epoch": 2.127349319507453, "grad_norm": 0.48748287558555603, "learning_rate": 2.059658772620489e-06, "loss": 0.0757, "step": 6565 }, { "epoch": 2.1276733635774465, "grad_norm": 0.4756983518600464, "learning_rate": 2.0582441665726438e-06, "loss": 0.0722, "step": 6566 }, { "epoch": 2.12799740764744, "grad_norm": 0.5200271606445312, "learning_rate": 2.0568299205672347e-06, "loss": 0.0762, "step": 6567 }, { "epoch": 2.1283214517174334, "grad_norm": 0.44534698128700256, "learning_rate": 2.0554160347773532e-06, "loss": 0.0708, "step": 6568 }, { "epoch": 2.1286454957874272, "grad_norm": 0.4336491525173187, "learning_rate": 2.0540025093760414e-06, "loss": 0.0685, "step": 6569 }, { "epoch": 2.1289695398574207, "grad_norm": 0.4875744879245758, "learning_rate": 2.0525893445363027e-06, "loss": 0.0758, "step": 6570 }, { "epoch": 2.129293583927414, "grad_norm": 0.48467862606048584, "learning_rate": 2.0511765404310953e-06, "loss": 0.0716, "step": 6571 }, { "epoch": 2.1296176279974075, "grad_norm": 0.44703033566474915, "learning_rate": 2.049764097233332e-06, "loss": 0.0659, "step": 6572 }, { "epoch": 2.1299416720674014, "grad_norm": 0.4997273087501526, "learning_rate": 2.0483520151158837e-06, "loss": 0.077, "step": 6573 }, { "epoch": 2.130265716137395, "grad_norm": 0.4592891335487366, "learning_rate": 2.0469402942515735e-06, "loss": 0.071, "step": 6574 }, { "epoch": 2.130589760207388, "grad_norm": 0.45686131715774536, "learning_rate": 2.0455289348131845e-06, "loss": 0.0689, "step": 6575 }, { "epoch": 2.1309138042773816, "grad_norm": 0.4669840335845947, "learning_rate": 2.0441179369734538e-06, "loss": 0.0695, "step": 6576 }, { "epoch": 2.131237848347375, "grad_norm": 0.4641000032424927, "learning_rate": 2.04270730090507e-06, "loss": 0.0698, "step": 6577 }, { "epoch": 2.131561892417369, "grad_norm": 0.4740959107875824, "learning_rate": 2.0412970267806847e-06, "loss": 0.0711, "step": 6578 }, { "epoch": 2.1318859364873624, "grad_norm": 0.45085427165031433, "learning_rate": 2.0398871147729004e-06, "loss": 0.0684, "step": 6579 }, { "epoch": 2.1322099805573558, "grad_norm": 0.4833509922027588, "learning_rate": 2.038477565054277e-06, "loss": 0.0718, "step": 6580 }, { "epoch": 2.132534024627349, "grad_norm": 0.5095663070678711, "learning_rate": 2.03706837779733e-06, "loss": 0.0796, "step": 6581 }, { "epoch": 2.1328580686973426, "grad_norm": 0.4786771237850189, "learning_rate": 2.0356595531745303e-06, "loss": 0.0734, "step": 6582 }, { "epoch": 2.1331821127673365, "grad_norm": 0.45221537351608276, "learning_rate": 2.0342510913583062e-06, "loss": 0.0714, "step": 6583 }, { "epoch": 2.13350615683733, "grad_norm": 0.496338814496994, "learning_rate": 2.032842992521034e-06, "loss": 0.0784, "step": 6584 }, { "epoch": 2.1338302009073233, "grad_norm": 0.49012452363967896, "learning_rate": 2.0314352568350586e-06, "loss": 0.077, "step": 6585 }, { "epoch": 2.1341542449773168, "grad_norm": 0.4398423433303833, "learning_rate": 2.0300278844726685e-06, "loss": 0.0627, "step": 6586 }, { "epoch": 2.1344782890473106, "grad_norm": 0.4631512463092804, "learning_rate": 2.028620875606113e-06, "loss": 0.0714, "step": 6587 }, { "epoch": 2.134802333117304, "grad_norm": 0.5162291526794434, "learning_rate": 2.027214230407598e-06, "loss": 0.0788, "step": 6588 }, { "epoch": 2.1351263771872975, "grad_norm": 0.46149012446403503, "learning_rate": 2.025807949049282e-06, "loss": 0.0742, "step": 6589 }, { "epoch": 2.135450421257291, "grad_norm": 0.47114327549934387, "learning_rate": 2.0244020317032825e-06, "loss": 0.0714, "step": 6590 }, { "epoch": 2.1357744653272843, "grad_norm": 0.5015454888343811, "learning_rate": 2.022996478541665e-06, "loss": 0.0831, "step": 6591 }, { "epoch": 2.136098509397278, "grad_norm": 0.4543628990650177, "learning_rate": 2.021591289736462e-06, "loss": 0.0714, "step": 6592 }, { "epoch": 2.1364225534672716, "grad_norm": 0.5102995038032532, "learning_rate": 2.020186465459649e-06, "loss": 0.0764, "step": 6593 }, { "epoch": 2.136746597537265, "grad_norm": 0.47079968452453613, "learning_rate": 2.0187820058831685e-06, "loss": 0.0689, "step": 6594 }, { "epoch": 2.1370706416072585, "grad_norm": 0.4647594392299652, "learning_rate": 2.017377911178909e-06, "loss": 0.0714, "step": 6595 }, { "epoch": 2.1373946856772523, "grad_norm": 0.4710672199726105, "learning_rate": 2.0159741815187184e-06, "loss": 0.0731, "step": 6596 }, { "epoch": 2.1377187297472457, "grad_norm": 0.46359890699386597, "learning_rate": 2.014570817074401e-06, "loss": 0.0722, "step": 6597 }, { "epoch": 2.138042773817239, "grad_norm": 0.4689292311668396, "learning_rate": 2.0131678180177144e-06, "loss": 0.0697, "step": 6598 }, { "epoch": 2.1383668178872326, "grad_norm": 0.48033982515335083, "learning_rate": 2.0117651845203733e-06, "loss": 0.0739, "step": 6599 }, { "epoch": 2.138690861957226, "grad_norm": 0.4882848858833313, "learning_rate": 2.010362916754044e-06, "loss": 0.08, "step": 6600 }, { "epoch": 2.13901490602722, "grad_norm": 0.4545619785785675, "learning_rate": 2.0089610148903515e-06, "loss": 0.0664, "step": 6601 }, { "epoch": 2.1393389500972133, "grad_norm": 0.4811970591545105, "learning_rate": 2.007559479100876e-06, "loss": 0.0759, "step": 6602 }, { "epoch": 2.1396629941672067, "grad_norm": 0.4705151915550232, "learning_rate": 2.00615830955715e-06, "loss": 0.0786, "step": 6603 }, { "epoch": 2.1399870382372, "grad_norm": 0.47506824135780334, "learning_rate": 2.004757506430665e-06, "loss": 0.0738, "step": 6604 }, { "epoch": 2.1403110823071936, "grad_norm": 0.47582632303237915, "learning_rate": 2.0033570698928652e-06, "loss": 0.0744, "step": 6605 }, { "epoch": 2.1406351263771874, "grad_norm": 0.4712214171886444, "learning_rate": 2.0019570001151494e-06, "loss": 0.0687, "step": 6606 }, { "epoch": 2.140959170447181, "grad_norm": 0.4881756603717804, "learning_rate": 2.0005572972688757e-06, "loss": 0.0762, "step": 6607 }, { "epoch": 2.1412832145171743, "grad_norm": 0.4259354770183563, "learning_rate": 1.9991579615253507e-06, "loss": 0.0618, "step": 6608 }, { "epoch": 2.1416072585871677, "grad_norm": 0.4770180881023407, "learning_rate": 1.9977589930558406e-06, "loss": 0.0758, "step": 6609 }, { "epoch": 2.141931302657161, "grad_norm": 0.4740351140499115, "learning_rate": 1.9963603920315655e-06, "loss": 0.0765, "step": 6610 }, { "epoch": 2.142255346727155, "grad_norm": 0.47260332107543945, "learning_rate": 1.994962158623701e-06, "loss": 0.0737, "step": 6611 }, { "epoch": 2.1425793907971484, "grad_norm": 0.49404194951057434, "learning_rate": 1.9935642930033786e-06, "loss": 0.0716, "step": 6612 }, { "epoch": 2.142903434867142, "grad_norm": 0.4638146162033081, "learning_rate": 1.9921667953416796e-06, "loss": 0.069, "step": 6613 }, { "epoch": 2.1432274789371353, "grad_norm": 0.44174426794052124, "learning_rate": 1.9907696658096496e-06, "loss": 0.0665, "step": 6614 }, { "epoch": 2.143551523007129, "grad_norm": 0.5062809586524963, "learning_rate": 1.989372904578278e-06, "loss": 0.0769, "step": 6615 }, { "epoch": 2.1438755670771226, "grad_norm": 0.5006329417228699, "learning_rate": 1.987976511818521e-06, "loss": 0.0804, "step": 6616 }, { "epoch": 2.144199611147116, "grad_norm": 0.4906056225299835, "learning_rate": 1.986580487701276e-06, "loss": 0.0757, "step": 6617 }, { "epoch": 2.1445236552171094, "grad_norm": 0.4586811363697052, "learning_rate": 1.9851848323974114e-06, "loss": 0.0703, "step": 6618 }, { "epoch": 2.144847699287103, "grad_norm": 0.48461246490478516, "learning_rate": 1.9837895460777364e-06, "loss": 0.0729, "step": 6619 }, { "epoch": 2.1451717433570967, "grad_norm": 0.49435991048812866, "learning_rate": 1.982394628913021e-06, "loss": 0.0809, "step": 6620 }, { "epoch": 2.14549578742709, "grad_norm": 0.4677877724170685, "learning_rate": 1.981000081073992e-06, "loss": 0.078, "step": 6621 }, { "epoch": 2.1458198314970836, "grad_norm": 0.4829846918582916, "learning_rate": 1.9796059027313237e-06, "loss": 0.0666, "step": 6622 }, { "epoch": 2.146143875567077, "grad_norm": 0.5388849973678589, "learning_rate": 1.9782120940556573e-06, "loss": 0.0784, "step": 6623 }, { "epoch": 2.146467919637071, "grad_norm": 0.483267217874527, "learning_rate": 1.9768186552175743e-06, "loss": 0.0732, "step": 6624 }, { "epoch": 2.1467919637070643, "grad_norm": 0.4950624108314514, "learning_rate": 1.9754255863876222e-06, "loss": 0.0756, "step": 6625 }, { "epoch": 2.1471160077770577, "grad_norm": 0.4850957989692688, "learning_rate": 1.974032887736298e-06, "loss": 0.074, "step": 6626 }, { "epoch": 2.147440051847051, "grad_norm": 0.4783896803855896, "learning_rate": 1.9726405594340547e-06, "loss": 0.0726, "step": 6627 }, { "epoch": 2.1477640959170445, "grad_norm": 0.4619564712047577, "learning_rate": 1.9712486016513e-06, "loss": 0.071, "step": 6628 }, { "epoch": 2.1480881399870384, "grad_norm": 0.49050572514533997, "learning_rate": 1.9698570145583956e-06, "loss": 0.0733, "step": 6629 }, { "epoch": 2.148412184057032, "grad_norm": 0.4600142538547516, "learning_rate": 1.96846579832566e-06, "loss": 0.0691, "step": 6630 }, { "epoch": 2.1487362281270252, "grad_norm": 0.5022426247596741, "learning_rate": 1.9670749531233617e-06, "loss": 0.0742, "step": 6631 }, { "epoch": 2.1490602721970187, "grad_norm": 0.45566675066947937, "learning_rate": 1.965684479121728e-06, "loss": 0.07, "step": 6632 }, { "epoch": 2.149384316267012, "grad_norm": 0.46745166182518005, "learning_rate": 1.9642943764909406e-06, "loss": 0.0692, "step": 6633 }, { "epoch": 2.149708360337006, "grad_norm": 0.46203088760375977, "learning_rate": 1.9629046454011325e-06, "loss": 0.0724, "step": 6634 }, { "epoch": 2.1500324044069994, "grad_norm": 0.46705809235572815, "learning_rate": 1.961515286022395e-06, "loss": 0.0753, "step": 6635 }, { "epoch": 2.150356448476993, "grad_norm": 0.46384093165397644, "learning_rate": 1.9601262985247733e-06, "loss": 0.0639, "step": 6636 }, { "epoch": 2.1506804925469862, "grad_norm": 0.4611685276031494, "learning_rate": 1.9587376830782608e-06, "loss": 0.0672, "step": 6637 }, { "epoch": 2.15100453661698, "grad_norm": 0.4918757975101471, "learning_rate": 1.9573494398528175e-06, "loss": 0.0716, "step": 6638 }, { "epoch": 2.1513285806869735, "grad_norm": 0.4503691792488098, "learning_rate": 1.9559615690183444e-06, "loss": 0.0698, "step": 6639 }, { "epoch": 2.151652624756967, "grad_norm": 0.45643165707588196, "learning_rate": 1.95457407074471e-06, "loss": 0.0683, "step": 6640 }, { "epoch": 2.1519766688269604, "grad_norm": 0.4970737099647522, "learning_rate": 1.953186945201726e-06, "loss": 0.0753, "step": 6641 }, { "epoch": 2.152300712896954, "grad_norm": 0.48359444737434387, "learning_rate": 1.951800192559164e-06, "loss": 0.0747, "step": 6642 }, { "epoch": 2.1526247569669477, "grad_norm": 0.49781110882759094, "learning_rate": 1.9504138129867516e-06, "loss": 0.0765, "step": 6643 }, { "epoch": 2.152948801036941, "grad_norm": 0.4743006229400635, "learning_rate": 1.9490278066541624e-06, "loss": 0.0764, "step": 6644 }, { "epoch": 2.1532728451069345, "grad_norm": 0.4961163401603699, "learning_rate": 1.9476421737310375e-06, "loss": 0.0782, "step": 6645 }, { "epoch": 2.153596889176928, "grad_norm": 0.5054019093513489, "learning_rate": 1.946256914386958e-06, "loss": 0.0724, "step": 6646 }, { "epoch": 2.153920933246922, "grad_norm": 0.5000643134117126, "learning_rate": 1.9448720287914735e-06, "loss": 0.0776, "step": 6647 }, { "epoch": 2.154244977316915, "grad_norm": 0.47631070017814636, "learning_rate": 1.943487517114075e-06, "loss": 0.071, "step": 6648 }, { "epoch": 2.1545690213869086, "grad_norm": 0.5166507959365845, "learning_rate": 1.9421033795242144e-06, "loss": 0.0804, "step": 6649 }, { "epoch": 2.154893065456902, "grad_norm": 0.46033474802970886, "learning_rate": 1.9407196161912976e-06, "loss": 0.069, "step": 6650 }, { "epoch": 2.1552171095268955, "grad_norm": 0.45443040132522583, "learning_rate": 1.9393362272846844e-06, "loss": 0.0714, "step": 6651 }, { "epoch": 2.1555411535968894, "grad_norm": 0.45767298340797424, "learning_rate": 1.937953212973687e-06, "loss": 0.074, "step": 6652 }, { "epoch": 2.155865197666883, "grad_norm": 0.5098868608474731, "learning_rate": 1.936570573427573e-06, "loss": 0.073, "step": 6653 }, { "epoch": 2.156189241736876, "grad_norm": 0.48986905813217163, "learning_rate": 1.9351883088155666e-06, "loss": 0.0773, "step": 6654 }, { "epoch": 2.1565132858068696, "grad_norm": 0.44977831840515137, "learning_rate": 1.93380641930684e-06, "loss": 0.0679, "step": 6655 }, { "epoch": 2.156837329876863, "grad_norm": 0.45664796233177185, "learning_rate": 1.932424905070524e-06, "loss": 0.0713, "step": 6656 }, { "epoch": 2.157161373946857, "grad_norm": 0.49763405323028564, "learning_rate": 1.9310437662757037e-06, "loss": 0.075, "step": 6657 }, { "epoch": 2.1574854180168503, "grad_norm": 0.49584758281707764, "learning_rate": 1.9296630030914165e-06, "loss": 0.0805, "step": 6658 }, { "epoch": 2.1578094620868438, "grad_norm": 0.4893285930156708, "learning_rate": 1.928282615686655e-06, "loss": 0.0717, "step": 6659 }, { "epoch": 2.158133506156837, "grad_norm": 0.4704589545726776, "learning_rate": 1.926902604230364e-06, "loss": 0.0683, "step": 6660 }, { "epoch": 2.158457550226831, "grad_norm": 0.45831263065338135, "learning_rate": 1.9255229688914445e-06, "loss": 0.0667, "step": 6661 }, { "epoch": 2.1587815942968245, "grad_norm": 0.47325196862220764, "learning_rate": 1.9241437098387528e-06, "loss": 0.0739, "step": 6662 }, { "epoch": 2.159105638366818, "grad_norm": 0.4823969900608063, "learning_rate": 1.922764827241092e-06, "loss": 0.0716, "step": 6663 }, { "epoch": 2.1594296824368113, "grad_norm": 0.4870539605617523, "learning_rate": 1.921386321267227e-06, "loss": 0.0765, "step": 6664 }, { "epoch": 2.1597537265068047, "grad_norm": 0.47531864047050476, "learning_rate": 1.920008192085872e-06, "loss": 0.0743, "step": 6665 }, { "epoch": 2.1600777705767986, "grad_norm": 0.46033617854118347, "learning_rate": 1.9186304398656987e-06, "loss": 0.0643, "step": 6666 }, { "epoch": 2.160401814646792, "grad_norm": 0.47263818979263306, "learning_rate": 1.9172530647753306e-06, "loss": 0.0752, "step": 6667 }, { "epoch": 2.1607258587167855, "grad_norm": 0.5137720108032227, "learning_rate": 1.915876066983341e-06, "loss": 0.0811, "step": 6668 }, { "epoch": 2.161049902786779, "grad_norm": 0.4592183232307434, "learning_rate": 1.9144994466582674e-06, "loss": 0.069, "step": 6669 }, { "epoch": 2.1613739468567728, "grad_norm": 0.5136011838912964, "learning_rate": 1.913123203968588e-06, "loss": 0.0833, "step": 6670 }, { "epoch": 2.161697990926766, "grad_norm": 0.4686625897884369, "learning_rate": 1.911747339082749e-06, "loss": 0.0744, "step": 6671 }, { "epoch": 2.1620220349967596, "grad_norm": 0.4724651575088501, "learning_rate": 1.910371852169137e-06, "loss": 0.0761, "step": 6672 }, { "epoch": 2.162346079066753, "grad_norm": 0.4784649610519409, "learning_rate": 1.908996743396101e-06, "loss": 0.0701, "step": 6673 }, { "epoch": 2.1626701231367464, "grad_norm": 0.476528525352478, "learning_rate": 1.90762201293194e-06, "loss": 0.0733, "step": 6674 }, { "epoch": 2.1629941672067403, "grad_norm": 0.4567173719406128, "learning_rate": 1.9062476609449075e-06, "loss": 0.07, "step": 6675 }, { "epoch": 2.1633182112767337, "grad_norm": 0.466298371553421, "learning_rate": 1.9048736876032142e-06, "loss": 0.0698, "step": 6676 }, { "epoch": 2.163642255346727, "grad_norm": 0.48197922110557556, "learning_rate": 1.9035000930750142e-06, "loss": 0.075, "step": 6677 }, { "epoch": 2.1639662994167206, "grad_norm": 0.460759699344635, "learning_rate": 1.9021268775284301e-06, "loss": 0.0694, "step": 6678 }, { "epoch": 2.164290343486714, "grad_norm": 0.488559752702713, "learning_rate": 1.900754041131525e-06, "loss": 0.0747, "step": 6679 }, { "epoch": 2.164614387556708, "grad_norm": 0.48355886340141296, "learning_rate": 1.8993815840523217e-06, "loss": 0.0712, "step": 6680 }, { "epoch": 2.1649384316267013, "grad_norm": 0.49874863028526306, "learning_rate": 1.8980095064587967e-06, "loss": 0.0736, "step": 6681 }, { "epoch": 2.1652624756966947, "grad_norm": 0.4778255522251129, "learning_rate": 1.896637808518878e-06, "loss": 0.0744, "step": 6682 }, { "epoch": 2.165586519766688, "grad_norm": 0.4567441940307617, "learning_rate": 1.895266490400449e-06, "loss": 0.0712, "step": 6683 }, { "epoch": 2.1659105638366816, "grad_norm": 0.46725553274154663, "learning_rate": 1.8938955522713455e-06, "loss": 0.0741, "step": 6684 }, { "epoch": 2.1662346079066754, "grad_norm": 0.48114606738090515, "learning_rate": 1.8925249942993585e-06, "loss": 0.0707, "step": 6685 }, { "epoch": 2.166558651976669, "grad_norm": 0.5054229497909546, "learning_rate": 1.8911548166522276e-06, "loss": 0.0809, "step": 6686 }, { "epoch": 2.1668826960466623, "grad_norm": 0.4752826690673828, "learning_rate": 1.8897850194976514e-06, "loss": 0.0684, "step": 6687 }, { "epoch": 2.1672067401166557, "grad_norm": 0.4631727635860443, "learning_rate": 1.8884156030032797e-06, "loss": 0.0724, "step": 6688 }, { "epoch": 2.1675307841866496, "grad_norm": 0.4974950850009918, "learning_rate": 1.8870465673367154e-06, "loss": 0.0802, "step": 6689 }, { "epoch": 2.167854828256643, "grad_norm": 0.5214879512786865, "learning_rate": 1.885677912665516e-06, "loss": 0.0822, "step": 6690 }, { "epoch": 2.1681788723266364, "grad_norm": 0.5338072180747986, "learning_rate": 1.8843096391571924e-06, "loss": 0.0792, "step": 6691 }, { "epoch": 2.16850291639663, "grad_norm": 0.4919593036174774, "learning_rate": 1.8829417469792038e-06, "loss": 0.076, "step": 6692 }, { "epoch": 2.1688269604666233, "grad_norm": 0.5080357789993286, "learning_rate": 1.881574236298973e-06, "loss": 0.0753, "step": 6693 }, { "epoch": 2.169151004536617, "grad_norm": 0.492123007774353, "learning_rate": 1.8802071072838652e-06, "loss": 0.0801, "step": 6694 }, { "epoch": 2.1694750486066106, "grad_norm": 0.4710664451122284, "learning_rate": 1.8788403601012056e-06, "loss": 0.0704, "step": 6695 }, { "epoch": 2.169799092676604, "grad_norm": 0.4927854835987091, "learning_rate": 1.8774739949182707e-06, "loss": 0.0742, "step": 6696 }, { "epoch": 2.1701231367465974, "grad_norm": 0.49946191906929016, "learning_rate": 1.8761080119022907e-06, "loss": 0.0736, "step": 6697 }, { "epoch": 2.1704471808165913, "grad_norm": 0.48680731654167175, "learning_rate": 1.8747424112204499e-06, "loss": 0.0765, "step": 6698 }, { "epoch": 2.1707712248865847, "grad_norm": 0.5381428599357605, "learning_rate": 1.8733771930398797e-06, "loss": 0.0859, "step": 6699 }, { "epoch": 2.171095268956578, "grad_norm": 0.4549349248409271, "learning_rate": 1.8720123575276766e-06, "loss": 0.0707, "step": 6700 }, { "epoch": 2.1714193130265715, "grad_norm": 0.45881056785583496, "learning_rate": 1.8706479048508764e-06, "loss": 0.0723, "step": 6701 }, { "epoch": 2.171743357096565, "grad_norm": 0.4502636194229126, "learning_rate": 1.8692838351764814e-06, "loss": 0.0691, "step": 6702 }, { "epoch": 2.172067401166559, "grad_norm": 0.47965043783187866, "learning_rate": 1.8679201486714354e-06, "loss": 0.0708, "step": 6703 }, { "epoch": 2.1723914452365523, "grad_norm": 0.46118342876434326, "learning_rate": 1.8665568455026424e-06, "loss": 0.0713, "step": 6704 }, { "epoch": 2.1727154893065457, "grad_norm": 0.4682093560695648, "learning_rate": 1.8651939258369577e-06, "loss": 0.071, "step": 6705 }, { "epoch": 2.173039533376539, "grad_norm": 0.49599429965019226, "learning_rate": 1.8638313898411885e-06, "loss": 0.0754, "step": 6706 }, { "epoch": 2.1733635774465325, "grad_norm": 0.4504205584526062, "learning_rate": 1.8624692376820992e-06, "loss": 0.0699, "step": 6707 }, { "epoch": 2.1736876215165264, "grad_norm": 0.4703887701034546, "learning_rate": 1.861107469526398e-06, "loss": 0.0705, "step": 6708 }, { "epoch": 2.17401166558652, "grad_norm": 0.48415201902389526, "learning_rate": 1.859746085540759e-06, "loss": 0.0743, "step": 6709 }, { "epoch": 2.1743357096565132, "grad_norm": 0.4921909272670746, "learning_rate": 1.8583850858917974e-06, "loss": 0.0757, "step": 6710 }, { "epoch": 2.1746597537265067, "grad_norm": 0.4651033282279968, "learning_rate": 1.8570244707460878e-06, "loss": 0.0697, "step": 6711 }, { "epoch": 2.1749837977965005, "grad_norm": 0.48125144839286804, "learning_rate": 1.8556642402701569e-06, "loss": 0.0727, "step": 6712 }, { "epoch": 2.175307841866494, "grad_norm": 0.4617149531841278, "learning_rate": 1.8543043946304835e-06, "loss": 0.0711, "step": 6713 }, { "epoch": 2.1756318859364874, "grad_norm": 0.5271691083908081, "learning_rate": 1.8529449339934997e-06, "loss": 0.0783, "step": 6714 }, { "epoch": 2.175955930006481, "grad_norm": 0.5199576616287231, "learning_rate": 1.8515858585255913e-06, "loss": 0.0839, "step": 6715 }, { "epoch": 2.176279974076474, "grad_norm": 0.5006012916564941, "learning_rate": 1.8502271683930933e-06, "loss": 0.0768, "step": 6716 }, { "epoch": 2.176604018146468, "grad_norm": 0.47408223152160645, "learning_rate": 1.8488688637622981e-06, "loss": 0.0722, "step": 6717 }, { "epoch": 2.1769280622164615, "grad_norm": 0.44292151927948, "learning_rate": 1.8475109447994483e-06, "loss": 0.0697, "step": 6718 }, { "epoch": 2.177252106286455, "grad_norm": 0.4794071316719055, "learning_rate": 1.8461534116707403e-06, "loss": 0.074, "step": 6719 }, { "epoch": 2.1775761503564484, "grad_norm": 0.4785098433494568, "learning_rate": 1.8447962645423233e-06, "loss": 0.0749, "step": 6720 }, { "epoch": 2.1779001944264422, "grad_norm": 0.49060362577438354, "learning_rate": 1.8434395035802987e-06, "loss": 0.0701, "step": 6721 }, { "epoch": 2.1782242384964356, "grad_norm": 0.4688788056373596, "learning_rate": 1.842083128950723e-06, "loss": 0.0697, "step": 6722 }, { "epoch": 2.178548282566429, "grad_norm": 0.45464015007019043, "learning_rate": 1.8407271408195975e-06, "loss": 0.0686, "step": 6723 }, { "epoch": 2.1788723266364225, "grad_norm": 0.4882039725780487, "learning_rate": 1.8393715393528893e-06, "loss": 0.0739, "step": 6724 }, { "epoch": 2.179196370706416, "grad_norm": 0.4844962954521179, "learning_rate": 1.8380163247165062e-06, "loss": 0.0765, "step": 6725 }, { "epoch": 2.17952041477641, "grad_norm": 0.46952706575393677, "learning_rate": 1.8366614970763142e-06, "loss": 0.0717, "step": 6726 }, { "epoch": 2.179844458846403, "grad_norm": 0.5064206123352051, "learning_rate": 1.8353070565981313e-06, "loss": 0.0794, "step": 6727 }, { "epoch": 2.1801685029163966, "grad_norm": 0.5096865296363831, "learning_rate": 1.8339530034477283e-06, "loss": 0.0761, "step": 6728 }, { "epoch": 2.18049254698639, "grad_norm": 0.46830105781555176, "learning_rate": 1.8325993377908296e-06, "loss": 0.0728, "step": 6729 }, { "epoch": 2.1808165910563835, "grad_norm": 0.4919731318950653, "learning_rate": 1.8312460597931058e-06, "loss": 0.0742, "step": 6730 }, { "epoch": 2.1811406351263773, "grad_norm": 0.4882954955101013, "learning_rate": 1.8298931696201915e-06, "loss": 0.0746, "step": 6731 }, { "epoch": 2.1814646791963708, "grad_norm": 0.4879685640335083, "learning_rate": 1.828540667437661e-06, "loss": 0.0745, "step": 6732 }, { "epoch": 2.181788723266364, "grad_norm": 0.457216739654541, "learning_rate": 1.8271885534110544e-06, "loss": 0.0714, "step": 6733 }, { "epoch": 2.1821127673363576, "grad_norm": 0.46054744720458984, "learning_rate": 1.8258368277058519e-06, "loss": 0.0719, "step": 6734 }, { "epoch": 2.182436811406351, "grad_norm": 0.49528566002845764, "learning_rate": 1.824485490487493e-06, "loss": 0.0709, "step": 6735 }, { "epoch": 2.182760855476345, "grad_norm": 0.5130648612976074, "learning_rate": 1.8231345419213692e-06, "loss": 0.0779, "step": 6736 }, { "epoch": 2.1830848995463383, "grad_norm": 0.5079078674316406, "learning_rate": 1.8217839821728222e-06, "loss": 0.0798, "step": 6737 }, { "epoch": 2.1834089436163318, "grad_norm": 0.5093557834625244, "learning_rate": 1.8204338114071506e-06, "loss": 0.0769, "step": 6738 }, { "epoch": 2.183732987686325, "grad_norm": 0.46685534715652466, "learning_rate": 1.8190840297895968e-06, "loss": 0.0693, "step": 6739 }, { "epoch": 2.184057031756319, "grad_norm": 0.44228246808052063, "learning_rate": 1.8177346374853672e-06, "loss": 0.0691, "step": 6740 }, { "epoch": 2.1843810758263125, "grad_norm": 0.4679490327835083, "learning_rate": 1.8163856346596092e-06, "loss": 0.0696, "step": 6741 }, { "epoch": 2.184705119896306, "grad_norm": 0.5041935443878174, "learning_rate": 1.8150370214774298e-06, "loss": 0.0772, "step": 6742 }, { "epoch": 2.1850291639662993, "grad_norm": 0.478503555059433, "learning_rate": 1.8136887981038864e-06, "loss": 0.0755, "step": 6743 }, { "epoch": 2.1853532080362927, "grad_norm": 0.46148279309272766, "learning_rate": 1.8123409647039885e-06, "loss": 0.0753, "step": 6744 }, { "epoch": 2.1856772521062866, "grad_norm": 0.5056387782096863, "learning_rate": 1.8109935214426971e-06, "loss": 0.0778, "step": 6745 }, { "epoch": 2.18600129617628, "grad_norm": 0.44552767276763916, "learning_rate": 1.8096464684849285e-06, "loss": 0.0715, "step": 6746 }, { "epoch": 2.1863253402462735, "grad_norm": 0.46720853447914124, "learning_rate": 1.808299805995546e-06, "loss": 0.0753, "step": 6747 }, { "epoch": 2.186649384316267, "grad_norm": 0.46596813201904297, "learning_rate": 1.8069535341393685e-06, "loss": 0.0778, "step": 6748 }, { "epoch": 2.1869734283862607, "grad_norm": 0.4803813695907593, "learning_rate": 1.8056076530811672e-06, "loss": 0.0788, "step": 6749 }, { "epoch": 2.187297472456254, "grad_norm": 0.4746699333190918, "learning_rate": 1.8042621629856656e-06, "loss": 0.0732, "step": 6750 }, { "epoch": 2.1876215165262476, "grad_norm": 0.4526144564151764, "learning_rate": 1.802917064017538e-06, "loss": 0.0706, "step": 6751 }, { "epoch": 2.187945560596241, "grad_norm": 0.4581446051597595, "learning_rate": 1.8015723563414112e-06, "loss": 0.0718, "step": 6752 }, { "epoch": 2.1882696046662344, "grad_norm": 0.4723081886768341, "learning_rate": 1.8002280401218669e-06, "loss": 0.0736, "step": 6753 }, { "epoch": 2.1885936487362283, "grad_norm": 0.5154812932014465, "learning_rate": 1.798884115523431e-06, "loss": 0.0787, "step": 6754 }, { "epoch": 2.1889176928062217, "grad_norm": 0.49709799885749817, "learning_rate": 1.7975405827105929e-06, "loss": 0.079, "step": 6755 }, { "epoch": 2.189241736876215, "grad_norm": 0.46235930919647217, "learning_rate": 1.7961974418477845e-06, "loss": 0.0696, "step": 6756 }, { "epoch": 2.1895657809462086, "grad_norm": 0.5224365592002869, "learning_rate": 1.7948546930993932e-06, "loss": 0.0736, "step": 6757 }, { "epoch": 2.189889825016202, "grad_norm": 0.4718781113624573, "learning_rate": 1.7935123366297596e-06, "loss": 0.0694, "step": 6758 }, { "epoch": 2.190213869086196, "grad_norm": 0.4754774570465088, "learning_rate": 1.7921703726031748e-06, "loss": 0.0672, "step": 6759 }, { "epoch": 2.1905379131561893, "grad_norm": 0.47679826617240906, "learning_rate": 1.7908288011838843e-06, "loss": 0.0711, "step": 6760 }, { "epoch": 2.1908619572261827, "grad_norm": 0.5035863518714905, "learning_rate": 1.7894876225360774e-06, "loss": 0.0759, "step": 6761 }, { "epoch": 2.191186001296176, "grad_norm": 0.4582345485687256, "learning_rate": 1.7881468368239085e-06, "loss": 0.0689, "step": 6762 }, { "epoch": 2.19151004536617, "grad_norm": 0.44637078046798706, "learning_rate": 1.7868064442114707e-06, "loss": 0.0673, "step": 6763 }, { "epoch": 2.1918340894361634, "grad_norm": 0.47751113772392273, "learning_rate": 1.7854664448628211e-06, "loss": 0.0731, "step": 6764 }, { "epoch": 2.192158133506157, "grad_norm": 0.4775833785533905, "learning_rate": 1.784126838941958e-06, "loss": 0.0722, "step": 6765 }, { "epoch": 2.1924821775761503, "grad_norm": 0.49275335669517517, "learning_rate": 1.782787626612838e-06, "loss": 0.0722, "step": 6766 }, { "epoch": 2.1928062216461437, "grad_norm": 0.44845327734947205, "learning_rate": 1.7814488080393672e-06, "loss": 0.0682, "step": 6767 }, { "epoch": 2.1931302657161376, "grad_norm": 0.4543963372707367, "learning_rate": 1.7801103833854044e-06, "loss": 0.0692, "step": 6768 }, { "epoch": 2.193454309786131, "grad_norm": 0.5207314491271973, "learning_rate": 1.7787723528147615e-06, "loss": 0.0782, "step": 6769 }, { "epoch": 2.1937783538561244, "grad_norm": 0.44605255126953125, "learning_rate": 1.777434716491197e-06, "loss": 0.0648, "step": 6770 }, { "epoch": 2.194102397926118, "grad_norm": 0.4934347867965698, "learning_rate": 1.776097474578426e-06, "loss": 0.0738, "step": 6771 }, { "epoch": 2.1944264419961117, "grad_norm": 0.48282739520072937, "learning_rate": 1.7747606272401147e-06, "loss": 0.0747, "step": 6772 }, { "epoch": 2.194750486066105, "grad_norm": 0.4789862632751465, "learning_rate": 1.77342417463988e-06, "loss": 0.0734, "step": 6773 }, { "epoch": 2.1950745301360985, "grad_norm": 0.46827223896980286, "learning_rate": 1.7720881169412902e-06, "loss": 0.0698, "step": 6774 }, { "epoch": 2.195398574206092, "grad_norm": 0.505933940410614, "learning_rate": 1.7707524543078664e-06, "loss": 0.0713, "step": 6775 }, { "epoch": 2.1957226182760854, "grad_norm": 0.4550766944885254, "learning_rate": 1.7694171869030807e-06, "loss": 0.0654, "step": 6776 }, { "epoch": 2.1960466623460793, "grad_norm": 0.48559170961380005, "learning_rate": 1.7680823148903585e-06, "loss": 0.0709, "step": 6777 }, { "epoch": 2.1963707064160727, "grad_norm": 0.4962014853954315, "learning_rate": 1.7667478384330704e-06, "loss": 0.0708, "step": 6778 }, { "epoch": 2.196694750486066, "grad_norm": 0.42602312564849854, "learning_rate": 1.7654137576945502e-06, "loss": 0.0626, "step": 6779 }, { "epoch": 2.1970187945560595, "grad_norm": 0.47422635555267334, "learning_rate": 1.7640800728380702e-06, "loss": 0.0725, "step": 6780 }, { "epoch": 2.197342838626053, "grad_norm": 0.4561498165130615, "learning_rate": 1.7627467840268642e-06, "loss": 0.0708, "step": 6781 }, { "epoch": 2.197666882696047, "grad_norm": 0.495524138212204, "learning_rate": 1.7614138914241141e-06, "loss": 0.0757, "step": 6782 }, { "epoch": 2.1979909267660402, "grad_norm": 0.5002186894416809, "learning_rate": 1.760081395192948e-06, "loss": 0.0776, "step": 6783 }, { "epoch": 2.1983149708360337, "grad_norm": 0.48374322056770325, "learning_rate": 1.758749295496458e-06, "loss": 0.0756, "step": 6784 }, { "epoch": 2.198639014906027, "grad_norm": 0.4546158015727997, "learning_rate": 1.7574175924976733e-06, "loss": 0.0692, "step": 6785 }, { "epoch": 2.1989630589760205, "grad_norm": 0.544314444065094, "learning_rate": 1.7560862863595873e-06, "loss": 0.0828, "step": 6786 }, { "epoch": 2.1992871030460144, "grad_norm": 0.4958459734916687, "learning_rate": 1.7547553772451336e-06, "loss": 0.0755, "step": 6787 }, { "epoch": 2.199611147116008, "grad_norm": 0.4743732213973999, "learning_rate": 1.7534248653172087e-06, "loss": 0.0719, "step": 6788 }, { "epoch": 2.1999351911860012, "grad_norm": 0.5188460350036621, "learning_rate": 1.7520947507386487e-06, "loss": 0.0794, "step": 6789 }, { "epoch": 2.2002592352559946, "grad_norm": 0.47565385699272156, "learning_rate": 1.7507650336722497e-06, "loss": 0.0707, "step": 6790 }, { "epoch": 2.2005832793259885, "grad_norm": 0.48570117354393005, "learning_rate": 1.7494357142807572e-06, "loss": 0.0739, "step": 6791 }, { "epoch": 2.200907323395982, "grad_norm": 0.4817291796207428, "learning_rate": 1.748106792726862e-06, "loss": 0.072, "step": 6792 }, { "epoch": 2.2012313674659754, "grad_norm": 0.4850255250930786, "learning_rate": 1.7467782691732176e-06, "loss": 0.0717, "step": 6793 }, { "epoch": 2.201555411535969, "grad_norm": 0.4843173623085022, "learning_rate": 1.7454501437824178e-06, "loss": 0.0724, "step": 6794 }, { "epoch": 2.201879455605962, "grad_norm": 0.5217100977897644, "learning_rate": 1.744122416717014e-06, "loss": 0.0758, "step": 6795 }, { "epoch": 2.202203499675956, "grad_norm": 0.4569748044013977, "learning_rate": 1.7427950881395072e-06, "loss": 0.07, "step": 6796 }, { "epoch": 2.2025275437459495, "grad_norm": 0.48870840668678284, "learning_rate": 1.7414681582123493e-06, "loss": 0.0742, "step": 6797 }, { "epoch": 2.202851587815943, "grad_norm": 0.5258080363273621, "learning_rate": 1.7401416270979443e-06, "loss": 0.0809, "step": 6798 }, { "epoch": 2.2031756318859363, "grad_norm": 0.471910685300827, "learning_rate": 1.7388154949586455e-06, "loss": 0.0717, "step": 6799 }, { "epoch": 2.20349967595593, "grad_norm": 0.48028430342674255, "learning_rate": 1.7374897619567598e-06, "loss": 0.0779, "step": 6800 }, { "epoch": 2.2038237200259236, "grad_norm": 0.4923180043697357, "learning_rate": 1.7361644282545454e-06, "loss": 0.0774, "step": 6801 }, { "epoch": 2.204147764095917, "grad_norm": 0.46793532371520996, "learning_rate": 1.7348394940142067e-06, "loss": 0.0687, "step": 6802 }, { "epoch": 2.2044718081659105, "grad_norm": 0.5037127733230591, "learning_rate": 1.7335149593979051e-06, "loss": 0.0789, "step": 6803 }, { "epoch": 2.204795852235904, "grad_norm": 0.4806075990200043, "learning_rate": 1.73219082456775e-06, "loss": 0.0726, "step": 6804 }, { "epoch": 2.2051198963058978, "grad_norm": 0.49096283316612244, "learning_rate": 1.7308670896858032e-06, "loss": 0.0717, "step": 6805 }, { "epoch": 2.205443940375891, "grad_norm": 0.44644638895988464, "learning_rate": 1.729543754914077e-06, "loss": 0.0691, "step": 6806 }, { "epoch": 2.2057679844458846, "grad_norm": 0.4400399923324585, "learning_rate": 1.7282208204145351e-06, "loss": 0.0671, "step": 6807 }, { "epoch": 2.206092028515878, "grad_norm": 0.4830164313316345, "learning_rate": 1.726898286349093e-06, "loss": 0.073, "step": 6808 }, { "epoch": 2.2064160725858715, "grad_norm": 0.4604962468147278, "learning_rate": 1.725576152879611e-06, "loss": 0.0694, "step": 6809 }, { "epoch": 2.2067401166558653, "grad_norm": 0.4484296143054962, "learning_rate": 1.7242544201679124e-06, "loss": 0.0671, "step": 6810 }, { "epoch": 2.2070641607258588, "grad_norm": 0.4855106472969055, "learning_rate": 1.7229330883757595e-06, "loss": 0.0673, "step": 6811 }, { "epoch": 2.207388204795852, "grad_norm": 0.5392202138900757, "learning_rate": 1.721612157664872e-06, "loss": 0.0866, "step": 6812 }, { "epoch": 2.2077122488658456, "grad_norm": 0.4744535982608795, "learning_rate": 1.7202916281969212e-06, "loss": 0.0723, "step": 6813 }, { "epoch": 2.2080362929358395, "grad_norm": 0.4794631600379944, "learning_rate": 1.7189715001335211e-06, "loss": 0.0678, "step": 6814 }, { "epoch": 2.208360337005833, "grad_norm": 0.4904131591320038, "learning_rate": 1.7176517736362502e-06, "loss": 0.0754, "step": 6815 }, { "epoch": 2.2086843810758263, "grad_norm": 0.48358476161956787, "learning_rate": 1.7163324488666233e-06, "loss": 0.078, "step": 6816 }, { "epoch": 2.2090084251458197, "grad_norm": 0.46504324674606323, "learning_rate": 1.7150135259861201e-06, "loss": 0.0696, "step": 6817 }, { "epoch": 2.209332469215813, "grad_norm": 0.459061861038208, "learning_rate": 1.7136950051561562e-06, "loss": 0.0698, "step": 6818 }, { "epoch": 2.209656513285807, "grad_norm": 0.492106169462204, "learning_rate": 1.7123768865381136e-06, "loss": 0.0801, "step": 6819 }, { "epoch": 2.2099805573558005, "grad_norm": 0.487063467502594, "learning_rate": 1.7110591702933111e-06, "loss": 0.0756, "step": 6820 }, { "epoch": 2.210304601425794, "grad_norm": 0.4428948163986206, "learning_rate": 1.709741856583027e-06, "loss": 0.0731, "step": 6821 }, { "epoch": 2.2106286454957873, "grad_norm": 0.5058284401893616, "learning_rate": 1.7084249455684876e-06, "loss": 0.08, "step": 6822 }, { "epoch": 2.210952689565781, "grad_norm": 0.4309074282646179, "learning_rate": 1.7071084374108704e-06, "loss": 0.0666, "step": 6823 }, { "epoch": 2.2112767336357746, "grad_norm": 0.47959843277931213, "learning_rate": 1.7057923322713038e-06, "loss": 0.073, "step": 6824 }, { "epoch": 2.211600777705768, "grad_norm": 0.46819719672203064, "learning_rate": 1.704476630310864e-06, "loss": 0.0698, "step": 6825 }, { "epoch": 2.2119248217757614, "grad_norm": 0.4914425015449524, "learning_rate": 1.7031613316905816e-06, "loss": 0.0678, "step": 6826 }, { "epoch": 2.212248865845755, "grad_norm": 0.5222875475883484, "learning_rate": 1.701846436571436e-06, "loss": 0.0738, "step": 6827 }, { "epoch": 2.2125729099157487, "grad_norm": 0.47440800070762634, "learning_rate": 1.7005319451143581e-06, "loss": 0.0756, "step": 6828 }, { "epoch": 2.212896953985742, "grad_norm": 0.46596425771713257, "learning_rate": 1.6992178574802288e-06, "loss": 0.0691, "step": 6829 }, { "epoch": 2.2132209980557356, "grad_norm": 0.4916023910045624, "learning_rate": 1.6979041738298796e-06, "loss": 0.0729, "step": 6830 }, { "epoch": 2.213545042125729, "grad_norm": 0.4842454791069031, "learning_rate": 1.6965908943240928e-06, "loss": 0.07, "step": 6831 }, { "epoch": 2.2138690861957224, "grad_norm": 0.44420167803764343, "learning_rate": 1.695278019123603e-06, "loss": 0.0656, "step": 6832 }, { "epoch": 2.2141931302657163, "grad_norm": 0.4698340892791748, "learning_rate": 1.6939655483890894e-06, "loss": 0.0741, "step": 6833 }, { "epoch": 2.2145171743357097, "grad_norm": 0.4900783896446228, "learning_rate": 1.692653482281188e-06, "loss": 0.075, "step": 6834 }, { "epoch": 2.214841218405703, "grad_norm": 0.4853631258010864, "learning_rate": 1.6913418209604825e-06, "loss": 0.0721, "step": 6835 }, { "epoch": 2.2151652624756966, "grad_norm": 0.460990846157074, "learning_rate": 1.6900305645875082e-06, "loss": 0.0741, "step": 6836 }, { "epoch": 2.21548930654569, "grad_norm": 0.44160157442092896, "learning_rate": 1.6887197133227512e-06, "loss": 0.0644, "step": 6837 }, { "epoch": 2.215813350615684, "grad_norm": 0.48136088252067566, "learning_rate": 1.6874092673266424e-06, "loss": 0.0726, "step": 6838 }, { "epoch": 2.2161373946856773, "grad_norm": 0.4865271747112274, "learning_rate": 1.6860992267595745e-06, "loss": 0.0779, "step": 6839 }, { "epoch": 2.2164614387556707, "grad_norm": 0.45788124203681946, "learning_rate": 1.6847895917818762e-06, "loss": 0.071, "step": 6840 }, { "epoch": 2.216785482825664, "grad_norm": 0.46521875262260437, "learning_rate": 1.683480362553842e-06, "loss": 0.0722, "step": 6841 }, { "epoch": 2.217109526895658, "grad_norm": 0.5691971182823181, "learning_rate": 1.6821715392357036e-06, "loss": 0.0859, "step": 6842 }, { "epoch": 2.2174335709656514, "grad_norm": 0.48135796189308167, "learning_rate": 1.6808631219876491e-06, "loss": 0.0742, "step": 6843 }, { "epoch": 2.217757615035645, "grad_norm": 0.4701908528804779, "learning_rate": 1.6795551109698171e-06, "loss": 0.0726, "step": 6844 }, { "epoch": 2.2180816591056383, "grad_norm": 0.5185319781303406, "learning_rate": 1.6782475063422947e-06, "loss": 0.0749, "step": 6845 }, { "epoch": 2.2184057031756317, "grad_norm": 0.47821444272994995, "learning_rate": 1.6769403082651225e-06, "loss": 0.0768, "step": 6846 }, { "epoch": 2.2187297472456255, "grad_norm": 0.5236374735832214, "learning_rate": 1.6756335168982834e-06, "loss": 0.0817, "step": 6847 }, { "epoch": 2.219053791315619, "grad_norm": 0.4434678256511688, "learning_rate": 1.674327132401723e-06, "loss": 0.0685, "step": 6848 }, { "epoch": 2.2193778353856124, "grad_norm": 0.4766329228878021, "learning_rate": 1.673021154935325e-06, "loss": 0.0682, "step": 6849 }, { "epoch": 2.219701879455606, "grad_norm": 0.5388830900192261, "learning_rate": 1.6717155846589294e-06, "loss": 0.0737, "step": 6850 }, { "epoch": 2.2200259235255997, "grad_norm": 0.5101863145828247, "learning_rate": 1.6704104217323268e-06, "loss": 0.0746, "step": 6851 }, { "epoch": 2.220349967595593, "grad_norm": 0.4756203293800354, "learning_rate": 1.669105666315255e-06, "loss": 0.0702, "step": 6852 }, { "epoch": 2.2206740116655865, "grad_norm": 0.48440685868263245, "learning_rate": 1.6678013185674041e-06, "loss": 0.0736, "step": 6853 }, { "epoch": 2.22099805573558, "grad_norm": 0.4911997318267822, "learning_rate": 1.666497378648414e-06, "loss": 0.0811, "step": 6854 }, { "epoch": 2.2213220998055734, "grad_norm": 0.45472726225852966, "learning_rate": 1.6651938467178751e-06, "loss": 0.0737, "step": 6855 }, { "epoch": 2.2216461438755672, "grad_norm": 0.48038211464881897, "learning_rate": 1.6638907229353252e-06, "loss": 0.0729, "step": 6856 }, { "epoch": 2.2219701879455607, "grad_norm": 0.44669100642204285, "learning_rate": 1.662588007460254e-06, "loss": 0.0682, "step": 6857 }, { "epoch": 2.222294232015554, "grad_norm": 0.5000935196876526, "learning_rate": 1.6612857004521022e-06, "loss": 0.0756, "step": 6858 }, { "epoch": 2.2226182760855475, "grad_norm": 0.4641658365726471, "learning_rate": 1.6599838020702592e-06, "loss": 0.0704, "step": 6859 }, { "epoch": 2.222942320155541, "grad_norm": 0.47990691661834717, "learning_rate": 1.6586823124740654e-06, "loss": 0.0701, "step": 6860 }, { "epoch": 2.223266364225535, "grad_norm": 0.49902471899986267, "learning_rate": 1.6573812318228116e-06, "loss": 0.074, "step": 6861 }, { "epoch": 2.2235904082955282, "grad_norm": 0.5331015586853027, "learning_rate": 1.6560805602757324e-06, "loss": 0.0788, "step": 6862 }, { "epoch": 2.2239144523655217, "grad_norm": 0.5091055035591125, "learning_rate": 1.654780297992024e-06, "loss": 0.0782, "step": 6863 }, { "epoch": 2.224238496435515, "grad_norm": 0.513282835483551, "learning_rate": 1.6534804451308224e-06, "loss": 0.081, "step": 6864 }, { "epoch": 2.224562540505509, "grad_norm": 0.4953143894672394, "learning_rate": 1.6521810018512163e-06, "loss": 0.0668, "step": 6865 }, { "epoch": 2.2248865845755024, "grad_norm": 0.5035532116889954, "learning_rate": 1.6508819683122468e-06, "loss": 0.0786, "step": 6866 }, { "epoch": 2.225210628645496, "grad_norm": 0.46315574645996094, "learning_rate": 1.649583344672902e-06, "loss": 0.0704, "step": 6867 }, { "epoch": 2.225534672715489, "grad_norm": 0.5127713084220886, "learning_rate": 1.6482851310921232e-06, "loss": 0.0763, "step": 6868 }, { "epoch": 2.2258587167854826, "grad_norm": 0.4451947808265686, "learning_rate": 1.646987327728794e-06, "loss": 0.0686, "step": 6869 }, { "epoch": 2.2261827608554765, "grad_norm": 0.474811315536499, "learning_rate": 1.6456899347417593e-06, "loss": 0.0754, "step": 6870 }, { "epoch": 2.22650680492547, "grad_norm": 0.496021568775177, "learning_rate": 1.6443929522898017e-06, "loss": 0.0759, "step": 6871 }, { "epoch": 2.2268308489954634, "grad_norm": 0.4854046404361725, "learning_rate": 1.6430963805316646e-06, "loss": 0.0758, "step": 6872 }, { "epoch": 2.2271548930654568, "grad_norm": 0.4593440890312195, "learning_rate": 1.6418002196260314e-06, "loss": 0.0686, "step": 6873 }, { "epoch": 2.2274789371354506, "grad_norm": 0.45711126923561096, "learning_rate": 1.640504469731542e-06, "loss": 0.0706, "step": 6874 }, { "epoch": 2.227802981205444, "grad_norm": 0.4680411219596863, "learning_rate": 1.6392091310067825e-06, "loss": 0.0739, "step": 6875 }, { "epoch": 2.2281270252754375, "grad_norm": 0.4617251455783844, "learning_rate": 1.6379142036102908e-06, "loss": 0.0709, "step": 6876 }, { "epoch": 2.228451069345431, "grad_norm": 0.44760486483573914, "learning_rate": 1.6366196877005541e-06, "loss": 0.072, "step": 6877 }, { "epoch": 2.2287751134154243, "grad_norm": 0.5043668746948242, "learning_rate": 1.635325583436005e-06, "loss": 0.0763, "step": 6878 }, { "epoch": 2.229099157485418, "grad_norm": 0.4933907091617584, "learning_rate": 1.6340318909750347e-06, "loss": 0.0772, "step": 6879 }, { "epoch": 2.2294232015554116, "grad_norm": 0.4615463316440582, "learning_rate": 1.6327386104759746e-06, "loss": 0.0714, "step": 6880 }, { "epoch": 2.229747245625405, "grad_norm": 0.47937801480293274, "learning_rate": 1.6314457420971107e-06, "loss": 0.0777, "step": 6881 }, { "epoch": 2.2300712896953985, "grad_norm": 0.46577373147010803, "learning_rate": 1.630153285996678e-06, "loss": 0.0685, "step": 6882 }, { "epoch": 2.230395333765392, "grad_norm": 0.4340088963508606, "learning_rate": 1.6288612423328604e-06, "loss": 0.0674, "step": 6883 }, { "epoch": 2.2307193778353858, "grad_norm": 0.5063320994377136, "learning_rate": 1.6275696112637918e-06, "loss": 0.0752, "step": 6884 }, { "epoch": 2.231043421905379, "grad_norm": 0.4855647683143616, "learning_rate": 1.6262783929475545e-06, "loss": 0.0775, "step": 6885 }, { "epoch": 2.2313674659753726, "grad_norm": 0.5098389983177185, "learning_rate": 1.624987587542184e-06, "loss": 0.0773, "step": 6886 }, { "epoch": 2.231691510045366, "grad_norm": 0.4832960069179535, "learning_rate": 1.6236971952056584e-06, "loss": 0.0722, "step": 6887 }, { "epoch": 2.2320155541153595, "grad_norm": 0.5732384324073792, "learning_rate": 1.6224072160959109e-06, "loss": 0.0742, "step": 6888 }, { "epoch": 2.2323395981853533, "grad_norm": 0.4994621276855469, "learning_rate": 1.621117650370822e-06, "loss": 0.0759, "step": 6889 }, { "epoch": 2.2326636422553467, "grad_norm": 0.4736066162586212, "learning_rate": 1.6198284981882234e-06, "loss": 0.0738, "step": 6890 }, { "epoch": 2.23298768632534, "grad_norm": 0.5226100087165833, "learning_rate": 1.618539759705894e-06, "loss": 0.0724, "step": 6891 }, { "epoch": 2.2333117303953336, "grad_norm": 0.4682101905345917, "learning_rate": 1.6172514350815638e-06, "loss": 0.0718, "step": 6892 }, { "epoch": 2.2336357744653275, "grad_norm": 0.46126919984817505, "learning_rate": 1.6159635244729077e-06, "loss": 0.0718, "step": 6893 }, { "epoch": 2.233959818535321, "grad_norm": 0.5003137588500977, "learning_rate": 1.614676028037559e-06, "loss": 0.0753, "step": 6894 }, { "epoch": 2.2342838626053143, "grad_norm": 0.48518186807632446, "learning_rate": 1.613388945933091e-06, "loss": 0.0754, "step": 6895 }, { "epoch": 2.2346079066753077, "grad_norm": 0.4596126675605774, "learning_rate": 1.6121022783170305e-06, "loss": 0.0708, "step": 6896 }, { "epoch": 2.234931950745301, "grad_norm": 0.5081773400306702, "learning_rate": 1.6108160253468542e-06, "loss": 0.0802, "step": 6897 }, { "epoch": 2.235255994815295, "grad_norm": 0.47247710824012756, "learning_rate": 1.6095301871799862e-06, "loss": 0.0769, "step": 6898 }, { "epoch": 2.2355800388852884, "grad_norm": 0.48782727122306824, "learning_rate": 1.608244763973803e-06, "loss": 0.0746, "step": 6899 }, { "epoch": 2.235904082955282, "grad_norm": 0.49604710936546326, "learning_rate": 1.6069597558856225e-06, "loss": 0.0709, "step": 6900 }, { "epoch": 2.2362281270252753, "grad_norm": 0.47329846024513245, "learning_rate": 1.605675163072724e-06, "loss": 0.0761, "step": 6901 }, { "epoch": 2.236552171095269, "grad_norm": 0.4849427342414856, "learning_rate": 1.6043909856923222e-06, "loss": 0.0703, "step": 6902 }, { "epoch": 2.2368762151652626, "grad_norm": 0.48779815435409546, "learning_rate": 1.6031072239015954e-06, "loss": 0.0764, "step": 6903 }, { "epoch": 2.237200259235256, "grad_norm": 0.4910009503364563, "learning_rate": 1.6018238778576583e-06, "loss": 0.0695, "step": 6904 }, { "epoch": 2.2375243033052494, "grad_norm": 0.47566890716552734, "learning_rate": 1.6005409477175821e-06, "loss": 0.0713, "step": 6905 }, { "epoch": 2.237848347375243, "grad_norm": 0.4630560874938965, "learning_rate": 1.5992584336383837e-06, "loss": 0.0657, "step": 6906 }, { "epoch": 2.2381723914452367, "grad_norm": 0.47630998492240906, "learning_rate": 1.5979763357770316e-06, "loss": 0.0724, "step": 6907 }, { "epoch": 2.23849643551523, "grad_norm": 0.4527610242366791, "learning_rate": 1.5966946542904438e-06, "loss": 0.0653, "step": 6908 }, { "epoch": 2.2388204795852236, "grad_norm": 0.46687713265419006, "learning_rate": 1.5954133893354807e-06, "loss": 0.0714, "step": 6909 }, { "epoch": 2.239144523655217, "grad_norm": 0.489058256149292, "learning_rate": 1.5941325410689624e-06, "loss": 0.073, "step": 6910 }, { "epoch": 2.2394685677252104, "grad_norm": 0.4567910432815552, "learning_rate": 1.5928521096476484e-06, "loss": 0.0671, "step": 6911 }, { "epoch": 2.2397926117952043, "grad_norm": 0.4794004559516907, "learning_rate": 1.5915720952282521e-06, "loss": 0.07, "step": 6912 }, { "epoch": 2.2401166558651977, "grad_norm": 0.4764542579650879, "learning_rate": 1.5902924979674355e-06, "loss": 0.0714, "step": 6913 }, { "epoch": 2.240440699935191, "grad_norm": 0.5001002550125122, "learning_rate": 1.5890133180218087e-06, "loss": 0.079, "step": 6914 }, { "epoch": 2.2407647440051845, "grad_norm": 0.49677136540412903, "learning_rate": 1.5877345555479307e-06, "loss": 0.0747, "step": 6915 }, { "epoch": 2.2410887880751784, "grad_norm": 0.4633571207523346, "learning_rate": 1.5864562107023118e-06, "loss": 0.0719, "step": 6916 }, { "epoch": 2.241412832145172, "grad_norm": 0.5432472825050354, "learning_rate": 1.5851782836414049e-06, "loss": 0.0738, "step": 6917 }, { "epoch": 2.2417368762151653, "grad_norm": 0.5069789886474609, "learning_rate": 1.5839007745216184e-06, "loss": 0.0736, "step": 6918 }, { "epoch": 2.2420609202851587, "grad_norm": 0.5018969178199768, "learning_rate": 1.5826236834993064e-06, "loss": 0.0779, "step": 6919 }, { "epoch": 2.242384964355152, "grad_norm": 0.4906817078590393, "learning_rate": 1.5813470107307733e-06, "loss": 0.0754, "step": 6920 }, { "epoch": 2.242709008425146, "grad_norm": 0.5077282786369324, "learning_rate": 1.5800707563722707e-06, "loss": 0.0711, "step": 6921 }, { "epoch": 2.2430330524951394, "grad_norm": 0.5080711245536804, "learning_rate": 1.5787949205799997e-06, "loss": 0.0775, "step": 6922 }, { "epoch": 2.243357096565133, "grad_norm": 0.42835354804992676, "learning_rate": 1.5775195035101127e-06, "loss": 0.0659, "step": 6923 }, { "epoch": 2.2436811406351262, "grad_norm": 0.5123438835144043, "learning_rate": 1.5762445053187025e-06, "loss": 0.0799, "step": 6924 }, { "epoch": 2.24400518470512, "grad_norm": 0.49820834398269653, "learning_rate": 1.574969926161824e-06, "loss": 0.0696, "step": 6925 }, { "epoch": 2.2443292287751135, "grad_norm": 0.46529993414878845, "learning_rate": 1.5736957661954662e-06, "loss": 0.0683, "step": 6926 }, { "epoch": 2.244653272845107, "grad_norm": 0.4767801761627197, "learning_rate": 1.5724220255755806e-06, "loss": 0.0782, "step": 6927 }, { "epoch": 2.2449773169151004, "grad_norm": 0.46832630038261414, "learning_rate": 1.5711487044580565e-06, "loss": 0.0723, "step": 6928 }, { "epoch": 2.245301360985094, "grad_norm": 0.48059654235839844, "learning_rate": 1.5698758029987366e-06, "loss": 0.0713, "step": 6929 }, { "epoch": 2.2456254050550877, "grad_norm": 0.4985971748828888, "learning_rate": 1.568603321353414e-06, "loss": 0.0757, "step": 6930 }, { "epoch": 2.245949449125081, "grad_norm": 0.43783020973205566, "learning_rate": 1.5673312596778229e-06, "loss": 0.0665, "step": 6931 }, { "epoch": 2.2462734931950745, "grad_norm": 0.49100205302238464, "learning_rate": 1.5660596181276582e-06, "loss": 0.0718, "step": 6932 }, { "epoch": 2.246597537265068, "grad_norm": 0.4694617986679077, "learning_rate": 1.5647883968585503e-06, "loss": 0.0706, "step": 6933 }, { "epoch": 2.2469215813350614, "grad_norm": 0.50400310754776, "learning_rate": 1.5635175960260901e-06, "loss": 0.0805, "step": 6934 }, { "epoch": 2.2472456254050552, "grad_norm": 0.48022282123565674, "learning_rate": 1.5622472157858066e-06, "loss": 0.074, "step": 6935 }, { "epoch": 2.2475696694750487, "grad_norm": 0.5288910269737244, "learning_rate": 1.560977256293184e-06, "loss": 0.0817, "step": 6936 }, { "epoch": 2.247893713545042, "grad_norm": 0.47093531489372253, "learning_rate": 1.559707717703653e-06, "loss": 0.0751, "step": 6937 }, { "epoch": 2.2482177576150355, "grad_norm": 0.5040370225906372, "learning_rate": 1.5584386001725927e-06, "loss": 0.0778, "step": 6938 }, { "epoch": 2.248541801685029, "grad_norm": 0.4735819697380066, "learning_rate": 1.5571699038553323e-06, "loss": 0.0695, "step": 6939 }, { "epoch": 2.248865845755023, "grad_norm": 0.4850353002548218, "learning_rate": 1.555901628907145e-06, "loss": 0.0734, "step": 6940 }, { "epoch": 2.249189889825016, "grad_norm": 0.4938466250896454, "learning_rate": 1.554633775483257e-06, "loss": 0.0764, "step": 6941 }, { "epoch": 2.2495139338950096, "grad_norm": 0.45608532428741455, "learning_rate": 1.5533663437388408e-06, "loss": 0.0698, "step": 6942 }, { "epoch": 2.249837977965003, "grad_norm": 0.46821609139442444, "learning_rate": 1.5520993338290186e-06, "loss": 0.0772, "step": 6943 }, { "epoch": 2.250162022034997, "grad_norm": 0.43394994735717773, "learning_rate": 1.5508327459088595e-06, "loss": 0.0713, "step": 6944 }, { "epoch": 2.2504860661049904, "grad_norm": 0.45471128821372986, "learning_rate": 1.5495665801333815e-06, "loss": 0.0752, "step": 6945 }, { "epoch": 2.250810110174984, "grad_norm": 0.4942833185195923, "learning_rate": 1.5483008366575514e-06, "loss": 0.0767, "step": 6946 }, { "epoch": 2.251134154244977, "grad_norm": 0.44427838921546936, "learning_rate": 1.547035515636286e-06, "loss": 0.0656, "step": 6947 }, { "epoch": 2.251458198314971, "grad_norm": 0.5050087571144104, "learning_rate": 1.5457706172244425e-06, "loss": 0.0785, "step": 6948 }, { "epoch": 2.2517822423849645, "grad_norm": 0.5311718583106995, "learning_rate": 1.5445061415768391e-06, "loss": 0.0842, "step": 6949 }, { "epoch": 2.252106286454958, "grad_norm": 0.48573848605155945, "learning_rate": 1.5432420888482308e-06, "loss": 0.0702, "step": 6950 }, { "epoch": 2.2524303305249513, "grad_norm": 0.49349531531333923, "learning_rate": 1.5419784591933267e-06, "loss": 0.0734, "step": 6951 }, { "epoch": 2.2527543745949448, "grad_norm": 0.4787566661834717, "learning_rate": 1.540715252766783e-06, "loss": 0.0756, "step": 6952 }, { "epoch": 2.2530784186649386, "grad_norm": 0.47929108142852783, "learning_rate": 1.5394524697232038e-06, "loss": 0.0718, "step": 6953 }, { "epoch": 2.253402462734932, "grad_norm": 0.4798576831817627, "learning_rate": 1.538190110217143e-06, "loss": 0.0767, "step": 6954 }, { "epoch": 2.2537265068049255, "grad_norm": 0.45218637585639954, "learning_rate": 1.5369281744030968e-06, "loss": 0.0666, "step": 6955 }, { "epoch": 2.254050550874919, "grad_norm": 0.4727388620376587, "learning_rate": 1.5356666624355204e-06, "loss": 0.0693, "step": 6956 }, { "epoch": 2.2543745949449123, "grad_norm": 0.5332309603691101, "learning_rate": 1.5344055744688035e-06, "loss": 0.0779, "step": 6957 }, { "epoch": 2.254698639014906, "grad_norm": 0.4802578091621399, "learning_rate": 1.5331449106572983e-06, "loss": 0.0704, "step": 6958 }, { "epoch": 2.2550226830848996, "grad_norm": 0.46075379848480225, "learning_rate": 1.5318846711552926e-06, "loss": 0.0709, "step": 6959 }, { "epoch": 2.255346727154893, "grad_norm": 0.47085317969322205, "learning_rate": 1.530624856117029e-06, "loss": 0.0713, "step": 6960 }, { "epoch": 2.2556707712248865, "grad_norm": 0.5056251883506775, "learning_rate": 1.5293654656966972e-06, "loss": 0.0762, "step": 6961 }, { "epoch": 2.25599481529488, "grad_norm": 0.495922714471817, "learning_rate": 1.528106500048434e-06, "loss": 0.0773, "step": 6962 }, { "epoch": 2.2563188593648738, "grad_norm": 0.5067967176437378, "learning_rate": 1.5268479593263257e-06, "loss": 0.075, "step": 6963 }, { "epoch": 2.256642903434867, "grad_norm": 0.5422359704971313, "learning_rate": 1.525589843684402e-06, "loss": 0.0848, "step": 6964 }, { "epoch": 2.2569669475048606, "grad_norm": 0.4754337668418884, "learning_rate": 1.5243321532766492e-06, "loss": 0.0714, "step": 6965 }, { "epoch": 2.257290991574854, "grad_norm": 0.44811588525772095, "learning_rate": 1.5230748882569924e-06, "loss": 0.061, "step": 6966 }, { "epoch": 2.257615035644848, "grad_norm": 0.47737687826156616, "learning_rate": 1.52181804877931e-06, "loss": 0.0709, "step": 6967 }, { "epoch": 2.2579390797148413, "grad_norm": 0.4759818911552429, "learning_rate": 1.5205616349974273e-06, "loss": 0.0748, "step": 6968 }, { "epoch": 2.2582631237848347, "grad_norm": 0.46108049154281616, "learning_rate": 1.5193056470651163e-06, "loss": 0.0699, "step": 6969 }, { "epoch": 2.258587167854828, "grad_norm": 0.4533197581768036, "learning_rate": 1.5180500851360991e-06, "loss": 0.0693, "step": 6970 }, { "epoch": 2.2589112119248216, "grad_norm": 0.522936999797821, "learning_rate": 1.5167949493640444e-06, "loss": 0.081, "step": 6971 }, { "epoch": 2.2592352559948155, "grad_norm": 0.4949372410774231, "learning_rate": 1.515540239902567e-06, "loss": 0.0765, "step": 6972 }, { "epoch": 2.259559300064809, "grad_norm": 0.5081701278686523, "learning_rate": 1.5142859569052315e-06, "loss": 0.0759, "step": 6973 }, { "epoch": 2.2598833441348023, "grad_norm": 0.46086448431015015, "learning_rate": 1.5130321005255504e-06, "loss": 0.0681, "step": 6974 }, { "epoch": 2.2602073882047957, "grad_norm": 0.5110464692115784, "learning_rate": 1.5117786709169845e-06, "loss": 0.0762, "step": 6975 }, { "epoch": 2.2605314322747896, "grad_norm": 0.49480530619621277, "learning_rate": 1.51052566823294e-06, "loss": 0.0765, "step": 6976 }, { "epoch": 2.260855476344783, "grad_norm": 0.5045551657676697, "learning_rate": 1.5092730926267734e-06, "loss": 0.076, "step": 6977 }, { "epoch": 2.2611795204147764, "grad_norm": 0.4924558401107788, "learning_rate": 1.508020944251789e-06, "loss": 0.0733, "step": 6978 }, { "epoch": 2.26150356448477, "grad_norm": 0.497134804725647, "learning_rate": 1.5067692232612323e-06, "loss": 0.0751, "step": 6979 }, { "epoch": 2.2618276085547633, "grad_norm": 0.4783954918384552, "learning_rate": 1.5055179298083095e-06, "loss": 0.0724, "step": 6980 }, { "epoch": 2.262151652624757, "grad_norm": 0.47952574491500854, "learning_rate": 1.5042670640461609e-06, "loss": 0.0724, "step": 6981 }, { "epoch": 2.2624756966947506, "grad_norm": 0.4710170328617096, "learning_rate": 1.5030166261278823e-06, "loss": 0.0697, "step": 6982 }, { "epoch": 2.262799740764744, "grad_norm": 0.49460992217063904, "learning_rate": 1.5017666162065153e-06, "loss": 0.0725, "step": 6983 }, { "epoch": 2.2631237848347374, "grad_norm": 0.5072700381278992, "learning_rate": 1.5005170344350489e-06, "loss": 0.0779, "step": 6984 }, { "epoch": 2.263447828904731, "grad_norm": 0.47068458795547485, "learning_rate": 1.4992678809664218e-06, "loss": 0.0701, "step": 6985 }, { "epoch": 2.2637718729747247, "grad_norm": 0.4834175407886505, "learning_rate": 1.4980191559535128e-06, "loss": 0.0768, "step": 6986 }, { "epoch": 2.264095917044718, "grad_norm": 0.4545857012271881, "learning_rate": 1.496770859549161e-06, "loss": 0.0683, "step": 6987 }, { "epoch": 2.2644199611147116, "grad_norm": 0.5007740259170532, "learning_rate": 1.495522991906138e-06, "loss": 0.0731, "step": 6988 }, { "epoch": 2.264744005184705, "grad_norm": 0.4794744849205017, "learning_rate": 1.4942755531771785e-06, "loss": 0.074, "step": 6989 }, { "epoch": 2.2650680492546984, "grad_norm": 0.5080423355102539, "learning_rate": 1.4930285435149522e-06, "loss": 0.0763, "step": 6990 }, { "epoch": 2.2653920933246923, "grad_norm": 0.5505486130714417, "learning_rate": 1.4917819630720814e-06, "loss": 0.0827, "step": 6991 }, { "epoch": 2.2657161373946857, "grad_norm": 0.45858538150787354, "learning_rate": 1.490535812001136e-06, "loss": 0.0663, "step": 6992 }, { "epoch": 2.266040181464679, "grad_norm": 0.4944336414337158, "learning_rate": 1.4892900904546336e-06, "loss": 0.0753, "step": 6993 }, { "epoch": 2.2663642255346725, "grad_norm": 0.4706510603427887, "learning_rate": 1.4880447985850387e-06, "loss": 0.0721, "step": 6994 }, { "epoch": 2.2666882696046664, "grad_norm": 0.5018250346183777, "learning_rate": 1.486799936544761e-06, "loss": 0.078, "step": 6995 }, { "epoch": 2.26701231367466, "grad_norm": 0.4667245149612427, "learning_rate": 1.4855555044861609e-06, "loss": 0.0672, "step": 6996 }, { "epoch": 2.2673363577446533, "grad_norm": 0.4803808331489563, "learning_rate": 1.484311502561544e-06, "loss": 0.073, "step": 6997 }, { "epoch": 2.2676604018146467, "grad_norm": 0.47602206468582153, "learning_rate": 1.4830679309231649e-06, "loss": 0.0717, "step": 6998 }, { "epoch": 2.2679844458846405, "grad_norm": 0.4727866053581238, "learning_rate": 1.4818247897232247e-06, "loss": 0.0678, "step": 6999 }, { "epoch": 2.268308489954634, "grad_norm": 0.4802073836326599, "learning_rate": 1.4805820791138715e-06, "loss": 0.0734, "step": 7000 }, { "epoch": 2.2686325340246274, "grad_norm": 0.4852180480957031, "learning_rate": 1.4793397992472009e-06, "loss": 0.0742, "step": 7001 }, { "epoch": 2.268956578094621, "grad_norm": 0.4746887683868408, "learning_rate": 1.478097950275258e-06, "loss": 0.0739, "step": 7002 }, { "epoch": 2.2692806221646142, "grad_norm": 0.47529059648513794, "learning_rate": 1.47685653235003e-06, "loss": 0.0709, "step": 7003 }, { "epoch": 2.269604666234608, "grad_norm": 0.47900229692459106, "learning_rate": 1.4756155456234555e-06, "loss": 0.0763, "step": 7004 }, { "epoch": 2.2699287103046015, "grad_norm": 0.44263532757759094, "learning_rate": 1.4743749902474197e-06, "loss": 0.0652, "step": 7005 }, { "epoch": 2.270252754374595, "grad_norm": 0.48573142290115356, "learning_rate": 1.4731348663737543e-06, "loss": 0.0741, "step": 7006 }, { "epoch": 2.2705767984445884, "grad_norm": 0.45377403497695923, "learning_rate": 1.4718951741542404e-06, "loss": 0.0711, "step": 7007 }, { "epoch": 2.270900842514582, "grad_norm": 0.4826267659664154, "learning_rate": 1.470655913740599e-06, "loss": 0.0716, "step": 7008 }, { "epoch": 2.2712248865845757, "grad_norm": 0.49614590406417847, "learning_rate": 1.4694170852845102e-06, "loss": 0.0728, "step": 7009 }, { "epoch": 2.271548930654569, "grad_norm": 0.49249184131622314, "learning_rate": 1.4681786889375888e-06, "loss": 0.0718, "step": 7010 }, { "epoch": 2.2718729747245625, "grad_norm": 0.5121108889579773, "learning_rate": 1.4669407248514079e-06, "loss": 0.0763, "step": 7011 }, { "epoch": 2.272197018794556, "grad_norm": 0.4713032841682434, "learning_rate": 1.4657031931774778e-06, "loss": 0.0698, "step": 7012 }, { "epoch": 2.2725210628645494, "grad_norm": 0.5088580846786499, "learning_rate": 1.4644660940672628e-06, "loss": 0.0692, "step": 7013 }, { "epoch": 2.2728451069345432, "grad_norm": 0.4591797888278961, "learning_rate": 1.463229427672171e-06, "loss": 0.0668, "step": 7014 }, { "epoch": 2.2731691510045366, "grad_norm": 0.4763369560241699, "learning_rate": 1.461993194143559e-06, "loss": 0.0744, "step": 7015 }, { "epoch": 2.27349319507453, "grad_norm": 0.504949688911438, "learning_rate": 1.4607573936327302e-06, "loss": 0.0775, "step": 7016 }, { "epoch": 2.2738172391445235, "grad_norm": 0.45064422488212585, "learning_rate": 1.4595220262909315e-06, "loss": 0.0708, "step": 7017 }, { "epoch": 2.2741412832145174, "grad_norm": 0.5188299417495728, "learning_rate": 1.4582870922693654e-06, "loss": 0.0749, "step": 7018 }, { "epoch": 2.274465327284511, "grad_norm": 0.4455835819244385, "learning_rate": 1.4570525917191692e-06, "loss": 0.0702, "step": 7019 }, { "epoch": 2.274789371354504, "grad_norm": 0.46820423007011414, "learning_rate": 1.4558185247914409e-06, "loss": 0.0727, "step": 7020 }, { "epoch": 2.2751134154244976, "grad_norm": 0.4896591305732727, "learning_rate": 1.4545848916372129e-06, "loss": 0.0773, "step": 7021 }, { "epoch": 2.2754374594944915, "grad_norm": 0.5099110007286072, "learning_rate": 1.453351692407472e-06, "loss": 0.0783, "step": 7022 }, { "epoch": 2.275761503564485, "grad_norm": 0.4538210332393646, "learning_rate": 1.45211892725315e-06, "loss": 0.0709, "step": 7023 }, { "epoch": 2.2760855476344783, "grad_norm": 0.4581274390220642, "learning_rate": 1.4508865963251252e-06, "loss": 0.0694, "step": 7024 }, { "epoch": 2.2764095917044718, "grad_norm": 0.456988662481308, "learning_rate": 1.4496546997742239e-06, "loss": 0.067, "step": 7025 }, { "epoch": 2.276733635774465, "grad_norm": 0.4534381926059723, "learning_rate": 1.4484232377512165e-06, "loss": 0.0685, "step": 7026 }, { "epoch": 2.277057679844459, "grad_norm": 0.4984075725078583, "learning_rate": 1.4471922104068225e-06, "loss": 0.0792, "step": 7027 }, { "epoch": 2.2773817239144525, "grad_norm": 0.4937223792076111, "learning_rate": 1.445961617891708e-06, "loss": 0.0744, "step": 7028 }, { "epoch": 2.277705767984446, "grad_norm": 0.49529770016670227, "learning_rate": 1.444731460356486e-06, "loss": 0.0767, "step": 7029 }, { "epoch": 2.2780298120544393, "grad_norm": 0.49039947986602783, "learning_rate": 1.4435017379517153e-06, "loss": 0.0763, "step": 7030 }, { "epoch": 2.2783538561244328, "grad_norm": 0.4506426453590393, "learning_rate": 1.4422724508279024e-06, "loss": 0.0704, "step": 7031 }, { "epoch": 2.2786779001944266, "grad_norm": 0.4578206241130829, "learning_rate": 1.4410435991355004e-06, "loss": 0.0729, "step": 7032 }, { "epoch": 2.27900194426442, "grad_norm": 0.44411706924438477, "learning_rate": 1.4398151830249096e-06, "loss": 0.0682, "step": 7033 }, { "epoch": 2.2793259883344135, "grad_norm": 0.5245050191879272, "learning_rate": 1.4385872026464736e-06, "loss": 0.0771, "step": 7034 }, { "epoch": 2.279650032404407, "grad_norm": 0.4982036054134369, "learning_rate": 1.4373596581504872e-06, "loss": 0.0767, "step": 7035 }, { "epoch": 2.2799740764744003, "grad_norm": 0.4747893512248993, "learning_rate": 1.4361325496871893e-06, "loss": 0.0714, "step": 7036 }, { "epoch": 2.280298120544394, "grad_norm": 0.440021276473999, "learning_rate": 1.4349058774067665e-06, "loss": 0.0689, "step": 7037 }, { "epoch": 2.2806221646143876, "grad_norm": 0.46596381068229675, "learning_rate": 1.4336796414593528e-06, "loss": 0.0705, "step": 7038 }, { "epoch": 2.280946208684381, "grad_norm": 0.4599742293357849, "learning_rate": 1.4324538419950234e-06, "loss": 0.0705, "step": 7039 }, { "epoch": 2.2812702527543745, "grad_norm": 0.4699169397354126, "learning_rate": 1.431228479163811e-06, "loss": 0.0718, "step": 7040 }, { "epoch": 2.281594296824368, "grad_norm": 0.4909172058105469, "learning_rate": 1.4300035531156803e-06, "loss": 0.0754, "step": 7041 }, { "epoch": 2.2819183408943617, "grad_norm": 0.5037856698036194, "learning_rate": 1.4287790640005578e-06, "loss": 0.0795, "step": 7042 }, { "epoch": 2.282242384964355, "grad_norm": 0.4997573494911194, "learning_rate": 1.4275550119683046e-06, "loss": 0.0785, "step": 7043 }, { "epoch": 2.2825664290343486, "grad_norm": 0.500052273273468, "learning_rate": 1.4263313971687337e-06, "loss": 0.0798, "step": 7044 }, { "epoch": 2.282890473104342, "grad_norm": 0.5084229707717896, "learning_rate": 1.4251082197516043e-06, "loss": 0.0779, "step": 7045 }, { "epoch": 2.283214517174336, "grad_norm": 0.5037830471992493, "learning_rate": 1.4238854798666208e-06, "loss": 0.0773, "step": 7046 }, { "epoch": 2.2835385612443293, "grad_norm": 0.46606168150901794, "learning_rate": 1.4226631776634363e-06, "loss": 0.0687, "step": 7047 }, { "epoch": 2.2838626053143227, "grad_norm": 0.5235673189163208, "learning_rate": 1.421441313291645e-06, "loss": 0.0829, "step": 7048 }, { "epoch": 2.284186649384316, "grad_norm": 0.4666191041469574, "learning_rate": 1.4202198869007972e-06, "loss": 0.0712, "step": 7049 }, { "epoch": 2.28451069345431, "grad_norm": 0.4553104341030121, "learning_rate": 1.418998898640378e-06, "loss": 0.071, "step": 7050 }, { "epoch": 2.2848347375243034, "grad_norm": 0.5150062441825867, "learning_rate": 1.4177783486598273e-06, "loss": 0.0802, "step": 7051 }, { "epoch": 2.285158781594297, "grad_norm": 0.44989052414894104, "learning_rate": 1.416558237108528e-06, "loss": 0.0725, "step": 7052 }, { "epoch": 2.2854828256642903, "grad_norm": 0.5269073247909546, "learning_rate": 1.4153385641358102e-06, "loss": 0.0756, "step": 7053 }, { "epoch": 2.2858068697342837, "grad_norm": 0.4914553463459015, "learning_rate": 1.4141193298909496e-06, "loss": 0.077, "step": 7054 }, { "epoch": 2.2861309138042776, "grad_norm": 0.524666428565979, "learning_rate": 1.4129005345231694e-06, "loss": 0.0785, "step": 7055 }, { "epoch": 2.286454957874271, "grad_norm": 0.5038748979568481, "learning_rate": 1.4116821781816391e-06, "loss": 0.0784, "step": 7056 }, { "epoch": 2.2867790019442644, "grad_norm": 0.48034003376960754, "learning_rate": 1.4104642610154712e-06, "loss": 0.0704, "step": 7057 }, { "epoch": 2.287103046014258, "grad_norm": 0.5103918313980103, "learning_rate": 1.4092467831737283e-06, "loss": 0.0765, "step": 7058 }, { "epoch": 2.2874270900842513, "grad_norm": 0.4362955689430237, "learning_rate": 1.408029744805418e-06, "loss": 0.0638, "step": 7059 }, { "epoch": 2.287751134154245, "grad_norm": 0.4751020669937134, "learning_rate": 1.4068131460594942e-06, "loss": 0.0724, "step": 7060 }, { "epoch": 2.2880751782242386, "grad_norm": 0.47384995222091675, "learning_rate": 1.4055969870848567e-06, "loss": 0.0718, "step": 7061 }, { "epoch": 2.288399222294232, "grad_norm": 0.596555233001709, "learning_rate": 1.4043812680303527e-06, "loss": 0.0824, "step": 7062 }, { "epoch": 2.2887232663642254, "grad_norm": 0.5346999764442444, "learning_rate": 1.4031659890447703e-06, "loss": 0.0851, "step": 7063 }, { "epoch": 2.289047310434219, "grad_norm": 0.505793571472168, "learning_rate": 1.4019511502768535e-06, "loss": 0.0778, "step": 7064 }, { "epoch": 2.2893713545042127, "grad_norm": 0.48712509870529175, "learning_rate": 1.400736751875283e-06, "loss": 0.0742, "step": 7065 }, { "epoch": 2.289695398574206, "grad_norm": 0.4694879651069641, "learning_rate": 1.3995227939886902e-06, "loss": 0.069, "step": 7066 }, { "epoch": 2.2900194426441995, "grad_norm": 0.4688408374786377, "learning_rate": 1.398309276765652e-06, "loss": 0.0704, "step": 7067 }, { "epoch": 2.290343486714193, "grad_norm": 0.5062603950500488, "learning_rate": 1.3970962003546911e-06, "loss": 0.0803, "step": 7068 }, { "epoch": 2.290667530784187, "grad_norm": 0.47821182012557983, "learning_rate": 1.3958835649042785e-06, "loss": 0.0698, "step": 7069 }, { "epoch": 2.2909915748541803, "grad_norm": 0.46454837918281555, "learning_rate": 1.394671370562824e-06, "loss": 0.0696, "step": 7070 }, { "epoch": 2.2913156189241737, "grad_norm": 0.4921838045120239, "learning_rate": 1.3934596174786941e-06, "loss": 0.0788, "step": 7071 }, { "epoch": 2.291639662994167, "grad_norm": 0.4985063970088959, "learning_rate": 1.39224830580019e-06, "loss": 0.0693, "step": 7072 }, { "epoch": 2.291963707064161, "grad_norm": 0.5150036215782166, "learning_rate": 1.3910374356755707e-06, "loss": 0.0769, "step": 7073 }, { "epoch": 2.2922877511341544, "grad_norm": 0.45300233364105225, "learning_rate": 1.3898270072530306e-06, "loss": 0.0705, "step": 7074 }, { "epoch": 2.292611795204148, "grad_norm": 0.4846388101577759, "learning_rate": 1.3886170206807153e-06, "loss": 0.0718, "step": 7075 }, { "epoch": 2.2929358392741412, "grad_norm": 0.47709107398986816, "learning_rate": 1.3874074761067158e-06, "loss": 0.0701, "step": 7076 }, { "epoch": 2.2932598833441347, "grad_norm": 0.4540219008922577, "learning_rate": 1.3861983736790685e-06, "loss": 0.0676, "step": 7077 }, { "epoch": 2.2935839274141285, "grad_norm": 0.46152257919311523, "learning_rate": 1.3849897135457574e-06, "loss": 0.0693, "step": 7078 }, { "epoch": 2.293907971484122, "grad_norm": 0.5037170648574829, "learning_rate": 1.383781495854707e-06, "loss": 0.0777, "step": 7079 }, { "epoch": 2.2942320155541154, "grad_norm": 0.47302427887916565, "learning_rate": 1.3825737207537959e-06, "loss": 0.0689, "step": 7080 }, { "epoch": 2.294556059624109, "grad_norm": 0.5259142518043518, "learning_rate": 1.3813663883908412e-06, "loss": 0.0786, "step": 7081 }, { "epoch": 2.2948801036941022, "grad_norm": 0.46605339646339417, "learning_rate": 1.380159498913609e-06, "loss": 0.0716, "step": 7082 }, { "epoch": 2.295204147764096, "grad_norm": 0.48375439643859863, "learning_rate": 1.3789530524698113e-06, "loss": 0.0727, "step": 7083 }, { "epoch": 2.2955281918340895, "grad_norm": 0.4879249930381775, "learning_rate": 1.377747049207106e-06, "loss": 0.0751, "step": 7084 }, { "epoch": 2.295852235904083, "grad_norm": 0.5046743154525757, "learning_rate": 1.3765414892730954e-06, "loss": 0.072, "step": 7085 }, { "epoch": 2.2961762799740764, "grad_norm": 0.5172231793403625, "learning_rate": 1.3753363728153291e-06, "loss": 0.0809, "step": 7086 }, { "epoch": 2.29650032404407, "grad_norm": 0.4662172198295593, "learning_rate": 1.374131699981301e-06, "loss": 0.0684, "step": 7087 }, { "epoch": 2.2968243681140637, "grad_norm": 0.5307612419128418, "learning_rate": 1.3729274709184532e-06, "loss": 0.0796, "step": 7088 }, { "epoch": 2.297148412184057, "grad_norm": 0.48822152614593506, "learning_rate": 1.3717236857741684e-06, "loss": 0.0748, "step": 7089 }, { "epoch": 2.2974724562540505, "grad_norm": 0.4695141017436981, "learning_rate": 1.3705203446957803e-06, "loss": 0.0668, "step": 7090 }, { "epoch": 2.297796500324044, "grad_norm": 0.47505518794059753, "learning_rate": 1.369317447830566e-06, "loss": 0.0733, "step": 7091 }, { "epoch": 2.2981205443940373, "grad_norm": 0.45042139291763306, "learning_rate": 1.3681149953257483e-06, "loss": 0.0694, "step": 7092 }, { "epoch": 2.298444588464031, "grad_norm": 0.487425833940506, "learning_rate": 1.3669129873284976e-06, "loss": 0.0764, "step": 7093 }, { "epoch": 2.2987686325340246, "grad_norm": 0.47913771867752075, "learning_rate": 1.3657114239859226e-06, "loss": 0.0734, "step": 7094 }, { "epoch": 2.299092676604018, "grad_norm": 0.4954153597354889, "learning_rate": 1.3645103054450904e-06, "loss": 0.0763, "step": 7095 }, { "epoch": 2.2994167206740115, "grad_norm": 0.449323445558548, "learning_rate": 1.3633096318529986e-06, "loss": 0.0671, "step": 7096 }, { "epoch": 2.2997407647440054, "grad_norm": 0.46951982378959656, "learning_rate": 1.3621094033566057e-06, "loss": 0.0679, "step": 7097 }, { "epoch": 2.3000648088139988, "grad_norm": 0.5145975947380066, "learning_rate": 1.3609096201028026e-06, "loss": 0.074, "step": 7098 }, { "epoch": 2.300388852883992, "grad_norm": 0.47816240787506104, "learning_rate": 1.359710282238433e-06, "loss": 0.073, "step": 7099 }, { "epoch": 2.3007128969539856, "grad_norm": 0.5030921697616577, "learning_rate": 1.3585113899102853e-06, "loss": 0.0776, "step": 7100 }, { "epoch": 2.3010369410239795, "grad_norm": 0.4865356385707855, "learning_rate": 1.3573129432650882e-06, "loss": 0.0767, "step": 7101 }, { "epoch": 2.301360985093973, "grad_norm": 0.4202616214752197, "learning_rate": 1.3561149424495263e-06, "loss": 0.0659, "step": 7102 }, { "epoch": 2.3016850291639663, "grad_norm": 0.48426011204719543, "learning_rate": 1.3549173876102167e-06, "loss": 0.0696, "step": 7103 }, { "epoch": 2.3020090732339598, "grad_norm": 0.46074673533439636, "learning_rate": 1.3537202788937349e-06, "loss": 0.0712, "step": 7104 }, { "epoch": 2.302333117303953, "grad_norm": 0.4761511981487274, "learning_rate": 1.3525236164465904e-06, "loss": 0.0737, "step": 7105 }, { "epoch": 2.302657161373947, "grad_norm": 0.5033185482025146, "learning_rate": 1.351327400415245e-06, "loss": 0.0787, "step": 7106 }, { "epoch": 2.3029812054439405, "grad_norm": 0.456020325422287, "learning_rate": 1.3501316309461044e-06, "loss": 0.0723, "step": 7107 }, { "epoch": 2.303305249513934, "grad_norm": 0.4962615966796875, "learning_rate": 1.3489363081855177e-06, "loss": 0.0737, "step": 7108 }, { "epoch": 2.3036292935839273, "grad_norm": 0.4618525207042694, "learning_rate": 1.3477414322797828e-06, "loss": 0.0738, "step": 7109 }, { "epoch": 2.3039533376539207, "grad_norm": 0.514461100101471, "learning_rate": 1.3465470033751393e-06, "loss": 0.0778, "step": 7110 }, { "epoch": 2.3042773817239146, "grad_norm": 0.47707101702690125, "learning_rate": 1.3453530216177763e-06, "loss": 0.0736, "step": 7111 }, { "epoch": 2.304601425793908, "grad_norm": 0.45344793796539307, "learning_rate": 1.3441594871538221e-06, "loss": 0.0648, "step": 7112 }, { "epoch": 2.3049254698639015, "grad_norm": 0.47207412123680115, "learning_rate": 1.3429664001293557e-06, "loss": 0.0752, "step": 7113 }, { "epoch": 2.305249513933895, "grad_norm": 0.5023806095123291, "learning_rate": 1.3417737606903985e-06, "loss": 0.0731, "step": 7114 }, { "epoch": 2.3055735580038883, "grad_norm": 0.4673008918762207, "learning_rate": 1.3405815689829195e-06, "loss": 0.0706, "step": 7115 }, { "epoch": 2.305897602073882, "grad_norm": 0.4746420085430145, "learning_rate": 1.3393898251528298e-06, "loss": 0.0678, "step": 7116 }, { "epoch": 2.3062216461438756, "grad_norm": 0.487405925989151, "learning_rate": 1.3381985293459899e-06, "loss": 0.0753, "step": 7117 }, { "epoch": 2.306545690213869, "grad_norm": 0.49059462547302246, "learning_rate": 1.3370076817081978e-06, "loss": 0.0784, "step": 7118 }, { "epoch": 2.3068697342838624, "grad_norm": 0.5020880699157715, "learning_rate": 1.3358172823852077e-06, "loss": 0.079, "step": 7119 }, { "epoch": 2.3071937783538563, "grad_norm": 0.4636348783969879, "learning_rate": 1.3346273315227094e-06, "loss": 0.0702, "step": 7120 }, { "epoch": 2.3075178224238497, "grad_norm": 0.5022696852684021, "learning_rate": 1.3334378292663414e-06, "loss": 0.0747, "step": 7121 }, { "epoch": 2.307841866493843, "grad_norm": 0.5041515231132507, "learning_rate": 1.3322487757616886e-06, "loss": 0.0776, "step": 7122 }, { "epoch": 2.3081659105638366, "grad_norm": 0.45286890864372253, "learning_rate": 1.3310601711542787e-06, "loss": 0.0664, "step": 7123 }, { "epoch": 2.3084899546338304, "grad_norm": 0.4740108847618103, "learning_rate": 1.3298720155895879e-06, "loss": 0.0704, "step": 7124 }, { "epoch": 2.308813998703824, "grad_norm": 0.4879896342754364, "learning_rate": 1.3286843092130292e-06, "loss": 0.0721, "step": 7125 }, { "epoch": 2.3091380427738173, "grad_norm": 0.4944480061531067, "learning_rate": 1.3274970521699731e-06, "loss": 0.0775, "step": 7126 }, { "epoch": 2.3094620868438107, "grad_norm": 0.47421595454216003, "learning_rate": 1.326310244605722e-06, "loss": 0.0689, "step": 7127 }, { "epoch": 2.309786130913804, "grad_norm": 0.47668230533599854, "learning_rate": 1.3251238866655363e-06, "loss": 0.0697, "step": 7128 }, { "epoch": 2.310110174983798, "grad_norm": 0.48620912432670593, "learning_rate": 1.3239379784946093e-06, "loss": 0.0712, "step": 7129 }, { "epoch": 2.3104342190537914, "grad_norm": 0.5364415645599365, "learning_rate": 1.322752520238087e-06, "loss": 0.0757, "step": 7130 }, { "epoch": 2.310758263123785, "grad_norm": 0.5277668833732605, "learning_rate": 1.321567512041058e-06, "loss": 0.0783, "step": 7131 }, { "epoch": 2.3110823071937783, "grad_norm": 0.4718340337276459, "learning_rate": 1.3203829540485552e-06, "loss": 0.0689, "step": 7132 }, { "epoch": 2.3114063512637717, "grad_norm": 0.4828222393989563, "learning_rate": 1.3191988464055588e-06, "loss": 0.0733, "step": 7133 }, { "epoch": 2.3117303953337656, "grad_norm": 0.4246669113636017, "learning_rate": 1.3180151892569882e-06, "loss": 0.0604, "step": 7134 }, { "epoch": 2.312054439403759, "grad_norm": 0.4550812542438507, "learning_rate": 1.3168319827477166e-06, "loss": 0.063, "step": 7135 }, { "epoch": 2.3123784834737524, "grad_norm": 0.5037466883659363, "learning_rate": 1.315649227022553e-06, "loss": 0.0802, "step": 7136 }, { "epoch": 2.312702527543746, "grad_norm": 0.43731677532196045, "learning_rate": 1.3144669222262568e-06, "loss": 0.0631, "step": 7137 }, { "epoch": 2.3130265716137393, "grad_norm": 0.49847137928009033, "learning_rate": 1.3132850685035304e-06, "loss": 0.0774, "step": 7138 }, { "epoch": 2.313350615683733, "grad_norm": 0.459532767534256, "learning_rate": 1.3121036659990215e-06, "loss": 0.0703, "step": 7139 }, { "epoch": 2.3136746597537265, "grad_norm": 0.5136400461196899, "learning_rate": 1.3109227148573227e-06, "loss": 0.0796, "step": 7140 }, { "epoch": 2.31399870382372, "grad_norm": 0.4838277995586395, "learning_rate": 1.3097422152229715e-06, "loss": 0.0728, "step": 7141 }, { "epoch": 2.3143227478937134, "grad_norm": 0.5083605051040649, "learning_rate": 1.3085621672404474e-06, "loss": 0.0819, "step": 7142 }, { "epoch": 2.314646791963707, "grad_norm": 0.475130558013916, "learning_rate": 1.3073825710541787e-06, "loss": 0.0722, "step": 7143 }, { "epoch": 2.3149708360337007, "grad_norm": 0.5326772332191467, "learning_rate": 1.3062034268085355e-06, "loss": 0.0801, "step": 7144 }, { "epoch": 2.315294880103694, "grad_norm": 0.497607558965683, "learning_rate": 1.305024734647834e-06, "loss": 0.0699, "step": 7145 }, { "epoch": 2.3156189241736875, "grad_norm": 0.47802019119262695, "learning_rate": 1.303846494716335e-06, "loss": 0.0678, "step": 7146 }, { "epoch": 2.315942968243681, "grad_norm": 0.4831261932849884, "learning_rate": 1.3026687071582432e-06, "loss": 0.0768, "step": 7147 }, { "epoch": 2.316267012313675, "grad_norm": 0.5034778118133545, "learning_rate": 1.3014913721177109e-06, "loss": 0.0724, "step": 7148 }, { "epoch": 2.3165910563836682, "grad_norm": 0.49378588795661926, "learning_rate": 1.300314489738827e-06, "loss": 0.0747, "step": 7149 }, { "epoch": 2.3169151004536617, "grad_norm": 0.4527478814125061, "learning_rate": 1.2991380601656366e-06, "loss": 0.0662, "step": 7150 }, { "epoch": 2.317239144523655, "grad_norm": 0.524268388748169, "learning_rate": 1.2979620835421192e-06, "loss": 0.073, "step": 7151 }, { "epoch": 2.317563188593649, "grad_norm": 0.47499552369117737, "learning_rate": 1.2967865600122042e-06, "loss": 0.0692, "step": 7152 }, { "epoch": 2.3178872326636424, "grad_norm": 0.4901551306247711, "learning_rate": 1.2956114897197641e-06, "loss": 0.0709, "step": 7153 }, { "epoch": 2.318211276733636, "grad_norm": 0.5022875666618347, "learning_rate": 1.294436872808617e-06, "loss": 0.077, "step": 7154 }, { "epoch": 2.3185353208036292, "grad_norm": 0.5198050141334534, "learning_rate": 1.2932627094225253e-06, "loss": 0.0775, "step": 7155 }, { "epoch": 2.3188593648736227, "grad_norm": 0.46014443039894104, "learning_rate": 1.2920889997051906e-06, "loss": 0.0681, "step": 7156 }, { "epoch": 2.3191834089436165, "grad_norm": 0.48765018582344055, "learning_rate": 1.2909157438002706e-06, "loss": 0.0738, "step": 7157 }, { "epoch": 2.31950745301361, "grad_norm": 0.495532751083374, "learning_rate": 1.2897429418513536e-06, "loss": 0.0756, "step": 7158 }, { "epoch": 2.3198314970836034, "grad_norm": 0.5322849750518799, "learning_rate": 1.288570594001985e-06, "loss": 0.0773, "step": 7159 }, { "epoch": 2.320155541153597, "grad_norm": 0.5115591883659363, "learning_rate": 1.2873987003956452e-06, "loss": 0.0796, "step": 7160 }, { "epoch": 2.32047958522359, "grad_norm": 0.4776346981525421, "learning_rate": 1.2862272611757637e-06, "loss": 0.069, "step": 7161 }, { "epoch": 2.320803629293584, "grad_norm": 0.4989016056060791, "learning_rate": 1.2850562764857132e-06, "loss": 0.0713, "step": 7162 }, { "epoch": 2.3211276733635775, "grad_norm": 0.4744539260864258, "learning_rate": 1.283885746468811e-06, "loss": 0.074, "step": 7163 }, { "epoch": 2.321451717433571, "grad_norm": 0.4889492690563202, "learning_rate": 1.2827156712683204e-06, "loss": 0.0731, "step": 7164 }, { "epoch": 2.3217757615035644, "grad_norm": 0.48890548944473267, "learning_rate": 1.2815460510274424e-06, "loss": 0.073, "step": 7165 }, { "epoch": 2.3220998055735578, "grad_norm": 0.5087918639183044, "learning_rate": 1.2803768858893333e-06, "loss": 0.0793, "step": 7166 }, { "epoch": 2.3224238496435516, "grad_norm": 0.49258387088775635, "learning_rate": 1.2792081759970832e-06, "loss": 0.0757, "step": 7167 }, { "epoch": 2.322747893713545, "grad_norm": 0.4936928451061249, "learning_rate": 1.2780399214937323e-06, "loss": 0.0744, "step": 7168 }, { "epoch": 2.3230719377835385, "grad_norm": 0.44905778765678406, "learning_rate": 1.2768721225222635e-06, "loss": 0.0667, "step": 7169 }, { "epoch": 2.323395981853532, "grad_norm": 0.49809473752975464, "learning_rate": 1.2757047792256045e-06, "loss": 0.0762, "step": 7170 }, { "epoch": 2.323720025923526, "grad_norm": 0.4669495224952698, "learning_rate": 1.274537891746626e-06, "loss": 0.0694, "step": 7171 }, { "epoch": 2.324044069993519, "grad_norm": 0.47357267141342163, "learning_rate": 1.273371460228146e-06, "loss": 0.068, "step": 7172 }, { "epoch": 2.3243681140635126, "grad_norm": 0.503506600856781, "learning_rate": 1.2722054848129217e-06, "loss": 0.0742, "step": 7173 }, { "epoch": 2.324692158133506, "grad_norm": 0.5232956409454346, "learning_rate": 1.2710399656436578e-06, "loss": 0.0825, "step": 7174 }, { "epoch": 2.3250162022035, "grad_norm": 0.4886522889137268, "learning_rate": 1.269874902863003e-06, "loss": 0.0708, "step": 7175 }, { "epoch": 2.3253402462734933, "grad_norm": 0.4887654781341553, "learning_rate": 1.2687102966135501e-06, "loss": 0.075, "step": 7176 }, { "epoch": 2.3256642903434868, "grad_norm": 0.4615480899810791, "learning_rate": 1.2675461470378348e-06, "loss": 0.067, "step": 7177 }, { "epoch": 2.32598833441348, "grad_norm": 0.5080332159996033, "learning_rate": 1.2663824542783375e-06, "loss": 0.0723, "step": 7178 }, { "epoch": 2.3263123784834736, "grad_norm": 0.5074442028999329, "learning_rate": 1.2652192184774858e-06, "loss": 0.0771, "step": 7179 }, { "epoch": 2.3266364225534675, "grad_norm": 0.45548608899116516, "learning_rate": 1.2640564397776433e-06, "loss": 0.0698, "step": 7180 }, { "epoch": 2.326960466623461, "grad_norm": 0.45758867263793945, "learning_rate": 1.262894118321129e-06, "loss": 0.0692, "step": 7181 }, { "epoch": 2.3272845106934543, "grad_norm": 0.5235854983329773, "learning_rate": 1.2617322542501947e-06, "loss": 0.0762, "step": 7182 }, { "epoch": 2.3276085547634477, "grad_norm": 0.4564163386821747, "learning_rate": 1.2605708477070439e-06, "loss": 0.0714, "step": 7183 }, { "epoch": 2.327932598833441, "grad_norm": 0.5101970434188843, "learning_rate": 1.25940989883382e-06, "loss": 0.079, "step": 7184 }, { "epoch": 2.328256642903435, "grad_norm": 0.501133918762207, "learning_rate": 1.2582494077726131e-06, "loss": 0.0759, "step": 7185 }, { "epoch": 2.3285806869734285, "grad_norm": 0.4815445840358734, "learning_rate": 1.2570893746654579e-06, "loss": 0.0756, "step": 7186 }, { "epoch": 2.328904731043422, "grad_norm": 0.4634091258049011, "learning_rate": 1.2559297996543252e-06, "loss": 0.0689, "step": 7187 }, { "epoch": 2.3292287751134153, "grad_norm": 0.4817907512187958, "learning_rate": 1.254770682881143e-06, "loss": 0.0729, "step": 7188 }, { "epoch": 2.3295528191834087, "grad_norm": 0.48379072546958923, "learning_rate": 1.2536120244877692e-06, "loss": 0.0719, "step": 7189 }, { "epoch": 2.3298768632534026, "grad_norm": 0.4735885560512543, "learning_rate": 1.252453824616019e-06, "loss": 0.0724, "step": 7190 }, { "epoch": 2.330200907323396, "grad_norm": 0.48822975158691406, "learning_rate": 1.2512960834076404e-06, "loss": 0.075, "step": 7191 }, { "epoch": 2.3305249513933894, "grad_norm": 0.4866192042827606, "learning_rate": 1.2501388010043302e-06, "loss": 0.073, "step": 7192 }, { "epoch": 2.330848995463383, "grad_norm": 0.47962766885757446, "learning_rate": 1.2489819775477302e-06, "loss": 0.0706, "step": 7193 }, { "epoch": 2.3311730395333763, "grad_norm": 0.5421980023384094, "learning_rate": 1.2478256131794225e-06, "loss": 0.0795, "step": 7194 }, { "epoch": 2.33149708360337, "grad_norm": 0.47424715757369995, "learning_rate": 1.2466697080409378e-06, "loss": 0.0719, "step": 7195 }, { "epoch": 2.3318211276733636, "grad_norm": 0.47859951853752136, "learning_rate": 1.2455142622737448e-06, "loss": 0.0778, "step": 7196 }, { "epoch": 2.332145171743357, "grad_norm": 0.47769349813461304, "learning_rate": 1.2443592760192596e-06, "loss": 0.0753, "step": 7197 }, { "epoch": 2.3324692158133504, "grad_norm": 0.4621089994907379, "learning_rate": 1.2432047494188415e-06, "loss": 0.0679, "step": 7198 }, { "epoch": 2.3327932598833443, "grad_norm": 0.4695514142513275, "learning_rate": 1.2420506826137929e-06, "loss": 0.0706, "step": 7199 }, { "epoch": 2.3331173039533377, "grad_norm": 0.46257010102272034, "learning_rate": 1.240897075745362e-06, "loss": 0.0657, "step": 7200 }, { "epoch": 2.333441348023331, "grad_norm": 0.5223076343536377, "learning_rate": 1.2397439289547375e-06, "loss": 0.0798, "step": 7201 }, { "epoch": 2.3337653920933246, "grad_norm": 0.49697694182395935, "learning_rate": 1.2385912423830538e-06, "loss": 0.0708, "step": 7202 }, { "epoch": 2.3340894361633184, "grad_norm": 0.47291916608810425, "learning_rate": 1.2374390161713906e-06, "loss": 0.0723, "step": 7203 }, { "epoch": 2.334413480233312, "grad_norm": 0.4870017468929291, "learning_rate": 1.2362872504607659e-06, "loss": 0.0722, "step": 7204 }, { "epoch": 2.3347375243033053, "grad_norm": 0.508466362953186, "learning_rate": 1.2351359453921463e-06, "loss": 0.0757, "step": 7205 }, { "epoch": 2.3350615683732987, "grad_norm": 0.4754007160663605, "learning_rate": 1.2339851011064403e-06, "loss": 0.0714, "step": 7206 }, { "epoch": 2.335385612443292, "grad_norm": 0.4860064387321472, "learning_rate": 1.2328347177444993e-06, "loss": 0.0749, "step": 7207 }, { "epoch": 2.335709656513286, "grad_norm": 0.5065357685089111, "learning_rate": 1.2316847954471222e-06, "loss": 0.068, "step": 7208 }, { "epoch": 2.3360337005832794, "grad_norm": 0.514007031917572, "learning_rate": 1.230535334355043e-06, "loss": 0.0761, "step": 7209 }, { "epoch": 2.336357744653273, "grad_norm": 0.48480892181396484, "learning_rate": 1.2293863346089502e-06, "loss": 0.0676, "step": 7210 }, { "epoch": 2.3366817887232663, "grad_norm": 0.49941307306289673, "learning_rate": 1.2282377963494647e-06, "loss": 0.0734, "step": 7211 }, { "epoch": 2.3370058327932597, "grad_norm": 0.4956761598587036, "learning_rate": 1.2270897197171628e-06, "loss": 0.0735, "step": 7212 }, { "epoch": 2.3373298768632536, "grad_norm": 0.5128060579299927, "learning_rate": 1.2259421048525516e-06, "loss": 0.0753, "step": 7213 }, { "epoch": 2.337653920933247, "grad_norm": 0.46151235699653625, "learning_rate": 1.2247949518960938e-06, "loss": 0.0676, "step": 7214 }, { "epoch": 2.3379779650032404, "grad_norm": 0.5013076663017273, "learning_rate": 1.2236482609881857e-06, "loss": 0.0733, "step": 7215 }, { "epoch": 2.338302009073234, "grad_norm": 0.47443968057632446, "learning_rate": 1.2225020322691721e-06, "loss": 0.0695, "step": 7216 }, { "epoch": 2.3386260531432272, "grad_norm": 0.4645536541938782, "learning_rate": 1.2213562658793427e-06, "loss": 0.0695, "step": 7217 }, { "epoch": 2.338950097213221, "grad_norm": 0.4841172695159912, "learning_rate": 1.220210961958923e-06, "loss": 0.0673, "step": 7218 }, { "epoch": 2.3392741412832145, "grad_norm": 0.4824322760105133, "learning_rate": 1.2190661206480935e-06, "loss": 0.0712, "step": 7219 }, { "epoch": 2.339598185353208, "grad_norm": 0.4811258912086487, "learning_rate": 1.217921742086967e-06, "loss": 0.0704, "step": 7220 }, { "epoch": 2.3399222294232014, "grad_norm": 0.4667803943157196, "learning_rate": 1.2167778264156066e-06, "loss": 0.0675, "step": 7221 }, { "epoch": 2.3402462734931953, "grad_norm": 0.45968520641326904, "learning_rate": 1.215634373774015e-06, "loss": 0.0703, "step": 7222 }, { "epoch": 2.3405703175631887, "grad_norm": 0.5023238062858582, "learning_rate": 1.2144913843021405e-06, "loss": 0.0745, "step": 7223 }, { "epoch": 2.340894361633182, "grad_norm": 0.4562709629535675, "learning_rate": 1.2133488581398745e-06, "loss": 0.0645, "step": 7224 }, { "epoch": 2.3412184057031755, "grad_norm": 0.517026960849762, "learning_rate": 1.2122067954270505e-06, "loss": 0.075, "step": 7225 }, { "epoch": 2.3415424497731694, "grad_norm": 0.45475083589553833, "learning_rate": 1.2110651963034475e-06, "loss": 0.0697, "step": 7226 }, { "epoch": 2.341866493843163, "grad_norm": 0.527498722076416, "learning_rate": 1.2099240609087832e-06, "loss": 0.08, "step": 7227 }, { "epoch": 2.3421905379131562, "grad_norm": 0.48453056812286377, "learning_rate": 1.2087833893827227e-06, "loss": 0.0697, "step": 7228 }, { "epoch": 2.3425145819831497, "grad_norm": 0.5300154089927673, "learning_rate": 1.2076431818648744e-06, "loss": 0.0837, "step": 7229 }, { "epoch": 2.342838626053143, "grad_norm": 0.45958855748176575, "learning_rate": 1.206503438494787e-06, "loss": 0.0695, "step": 7230 }, { "epoch": 2.343162670123137, "grad_norm": 0.4899384379386902, "learning_rate": 1.2053641594119554e-06, "loss": 0.0711, "step": 7231 }, { "epoch": 2.3434867141931304, "grad_norm": 0.47130003571510315, "learning_rate": 1.204225344755815e-06, "loss": 0.0715, "step": 7232 }, { "epoch": 2.343810758263124, "grad_norm": 0.5333791971206665, "learning_rate": 1.203086994665747e-06, "loss": 0.083, "step": 7233 }, { "epoch": 2.344134802333117, "grad_norm": 0.4682372212409973, "learning_rate": 1.2019491092810754e-06, "loss": 0.0708, "step": 7234 }, { "epoch": 2.3444588464031106, "grad_norm": 0.44578856229782104, "learning_rate": 1.200811688741062e-06, "loss": 0.0649, "step": 7235 }, { "epoch": 2.3447828904731045, "grad_norm": 0.4772430658340454, "learning_rate": 1.1996747331849211e-06, "loss": 0.067, "step": 7236 }, { "epoch": 2.345106934543098, "grad_norm": 0.48849186301231384, "learning_rate": 1.1985382427518022e-06, "loss": 0.0751, "step": 7237 }, { "epoch": 2.3454309786130914, "grad_norm": 0.43585848808288574, "learning_rate": 1.1974022175808014e-06, "loss": 0.065, "step": 7238 }, { "epoch": 2.345755022683085, "grad_norm": 0.4897010326385498, "learning_rate": 1.1962666578109584e-06, "loss": 0.072, "step": 7239 }, { "epoch": 2.346079066753078, "grad_norm": 0.4719051718711853, "learning_rate": 1.1951315635812506e-06, "loss": 0.0684, "step": 7240 }, { "epoch": 2.346403110823072, "grad_norm": 0.48736074566841125, "learning_rate": 1.193996935030608e-06, "loss": 0.071, "step": 7241 }, { "epoch": 2.3467271548930655, "grad_norm": 0.5279738306999207, "learning_rate": 1.1928627722978931e-06, "loss": 0.0776, "step": 7242 }, { "epoch": 2.347051198963059, "grad_norm": 0.472042441368103, "learning_rate": 1.1917290755219212e-06, "loss": 0.067, "step": 7243 }, { "epoch": 2.3473752430330523, "grad_norm": 0.5123307704925537, "learning_rate": 1.190595844841441e-06, "loss": 0.0765, "step": 7244 }, { "epoch": 2.347699287103046, "grad_norm": 0.49297240376472473, "learning_rate": 1.1894630803951545e-06, "loss": 0.0745, "step": 7245 }, { "epoch": 2.3480233311730396, "grad_norm": 0.43049800395965576, "learning_rate": 1.1883307823216972e-06, "loss": 0.0657, "step": 7246 }, { "epoch": 2.348347375243033, "grad_norm": 0.4899204671382904, "learning_rate": 1.1871989507596516e-06, "loss": 0.0754, "step": 7247 }, { "epoch": 2.3486714193130265, "grad_norm": 0.47282499074935913, "learning_rate": 1.1860675858475452e-06, "loss": 0.0722, "step": 7248 }, { "epoch": 2.34899546338302, "grad_norm": 0.4940069317817688, "learning_rate": 1.1849366877238416e-06, "loss": 0.0732, "step": 7249 }, { "epoch": 2.3493195074530138, "grad_norm": 0.5003806948661804, "learning_rate": 1.183806256526958e-06, "loss": 0.0739, "step": 7250 }, { "epoch": 2.349643551523007, "grad_norm": 0.47378280758857727, "learning_rate": 1.1826762923952435e-06, "loss": 0.0649, "step": 7251 }, { "epoch": 2.3499675955930006, "grad_norm": 0.48774582147598267, "learning_rate": 1.1815467954669956e-06, "loss": 0.0698, "step": 7252 }, { "epoch": 2.350291639662994, "grad_norm": 0.5039346218109131, "learning_rate": 1.1804177658804549e-06, "loss": 0.0737, "step": 7253 }, { "epoch": 2.350615683732988, "grad_norm": 0.47579720616340637, "learning_rate": 1.1792892037738035e-06, "loss": 0.0662, "step": 7254 }, { "epoch": 2.3509397278029813, "grad_norm": 0.474889874458313, "learning_rate": 1.1781611092851664e-06, "loss": 0.0677, "step": 7255 }, { "epoch": 2.3512637718729748, "grad_norm": 0.4891919493675232, "learning_rate": 1.1770334825526103e-06, "loss": 0.0712, "step": 7256 }, { "epoch": 2.351587815942968, "grad_norm": 0.5071465373039246, "learning_rate": 1.1759063237141477e-06, "loss": 0.0736, "step": 7257 }, { "epoch": 2.3519118600129616, "grad_norm": 0.4828701615333557, "learning_rate": 1.1747796329077315e-06, "loss": 0.0761, "step": 7258 }, { "epoch": 2.3522359040829555, "grad_norm": 0.517517626285553, "learning_rate": 1.1736534102712566e-06, "loss": 0.0747, "step": 7259 }, { "epoch": 2.352559948152949, "grad_norm": 0.4479466676712036, "learning_rate": 1.172527655942562e-06, "loss": 0.0672, "step": 7260 }, { "epoch": 2.3528839922229423, "grad_norm": 0.4755881130695343, "learning_rate": 1.1714023700594296e-06, "loss": 0.0688, "step": 7261 }, { "epoch": 2.3532080362929357, "grad_norm": 0.4864247441291809, "learning_rate": 1.1702775527595833e-06, "loss": 0.0754, "step": 7262 }, { "epoch": 2.353532080362929, "grad_norm": 0.47295811772346497, "learning_rate": 1.1691532041806919e-06, "loss": 0.0725, "step": 7263 }, { "epoch": 2.353856124432923, "grad_norm": 0.49665603041648865, "learning_rate": 1.1680293244603592e-06, "loss": 0.0745, "step": 7264 }, { "epoch": 2.3541801685029164, "grad_norm": 0.5112475156784058, "learning_rate": 1.1669059137361444e-06, "loss": 0.0797, "step": 7265 }, { "epoch": 2.35450421257291, "grad_norm": 0.5126915574073792, "learning_rate": 1.1657829721455349e-06, "loss": 0.0762, "step": 7266 }, { "epoch": 2.3548282566429033, "grad_norm": 0.5092496275901794, "learning_rate": 1.1646604998259747e-06, "loss": 0.0729, "step": 7267 }, { "epoch": 2.3551523007128967, "grad_norm": 0.46421167254447937, "learning_rate": 1.1635384969148395e-06, "loss": 0.0711, "step": 7268 }, { "epoch": 2.3554763447828906, "grad_norm": 0.47837281227111816, "learning_rate": 1.162416963549452e-06, "loss": 0.0694, "step": 7269 }, { "epoch": 2.355800388852884, "grad_norm": 0.48247772455215454, "learning_rate": 1.161295899867077e-06, "loss": 0.0707, "step": 7270 }, { "epoch": 2.3561244329228774, "grad_norm": 0.48974937200546265, "learning_rate": 1.160175306004923e-06, "loss": 0.0711, "step": 7271 }, { "epoch": 2.356448476992871, "grad_norm": 0.5030304193496704, "learning_rate": 1.1590551821001406e-06, "loss": 0.0754, "step": 7272 }, { "epoch": 2.3567725210628647, "grad_norm": 0.4614565074443817, "learning_rate": 1.1579355282898175e-06, "loss": 0.0662, "step": 7273 }, { "epoch": 2.357096565132858, "grad_norm": 0.46911752223968506, "learning_rate": 1.1568163447109942e-06, "loss": 0.0711, "step": 7274 }, { "epoch": 2.3574206092028516, "grad_norm": 0.463739812374115, "learning_rate": 1.1556976315006445e-06, "loss": 0.0708, "step": 7275 }, { "epoch": 2.357744653272845, "grad_norm": 0.5395808815956116, "learning_rate": 1.154579388795689e-06, "loss": 0.0782, "step": 7276 }, { "epoch": 2.358068697342839, "grad_norm": 0.4603893458843231, "learning_rate": 1.1534616167329899e-06, "loss": 0.0702, "step": 7277 }, { "epoch": 2.3583927414128323, "grad_norm": 0.48242759704589844, "learning_rate": 1.1523443154493509e-06, "loss": 0.0726, "step": 7278 }, { "epoch": 2.3587167854828257, "grad_norm": 0.5345384478569031, "learning_rate": 1.1512274850815197e-06, "loss": 0.0765, "step": 7279 }, { "epoch": 2.359040829552819, "grad_norm": 0.48294004797935486, "learning_rate": 1.1501111257661856e-06, "loss": 0.076, "step": 7280 }, { "epoch": 2.3593648736228126, "grad_norm": 0.5108845233917236, "learning_rate": 1.148995237639981e-06, "loss": 0.0756, "step": 7281 }, { "epoch": 2.3596889176928064, "grad_norm": 0.49022331833839417, "learning_rate": 1.1478798208394775e-06, "loss": 0.0713, "step": 7282 }, { "epoch": 2.3600129617628, "grad_norm": 0.5263938903808594, "learning_rate": 1.146764875501193e-06, "loss": 0.0773, "step": 7283 }, { "epoch": 2.3603370058327933, "grad_norm": 0.4493767023086548, "learning_rate": 1.145650401761585e-06, "loss": 0.0687, "step": 7284 }, { "epoch": 2.3606610499027867, "grad_norm": 0.4451209306716919, "learning_rate": 1.1445363997570546e-06, "loss": 0.067, "step": 7285 }, { "epoch": 2.36098509397278, "grad_norm": 0.48385268449783325, "learning_rate": 1.1434228696239452e-06, "loss": 0.0726, "step": 7286 }, { "epoch": 2.361309138042774, "grad_norm": 0.5328781604766846, "learning_rate": 1.1423098114985437e-06, "loss": 0.0794, "step": 7287 }, { "epoch": 2.3616331821127674, "grad_norm": 0.46778643131256104, "learning_rate": 1.1411972255170727e-06, "loss": 0.0648, "step": 7288 }, { "epoch": 2.361957226182761, "grad_norm": 0.49069154262542725, "learning_rate": 1.1400851118157086e-06, "loss": 0.0691, "step": 7289 }, { "epoch": 2.3622812702527543, "grad_norm": 0.4818800091743469, "learning_rate": 1.1389734705305583e-06, "loss": 0.0677, "step": 7290 }, { "epoch": 2.3626053143227477, "grad_norm": 0.49379661679267883, "learning_rate": 1.137862301797677e-06, "loss": 0.077, "step": 7291 }, { "epoch": 2.3629293583927415, "grad_norm": 0.47791507840156555, "learning_rate": 1.136751605753062e-06, "loss": 0.0728, "step": 7292 }, { "epoch": 2.363253402462735, "grad_norm": 0.48617789149284363, "learning_rate": 1.1356413825326518e-06, "loss": 0.0687, "step": 7293 }, { "epoch": 2.3635774465327284, "grad_norm": 0.4953933656215668, "learning_rate": 1.134531632272327e-06, "loss": 0.0738, "step": 7294 }, { "epoch": 2.363901490602722, "grad_norm": 0.5016205310821533, "learning_rate": 1.1334223551079077e-06, "loss": 0.0768, "step": 7295 }, { "epoch": 2.3642255346727157, "grad_norm": 0.4939001202583313, "learning_rate": 1.132313551175163e-06, "loss": 0.0764, "step": 7296 }, { "epoch": 2.364549578742709, "grad_norm": 0.5102739334106445, "learning_rate": 1.131205220609795e-06, "loss": 0.0762, "step": 7297 }, { "epoch": 2.3648736228127025, "grad_norm": 0.520470917224884, "learning_rate": 1.1300973635474582e-06, "loss": 0.0735, "step": 7298 }, { "epoch": 2.365197666882696, "grad_norm": 0.4633205235004425, "learning_rate": 1.1289899801237392e-06, "loss": 0.0688, "step": 7299 }, { "epoch": 2.3655217109526894, "grad_norm": 0.4387751519680023, "learning_rate": 1.1278830704741716e-06, "loss": 0.0624, "step": 7300 }, { "epoch": 2.3658457550226832, "grad_norm": 0.4592018723487854, "learning_rate": 1.1267766347342318e-06, "loss": 0.0683, "step": 7301 }, { "epoch": 2.3661697990926767, "grad_norm": 0.4675895571708679, "learning_rate": 1.1256706730393363e-06, "loss": 0.0678, "step": 7302 }, { "epoch": 2.36649384316267, "grad_norm": 0.5069913268089294, "learning_rate": 1.1245651855248451e-06, "loss": 0.0771, "step": 7303 }, { "epoch": 2.3668178872326635, "grad_norm": 0.4496638774871826, "learning_rate": 1.1234601723260552e-06, "loss": 0.0675, "step": 7304 }, { "epoch": 2.3671419313026574, "grad_norm": 0.4736442565917969, "learning_rate": 1.1223556335782153e-06, "loss": 0.0722, "step": 7305 }, { "epoch": 2.367465975372651, "grad_norm": 0.4914408028125763, "learning_rate": 1.121251569416506e-06, "loss": 0.0746, "step": 7306 }, { "epoch": 2.3677900194426442, "grad_norm": 0.5002232193946838, "learning_rate": 1.120147979976055e-06, "loss": 0.0766, "step": 7307 }, { "epoch": 2.3681140635126376, "grad_norm": 0.47254279255867004, "learning_rate": 1.1190448653919323e-06, "loss": 0.0734, "step": 7308 }, { "epoch": 2.368438107582631, "grad_norm": 0.45834389328956604, "learning_rate": 1.1179422257991469e-06, "loss": 0.0701, "step": 7309 }, { "epoch": 2.368762151652625, "grad_norm": 0.4369758069515228, "learning_rate": 1.1168400613326519e-06, "loss": 0.0672, "step": 7310 }, { "epoch": 2.3690861957226184, "grad_norm": 0.4970327913761139, "learning_rate": 1.1157383721273413e-06, "loss": 0.074, "step": 7311 }, { "epoch": 2.369410239792612, "grad_norm": 0.4383191764354706, "learning_rate": 1.1146371583180532e-06, "loss": 0.0685, "step": 7312 }, { "epoch": 2.369734283862605, "grad_norm": 0.4854133129119873, "learning_rate": 1.1135364200395615e-06, "loss": 0.0707, "step": 7313 }, { "epoch": 2.3700583279325986, "grad_norm": 0.4635353684425354, "learning_rate": 1.112436157426589e-06, "loss": 0.0681, "step": 7314 }, { "epoch": 2.3703823720025925, "grad_norm": 0.4272409975528717, "learning_rate": 1.111336370613796e-06, "loss": 0.0595, "step": 7315 }, { "epoch": 2.370706416072586, "grad_norm": 0.4680071175098419, "learning_rate": 1.1102370597357858e-06, "loss": 0.0714, "step": 7316 }, { "epoch": 2.3710304601425793, "grad_norm": 0.49491339921951294, "learning_rate": 1.1091382249271037e-06, "loss": 0.0736, "step": 7317 }, { "epoch": 2.3713545042125728, "grad_norm": 0.4780588150024414, "learning_rate": 1.108039866322238e-06, "loss": 0.0718, "step": 7318 }, { "epoch": 2.371678548282566, "grad_norm": 0.5072447657585144, "learning_rate": 1.1069419840556128e-06, "loss": 0.0769, "step": 7319 }, { "epoch": 2.37200259235256, "grad_norm": 0.486572802066803, "learning_rate": 1.105844578261604e-06, "loss": 0.0731, "step": 7320 }, { "epoch": 2.3723266364225535, "grad_norm": 0.4918803572654724, "learning_rate": 1.1047476490745191e-06, "loss": 0.0775, "step": 7321 }, { "epoch": 2.372650680492547, "grad_norm": 0.48813650012016296, "learning_rate": 1.1036511966286123e-06, "loss": 0.0724, "step": 7322 }, { "epoch": 2.3729747245625403, "grad_norm": 0.4994529187679291, "learning_rate": 1.1025552210580803e-06, "loss": 0.0773, "step": 7323 }, { "epoch": 2.373298768632534, "grad_norm": 0.5022187232971191, "learning_rate": 1.1014597224970586e-06, "loss": 0.0747, "step": 7324 }, { "epoch": 2.3736228127025276, "grad_norm": 0.48002323508262634, "learning_rate": 1.1003647010796275e-06, "loss": 0.073, "step": 7325 }, { "epoch": 2.373946856772521, "grad_norm": 0.4666433334350586, "learning_rate": 1.099270156939803e-06, "loss": 0.0665, "step": 7326 }, { "epoch": 2.3742709008425145, "grad_norm": 0.4680749177932739, "learning_rate": 1.0981760902115518e-06, "loss": 0.068, "step": 7327 }, { "epoch": 2.3745949449125083, "grad_norm": 0.4894528090953827, "learning_rate": 1.0970825010287716e-06, "loss": 0.0742, "step": 7328 }, { "epoch": 2.3749189889825018, "grad_norm": 0.48894360661506653, "learning_rate": 1.0959893895253132e-06, "loss": 0.0713, "step": 7329 }, { "epoch": 2.375243033052495, "grad_norm": 0.49191632866859436, "learning_rate": 1.0948967558349581e-06, "loss": 0.0763, "step": 7330 }, { "epoch": 2.3755670771224886, "grad_norm": 0.48632246255874634, "learning_rate": 1.0938046000914365e-06, "loss": 0.0723, "step": 7331 }, { "epoch": 2.375891121192482, "grad_norm": 0.4645938575267792, "learning_rate": 1.0927129224284166e-06, "loss": 0.0713, "step": 7332 }, { "epoch": 2.376215165262476, "grad_norm": 0.4937532842159271, "learning_rate": 1.091621722979509e-06, "loss": 0.0726, "step": 7333 }, { "epoch": 2.3765392093324693, "grad_norm": 0.4999132454395294, "learning_rate": 1.0905310018782682e-06, "loss": 0.0781, "step": 7334 }, { "epoch": 2.3768632534024627, "grad_norm": 0.47685500979423523, "learning_rate": 1.0894407592581835e-06, "loss": 0.0714, "step": 7335 }, { "epoch": 2.377187297472456, "grad_norm": 0.4990130662918091, "learning_rate": 1.0883509952526956e-06, "loss": 0.0758, "step": 7336 }, { "epoch": 2.3775113415424496, "grad_norm": 0.507454514503479, "learning_rate": 1.0872617099951765e-06, "loss": 0.0792, "step": 7337 }, { "epoch": 2.3778353856124435, "grad_norm": 0.4756607115268707, "learning_rate": 1.0861729036189462e-06, "loss": 0.0714, "step": 7338 }, { "epoch": 2.378159429682437, "grad_norm": 0.4667145311832428, "learning_rate": 1.0850845762572638e-06, "loss": 0.0664, "step": 7339 }, { "epoch": 2.3784834737524303, "grad_norm": 0.4863603711128235, "learning_rate": 1.0839967280433294e-06, "loss": 0.0776, "step": 7340 }, { "epoch": 2.3788075178224237, "grad_norm": 0.490573525428772, "learning_rate": 1.0829093591102858e-06, "loss": 0.0795, "step": 7341 }, { "epoch": 2.379131561892417, "grad_norm": 0.48397448658943176, "learning_rate": 1.0818224695912178e-06, "loss": 0.0702, "step": 7342 }, { "epoch": 2.379455605962411, "grad_norm": 0.47108104825019836, "learning_rate": 1.0807360596191473e-06, "loss": 0.0706, "step": 7343 }, { "epoch": 2.3797796500324044, "grad_norm": 0.5208635926246643, "learning_rate": 1.0796501293270418e-06, "loss": 0.0744, "step": 7344 }, { "epoch": 2.380103694102398, "grad_norm": 0.5025206804275513, "learning_rate": 1.0785646788478083e-06, "loss": 0.0774, "step": 7345 }, { "epoch": 2.3804277381723913, "grad_norm": 0.5211962461471558, "learning_rate": 1.0774797083142957e-06, "loss": 0.0806, "step": 7346 }, { "epoch": 2.380751782242385, "grad_norm": 0.49372220039367676, "learning_rate": 1.0763952178592934e-06, "loss": 0.0758, "step": 7347 }, { "epoch": 2.3810758263123786, "grad_norm": 0.5154086351394653, "learning_rate": 1.0753112076155335e-06, "loss": 0.0788, "step": 7348 }, { "epoch": 2.381399870382372, "grad_norm": 0.49311190843582153, "learning_rate": 1.0742276777156896e-06, "loss": 0.0739, "step": 7349 }, { "epoch": 2.3817239144523654, "grad_norm": 0.5144601464271545, "learning_rate": 1.0731446282923702e-06, "loss": 0.0743, "step": 7350 }, { "epoch": 2.3820479585223593, "grad_norm": 0.510590136051178, "learning_rate": 1.0720620594781361e-06, "loss": 0.0779, "step": 7351 }, { "epoch": 2.3823720025923527, "grad_norm": 0.4440791606903076, "learning_rate": 1.0709799714054796e-06, "loss": 0.0671, "step": 7352 }, { "epoch": 2.382696046662346, "grad_norm": 0.46642884612083435, "learning_rate": 1.0698983642068384e-06, "loss": 0.0719, "step": 7353 }, { "epoch": 2.3830200907323396, "grad_norm": 0.4936695098876953, "learning_rate": 1.068817238014591e-06, "loss": 0.074, "step": 7354 }, { "epoch": 2.383344134802333, "grad_norm": 0.49078696966171265, "learning_rate": 1.0677365929610573e-06, "loss": 0.0715, "step": 7355 }, { "epoch": 2.383668178872327, "grad_norm": 0.4980461895465851, "learning_rate": 1.0666564291784985e-06, "loss": 0.076, "step": 7356 }, { "epoch": 2.3839922229423203, "grad_norm": 0.47679758071899414, "learning_rate": 1.0655767467991124e-06, "loss": 0.0687, "step": 7357 }, { "epoch": 2.3843162670123137, "grad_norm": 0.4713037312030792, "learning_rate": 1.0644975459550466e-06, "loss": 0.0733, "step": 7358 }, { "epoch": 2.384640311082307, "grad_norm": 0.482093870639801, "learning_rate": 1.0634188267783807e-06, "loss": 0.0742, "step": 7359 }, { "epoch": 2.3849643551523005, "grad_norm": 0.507264256477356, "learning_rate": 1.0623405894011435e-06, "loss": 0.0747, "step": 7360 }, { "epoch": 2.3852883992222944, "grad_norm": 0.48084744811058044, "learning_rate": 1.0612628339552972e-06, "loss": 0.0723, "step": 7361 }, { "epoch": 2.385612443292288, "grad_norm": 0.4838222563266754, "learning_rate": 1.06018556057275e-06, "loss": 0.0691, "step": 7362 }, { "epoch": 2.3859364873622813, "grad_norm": 0.47494423389434814, "learning_rate": 1.0591087693853503e-06, "loss": 0.0727, "step": 7363 }, { "epoch": 2.3862605314322747, "grad_norm": 0.4764205813407898, "learning_rate": 1.0580324605248865e-06, "loss": 0.0713, "step": 7364 }, { "epoch": 2.386584575502268, "grad_norm": 0.48809897899627686, "learning_rate": 1.0569566341230892e-06, "loss": 0.073, "step": 7365 }, { "epoch": 2.386908619572262, "grad_norm": 0.5656302571296692, "learning_rate": 1.0558812903116273e-06, "loss": 0.0796, "step": 7366 }, { "epoch": 2.3872326636422554, "grad_norm": 0.4584273397922516, "learning_rate": 1.0548064292221134e-06, "loss": 0.0688, "step": 7367 }, { "epoch": 2.387556707712249, "grad_norm": 0.45454642176628113, "learning_rate": 1.0537320509860998e-06, "loss": 0.0687, "step": 7368 }, { "epoch": 2.3878807517822422, "grad_norm": 0.501089334487915, "learning_rate": 1.0526581557350802e-06, "loss": 0.0705, "step": 7369 }, { "epoch": 2.3882047958522357, "grad_norm": 0.4731651842594147, "learning_rate": 1.0515847436004894e-06, "loss": 0.0705, "step": 7370 }, { "epoch": 2.3885288399222295, "grad_norm": 0.46863463521003723, "learning_rate": 1.0505118147137028e-06, "loss": 0.0721, "step": 7371 }, { "epoch": 2.388852883992223, "grad_norm": 0.5042905211448669, "learning_rate": 1.0494393692060355e-06, "loss": 0.0705, "step": 7372 }, { "epoch": 2.3891769280622164, "grad_norm": 0.47537532448768616, "learning_rate": 1.0483674072087462e-06, "loss": 0.0658, "step": 7373 }, { "epoch": 2.38950097213221, "grad_norm": 0.5079628825187683, "learning_rate": 1.0472959288530305e-06, "loss": 0.0747, "step": 7374 }, { "epoch": 2.3898250162022037, "grad_norm": 0.46602755784988403, "learning_rate": 1.0462249342700282e-06, "loss": 0.0699, "step": 7375 }, { "epoch": 2.390149060272197, "grad_norm": 0.49709802865982056, "learning_rate": 1.0451544235908179e-06, "loss": 0.0683, "step": 7376 }, { "epoch": 2.3904731043421905, "grad_norm": 0.4795989394187927, "learning_rate": 1.0440843969464209e-06, "loss": 0.0715, "step": 7377 }, { "epoch": 2.390797148412184, "grad_norm": 0.48927298188209534, "learning_rate": 1.0430148544677971e-06, "loss": 0.0726, "step": 7378 }, { "epoch": 2.391121192482178, "grad_norm": 0.4737934172153473, "learning_rate": 1.041945796285848e-06, "loss": 0.07, "step": 7379 }, { "epoch": 2.3914452365521712, "grad_norm": 0.4971829354763031, "learning_rate": 1.040877222531419e-06, "loss": 0.072, "step": 7380 }, { "epoch": 2.3917692806221647, "grad_norm": 0.4656839668750763, "learning_rate": 1.0398091333352872e-06, "loss": 0.073, "step": 7381 }, { "epoch": 2.392093324692158, "grad_norm": 0.47131016850471497, "learning_rate": 1.0387415288281826e-06, "loss": 0.068, "step": 7382 }, { "epoch": 2.3924173687621515, "grad_norm": 0.5292643904685974, "learning_rate": 1.0376744091407649e-06, "loss": 0.0816, "step": 7383 }, { "epoch": 2.3927414128321454, "grad_norm": 0.46024826169013977, "learning_rate": 1.036607774403643e-06, "loss": 0.0672, "step": 7384 }, { "epoch": 2.393065456902139, "grad_norm": 0.498965322971344, "learning_rate": 1.03554162474736e-06, "loss": 0.0725, "step": 7385 }, { "epoch": 2.393389500972132, "grad_norm": 0.5312982797622681, "learning_rate": 1.0344759603024029e-06, "loss": 0.0756, "step": 7386 }, { "epoch": 2.3937135450421256, "grad_norm": 0.46625569462776184, "learning_rate": 1.0334107811992005e-06, "loss": 0.0721, "step": 7387 }, { "epoch": 2.394037589112119, "grad_norm": 0.4639773666858673, "learning_rate": 1.032346087568117e-06, "loss": 0.0724, "step": 7388 }, { "epoch": 2.394361633182113, "grad_norm": 0.4938350021839142, "learning_rate": 1.031281879539464e-06, "loss": 0.0746, "step": 7389 }, { "epoch": 2.3946856772521063, "grad_norm": 0.5003551244735718, "learning_rate": 1.0302181572434866e-06, "loss": 0.0743, "step": 7390 }, { "epoch": 2.3950097213220998, "grad_norm": 0.45628494024276733, "learning_rate": 1.029154920810379e-06, "loss": 0.0714, "step": 7391 }, { "epoch": 2.395333765392093, "grad_norm": 0.46569904685020447, "learning_rate": 1.0280921703702672e-06, "loss": 0.0702, "step": 7392 }, { "epoch": 2.3956578094620866, "grad_norm": 0.5279820561408997, "learning_rate": 1.0270299060532224e-06, "loss": 0.076, "step": 7393 }, { "epoch": 2.3959818535320805, "grad_norm": 0.48654380440711975, "learning_rate": 1.0259681279892558e-06, "loss": 0.0702, "step": 7394 }, { "epoch": 2.396305897602074, "grad_norm": 0.47849327325820923, "learning_rate": 1.0249068363083193e-06, "loss": 0.0684, "step": 7395 }, { "epoch": 2.3966299416720673, "grad_norm": 0.49411529302597046, "learning_rate": 1.023846031140303e-06, "loss": 0.0707, "step": 7396 }, { "epoch": 2.3969539857420608, "grad_norm": 0.5243039131164551, "learning_rate": 1.0227857126150425e-06, "loss": 0.0747, "step": 7397 }, { "epoch": 2.3972780298120546, "grad_norm": 0.5061764121055603, "learning_rate": 1.021725880862307e-06, "loss": 0.0743, "step": 7398 }, { "epoch": 2.397602073882048, "grad_norm": 0.5003470182418823, "learning_rate": 1.0206665360118106e-06, "loss": 0.0756, "step": 7399 }, { "epoch": 2.3979261179520415, "grad_norm": 0.44817647337913513, "learning_rate": 1.0196076781932078e-06, "loss": 0.0671, "step": 7400 }, { "epoch": 2.398250162022035, "grad_norm": 0.5085217952728271, "learning_rate": 1.018549307536092e-06, "loss": 0.0757, "step": 7401 }, { "epoch": 2.3985742060920288, "grad_norm": 0.47145208716392517, "learning_rate": 1.0174914241699968e-06, "loss": 0.0721, "step": 7402 }, { "epoch": 2.398898250162022, "grad_norm": 0.45912501215934753, "learning_rate": 1.0164340282243984e-06, "loss": 0.0664, "step": 7403 }, { "epoch": 2.3992222942320156, "grad_norm": 0.506362795829773, "learning_rate": 1.0153771198287116e-06, "loss": 0.0748, "step": 7404 }, { "epoch": 2.399546338302009, "grad_norm": 0.47258928418159485, "learning_rate": 1.0143206991122888e-06, "loss": 0.0699, "step": 7405 }, { "epoch": 2.3998703823720025, "grad_norm": 0.5271346569061279, "learning_rate": 1.013264766204431e-06, "loss": 0.078, "step": 7406 }, { "epoch": 2.4001944264419963, "grad_norm": 0.4933684170246124, "learning_rate": 1.0122093212343698e-06, "loss": 0.0729, "step": 7407 }, { "epoch": 2.4005184705119897, "grad_norm": 0.47689729928970337, "learning_rate": 1.0111543643312833e-06, "loss": 0.0666, "step": 7408 }, { "epoch": 2.400842514581983, "grad_norm": 0.4799298048019409, "learning_rate": 1.010099895624288e-06, "loss": 0.0708, "step": 7409 }, { "epoch": 2.4011665586519766, "grad_norm": 0.4732402563095093, "learning_rate": 1.0090459152424382e-06, "loss": 0.0699, "step": 7410 }, { "epoch": 2.40149060272197, "grad_norm": 0.49609676003456116, "learning_rate": 1.0079924233147353e-06, "loss": 0.0752, "step": 7411 }, { "epoch": 2.401814646791964, "grad_norm": 0.49415668845176697, "learning_rate": 1.0069394199701115e-06, "loss": 0.0711, "step": 7412 }, { "epoch": 2.4021386908619573, "grad_norm": 0.4801064729690552, "learning_rate": 1.0058869053374499e-06, "loss": 0.0702, "step": 7413 }, { "epoch": 2.4024627349319507, "grad_norm": 0.5357898473739624, "learning_rate": 1.004834879545562e-06, "loss": 0.0803, "step": 7414 }, { "epoch": 2.402786779001944, "grad_norm": 0.48465752601623535, "learning_rate": 1.003783342723212e-06, "loss": 0.0696, "step": 7415 }, { "epoch": 2.4031108230719376, "grad_norm": 0.5104695558547974, "learning_rate": 1.0027322949990925e-06, "loss": 0.0765, "step": 7416 }, { "epoch": 2.4034348671419314, "grad_norm": 0.510945200920105, "learning_rate": 1.0016817365018438e-06, "loss": 0.0756, "step": 7417 }, { "epoch": 2.403758911211925, "grad_norm": 0.4508892893791199, "learning_rate": 1.0006316673600436e-06, "loss": 0.0676, "step": 7418 }, { "epoch": 2.4040829552819183, "grad_norm": 0.5109551548957825, "learning_rate": 9.995820877022105e-07, "loss": 0.076, "step": 7419 }, { "epoch": 2.4044069993519117, "grad_norm": 0.4841241240501404, "learning_rate": 9.985329976568042e-07, "loss": 0.075, "step": 7420 }, { "epoch": 2.404731043421905, "grad_norm": 0.4857771396636963, "learning_rate": 9.974843973522203e-07, "loss": 0.0721, "step": 7421 }, { "epoch": 2.405055087491899, "grad_norm": 0.4629274308681488, "learning_rate": 9.964362869167993e-07, "loss": 0.0699, "step": 7422 }, { "epoch": 2.4053791315618924, "grad_norm": 0.5079348087310791, "learning_rate": 9.953886664788186e-07, "loss": 0.0766, "step": 7423 }, { "epoch": 2.405703175631886, "grad_norm": 0.4648328423500061, "learning_rate": 9.943415361664982e-07, "loss": 0.0662, "step": 7424 }, { "epoch": 2.4060272197018793, "grad_norm": 0.4789850115776062, "learning_rate": 9.932948961079952e-07, "loss": 0.0696, "step": 7425 }, { "epoch": 2.406351263771873, "grad_norm": 0.47770291566848755, "learning_rate": 9.922487464314096e-07, "loss": 0.0696, "step": 7426 }, { "epoch": 2.4066753078418666, "grad_norm": 0.47265005111694336, "learning_rate": 9.912030872647793e-07, "loss": 0.0681, "step": 7427 }, { "epoch": 2.40699935191186, "grad_norm": 0.4607384502887726, "learning_rate": 9.901579187360844e-07, "loss": 0.0731, "step": 7428 }, { "epoch": 2.4073233959818534, "grad_norm": 0.5032811760902405, "learning_rate": 9.891132409732402e-07, "loss": 0.0709, "step": 7429 }, { "epoch": 2.4076474400518473, "grad_norm": 0.49219810962677, "learning_rate": 9.880690541041072e-07, "loss": 0.0709, "step": 7430 }, { "epoch": 2.4079714841218407, "grad_norm": 0.5096017122268677, "learning_rate": 9.870253582564838e-07, "loss": 0.079, "step": 7431 }, { "epoch": 2.408295528191834, "grad_norm": 0.48637381196022034, "learning_rate": 9.859821535581072e-07, "loss": 0.0695, "step": 7432 }, { "epoch": 2.4086195722618275, "grad_norm": 0.42044156789779663, "learning_rate": 9.84939440136658e-07, "loss": 0.06, "step": 7433 }, { "epoch": 2.408943616331821, "grad_norm": 0.5293217301368713, "learning_rate": 9.838972181197498e-07, "loss": 0.0775, "step": 7434 }, { "epoch": 2.409267660401815, "grad_norm": 0.49000880122184753, "learning_rate": 9.82855487634946e-07, "loss": 0.0677, "step": 7435 }, { "epoch": 2.4095917044718083, "grad_norm": 0.4933015704154968, "learning_rate": 9.818142488097388e-07, "loss": 0.0683, "step": 7436 }, { "epoch": 2.4099157485418017, "grad_norm": 0.4835686683654785, "learning_rate": 9.807735017715713e-07, "loss": 0.0741, "step": 7437 }, { "epoch": 2.410239792611795, "grad_norm": 0.4612524211406708, "learning_rate": 9.797332466478165e-07, "loss": 0.0726, "step": 7438 }, { "epoch": 2.4105638366817885, "grad_norm": 0.592125654220581, "learning_rate": 9.786934835657935e-07, "loss": 0.0808, "step": 7439 }, { "epoch": 2.4108878807517824, "grad_norm": 0.4904743731021881, "learning_rate": 9.776542126527582e-07, "loss": 0.0746, "step": 7440 }, { "epoch": 2.411211924821776, "grad_norm": 0.499237596988678, "learning_rate": 9.766154340359085e-07, "loss": 0.0733, "step": 7441 }, { "epoch": 2.4115359688917692, "grad_norm": 0.4838772416114807, "learning_rate": 9.755771478423815e-07, "loss": 0.0703, "step": 7442 }, { "epoch": 2.4118600129617627, "grad_norm": 0.481381893157959, "learning_rate": 9.745393541992492e-07, "loss": 0.0694, "step": 7443 }, { "epoch": 2.412184057031756, "grad_norm": 0.4760250747203827, "learning_rate": 9.735020532335338e-07, "loss": 0.0725, "step": 7444 }, { "epoch": 2.41250810110175, "grad_norm": 0.4487675726413727, "learning_rate": 9.724652450721855e-07, "loss": 0.0671, "step": 7445 }, { "epoch": 2.4128321451717434, "grad_norm": 0.4938701093196869, "learning_rate": 9.71428929842102e-07, "loss": 0.0761, "step": 7446 }, { "epoch": 2.413156189241737, "grad_norm": 0.49953126907348633, "learning_rate": 9.703931076701178e-07, "loss": 0.0661, "step": 7447 }, { "epoch": 2.4134802333117302, "grad_norm": 0.46972179412841797, "learning_rate": 9.693577786830077e-07, "loss": 0.0726, "step": 7448 }, { "epoch": 2.413804277381724, "grad_norm": 0.4710189700126648, "learning_rate": 9.683229430074859e-07, "loss": 0.0735, "step": 7449 }, { "epoch": 2.4141283214517175, "grad_norm": 0.5037508010864258, "learning_rate": 9.67288600770206e-07, "loss": 0.0794, "step": 7450 }, { "epoch": 2.414452365521711, "grad_norm": 0.49730008840560913, "learning_rate": 9.662547520977632e-07, "loss": 0.0756, "step": 7451 }, { "epoch": 2.4147764095917044, "grad_norm": 0.5043180584907532, "learning_rate": 9.65221397116688e-07, "loss": 0.0784, "step": 7452 }, { "epoch": 2.4151004536616982, "grad_norm": 0.4728096127510071, "learning_rate": 9.641885359534536e-07, "loss": 0.0665, "step": 7453 }, { "epoch": 2.4154244977316917, "grad_norm": 0.5002241134643555, "learning_rate": 9.631561687344733e-07, "loss": 0.0695, "step": 7454 }, { "epoch": 2.415748541801685, "grad_norm": 0.4322189390659332, "learning_rate": 9.621242955860977e-07, "loss": 0.0659, "step": 7455 }, { "epoch": 2.4160725858716785, "grad_norm": 0.4593190848827362, "learning_rate": 9.610929166346188e-07, "loss": 0.0672, "step": 7456 }, { "epoch": 2.416396629941672, "grad_norm": 0.5169757008552551, "learning_rate": 9.60062032006267e-07, "loss": 0.0748, "step": 7457 }, { "epoch": 2.416720674011666, "grad_norm": 0.46424371004104614, "learning_rate": 9.590316418272134e-07, "loss": 0.0673, "step": 7458 }, { "epoch": 2.417044718081659, "grad_norm": 0.46779513359069824, "learning_rate": 9.58001746223568e-07, "loss": 0.0676, "step": 7459 }, { "epoch": 2.4173687621516526, "grad_norm": 0.4487968981266022, "learning_rate": 9.569723453213785e-07, "loss": 0.0645, "step": 7460 }, { "epoch": 2.417692806221646, "grad_norm": 0.44602566957473755, "learning_rate": 9.559434392466337e-07, "loss": 0.0657, "step": 7461 }, { "epoch": 2.4180168502916395, "grad_norm": 0.46878647804260254, "learning_rate": 9.549150281252633e-07, "loss": 0.072, "step": 7462 }, { "epoch": 2.4183408943616334, "grad_norm": 0.48305004835128784, "learning_rate": 9.538871120831332e-07, "loss": 0.0699, "step": 7463 }, { "epoch": 2.4186649384316268, "grad_norm": 0.4601181149482727, "learning_rate": 9.52859691246053e-07, "loss": 0.0674, "step": 7464 }, { "epoch": 2.41898898250162, "grad_norm": 0.5139188170433044, "learning_rate": 9.518327657397647e-07, "loss": 0.0735, "step": 7465 }, { "epoch": 2.4193130265716136, "grad_norm": 0.48624053597450256, "learning_rate": 9.508063356899588e-07, "loss": 0.0701, "step": 7466 }, { "epoch": 2.419637070641607, "grad_norm": 0.5249286890029907, "learning_rate": 9.497804012222561e-07, "loss": 0.0762, "step": 7467 }, { "epoch": 2.419961114711601, "grad_norm": 0.45333951711654663, "learning_rate": 9.48754962462225e-07, "loss": 0.0659, "step": 7468 }, { "epoch": 2.4202851587815943, "grad_norm": 0.44248318672180176, "learning_rate": 9.477300195353667e-07, "loss": 0.063, "step": 7469 }, { "epoch": 2.4206092028515878, "grad_norm": 0.4752964377403259, "learning_rate": 9.467055725671248e-07, "loss": 0.0727, "step": 7470 }, { "epoch": 2.420933246921581, "grad_norm": 0.49173787236213684, "learning_rate": 9.456816216828818e-07, "loss": 0.0747, "step": 7471 }, { "epoch": 2.4212572909915746, "grad_norm": 0.5055178999900818, "learning_rate": 9.446581670079597e-07, "loss": 0.0785, "step": 7472 }, { "epoch": 2.4215813350615685, "grad_norm": 0.45134517550468445, "learning_rate": 9.436352086676203e-07, "loss": 0.0674, "step": 7473 }, { "epoch": 2.421905379131562, "grad_norm": 0.515214204788208, "learning_rate": 9.426127467870599e-07, "loss": 0.077, "step": 7474 }, { "epoch": 2.4222294232015553, "grad_norm": 0.4785626232624054, "learning_rate": 9.415907814914238e-07, "loss": 0.0697, "step": 7475 }, { "epoch": 2.4225534672715487, "grad_norm": 0.4874456524848938, "learning_rate": 9.405693129057858e-07, "loss": 0.0756, "step": 7476 }, { "epoch": 2.4228775113415426, "grad_norm": 0.44153448939323425, "learning_rate": 9.395483411551659e-07, "loss": 0.0646, "step": 7477 }, { "epoch": 2.423201555411536, "grad_norm": 0.47840017080307007, "learning_rate": 9.385278663645209e-07, "loss": 0.0679, "step": 7478 }, { "epoch": 2.4235255994815295, "grad_norm": 0.5270043015480042, "learning_rate": 9.375078886587469e-07, "loss": 0.0784, "step": 7479 }, { "epoch": 2.423849643551523, "grad_norm": 0.47315293550491333, "learning_rate": 9.364884081626791e-07, "loss": 0.0707, "step": 7480 }, { "epoch": 2.4241736876215167, "grad_norm": 0.5027167201042175, "learning_rate": 9.354694250010926e-07, "loss": 0.0786, "step": 7481 }, { "epoch": 2.42449773169151, "grad_norm": 0.516635537147522, "learning_rate": 9.344509392987023e-07, "loss": 0.0767, "step": 7482 }, { "epoch": 2.4248217757615036, "grad_norm": 0.5132464170455933, "learning_rate": 9.334329511801577e-07, "loss": 0.0734, "step": 7483 }, { "epoch": 2.425145819831497, "grad_norm": 0.46370166540145874, "learning_rate": 9.324154607700525e-07, "loss": 0.0741, "step": 7484 }, { "epoch": 2.4254698639014904, "grad_norm": 0.5340031385421753, "learning_rate": 9.313984681929178e-07, "loss": 0.0734, "step": 7485 }, { "epoch": 2.4257939079714843, "grad_norm": 0.45933422446250916, "learning_rate": 9.303819735732234e-07, "loss": 0.0708, "step": 7486 }, { "epoch": 2.4261179520414777, "grad_norm": 0.4411062002182007, "learning_rate": 9.29365977035378e-07, "loss": 0.0656, "step": 7487 }, { "epoch": 2.426441996111471, "grad_norm": 0.4746846556663513, "learning_rate": 9.283504787037322e-07, "loss": 0.0683, "step": 7488 }, { "epoch": 2.4267660401814646, "grad_norm": 0.4785168170928955, "learning_rate": 9.273354787025685e-07, "loss": 0.0698, "step": 7489 }, { "epoch": 2.427090084251458, "grad_norm": 0.4767061471939087, "learning_rate": 9.263209771561182e-07, "loss": 0.0694, "step": 7490 }, { "epoch": 2.427414128321452, "grad_norm": 0.47101208567619324, "learning_rate": 9.253069741885429e-07, "loss": 0.07, "step": 7491 }, { "epoch": 2.4277381723914453, "grad_norm": 0.47950929403305054, "learning_rate": 9.242934699239476e-07, "loss": 0.0669, "step": 7492 }, { "epoch": 2.4280622164614387, "grad_norm": 0.47910645604133606, "learning_rate": 9.232804644863757e-07, "loss": 0.0686, "step": 7493 }, { "epoch": 2.428386260531432, "grad_norm": 0.480186402797699, "learning_rate": 9.222679579998095e-07, "loss": 0.0678, "step": 7494 }, { "epoch": 2.4287103046014256, "grad_norm": 0.5079746842384338, "learning_rate": 9.212559505881707e-07, "loss": 0.0784, "step": 7495 }, { "epoch": 2.4290343486714194, "grad_norm": 0.49334797263145447, "learning_rate": 9.202444423753159e-07, "loss": 0.0729, "step": 7496 }, { "epoch": 2.429358392741413, "grad_norm": 0.544931948184967, "learning_rate": 9.192334334850489e-07, "loss": 0.0851, "step": 7497 }, { "epoch": 2.4296824368114063, "grad_norm": 0.5725739002227783, "learning_rate": 9.182229240411023e-07, "loss": 0.0811, "step": 7498 }, { "epoch": 2.4300064808813997, "grad_norm": 0.47585466504096985, "learning_rate": 9.172129141671571e-07, "loss": 0.071, "step": 7499 }, { "epoch": 2.4303305249513936, "grad_norm": 0.4769103229045868, "learning_rate": 9.162034039868262e-07, "loss": 0.0703, "step": 7500 }, { "epoch": 2.430654569021387, "grad_norm": 0.47839346528053284, "learning_rate": 9.15194393623664e-07, "loss": 0.0741, "step": 7501 }, { "epoch": 2.4309786130913804, "grad_norm": 0.46446266770362854, "learning_rate": 9.141858832011641e-07, "loss": 0.0692, "step": 7502 }, { "epoch": 2.431302657161374, "grad_norm": 0.5156646370887756, "learning_rate": 9.131778728427582e-07, "loss": 0.0802, "step": 7503 }, { "epoch": 2.4316267012313677, "grad_norm": 0.5064626932144165, "learning_rate": 9.121703626718187e-07, "loss": 0.0749, "step": 7504 }, { "epoch": 2.431950745301361, "grad_norm": 0.45814090967178345, "learning_rate": 9.111633528116509e-07, "loss": 0.0686, "step": 7505 }, { "epoch": 2.4322747893713546, "grad_norm": 0.516158938407898, "learning_rate": 9.101568433855084e-07, "loss": 0.0751, "step": 7506 }, { "epoch": 2.432598833441348, "grad_norm": 0.5115140676498413, "learning_rate": 9.091508345165739e-07, "loss": 0.0768, "step": 7507 }, { "epoch": 2.4329228775113414, "grad_norm": 0.4667341709136963, "learning_rate": 9.081453263279749e-07, "loss": 0.0741, "step": 7508 }, { "epoch": 2.4332469215813353, "grad_norm": 0.5099531412124634, "learning_rate": 9.071403189427757e-07, "loss": 0.0712, "step": 7509 }, { "epoch": 2.4335709656513287, "grad_norm": 0.4739290773868561, "learning_rate": 9.061358124839798e-07, "loss": 0.0677, "step": 7510 }, { "epoch": 2.433895009721322, "grad_norm": 0.4962294101715088, "learning_rate": 9.051318070745285e-07, "loss": 0.0782, "step": 7511 }, { "epoch": 2.4342190537913155, "grad_norm": 0.4872860014438629, "learning_rate": 9.041283028373044e-07, "loss": 0.0712, "step": 7512 }, { "epoch": 2.434543097861309, "grad_norm": 0.4947209358215332, "learning_rate": 9.031252998951229e-07, "loss": 0.0765, "step": 7513 }, { "epoch": 2.434867141931303, "grad_norm": 0.4560416340827942, "learning_rate": 9.021227983707442e-07, "loss": 0.0742, "step": 7514 }, { "epoch": 2.4351911860012962, "grad_norm": 0.4807344079017639, "learning_rate": 9.011207983868647e-07, "loss": 0.0703, "step": 7515 }, { "epoch": 2.4355152300712897, "grad_norm": 0.5113034844398499, "learning_rate": 9.001193000661191e-07, "loss": 0.0791, "step": 7516 }, { "epoch": 2.435839274141283, "grad_norm": 0.4940589964389801, "learning_rate": 8.991183035310813e-07, "loss": 0.0709, "step": 7517 }, { "epoch": 2.4361633182112765, "grad_norm": 0.4884170889854431, "learning_rate": 8.98117808904263e-07, "loss": 0.0708, "step": 7518 }, { "epoch": 2.4364873622812704, "grad_norm": 0.47042766213417053, "learning_rate": 8.971178163081173e-07, "loss": 0.0704, "step": 7519 }, { "epoch": 2.436811406351264, "grad_norm": 0.4856167733669281, "learning_rate": 8.961183258650297e-07, "loss": 0.0732, "step": 7520 }, { "epoch": 2.4371354504212572, "grad_norm": 0.4780847430229187, "learning_rate": 8.951193376973321e-07, "loss": 0.071, "step": 7521 }, { "epoch": 2.4374594944912507, "grad_norm": 0.46283453702926636, "learning_rate": 8.941208519272876e-07, "loss": 0.0684, "step": 7522 }, { "epoch": 2.4377835385612445, "grad_norm": 0.4621603488922119, "learning_rate": 8.931228686771048e-07, "loss": 0.0681, "step": 7523 }, { "epoch": 2.438107582631238, "grad_norm": 0.4744093716144562, "learning_rate": 8.92125388068924e-07, "loss": 0.0697, "step": 7524 }, { "epoch": 2.4384316267012314, "grad_norm": 0.47922131419181824, "learning_rate": 8.911284102248286e-07, "loss": 0.0729, "step": 7525 }, { "epoch": 2.438755670771225, "grad_norm": 0.4861237704753876, "learning_rate": 8.901319352668397e-07, "loss": 0.0723, "step": 7526 }, { "epoch": 2.439079714841218, "grad_norm": 0.46727433800697327, "learning_rate": 8.891359633169134e-07, "loss": 0.0657, "step": 7527 }, { "epoch": 2.439403758911212, "grad_norm": 0.4871031641960144, "learning_rate": 8.881404944969507e-07, "loss": 0.0725, "step": 7528 }, { "epoch": 2.4397278029812055, "grad_norm": 0.5125278830528259, "learning_rate": 8.871455289287839e-07, "loss": 0.0765, "step": 7529 }, { "epoch": 2.440051847051199, "grad_norm": 0.5060690641403198, "learning_rate": 8.861510667341905e-07, "loss": 0.0721, "step": 7530 }, { "epoch": 2.4403758911211924, "grad_norm": 0.48133599758148193, "learning_rate": 8.851571080348809e-07, "loss": 0.0716, "step": 7531 }, { "epoch": 2.440699935191186, "grad_norm": 0.4717944264411926, "learning_rate": 8.84163652952506e-07, "loss": 0.0696, "step": 7532 }, { "epoch": 2.4410239792611796, "grad_norm": 0.4690879285335541, "learning_rate": 8.831707016086561e-07, "loss": 0.0698, "step": 7533 }, { "epoch": 2.441348023331173, "grad_norm": 0.4657110273838043, "learning_rate": 8.821782541248575e-07, "loss": 0.0654, "step": 7534 }, { "epoch": 2.4416720674011665, "grad_norm": 0.44634830951690674, "learning_rate": 8.811863106225788e-07, "loss": 0.0632, "step": 7535 }, { "epoch": 2.44199611147116, "grad_norm": 0.43520429730415344, "learning_rate": 8.8019487122322e-07, "loss": 0.0632, "step": 7536 }, { "epoch": 2.442320155541154, "grad_norm": 0.4770278334617615, "learning_rate": 8.792039360481286e-07, "loss": 0.0706, "step": 7537 }, { "epoch": 2.442644199611147, "grad_norm": 0.48332685232162476, "learning_rate": 8.782135052185819e-07, "loss": 0.0689, "step": 7538 }, { "epoch": 2.4429682436811406, "grad_norm": 0.4374450445175171, "learning_rate": 8.772235788557998e-07, "loss": 0.0665, "step": 7539 }, { "epoch": 2.443292287751134, "grad_norm": 0.5159647464752197, "learning_rate": 8.762341570809408e-07, "loss": 0.076, "step": 7540 }, { "epoch": 2.4436163318211275, "grad_norm": 0.47382551431655884, "learning_rate": 8.75245240015099e-07, "loss": 0.0703, "step": 7541 }, { "epoch": 2.4439403758911213, "grad_norm": 0.5286172032356262, "learning_rate": 8.742568277793095e-07, "loss": 0.0835, "step": 7542 }, { "epoch": 2.4442644199611148, "grad_norm": 0.5125248432159424, "learning_rate": 8.732689204945449e-07, "loss": 0.0755, "step": 7543 }, { "epoch": 2.444588464031108, "grad_norm": 0.4747607409954071, "learning_rate": 8.722815182817123e-07, "loss": 0.0683, "step": 7544 }, { "epoch": 2.4449125081011016, "grad_norm": 0.5194625854492188, "learning_rate": 8.712946212616652e-07, "loss": 0.0718, "step": 7545 }, { "epoch": 2.445236552171095, "grad_norm": 0.5028043985366821, "learning_rate": 8.703082295551862e-07, "loss": 0.0797, "step": 7546 }, { "epoch": 2.445560596241089, "grad_norm": 0.5011937022209167, "learning_rate": 8.693223432830012e-07, "loss": 0.0743, "step": 7547 }, { "epoch": 2.4458846403110823, "grad_norm": 0.4265819191932678, "learning_rate": 8.683369625657734e-07, "loss": 0.0619, "step": 7548 }, { "epoch": 2.4462086843810757, "grad_norm": 0.5144858956336975, "learning_rate": 8.673520875241037e-07, "loss": 0.0725, "step": 7549 }, { "epoch": 2.446532728451069, "grad_norm": 0.48485949635505676, "learning_rate": 8.663677182785324e-07, "loss": 0.0689, "step": 7550 }, { "epoch": 2.446856772521063, "grad_norm": 0.4591423273086548, "learning_rate": 8.653838549495336e-07, "loss": 0.0658, "step": 7551 }, { "epoch": 2.4471808165910565, "grad_norm": 0.5020585656166077, "learning_rate": 8.64400497657527e-07, "loss": 0.072, "step": 7552 }, { "epoch": 2.44750486066105, "grad_norm": 0.46597227454185486, "learning_rate": 8.634176465228616e-07, "loss": 0.0708, "step": 7553 }, { "epoch": 2.4478289047310433, "grad_norm": 0.47457510232925415, "learning_rate": 8.624353016658333e-07, "loss": 0.0711, "step": 7554 }, { "epoch": 2.448152948801037, "grad_norm": 0.4893776774406433, "learning_rate": 8.614534632066684e-07, "loss": 0.0755, "step": 7555 }, { "epoch": 2.4484769928710306, "grad_norm": 0.5376947522163391, "learning_rate": 8.604721312655351e-07, "loss": 0.0772, "step": 7556 }, { "epoch": 2.448801036941024, "grad_norm": 0.5056323409080505, "learning_rate": 8.594913059625404e-07, "loss": 0.0779, "step": 7557 }, { "epoch": 2.4491250810110174, "grad_norm": 0.4631410539150238, "learning_rate": 8.585109874177244e-07, "loss": 0.0693, "step": 7558 }, { "epoch": 2.449449125081011, "grad_norm": 0.4900587499141693, "learning_rate": 8.57531175751073e-07, "loss": 0.0691, "step": 7559 }, { "epoch": 2.4497731691510047, "grad_norm": 0.48433420062065125, "learning_rate": 8.56551871082501e-07, "loss": 0.0697, "step": 7560 }, { "epoch": 2.450097213220998, "grad_norm": 0.49739065766334534, "learning_rate": 8.555730735318707e-07, "loss": 0.0707, "step": 7561 }, { "epoch": 2.4504212572909916, "grad_norm": 0.4808198809623718, "learning_rate": 8.545947832189744e-07, "loss": 0.0693, "step": 7562 }, { "epoch": 2.450745301360985, "grad_norm": 0.5187966823577881, "learning_rate": 8.536170002635452e-07, "loss": 0.0784, "step": 7563 }, { "epoch": 2.4510693454309784, "grad_norm": 0.5044239163398743, "learning_rate": 8.526397247852558e-07, "loss": 0.0755, "step": 7564 }, { "epoch": 2.4513933895009723, "grad_norm": 0.502315878868103, "learning_rate": 8.516629569037138e-07, "loss": 0.0755, "step": 7565 }, { "epoch": 2.4517174335709657, "grad_norm": 0.46958866715431213, "learning_rate": 8.506866967384674e-07, "loss": 0.0711, "step": 7566 }, { "epoch": 2.452041477640959, "grad_norm": 0.5248948335647583, "learning_rate": 8.497109444090018e-07, "loss": 0.0769, "step": 7567 }, { "epoch": 2.4523655217109526, "grad_norm": 0.4852147102355957, "learning_rate": 8.487357000347379e-07, "loss": 0.0659, "step": 7568 }, { "epoch": 2.452689565780946, "grad_norm": 0.4870324432849884, "learning_rate": 8.477609637350365e-07, "loss": 0.0692, "step": 7569 }, { "epoch": 2.45301360985094, "grad_norm": 0.46902889013290405, "learning_rate": 8.467867356291964e-07, "loss": 0.0689, "step": 7570 }, { "epoch": 2.4533376539209333, "grad_norm": 0.5177121162414551, "learning_rate": 8.458130158364536e-07, "loss": 0.0748, "step": 7571 }, { "epoch": 2.4536616979909267, "grad_norm": 0.4790119528770447, "learning_rate": 8.448398044759826e-07, "loss": 0.0708, "step": 7572 }, { "epoch": 2.45398574206092, "grad_norm": 0.49437493085861206, "learning_rate": 8.438671016668937e-07, "loss": 0.0709, "step": 7573 }, { "epoch": 2.454309786130914, "grad_norm": 0.4641510248184204, "learning_rate": 8.428949075282389e-07, "loss": 0.0662, "step": 7574 }, { "epoch": 2.4546338302009074, "grad_norm": 0.48109662532806396, "learning_rate": 8.419232221790003e-07, "loss": 0.0664, "step": 7575 }, { "epoch": 2.454957874270901, "grad_norm": 0.5049029588699341, "learning_rate": 8.409520457381093e-07, "loss": 0.0723, "step": 7576 }, { "epoch": 2.4552819183408943, "grad_norm": 0.4541318714618683, "learning_rate": 8.399813783244237e-07, "loss": 0.0677, "step": 7577 }, { "epoch": 2.4556059624108877, "grad_norm": 0.42951998114585876, "learning_rate": 8.390112200567451e-07, "loss": 0.0628, "step": 7578 }, { "epoch": 2.4559300064808816, "grad_norm": 0.4754899740219116, "learning_rate": 8.380415710538115e-07, "loss": 0.0698, "step": 7579 }, { "epoch": 2.456254050550875, "grad_norm": 0.48631545901298523, "learning_rate": 8.370724314342993e-07, "loss": 0.0704, "step": 7580 }, { "epoch": 2.4565780946208684, "grad_norm": 0.4732912480831146, "learning_rate": 8.361038013168221e-07, "loss": 0.0713, "step": 7581 }, { "epoch": 2.456902138690862, "grad_norm": 0.48035740852355957, "learning_rate": 8.351356808199274e-07, "loss": 0.0694, "step": 7582 }, { "epoch": 2.4572261827608557, "grad_norm": 0.5123336315155029, "learning_rate": 8.341680700621091e-07, "loss": 0.0764, "step": 7583 }, { "epoch": 2.457550226830849, "grad_norm": 0.47391778230667114, "learning_rate": 8.332009691617882e-07, "loss": 0.0703, "step": 7584 }, { "epoch": 2.4578742709008425, "grad_norm": 0.4784128963947296, "learning_rate": 8.322343782373333e-07, "loss": 0.0678, "step": 7585 }, { "epoch": 2.458198314970836, "grad_norm": 0.4773249626159668, "learning_rate": 8.312682974070419e-07, "loss": 0.0679, "step": 7586 }, { "epoch": 2.4585223590408294, "grad_norm": 0.45103058218955994, "learning_rate": 8.303027267891545e-07, "loss": 0.068, "step": 7587 }, { "epoch": 2.4588464031108233, "grad_norm": 0.49889039993286133, "learning_rate": 8.293376665018482e-07, "loss": 0.0719, "step": 7588 }, { "epoch": 2.4591704471808167, "grad_norm": 0.49812307953834534, "learning_rate": 8.283731166632359e-07, "loss": 0.077, "step": 7589 }, { "epoch": 2.45949449125081, "grad_norm": 0.4281612038612366, "learning_rate": 8.274090773913706e-07, "loss": 0.0591, "step": 7590 }, { "epoch": 2.4598185353208035, "grad_norm": 0.473037451505661, "learning_rate": 8.264455488042395e-07, "loss": 0.0721, "step": 7591 }, { "epoch": 2.460142579390797, "grad_norm": 0.49381524324417114, "learning_rate": 8.254825310197701e-07, "loss": 0.0709, "step": 7592 }, { "epoch": 2.460466623460791, "grad_norm": 0.550028383731842, "learning_rate": 8.245200241558265e-07, "loss": 0.0735, "step": 7593 }, { "epoch": 2.4607906675307842, "grad_norm": 0.49053382873535156, "learning_rate": 8.235580283302097e-07, "loss": 0.0738, "step": 7594 }, { "epoch": 2.4611147116007777, "grad_norm": 0.5001011490821838, "learning_rate": 8.225965436606598e-07, "loss": 0.0758, "step": 7595 }, { "epoch": 2.461438755670771, "grad_norm": 0.5266391038894653, "learning_rate": 8.216355702648521e-07, "loss": 0.0774, "step": 7596 }, { "epoch": 2.4617627997407645, "grad_norm": 0.4827960431575775, "learning_rate": 8.206751082604014e-07, "loss": 0.0685, "step": 7597 }, { "epoch": 2.4620868438107584, "grad_norm": 0.504069983959198, "learning_rate": 8.197151577648593e-07, "loss": 0.0751, "step": 7598 }, { "epoch": 2.462410887880752, "grad_norm": 0.48526352643966675, "learning_rate": 8.187557188957123e-07, "loss": 0.0732, "step": 7599 }, { "epoch": 2.462734931950745, "grad_norm": 0.46831855177879333, "learning_rate": 8.177967917703877e-07, "loss": 0.0673, "step": 7600 }, { "epoch": 2.4630589760207386, "grad_norm": 0.4636510908603668, "learning_rate": 8.168383765062493e-07, "loss": 0.0701, "step": 7601 }, { "epoch": 2.4633830200907325, "grad_norm": 0.5034536719322205, "learning_rate": 8.158804732205971e-07, "loss": 0.0767, "step": 7602 }, { "epoch": 2.463707064160726, "grad_norm": 0.5184887647628784, "learning_rate": 8.149230820306697e-07, "loss": 0.078, "step": 7603 }, { "epoch": 2.4640311082307194, "grad_norm": 0.4871300160884857, "learning_rate": 8.139662030536421e-07, "loss": 0.0757, "step": 7604 }, { "epoch": 2.464355152300713, "grad_norm": 0.4584501385688782, "learning_rate": 8.130098364066292e-07, "loss": 0.0675, "step": 7605 }, { "epoch": 2.4646791963707066, "grad_norm": 0.44841256737709045, "learning_rate": 8.120539822066759e-07, "loss": 0.0649, "step": 7606 }, { "epoch": 2.4650032404407, "grad_norm": 0.46680083870887756, "learning_rate": 8.110986405707755e-07, "loss": 0.0669, "step": 7607 }, { "epoch": 2.4653272845106935, "grad_norm": 0.49121201038360596, "learning_rate": 8.101438116158488e-07, "loss": 0.0745, "step": 7608 }, { "epoch": 2.465651328580687, "grad_norm": 0.48516207933425903, "learning_rate": 8.091894954587582e-07, "loss": 0.0702, "step": 7609 }, { "epoch": 2.4659753726506803, "grad_norm": 0.438103049993515, "learning_rate": 8.082356922163038e-07, "loss": 0.0648, "step": 7610 }, { "epoch": 2.466299416720674, "grad_norm": 0.47541922330856323, "learning_rate": 8.072824020052206e-07, "loss": 0.069, "step": 7611 }, { "epoch": 2.4666234607906676, "grad_norm": 0.4582546651363373, "learning_rate": 8.063296249421843e-07, "loss": 0.0688, "step": 7612 }, { "epoch": 2.466947504860661, "grad_norm": 0.48613741993904114, "learning_rate": 8.053773611438015e-07, "loss": 0.0701, "step": 7613 }, { "epoch": 2.4672715489306545, "grad_norm": 0.4982259273529053, "learning_rate": 8.044256107266246e-07, "loss": 0.0776, "step": 7614 }, { "epoch": 2.467595593000648, "grad_norm": 0.46958404779434204, "learning_rate": 8.034743738071349e-07, "loss": 0.0637, "step": 7615 }, { "epoch": 2.4679196370706418, "grad_norm": 0.48396363854408264, "learning_rate": 8.02523650501758e-07, "loss": 0.0725, "step": 7616 }, { "epoch": 2.468243681140635, "grad_norm": 0.46536555886268616, "learning_rate": 8.015734409268511e-07, "loss": 0.07, "step": 7617 }, { "epoch": 2.4685677252106286, "grad_norm": 0.5036252737045288, "learning_rate": 8.006237451987109e-07, "loss": 0.0704, "step": 7618 }, { "epoch": 2.468891769280622, "grad_norm": 0.524920642375946, "learning_rate": 7.996745634335712e-07, "loss": 0.0761, "step": 7619 }, { "epoch": 2.4692158133506155, "grad_norm": 0.47863340377807617, "learning_rate": 7.987258957476024e-07, "loss": 0.0677, "step": 7620 }, { "epoch": 2.4695398574206093, "grad_norm": 0.47143682837486267, "learning_rate": 7.977777422569138e-07, "loss": 0.0688, "step": 7621 }, { "epoch": 2.4698639014906028, "grad_norm": 0.4850553572177887, "learning_rate": 7.968301030775477e-07, "loss": 0.0723, "step": 7622 }, { "epoch": 2.470187945560596, "grad_norm": 0.5007749795913696, "learning_rate": 7.958829783254873e-07, "loss": 0.0726, "step": 7623 }, { "epoch": 2.4705119896305896, "grad_norm": 0.4891062378883362, "learning_rate": 7.94936368116651e-07, "loss": 0.0708, "step": 7624 }, { "epoch": 2.4708360337005835, "grad_norm": 0.541865348815918, "learning_rate": 7.939902725668952e-07, "loss": 0.0716, "step": 7625 }, { "epoch": 2.471160077770577, "grad_norm": 0.4788265824317932, "learning_rate": 7.930446917920126e-07, "loss": 0.0719, "step": 7626 }, { "epoch": 2.4714841218405703, "grad_norm": 0.5064488053321838, "learning_rate": 7.920996259077335e-07, "loss": 0.072, "step": 7627 }, { "epoch": 2.4718081659105637, "grad_norm": 0.4691329896450043, "learning_rate": 7.911550750297247e-07, "loss": 0.0711, "step": 7628 }, { "epoch": 2.4721322099805576, "grad_norm": 0.47548046708106995, "learning_rate": 7.902110392735907e-07, "loss": 0.0695, "step": 7629 }, { "epoch": 2.472456254050551, "grad_norm": 0.5309733748435974, "learning_rate": 7.892675187548709e-07, "loss": 0.076, "step": 7630 }, { "epoch": 2.4727802981205445, "grad_norm": 0.4893459379673004, "learning_rate": 7.883245135890432e-07, "loss": 0.0728, "step": 7631 }, { "epoch": 2.473104342190538, "grad_norm": 0.49500328302383423, "learning_rate": 7.873820238915231e-07, "loss": 0.0735, "step": 7632 }, { "epoch": 2.4734283862605313, "grad_norm": 0.4524442255496979, "learning_rate": 7.86440049777662e-07, "loss": 0.0626, "step": 7633 }, { "epoch": 2.473752430330525, "grad_norm": 0.491973876953125, "learning_rate": 7.854985913627494e-07, "loss": 0.0724, "step": 7634 }, { "epoch": 2.4740764744005186, "grad_norm": 0.4897155165672302, "learning_rate": 7.845576487620076e-07, "loss": 0.0702, "step": 7635 }, { "epoch": 2.474400518470512, "grad_norm": 0.5308480858802795, "learning_rate": 7.83617222090603e-07, "loss": 0.082, "step": 7636 }, { "epoch": 2.4747245625405054, "grad_norm": 0.5133422017097473, "learning_rate": 7.826773114636305e-07, "loss": 0.0741, "step": 7637 }, { "epoch": 2.475048606610499, "grad_norm": 0.5054247379302979, "learning_rate": 7.817379169961309e-07, "loss": 0.0762, "step": 7638 }, { "epoch": 2.4753726506804927, "grad_norm": 0.4538639783859253, "learning_rate": 7.807990388030728e-07, "loss": 0.069, "step": 7639 }, { "epoch": 2.475696694750486, "grad_norm": 0.470773845911026, "learning_rate": 7.798606769993672e-07, "loss": 0.0686, "step": 7640 }, { "epoch": 2.4760207388204796, "grad_norm": 0.45735427737236023, "learning_rate": 7.789228316998604e-07, "loss": 0.0706, "step": 7641 }, { "epoch": 2.476344782890473, "grad_norm": 0.5463230013847351, "learning_rate": 7.779855030193362e-07, "loss": 0.0793, "step": 7642 }, { "epoch": 2.4766688269604664, "grad_norm": 0.4804290235042572, "learning_rate": 7.770486910725156e-07, "loss": 0.0677, "step": 7643 }, { "epoch": 2.4769928710304603, "grad_norm": 0.5150229334831238, "learning_rate": 7.761123959740513e-07, "loss": 0.078, "step": 7644 }, { "epoch": 2.4773169151004537, "grad_norm": 0.5204079747200012, "learning_rate": 7.751766178385411e-07, "loss": 0.0691, "step": 7645 }, { "epoch": 2.477640959170447, "grad_norm": 0.49729999899864197, "learning_rate": 7.742413567805129e-07, "loss": 0.0742, "step": 7646 }, { "epoch": 2.4779650032404406, "grad_norm": 0.5113031268119812, "learning_rate": 7.73306612914434e-07, "loss": 0.0714, "step": 7647 }, { "epoch": 2.478289047310434, "grad_norm": 0.5141634345054626, "learning_rate": 7.723723863547084e-07, "loss": 0.0732, "step": 7648 }, { "epoch": 2.478613091380428, "grad_norm": 0.48891061544418335, "learning_rate": 7.714386772156757e-07, "loss": 0.0705, "step": 7649 }, { "epoch": 2.4789371354504213, "grad_norm": 0.46272414922714233, "learning_rate": 7.705054856116129e-07, "loss": 0.0641, "step": 7650 }, { "epoch": 2.4792611795204147, "grad_norm": 0.48565956950187683, "learning_rate": 7.695728116567347e-07, "loss": 0.0705, "step": 7651 }, { "epoch": 2.479585223590408, "grad_norm": 0.5136396288871765, "learning_rate": 7.686406554651915e-07, "loss": 0.0711, "step": 7652 }, { "epoch": 2.479909267660402, "grad_norm": 0.49589043855667114, "learning_rate": 7.677090171510682e-07, "loss": 0.0735, "step": 7653 }, { "epoch": 2.4802333117303954, "grad_norm": 0.5121117234230042, "learning_rate": 7.667778968283895e-07, "loss": 0.0745, "step": 7654 }, { "epoch": 2.480557355800389, "grad_norm": 0.5081775784492493, "learning_rate": 7.658472946111151e-07, "loss": 0.0687, "step": 7655 }, { "epoch": 2.4808813998703823, "grad_norm": 0.43997034430503845, "learning_rate": 7.649172106131425e-07, "loss": 0.0656, "step": 7656 }, { "epoch": 2.481205443940376, "grad_norm": 0.5061107277870178, "learning_rate": 7.639876449483047e-07, "loss": 0.0696, "step": 7657 }, { "epoch": 2.4815294880103695, "grad_norm": 0.5054954886436462, "learning_rate": 7.630585977303717e-07, "loss": 0.0782, "step": 7658 }, { "epoch": 2.481853532080363, "grad_norm": 0.44806042313575745, "learning_rate": 7.621300690730482e-07, "loss": 0.0605, "step": 7659 }, { "epoch": 2.4821775761503564, "grad_norm": 0.49718645215034485, "learning_rate": 7.612020590899805e-07, "loss": 0.0704, "step": 7660 }, { "epoch": 2.48250162022035, "grad_norm": 0.5014523267745972, "learning_rate": 7.602745678947443e-07, "loss": 0.0706, "step": 7661 }, { "epoch": 2.4828256642903437, "grad_norm": 0.5038493871688843, "learning_rate": 7.593475956008578e-07, "loss": 0.076, "step": 7662 }, { "epoch": 2.483149708360337, "grad_norm": 0.5001026391983032, "learning_rate": 7.58421142321773e-07, "loss": 0.0725, "step": 7663 }, { "epoch": 2.4834737524303305, "grad_norm": 0.4998968541622162, "learning_rate": 7.574952081708787e-07, "loss": 0.0744, "step": 7664 }, { "epoch": 2.483797796500324, "grad_norm": 0.46559011936187744, "learning_rate": 7.565697932615013e-07, "loss": 0.071, "step": 7665 }, { "epoch": 2.4841218405703174, "grad_norm": 0.547747790813446, "learning_rate": 7.556448977068992e-07, "loss": 0.0786, "step": 7666 }, { "epoch": 2.4844458846403112, "grad_norm": 0.4957687258720398, "learning_rate": 7.547205216202752e-07, "loss": 0.0722, "step": 7667 }, { "epoch": 2.4847699287103047, "grad_norm": 0.4725322425365448, "learning_rate": 7.537966651147599e-07, "loss": 0.0687, "step": 7668 }, { "epoch": 2.485093972780298, "grad_norm": 0.4966736435890198, "learning_rate": 7.52873328303429e-07, "loss": 0.0736, "step": 7669 }, { "epoch": 2.4854180168502915, "grad_norm": 0.4968201220035553, "learning_rate": 7.519505112992842e-07, "loss": 0.0739, "step": 7670 }, { "epoch": 2.485742060920285, "grad_norm": 0.473723441362381, "learning_rate": 7.510282142152753e-07, "loss": 0.0668, "step": 7671 }, { "epoch": 2.486066104990279, "grad_norm": 0.49232056736946106, "learning_rate": 7.501064371642785e-07, "loss": 0.072, "step": 7672 }, { "epoch": 2.4863901490602722, "grad_norm": 0.4883209466934204, "learning_rate": 7.491851802591121e-07, "loss": 0.0735, "step": 7673 }, { "epoch": 2.4867141931302656, "grad_norm": 0.4559668302536011, "learning_rate": 7.482644436125291e-07, "loss": 0.0713, "step": 7674 }, { "epoch": 2.487038237200259, "grad_norm": 0.492357462644577, "learning_rate": 7.473442273372162e-07, "loss": 0.0728, "step": 7675 }, { "epoch": 2.487362281270253, "grad_norm": 0.4837523400783539, "learning_rate": 7.464245315458029e-07, "loss": 0.0712, "step": 7676 }, { "epoch": 2.4876863253402464, "grad_norm": 0.47218117117881775, "learning_rate": 7.455053563508485e-07, "loss": 0.0686, "step": 7677 }, { "epoch": 2.48801036941024, "grad_norm": 0.42852234840393066, "learning_rate": 7.445867018648517e-07, "loss": 0.0609, "step": 7678 }, { "epoch": 2.488334413480233, "grad_norm": 0.5153549313545227, "learning_rate": 7.436685682002465e-07, "loss": 0.0771, "step": 7679 }, { "epoch": 2.488658457550227, "grad_norm": 0.4937134087085724, "learning_rate": 7.427509554694046e-07, "loss": 0.0732, "step": 7680 }, { "epoch": 2.4889825016202205, "grad_norm": 0.4915093779563904, "learning_rate": 7.418338637846323e-07, "loss": 0.0712, "step": 7681 }, { "epoch": 2.489306545690214, "grad_norm": 0.46614953875541687, "learning_rate": 7.409172932581726e-07, "loss": 0.0686, "step": 7682 }, { "epoch": 2.4896305897602073, "grad_norm": 0.5098023414611816, "learning_rate": 7.400012440022053e-07, "loss": 0.0732, "step": 7683 }, { "epoch": 2.4899546338302008, "grad_norm": 0.5139278769493103, "learning_rate": 7.390857161288467e-07, "loss": 0.0735, "step": 7684 }, { "epoch": 2.4902786779001946, "grad_norm": 0.49719876050949097, "learning_rate": 7.381707097501467e-07, "loss": 0.0753, "step": 7685 }, { "epoch": 2.490602721970188, "grad_norm": 0.4695081114768982, "learning_rate": 7.37256224978094e-07, "loss": 0.0681, "step": 7686 }, { "epoch": 2.4909267660401815, "grad_norm": 0.4486734867095947, "learning_rate": 7.36342261924613e-07, "loss": 0.0668, "step": 7687 }, { "epoch": 2.491250810110175, "grad_norm": 0.4767226278781891, "learning_rate": 7.354288207015636e-07, "loss": 0.0687, "step": 7688 }, { "epoch": 2.4915748541801683, "grad_norm": 0.45571380853652954, "learning_rate": 7.345159014207432e-07, "loss": 0.0677, "step": 7689 }, { "epoch": 2.491898898250162, "grad_norm": 0.49520525336265564, "learning_rate": 7.33603504193881e-07, "loss": 0.0717, "step": 7690 }, { "epoch": 2.4922229423201556, "grad_norm": 0.4969030022621155, "learning_rate": 7.326916291326508e-07, "loss": 0.0767, "step": 7691 }, { "epoch": 2.492546986390149, "grad_norm": 0.5097898840904236, "learning_rate": 7.31780276348652e-07, "loss": 0.0773, "step": 7692 }, { "epoch": 2.4928710304601425, "grad_norm": 0.49189767241477966, "learning_rate": 7.308694459534299e-07, "loss": 0.0721, "step": 7693 }, { "epoch": 2.493195074530136, "grad_norm": 0.5023943185806274, "learning_rate": 7.299591380584581e-07, "loss": 0.0729, "step": 7694 }, { "epoch": 2.4935191186001298, "grad_norm": 0.45873549580574036, "learning_rate": 7.290493527751508e-07, "loss": 0.0647, "step": 7695 }, { "epoch": 2.493843162670123, "grad_norm": 0.45283275842666626, "learning_rate": 7.281400902148578e-07, "loss": 0.0654, "step": 7696 }, { "epoch": 2.4941672067401166, "grad_norm": 0.5040050148963928, "learning_rate": 7.272313504888606e-07, "loss": 0.0744, "step": 7697 }, { "epoch": 2.49449125081011, "grad_norm": 0.4800432324409485, "learning_rate": 7.263231337083842e-07, "loss": 0.0641, "step": 7698 }, { "epoch": 2.4948152948801035, "grad_norm": 0.49461984634399414, "learning_rate": 7.254154399845825e-07, "loss": 0.0718, "step": 7699 }, { "epoch": 2.4951393389500973, "grad_norm": 0.4502789378166199, "learning_rate": 7.245082694285516e-07, "loss": 0.0668, "step": 7700 }, { "epoch": 2.4954633830200907, "grad_norm": 0.5190999507904053, "learning_rate": 7.236016221513176e-07, "loss": 0.0787, "step": 7701 }, { "epoch": 2.495787427090084, "grad_norm": 0.518627405166626, "learning_rate": 7.226954982638463e-07, "loss": 0.0796, "step": 7702 }, { "epoch": 2.4961114711600776, "grad_norm": 0.5438644886016846, "learning_rate": 7.217898978770382e-07, "loss": 0.0752, "step": 7703 }, { "epoch": 2.4964355152300715, "grad_norm": 0.444247305393219, "learning_rate": 7.208848211017305e-07, "loss": 0.0639, "step": 7704 }, { "epoch": 2.496759559300065, "grad_norm": 0.484861820936203, "learning_rate": 7.199802680486956e-07, "loss": 0.0714, "step": 7705 }, { "epoch": 2.4970836033700583, "grad_norm": 0.47993481159210205, "learning_rate": 7.190762388286421e-07, "loss": 0.0697, "step": 7706 }, { "epoch": 2.4974076474400517, "grad_norm": 0.5076626539230347, "learning_rate": 7.181727335522154e-07, "loss": 0.0693, "step": 7707 }, { "epoch": 2.4977316915100456, "grad_norm": 0.510184109210968, "learning_rate": 7.172697523299943e-07, "loss": 0.0725, "step": 7708 }, { "epoch": 2.498055735580039, "grad_norm": 0.46183472871780396, "learning_rate": 7.163672952724948e-07, "loss": 0.0663, "step": 7709 }, { "epoch": 2.4983797796500324, "grad_norm": 0.5151199698448181, "learning_rate": 7.154653624901697e-07, "loss": 0.0771, "step": 7710 }, { "epoch": 2.498703823720026, "grad_norm": 0.4487936198711395, "learning_rate": 7.145639540934069e-07, "loss": 0.0624, "step": 7711 }, { "epoch": 2.4990278677900193, "grad_norm": 0.4931066036224365, "learning_rate": 7.136630701925301e-07, "loss": 0.075, "step": 7712 }, { "epoch": 2.499351911860013, "grad_norm": 0.48808082938194275, "learning_rate": 7.127627108977991e-07, "loss": 0.0742, "step": 7713 }, { "epoch": 2.4996759559300066, "grad_norm": 0.5426340103149414, "learning_rate": 7.118628763194068e-07, "loss": 0.0763, "step": 7714 }, { "epoch": 2.5, "grad_norm": 0.458568274974823, "learning_rate": 7.10963566567488e-07, "loss": 0.0659, "step": 7715 }, { "epoch": 2.5003240440699934, "grad_norm": 0.5094156265258789, "learning_rate": 7.100647817521067e-07, "loss": 0.0748, "step": 7716 }, { "epoch": 2.500648088139987, "grad_norm": 0.47651737928390503, "learning_rate": 7.09166521983266e-07, "loss": 0.0687, "step": 7717 }, { "epoch": 2.5009721322099807, "grad_norm": 0.5027143359184265, "learning_rate": 7.082687873709048e-07, "loss": 0.071, "step": 7718 }, { "epoch": 2.501296176279974, "grad_norm": 0.5082246661186218, "learning_rate": 7.073715780248969e-07, "loss": 0.0727, "step": 7719 }, { "epoch": 2.5016202203499676, "grad_norm": 0.47659170627593994, "learning_rate": 7.064748940550531e-07, "loss": 0.0672, "step": 7720 }, { "epoch": 2.501944264419961, "grad_norm": 0.47924235463142395, "learning_rate": 7.055787355711153e-07, "loss": 0.0711, "step": 7721 }, { "epoch": 2.5022683084899544, "grad_norm": 0.4786684811115265, "learning_rate": 7.046831026827694e-07, "loss": 0.0708, "step": 7722 }, { "epoch": 2.5025923525599483, "grad_norm": 0.4756191372871399, "learning_rate": 7.037879954996274e-07, "loss": 0.073, "step": 7723 }, { "epoch": 2.5029163966299417, "grad_norm": 0.495451420545578, "learning_rate": 7.028934141312466e-07, "loss": 0.0723, "step": 7724 }, { "epoch": 2.503240440699935, "grad_norm": 0.47573190927505493, "learning_rate": 7.019993586871116e-07, "loss": 0.0725, "step": 7725 }, { "epoch": 2.5035644847699285, "grad_norm": 0.5132853388786316, "learning_rate": 7.01105829276647e-07, "loss": 0.0754, "step": 7726 }, { "epoch": 2.503888528839922, "grad_norm": 0.46969881653785706, "learning_rate": 7.002128260092128e-07, "loss": 0.068, "step": 7727 }, { "epoch": 2.504212572909916, "grad_norm": 0.4756503999233246, "learning_rate": 6.993203489941036e-07, "loss": 0.0671, "step": 7728 }, { "epoch": 2.5045366169799093, "grad_norm": 0.4660719037055969, "learning_rate": 6.984283983405504e-07, "loss": 0.0683, "step": 7729 }, { "epoch": 2.5048606610499027, "grad_norm": 0.45893529057502747, "learning_rate": 6.975369741577171e-07, "loss": 0.0679, "step": 7730 }, { "epoch": 2.5051847051198965, "grad_norm": 0.5016549229621887, "learning_rate": 6.96646076554709e-07, "loss": 0.0715, "step": 7731 }, { "epoch": 2.50550874918989, "grad_norm": 0.4874078929424286, "learning_rate": 6.957557056405606e-07, "loss": 0.0704, "step": 7732 }, { "epoch": 2.5058327932598834, "grad_norm": 0.46628010272979736, "learning_rate": 6.948658615242454e-07, "loss": 0.071, "step": 7733 }, { "epoch": 2.506156837329877, "grad_norm": 0.4942968487739563, "learning_rate": 6.939765443146712e-07, "loss": 0.0715, "step": 7734 }, { "epoch": 2.5064808813998702, "grad_norm": 0.4972381591796875, "learning_rate": 6.930877541206832e-07, "loss": 0.074, "step": 7735 }, { "epoch": 2.506804925469864, "grad_norm": 0.49421438574790955, "learning_rate": 6.921994910510599e-07, "loss": 0.0718, "step": 7736 }, { "epoch": 2.5071289695398575, "grad_norm": 0.5161775946617126, "learning_rate": 6.91311755214517e-07, "loss": 0.0766, "step": 7737 }, { "epoch": 2.507453013609851, "grad_norm": 0.4510141611099243, "learning_rate": 6.904245467197029e-07, "loss": 0.0671, "step": 7738 }, { "epoch": 2.5077770576798444, "grad_norm": 0.5020382404327393, "learning_rate": 6.895378656752044e-07, "loss": 0.0714, "step": 7739 }, { "epoch": 2.508101101749838, "grad_norm": 0.4951009154319763, "learning_rate": 6.886517121895425e-07, "loss": 0.0765, "step": 7740 }, { "epoch": 2.5084251458198317, "grad_norm": 0.4750005900859833, "learning_rate": 6.877660863711744e-07, "loss": 0.067, "step": 7741 }, { "epoch": 2.508749189889825, "grad_norm": 0.48607584834098816, "learning_rate": 6.86880988328491e-07, "loss": 0.0734, "step": 7742 }, { "epoch": 2.5090732339598185, "grad_norm": 0.49443286657333374, "learning_rate": 6.859964181698209e-07, "loss": 0.0723, "step": 7743 }, { "epoch": 2.509397278029812, "grad_norm": 0.45458024740219116, "learning_rate": 6.851123760034273e-07, "loss": 0.0671, "step": 7744 }, { "epoch": 2.5097213220998054, "grad_norm": 0.5252810120582581, "learning_rate": 6.842288619375054e-07, "loss": 0.0797, "step": 7745 }, { "epoch": 2.5100453661697992, "grad_norm": 0.5204702019691467, "learning_rate": 6.833458760801931e-07, "loss": 0.0783, "step": 7746 }, { "epoch": 2.5103694102397927, "grad_norm": 0.49748295545578003, "learning_rate": 6.824634185395562e-07, "loss": 0.0703, "step": 7747 }, { "epoch": 2.510693454309786, "grad_norm": 0.49332040548324585, "learning_rate": 6.815814894235994e-07, "loss": 0.0734, "step": 7748 }, { "epoch": 2.5110174983797795, "grad_norm": 0.5083654522895813, "learning_rate": 6.807000888402631e-07, "loss": 0.0751, "step": 7749 }, { "epoch": 2.511341542449773, "grad_norm": 0.4894023835659027, "learning_rate": 6.798192168974216e-07, "loss": 0.0704, "step": 7750 }, { "epoch": 2.511665586519767, "grad_norm": 0.4570043683052063, "learning_rate": 6.789388737028868e-07, "loss": 0.069, "step": 7751 }, { "epoch": 2.51198963058976, "grad_norm": 0.44119545817375183, "learning_rate": 6.780590593644004e-07, "loss": 0.0645, "step": 7752 }, { "epoch": 2.5123136746597536, "grad_norm": 0.48286327719688416, "learning_rate": 6.771797739896479e-07, "loss": 0.0737, "step": 7753 }, { "epoch": 2.5126377187297475, "grad_norm": 0.48988592624664307, "learning_rate": 6.763010176862405e-07, "loss": 0.0723, "step": 7754 }, { "epoch": 2.512961762799741, "grad_norm": 0.5176308751106262, "learning_rate": 6.754227905617338e-07, "loss": 0.074, "step": 7755 }, { "epoch": 2.5132858068697344, "grad_norm": 0.5146079063415527, "learning_rate": 6.745450927236119e-07, "loss": 0.0762, "step": 7756 }, { "epoch": 2.5136098509397278, "grad_norm": 0.5212789177894592, "learning_rate": 6.736679242792965e-07, "loss": 0.0735, "step": 7757 }, { "epoch": 2.513933895009721, "grad_norm": 0.4908018708229065, "learning_rate": 6.727912853361456e-07, "loss": 0.07, "step": 7758 }, { "epoch": 2.514257939079715, "grad_norm": 0.4421240985393524, "learning_rate": 6.719151760014503e-07, "loss": 0.0676, "step": 7759 }, { "epoch": 2.5145819831497085, "grad_norm": 0.43792641162872314, "learning_rate": 6.710395963824396e-07, "loss": 0.0654, "step": 7760 }, { "epoch": 2.514906027219702, "grad_norm": 0.4864790141582489, "learning_rate": 6.701645465862721e-07, "loss": 0.0687, "step": 7761 }, { "epoch": 2.5152300712896953, "grad_norm": 0.46703585982322693, "learning_rate": 6.692900267200509e-07, "loss": 0.0681, "step": 7762 }, { "epoch": 2.5155541153596888, "grad_norm": 0.44520705938339233, "learning_rate": 6.684160368908044e-07, "loss": 0.0639, "step": 7763 }, { "epoch": 2.5158781594296826, "grad_norm": 0.47705909609794617, "learning_rate": 6.675425772055022e-07, "loss": 0.0721, "step": 7764 }, { "epoch": 2.516202203499676, "grad_norm": 0.4667569398880005, "learning_rate": 6.666696477710471e-07, "loss": 0.0662, "step": 7765 }, { "epoch": 2.5165262475696695, "grad_norm": 0.5014678239822388, "learning_rate": 6.657972486942771e-07, "loss": 0.0741, "step": 7766 }, { "epoch": 2.516850291639663, "grad_norm": 0.5059800744056702, "learning_rate": 6.649253800819655e-07, "loss": 0.0787, "step": 7767 }, { "epoch": 2.5171743357096563, "grad_norm": 0.5104771852493286, "learning_rate": 6.640540420408214e-07, "loss": 0.077, "step": 7768 }, { "epoch": 2.51749837977965, "grad_norm": 0.44101908802986145, "learning_rate": 6.631832346774869e-07, "loss": 0.0642, "step": 7769 }, { "epoch": 2.5178224238496436, "grad_norm": 0.4786303639411926, "learning_rate": 6.623129580985404e-07, "loss": 0.0688, "step": 7770 }, { "epoch": 2.518146467919637, "grad_norm": 0.48153790831565857, "learning_rate": 6.614432124104958e-07, "loss": 0.0752, "step": 7771 }, { "epoch": 2.5184705119896305, "grad_norm": 0.4702111780643463, "learning_rate": 6.605739977198017e-07, "loss": 0.0711, "step": 7772 }, { "epoch": 2.518794556059624, "grad_norm": 0.4766141176223755, "learning_rate": 6.597053141328414e-07, "loss": 0.0688, "step": 7773 }, { "epoch": 2.5191186001296177, "grad_norm": 0.5064833760261536, "learning_rate": 6.58837161755933e-07, "loss": 0.0755, "step": 7774 }, { "epoch": 2.519442644199611, "grad_norm": 0.4854080379009247, "learning_rate": 6.579695406953318e-07, "loss": 0.0737, "step": 7775 }, { "epoch": 2.5197666882696046, "grad_norm": 0.49226248264312744, "learning_rate": 6.571024510572222e-07, "loss": 0.0748, "step": 7776 }, { "epoch": 2.5200907323395985, "grad_norm": 0.4566308856010437, "learning_rate": 6.562358929477325e-07, "loss": 0.0698, "step": 7777 }, { "epoch": 2.5204147764095914, "grad_norm": 0.45967721939086914, "learning_rate": 6.553698664729174e-07, "loss": 0.0694, "step": 7778 }, { "epoch": 2.5207388204795853, "grad_norm": 0.6135513782501221, "learning_rate": 6.545043717387717e-07, "loss": 0.0671, "step": 7779 }, { "epoch": 2.5210628645495787, "grad_norm": 0.464465469121933, "learning_rate": 6.536394088512227e-07, "loss": 0.0687, "step": 7780 }, { "epoch": 2.521386908619572, "grad_norm": 0.4690185785293579, "learning_rate": 6.527749779161341e-07, "loss": 0.0693, "step": 7781 }, { "epoch": 2.521710952689566, "grad_norm": 0.5188164710998535, "learning_rate": 6.519110790393052e-07, "loss": 0.0748, "step": 7782 }, { "epoch": 2.5220349967595594, "grad_norm": 0.522108256816864, "learning_rate": 6.510477123264652e-07, "loss": 0.0767, "step": 7783 }, { "epoch": 2.522359040829553, "grad_norm": 0.5431333184242249, "learning_rate": 6.501848778832864e-07, "loss": 0.0754, "step": 7784 }, { "epoch": 2.5226830848995463, "grad_norm": 0.503311038017273, "learning_rate": 6.493225758153665e-07, "loss": 0.0742, "step": 7785 }, { "epoch": 2.5230071289695397, "grad_norm": 0.5031965374946594, "learning_rate": 6.484608062282477e-07, "loss": 0.0747, "step": 7786 }, { "epoch": 2.5233311730395336, "grad_norm": 0.4736836552619934, "learning_rate": 6.475995692273995e-07, "loss": 0.07, "step": 7787 }, { "epoch": 2.523655217109527, "grad_norm": 0.5007358193397522, "learning_rate": 6.467388649182288e-07, "loss": 0.0753, "step": 7788 }, { "epoch": 2.5239792611795204, "grad_norm": 0.49653831124305725, "learning_rate": 6.45878693406079e-07, "loss": 0.0771, "step": 7789 }, { "epoch": 2.524303305249514, "grad_norm": 0.4510502219200134, "learning_rate": 6.450190547962254e-07, "loss": 0.0684, "step": 7790 }, { "epoch": 2.5246273493195073, "grad_norm": 0.4857436418533325, "learning_rate": 6.441599491938811e-07, "loss": 0.0701, "step": 7791 }, { "epoch": 2.524951393389501, "grad_norm": 0.49388056993484497, "learning_rate": 6.433013767041901e-07, "loss": 0.073, "step": 7792 }, { "epoch": 2.5252754374594946, "grad_norm": 0.5577260851860046, "learning_rate": 6.424433374322347e-07, "loss": 0.0812, "step": 7793 }, { "epoch": 2.525599481529488, "grad_norm": 0.4625173509120941, "learning_rate": 6.415858314830304e-07, "loss": 0.0659, "step": 7794 }, { "epoch": 2.5259235255994814, "grad_norm": 0.4894951283931732, "learning_rate": 6.407288589615279e-07, "loss": 0.0704, "step": 7795 }, { "epoch": 2.526247569669475, "grad_norm": 0.4609914720058441, "learning_rate": 6.398724199726114e-07, "loss": 0.0711, "step": 7796 }, { "epoch": 2.5265716137394687, "grad_norm": 0.49507227540016174, "learning_rate": 6.39016514621102e-07, "loss": 0.0731, "step": 7797 }, { "epoch": 2.526895657809462, "grad_norm": 0.5095334053039551, "learning_rate": 6.38161143011753e-07, "loss": 0.0722, "step": 7798 }, { "epoch": 2.5272197018794555, "grad_norm": 0.5216260552406311, "learning_rate": 6.373063052492557e-07, "loss": 0.0731, "step": 7799 }, { "epoch": 2.527543745949449, "grad_norm": 0.44932428002357483, "learning_rate": 6.364520014382314e-07, "loss": 0.0667, "step": 7800 }, { "epoch": 2.5278677900194424, "grad_norm": 0.5045766234397888, "learning_rate": 6.355982316832393e-07, "loss": 0.0738, "step": 7801 }, { "epoch": 2.5281918340894363, "grad_norm": 0.46739453077316284, "learning_rate": 6.347449960887736e-07, "loss": 0.0658, "step": 7802 }, { "epoch": 2.5285158781594297, "grad_norm": 0.4505472779273987, "learning_rate": 6.338922947592607e-07, "loss": 0.0689, "step": 7803 }, { "epoch": 2.528839922229423, "grad_norm": 0.49780258536338806, "learning_rate": 6.330401277990656e-07, "loss": 0.0681, "step": 7804 }, { "epoch": 2.529163966299417, "grad_norm": 0.46541908383369446, "learning_rate": 6.321884953124808e-07, "loss": 0.0649, "step": 7805 }, { "epoch": 2.5294880103694104, "grad_norm": 0.4666377604007721, "learning_rate": 6.313373974037423e-07, "loss": 0.066, "step": 7806 }, { "epoch": 2.529812054439404, "grad_norm": 0.4736870229244232, "learning_rate": 6.304868341770127e-07, "loss": 0.0697, "step": 7807 }, { "epoch": 2.5301360985093972, "grad_norm": 0.5035609602928162, "learning_rate": 6.296368057363966e-07, "loss": 0.0786, "step": 7808 }, { "epoch": 2.5304601425793907, "grad_norm": 0.45186638832092285, "learning_rate": 6.287873121859251e-07, "loss": 0.0662, "step": 7809 }, { "epoch": 2.5307841866493845, "grad_norm": 0.4887215197086334, "learning_rate": 6.279383536295719e-07, "loss": 0.074, "step": 7810 }, { "epoch": 2.531108230719378, "grad_norm": 0.4621054232120514, "learning_rate": 6.27089930171238e-07, "loss": 0.0661, "step": 7811 }, { "epoch": 2.5314322747893714, "grad_norm": 0.4519784152507782, "learning_rate": 6.262420419147641e-07, "loss": 0.063, "step": 7812 }, { "epoch": 2.531756318859365, "grad_norm": 0.47850343585014343, "learning_rate": 6.25394688963924e-07, "loss": 0.0697, "step": 7813 }, { "epoch": 2.5320803629293582, "grad_norm": 0.4962216019630432, "learning_rate": 6.245478714224223e-07, "loss": 0.0735, "step": 7814 }, { "epoch": 2.532404406999352, "grad_norm": 0.5007613301277161, "learning_rate": 6.237015893939053e-07, "loss": 0.0721, "step": 7815 }, { "epoch": 2.5327284510693455, "grad_norm": 0.493139386177063, "learning_rate": 6.228558429819459e-07, "loss": 0.0737, "step": 7816 }, { "epoch": 2.533052495139339, "grad_norm": 0.5349908471107483, "learning_rate": 6.220106322900598e-07, "loss": 0.0792, "step": 7817 }, { "epoch": 2.5333765392093324, "grad_norm": 0.5205992460250854, "learning_rate": 6.211659574216888e-07, "loss": 0.0742, "step": 7818 }, { "epoch": 2.533700583279326, "grad_norm": 0.5088434815406799, "learning_rate": 6.20321818480214e-07, "loss": 0.0716, "step": 7819 }, { "epoch": 2.5340246273493197, "grad_norm": 0.44300928711891174, "learning_rate": 6.194782155689505e-07, "loss": 0.0666, "step": 7820 }, { "epoch": 2.534348671419313, "grad_norm": 0.48843711614608765, "learning_rate": 6.186351487911463e-07, "loss": 0.068, "step": 7821 }, { "epoch": 2.5346727154893065, "grad_norm": 0.4515663981437683, "learning_rate": 6.17792618249986e-07, "loss": 0.0637, "step": 7822 }, { "epoch": 2.5349967595593, "grad_norm": 0.4966040253639221, "learning_rate": 6.169506240485856e-07, "loss": 0.0738, "step": 7823 }, { "epoch": 2.5353208036292934, "grad_norm": 0.4954109787940979, "learning_rate": 6.161091662899971e-07, "loss": 0.0723, "step": 7824 }, { "epoch": 2.535644847699287, "grad_norm": 0.4602881371974945, "learning_rate": 6.152682450772074e-07, "loss": 0.0667, "step": 7825 }, { "epoch": 2.5359688917692806, "grad_norm": 0.4792567789554596, "learning_rate": 6.144278605131371e-07, "loss": 0.0699, "step": 7826 }, { "epoch": 2.536292935839274, "grad_norm": 0.4837825298309326, "learning_rate": 6.135880127006411e-07, "loss": 0.0692, "step": 7827 }, { "epoch": 2.536616979909268, "grad_norm": 0.48817309737205505, "learning_rate": 6.127487017425088e-07, "loss": 0.0731, "step": 7828 }, { "epoch": 2.536941023979261, "grad_norm": 0.46039432287216187, "learning_rate": 6.11909927741463e-07, "loss": 0.0694, "step": 7829 }, { "epoch": 2.537265068049255, "grad_norm": 0.5014173984527588, "learning_rate": 6.110716908001635e-07, "loss": 0.0774, "step": 7830 }, { "epoch": 2.537589112119248, "grad_norm": 0.5282461047172546, "learning_rate": 6.102339910211985e-07, "loss": 0.0802, "step": 7831 }, { "epoch": 2.5379131561892416, "grad_norm": 0.48522257804870605, "learning_rate": 6.093968285070989e-07, "loss": 0.0729, "step": 7832 }, { "epoch": 2.5382372002592355, "grad_norm": 0.49142035841941833, "learning_rate": 6.085602033603221e-07, "loss": 0.0702, "step": 7833 }, { "epoch": 2.538561244329229, "grad_norm": 0.4869705140590668, "learning_rate": 6.077241156832641e-07, "loss": 0.0711, "step": 7834 }, { "epoch": 2.5388852883992223, "grad_norm": 0.49308985471725464, "learning_rate": 6.068885655782553e-07, "loss": 0.0686, "step": 7835 }, { "epoch": 2.5392093324692158, "grad_norm": 0.47202742099761963, "learning_rate": 6.060535531475548e-07, "loss": 0.069, "step": 7836 }, { "epoch": 2.539533376539209, "grad_norm": 0.47661447525024414, "learning_rate": 6.052190784933648e-07, "loss": 0.0689, "step": 7837 }, { "epoch": 2.539857420609203, "grad_norm": 0.5208913087844849, "learning_rate": 6.043851417178132e-07, "loss": 0.0796, "step": 7838 }, { "epoch": 2.5401814646791965, "grad_norm": 0.472245454788208, "learning_rate": 6.035517429229687e-07, "loss": 0.0729, "step": 7839 }, { "epoch": 2.54050550874919, "grad_norm": 0.4980803430080414, "learning_rate": 6.027188822108288e-07, "loss": 0.0736, "step": 7840 }, { "epoch": 2.5408295528191833, "grad_norm": 0.4821029007434845, "learning_rate": 6.018865596833301e-07, "loss": 0.0713, "step": 7841 }, { "epoch": 2.5411535968891767, "grad_norm": 0.4771307706832886, "learning_rate": 6.010547754423385e-07, "loss": 0.0742, "step": 7842 }, { "epoch": 2.5414776409591706, "grad_norm": 0.49014055728912354, "learning_rate": 6.002235295896574e-07, "loss": 0.073, "step": 7843 }, { "epoch": 2.541801685029164, "grad_norm": 0.45903417468070984, "learning_rate": 5.993928222270246e-07, "loss": 0.0694, "step": 7844 }, { "epoch": 2.5421257290991575, "grad_norm": 0.483227014541626, "learning_rate": 5.985626534561062e-07, "loss": 0.0713, "step": 7845 }, { "epoch": 2.542449773169151, "grad_norm": 0.4825564920902252, "learning_rate": 5.977330233785128e-07, "loss": 0.0718, "step": 7846 }, { "epoch": 2.5427738172391443, "grad_norm": 0.4853704571723938, "learning_rate": 5.969039320957787e-07, "loss": 0.0756, "step": 7847 }, { "epoch": 2.543097861309138, "grad_norm": 0.4904906749725342, "learning_rate": 5.960753797093776e-07, "loss": 0.0702, "step": 7848 }, { "epoch": 2.5434219053791316, "grad_norm": 0.4905846416950226, "learning_rate": 5.952473663207176e-07, "loss": 0.0718, "step": 7849 }, { "epoch": 2.543745949449125, "grad_norm": 0.510238766670227, "learning_rate": 5.944198920311378e-07, "loss": 0.0714, "step": 7850 }, { "epoch": 2.5440699935191184, "grad_norm": 0.5145969986915588, "learning_rate": 5.935929569419147e-07, "loss": 0.0745, "step": 7851 }, { "epoch": 2.544394037589112, "grad_norm": 0.4890921711921692, "learning_rate": 5.927665611542555e-07, "loss": 0.0768, "step": 7852 }, { "epoch": 2.5447180816591057, "grad_norm": 0.4760702848434448, "learning_rate": 5.919407047693043e-07, "loss": 0.0675, "step": 7853 }, { "epoch": 2.545042125729099, "grad_norm": 0.4983026385307312, "learning_rate": 5.911153878881387e-07, "loss": 0.0748, "step": 7854 }, { "epoch": 2.5453661697990926, "grad_norm": 0.48434123396873474, "learning_rate": 5.902906106117673e-07, "loss": 0.0745, "step": 7855 }, { "epoch": 2.5456902138690864, "grad_norm": 0.47780078649520874, "learning_rate": 5.894663730411354e-07, "loss": 0.0673, "step": 7856 }, { "epoch": 2.54601425793908, "grad_norm": 0.5478381514549255, "learning_rate": 5.886426752771224e-07, "loss": 0.0742, "step": 7857 }, { "epoch": 2.5463383020090733, "grad_norm": 0.4975893497467041, "learning_rate": 5.878195174205409e-07, "loss": 0.0695, "step": 7858 }, { "epoch": 2.5466623460790667, "grad_norm": 0.4923208951950073, "learning_rate": 5.869968995721382e-07, "loss": 0.0751, "step": 7859 }, { "epoch": 2.54698639014906, "grad_norm": 0.4692155420780182, "learning_rate": 5.861748218325919e-07, "loss": 0.074, "step": 7860 }, { "epoch": 2.547310434219054, "grad_norm": 0.4731508791446686, "learning_rate": 5.8535328430252e-07, "loss": 0.0725, "step": 7861 }, { "epoch": 2.5476344782890474, "grad_norm": 0.4851173162460327, "learning_rate": 5.845322870824671e-07, "loss": 0.0687, "step": 7862 }, { "epoch": 2.547958522359041, "grad_norm": 0.5180558562278748, "learning_rate": 5.837118302729189e-07, "loss": 0.077, "step": 7863 }, { "epoch": 2.5482825664290343, "grad_norm": 0.48749464750289917, "learning_rate": 5.828919139742894e-07, "loss": 0.0724, "step": 7864 }, { "epoch": 2.5486066104990277, "grad_norm": 0.48550012707710266, "learning_rate": 5.82072538286928e-07, "loss": 0.0689, "step": 7865 }, { "epoch": 2.5489306545690216, "grad_norm": 0.5006141066551208, "learning_rate": 5.812537033111193e-07, "loss": 0.072, "step": 7866 }, { "epoch": 2.549254698639015, "grad_norm": 0.4635670781135559, "learning_rate": 5.804354091470809e-07, "loss": 0.0678, "step": 7867 }, { "epoch": 2.5495787427090084, "grad_norm": 0.5107144713401794, "learning_rate": 5.796176558949645e-07, "loss": 0.0728, "step": 7868 }, { "epoch": 2.549902786779002, "grad_norm": 0.4777069687843323, "learning_rate": 5.788004436548522e-07, "loss": 0.0725, "step": 7869 }, { "epoch": 2.5502268308489953, "grad_norm": 0.5170052647590637, "learning_rate": 5.779837725267673e-07, "loss": 0.0743, "step": 7870 }, { "epoch": 2.550550874918989, "grad_norm": 0.5209857821464539, "learning_rate": 5.771676426106593e-07, "loss": 0.0756, "step": 7871 }, { "epoch": 2.5508749189889826, "grad_norm": 0.4826715886592865, "learning_rate": 5.763520540064149e-07, "loss": 0.0721, "step": 7872 }, { "epoch": 2.551198963058976, "grad_norm": 0.4898536801338196, "learning_rate": 5.755370068138555e-07, "loss": 0.0722, "step": 7873 }, { "epoch": 2.5515230071289694, "grad_norm": 0.4834844470024109, "learning_rate": 5.74722501132734e-07, "loss": 0.0702, "step": 7874 }, { "epoch": 2.551847051198963, "grad_norm": 0.4997037947177887, "learning_rate": 5.739085370627384e-07, "loss": 0.0733, "step": 7875 }, { "epoch": 2.5521710952689567, "grad_norm": 0.5183606147766113, "learning_rate": 5.730951147034902e-07, "loss": 0.0755, "step": 7876 }, { "epoch": 2.55249513933895, "grad_norm": 0.5163745880126953, "learning_rate": 5.722822341545453e-07, "loss": 0.0679, "step": 7877 }, { "epoch": 2.5528191834089435, "grad_norm": 0.4591270089149475, "learning_rate": 5.714698955153897e-07, "loss": 0.069, "step": 7878 }, { "epoch": 2.5531432274789374, "grad_norm": 0.5224382877349854, "learning_rate": 5.706580988854476e-07, "loss": 0.0775, "step": 7879 }, { "epoch": 2.5534672715489304, "grad_norm": 0.4884330928325653, "learning_rate": 5.698468443640753e-07, "loss": 0.077, "step": 7880 }, { "epoch": 2.5537913156189243, "grad_norm": 0.4916975796222687, "learning_rate": 5.690361320505616e-07, "loss": 0.0765, "step": 7881 }, { "epoch": 2.5541153596889177, "grad_norm": 0.4709622859954834, "learning_rate": 5.682259620441305e-07, "loss": 0.069, "step": 7882 }, { "epoch": 2.554439403758911, "grad_norm": 0.4830895960330963, "learning_rate": 5.674163344439388e-07, "loss": 0.0746, "step": 7883 }, { "epoch": 2.554763447828905, "grad_norm": 0.48182427883148193, "learning_rate": 5.66607249349077e-07, "loss": 0.0654, "step": 7884 }, { "epoch": 2.5550874918988984, "grad_norm": 0.4703971743583679, "learning_rate": 5.657987068585702e-07, "loss": 0.071, "step": 7885 }, { "epoch": 2.555411535968892, "grad_norm": 0.5118240118026733, "learning_rate": 5.649907070713744e-07, "loss": 0.0697, "step": 7886 }, { "epoch": 2.5557355800388852, "grad_norm": 0.4816184639930725, "learning_rate": 5.641832500863814e-07, "loss": 0.0717, "step": 7887 }, { "epoch": 2.5560596241088787, "grad_norm": 0.4615735113620758, "learning_rate": 5.633763360024169e-07, "loss": 0.0636, "step": 7888 }, { "epoch": 2.5563836681788725, "grad_norm": 0.4630856215953827, "learning_rate": 5.625699649182392e-07, "loss": 0.0671, "step": 7889 }, { "epoch": 2.556707712248866, "grad_norm": 0.4773900508880615, "learning_rate": 5.61764136932541e-07, "loss": 0.0726, "step": 7890 }, { "epoch": 2.5570317563188594, "grad_norm": 0.4864650368690491, "learning_rate": 5.609588521439452e-07, "loss": 0.0717, "step": 7891 }, { "epoch": 2.557355800388853, "grad_norm": 0.47471946477890015, "learning_rate": 5.601541106510144e-07, "loss": 0.0718, "step": 7892 }, { "epoch": 2.557679844458846, "grad_norm": 0.484334260225296, "learning_rate": 5.593499125522372e-07, "loss": 0.0707, "step": 7893 }, { "epoch": 2.55800388852884, "grad_norm": 0.5048068761825562, "learning_rate": 5.585462579460443e-07, "loss": 0.0733, "step": 7894 }, { "epoch": 2.5583279325988335, "grad_norm": 0.5181866884231567, "learning_rate": 5.577431469307915e-07, "loss": 0.0738, "step": 7895 }, { "epoch": 2.558651976668827, "grad_norm": 0.5335702896118164, "learning_rate": 5.569405796047733e-07, "loss": 0.08, "step": 7896 }, { "epoch": 2.5589760207388204, "grad_norm": 0.49718958139419556, "learning_rate": 5.561385560662158e-07, "loss": 0.0719, "step": 7897 }, { "epoch": 2.559300064808814, "grad_norm": 0.46473929286003113, "learning_rate": 5.553370764132793e-07, "loss": 0.0647, "step": 7898 }, { "epoch": 2.5596241088788076, "grad_norm": 0.5155867338180542, "learning_rate": 5.545361407440581e-07, "loss": 0.0772, "step": 7899 }, { "epoch": 2.559948152948801, "grad_norm": 0.460317462682724, "learning_rate": 5.537357491565759e-07, "loss": 0.0667, "step": 7900 }, { "epoch": 2.5602721970187945, "grad_norm": 0.4972425699234009, "learning_rate": 5.529359017487962e-07, "loss": 0.0748, "step": 7901 }, { "epoch": 2.560596241088788, "grad_norm": 0.4749511480331421, "learning_rate": 5.521365986186111e-07, "loss": 0.0693, "step": 7902 }, { "epoch": 2.5609202851587813, "grad_norm": 0.4954456686973572, "learning_rate": 5.513378398638469e-07, "loss": 0.0673, "step": 7903 }, { "epoch": 2.561244329228775, "grad_norm": 0.475407212972641, "learning_rate": 5.505396255822654e-07, "loss": 0.0703, "step": 7904 }, { "epoch": 2.5615683732987686, "grad_norm": 0.4594283401966095, "learning_rate": 5.497419558715588e-07, "loss": 0.0674, "step": 7905 }, { "epoch": 2.561892417368762, "grad_norm": 0.45197737216949463, "learning_rate": 5.489448308293554e-07, "loss": 0.0653, "step": 7906 }, { "epoch": 2.562216461438756, "grad_norm": 0.5138728022575378, "learning_rate": 5.481482505532154e-07, "loss": 0.0777, "step": 7907 }, { "epoch": 2.5625405055087493, "grad_norm": 0.49609872698783875, "learning_rate": 5.47352215140633e-07, "loss": 0.0721, "step": 7908 }, { "epoch": 2.5628645495787428, "grad_norm": 0.47917693853378296, "learning_rate": 5.465567246890336e-07, "loss": 0.0686, "step": 7909 }, { "epoch": 2.563188593648736, "grad_norm": 0.48315778374671936, "learning_rate": 5.457617792957782e-07, "loss": 0.0702, "step": 7910 }, { "epoch": 2.5635126377187296, "grad_norm": 0.5028765201568604, "learning_rate": 5.449673790581611e-07, "loss": 0.0744, "step": 7911 }, { "epoch": 2.5638366817887235, "grad_norm": 0.48444709181785583, "learning_rate": 5.441735240734081e-07, "loss": 0.0714, "step": 7912 }, { "epoch": 2.564160725858717, "grad_norm": 0.4968181252479553, "learning_rate": 5.433802144386808e-07, "loss": 0.0762, "step": 7913 }, { "epoch": 2.5644847699287103, "grad_norm": 0.46512550115585327, "learning_rate": 5.42587450251072e-07, "loss": 0.0641, "step": 7914 }, { "epoch": 2.5648088139987038, "grad_norm": 0.5445234179496765, "learning_rate": 5.417952316076069e-07, "loss": 0.0805, "step": 7915 }, { "epoch": 2.565132858068697, "grad_norm": 0.5239993929862976, "learning_rate": 5.410035586052481e-07, "loss": 0.0711, "step": 7916 }, { "epoch": 2.565456902138691, "grad_norm": 0.46684685349464417, "learning_rate": 5.402124313408868e-07, "loss": 0.0678, "step": 7917 }, { "epoch": 2.5657809462086845, "grad_norm": 0.46517103910446167, "learning_rate": 5.394218499113496e-07, "loss": 0.0673, "step": 7918 }, { "epoch": 2.566104990278678, "grad_norm": 0.4972522556781769, "learning_rate": 5.386318144133961e-07, "loss": 0.0729, "step": 7919 }, { "epoch": 2.5664290343486713, "grad_norm": 0.5065658688545227, "learning_rate": 5.378423249437193e-07, "loss": 0.0784, "step": 7920 }, { "epoch": 2.5667530784186647, "grad_norm": 0.477154940366745, "learning_rate": 5.370533815989459e-07, "loss": 0.0635, "step": 7921 }, { "epoch": 2.5670771224886586, "grad_norm": 0.5431901812553406, "learning_rate": 5.362649844756318e-07, "loss": 0.0738, "step": 7922 }, { "epoch": 2.567401166558652, "grad_norm": 0.4951257109642029, "learning_rate": 5.354771336702735e-07, "loss": 0.0708, "step": 7923 }, { "epoch": 2.5677252106286454, "grad_norm": 0.4686160087585449, "learning_rate": 5.346898292792919e-07, "loss": 0.0678, "step": 7924 }, { "epoch": 2.568049254698639, "grad_norm": 0.5159788131713867, "learning_rate": 5.339030713990495e-07, "loss": 0.0744, "step": 7925 }, { "epoch": 2.5683732987686323, "grad_norm": 0.5049765706062317, "learning_rate": 5.331168601258352e-07, "loss": 0.0774, "step": 7926 }, { "epoch": 2.568697342838626, "grad_norm": 0.4748237729072571, "learning_rate": 5.323311955558746e-07, "loss": 0.0686, "step": 7927 }, { "epoch": 2.5690213869086196, "grad_norm": 0.4686489701271057, "learning_rate": 5.315460777853249e-07, "loss": 0.0702, "step": 7928 }, { "epoch": 2.569345430978613, "grad_norm": 0.5975189805030823, "learning_rate": 5.307615069102773e-07, "loss": 0.0668, "step": 7929 }, { "epoch": 2.569669475048607, "grad_norm": 0.49249792098999023, "learning_rate": 5.299774830267573e-07, "loss": 0.0753, "step": 7930 }, { "epoch": 2.5699935191186, "grad_norm": 0.4822183847427368, "learning_rate": 5.291940062307177e-07, "loss": 0.0718, "step": 7931 }, { "epoch": 2.5703175631885937, "grad_norm": 0.5218304395675659, "learning_rate": 5.284110766180528e-07, "loss": 0.0716, "step": 7932 }, { "epoch": 2.570641607258587, "grad_norm": 0.4451057016849518, "learning_rate": 5.276286942845832e-07, "loss": 0.0644, "step": 7933 }, { "epoch": 2.5709656513285806, "grad_norm": 0.45064833760261536, "learning_rate": 5.268468593260656e-07, "loss": 0.0674, "step": 7934 }, { "epoch": 2.5712896953985744, "grad_norm": 0.5059220790863037, "learning_rate": 5.260655718381885e-07, "loss": 0.0714, "step": 7935 }, { "epoch": 2.571613739468568, "grad_norm": 0.48486247658729553, "learning_rate": 5.252848319165744e-07, "loss": 0.0703, "step": 7936 }, { "epoch": 2.5719377835385613, "grad_norm": 0.5442525744438171, "learning_rate": 5.245046396567788e-07, "loss": 0.0773, "step": 7937 }, { "epoch": 2.5722618276085547, "grad_norm": 0.4933522939682007, "learning_rate": 5.237249951542895e-07, "loss": 0.0727, "step": 7938 }, { "epoch": 2.572585871678548, "grad_norm": 0.49625441431999207, "learning_rate": 5.229458985045265e-07, "loss": 0.0754, "step": 7939 }, { "epoch": 2.572909915748542, "grad_norm": 0.4622628390789032, "learning_rate": 5.22167349802844e-07, "loss": 0.0646, "step": 7940 }, { "epoch": 2.5732339598185354, "grad_norm": 0.5263717174530029, "learning_rate": 5.213893491445293e-07, "loss": 0.0758, "step": 7941 }, { "epoch": 2.573558003888529, "grad_norm": 0.46194687485694885, "learning_rate": 5.206118966248019e-07, "loss": 0.0685, "step": 7942 }, { "epoch": 2.5738820479585223, "grad_norm": 0.5024887323379517, "learning_rate": 5.198349923388146e-07, "loss": 0.0741, "step": 7943 }, { "epoch": 2.5742060920285157, "grad_norm": 0.48845016956329346, "learning_rate": 5.19058636381653e-07, "loss": 0.0737, "step": 7944 }, { "epoch": 2.5745301360985096, "grad_norm": 0.451090544462204, "learning_rate": 5.18282828848336e-07, "loss": 0.0654, "step": 7945 }, { "epoch": 2.574854180168503, "grad_norm": 0.5033639669418335, "learning_rate": 5.175075698338128e-07, "loss": 0.0735, "step": 7946 }, { "epoch": 2.5751782242384964, "grad_norm": 0.47307324409484863, "learning_rate": 5.167328594329707e-07, "loss": 0.0698, "step": 7947 }, { "epoch": 2.57550226830849, "grad_norm": 0.48564478754997253, "learning_rate": 5.159586977406244e-07, "loss": 0.0706, "step": 7948 }, { "epoch": 2.5758263123784833, "grad_norm": 0.4771139621734619, "learning_rate": 5.151850848515249e-07, "loss": 0.0665, "step": 7949 }, { "epoch": 2.576150356448477, "grad_norm": 0.4719444811344147, "learning_rate": 5.144120208603542e-07, "loss": 0.0702, "step": 7950 }, { "epoch": 2.5764744005184705, "grad_norm": 0.4773831367492676, "learning_rate": 5.136395058617289e-07, "loss": 0.072, "step": 7951 }, { "epoch": 2.576798444588464, "grad_norm": 0.4886631667613983, "learning_rate": 5.12867539950197e-07, "loss": 0.0686, "step": 7952 }, { "epoch": 2.5771224886584574, "grad_norm": 0.48384395241737366, "learning_rate": 5.120961232202382e-07, "loss": 0.072, "step": 7953 }, { "epoch": 2.577446532728451, "grad_norm": 0.5068090558052063, "learning_rate": 5.11325255766269e-07, "loss": 0.0766, "step": 7954 }, { "epoch": 2.5777705767984447, "grad_norm": 0.43233636021614075, "learning_rate": 5.10554937682633e-07, "loss": 0.0612, "step": 7955 }, { "epoch": 2.578094620868438, "grad_norm": 0.5227373242378235, "learning_rate": 5.097851690636135e-07, "loss": 0.0731, "step": 7956 }, { "epoch": 2.5784186649384315, "grad_norm": 0.4451727271080017, "learning_rate": 5.090159500034198e-07, "loss": 0.0621, "step": 7957 }, { "epoch": 2.5787427090084254, "grad_norm": 0.4760894477367401, "learning_rate": 5.082472805961974e-07, "loss": 0.0677, "step": 7958 }, { "epoch": 2.579066753078419, "grad_norm": 0.5404555797576904, "learning_rate": 5.074791609360241e-07, "loss": 0.0773, "step": 7959 }, { "epoch": 2.5793907971484122, "grad_norm": 0.44913235306739807, "learning_rate": 5.067115911169113e-07, "loss": 0.0683, "step": 7960 }, { "epoch": 2.5797148412184057, "grad_norm": 0.5068316459655762, "learning_rate": 5.059445712328015e-07, "loss": 0.076, "step": 7961 }, { "epoch": 2.580038885288399, "grad_norm": 0.4982582926750183, "learning_rate": 5.051781013775687e-07, "loss": 0.0736, "step": 7962 }, { "epoch": 2.580362929358393, "grad_norm": 0.4836603105068207, "learning_rate": 5.044121816450254e-07, "loss": 0.0739, "step": 7963 }, { "epoch": 2.5806869734283864, "grad_norm": 0.4805230498313904, "learning_rate": 5.03646812128909e-07, "loss": 0.0673, "step": 7964 }, { "epoch": 2.58101101749838, "grad_norm": 0.45795536041259766, "learning_rate": 5.028819929228945e-07, "loss": 0.0672, "step": 7965 }, { "epoch": 2.5813350615683732, "grad_norm": 0.5124770998954773, "learning_rate": 5.021177241205894e-07, "loss": 0.0724, "step": 7966 }, { "epoch": 2.5816591056383666, "grad_norm": 0.4507506489753723, "learning_rate": 5.013540058155314e-07, "loss": 0.0649, "step": 7967 }, { "epoch": 2.5819831497083605, "grad_norm": 0.4851083755493164, "learning_rate": 5.005908381011926e-07, "loss": 0.0685, "step": 7968 }, { "epoch": 2.582307193778354, "grad_norm": 0.4840677082538605, "learning_rate": 4.998282210709788e-07, "loss": 0.0716, "step": 7969 }, { "epoch": 2.5826312378483474, "grad_norm": 0.48498862981796265, "learning_rate": 4.990661548182252e-07, "loss": 0.0751, "step": 7970 }, { "epoch": 2.582955281918341, "grad_norm": 0.4641924500465393, "learning_rate": 4.983046394362013e-07, "loss": 0.0707, "step": 7971 }, { "epoch": 2.583279325988334, "grad_norm": 0.48013532161712646, "learning_rate": 4.975436750181095e-07, "loss": 0.065, "step": 7972 }, { "epoch": 2.583603370058328, "grad_norm": 0.4640522599220276, "learning_rate": 4.96783261657085e-07, "loss": 0.0677, "step": 7973 }, { "epoch": 2.5839274141283215, "grad_norm": 0.4950551688671112, "learning_rate": 4.960233994461949e-07, "loss": 0.0713, "step": 7974 }, { "epoch": 2.584251458198315, "grad_norm": 0.45873478055000305, "learning_rate": 4.952640884784387e-07, "loss": 0.0626, "step": 7975 }, { "epoch": 2.5845755022683083, "grad_norm": 0.5119264721870422, "learning_rate": 4.9450532884675e-07, "loss": 0.0767, "step": 7976 }, { "epoch": 2.5848995463383018, "grad_norm": 0.5016889572143555, "learning_rate": 4.937471206439903e-07, "loss": 0.0724, "step": 7977 }, { "epoch": 2.5852235904082956, "grad_norm": 0.4209315776824951, "learning_rate": 4.929894639629612e-07, "loss": 0.0608, "step": 7978 }, { "epoch": 2.585547634478289, "grad_norm": 0.46673160791397095, "learning_rate": 4.92232358896389e-07, "loss": 0.0661, "step": 7979 }, { "epoch": 2.5858716785482825, "grad_norm": 0.5124037861824036, "learning_rate": 4.914758055369389e-07, "loss": 0.0793, "step": 7980 }, { "epoch": 2.5861957226182763, "grad_norm": 0.5049538612365723, "learning_rate": 4.907198039772032e-07, "loss": 0.076, "step": 7981 }, { "epoch": 2.5865197666882693, "grad_norm": 0.4571637809276581, "learning_rate": 4.899643543097104e-07, "loss": 0.0637, "step": 7982 }, { "epoch": 2.586843810758263, "grad_norm": 0.4785764813423157, "learning_rate": 4.892094566269212e-07, "loss": 0.0713, "step": 7983 }, { "epoch": 2.5871678548282566, "grad_norm": 0.48923173546791077, "learning_rate": 4.884551110212249e-07, "loss": 0.0677, "step": 7984 }, { "epoch": 2.58749189889825, "grad_norm": 0.48202604055404663, "learning_rate": 4.877013175849493e-07, "loss": 0.0677, "step": 7985 }, { "epoch": 2.587815942968244, "grad_norm": 0.48565518856048584, "learning_rate": 4.869480764103485e-07, "loss": 0.0699, "step": 7986 }, { "epoch": 2.5881399870382373, "grad_norm": 0.44332680106163025, "learning_rate": 4.861953875896153e-07, "loss": 0.0672, "step": 7987 }, { "epoch": 2.5884640311082308, "grad_norm": 0.48864465951919556, "learning_rate": 4.854432512148682e-07, "loss": 0.0652, "step": 7988 }, { "epoch": 2.588788075178224, "grad_norm": 0.5096166133880615, "learning_rate": 4.846916673781632e-07, "loss": 0.0732, "step": 7989 }, { "epoch": 2.5891121192482176, "grad_norm": 0.5203856229782104, "learning_rate": 4.839406361714865e-07, "loss": 0.0804, "step": 7990 }, { "epoch": 2.5894361633182115, "grad_norm": 0.45725682377815247, "learning_rate": 4.831901576867575e-07, "loss": 0.0688, "step": 7991 }, { "epoch": 2.589760207388205, "grad_norm": 0.4365238845348358, "learning_rate": 4.824402320158267e-07, "loss": 0.0669, "step": 7992 }, { "epoch": 2.5900842514581983, "grad_norm": 0.5002686977386475, "learning_rate": 4.816908592504794e-07, "loss": 0.0752, "step": 7993 }, { "epoch": 2.5904082955281917, "grad_norm": 0.5027570128440857, "learning_rate": 4.809420394824288e-07, "loss": 0.0771, "step": 7994 }, { "epoch": 2.590732339598185, "grad_norm": 0.48780351877212524, "learning_rate": 4.801937728033251e-07, "loss": 0.0705, "step": 7995 }, { "epoch": 2.591056383668179, "grad_norm": 0.5296880006790161, "learning_rate": 4.794460593047484e-07, "loss": 0.078, "step": 7996 }, { "epoch": 2.5913804277381725, "grad_norm": 0.5127536654472351, "learning_rate": 4.786988990782115e-07, "loss": 0.0731, "step": 7997 }, { "epoch": 2.591704471808166, "grad_norm": 0.46108219027519226, "learning_rate": 4.779522922151597e-07, "loss": 0.0672, "step": 7998 }, { "epoch": 2.5920285158781593, "grad_norm": 0.48438650369644165, "learning_rate": 4.77206238806971e-07, "loss": 0.0699, "step": 7999 }, { "epoch": 2.5923525599481527, "grad_norm": 0.4760129153728485, "learning_rate": 4.7646073894495546e-07, "loss": 0.0659, "step": 8000 }, { "epoch": 2.5926766040181466, "grad_norm": 0.495082825422287, "learning_rate": 4.757157927203521e-07, "loss": 0.0712, "step": 8001 }, { "epoch": 2.59300064808814, "grad_norm": 0.4683179557323456, "learning_rate": 4.749714002243394e-07, "loss": 0.0674, "step": 8002 }, { "epoch": 2.5933246921581334, "grad_norm": 0.4620020389556885, "learning_rate": 4.742275615480202e-07, "loss": 0.0688, "step": 8003 }, { "epoch": 2.593648736228127, "grad_norm": 0.49512195587158203, "learning_rate": 4.734842767824349e-07, "loss": 0.0727, "step": 8004 }, { "epoch": 2.5939727802981203, "grad_norm": 0.5007457137107849, "learning_rate": 4.7274154601855524e-07, "loss": 0.0729, "step": 8005 }, { "epoch": 2.594296824368114, "grad_norm": 0.4940263032913208, "learning_rate": 4.7199936934728073e-07, "loss": 0.0749, "step": 8006 }, { "epoch": 2.5946208684381076, "grad_norm": 0.4439502954483032, "learning_rate": 4.712577468594515e-07, "loss": 0.0619, "step": 8007 }, { "epoch": 2.594944912508101, "grad_norm": 0.5139973163604736, "learning_rate": 4.7051667864582983e-07, "loss": 0.0727, "step": 8008 }, { "epoch": 2.595268956578095, "grad_norm": 0.48900654911994934, "learning_rate": 4.6977616479711997e-07, "loss": 0.0689, "step": 8009 }, { "epoch": 2.5955930006480883, "grad_norm": 0.5214628577232361, "learning_rate": 4.690362054039499e-07, "loss": 0.0753, "step": 8010 }, { "epoch": 2.5959170447180817, "grad_norm": 0.47031134366989136, "learning_rate": 4.682968005568872e-07, "loss": 0.0654, "step": 8011 }, { "epoch": 2.596241088788075, "grad_norm": 0.5013682246208191, "learning_rate": 4.6755795034642447e-07, "loss": 0.0743, "step": 8012 }, { "epoch": 2.5965651328580686, "grad_norm": 0.48858991265296936, "learning_rate": 4.6681965486299164e-07, "loss": 0.0691, "step": 8013 }, { "epoch": 2.5968891769280624, "grad_norm": 0.4517333209514618, "learning_rate": 4.6608191419694803e-07, "loss": 0.0619, "step": 8014 }, { "epoch": 2.597213220998056, "grad_norm": 0.4958752691745758, "learning_rate": 4.6534472843858647e-07, "loss": 0.0731, "step": 8015 }, { "epoch": 2.5975372650680493, "grad_norm": 0.487255722284317, "learning_rate": 4.646080976781325e-07, "loss": 0.067, "step": 8016 }, { "epoch": 2.5978613091380427, "grad_norm": 0.5102446675300598, "learning_rate": 4.638720220057402e-07, "loss": 0.0723, "step": 8017 }, { "epoch": 2.598185353208036, "grad_norm": 0.4882889986038208, "learning_rate": 4.631365015114991e-07, "loss": 0.0713, "step": 8018 }, { "epoch": 2.59850939727803, "grad_norm": 0.4748913049697876, "learning_rate": 4.624015362854306e-07, "loss": 0.0638, "step": 8019 }, { "epoch": 2.5988334413480234, "grad_norm": 0.5041070580482483, "learning_rate": 4.61667126417486e-07, "loss": 0.0793, "step": 8020 }, { "epoch": 2.599157485418017, "grad_norm": 0.4840593636035919, "learning_rate": 4.609332719975512e-07, "loss": 0.0708, "step": 8021 }, { "epoch": 2.5994815294880103, "grad_norm": 0.4602346420288086, "learning_rate": 4.601999731154422e-07, "loss": 0.0683, "step": 8022 }, { "epoch": 2.5998055735580037, "grad_norm": 0.48712125420570374, "learning_rate": 4.5946722986090764e-07, "loss": 0.0751, "step": 8023 }, { "epoch": 2.6001296176279975, "grad_norm": 0.5014486312866211, "learning_rate": 4.587350423236292e-07, "loss": 0.0761, "step": 8024 }, { "epoch": 2.600453661697991, "grad_norm": 0.4699751138687134, "learning_rate": 4.5800341059321797e-07, "loss": 0.0704, "step": 8025 }, { "epoch": 2.6007777057679844, "grad_norm": 0.4659191071987152, "learning_rate": 4.572723347592195e-07, "loss": 0.0678, "step": 8026 }, { "epoch": 2.601101749837978, "grad_norm": 0.4622977077960968, "learning_rate": 4.5654181491111004e-07, "loss": 0.0683, "step": 8027 }, { "epoch": 2.6014257939079712, "grad_norm": 0.49469050765037537, "learning_rate": 4.558118511382986e-07, "loss": 0.0721, "step": 8028 }, { "epoch": 2.601749837977965, "grad_norm": 0.4948549270629883, "learning_rate": 4.550824435301249e-07, "loss": 0.0746, "step": 8029 }, { "epoch": 2.6020738820479585, "grad_norm": 0.46329209208488464, "learning_rate": 4.543535921758624e-07, "loss": 0.0666, "step": 8030 }, { "epoch": 2.602397926117952, "grad_norm": 0.45832377672195435, "learning_rate": 4.5362529716471594e-07, "loss": 0.0662, "step": 8031 }, { "epoch": 2.602721970187946, "grad_norm": 0.46872031688690186, "learning_rate": 4.5289755858581865e-07, "loss": 0.0684, "step": 8032 }, { "epoch": 2.6030460142579392, "grad_norm": 0.46378257870674133, "learning_rate": 4.5217037652824256e-07, "loss": 0.0694, "step": 8033 }, { "epoch": 2.6033700583279327, "grad_norm": 0.4749624729156494, "learning_rate": 4.514437510809855e-07, "loss": 0.0671, "step": 8034 }, { "epoch": 2.603694102397926, "grad_norm": 0.5038319826126099, "learning_rate": 4.507176823329795e-07, "loss": 0.0703, "step": 8035 }, { "epoch": 2.6040181464679195, "grad_norm": 0.4624274969100952, "learning_rate": 4.4999217037308864e-07, "loss": 0.0684, "step": 8036 }, { "epoch": 2.6043421905379134, "grad_norm": 0.46600112318992615, "learning_rate": 4.4926721529010895e-07, "loss": 0.0695, "step": 8037 }, { "epoch": 2.604666234607907, "grad_norm": 0.4593249559402466, "learning_rate": 4.485428171727685e-07, "loss": 0.0673, "step": 8038 }, { "epoch": 2.6049902786779002, "grad_norm": 0.46818676590919495, "learning_rate": 4.4781897610972347e-07, "loss": 0.0695, "step": 8039 }, { "epoch": 2.6053143227478937, "grad_norm": 0.48558634519577026, "learning_rate": 4.470956921895697e-07, "loss": 0.07, "step": 8040 }, { "epoch": 2.605638366817887, "grad_norm": 0.4467635750770569, "learning_rate": 4.4637296550082533e-07, "loss": 0.0635, "step": 8041 }, { "epoch": 2.605962410887881, "grad_norm": 0.47603222727775574, "learning_rate": 4.456507961319495e-07, "loss": 0.0687, "step": 8042 }, { "epoch": 2.6062864549578744, "grad_norm": 0.4740268290042877, "learning_rate": 4.44929184171326e-07, "loss": 0.0671, "step": 8043 }, { "epoch": 2.606610499027868, "grad_norm": 0.45359960198402405, "learning_rate": 4.442081297072731e-07, "loss": 0.0666, "step": 8044 }, { "epoch": 2.606934543097861, "grad_norm": 0.527958333492279, "learning_rate": 4.434876328280424e-07, "loss": 0.0725, "step": 8045 }, { "epoch": 2.6072585871678546, "grad_norm": 0.5175161957740784, "learning_rate": 4.42767693621815e-07, "loss": 0.0733, "step": 8046 }, { "epoch": 2.6075826312378485, "grad_norm": 0.4354493021965027, "learning_rate": 4.42048312176705e-07, "loss": 0.0649, "step": 8047 }, { "epoch": 2.607906675307842, "grad_norm": 0.5269905924797058, "learning_rate": 4.413294885807562e-07, "loss": 0.0789, "step": 8048 }, { "epoch": 2.6082307193778353, "grad_norm": 0.5139177441596985, "learning_rate": 4.4061122292194725e-07, "loss": 0.072, "step": 8049 }, { "epoch": 2.6085547634478288, "grad_norm": 0.4772281348705292, "learning_rate": 4.398935152881856e-07, "loss": 0.07, "step": 8050 }, { "epoch": 2.608878807517822, "grad_norm": 0.4745042026042938, "learning_rate": 4.391763657673126e-07, "loss": 0.0686, "step": 8051 }, { "epoch": 2.609202851587816, "grad_norm": 0.512739896774292, "learning_rate": 4.384597744471009e-07, "loss": 0.0761, "step": 8052 }, { "epoch": 2.6095268956578095, "grad_norm": 0.49782106280326843, "learning_rate": 4.37743741415253e-07, "loss": 0.0722, "step": 8053 }, { "epoch": 2.609850939727803, "grad_norm": 0.5118449330329895, "learning_rate": 4.37028266759405e-07, "loss": 0.0726, "step": 8054 }, { "epoch": 2.6101749837977968, "grad_norm": 0.5125095844268799, "learning_rate": 4.363133505671252e-07, "loss": 0.0733, "step": 8055 }, { "epoch": 2.6104990278677898, "grad_norm": 0.48923948407173157, "learning_rate": 4.355989929259108e-07, "loss": 0.069, "step": 8056 }, { "epoch": 2.6108230719377836, "grad_norm": 0.49759066104888916, "learning_rate": 4.348851939231924e-07, "loss": 0.0753, "step": 8057 }, { "epoch": 2.611147116007777, "grad_norm": 0.4820518493652344, "learning_rate": 4.341719536463329e-07, "loss": 0.0719, "step": 8058 }, { "epoch": 2.6114711600777705, "grad_norm": 0.49614572525024414, "learning_rate": 4.3345927218262583e-07, "loss": 0.0686, "step": 8059 }, { "epoch": 2.6117952041477643, "grad_norm": 0.48075005412101746, "learning_rate": 4.3274714961929643e-07, "loss": 0.0714, "step": 8060 }, { "epoch": 2.6121192482177578, "grad_norm": 0.5026248693466187, "learning_rate": 4.320355860435005e-07, "loss": 0.0741, "step": 8061 }, { "epoch": 2.612443292287751, "grad_norm": 0.47251930832862854, "learning_rate": 4.313245815423289e-07, "loss": 0.071, "step": 8062 }, { "epoch": 2.6127673363577446, "grad_norm": 0.5004166960716248, "learning_rate": 4.3061413620279825e-07, "loss": 0.0743, "step": 8063 }, { "epoch": 2.613091380427738, "grad_norm": 0.5230023264884949, "learning_rate": 4.2990425011186443e-07, "loss": 0.0813, "step": 8064 }, { "epoch": 2.613415424497732, "grad_norm": 0.4912107586860657, "learning_rate": 4.2919492335640744e-07, "loss": 0.0718, "step": 8065 }, { "epoch": 2.6137394685677253, "grad_norm": 0.47911426424980164, "learning_rate": 4.284861560232428e-07, "loss": 0.0704, "step": 8066 }, { "epoch": 2.6140635126377187, "grad_norm": 0.5090230703353882, "learning_rate": 4.2777794819911733e-07, "loss": 0.0757, "step": 8067 }, { "epoch": 2.614387556707712, "grad_norm": 0.45187073945999146, "learning_rate": 4.270702999707083e-07, "loss": 0.0649, "step": 8068 }, { "epoch": 2.6147116007777056, "grad_norm": 0.488574355840683, "learning_rate": 4.263632114246263e-07, "loss": 0.0716, "step": 8069 }, { "epoch": 2.6150356448476995, "grad_norm": 0.48780202865600586, "learning_rate": 4.25656682647409e-07, "loss": 0.0708, "step": 8070 }, { "epoch": 2.615359688917693, "grad_norm": 0.4608360230922699, "learning_rate": 4.2495071372553263e-07, "loss": 0.0722, "step": 8071 }, { "epoch": 2.6156837329876863, "grad_norm": 0.5059040188789368, "learning_rate": 4.242453047453976e-07, "loss": 0.075, "step": 8072 }, { "epoch": 2.6160077770576797, "grad_norm": 0.47739937901496887, "learning_rate": 4.235404557933409e-07, "loss": 0.0655, "step": 8073 }, { "epoch": 2.616331821127673, "grad_norm": 0.47227174043655396, "learning_rate": 4.2283616695562856e-07, "loss": 0.0666, "step": 8074 }, { "epoch": 2.616655865197667, "grad_norm": 0.4968879222869873, "learning_rate": 4.221324383184594e-07, "loss": 0.0717, "step": 8075 }, { "epoch": 2.6169799092676604, "grad_norm": 0.48942700028419495, "learning_rate": 4.214292699679623e-07, "loss": 0.0737, "step": 8076 }, { "epoch": 2.617303953337654, "grad_norm": 0.47408753633499146, "learning_rate": 4.20726661990199e-07, "loss": 0.0686, "step": 8077 }, { "epoch": 2.6176279974076473, "grad_norm": 0.5070276856422424, "learning_rate": 4.2002461447116174e-07, "loss": 0.0726, "step": 8078 }, { "epoch": 2.6179520414776407, "grad_norm": 0.47806498408317566, "learning_rate": 4.1932312749677353e-07, "loss": 0.0693, "step": 8079 }, { "epoch": 2.6182760855476346, "grad_norm": 0.47707250714302063, "learning_rate": 4.186222011528901e-07, "loss": 0.0685, "step": 8080 }, { "epoch": 2.618600129617628, "grad_norm": 0.5167728066444397, "learning_rate": 4.179218355252984e-07, "loss": 0.0764, "step": 8081 }, { "epoch": 2.6189241736876214, "grad_norm": 0.49237996339797974, "learning_rate": 4.1722203069971547e-07, "loss": 0.0696, "step": 8082 }, { "epoch": 2.6192482177576153, "grad_norm": 0.4985239505767822, "learning_rate": 4.165227867617916e-07, "loss": 0.0772, "step": 8083 }, { "epoch": 2.6195722618276087, "grad_norm": 0.4949018359184265, "learning_rate": 4.158241037971078e-07, "loss": 0.071, "step": 8084 }, { "epoch": 2.619896305897602, "grad_norm": 0.44384217262268066, "learning_rate": 4.15125981891174e-07, "loss": 0.0629, "step": 8085 }, { "epoch": 2.6202203499675956, "grad_norm": 0.45753324031829834, "learning_rate": 4.1442842112943635e-07, "loss": 0.0643, "step": 8086 }, { "epoch": 2.620544394037589, "grad_norm": 0.4839048981666565, "learning_rate": 4.1373142159726766e-07, "loss": 0.0717, "step": 8087 }, { "epoch": 2.620868438107583, "grad_norm": 0.49162375926971436, "learning_rate": 4.1303498337997407e-07, "loss": 0.076, "step": 8088 }, { "epoch": 2.6211924821775763, "grad_norm": 0.5309725999832153, "learning_rate": 4.123391065627935e-07, "loss": 0.0811, "step": 8089 }, { "epoch": 2.6215165262475697, "grad_norm": 0.45731958746910095, "learning_rate": 4.116437912308946e-07, "loss": 0.0646, "step": 8090 }, { "epoch": 2.621840570317563, "grad_norm": 0.5169887542724609, "learning_rate": 4.1094903746937755e-07, "loss": 0.0755, "step": 8091 }, { "epoch": 2.6221646143875565, "grad_norm": 0.4816588759422302, "learning_rate": 4.10254845363271e-07, "loss": 0.0692, "step": 8092 }, { "epoch": 2.6224886584575504, "grad_norm": 0.48074889183044434, "learning_rate": 4.095612149975409e-07, "loss": 0.0708, "step": 8093 }, { "epoch": 2.622812702527544, "grad_norm": 0.5060257315635681, "learning_rate": 4.0886814645707765e-07, "loss": 0.0706, "step": 8094 }, { "epoch": 2.6231367465975373, "grad_norm": 0.49364620447158813, "learning_rate": 4.081756398267089e-07, "loss": 0.067, "step": 8095 }, { "epoch": 2.6234607906675307, "grad_norm": 0.4569275677204132, "learning_rate": 4.0748369519118926e-07, "loss": 0.0688, "step": 8096 }, { "epoch": 2.623784834737524, "grad_norm": 0.47600606083869934, "learning_rate": 4.067923126352058e-07, "loss": 0.0685, "step": 8097 }, { "epoch": 2.624108878807518, "grad_norm": 0.491263210773468, "learning_rate": 4.061014922433781e-07, "loss": 0.0678, "step": 8098 }, { "epoch": 2.6244329228775114, "grad_norm": 0.47192656993865967, "learning_rate": 4.054112341002553e-07, "loss": 0.0696, "step": 8099 }, { "epoch": 2.624756966947505, "grad_norm": 0.511062502861023, "learning_rate": 4.047215382903191e-07, "loss": 0.0808, "step": 8100 }, { "epoch": 2.6250810110174982, "grad_norm": 0.5204436779022217, "learning_rate": 4.040324048979788e-07, "loss": 0.076, "step": 8101 }, { "epoch": 2.6254050550874917, "grad_norm": 0.4860605001449585, "learning_rate": 4.0334383400758184e-07, "loss": 0.0718, "step": 8102 }, { "epoch": 2.6257290991574855, "grad_norm": 0.48059844970703125, "learning_rate": 4.026558257033997e-07, "loss": 0.0709, "step": 8103 }, { "epoch": 2.626053143227479, "grad_norm": 0.48472920060157776, "learning_rate": 4.019683800696389e-07, "loss": 0.064, "step": 8104 }, { "epoch": 2.6263771872974724, "grad_norm": 0.48184579610824585, "learning_rate": 4.0128149719043554e-07, "loss": 0.0717, "step": 8105 }, { "epoch": 2.6267012313674662, "grad_norm": 0.5004919171333313, "learning_rate": 4.0059517714985786e-07, "loss": 0.071, "step": 8106 }, { "epoch": 2.6270252754374592, "grad_norm": 0.4796049892902374, "learning_rate": 3.9990942003190535e-07, "loss": 0.0696, "step": 8107 }, { "epoch": 2.627349319507453, "grad_norm": 0.4539622366428375, "learning_rate": 3.9922422592050704e-07, "loss": 0.067, "step": 8108 }, { "epoch": 2.6276733635774465, "grad_norm": 0.5464158654212952, "learning_rate": 3.985395948995258e-07, "loss": 0.0761, "step": 8109 }, { "epoch": 2.62799740764744, "grad_norm": 0.4742254316806793, "learning_rate": 3.978555270527512e-07, "loss": 0.0722, "step": 8110 }, { "epoch": 2.628321451717434, "grad_norm": 0.45420706272125244, "learning_rate": 3.9717202246390807e-07, "loss": 0.0678, "step": 8111 }, { "epoch": 2.6286454957874272, "grad_norm": 0.4992641508579254, "learning_rate": 3.964890812166505e-07, "loss": 0.0748, "step": 8112 }, { "epoch": 2.6289695398574207, "grad_norm": 0.48898693919181824, "learning_rate": 3.9580670339456393e-07, "loss": 0.0748, "step": 8113 }, { "epoch": 2.629293583927414, "grad_norm": 0.519314706325531, "learning_rate": 3.951248890811649e-07, "loss": 0.0756, "step": 8114 }, { "epoch": 2.6296176279974075, "grad_norm": 0.4789257049560547, "learning_rate": 3.9444363835990207e-07, "loss": 0.0706, "step": 8115 }, { "epoch": 2.6299416720674014, "grad_norm": 0.5213720202445984, "learning_rate": 3.9376295131415056e-07, "loss": 0.0741, "step": 8116 }, { "epoch": 2.630265716137395, "grad_norm": 0.4598376452922821, "learning_rate": 3.9308282802722365e-07, "loss": 0.0639, "step": 8117 }, { "epoch": 2.630589760207388, "grad_norm": 0.48026615381240845, "learning_rate": 3.924032685823581e-07, "loss": 0.0698, "step": 8118 }, { "epoch": 2.6309138042773816, "grad_norm": 0.4799683392047882, "learning_rate": 3.917242730627296e-07, "loss": 0.0684, "step": 8119 }, { "epoch": 2.631237848347375, "grad_norm": 0.46403399109840393, "learning_rate": 3.910458415514379e-07, "loss": 0.0709, "step": 8120 }, { "epoch": 2.631561892417369, "grad_norm": 0.5058795809745789, "learning_rate": 3.9036797413151693e-07, "loss": 0.0774, "step": 8121 }, { "epoch": 2.6318859364873624, "grad_norm": 0.4726886749267578, "learning_rate": 3.896906708859322e-07, "loss": 0.0653, "step": 8122 }, { "epoch": 2.6322099805573558, "grad_norm": 0.49544644355773926, "learning_rate": 3.8901393189757607e-07, "loss": 0.07, "step": 8123 }, { "epoch": 2.632534024627349, "grad_norm": 0.490854948759079, "learning_rate": 3.883377572492786e-07, "loss": 0.0727, "step": 8124 }, { "epoch": 2.6328580686973426, "grad_norm": 0.456512451171875, "learning_rate": 3.8766214702379344e-07, "loss": 0.0669, "step": 8125 }, { "epoch": 2.6331821127673365, "grad_norm": 0.4973542392253876, "learning_rate": 3.8698710130381237e-07, "loss": 0.0727, "step": 8126 }, { "epoch": 2.63350615683733, "grad_norm": 0.49359431862831116, "learning_rate": 3.863126201719519e-07, "loss": 0.0714, "step": 8127 }, { "epoch": 2.6338302009073233, "grad_norm": 0.46149107813835144, "learning_rate": 3.8563870371076283e-07, "loss": 0.066, "step": 8128 }, { "epoch": 2.6341542449773168, "grad_norm": 0.4933355748653412, "learning_rate": 3.8496535200272635e-07, "loss": 0.0693, "step": 8129 }, { "epoch": 2.63447828904731, "grad_norm": 0.5131371021270752, "learning_rate": 3.842925651302531e-07, "loss": 0.0802, "step": 8130 }, { "epoch": 2.634802333117304, "grad_norm": 0.5407600998878479, "learning_rate": 3.836203431756874e-07, "loss": 0.0793, "step": 8131 }, { "epoch": 2.6351263771872975, "grad_norm": 0.48129767179489136, "learning_rate": 3.8294868622130056e-07, "loss": 0.0675, "step": 8132 }, { "epoch": 2.635450421257291, "grad_norm": 0.5019082427024841, "learning_rate": 3.82277594349299e-07, "loss": 0.0712, "step": 8133 }, { "epoch": 2.6357744653272848, "grad_norm": 0.47515133023262024, "learning_rate": 3.8160706764181596e-07, "loss": 0.0664, "step": 8134 }, { "epoch": 2.636098509397278, "grad_norm": 0.49677157402038574, "learning_rate": 3.8093710618091915e-07, "loss": 0.0709, "step": 8135 }, { "epoch": 2.6364225534672716, "grad_norm": 0.450471967458725, "learning_rate": 3.802677100486035e-07, "loss": 0.0704, "step": 8136 }, { "epoch": 2.636746597537265, "grad_norm": 0.5206382870674133, "learning_rate": 3.795988793267985e-07, "loss": 0.0744, "step": 8137 }, { "epoch": 2.6370706416072585, "grad_norm": 0.5079112648963928, "learning_rate": 3.7893061409736143e-07, "loss": 0.0708, "step": 8138 }, { "epoch": 2.6373946856772523, "grad_norm": 0.444327712059021, "learning_rate": 3.782629144420824e-07, "loss": 0.065, "step": 8139 }, { "epoch": 2.6377187297472457, "grad_norm": 0.4883805811405182, "learning_rate": 3.775957804426794e-07, "loss": 0.0723, "step": 8140 }, { "epoch": 2.638042773817239, "grad_norm": 0.511352002620697, "learning_rate": 3.7692921218080604e-07, "loss": 0.0783, "step": 8141 }, { "epoch": 2.6383668178872326, "grad_norm": 0.4845573306083679, "learning_rate": 3.762632097380414e-07, "loss": 0.0685, "step": 8142 }, { "epoch": 2.638690861957226, "grad_norm": 0.46998390555381775, "learning_rate": 3.7559777319589873e-07, "loss": 0.0694, "step": 8143 }, { "epoch": 2.63901490602722, "grad_norm": 0.4673968553543091, "learning_rate": 3.749329026358212e-07, "loss": 0.0666, "step": 8144 }, { "epoch": 2.6393389500972133, "grad_norm": 0.48253533244132996, "learning_rate": 3.7426859813918194e-07, "loss": 0.0725, "step": 8145 }, { "epoch": 2.6396629941672067, "grad_norm": 0.48341622948646545, "learning_rate": 3.7360485978728653e-07, "loss": 0.0734, "step": 8146 }, { "epoch": 2.6399870382372, "grad_norm": 0.465791791677475, "learning_rate": 3.7294168766136786e-07, "loss": 0.0655, "step": 8147 }, { "epoch": 2.6403110823071936, "grad_norm": 0.5187360644340515, "learning_rate": 3.7227908184259476e-07, "loss": 0.0767, "step": 8148 }, { "epoch": 2.6406351263771874, "grad_norm": 0.4869755804538727, "learning_rate": 3.716170424120608e-07, "loss": 0.0703, "step": 8149 }, { "epoch": 2.640959170447181, "grad_norm": 0.4809172749519348, "learning_rate": 3.70955569450796e-07, "loss": 0.0704, "step": 8150 }, { "epoch": 2.6412832145171743, "grad_norm": 0.4652535319328308, "learning_rate": 3.702946630397564e-07, "loss": 0.068, "step": 8151 }, { "epoch": 2.6416072585871677, "grad_norm": 0.507232129573822, "learning_rate": 3.696343232598304e-07, "loss": 0.0731, "step": 8152 }, { "epoch": 2.641931302657161, "grad_norm": 0.4860020577907562, "learning_rate": 3.6897455019183903e-07, "loss": 0.071, "step": 8153 }, { "epoch": 2.642255346727155, "grad_norm": 0.4917910099029541, "learning_rate": 3.6831534391652935e-07, "loss": 0.0679, "step": 8154 }, { "epoch": 2.6425793907971484, "grad_norm": 0.5022282004356384, "learning_rate": 3.676567045145851e-07, "loss": 0.0706, "step": 8155 }, { "epoch": 2.642903434867142, "grad_norm": 0.44981980323791504, "learning_rate": 3.669986320666136e-07, "loss": 0.0684, "step": 8156 }, { "epoch": 2.6432274789371357, "grad_norm": 0.5006513595581055, "learning_rate": 3.663411266531608e-07, "loss": 0.0726, "step": 8157 }, { "epoch": 2.6435515230071287, "grad_norm": 0.5052984356880188, "learning_rate": 3.6568418835469523e-07, "loss": 0.0732, "step": 8158 }, { "epoch": 2.6438755670771226, "grad_norm": 0.5216200947761536, "learning_rate": 3.6502781725162194e-07, "loss": 0.0751, "step": 8159 }, { "epoch": 2.644199611147116, "grad_norm": 0.4888712763786316, "learning_rate": 3.6437201342427396e-07, "loss": 0.0681, "step": 8160 }, { "epoch": 2.6445236552171094, "grad_norm": 0.480238139629364, "learning_rate": 3.6371677695291485e-07, "loss": 0.0677, "step": 8161 }, { "epoch": 2.6448476992871033, "grad_norm": 0.5127687454223633, "learning_rate": 3.6306210791773933e-07, "loss": 0.076, "step": 8162 }, { "epoch": 2.6451717433570967, "grad_norm": 0.4455661177635193, "learning_rate": 3.6240800639887384e-07, "loss": 0.0652, "step": 8163 }, { "epoch": 2.64549578742709, "grad_norm": 0.5006566643714905, "learning_rate": 3.6175447247637217e-07, "loss": 0.0706, "step": 8164 }, { "epoch": 2.6458198314970836, "grad_norm": 0.476716011762619, "learning_rate": 3.611015062302214e-07, "loss": 0.0699, "step": 8165 }, { "epoch": 2.646143875567077, "grad_norm": 0.4877867102622986, "learning_rate": 3.6044910774033826e-07, "loss": 0.0664, "step": 8166 }, { "epoch": 2.646467919637071, "grad_norm": 0.46837756037712097, "learning_rate": 3.5979727708656984e-07, "loss": 0.0636, "step": 8167 }, { "epoch": 2.6467919637070643, "grad_norm": 0.47957879304885864, "learning_rate": 3.591460143486941e-07, "loss": 0.0679, "step": 8168 }, { "epoch": 2.6471160077770577, "grad_norm": 0.4841154217720032, "learning_rate": 3.584953196064195e-07, "loss": 0.0719, "step": 8169 }, { "epoch": 2.647440051847051, "grad_norm": 0.5195610523223877, "learning_rate": 3.5784519293938555e-07, "loss": 0.0712, "step": 8170 }, { "epoch": 2.6477640959170445, "grad_norm": 0.4716542959213257, "learning_rate": 3.571956344271582e-07, "loss": 0.0694, "step": 8171 }, { "epoch": 2.6480881399870384, "grad_norm": 0.4820329546928406, "learning_rate": 3.56546644149241e-07, "loss": 0.0683, "step": 8172 }, { "epoch": 2.648412184057032, "grad_norm": 0.5184935331344604, "learning_rate": 3.558982221850621e-07, "loss": 0.081, "step": 8173 }, { "epoch": 2.6487362281270252, "grad_norm": 0.4963172376155853, "learning_rate": 3.5525036861398244e-07, "loss": 0.0699, "step": 8174 }, { "epoch": 2.6490602721970187, "grad_norm": 0.499603271484375, "learning_rate": 3.5460308351529247e-07, "loss": 0.074, "step": 8175 }, { "epoch": 2.649384316267012, "grad_norm": 0.48043859004974365, "learning_rate": 3.5395636696821443e-07, "loss": 0.0697, "step": 8176 }, { "epoch": 2.649708360337006, "grad_norm": 0.5435832738876343, "learning_rate": 3.5331021905190055e-07, "loss": 0.0798, "step": 8177 }, { "epoch": 2.6500324044069994, "grad_norm": 0.5596851706504822, "learning_rate": 3.5266463984543145e-07, "loss": 0.0686, "step": 8178 }, { "epoch": 2.650356448476993, "grad_norm": 0.4937089681625366, "learning_rate": 3.5201962942782165e-07, "loss": 0.0711, "step": 8179 }, { "epoch": 2.6506804925469862, "grad_norm": 0.4810419976711273, "learning_rate": 3.5137518787801193e-07, "loss": 0.0665, "step": 8180 }, { "epoch": 2.6510045366169797, "grad_norm": 0.5074557662010193, "learning_rate": 3.507313152748787e-07, "loss": 0.0773, "step": 8181 }, { "epoch": 2.6513285806869735, "grad_norm": 0.47159427404403687, "learning_rate": 3.5008801169722275e-07, "loss": 0.0709, "step": 8182 }, { "epoch": 2.651652624756967, "grad_norm": 0.4898209273815155, "learning_rate": 3.4944527722378e-07, "loss": 0.0722, "step": 8183 }, { "epoch": 2.6519766688269604, "grad_norm": 0.46519505977630615, "learning_rate": 3.488031119332147e-07, "loss": 0.0649, "step": 8184 }, { "epoch": 2.6523007128969542, "grad_norm": 0.5056127309799194, "learning_rate": 3.4816151590412075e-07, "loss": 0.0744, "step": 8185 }, { "epoch": 2.6526247569669477, "grad_norm": 0.550579309463501, "learning_rate": 3.4752048921502525e-07, "loss": 0.0801, "step": 8186 }, { "epoch": 2.652948801036941, "grad_norm": 0.47031712532043457, "learning_rate": 3.468800319443805e-07, "loss": 0.0693, "step": 8187 }, { "epoch": 2.6532728451069345, "grad_norm": 0.5020474791526794, "learning_rate": 3.462401441705759e-07, "loss": 0.0738, "step": 8188 }, { "epoch": 2.653596889176928, "grad_norm": 0.49726903438568115, "learning_rate": 3.4560082597192515e-07, "loss": 0.0734, "step": 8189 }, { "epoch": 2.653920933246922, "grad_norm": 0.4900558590888977, "learning_rate": 3.4496207742667485e-07, "loss": 0.071, "step": 8190 }, { "epoch": 2.654244977316915, "grad_norm": 0.4857633411884308, "learning_rate": 3.443238986130021e-07, "loss": 0.0692, "step": 8191 }, { "epoch": 2.6545690213869086, "grad_norm": 0.5156083106994629, "learning_rate": 3.4368628960901427e-07, "loss": 0.0737, "step": 8192 }, { "epoch": 2.654893065456902, "grad_norm": 0.5220831036567688, "learning_rate": 3.430492504927474e-07, "loss": 0.0785, "step": 8193 }, { "epoch": 2.6552171095268955, "grad_norm": 0.5222134590148926, "learning_rate": 3.4241278134217017e-07, "loss": 0.0737, "step": 8194 }, { "epoch": 2.6555411535968894, "grad_norm": 0.5155477523803711, "learning_rate": 3.417768822351791e-07, "loss": 0.0743, "step": 8195 }, { "epoch": 2.655865197666883, "grad_norm": 0.4736745059490204, "learning_rate": 3.4114155324960263e-07, "loss": 0.0689, "step": 8196 }, { "epoch": 2.656189241736876, "grad_norm": 0.5249689817428589, "learning_rate": 3.4050679446319847e-07, "loss": 0.0729, "step": 8197 }, { "epoch": 2.6565132858068696, "grad_norm": 0.49144700169563293, "learning_rate": 3.3987260595365556e-07, "loss": 0.0701, "step": 8198 }, { "epoch": 2.656837329876863, "grad_norm": 0.46199938654899597, "learning_rate": 3.3923898779859186e-07, "loss": 0.0647, "step": 8199 }, { "epoch": 2.657161373946857, "grad_norm": 0.48296552896499634, "learning_rate": 3.38605940075557e-07, "loss": 0.0729, "step": 8200 }, { "epoch": 2.6574854180168503, "grad_norm": 0.48330217599868774, "learning_rate": 3.3797346286202957e-07, "loss": 0.0689, "step": 8201 }, { "epoch": 2.6578094620868438, "grad_norm": 0.48326781392097473, "learning_rate": 3.373415562354165e-07, "loss": 0.0712, "step": 8202 }, { "epoch": 2.658133506156837, "grad_norm": 0.48442551493644714, "learning_rate": 3.36710220273061e-07, "loss": 0.0728, "step": 8203 }, { "epoch": 2.6584575502268306, "grad_norm": 0.44781193137168884, "learning_rate": 3.360794550522295e-07, "loss": 0.0632, "step": 8204 }, { "epoch": 2.6587815942968245, "grad_norm": 0.5006281733512878, "learning_rate": 3.3544926065012253e-07, "loss": 0.0711, "step": 8205 }, { "epoch": 2.659105638366818, "grad_norm": 0.4574485123157501, "learning_rate": 3.3481963714386943e-07, "loss": 0.0668, "step": 8206 }, { "epoch": 2.6594296824368113, "grad_norm": 0.46295037865638733, "learning_rate": 3.3419058461053087e-07, "loss": 0.0637, "step": 8207 }, { "epoch": 2.659753726506805, "grad_norm": 0.6541327238082886, "learning_rate": 3.335621031270964e-07, "loss": 0.0779, "step": 8208 }, { "epoch": 2.660077770576798, "grad_norm": 0.4976052939891815, "learning_rate": 3.329341927704843e-07, "loss": 0.0729, "step": 8209 }, { "epoch": 2.660401814646792, "grad_norm": 0.5218827724456787, "learning_rate": 3.3230685361754833e-07, "loss": 0.0747, "step": 8210 }, { "epoch": 2.6607258587167855, "grad_norm": 0.4913540780544281, "learning_rate": 3.316800857450647e-07, "loss": 0.0704, "step": 8211 }, { "epoch": 2.661049902786779, "grad_norm": 0.5734509825706482, "learning_rate": 3.310538892297477e-07, "loss": 0.0847, "step": 8212 }, { "epoch": 2.6613739468567728, "grad_norm": 0.4933965504169464, "learning_rate": 3.304282641482348e-07, "loss": 0.0725, "step": 8213 }, { "epoch": 2.661697990926766, "grad_norm": 0.49286365509033203, "learning_rate": 3.298032105770971e-07, "loss": 0.0736, "step": 8214 }, { "epoch": 2.6620220349967596, "grad_norm": 0.5159487724304199, "learning_rate": 3.2917872859283606e-07, "loss": 0.0752, "step": 8215 }, { "epoch": 2.662346079066753, "grad_norm": 0.4922940135002136, "learning_rate": 3.285548182718812e-07, "loss": 0.0717, "step": 8216 }, { "epoch": 2.6626701231367464, "grad_norm": 0.523423969745636, "learning_rate": 3.2793147969059413e-07, "loss": 0.079, "step": 8217 }, { "epoch": 2.6629941672067403, "grad_norm": 0.4571373760700226, "learning_rate": 3.2730871292526446e-07, "loss": 0.0692, "step": 8218 }, { "epoch": 2.6633182112767337, "grad_norm": 0.5341159701347351, "learning_rate": 3.2668651805211285e-07, "loss": 0.0738, "step": 8219 }, { "epoch": 2.663642255346727, "grad_norm": 0.4489809572696686, "learning_rate": 3.2606489514729e-07, "loss": 0.0666, "step": 8220 }, { "epoch": 2.6639662994167206, "grad_norm": 0.47798335552215576, "learning_rate": 3.2544384428687736e-07, "loss": 0.0703, "step": 8221 }, { "epoch": 2.664290343486714, "grad_norm": 0.49070990085601807, "learning_rate": 3.2482336554688465e-07, "loss": 0.0717, "step": 8222 }, { "epoch": 2.664614387556708, "grad_norm": 0.501501202583313, "learning_rate": 3.2420345900325277e-07, "loss": 0.0746, "step": 8223 }, { "epoch": 2.6649384316267013, "grad_norm": 0.525124192237854, "learning_rate": 3.235841247318522e-07, "loss": 0.0718, "step": 8224 }, { "epoch": 2.6652624756966947, "grad_norm": 0.4553992748260498, "learning_rate": 3.229653628084845e-07, "loss": 0.0676, "step": 8225 }, { "epoch": 2.665586519766688, "grad_norm": 0.5091498494148254, "learning_rate": 3.2234717330887844e-07, "loss": 0.0738, "step": 8226 }, { "epoch": 2.6659105638366816, "grad_norm": 0.5092705488204956, "learning_rate": 3.2172955630869527e-07, "loss": 0.0766, "step": 8227 }, { "epoch": 2.6662346079066754, "grad_norm": 0.5618413686752319, "learning_rate": 3.211125118835251e-07, "loss": 0.0806, "step": 8228 }, { "epoch": 2.666558651976669, "grad_norm": 0.5213515162467957, "learning_rate": 3.204960401088886e-07, "loss": 0.0792, "step": 8229 }, { "epoch": 2.6668826960466623, "grad_norm": 0.5249233841896057, "learning_rate": 3.198801410602359e-07, "loss": 0.0776, "step": 8230 }, { "epoch": 2.6672067401166557, "grad_norm": 0.47902482748031616, "learning_rate": 3.192648148129457e-07, "loss": 0.071, "step": 8231 }, { "epoch": 2.667530784186649, "grad_norm": 0.5230550169944763, "learning_rate": 3.1865006144233047e-07, "loss": 0.075, "step": 8232 }, { "epoch": 2.667854828256643, "grad_norm": 0.5067039728164673, "learning_rate": 3.1803588102362724e-07, "loss": 0.0729, "step": 8233 }, { "epoch": 2.6681788723266364, "grad_norm": 0.46065452694892883, "learning_rate": 3.1742227363200927e-07, "loss": 0.0675, "step": 8234 }, { "epoch": 2.66850291639663, "grad_norm": 0.4955891966819763, "learning_rate": 3.1680923934257256e-07, "loss": 0.0702, "step": 8235 }, { "epoch": 2.6688269604666237, "grad_norm": 0.5458259582519531, "learning_rate": 3.1619677823034875e-07, "loss": 0.0796, "step": 8236 }, { "epoch": 2.669151004536617, "grad_norm": 0.5087775588035583, "learning_rate": 3.1558489037029626e-07, "loss": 0.0716, "step": 8237 }, { "epoch": 2.6694750486066106, "grad_norm": 0.4729219377040863, "learning_rate": 3.149735758373046e-07, "loss": 0.0679, "step": 8238 }, { "epoch": 2.669799092676604, "grad_norm": 0.4986790418624878, "learning_rate": 3.143628347061939e-07, "loss": 0.0724, "step": 8239 }, { "epoch": 2.6701231367465974, "grad_norm": 0.4468283951282501, "learning_rate": 3.1375266705170935e-07, "loss": 0.065, "step": 8240 }, { "epoch": 2.6704471808165913, "grad_norm": 0.5075255632400513, "learning_rate": 3.1314307294853405e-07, "loss": 0.0789, "step": 8241 }, { "epoch": 2.6707712248865847, "grad_norm": 0.4599548876285553, "learning_rate": 3.1253405247127387e-07, "loss": 0.0657, "step": 8242 }, { "epoch": 2.671095268956578, "grad_norm": 0.4865701496601105, "learning_rate": 3.1192560569446697e-07, "loss": 0.0707, "step": 8243 }, { "epoch": 2.6714193130265715, "grad_norm": 0.5049477815628052, "learning_rate": 3.1131773269258204e-07, "loss": 0.0748, "step": 8244 }, { "epoch": 2.671743357096565, "grad_norm": 0.4652611315250397, "learning_rate": 3.1071043354001626e-07, "loss": 0.0669, "step": 8245 }, { "epoch": 2.672067401166559, "grad_norm": 0.48552364110946655, "learning_rate": 3.1010370831109806e-07, "loss": 0.0734, "step": 8246 }, { "epoch": 2.6723914452365523, "grad_norm": 0.5223255157470703, "learning_rate": 3.09497557080084e-07, "loss": 0.0742, "step": 8247 }, { "epoch": 2.6727154893065457, "grad_norm": 0.4967799782752991, "learning_rate": 3.088919799211626e-07, "loss": 0.0751, "step": 8248 }, { "epoch": 2.673039533376539, "grad_norm": 0.47687217593193054, "learning_rate": 3.0828697690844787e-07, "loss": 0.0653, "step": 8249 }, { "epoch": 2.6733635774465325, "grad_norm": 0.534401535987854, "learning_rate": 3.076825481159884e-07, "loss": 0.0766, "step": 8250 }, { "epoch": 2.6736876215165264, "grad_norm": 0.4932291507720947, "learning_rate": 3.0707869361776e-07, "loss": 0.0724, "step": 8251 }, { "epoch": 2.67401166558652, "grad_norm": 0.5111968517303467, "learning_rate": 3.0647541348766796e-07, "loss": 0.072, "step": 8252 }, { "epoch": 2.6743357096565132, "grad_norm": 0.4867503046989441, "learning_rate": 3.058727077995488e-07, "loss": 0.0682, "step": 8253 }, { "epoch": 2.6746597537265067, "grad_norm": 0.4913567304611206, "learning_rate": 3.052705766271674e-07, "loss": 0.0723, "step": 8254 }, { "epoch": 2.6749837977965, "grad_norm": 0.4733397960662842, "learning_rate": 3.046690200442193e-07, "loss": 0.0675, "step": 8255 }, { "epoch": 2.675307841866494, "grad_norm": 0.48818185925483704, "learning_rate": 3.040680381243294e-07, "loss": 0.0729, "step": 8256 }, { "epoch": 2.6756318859364874, "grad_norm": 0.44772180914878845, "learning_rate": 3.0346763094105057e-07, "loss": 0.0678, "step": 8257 }, { "epoch": 2.675955930006481, "grad_norm": 0.4892347753047943, "learning_rate": 3.0286779856786795e-07, "loss": 0.0697, "step": 8258 }, { "epoch": 2.6762799740764747, "grad_norm": 0.49410268664360046, "learning_rate": 3.022685410781945e-07, "loss": 0.073, "step": 8259 }, { "epoch": 2.6766040181464676, "grad_norm": 0.5007535219192505, "learning_rate": 3.016698585453748e-07, "loss": 0.0717, "step": 8260 }, { "epoch": 2.6769280622164615, "grad_norm": 0.4490124583244324, "learning_rate": 3.010717510426814e-07, "loss": 0.0611, "step": 8261 }, { "epoch": 2.677252106286455, "grad_norm": 0.4742376506328583, "learning_rate": 3.0047421864331516e-07, "loss": 0.065, "step": 8262 }, { "epoch": 2.6775761503564484, "grad_norm": 0.4849986732006073, "learning_rate": 2.9987726142041096e-07, "loss": 0.0707, "step": 8263 }, { "epoch": 2.6779001944264422, "grad_norm": 0.5073363780975342, "learning_rate": 2.9928087944702754e-07, "loss": 0.0708, "step": 8264 }, { "epoch": 2.6782242384964356, "grad_norm": 0.4616087079048157, "learning_rate": 2.986850727961599e-07, "loss": 0.0656, "step": 8265 }, { "epoch": 2.678548282566429, "grad_norm": 0.5197352170944214, "learning_rate": 2.980898415407257e-07, "loss": 0.0719, "step": 8266 }, { "epoch": 2.6788723266364225, "grad_norm": 0.46726688742637634, "learning_rate": 2.9749518575357796e-07, "loss": 0.064, "step": 8267 }, { "epoch": 2.679196370706416, "grad_norm": 0.4749407470226288, "learning_rate": 2.96901105507495e-07, "loss": 0.0665, "step": 8268 }, { "epoch": 2.67952041477641, "grad_norm": 0.4720465838909149, "learning_rate": 2.963076008751875e-07, "loss": 0.0684, "step": 8269 }, { "epoch": 2.679844458846403, "grad_norm": 0.46473428606987, "learning_rate": 2.957146719292947e-07, "loss": 0.0676, "step": 8270 }, { "epoch": 2.6801685029163966, "grad_norm": 0.4980733394622803, "learning_rate": 2.9512231874238404e-07, "loss": 0.0741, "step": 8271 }, { "epoch": 2.68049254698639, "grad_norm": 0.48748525977134705, "learning_rate": 2.945305413869559e-07, "loss": 0.0674, "step": 8272 }, { "epoch": 2.6808165910563835, "grad_norm": 0.47855398058891296, "learning_rate": 2.9393933993543675e-07, "loss": 0.0706, "step": 8273 }, { "epoch": 2.6811406351263773, "grad_norm": 0.475934237241745, "learning_rate": 2.9334871446018375e-07, "loss": 0.0691, "step": 8274 }, { "epoch": 2.6814646791963708, "grad_norm": 0.47559282183647156, "learning_rate": 2.927586650334846e-07, "loss": 0.0702, "step": 8275 }, { "epoch": 2.681788723266364, "grad_norm": 0.5388659834861755, "learning_rate": 2.9216919172755485e-07, "loss": 0.0819, "step": 8276 }, { "epoch": 2.6821127673363576, "grad_norm": 0.4871777892112732, "learning_rate": 2.9158029461454075e-07, "loss": 0.0722, "step": 8277 }, { "epoch": 2.682436811406351, "grad_norm": 0.4801523983478546, "learning_rate": 2.909919737665179e-07, "loss": 0.0706, "step": 8278 }, { "epoch": 2.682760855476345, "grad_norm": 0.5030548572540283, "learning_rate": 2.9040422925549097e-07, "loss": 0.074, "step": 8279 }, { "epoch": 2.6830848995463383, "grad_norm": 0.49741634726524353, "learning_rate": 2.898170611533935e-07, "loss": 0.0729, "step": 8280 }, { "epoch": 2.6834089436163318, "grad_norm": 0.4476093351840973, "learning_rate": 2.8923046953208964e-07, "loss": 0.0647, "step": 8281 }, { "epoch": 2.683732987686325, "grad_norm": 0.45768454670906067, "learning_rate": 2.8864445446337264e-07, "loss": 0.0721, "step": 8282 }, { "epoch": 2.6840570317563186, "grad_norm": 0.49686095118522644, "learning_rate": 2.8805901601896446e-07, "loss": 0.0685, "step": 8283 }, { "epoch": 2.6843810758263125, "grad_norm": 0.44313284754753113, "learning_rate": 2.874741542705178e-07, "loss": 0.063, "step": 8284 }, { "epoch": 2.684705119896306, "grad_norm": 0.5290567278862, "learning_rate": 2.868898692896149e-07, "loss": 0.0732, "step": 8285 }, { "epoch": 2.6850291639662993, "grad_norm": 0.5113586783409119, "learning_rate": 2.8630616114776413e-07, "loss": 0.0752, "step": 8286 }, { "epoch": 2.685353208036293, "grad_norm": 0.47347286343574524, "learning_rate": 2.857230299164082e-07, "loss": 0.0649, "step": 8287 }, { "epoch": 2.6856772521062866, "grad_norm": 0.4577503800392151, "learning_rate": 2.851404756669146e-07, "loss": 0.0634, "step": 8288 }, { "epoch": 2.68600129617628, "grad_norm": 0.48355886340141296, "learning_rate": 2.8455849847058457e-07, "loss": 0.0718, "step": 8289 }, { "epoch": 2.6863253402462735, "grad_norm": 0.515175998210907, "learning_rate": 2.839770983986445e-07, "loss": 0.0788, "step": 8290 }, { "epoch": 2.686649384316267, "grad_norm": 0.4759681224822998, "learning_rate": 2.8339627552225304e-07, "loss": 0.071, "step": 8291 }, { "epoch": 2.6869734283862607, "grad_norm": 0.5212849378585815, "learning_rate": 2.8281602991249825e-07, "loss": 0.0756, "step": 8292 }, { "epoch": 2.687297472456254, "grad_norm": 0.5105800628662109, "learning_rate": 2.822363616403939e-07, "loss": 0.0703, "step": 8293 }, { "epoch": 2.6876215165262476, "grad_norm": 0.4698103368282318, "learning_rate": 2.8165727077688887e-07, "loss": 0.0706, "step": 8294 }, { "epoch": 2.687945560596241, "grad_norm": 0.5046882033348083, "learning_rate": 2.8107875739285474e-07, "loss": 0.0749, "step": 8295 }, { "epoch": 2.6882696046662344, "grad_norm": 0.4877139925956726, "learning_rate": 2.805008215591004e-07, "loss": 0.072, "step": 8296 }, { "epoch": 2.6885936487362283, "grad_norm": 0.5047754049301147, "learning_rate": 2.79923463346356e-07, "loss": 0.0734, "step": 8297 }, { "epoch": 2.6889176928062217, "grad_norm": 0.48555463552474976, "learning_rate": 2.7934668282528554e-07, "loss": 0.069, "step": 8298 }, { "epoch": 2.689241736876215, "grad_norm": 0.5083166360855103, "learning_rate": 2.78770480066482e-07, "loss": 0.0747, "step": 8299 }, { "epoch": 2.6895657809462086, "grad_norm": 0.47517675161361694, "learning_rate": 2.781948551404667e-07, "loss": 0.0701, "step": 8300 }, { "epoch": 2.689889825016202, "grad_norm": 0.4648648798465729, "learning_rate": 2.7761980811769063e-07, "loss": 0.0688, "step": 8301 }, { "epoch": 2.690213869086196, "grad_norm": 0.4912257492542267, "learning_rate": 2.770453390685335e-07, "loss": 0.0716, "step": 8302 }, { "epoch": 2.6905379131561893, "grad_norm": 0.468051940202713, "learning_rate": 2.764714480633057e-07, "loss": 0.0738, "step": 8303 }, { "epoch": 2.6908619572261827, "grad_norm": 0.433032363653183, "learning_rate": 2.7589813517224504e-07, "loss": 0.0663, "step": 8304 }, { "epoch": 2.691186001296176, "grad_norm": 0.49341756105422974, "learning_rate": 2.753254004655198e-07, "loss": 0.0732, "step": 8305 }, { "epoch": 2.6915100453661696, "grad_norm": 0.4939447045326233, "learning_rate": 2.747532440132272e-07, "loss": 0.075, "step": 8306 }, { "epoch": 2.6918340894361634, "grad_norm": 0.4643614888191223, "learning_rate": 2.741816658853935e-07, "loss": 0.0675, "step": 8307 }, { "epoch": 2.692158133506157, "grad_norm": 0.5280596613883972, "learning_rate": 2.736106661519744e-07, "loss": 0.077, "step": 8308 }, { "epoch": 2.6924821775761503, "grad_norm": 0.5050312280654907, "learning_rate": 2.730402448828551e-07, "loss": 0.0751, "step": 8309 }, { "epoch": 2.692806221646144, "grad_norm": 0.4410359859466553, "learning_rate": 2.724704021478486e-07, "loss": 0.0645, "step": 8310 }, { "epoch": 2.693130265716137, "grad_norm": 0.50943523645401, "learning_rate": 2.719011380166997e-07, "loss": 0.0777, "step": 8311 }, { "epoch": 2.693454309786131, "grad_norm": 0.5053391456604004, "learning_rate": 2.7133245255907937e-07, "loss": 0.073, "step": 8312 }, { "epoch": 2.6937783538561244, "grad_norm": 0.4879259467124939, "learning_rate": 2.7076434584458964e-07, "loss": 0.0719, "step": 8313 }, { "epoch": 2.694102397926118, "grad_norm": 0.5183456540107727, "learning_rate": 2.7019681794276166e-07, "loss": 0.0746, "step": 8314 }, { "epoch": 2.6944264419961117, "grad_norm": 0.46866390109062195, "learning_rate": 2.6962986892305533e-07, "loss": 0.0697, "step": 8315 }, { "epoch": 2.694750486066105, "grad_norm": 0.5022212862968445, "learning_rate": 2.6906349885486015e-07, "loss": 0.071, "step": 8316 }, { "epoch": 2.6950745301360985, "grad_norm": 0.5619996190071106, "learning_rate": 2.6849770780749186e-07, "loss": 0.0739, "step": 8317 }, { "epoch": 2.695398574206092, "grad_norm": 0.4827101528644562, "learning_rate": 2.6793249585020163e-07, "loss": 0.071, "step": 8318 }, { "epoch": 2.6957226182760854, "grad_norm": 0.5225281119346619, "learning_rate": 2.67367863052162e-07, "loss": 0.0777, "step": 8319 }, { "epoch": 2.6960466623460793, "grad_norm": 0.5356566309928894, "learning_rate": 2.6680380948248207e-07, "loss": 0.0765, "step": 8320 }, { "epoch": 2.6963707064160727, "grad_norm": 0.5039767026901245, "learning_rate": 2.6624033521019443e-07, "loss": 0.0732, "step": 8321 }, { "epoch": 2.696694750486066, "grad_norm": 0.45116445422172546, "learning_rate": 2.6567744030426335e-07, "loss": 0.0641, "step": 8322 }, { "epoch": 2.6970187945560595, "grad_norm": 0.4902065396308899, "learning_rate": 2.6511512483358204e-07, "loss": 0.0717, "step": 8323 }, { "epoch": 2.697342838626053, "grad_norm": 0.47688916325569153, "learning_rate": 2.6455338886697155e-07, "loss": 0.0712, "step": 8324 }, { "epoch": 2.697666882696047, "grad_norm": 0.5470159649848938, "learning_rate": 2.639922324731847e-07, "loss": 0.077, "step": 8325 }, { "epoch": 2.6979909267660402, "grad_norm": 0.46849918365478516, "learning_rate": 2.6343165572089936e-07, "loss": 0.0709, "step": 8326 }, { "epoch": 2.6983149708360337, "grad_norm": 0.4926462173461914, "learning_rate": 2.6287165867872666e-07, "loss": 0.0685, "step": 8327 }, { "epoch": 2.698639014906027, "grad_norm": 0.4713653028011322, "learning_rate": 2.623122414152035e-07, "loss": 0.0684, "step": 8328 }, { "epoch": 2.6989630589760205, "grad_norm": 0.49385493993759155, "learning_rate": 2.617534039987979e-07, "loss": 0.0718, "step": 8329 }, { "epoch": 2.6992871030460144, "grad_norm": 0.47010338306427, "learning_rate": 2.6119514649790566e-07, "loss": 0.0692, "step": 8330 }, { "epoch": 2.699611147116008, "grad_norm": 0.49910011887550354, "learning_rate": 2.606374689808522e-07, "loss": 0.0716, "step": 8331 }, { "epoch": 2.6999351911860012, "grad_norm": 0.49807921051979065, "learning_rate": 2.600803715158917e-07, "loss": 0.0721, "step": 8332 }, { "epoch": 2.7002592352559946, "grad_norm": 0.4971463084220886, "learning_rate": 2.5952385417120864e-07, "loss": 0.0752, "step": 8333 }, { "epoch": 2.700583279325988, "grad_norm": 0.5131683945655823, "learning_rate": 2.589679170149145e-07, "loss": 0.0728, "step": 8334 }, { "epoch": 2.700907323395982, "grad_norm": 0.49370115995407104, "learning_rate": 2.5841256011505e-07, "loss": 0.0714, "step": 8335 }, { "epoch": 2.7012313674659754, "grad_norm": 0.46200138330459595, "learning_rate": 2.578577835395857e-07, "loss": 0.0659, "step": 8336 }, { "epoch": 2.701555411535969, "grad_norm": 0.47938281297683716, "learning_rate": 2.5730358735642167e-07, "loss": 0.0712, "step": 8337 }, { "epoch": 2.7018794556059627, "grad_norm": 0.4824425280094147, "learning_rate": 2.567499716333854e-07, "loss": 0.0709, "step": 8338 }, { "epoch": 2.702203499675956, "grad_norm": 0.47668445110321045, "learning_rate": 2.561969364382344e-07, "loss": 0.0707, "step": 8339 }, { "epoch": 2.7025275437459495, "grad_norm": 0.5181357860565186, "learning_rate": 2.556444818386555e-07, "loss": 0.069, "step": 8340 }, { "epoch": 2.702851587815943, "grad_norm": 0.4696907103061676, "learning_rate": 2.5509260790226195e-07, "loss": 0.0706, "step": 8341 }, { "epoch": 2.7031756318859363, "grad_norm": 0.5065804719924927, "learning_rate": 2.5454131469660027e-07, "loss": 0.074, "step": 8342 }, { "epoch": 2.70349967595593, "grad_norm": 0.4924456477165222, "learning_rate": 2.539906022891414e-07, "loss": 0.0714, "step": 8343 }, { "epoch": 2.7038237200259236, "grad_norm": 0.48366984724998474, "learning_rate": 2.534404707472876e-07, "loss": 0.0749, "step": 8344 }, { "epoch": 2.704147764095917, "grad_norm": 0.48156607151031494, "learning_rate": 2.5289092013837e-07, "loss": 0.0647, "step": 8345 }, { "epoch": 2.7044718081659105, "grad_norm": 0.49813225865364075, "learning_rate": 2.5234195052964814e-07, "loss": 0.0717, "step": 8346 }, { "epoch": 2.704795852235904, "grad_norm": 0.5039176940917969, "learning_rate": 2.5179356198831164e-07, "loss": 0.0767, "step": 8347 }, { "epoch": 2.7051198963058978, "grad_norm": 0.49406805634498596, "learning_rate": 2.512457545814756e-07, "loss": 0.0698, "step": 8348 }, { "epoch": 2.705443940375891, "grad_norm": 0.5206747055053711, "learning_rate": 2.5069852837618866e-07, "loss": 0.0771, "step": 8349 }, { "epoch": 2.7057679844458846, "grad_norm": 0.4882453978061676, "learning_rate": 2.5015188343942397e-07, "loss": 0.0715, "step": 8350 }, { "epoch": 2.706092028515878, "grad_norm": 0.49470728635787964, "learning_rate": 2.4960581983808796e-07, "loss": 0.0739, "step": 8351 }, { "epoch": 2.7064160725858715, "grad_norm": 0.4713766574859619, "learning_rate": 2.490603376390116e-07, "loss": 0.0692, "step": 8352 }, { "epoch": 2.7067401166558653, "grad_norm": 0.5327262282371521, "learning_rate": 2.4851543690895706e-07, "loss": 0.069, "step": 8353 }, { "epoch": 2.7070641607258588, "grad_norm": 0.5107653141021729, "learning_rate": 2.479711177146155e-07, "loss": 0.075, "step": 8354 }, { "epoch": 2.707388204795852, "grad_norm": 0.5149986743927002, "learning_rate": 2.474273801226051e-07, "loss": 0.0793, "step": 8355 }, { "epoch": 2.7077122488658456, "grad_norm": 0.4663291275501251, "learning_rate": 2.4688422419947623e-07, "loss": 0.0712, "step": 8356 }, { "epoch": 2.708036292935839, "grad_norm": 0.49470773339271545, "learning_rate": 2.4634165001170327e-07, "loss": 0.0696, "step": 8357 }, { "epoch": 2.708360337005833, "grad_norm": 0.5378709435462952, "learning_rate": 2.4579965762569436e-07, "loss": 0.0801, "step": 8358 }, { "epoch": 2.7086843810758263, "grad_norm": 0.4698738157749176, "learning_rate": 2.45258247107783e-07, "loss": 0.0709, "step": 8359 }, { "epoch": 2.7090084251458197, "grad_norm": 0.5029922723770142, "learning_rate": 2.447174185242324e-07, "loss": 0.074, "step": 8360 }, { "epoch": 2.7093324692158136, "grad_norm": 0.45825907588005066, "learning_rate": 2.4417717194123504e-07, "loss": 0.0686, "step": 8361 }, { "epoch": 2.709656513285807, "grad_norm": 0.47053924202919006, "learning_rate": 2.43637507424912e-07, "loss": 0.0706, "step": 8362 }, { "epoch": 2.7099805573558005, "grad_norm": 0.48093780875205994, "learning_rate": 2.4309842504131266e-07, "loss": 0.0733, "step": 8363 }, { "epoch": 2.710304601425794, "grad_norm": 0.4751356244087219, "learning_rate": 2.4255992485641644e-07, "loss": 0.0667, "step": 8364 }, { "epoch": 2.7106286454957873, "grad_norm": 0.5154885053634644, "learning_rate": 2.4202200693612955e-07, "loss": 0.0762, "step": 8365 }, { "epoch": 2.710952689565781, "grad_norm": 0.484735906124115, "learning_rate": 2.4148467134628816e-07, "loss": 0.0711, "step": 8366 }, { "epoch": 2.7112767336357746, "grad_norm": 0.4831327199935913, "learning_rate": 2.4094791815265637e-07, "loss": 0.0698, "step": 8367 }, { "epoch": 2.711600777705768, "grad_norm": 0.5157594680786133, "learning_rate": 2.404117474209289e-07, "loss": 0.0777, "step": 8368 }, { "epoch": 2.7119248217757614, "grad_norm": 0.5025253891944885, "learning_rate": 2.3987615921672645e-07, "loss": 0.0759, "step": 8369 }, { "epoch": 2.712248865845755, "grad_norm": 0.505915105342865, "learning_rate": 2.3934115360560116e-07, "loss": 0.0746, "step": 8370 }, { "epoch": 2.7125729099157487, "grad_norm": 0.5080195665359497, "learning_rate": 2.388067306530323e-07, "loss": 0.0707, "step": 8371 }, { "epoch": 2.712896953985742, "grad_norm": 0.4918041527271271, "learning_rate": 2.382728904244269e-07, "loss": 0.0711, "step": 8372 }, { "epoch": 2.7132209980557356, "grad_norm": 0.5486384630203247, "learning_rate": 2.3773963298512338e-07, "loss": 0.0787, "step": 8373 }, { "epoch": 2.713545042125729, "grad_norm": 0.47039252519607544, "learning_rate": 2.372069584003861e-07, "loss": 0.0696, "step": 8374 }, { "epoch": 2.7138690861957224, "grad_norm": 0.47299376130104065, "learning_rate": 2.3667486673540963e-07, "loss": 0.0719, "step": 8375 }, { "epoch": 2.7141931302657163, "grad_norm": 0.4835331439971924, "learning_rate": 2.3614335805531686e-07, "loss": 0.0684, "step": 8376 }, { "epoch": 2.7145171743357097, "grad_norm": 0.47559183835983276, "learning_rate": 2.3561243242515907e-07, "loss": 0.0712, "step": 8377 }, { "epoch": 2.714841218405703, "grad_norm": 0.49402961134910583, "learning_rate": 2.3508208990991764e-07, "loss": 0.069, "step": 8378 }, { "epoch": 2.7151652624756966, "grad_norm": 0.4470484256744385, "learning_rate": 2.3455233057449899e-07, "loss": 0.0668, "step": 8379 }, { "epoch": 2.71548930654569, "grad_norm": 0.46793654561042786, "learning_rate": 2.3402315448374346e-07, "loss": 0.0647, "step": 8380 }, { "epoch": 2.715813350615684, "grad_norm": 0.5528791546821594, "learning_rate": 2.3349456170241426e-07, "loss": 0.0765, "step": 8381 }, { "epoch": 2.7161373946856773, "grad_norm": 0.5298764705657959, "learning_rate": 2.3296655229520905e-07, "loss": 0.0738, "step": 8382 }, { "epoch": 2.7164614387556707, "grad_norm": 0.48637211322784424, "learning_rate": 2.3243912632674782e-07, "loss": 0.0742, "step": 8383 }, { "epoch": 2.7167854828256646, "grad_norm": 0.48481112718582153, "learning_rate": 2.3191228386158448e-07, "loss": 0.0698, "step": 8384 }, { "epoch": 2.7171095268956575, "grad_norm": 0.43704310059547424, "learning_rate": 2.3138602496419916e-07, "loss": 0.0625, "step": 8385 }, { "epoch": 2.7174335709656514, "grad_norm": 0.4682506024837494, "learning_rate": 2.308603496990003e-07, "loss": 0.0664, "step": 8386 }, { "epoch": 2.717757615035645, "grad_norm": 0.5053842067718506, "learning_rate": 2.3033525813032644e-07, "loss": 0.0735, "step": 8387 }, { "epoch": 2.7180816591056383, "grad_norm": 0.4989098906517029, "learning_rate": 2.2981075032244282e-07, "loss": 0.0717, "step": 8388 }, { "epoch": 2.718405703175632, "grad_norm": 0.4849379062652588, "learning_rate": 2.2928682633954368e-07, "loss": 0.0732, "step": 8389 }, { "epoch": 2.7187297472456255, "grad_norm": 0.5042644739151001, "learning_rate": 2.2876348624575328e-07, "loss": 0.0685, "step": 8390 }, { "epoch": 2.719053791315619, "grad_norm": 0.5008777976036072, "learning_rate": 2.2824073010512315e-07, "loss": 0.0751, "step": 8391 }, { "epoch": 2.7193778353856124, "grad_norm": 0.4873231053352356, "learning_rate": 2.2771855798163322e-07, "loss": 0.0727, "step": 8392 }, { "epoch": 2.719701879455606, "grad_norm": 0.47231167554855347, "learning_rate": 2.2719696993919237e-07, "loss": 0.0703, "step": 8393 }, { "epoch": 2.7200259235255997, "grad_norm": 0.48164358735084534, "learning_rate": 2.2667596604163844e-07, "loss": 0.0685, "step": 8394 }, { "epoch": 2.720349967595593, "grad_norm": 0.4553871750831604, "learning_rate": 2.2615554635273763e-07, "loss": 0.0656, "step": 8395 }, { "epoch": 2.7206740116655865, "grad_norm": 0.5053251385688782, "learning_rate": 2.256357109361823e-07, "loss": 0.077, "step": 8396 }, { "epoch": 2.72099805573558, "grad_norm": 0.45735135674476624, "learning_rate": 2.2511645985559715e-07, "loss": 0.0658, "step": 8397 }, { "epoch": 2.7213220998055734, "grad_norm": 0.4907129406929016, "learning_rate": 2.2459779317453246e-07, "loss": 0.0677, "step": 8398 }, { "epoch": 2.7216461438755672, "grad_norm": 0.47853198647499084, "learning_rate": 2.2407971095646853e-07, "loss": 0.0702, "step": 8399 }, { "epoch": 2.7219701879455607, "grad_norm": 0.5116216540336609, "learning_rate": 2.2356221326481353e-07, "loss": 0.0754, "step": 8400 }, { "epoch": 2.722294232015554, "grad_norm": 0.4731382131576538, "learning_rate": 2.2304530016290405e-07, "loss": 0.0718, "step": 8401 }, { "epoch": 2.7226182760855475, "grad_norm": 0.47295665740966797, "learning_rate": 2.2252897171400613e-07, "loss": 0.0693, "step": 8402 }, { "epoch": 2.722942320155541, "grad_norm": 0.4782242476940155, "learning_rate": 2.220132279813114e-07, "loss": 0.0712, "step": 8403 }, { "epoch": 2.723266364225535, "grad_norm": 0.4973825514316559, "learning_rate": 2.214980690279439e-07, "loss": 0.0692, "step": 8404 }, { "epoch": 2.7235904082955282, "grad_norm": 0.4872826039791107, "learning_rate": 2.2098349491695314e-07, "loss": 0.0723, "step": 8405 }, { "epoch": 2.7239144523655217, "grad_norm": 0.462805837392807, "learning_rate": 2.2046950571131764e-07, "loss": 0.065, "step": 8406 }, { "epoch": 2.724238496435515, "grad_norm": 0.46699562668800354, "learning_rate": 2.199561014739454e-07, "loss": 0.0671, "step": 8407 }, { "epoch": 2.7245625405055085, "grad_norm": 0.5196148157119751, "learning_rate": 2.1944328226767232e-07, "loss": 0.0757, "step": 8408 }, { "epoch": 2.7248865845755024, "grad_norm": 0.5027263760566711, "learning_rate": 2.18931048155262e-07, "loss": 0.0717, "step": 8409 }, { "epoch": 2.725210628645496, "grad_norm": 0.4779914915561676, "learning_rate": 2.1841939919940602e-07, "loss": 0.0695, "step": 8410 }, { "epoch": 2.725534672715489, "grad_norm": 0.53648442029953, "learning_rate": 2.1790833546272816e-07, "loss": 0.0751, "step": 8411 }, { "epoch": 2.725858716785483, "grad_norm": 0.4998103678226471, "learning_rate": 2.1739785700777395e-07, "loss": 0.079, "step": 8412 }, { "epoch": 2.7261827608554765, "grad_norm": 0.49417588114738464, "learning_rate": 2.1688796389702393e-07, "loss": 0.0721, "step": 8413 }, { "epoch": 2.72650680492547, "grad_norm": 0.49027085304260254, "learning_rate": 2.1637865619288322e-07, "loss": 0.0716, "step": 8414 }, { "epoch": 2.7268308489954634, "grad_norm": 0.4957401156425476, "learning_rate": 2.158699339576853e-07, "loss": 0.0711, "step": 8415 }, { "epoch": 2.7271548930654568, "grad_norm": 0.5493224859237671, "learning_rate": 2.1536179725369367e-07, "loss": 0.0747, "step": 8416 }, { "epoch": 2.7274789371354506, "grad_norm": 0.46308425068855286, "learning_rate": 2.1485424614309914e-07, "loss": 0.0692, "step": 8417 }, { "epoch": 2.727802981205444, "grad_norm": 0.48021116852760315, "learning_rate": 2.1434728068802145e-07, "loss": 0.0724, "step": 8418 }, { "epoch": 2.7281270252754375, "grad_norm": 0.4874224364757538, "learning_rate": 2.1384090095050768e-07, "loss": 0.0699, "step": 8419 }, { "epoch": 2.728451069345431, "grad_norm": 0.5004766583442688, "learning_rate": 2.133351069925338e-07, "loss": 0.0692, "step": 8420 }, { "epoch": 2.7287751134154243, "grad_norm": 0.47774559259414673, "learning_rate": 2.1282989887600468e-07, "loss": 0.0681, "step": 8421 }, { "epoch": 2.729099157485418, "grad_norm": 0.520897626876831, "learning_rate": 2.1232527666275205e-07, "loss": 0.0766, "step": 8422 }, { "epoch": 2.7294232015554116, "grad_norm": 0.510453999042511, "learning_rate": 2.1182124041453755e-07, "loss": 0.0688, "step": 8423 }, { "epoch": 2.729747245625405, "grad_norm": 0.49708041548728943, "learning_rate": 2.1131779019304965e-07, "loss": 0.0691, "step": 8424 }, { "epoch": 2.7300712896953985, "grad_norm": 0.48128974437713623, "learning_rate": 2.1081492605990682e-07, "loss": 0.0728, "step": 8425 }, { "epoch": 2.730395333765392, "grad_norm": 0.4515824317932129, "learning_rate": 2.103126480766543e-07, "loss": 0.0628, "step": 8426 }, { "epoch": 2.7307193778353858, "grad_norm": 0.46619004011154175, "learning_rate": 2.0981095630476457e-07, "loss": 0.066, "step": 8427 }, { "epoch": 2.731043421905379, "grad_norm": 0.5173163414001465, "learning_rate": 2.0930985080564292e-07, "loss": 0.0738, "step": 8428 }, { "epoch": 2.7313674659753726, "grad_norm": 0.5011807084083557, "learning_rate": 2.08809331640617e-07, "loss": 0.075, "step": 8429 }, { "epoch": 2.731691510045366, "grad_norm": 0.4686156213283539, "learning_rate": 2.083093988709467e-07, "loss": 0.0669, "step": 8430 }, { "epoch": 2.7320155541153595, "grad_norm": 0.4761897623538971, "learning_rate": 2.0781005255781972e-07, "loss": 0.0714, "step": 8431 }, { "epoch": 2.7323395981853533, "grad_norm": 0.4683878421783447, "learning_rate": 2.0731129276234884e-07, "loss": 0.0713, "step": 8432 }, { "epoch": 2.7326636422553467, "grad_norm": 0.459830105304718, "learning_rate": 2.0681311954558024e-07, "loss": 0.0675, "step": 8433 }, { "epoch": 2.73298768632534, "grad_norm": 0.4843185544013977, "learning_rate": 2.0631553296848239e-07, "loss": 0.0739, "step": 8434 }, { "epoch": 2.733311730395334, "grad_norm": 0.4653462767601013, "learning_rate": 2.0581853309195877e-07, "loss": 0.0683, "step": 8435 }, { "epoch": 2.733635774465327, "grad_norm": 0.4834546744823456, "learning_rate": 2.0532211997683405e-07, "loss": 0.0695, "step": 8436 }, { "epoch": 2.733959818535321, "grad_norm": 0.45494353771209717, "learning_rate": 2.0482629368386686e-07, "loss": 0.0668, "step": 8437 }, { "epoch": 2.7342838626053143, "grad_norm": 0.47521457076072693, "learning_rate": 2.0433105427373978e-07, "loss": 0.0707, "step": 8438 }, { "epoch": 2.7346079066753077, "grad_norm": 0.48294797539711, "learning_rate": 2.03836401807066e-07, "loss": 0.0685, "step": 8439 }, { "epoch": 2.7349319507453016, "grad_norm": 0.5113422870635986, "learning_rate": 2.033423363443865e-07, "loss": 0.076, "step": 8440 }, { "epoch": 2.735255994815295, "grad_norm": 0.5801029801368713, "learning_rate": 2.0284885794616905e-07, "loss": 0.0818, "step": 8441 }, { "epoch": 2.7355800388852884, "grad_norm": 0.5278540849685669, "learning_rate": 2.0235596667281254e-07, "loss": 0.0738, "step": 8442 }, { "epoch": 2.735904082955282, "grad_norm": 0.5190832614898682, "learning_rate": 2.0186366258463985e-07, "loss": 0.0742, "step": 8443 }, { "epoch": 2.7362281270252753, "grad_norm": 0.5184222459793091, "learning_rate": 2.013719457419061e-07, "loss": 0.0682, "step": 8444 }, { "epoch": 2.736552171095269, "grad_norm": 0.47110626101493835, "learning_rate": 2.0088081620479095e-07, "loss": 0.0697, "step": 8445 }, { "epoch": 2.7368762151652626, "grad_norm": 0.45322057604789734, "learning_rate": 2.003902740334057e-07, "loss": 0.0703, "step": 8446 }, { "epoch": 2.737200259235256, "grad_norm": 0.474938303232193, "learning_rate": 1.999003192877863e-07, "loss": 0.0693, "step": 8447 }, { "epoch": 2.7375243033052494, "grad_norm": 0.5155289173126221, "learning_rate": 1.9941095202789973e-07, "loss": 0.0731, "step": 8448 }, { "epoch": 2.737848347375243, "grad_norm": 0.4893336892127991, "learning_rate": 1.989221723136392e-07, "loss": 0.0737, "step": 8449 }, { "epoch": 2.7381723914452367, "grad_norm": 0.4888181984424591, "learning_rate": 1.984339802048274e-07, "loss": 0.0738, "step": 8450 }, { "epoch": 2.73849643551523, "grad_norm": 0.49995023012161255, "learning_rate": 1.9794637576121324e-07, "loss": 0.0724, "step": 8451 }, { "epoch": 2.7388204795852236, "grad_norm": 0.46177735924720764, "learning_rate": 1.9745935904247505e-07, "loss": 0.0679, "step": 8452 }, { "epoch": 2.739144523655217, "grad_norm": 0.46565553545951843, "learning_rate": 1.9697293010821906e-07, "loss": 0.068, "step": 8453 }, { "epoch": 2.7394685677252104, "grad_norm": 0.4851975440979004, "learning_rate": 1.9648708901797932e-07, "loss": 0.0706, "step": 8454 }, { "epoch": 2.7397926117952043, "grad_norm": 0.44226428866386414, "learning_rate": 1.9600183583121878e-07, "loss": 0.0638, "step": 8455 }, { "epoch": 2.7401166558651977, "grad_norm": 0.5337146520614624, "learning_rate": 1.9551717060732667e-07, "loss": 0.0771, "step": 8456 }, { "epoch": 2.740440699935191, "grad_norm": 0.47819724678993225, "learning_rate": 1.950330934056227e-07, "loss": 0.0664, "step": 8457 }, { "epoch": 2.7407647440051845, "grad_norm": 0.4902496635913849, "learning_rate": 1.9454960428535118e-07, "loss": 0.0747, "step": 8458 }, { "epoch": 2.741088788075178, "grad_norm": 0.4811018407344818, "learning_rate": 1.940667033056892e-07, "loss": 0.0727, "step": 8459 }, { "epoch": 2.741412832145172, "grad_norm": 0.5350942611694336, "learning_rate": 1.9358439052573673e-07, "loss": 0.0783, "step": 8460 }, { "epoch": 2.7417368762151653, "grad_norm": 0.5199815630912781, "learning_rate": 1.9310266600452542e-07, "loss": 0.0755, "step": 8461 }, { "epoch": 2.7420609202851587, "grad_norm": 0.45431849360466003, "learning_rate": 1.9262152980101368e-07, "loss": 0.0665, "step": 8462 }, { "epoch": 2.7423849643551526, "grad_norm": 0.5301962494850159, "learning_rate": 1.921409819740866e-07, "loss": 0.0772, "step": 8463 }, { "epoch": 2.742709008425146, "grad_norm": 0.5110758543014526, "learning_rate": 1.9166102258256103e-07, "loss": 0.0734, "step": 8464 }, { "epoch": 2.7430330524951394, "grad_norm": 0.4723348319530487, "learning_rate": 1.9118165168517665e-07, "loss": 0.0665, "step": 8465 }, { "epoch": 2.743357096565133, "grad_norm": 0.5000332593917847, "learning_rate": 1.9070286934060654e-07, "loss": 0.0763, "step": 8466 }, { "epoch": 2.7436811406351262, "grad_norm": 0.5045048594474792, "learning_rate": 1.9022467560744606e-07, "loss": 0.0736, "step": 8467 }, { "epoch": 2.74400518470512, "grad_norm": 0.463461309671402, "learning_rate": 1.8974707054422447e-07, "loss": 0.0657, "step": 8468 }, { "epoch": 2.7443292287751135, "grad_norm": 0.4715588688850403, "learning_rate": 1.8927005420939394e-07, "loss": 0.0736, "step": 8469 }, { "epoch": 2.744653272845107, "grad_norm": 0.524459719657898, "learning_rate": 1.8879362666133716e-07, "loss": 0.0756, "step": 8470 }, { "epoch": 2.7449773169151004, "grad_norm": 0.5173646211624146, "learning_rate": 1.8831778795836476e-07, "loss": 0.0695, "step": 8471 }, { "epoch": 2.745301360985094, "grad_norm": 0.45598724484443665, "learning_rate": 1.87842538158714e-07, "loss": 0.066, "step": 8472 }, { "epoch": 2.7456254050550877, "grad_norm": 0.4994466304779053, "learning_rate": 1.8736787732055228e-07, "loss": 0.0754, "step": 8473 }, { "epoch": 2.745949449125081, "grad_norm": 0.45296186208724976, "learning_rate": 1.8689380550197146e-07, "loss": 0.0634, "step": 8474 }, { "epoch": 2.7462734931950745, "grad_norm": 0.4605759084224701, "learning_rate": 1.8642032276099454e-07, "loss": 0.0682, "step": 8475 }, { "epoch": 2.746597537265068, "grad_norm": 0.5162789225578308, "learning_rate": 1.859474291555713e-07, "loss": 0.0677, "step": 8476 }, { "epoch": 2.7469215813350614, "grad_norm": 0.5212785601615906, "learning_rate": 1.8547512474357876e-07, "loss": 0.0764, "step": 8477 }, { "epoch": 2.7472456254050552, "grad_norm": 0.5051805973052979, "learning_rate": 1.8500340958282292e-07, "loss": 0.0763, "step": 8478 }, { "epoch": 2.7475696694750487, "grad_norm": 0.4878946840763092, "learning_rate": 1.8453228373103705e-07, "loss": 0.0722, "step": 8479 }, { "epoch": 2.747893713545042, "grad_norm": 0.46743884682655334, "learning_rate": 1.840617472458822e-07, "loss": 0.0694, "step": 8480 }, { "epoch": 2.7482177576150355, "grad_norm": 0.4815881848335266, "learning_rate": 1.8359180018494793e-07, "loss": 0.0692, "step": 8481 }, { "epoch": 2.748541801685029, "grad_norm": 0.5062234401702881, "learning_rate": 1.831224426057504e-07, "loss": 0.0741, "step": 8482 }, { "epoch": 2.748865845755023, "grad_norm": 0.4984256625175476, "learning_rate": 1.8265367456573534e-07, "loss": 0.0702, "step": 8483 }, { "epoch": 2.749189889825016, "grad_norm": 0.473633736371994, "learning_rate": 1.8218549612227464e-07, "loss": 0.0681, "step": 8484 }, { "epoch": 2.7495139338950096, "grad_norm": 0.5147225260734558, "learning_rate": 1.8171790733266914e-07, "loss": 0.0726, "step": 8485 }, { "epoch": 2.7498379779650035, "grad_norm": 0.4694821536540985, "learning_rate": 1.812509082541475e-07, "loss": 0.0719, "step": 8486 }, { "epoch": 2.7501620220349965, "grad_norm": 0.4427323341369629, "learning_rate": 1.8078449894386508e-07, "loss": 0.0649, "step": 8487 }, { "epoch": 2.7504860661049904, "grad_norm": 0.5387277603149414, "learning_rate": 1.803186794589068e-07, "loss": 0.079, "step": 8488 }, { "epoch": 2.750810110174984, "grad_norm": 0.5092546343803406, "learning_rate": 1.7985344985628316e-07, "loss": 0.0701, "step": 8489 }, { "epoch": 2.751134154244977, "grad_norm": 0.5002366900444031, "learning_rate": 1.7938881019293642e-07, "loss": 0.0743, "step": 8490 }, { "epoch": 2.751458198314971, "grad_norm": 0.5159744620323181, "learning_rate": 1.7892476052573104e-07, "loss": 0.0761, "step": 8491 }, { "epoch": 2.7517822423849645, "grad_norm": 0.4766468107700348, "learning_rate": 1.784613009114633e-07, "loss": 0.0678, "step": 8492 }, { "epoch": 2.752106286454958, "grad_norm": 0.4794914126396179, "learning_rate": 1.7799843140685613e-07, "loss": 0.0685, "step": 8493 }, { "epoch": 2.7524303305249513, "grad_norm": 0.44357404112815857, "learning_rate": 1.7753615206856033e-07, "loss": 0.0637, "step": 8494 }, { "epoch": 2.7527543745949448, "grad_norm": 0.43333783745765686, "learning_rate": 1.770744629531551e-07, "loss": 0.0626, "step": 8495 }, { "epoch": 2.7530784186649386, "grad_norm": 0.5045689940452576, "learning_rate": 1.7661336411714526e-07, "loss": 0.0752, "step": 8496 }, { "epoch": 2.753402462734932, "grad_norm": 0.545109748840332, "learning_rate": 1.761528556169667e-07, "loss": 0.0779, "step": 8497 }, { "epoch": 2.7537265068049255, "grad_norm": 0.47431567311286926, "learning_rate": 1.7569293750897942e-07, "loss": 0.0703, "step": 8498 }, { "epoch": 2.754050550874919, "grad_norm": 0.4464452862739563, "learning_rate": 1.7523360984947336e-07, "loss": 0.0649, "step": 8499 }, { "epoch": 2.7543745949449123, "grad_norm": 0.4776403307914734, "learning_rate": 1.7477487269466632e-07, "loss": 0.0708, "step": 8500 }, { "epoch": 2.754698639014906, "grad_norm": 0.43605953454971313, "learning_rate": 1.7431672610070337e-07, "loss": 0.0651, "step": 8501 }, { "epoch": 2.7550226830848996, "grad_norm": 0.49093151092529297, "learning_rate": 1.7385917012365694e-07, "loss": 0.0658, "step": 8502 }, { "epoch": 2.755346727154893, "grad_norm": 0.49357640743255615, "learning_rate": 1.734022048195272e-07, "loss": 0.0733, "step": 8503 }, { "epoch": 2.7556707712248865, "grad_norm": 0.4901171326637268, "learning_rate": 1.7294583024424273e-07, "loss": 0.0659, "step": 8504 }, { "epoch": 2.75599481529488, "grad_norm": 0.5076462030410767, "learning_rate": 1.7249004645365884e-07, "loss": 0.0754, "step": 8505 }, { "epoch": 2.7563188593648738, "grad_norm": 0.47774630784988403, "learning_rate": 1.7203485350355986e-07, "loss": 0.0654, "step": 8506 }, { "epoch": 2.756642903434867, "grad_norm": 0.4688727855682373, "learning_rate": 1.715802514496556e-07, "loss": 0.0663, "step": 8507 }, { "epoch": 2.7569669475048606, "grad_norm": 0.45370423793792725, "learning_rate": 1.7112624034758663e-07, "loss": 0.0637, "step": 8508 }, { "epoch": 2.757290991574854, "grad_norm": 0.4860289990901947, "learning_rate": 1.7067282025291842e-07, "loss": 0.0693, "step": 8509 }, { "epoch": 2.7576150356448474, "grad_norm": 0.4762254059314728, "learning_rate": 1.7021999122114552e-07, "loss": 0.0726, "step": 8510 }, { "epoch": 2.7579390797148413, "grad_norm": 0.4850431978702545, "learning_rate": 1.6976775330768913e-07, "loss": 0.0708, "step": 8511 }, { "epoch": 2.7582631237848347, "grad_norm": 0.4895291030406952, "learning_rate": 1.693161065679011e-07, "loss": 0.0703, "step": 8512 }, { "epoch": 2.758587167854828, "grad_norm": 0.4404808580875397, "learning_rate": 1.6886505105705553e-07, "loss": 0.0638, "step": 8513 }, { "epoch": 2.758911211924822, "grad_norm": 0.48556792736053467, "learning_rate": 1.684145868303594e-07, "loss": 0.0704, "step": 8514 }, { "epoch": 2.7592352559948155, "grad_norm": 0.5026125907897949, "learning_rate": 1.679647139429441e-07, "loss": 0.0699, "step": 8515 }, { "epoch": 2.759559300064809, "grad_norm": 0.47186747193336487, "learning_rate": 1.675154324498701e-07, "loss": 0.0666, "step": 8516 }, { "epoch": 2.7598833441348023, "grad_norm": 0.4909684658050537, "learning_rate": 1.6706674240612563e-07, "loss": 0.0727, "step": 8517 }, { "epoch": 2.7602073882047957, "grad_norm": 0.5015256404876709, "learning_rate": 1.6661864386662452e-07, "loss": 0.0729, "step": 8518 }, { "epoch": 2.7605314322747896, "grad_norm": 0.5276386141777039, "learning_rate": 1.6617113688621177e-07, "loss": 0.0787, "step": 8519 }, { "epoch": 2.760855476344783, "grad_norm": 0.47311532497406006, "learning_rate": 1.6572422151965529e-07, "loss": 0.0648, "step": 8520 }, { "epoch": 2.7611795204147764, "grad_norm": 0.48931893706321716, "learning_rate": 1.6527789782165627e-07, "loss": 0.0714, "step": 8521 }, { "epoch": 2.76150356448477, "grad_norm": 0.4892522990703583, "learning_rate": 1.6483216584683825e-07, "loss": 0.071, "step": 8522 }, { "epoch": 2.7618276085547633, "grad_norm": 0.478360116481781, "learning_rate": 1.6438702564975483e-07, "loss": 0.0682, "step": 8523 }, { "epoch": 2.762151652624757, "grad_norm": 0.48810848593711853, "learning_rate": 1.639424772848869e-07, "loss": 0.0719, "step": 8524 }, { "epoch": 2.7624756966947506, "grad_norm": 0.4530242681503296, "learning_rate": 1.6349852080664364e-07, "loss": 0.0644, "step": 8525 }, { "epoch": 2.762799740764744, "grad_norm": 0.49164706468582153, "learning_rate": 1.6305515626936054e-07, "loss": 0.0703, "step": 8526 }, { "epoch": 2.7631237848347374, "grad_norm": 0.47457531094551086, "learning_rate": 1.6261238372730025e-07, "loss": 0.0636, "step": 8527 }, { "epoch": 2.763447828904731, "grad_norm": 0.4845804274082184, "learning_rate": 1.621702032346556e-07, "loss": 0.0739, "step": 8528 }, { "epoch": 2.7637718729747247, "grad_norm": 0.4832020699977875, "learning_rate": 1.6172861484554382e-07, "loss": 0.0723, "step": 8529 }, { "epoch": 2.764095917044718, "grad_norm": 0.5220732688903809, "learning_rate": 1.612876186140111e-07, "loss": 0.071, "step": 8530 }, { "epoch": 2.7644199611147116, "grad_norm": 0.46765822172164917, "learning_rate": 1.608472145940321e-07, "loss": 0.0671, "step": 8531 }, { "epoch": 2.764744005184705, "grad_norm": 0.4872879981994629, "learning_rate": 1.6040740283950694e-07, "loss": 0.0755, "step": 8532 }, { "epoch": 2.7650680492546984, "grad_norm": 0.4944052994251251, "learning_rate": 1.599681834042649e-07, "loss": 0.0732, "step": 8533 }, { "epoch": 2.7653920933246923, "grad_norm": 0.49354100227355957, "learning_rate": 1.5952955634206235e-07, "loss": 0.0701, "step": 8534 }, { "epoch": 2.7657161373946857, "grad_norm": 0.4873565137386322, "learning_rate": 1.5909152170658304e-07, "loss": 0.0712, "step": 8535 }, { "epoch": 2.766040181464679, "grad_norm": 0.4880267381668091, "learning_rate": 1.586540795514374e-07, "loss": 0.0672, "step": 8536 }, { "epoch": 2.766364225534673, "grad_norm": 0.4742276072502136, "learning_rate": 1.582172299301643e-07, "loss": 0.0697, "step": 8537 }, { "epoch": 2.766688269604666, "grad_norm": 0.4935052692890167, "learning_rate": 1.577809728962304e-07, "loss": 0.0743, "step": 8538 }, { "epoch": 2.76701231367466, "grad_norm": 0.49048107862472534, "learning_rate": 1.573453085030291e-07, "loss": 0.0693, "step": 8539 }, { "epoch": 2.7673363577446533, "grad_norm": 0.5113756060600281, "learning_rate": 1.5691023680388162e-07, "loss": 0.0769, "step": 8540 }, { "epoch": 2.7676604018146467, "grad_norm": 0.49390682578086853, "learning_rate": 1.564757578520365e-07, "loss": 0.0727, "step": 8541 }, { "epoch": 2.7679844458846405, "grad_norm": 0.5174000263214111, "learning_rate": 1.5604187170066899e-07, "loss": 0.0784, "step": 8542 }, { "epoch": 2.768308489954634, "grad_norm": 0.5169310569763184, "learning_rate": 1.5560857840288434e-07, "loss": 0.0757, "step": 8543 }, { "epoch": 2.7686325340246274, "grad_norm": 0.48713600635528564, "learning_rate": 1.551758780117113e-07, "loss": 0.0686, "step": 8544 }, { "epoch": 2.768956578094621, "grad_norm": 0.45703360438346863, "learning_rate": 1.547437705801097e-07, "loss": 0.0671, "step": 8545 }, { "epoch": 2.7692806221646142, "grad_norm": 0.5233086347579956, "learning_rate": 1.5431225616096502e-07, "loss": 0.0724, "step": 8546 }, { "epoch": 2.769604666234608, "grad_norm": 0.5077247619628906, "learning_rate": 1.5388133480709e-07, "loss": 0.0733, "step": 8547 }, { "epoch": 2.7699287103046015, "grad_norm": 0.4755188822746277, "learning_rate": 1.5345100657122635e-07, "loss": 0.07, "step": 8548 }, { "epoch": 2.770252754374595, "grad_norm": 0.4951607584953308, "learning_rate": 1.5302127150603973e-07, "loss": 0.0671, "step": 8549 }, { "epoch": 2.7705767984445884, "grad_norm": 0.4756135642528534, "learning_rate": 1.525921296641286e-07, "loss": 0.0725, "step": 8550 }, { "epoch": 2.770900842514582, "grad_norm": 0.4744229316711426, "learning_rate": 1.5216358109801267e-07, "loss": 0.071, "step": 8551 }, { "epoch": 2.7712248865845757, "grad_norm": 0.5493775010108948, "learning_rate": 1.5173562586014546e-07, "loss": 0.082, "step": 8552 }, { "epoch": 2.771548930654569, "grad_norm": 0.510632336139679, "learning_rate": 1.5130826400290177e-07, "loss": 0.0707, "step": 8553 }, { "epoch": 2.7718729747245625, "grad_norm": 0.47911614179611206, "learning_rate": 1.5088149557858757e-07, "loss": 0.0736, "step": 8554 }, { "epoch": 2.772197018794556, "grad_norm": 0.5228129625320435, "learning_rate": 1.5045532063943547e-07, "loss": 0.0756, "step": 8555 }, { "epoch": 2.7725210628645494, "grad_norm": 0.5336810350418091, "learning_rate": 1.500297392376049e-07, "loss": 0.073, "step": 8556 }, { "epoch": 2.7728451069345432, "grad_norm": 0.4823746383190155, "learning_rate": 1.4960475142518306e-07, "loss": 0.069, "step": 8557 }, { "epoch": 2.7731691510045366, "grad_norm": 0.45536088943481445, "learning_rate": 1.4918035725418388e-07, "loss": 0.0651, "step": 8558 }, { "epoch": 2.77349319507453, "grad_norm": 0.5071476101875305, "learning_rate": 1.4875655677654976e-07, "loss": 0.0716, "step": 8559 }, { "epoch": 2.7738172391445235, "grad_norm": 0.48243248462677, "learning_rate": 1.4833335004414917e-07, "loss": 0.0684, "step": 8560 }, { "epoch": 2.774141283214517, "grad_norm": 0.4552246332168579, "learning_rate": 1.4791073710877846e-07, "loss": 0.0645, "step": 8561 }, { "epoch": 2.774465327284511, "grad_norm": 0.47968533635139465, "learning_rate": 1.474887180221618e-07, "loss": 0.0681, "step": 8562 }, { "epoch": 2.774789371354504, "grad_norm": 0.47891002893447876, "learning_rate": 1.4706729283595066e-07, "loss": 0.0713, "step": 8563 }, { "epoch": 2.7751134154244976, "grad_norm": 0.5036091804504395, "learning_rate": 1.4664646160172213e-07, "loss": 0.0707, "step": 8564 }, { "epoch": 2.7754374594944915, "grad_norm": 0.4715958833694458, "learning_rate": 1.4622622437098333e-07, "loss": 0.0694, "step": 8565 }, { "epoch": 2.775761503564485, "grad_norm": 0.5018330812454224, "learning_rate": 1.4580658119516589e-07, "loss": 0.0744, "step": 8566 }, { "epoch": 2.7760855476344783, "grad_norm": 0.5229835510253906, "learning_rate": 1.4538753212563095e-07, "loss": 0.0786, "step": 8567 }, { "epoch": 2.7764095917044718, "grad_norm": 0.4672481119632721, "learning_rate": 1.4496907721366583e-07, "loss": 0.0664, "step": 8568 }, { "epoch": 2.776733635774465, "grad_norm": 0.4925411641597748, "learning_rate": 1.445512165104851e-07, "loss": 0.0753, "step": 8569 }, { "epoch": 2.777057679844459, "grad_norm": 0.5130710601806641, "learning_rate": 1.441339500672312e-07, "loss": 0.0768, "step": 8570 }, { "epoch": 2.7773817239144525, "grad_norm": 0.4710983633995056, "learning_rate": 1.4371727793497325e-07, "loss": 0.0682, "step": 8571 }, { "epoch": 2.777705767984446, "grad_norm": 0.5014706254005432, "learning_rate": 1.433012001647083e-07, "loss": 0.075, "step": 8572 }, { "epoch": 2.7780298120544393, "grad_norm": 0.49718159437179565, "learning_rate": 1.428857168073594e-07, "loss": 0.0695, "step": 8573 }, { "epoch": 2.7783538561244328, "grad_norm": 0.4719870984554291, "learning_rate": 1.4247082791377932e-07, "loss": 0.0661, "step": 8574 }, { "epoch": 2.7786779001944266, "grad_norm": 0.47441691160202026, "learning_rate": 1.4205653353474404e-07, "loss": 0.0652, "step": 8575 }, { "epoch": 2.77900194426442, "grad_norm": 0.4929604232311249, "learning_rate": 1.4164283372096133e-07, "loss": 0.0721, "step": 8576 }, { "epoch": 2.7793259883344135, "grad_norm": 0.5177626013755798, "learning_rate": 1.4122972852306293e-07, "loss": 0.0765, "step": 8577 }, { "epoch": 2.779650032404407, "grad_norm": 0.5820122361183167, "learning_rate": 1.408172179916095e-07, "loss": 0.0749, "step": 8578 }, { "epoch": 2.7799740764744003, "grad_norm": 0.46356290578842163, "learning_rate": 1.4040530217708847e-07, "loss": 0.0668, "step": 8579 }, { "epoch": 2.780298120544394, "grad_norm": 0.5057440400123596, "learning_rate": 1.399939811299128e-07, "loss": 0.0726, "step": 8580 }, { "epoch": 2.7806221646143876, "grad_norm": 0.4511764645576477, "learning_rate": 1.3958325490042613e-07, "loss": 0.0619, "step": 8581 }, { "epoch": 2.780946208684381, "grad_norm": 0.49743250012397766, "learning_rate": 1.3917312353889601e-07, "loss": 0.0713, "step": 8582 }, { "epoch": 2.7812702527543745, "grad_norm": 0.48972567915916443, "learning_rate": 1.3876358709552006e-07, "loss": 0.0689, "step": 8583 }, { "epoch": 2.781594296824368, "grad_norm": 0.4920805096626282, "learning_rate": 1.383546456204199e-07, "loss": 0.071, "step": 8584 }, { "epoch": 2.7819183408943617, "grad_norm": 0.43759530782699585, "learning_rate": 1.3794629916364654e-07, "loss": 0.0649, "step": 8585 }, { "epoch": 2.782242384964355, "grad_norm": 0.48497021198272705, "learning_rate": 1.375385477751784e-07, "loss": 0.07, "step": 8586 }, { "epoch": 2.7825664290343486, "grad_norm": 0.4718039333820343, "learning_rate": 1.3713139150491938e-07, "loss": 0.069, "step": 8587 }, { "epoch": 2.7828904731043425, "grad_norm": 0.4685593545436859, "learning_rate": 1.3672483040270246e-07, "loss": 0.0701, "step": 8588 }, { "epoch": 2.7832145171743354, "grad_norm": 0.4797540307044983, "learning_rate": 1.3631886451828556e-07, "loss": 0.0709, "step": 8589 }, { "epoch": 2.7835385612443293, "grad_norm": 0.5307188034057617, "learning_rate": 1.3591349390135565e-07, "loss": 0.0765, "step": 8590 }, { "epoch": 2.7838626053143227, "grad_norm": 0.45155444741249084, "learning_rate": 1.355087186015258e-07, "loss": 0.0632, "step": 8591 }, { "epoch": 2.784186649384316, "grad_norm": 0.5059266686439514, "learning_rate": 1.351045386683375e-07, "loss": 0.0706, "step": 8592 }, { "epoch": 2.78451069345431, "grad_norm": 0.5184312462806702, "learning_rate": 1.3470095415125727e-07, "loss": 0.0681, "step": 8593 }, { "epoch": 2.7848347375243034, "grad_norm": 0.5246706604957581, "learning_rate": 1.3429796509968062e-07, "loss": 0.0808, "step": 8594 }, { "epoch": 2.785158781594297, "grad_norm": 0.4852238595485687, "learning_rate": 1.338955715629292e-07, "loss": 0.066, "step": 8595 }, { "epoch": 2.7854828256642903, "grad_norm": 0.5338213443756104, "learning_rate": 1.3349377359025307e-07, "loss": 0.0776, "step": 8596 }, { "epoch": 2.7858068697342837, "grad_norm": 0.5105713605880737, "learning_rate": 1.3309257123082674e-07, "loss": 0.0754, "step": 8597 }, { "epoch": 2.7861309138042776, "grad_norm": 0.5126920342445374, "learning_rate": 1.3269196453375544e-07, "loss": 0.0745, "step": 8598 }, { "epoch": 2.786454957874271, "grad_norm": 0.5031168460845947, "learning_rate": 1.3229195354806768e-07, "loss": 0.0731, "step": 8599 }, { "epoch": 2.7867790019442644, "grad_norm": 0.47806215286254883, "learning_rate": 1.3189253832272153e-07, "loss": 0.0687, "step": 8600 }, { "epoch": 2.787103046014258, "grad_norm": 0.5075884461402893, "learning_rate": 1.314937189066029e-07, "loss": 0.0731, "step": 8601 }, { "epoch": 2.7874270900842513, "grad_norm": 0.48211807012557983, "learning_rate": 1.310954953485205e-07, "loss": 0.0712, "step": 8602 }, { "epoch": 2.787751134154245, "grad_norm": 0.48329949378967285, "learning_rate": 1.3069786769721647e-07, "loss": 0.0757, "step": 8603 }, { "epoch": 2.7880751782242386, "grad_norm": 0.48496365547180176, "learning_rate": 1.3030083600135357e-07, "loss": 0.0723, "step": 8604 }, { "epoch": 2.788399222294232, "grad_norm": 0.4774136543273926, "learning_rate": 1.2990440030952732e-07, "loss": 0.0686, "step": 8605 }, { "epoch": 2.7887232663642254, "grad_norm": 0.5031121373176575, "learning_rate": 1.2950856067025507e-07, "loss": 0.0704, "step": 8606 }, { "epoch": 2.789047310434219, "grad_norm": 0.4636278450489044, "learning_rate": 1.2911331713198582e-07, "loss": 0.0641, "step": 8607 }, { "epoch": 2.7893713545042127, "grad_norm": 0.5307515263557434, "learning_rate": 1.2871866974309255e-07, "loss": 0.0775, "step": 8608 }, { "epoch": 2.789695398574206, "grad_norm": 0.5113701224327087, "learning_rate": 1.2832461855187605e-07, "loss": 0.0738, "step": 8609 }, { "epoch": 2.7900194426441995, "grad_norm": 0.4879836440086365, "learning_rate": 1.27931163606565e-07, "loss": 0.0701, "step": 8610 }, { "epoch": 2.790343486714193, "grad_norm": 0.5034375786781311, "learning_rate": 1.275383049553147e-07, "loss": 0.0712, "step": 8611 }, { "epoch": 2.7906675307841864, "grad_norm": 0.4796959459781647, "learning_rate": 1.2714604264620677e-07, "loss": 0.0658, "step": 8612 }, { "epoch": 2.7909915748541803, "grad_norm": 0.47571125626564026, "learning_rate": 1.2675437672724945e-07, "loss": 0.0678, "step": 8613 }, { "epoch": 2.7913156189241737, "grad_norm": 0.4868853688240051, "learning_rate": 1.2636330724638045e-07, "loss": 0.0706, "step": 8614 }, { "epoch": 2.791639662994167, "grad_norm": 0.47460609674453735, "learning_rate": 1.259728342514621e-07, "loss": 0.0689, "step": 8615 }, { "epoch": 2.791963707064161, "grad_norm": 0.4783695340156555, "learning_rate": 1.2558295779028452e-07, "loss": 0.0646, "step": 8616 }, { "epoch": 2.7922877511341544, "grad_norm": 0.4867151379585266, "learning_rate": 1.2519367791056502e-07, "loss": 0.0715, "step": 8617 }, { "epoch": 2.792611795204148, "grad_norm": 0.5441122055053711, "learning_rate": 1.248049946599472e-07, "loss": 0.0746, "step": 8618 }, { "epoch": 2.7929358392741412, "grad_norm": 0.533957839012146, "learning_rate": 1.244169080860025e-07, "loss": 0.0832, "step": 8619 }, { "epoch": 2.7932598833441347, "grad_norm": 0.4913591742515564, "learning_rate": 1.2402941823622948e-07, "loss": 0.0733, "step": 8620 }, { "epoch": 2.7935839274141285, "grad_norm": 0.49308526515960693, "learning_rate": 1.2364252515805252e-07, "loss": 0.0728, "step": 8621 }, { "epoch": 2.793907971484122, "grad_norm": 0.5083343386650085, "learning_rate": 1.2325622889882317e-07, "loss": 0.0724, "step": 8622 }, { "epoch": 2.7942320155541154, "grad_norm": 0.436282753944397, "learning_rate": 1.2287052950582134e-07, "loss": 0.0621, "step": 8623 }, { "epoch": 2.794556059624109, "grad_norm": 0.48184362053871155, "learning_rate": 1.224854270262521e-07, "loss": 0.071, "step": 8624 }, { "epoch": 2.7948801036941022, "grad_norm": 0.46548590064048767, "learning_rate": 1.2210092150724882e-07, "loss": 0.0691, "step": 8625 }, { "epoch": 2.795204147764096, "grad_norm": 0.5401601791381836, "learning_rate": 1.2171701299587058e-07, "loss": 0.0717, "step": 8626 }, { "epoch": 2.7955281918340895, "grad_norm": 0.48991283774375916, "learning_rate": 1.2133370153910528e-07, "loss": 0.0728, "step": 8627 }, { "epoch": 2.795852235904083, "grad_norm": 0.500863254070282, "learning_rate": 1.209509871838649e-07, "loss": 0.0693, "step": 8628 }, { "epoch": 2.7961762799740764, "grad_norm": 0.48141273856163025, "learning_rate": 1.2056886997699192e-07, "loss": 0.0686, "step": 8629 }, { "epoch": 2.79650032404407, "grad_norm": 0.4619733989238739, "learning_rate": 1.2018734996525173e-07, "loss": 0.0664, "step": 8630 }, { "epoch": 2.7968243681140637, "grad_norm": 0.4864651560783386, "learning_rate": 1.198064271953403e-07, "loss": 0.0694, "step": 8631 }, { "epoch": 2.797148412184057, "grad_norm": 0.5243645906448364, "learning_rate": 1.194261017138776e-07, "loss": 0.0721, "step": 8632 }, { "epoch": 2.7974724562540505, "grad_norm": 0.4923947751522064, "learning_rate": 1.19046373567413e-07, "loss": 0.0679, "step": 8633 }, { "epoch": 2.797796500324044, "grad_norm": 0.5136322975158691, "learning_rate": 1.1866724280242104e-07, "loss": 0.0743, "step": 8634 }, { "epoch": 2.7981205443940373, "grad_norm": 0.47235724329948425, "learning_rate": 1.1828870946530291e-07, "loss": 0.0711, "step": 8635 }, { "epoch": 2.798444588464031, "grad_norm": 0.5212631225585938, "learning_rate": 1.1791077360238879e-07, "loss": 0.0774, "step": 8636 }, { "epoch": 2.7987686325340246, "grad_norm": 0.4773974120616913, "learning_rate": 1.1753343525993277e-07, "loss": 0.0671, "step": 8637 }, { "epoch": 2.799092676604018, "grad_norm": 0.4766692519187927, "learning_rate": 1.1715669448411959e-07, "loss": 0.0715, "step": 8638 }, { "epoch": 2.799416720674012, "grad_norm": 0.4838842749595642, "learning_rate": 1.1678055132105682e-07, "loss": 0.0705, "step": 8639 }, { "epoch": 2.7997407647440054, "grad_norm": 0.47889941930770874, "learning_rate": 1.1640500581678093e-07, "loss": 0.0687, "step": 8640 }, { "epoch": 2.8000648088139988, "grad_norm": 0.49360138177871704, "learning_rate": 1.1603005801725575e-07, "loss": 0.0713, "step": 8641 }, { "epoch": 2.800388852883992, "grad_norm": 0.4900329113006592, "learning_rate": 1.1565570796837122e-07, "loss": 0.0688, "step": 8642 }, { "epoch": 2.8007128969539856, "grad_norm": 0.5061361789703369, "learning_rate": 1.1528195571594403e-07, "loss": 0.0794, "step": 8643 }, { "epoch": 2.8010369410239795, "grad_norm": 0.48438015580177307, "learning_rate": 1.1490880130571758e-07, "loss": 0.0728, "step": 8644 }, { "epoch": 2.801360985093973, "grad_norm": 0.49202558398246765, "learning_rate": 1.1453624478336256e-07, "loss": 0.0782, "step": 8645 }, { "epoch": 2.8016850291639663, "grad_norm": 0.4822075664997101, "learning_rate": 1.1416428619447583e-07, "loss": 0.0713, "step": 8646 }, { "epoch": 2.8020090732339598, "grad_norm": 0.4483577311038971, "learning_rate": 1.137929255845821e-07, "loss": 0.0669, "step": 8647 }, { "epoch": 2.802333117303953, "grad_norm": 0.42313840985298157, "learning_rate": 1.1342216299913222e-07, "loss": 0.0586, "step": 8648 }, { "epoch": 2.802657161373947, "grad_norm": 0.4894741177558899, "learning_rate": 1.1305199848350379e-07, "loss": 0.0657, "step": 8649 }, { "epoch": 2.8029812054439405, "grad_norm": 0.5330514311790466, "learning_rate": 1.1268243208300111e-07, "loss": 0.0761, "step": 8650 }, { "epoch": 2.803305249513934, "grad_norm": 0.4709966778755188, "learning_rate": 1.1231346384285691e-07, "loss": 0.068, "step": 8651 }, { "epoch": 2.8036292935839273, "grad_norm": 0.4735042452812195, "learning_rate": 1.1194509380822727e-07, "loss": 0.0711, "step": 8652 }, { "epoch": 2.8039533376539207, "grad_norm": 0.4827316105365753, "learning_rate": 1.1157732202419835e-07, "loss": 0.0709, "step": 8653 }, { "epoch": 2.8042773817239146, "grad_norm": 0.46700969338417053, "learning_rate": 1.1121014853578138e-07, "loss": 0.0688, "step": 8654 }, { "epoch": 2.804601425793908, "grad_norm": 0.4579877257347107, "learning_rate": 1.1084357338791541e-07, "loss": 0.0665, "step": 8655 }, { "epoch": 2.8049254698639015, "grad_norm": 0.5440908074378967, "learning_rate": 1.1047759662546564e-07, "loss": 0.0749, "step": 8656 }, { "epoch": 2.805249513933895, "grad_norm": 0.5029870867729187, "learning_rate": 1.1011221829322294e-07, "loss": 0.0701, "step": 8657 }, { "epoch": 2.8055735580038883, "grad_norm": 0.5622695088386536, "learning_rate": 1.0974743843590762e-07, "loss": 0.0785, "step": 8658 }, { "epoch": 2.805897602073882, "grad_norm": 0.4912731647491455, "learning_rate": 1.09383257098164e-07, "loss": 0.072, "step": 8659 }, { "epoch": 2.8062216461438756, "grad_norm": 0.4591299891471863, "learning_rate": 1.0901967432456583e-07, "loss": 0.0684, "step": 8660 }, { "epoch": 2.806545690213869, "grad_norm": 0.4708253741264343, "learning_rate": 1.0865669015961033e-07, "loss": 0.0672, "step": 8661 }, { "epoch": 2.806869734283863, "grad_norm": 0.5100660920143127, "learning_rate": 1.0829430464772417e-07, "loss": 0.0758, "step": 8662 }, { "epoch": 2.807193778353856, "grad_norm": 0.45392340421676636, "learning_rate": 1.0793251783325965e-07, "loss": 0.066, "step": 8663 }, { "epoch": 2.8075178224238497, "grad_norm": 0.5311992168426514, "learning_rate": 1.0757132976049634e-07, "loss": 0.0756, "step": 8664 }, { "epoch": 2.807841866493843, "grad_norm": 0.4701693058013916, "learning_rate": 1.0721074047364055e-07, "loss": 0.0646, "step": 8665 }, { "epoch": 2.8081659105638366, "grad_norm": 0.534783124923706, "learning_rate": 1.0685075001682255e-07, "loss": 0.0801, "step": 8666 }, { "epoch": 2.8084899546338304, "grad_norm": 0.45467719435691833, "learning_rate": 1.0649135843410485e-07, "loss": 0.0658, "step": 8667 }, { "epoch": 2.808813998703824, "grad_norm": 0.4908236861228943, "learning_rate": 1.0613256576947173e-07, "loss": 0.0693, "step": 8668 }, { "epoch": 2.8091380427738173, "grad_norm": 0.5250102281570435, "learning_rate": 1.0577437206683583e-07, "loss": 0.0786, "step": 8669 }, { "epoch": 2.8094620868438107, "grad_norm": 0.46399766206741333, "learning_rate": 1.0541677737003709e-07, "loss": 0.0696, "step": 8670 }, { "epoch": 2.809786130913804, "grad_norm": 0.4993619918823242, "learning_rate": 1.0505978172284214e-07, "loss": 0.0687, "step": 8671 }, { "epoch": 2.810110174983798, "grad_norm": 0.5269954800605774, "learning_rate": 1.0470338516894273e-07, "loss": 0.0733, "step": 8672 }, { "epoch": 2.8104342190537914, "grad_norm": 0.5174999237060547, "learning_rate": 1.0434758775195841e-07, "loss": 0.0782, "step": 8673 }, { "epoch": 2.810758263123785, "grad_norm": 0.527529776096344, "learning_rate": 1.0399238951543712e-07, "loss": 0.0787, "step": 8674 }, { "epoch": 2.8110823071937783, "grad_norm": 0.4615003168582916, "learning_rate": 1.0363779050284906e-07, "loss": 0.0614, "step": 8675 }, { "epoch": 2.8114063512637717, "grad_norm": 0.5183756947517395, "learning_rate": 1.0328379075759564e-07, "loss": 0.0749, "step": 8676 }, { "epoch": 2.8117303953337656, "grad_norm": 0.4968220293521881, "learning_rate": 1.0293039032300168e-07, "loss": 0.0711, "step": 8677 }, { "epoch": 2.812054439403759, "grad_norm": 0.4645611643791199, "learning_rate": 1.0257758924232142e-07, "loss": 0.0693, "step": 8678 }, { "epoch": 2.8123784834737524, "grad_norm": 0.5472391843795776, "learning_rate": 1.0222538755873313e-07, "loss": 0.0807, "step": 8679 }, { "epoch": 2.812702527543746, "grad_norm": 0.48376771807670593, "learning_rate": 1.0187378531534287e-07, "loss": 0.0707, "step": 8680 }, { "epoch": 2.8130265716137393, "grad_norm": 0.4975831210613251, "learning_rate": 1.0152278255518399e-07, "loss": 0.0709, "step": 8681 }, { "epoch": 2.813350615683733, "grad_norm": 0.5038923621177673, "learning_rate": 1.011723793212166e-07, "loss": 0.0711, "step": 8682 }, { "epoch": 2.8136746597537265, "grad_norm": 0.4740455150604248, "learning_rate": 1.0082257565632469e-07, "loss": 0.0704, "step": 8683 }, { "epoch": 2.81399870382372, "grad_norm": 0.4767991602420807, "learning_rate": 1.0047337160332182e-07, "loss": 0.0654, "step": 8684 }, { "epoch": 2.8143227478937134, "grad_norm": 0.49966102838516235, "learning_rate": 1.0012476720494713e-07, "loss": 0.0739, "step": 8685 }, { "epoch": 2.814646791963707, "grad_norm": 0.47208863496780396, "learning_rate": 9.977676250386647e-08, "loss": 0.0722, "step": 8686 }, { "epoch": 2.8149708360337007, "grad_norm": 0.4928867220878601, "learning_rate": 9.942935754267303e-08, "loss": 0.0684, "step": 8687 }, { "epoch": 2.815294880103694, "grad_norm": 0.5016983151435852, "learning_rate": 9.908255236388386e-08, "loss": 0.0723, "step": 8688 }, { "epoch": 2.8156189241736875, "grad_norm": 0.4868336319923401, "learning_rate": 9.873634700994671e-08, "loss": 0.0701, "step": 8689 }, { "epoch": 2.8159429682436814, "grad_norm": 0.4551834762096405, "learning_rate": 9.839074152323159e-08, "loss": 0.0671, "step": 8690 }, { "epoch": 2.816267012313675, "grad_norm": 0.4637860059738159, "learning_rate": 9.80457359460396e-08, "loss": 0.0712, "step": 8691 }, { "epoch": 2.8165910563836682, "grad_norm": 0.47512730956077576, "learning_rate": 9.770133032059481e-08, "loss": 0.066, "step": 8692 }, { "epoch": 2.8169151004536617, "grad_norm": 0.46657752990722656, "learning_rate": 9.735752468904846e-08, "loss": 0.0653, "step": 8693 }, { "epoch": 2.817239144523655, "grad_norm": 0.5024873614311218, "learning_rate": 9.70143190934808e-08, "loss": 0.0728, "step": 8694 }, { "epoch": 2.817563188593649, "grad_norm": 0.4788472652435303, "learning_rate": 9.667171357589489e-08, "loss": 0.0679, "step": 8695 }, { "epoch": 2.8178872326636424, "grad_norm": 0.4856061041355133, "learning_rate": 9.63297081782244e-08, "loss": 0.0702, "step": 8696 }, { "epoch": 2.818211276733636, "grad_norm": 0.47453123331069946, "learning_rate": 9.59883029423253e-08, "loss": 0.0659, "step": 8697 }, { "epoch": 2.8185353208036292, "grad_norm": 0.5024823546409607, "learning_rate": 9.564749790998473e-08, "loss": 0.076, "step": 8698 }, { "epoch": 2.8188593648736227, "grad_norm": 0.4959494173526764, "learning_rate": 9.530729312291153e-08, "loss": 0.0715, "step": 8699 }, { "epoch": 2.8191834089436165, "grad_norm": 0.500286340713501, "learning_rate": 9.496768862274519e-08, "loss": 0.0764, "step": 8700 }, { "epoch": 2.81950745301361, "grad_norm": 0.4606480598449707, "learning_rate": 9.462868445104912e-08, "loss": 0.0675, "step": 8701 }, { "epoch": 2.8198314970836034, "grad_norm": 0.4570009112358093, "learning_rate": 9.4290280649314e-08, "loss": 0.0682, "step": 8702 }, { "epoch": 2.820155541153597, "grad_norm": 0.46508264541625977, "learning_rate": 9.395247725895784e-08, "loss": 0.0662, "step": 8703 }, { "epoch": 2.82047958522359, "grad_norm": 0.46841660141944885, "learning_rate": 9.361527432132478e-08, "loss": 0.0668, "step": 8704 }, { "epoch": 2.820803629293584, "grad_norm": 0.5106972455978394, "learning_rate": 9.327867187768458e-08, "loss": 0.073, "step": 8705 }, { "epoch": 2.8211276733635775, "grad_norm": 0.484240859746933, "learning_rate": 9.294266996923373e-08, "loss": 0.0691, "step": 8706 }, { "epoch": 2.821451717433571, "grad_norm": 0.4603111147880554, "learning_rate": 9.260726863709601e-08, "loss": 0.0663, "step": 8707 }, { "epoch": 2.8217757615035644, "grad_norm": 0.5028695464134216, "learning_rate": 9.227246792232136e-08, "loss": 0.0706, "step": 8708 }, { "epoch": 2.8220998055735578, "grad_norm": 0.5131447911262512, "learning_rate": 9.193826786588645e-08, "loss": 0.0803, "step": 8709 }, { "epoch": 2.8224238496435516, "grad_norm": 0.4943181276321411, "learning_rate": 9.160466850869354e-08, "loss": 0.0728, "step": 8710 }, { "epoch": 2.822747893713545, "grad_norm": 0.49125006794929504, "learning_rate": 9.127166989157276e-08, "loss": 0.0719, "step": 8711 }, { "epoch": 2.8230719377835385, "grad_norm": 0.4617706537246704, "learning_rate": 9.093927205527875e-08, "loss": 0.0669, "step": 8712 }, { "epoch": 2.8233959818535324, "grad_norm": 0.5301855802536011, "learning_rate": 9.060747504049506e-08, "loss": 0.0729, "step": 8713 }, { "epoch": 2.8237200259235253, "grad_norm": 0.4995158314704895, "learning_rate": 9.02762788878292e-08, "loss": 0.0723, "step": 8714 }, { "epoch": 2.824044069993519, "grad_norm": 0.4507756233215332, "learning_rate": 8.994568363781764e-08, "loss": 0.0615, "step": 8715 }, { "epoch": 2.8243681140635126, "grad_norm": 0.47523167729377747, "learning_rate": 8.961568933092136e-08, "loss": 0.0694, "step": 8716 }, { "epoch": 2.824692158133506, "grad_norm": 0.48729458451271057, "learning_rate": 8.928629600752803e-08, "loss": 0.0704, "step": 8717 }, { "epoch": 2.8250162022035, "grad_norm": 0.5317953824996948, "learning_rate": 8.89575037079532e-08, "loss": 0.0749, "step": 8718 }, { "epoch": 2.8253402462734933, "grad_norm": 0.48030078411102295, "learning_rate": 8.862931247243689e-08, "loss": 0.0694, "step": 8719 }, { "epoch": 2.8256642903434868, "grad_norm": 0.4619617462158203, "learning_rate": 8.830172234114754e-08, "loss": 0.0645, "step": 8720 }, { "epoch": 2.82598833441348, "grad_norm": 0.4722529649734497, "learning_rate": 8.79747333541775e-08, "loss": 0.0697, "step": 8721 }, { "epoch": 2.8263123784834736, "grad_norm": 0.48667117953300476, "learning_rate": 8.764834555154867e-08, "loss": 0.0758, "step": 8722 }, { "epoch": 2.8266364225534675, "grad_norm": 0.49473825097084045, "learning_rate": 8.732255897320685e-08, "loss": 0.0708, "step": 8723 }, { "epoch": 2.826960466623461, "grad_norm": 0.5213518738746643, "learning_rate": 8.699737365902572e-08, "loss": 0.0742, "step": 8724 }, { "epoch": 2.8272845106934543, "grad_norm": 0.5019205808639526, "learning_rate": 8.667278964880398e-08, "loss": 0.0739, "step": 8725 }, { "epoch": 2.8276085547634477, "grad_norm": 0.5222662091255188, "learning_rate": 8.634880698226877e-08, "loss": 0.0747, "step": 8726 }, { "epoch": 2.827932598833441, "grad_norm": 0.4360904395580292, "learning_rate": 8.602542569907168e-08, "loss": 0.062, "step": 8727 }, { "epoch": 2.828256642903435, "grad_norm": 0.4444887936115265, "learning_rate": 8.570264583879052e-08, "loss": 0.0649, "step": 8728 }, { "epoch": 2.8285806869734285, "grad_norm": 0.48832395672798157, "learning_rate": 8.538046744093253e-08, "loss": 0.0709, "step": 8729 }, { "epoch": 2.828904731043422, "grad_norm": 0.46723636984825134, "learning_rate": 8.505889054492789e-08, "loss": 0.0675, "step": 8730 }, { "epoch": 2.8292287751134153, "grad_norm": 0.5746234655380249, "learning_rate": 8.473791519013453e-08, "loss": 0.0764, "step": 8731 }, { "epoch": 2.8295528191834087, "grad_norm": 0.4938589632511139, "learning_rate": 8.441754141583714e-08, "loss": 0.0716, "step": 8732 }, { "epoch": 2.8298768632534026, "grad_norm": 0.5008729696273804, "learning_rate": 8.409776926124546e-08, "loss": 0.0666, "step": 8733 }, { "epoch": 2.830200907323396, "grad_norm": 0.4394344985485077, "learning_rate": 8.377859876549821e-08, "loss": 0.0642, "step": 8734 }, { "epoch": 2.8305249513933894, "grad_norm": 0.447689414024353, "learning_rate": 8.346002996765745e-08, "loss": 0.0667, "step": 8735 }, { "epoch": 2.830848995463383, "grad_norm": 0.5375357866287231, "learning_rate": 8.314206290671256e-08, "loss": 0.0815, "step": 8736 }, { "epoch": 2.8311730395333763, "grad_norm": 0.5031763911247253, "learning_rate": 8.28246976215813e-08, "loss": 0.0695, "step": 8737 }, { "epoch": 2.83149708360337, "grad_norm": 0.542471706867218, "learning_rate": 8.250793415110426e-08, "loss": 0.0764, "step": 8738 }, { "epoch": 2.8318211276733636, "grad_norm": 0.5349651575088501, "learning_rate": 8.219177253405153e-08, "loss": 0.0751, "step": 8739 }, { "epoch": 2.832145171743357, "grad_norm": 0.46823248267173767, "learning_rate": 8.187621280911773e-08, "loss": 0.0685, "step": 8740 }, { "epoch": 2.832469215813351, "grad_norm": 0.4961474537849426, "learning_rate": 8.156125501492417e-08, "loss": 0.0707, "step": 8741 }, { "epoch": 2.8327932598833443, "grad_norm": 0.48361262679100037, "learning_rate": 8.124689919001894e-08, "loss": 0.0721, "step": 8742 }, { "epoch": 2.8331173039533377, "grad_norm": 0.5136740207672119, "learning_rate": 8.093314537287567e-08, "loss": 0.0777, "step": 8743 }, { "epoch": 2.833441348023331, "grad_norm": 0.5095680356025696, "learning_rate": 8.061999360189587e-08, "loss": 0.0742, "step": 8744 }, { "epoch": 2.8337653920933246, "grad_norm": 0.4728745222091675, "learning_rate": 8.030744391540501e-08, "loss": 0.0709, "step": 8745 }, { "epoch": 2.8340894361633184, "grad_norm": 0.48748818039894104, "learning_rate": 7.999549635165693e-08, "loss": 0.0705, "step": 8746 }, { "epoch": 2.834413480233312, "grad_norm": 0.4940890967845917, "learning_rate": 7.968415094883109e-08, "loss": 0.0704, "step": 8747 }, { "epoch": 2.8347375243033053, "grad_norm": 0.5177623629570007, "learning_rate": 7.9373407745032e-08, "loss": 0.0746, "step": 8748 }, { "epoch": 2.8350615683732987, "grad_norm": 0.4558980464935303, "learning_rate": 7.906326677829312e-08, "loss": 0.067, "step": 8749 }, { "epoch": 2.835385612443292, "grad_norm": 0.4997522532939911, "learning_rate": 7.875372808657189e-08, "loss": 0.0719, "step": 8750 }, { "epoch": 2.835709656513286, "grad_norm": 0.4721794128417969, "learning_rate": 7.844479170775299e-08, "loss": 0.0701, "step": 8751 }, { "epoch": 2.8360337005832794, "grad_norm": 0.4765371084213257, "learning_rate": 7.813645767964673e-08, "loss": 0.0708, "step": 8752 }, { "epoch": 2.836357744653273, "grad_norm": 0.4879765212535858, "learning_rate": 7.782872603999126e-08, "loss": 0.0728, "step": 8753 }, { "epoch": 2.8366817887232663, "grad_norm": 0.5061075687408447, "learning_rate": 7.752159682644921e-08, "loss": 0.0681, "step": 8754 }, { "epoch": 2.8370058327932597, "grad_norm": 0.46989887952804565, "learning_rate": 7.721507007661055e-08, "loss": 0.0699, "step": 8755 }, { "epoch": 2.8373298768632536, "grad_norm": 0.47777748107910156, "learning_rate": 7.69091458279908e-08, "loss": 0.0679, "step": 8756 }, { "epoch": 2.837653920933247, "grad_norm": 0.46506989002227783, "learning_rate": 7.66038241180328e-08, "loss": 0.0642, "step": 8757 }, { "epoch": 2.8379779650032404, "grad_norm": 0.4758833050727844, "learning_rate": 7.629910498410442e-08, "loss": 0.0701, "step": 8758 }, { "epoch": 2.838302009073234, "grad_norm": 0.4828174114227295, "learning_rate": 7.599498846350029e-08, "loss": 0.0741, "step": 8759 }, { "epoch": 2.8386260531432272, "grad_norm": 0.4962548613548279, "learning_rate": 7.569147459344172e-08, "loss": 0.0694, "step": 8760 }, { "epoch": 2.838950097213221, "grad_norm": 0.5564802885055542, "learning_rate": 7.53885634110757e-08, "loss": 0.0823, "step": 8761 }, { "epoch": 2.8392741412832145, "grad_norm": 0.46635982394218445, "learning_rate": 7.508625495347533e-08, "loss": 0.0692, "step": 8762 }, { "epoch": 2.839598185353208, "grad_norm": 0.49309298396110535, "learning_rate": 7.478454925764045e-08, "loss": 0.0712, "step": 8763 }, { "epoch": 2.839922229423202, "grad_norm": 0.5187074542045593, "learning_rate": 7.448344636049709e-08, "loss": 0.0742, "step": 8764 }, { "epoch": 2.840246273493195, "grad_norm": 0.4811423718929291, "learning_rate": 7.418294629889744e-08, "loss": 0.0702, "step": 8765 }, { "epoch": 2.8405703175631887, "grad_norm": 0.5099913477897644, "learning_rate": 7.388304910961985e-08, "loss": 0.0753, "step": 8766 }, { "epoch": 2.840894361633182, "grad_norm": 0.5018423199653625, "learning_rate": 7.358375482936719e-08, "loss": 0.0684, "step": 8767 }, { "epoch": 2.8412184057031755, "grad_norm": 0.49818873405456543, "learning_rate": 7.328506349477294e-08, "loss": 0.0726, "step": 8768 }, { "epoch": 2.8415424497731694, "grad_norm": 0.47550129890441895, "learning_rate": 7.298697514239228e-08, "loss": 0.0677, "step": 8769 }, { "epoch": 2.841866493843163, "grad_norm": 0.4928217828273773, "learning_rate": 7.268948980870826e-08, "loss": 0.0699, "step": 8770 }, { "epoch": 2.8421905379131562, "grad_norm": 0.46691080927848816, "learning_rate": 7.239260753013067e-08, "loss": 0.0693, "step": 8771 }, { "epoch": 2.8425145819831497, "grad_norm": 0.49736487865448, "learning_rate": 7.20963283429954e-08, "loss": 0.0737, "step": 8772 }, { "epoch": 2.842838626053143, "grad_norm": 0.5066372752189636, "learning_rate": 7.180065228356347e-08, "loss": 0.0771, "step": 8773 }, { "epoch": 2.843162670123137, "grad_norm": 0.47119608521461487, "learning_rate": 7.15055793880226e-08, "loss": 0.0709, "step": 8774 }, { "epoch": 2.8434867141931304, "grad_norm": 0.4811091125011444, "learning_rate": 7.121110969248834e-08, "loss": 0.0706, "step": 8775 }, { "epoch": 2.843810758263124, "grad_norm": 0.5038498044013977, "learning_rate": 7.091724323299853e-08, "loss": 0.0695, "step": 8776 }, { "epoch": 2.844134802333117, "grad_norm": 0.5106398463249207, "learning_rate": 7.062398004552218e-08, "loss": 0.0789, "step": 8777 }, { "epoch": 2.8444588464031106, "grad_norm": 0.46302106976509094, "learning_rate": 7.033132016595001e-08, "loss": 0.0657, "step": 8778 }, { "epoch": 2.8447828904731045, "grad_norm": 0.46196722984313965, "learning_rate": 7.003926363010116e-08, "loss": 0.0669, "step": 8779 }, { "epoch": 2.845106934543098, "grad_norm": 0.48582008481025696, "learning_rate": 6.974781047372148e-08, "loss": 0.071, "step": 8780 }, { "epoch": 2.8454309786130914, "grad_norm": 0.4900462329387665, "learning_rate": 6.945696073248077e-08, "loss": 0.0735, "step": 8781 }, { "epoch": 2.845755022683085, "grad_norm": 0.5111523270606995, "learning_rate": 6.916671444197665e-08, "loss": 0.0725, "step": 8782 }, { "epoch": 2.846079066753078, "grad_norm": 0.44137054681777954, "learning_rate": 6.887707163773238e-08, "loss": 0.0596, "step": 8783 }, { "epoch": 2.846403110823072, "grad_norm": 0.4662524163722992, "learning_rate": 6.858803235519795e-08, "loss": 0.0694, "step": 8784 }, { "epoch": 2.8467271548930655, "grad_norm": 0.5039228200912476, "learning_rate": 6.829959662974839e-08, "loss": 0.074, "step": 8785 }, { "epoch": 2.847051198963059, "grad_norm": 0.47388955950737, "learning_rate": 6.8011764496686e-08, "loss": 0.0736, "step": 8786 }, { "epoch": 2.8473752430330523, "grad_norm": 0.5250383019447327, "learning_rate": 6.772453599123763e-08, "loss": 0.0734, "step": 8787 }, { "epoch": 2.8476992871030458, "grad_norm": 0.49519991874694824, "learning_rate": 6.743791114855847e-08, "loss": 0.0732, "step": 8788 }, { "epoch": 2.8480233311730396, "grad_norm": 0.45928895473480225, "learning_rate": 6.715189000372768e-08, "loss": 0.066, "step": 8789 }, { "epoch": 2.848347375243033, "grad_norm": 0.4567415714263916, "learning_rate": 6.686647259175227e-08, "loss": 0.0629, "step": 8790 }, { "epoch": 2.8486714193130265, "grad_norm": 0.4983983039855957, "learning_rate": 6.65816589475643e-08, "loss": 0.0717, "step": 8791 }, { "epoch": 2.8489954633830203, "grad_norm": 0.5043929219245911, "learning_rate": 6.629744910602142e-08, "loss": 0.0747, "step": 8792 }, { "epoch": 2.8493195074530138, "grad_norm": 0.45701977610588074, "learning_rate": 6.601384310190917e-08, "loss": 0.0683, "step": 8793 }, { "epoch": 2.849643551523007, "grad_norm": 0.48810067772865295, "learning_rate": 6.573084096993809e-08, "loss": 0.0705, "step": 8794 }, { "epoch": 2.8499675955930006, "grad_norm": 0.4656825661659241, "learning_rate": 6.544844274474438e-08, "loss": 0.0664, "step": 8795 }, { "epoch": 2.850291639662994, "grad_norm": 0.4611987769603729, "learning_rate": 6.516664846089094e-08, "loss": 0.0651, "step": 8796 }, { "epoch": 2.850615683732988, "grad_norm": 0.5130912065505981, "learning_rate": 6.488545815286739e-08, "loss": 0.0766, "step": 8797 }, { "epoch": 2.8509397278029813, "grad_norm": 0.49199214577674866, "learning_rate": 6.460487185508735e-08, "loss": 0.0715, "step": 8798 }, { "epoch": 2.8512637718729748, "grad_norm": 0.4907855987548828, "learning_rate": 6.432488960189331e-08, "loss": 0.0737, "step": 8799 }, { "epoch": 2.851587815942968, "grad_norm": 0.47007766366004944, "learning_rate": 6.404551142755178e-08, "loss": 0.0661, "step": 8800 }, { "epoch": 2.8519118600129616, "grad_norm": 0.49704840779304504, "learning_rate": 6.376673736625538e-08, "loss": 0.0734, "step": 8801 }, { "epoch": 2.8522359040829555, "grad_norm": 0.4636077284812927, "learning_rate": 6.348856745212461e-08, "loss": 0.0691, "step": 8802 }, { "epoch": 2.852559948152949, "grad_norm": 0.46470972895622253, "learning_rate": 6.321100171920335e-08, "loss": 0.0712, "step": 8803 }, { "epoch": 2.8528839922229423, "grad_norm": 0.47441092133522034, "learning_rate": 6.293404020146443e-08, "loss": 0.0694, "step": 8804 }, { "epoch": 2.8532080362929357, "grad_norm": 0.4719110131263733, "learning_rate": 6.265768293280349e-08, "loss": 0.0698, "step": 8805 }, { "epoch": 2.853532080362929, "grad_norm": 0.4885626435279846, "learning_rate": 6.238192994704573e-08, "loss": 0.0672, "step": 8806 }, { "epoch": 2.853856124432923, "grad_norm": 0.5021069049835205, "learning_rate": 6.210678127793912e-08, "loss": 0.0729, "step": 8807 }, { "epoch": 2.8541801685029164, "grad_norm": 0.5236930847167969, "learning_rate": 6.183223695916119e-08, "loss": 0.0786, "step": 8808 }, { "epoch": 2.85450421257291, "grad_norm": 0.4771360754966736, "learning_rate": 6.15582970243117e-08, "loss": 0.0678, "step": 8809 }, { "epoch": 2.8548282566429033, "grad_norm": 0.48816344141960144, "learning_rate": 6.128496150691832e-08, "loss": 0.0706, "step": 8810 }, { "epoch": 2.8551523007128967, "grad_norm": 0.5284494757652283, "learning_rate": 6.101223044043592e-08, "loss": 0.0735, "step": 8811 }, { "epoch": 2.8554763447828906, "grad_norm": 0.5202637910842896, "learning_rate": 6.074010385824281e-08, "loss": 0.0767, "step": 8812 }, { "epoch": 2.855800388852884, "grad_norm": 0.49742886424064636, "learning_rate": 6.046858179364568e-08, "loss": 0.0739, "step": 8813 }, { "epoch": 2.8561244329228774, "grad_norm": 0.5342046618461609, "learning_rate": 6.019766427987572e-08, "loss": 0.0672, "step": 8814 }, { "epoch": 2.8564484769928713, "grad_norm": 0.4571148753166199, "learning_rate": 5.992735135009087e-08, "loss": 0.0674, "step": 8815 }, { "epoch": 2.8567725210628643, "grad_norm": 0.5210033655166626, "learning_rate": 5.965764303737409e-08, "loss": 0.074, "step": 8816 }, { "epoch": 2.857096565132858, "grad_norm": 0.4892861247062683, "learning_rate": 5.938853937473565e-08, "loss": 0.0715, "step": 8817 }, { "epoch": 2.8574206092028516, "grad_norm": 0.5085569024085999, "learning_rate": 5.912004039511143e-08, "loss": 0.0727, "step": 8818 }, { "epoch": 2.857744653272845, "grad_norm": 0.5078155398368835, "learning_rate": 5.8852146131362366e-08, "loss": 0.0724, "step": 8819 }, { "epoch": 2.858068697342839, "grad_norm": 0.49197226762771606, "learning_rate": 5.858485661627722e-08, "loss": 0.0717, "step": 8820 }, { "epoch": 2.8583927414128323, "grad_norm": 0.4717177450656891, "learning_rate": 5.831817188256872e-08, "loss": 0.0679, "step": 8821 }, { "epoch": 2.8587167854828257, "grad_norm": 0.5008045434951782, "learning_rate": 5.805209196287687e-08, "loss": 0.0718, "step": 8822 }, { "epoch": 2.859040829552819, "grad_norm": 0.49238985776901245, "learning_rate": 5.778661688976728e-08, "loss": 0.0708, "step": 8823 }, { "epoch": 2.8593648736228126, "grad_norm": 0.4850955605506897, "learning_rate": 5.7521746695731186e-08, "loss": 0.0706, "step": 8824 }, { "epoch": 2.8596889176928064, "grad_norm": 0.4857136309146881, "learning_rate": 5.725748141318654e-08, "loss": 0.0705, "step": 8825 }, { "epoch": 2.8600129617628, "grad_norm": 0.497732549905777, "learning_rate": 5.699382107447637e-08, "loss": 0.0738, "step": 8826 }, { "epoch": 2.8603370058327933, "grad_norm": 0.4750259816646576, "learning_rate": 5.6730765711870975e-08, "loss": 0.0672, "step": 8827 }, { "epoch": 2.8606610499027867, "grad_norm": 0.4785563051700592, "learning_rate": 5.646831535756569e-08, "loss": 0.0699, "step": 8828 }, { "epoch": 2.86098509397278, "grad_norm": 0.4761826992034912, "learning_rate": 5.620647004368041e-08, "loss": 0.0663, "step": 8829 }, { "epoch": 2.861309138042774, "grad_norm": 0.46488818526268005, "learning_rate": 5.594522980226447e-08, "loss": 0.0659, "step": 8830 }, { "epoch": 2.8616331821127674, "grad_norm": 0.5118567943572998, "learning_rate": 5.568459466529008e-08, "loss": 0.0744, "step": 8831 }, { "epoch": 2.861957226182761, "grad_norm": 0.5114924907684326, "learning_rate": 5.542456466465618e-08, "loss": 0.079, "step": 8832 }, { "epoch": 2.8622812702527543, "grad_norm": 0.4875848889350891, "learning_rate": 5.516513983218841e-08, "loss": 0.0714, "step": 8833 }, { "epoch": 2.8626053143227477, "grad_norm": 0.44935011863708496, "learning_rate": 5.490632019963804e-08, "loss": 0.0645, "step": 8834 }, { "epoch": 2.8629293583927415, "grad_norm": 0.5156768560409546, "learning_rate": 5.464810579868196e-08, "loss": 0.0719, "step": 8835 }, { "epoch": 2.863253402462735, "grad_norm": 0.5234912037849426, "learning_rate": 5.439049666092266e-08, "loss": 0.0735, "step": 8836 }, { "epoch": 2.8635774465327284, "grad_norm": 0.5177478194236755, "learning_rate": 5.4133492817889935e-08, "loss": 0.0749, "step": 8837 }, { "epoch": 2.863901490602722, "grad_norm": 0.47083693742752075, "learning_rate": 5.387709430103749e-08, "loss": 0.0665, "step": 8838 }, { "epoch": 2.8642255346727152, "grad_norm": 0.4397895038127899, "learning_rate": 5.362130114174691e-08, "loss": 0.0616, "step": 8839 }, { "epoch": 2.864549578742709, "grad_norm": 0.512173056602478, "learning_rate": 5.3366113371324245e-08, "loss": 0.0781, "step": 8840 }, { "epoch": 2.8648736228127025, "grad_norm": 0.4663113057613373, "learning_rate": 5.311153102100175e-08, "loss": 0.0685, "step": 8841 }, { "epoch": 2.865197666882696, "grad_norm": 0.4491751492023468, "learning_rate": 5.2857554121938935e-08, "loss": 0.0675, "step": 8842 }, { "epoch": 2.86552171095269, "grad_norm": 0.49800777435302734, "learning_rate": 5.2604182705219274e-08, "loss": 0.0752, "step": 8843 }, { "epoch": 2.8658457550226832, "grad_norm": 0.46831420063972473, "learning_rate": 5.235141680185296e-08, "loss": 0.0712, "step": 8844 }, { "epoch": 2.8661697990926767, "grad_norm": 0.4684108793735504, "learning_rate": 5.209925644277636e-08, "loss": 0.068, "step": 8845 }, { "epoch": 2.86649384316267, "grad_norm": 0.5100395083427429, "learning_rate": 5.1847701658851445e-08, "loss": 0.0746, "step": 8846 }, { "epoch": 2.8668178872326635, "grad_norm": 0.5008055567741394, "learning_rate": 5.159675248086582e-08, "loss": 0.0747, "step": 8847 }, { "epoch": 2.8671419313026574, "grad_norm": 0.48468106985092163, "learning_rate": 5.1346408939533795e-08, "loss": 0.0746, "step": 8848 }, { "epoch": 2.867465975372651, "grad_norm": 0.5062259435653687, "learning_rate": 5.109667106549421e-08, "loss": 0.0732, "step": 8849 }, { "epoch": 2.8677900194426442, "grad_norm": 0.5236890316009521, "learning_rate": 5.084753888931315e-08, "loss": 0.0749, "step": 8850 }, { "epoch": 2.8681140635126376, "grad_norm": 0.46542856097221375, "learning_rate": 5.05990124414818e-08, "loss": 0.0693, "step": 8851 }, { "epoch": 2.868438107582631, "grad_norm": 0.4806134104728699, "learning_rate": 5.035109175241748e-08, "loss": 0.0691, "step": 8852 }, { "epoch": 2.868762151652625, "grad_norm": 0.486376017332077, "learning_rate": 5.010377685246315e-08, "loss": 0.0695, "step": 8853 }, { "epoch": 2.8690861957226184, "grad_norm": 0.47897252440452576, "learning_rate": 4.985706777188792e-08, "loss": 0.0672, "step": 8854 }, { "epoch": 2.869410239792612, "grad_norm": 0.4908261299133301, "learning_rate": 4.961096454088654e-08, "loss": 0.0729, "step": 8855 }, { "epoch": 2.869734283862605, "grad_norm": 0.5017130970954895, "learning_rate": 4.936546718957935e-08, "loss": 0.0742, "step": 8856 }, { "epoch": 2.8700583279325986, "grad_norm": 0.5411863923072815, "learning_rate": 4.912057574801343e-08, "loss": 0.0796, "step": 8857 }, { "epoch": 2.8703823720025925, "grad_norm": 0.5324139595031738, "learning_rate": 4.887629024616036e-08, "loss": 0.0675, "step": 8858 }, { "epoch": 2.870706416072586, "grad_norm": 0.4880392551422119, "learning_rate": 4.863261071391956e-08, "loss": 0.0703, "step": 8859 }, { "epoch": 2.8710304601425793, "grad_norm": 0.5131406784057617, "learning_rate": 4.838953718111328e-08, "loss": 0.0708, "step": 8860 }, { "epoch": 2.8713545042125728, "grad_norm": 0.4706338942050934, "learning_rate": 4.8147069677493274e-08, "loss": 0.0668, "step": 8861 }, { "epoch": 2.871678548282566, "grad_norm": 0.518531084060669, "learning_rate": 4.790520823273359e-08, "loss": 0.0758, "step": 8862 }, { "epoch": 2.87200259235256, "grad_norm": 0.520061194896698, "learning_rate": 4.766395287643666e-08, "loss": 0.0714, "step": 8863 }, { "epoch": 2.8723266364225535, "grad_norm": 0.4812447130680084, "learning_rate": 4.742330363812997e-08, "loss": 0.0704, "step": 8864 }, { "epoch": 2.872650680492547, "grad_norm": 0.4664628803730011, "learning_rate": 4.718326054726552e-08, "loss": 0.0707, "step": 8865 }, { "epoch": 2.8729747245625408, "grad_norm": 0.48417264223098755, "learning_rate": 4.694382363322369e-08, "loss": 0.0739, "step": 8866 }, { "epoch": 2.8732987686325338, "grad_norm": 0.45417407155036926, "learning_rate": 4.670499292530828e-08, "loss": 0.0672, "step": 8867 }, { "epoch": 2.8736228127025276, "grad_norm": 0.5107606649398804, "learning_rate": 4.6466768452750334e-08, "loss": 0.0746, "step": 8868 }, { "epoch": 2.873946856772521, "grad_norm": 0.5404432415962219, "learning_rate": 4.622915024470542e-08, "loss": 0.076, "step": 8869 }, { "epoch": 2.8742709008425145, "grad_norm": 0.4897291660308838, "learning_rate": 4.5992138330256396e-08, "loss": 0.0709, "step": 8870 }, { "epoch": 2.8745949449125083, "grad_norm": 0.5036388039588928, "learning_rate": 4.5755732738411715e-08, "loss": 0.0706, "step": 8871 }, { "epoch": 2.8749189889825018, "grad_norm": 0.4615165889263153, "learning_rate": 4.5519933498103795e-08, "loss": 0.0632, "step": 8872 }, { "epoch": 2.875243033052495, "grad_norm": 0.513347327709198, "learning_rate": 4.5284740638193435e-08, "loss": 0.0761, "step": 8873 }, { "epoch": 2.8755670771224886, "grad_norm": 0.512653648853302, "learning_rate": 4.505015418746539e-08, "loss": 0.0733, "step": 8874 }, { "epoch": 2.875891121192482, "grad_norm": 0.486761212348938, "learning_rate": 4.481617417463113e-08, "loss": 0.0726, "step": 8875 }, { "epoch": 2.876215165262476, "grad_norm": 0.49732506275177, "learning_rate": 4.458280062832665e-08, "loss": 0.0755, "step": 8876 }, { "epoch": 2.8765392093324693, "grad_norm": 0.47248610854148865, "learning_rate": 4.435003357711576e-08, "loss": 0.0706, "step": 8877 }, { "epoch": 2.8768632534024627, "grad_norm": 0.45865169167518616, "learning_rate": 4.411787304948567e-08, "loss": 0.0688, "step": 8878 }, { "epoch": 2.877187297472456, "grad_norm": 0.4855770766735077, "learning_rate": 4.388631907385199e-08, "loss": 0.0696, "step": 8879 }, { "epoch": 2.8775113415424496, "grad_norm": 0.5271292924880981, "learning_rate": 4.365537167855371e-08, "loss": 0.0724, "step": 8880 }, { "epoch": 2.8778353856124435, "grad_norm": 0.4924415349960327, "learning_rate": 4.342503089185657e-08, "loss": 0.0706, "step": 8881 }, { "epoch": 2.878159429682437, "grad_norm": 0.5239536762237549, "learning_rate": 4.319529674195244e-08, "loss": 0.0747, "step": 8882 }, { "epoch": 2.8784834737524303, "grad_norm": 0.5411633849143982, "learning_rate": 4.296616925695829e-08, "loss": 0.0772, "step": 8883 }, { "epoch": 2.8788075178224237, "grad_norm": 0.49615591764450073, "learning_rate": 4.2737648464917236e-08, "loss": 0.066, "step": 8884 }, { "epoch": 2.879131561892417, "grad_norm": 0.4745228588581085, "learning_rate": 4.250973439379858e-08, "loss": 0.0678, "step": 8885 }, { "epoch": 2.879455605962411, "grad_norm": 0.5283622741699219, "learning_rate": 4.2282427071495545e-08, "loss": 0.0759, "step": 8886 }, { "epoch": 2.8797796500324044, "grad_norm": 0.5028887391090393, "learning_rate": 4.2055726525829234e-08, "loss": 0.0761, "step": 8887 }, { "epoch": 2.880103694102398, "grad_norm": 0.4696975648403168, "learning_rate": 4.1829632784545216e-08, "loss": 0.0653, "step": 8888 }, { "epoch": 2.8804277381723913, "grad_norm": 0.45641934871673584, "learning_rate": 4.160414587531525e-08, "loss": 0.0637, "step": 8889 }, { "epoch": 2.8807517822423847, "grad_norm": 0.4784345030784607, "learning_rate": 4.137926582573726e-08, "loss": 0.0674, "step": 8890 }, { "epoch": 2.8810758263123786, "grad_norm": 0.4932958781719208, "learning_rate": 4.1154992663333674e-08, "loss": 0.0719, "step": 8891 }, { "epoch": 2.881399870382372, "grad_norm": 0.4853772521018982, "learning_rate": 4.093132641555364e-08, "loss": 0.0695, "step": 8892 }, { "epoch": 2.8817239144523654, "grad_norm": 0.4972860515117645, "learning_rate": 4.0708267109771935e-08, "loss": 0.0758, "step": 8893 }, { "epoch": 2.8820479585223593, "grad_norm": 0.4952300190925598, "learning_rate": 4.048581477328839e-08, "loss": 0.067, "step": 8894 }, { "epoch": 2.8823720025923527, "grad_norm": 0.4682835638523102, "learning_rate": 4.026396943332955e-08, "loss": 0.0673, "step": 8895 }, { "epoch": 2.882696046662346, "grad_norm": 0.4913991093635559, "learning_rate": 4.004273111704704e-08, "loss": 0.0758, "step": 8896 }, { "epoch": 2.8830200907323396, "grad_norm": 0.47748857736587524, "learning_rate": 3.982209985151753e-08, "loss": 0.0676, "step": 8897 }, { "epoch": 2.883344134802333, "grad_norm": 0.4662216007709503, "learning_rate": 3.9602075663744964e-08, "loss": 0.0677, "step": 8898 }, { "epoch": 2.883668178872327, "grad_norm": 0.4768160581588745, "learning_rate": 3.938265858065837e-08, "loss": 0.0682, "step": 8899 }, { "epoch": 2.8839922229423203, "grad_norm": 0.4909741282463074, "learning_rate": 3.916384862911182e-08, "loss": 0.0725, "step": 8900 }, { "epoch": 2.8843162670123137, "grad_norm": 0.5086570978164673, "learning_rate": 3.8945645835885556e-08, "loss": 0.0754, "step": 8901 }, { "epoch": 2.884640311082307, "grad_norm": 0.4623122215270996, "learning_rate": 3.872805022768489e-08, "loss": 0.0677, "step": 8902 }, { "epoch": 2.8849643551523005, "grad_norm": 0.487517774105072, "learning_rate": 3.8511061831142394e-08, "loss": 0.0717, "step": 8903 }, { "epoch": 2.8852883992222944, "grad_norm": 0.4888652563095093, "learning_rate": 3.829468067281517e-08, "loss": 0.0725, "step": 8904 }, { "epoch": 2.885612443292288, "grad_norm": 0.4961828291416168, "learning_rate": 3.807890677918591e-08, "loss": 0.0769, "step": 8905 }, { "epoch": 2.8859364873622813, "grad_norm": 0.48514750599861145, "learning_rate": 3.78637401766635e-08, "loss": 0.0725, "step": 8906 }, { "epoch": 2.8862605314322747, "grad_norm": 0.4731883704662323, "learning_rate": 3.764918089158187e-08, "loss": 0.0716, "step": 8907 }, { "epoch": 2.886584575502268, "grad_norm": 0.5104616284370422, "learning_rate": 3.743522895020168e-08, "loss": 0.0723, "step": 8908 }, { "epoch": 2.886908619572262, "grad_norm": 0.4929802119731903, "learning_rate": 3.7221884378707554e-08, "loss": 0.0673, "step": 8909 }, { "epoch": 2.8872326636422554, "grad_norm": 0.4420555830001831, "learning_rate": 3.700914720321136e-08, "loss": 0.0649, "step": 8910 }, { "epoch": 2.887556707712249, "grad_norm": 0.507371187210083, "learning_rate": 3.679701744975006e-08, "loss": 0.0692, "step": 8911 }, { "epoch": 2.8878807517822422, "grad_norm": 0.5148362517356873, "learning_rate": 3.658549514428678e-08, "loss": 0.069, "step": 8912 }, { "epoch": 2.8882047958522357, "grad_norm": 0.49123647809028625, "learning_rate": 3.637458031270913e-08, "loss": 0.0703, "step": 8913 }, { "epoch": 2.8885288399222295, "grad_norm": 0.49537959694862366, "learning_rate": 3.616427298083092e-08, "loss": 0.0701, "step": 8914 }, { "epoch": 2.888852883992223, "grad_norm": 0.4905968904495239, "learning_rate": 3.5954573174392106e-08, "loss": 0.0704, "step": 8915 }, { "epoch": 2.8891769280622164, "grad_norm": 0.49671250581741333, "learning_rate": 3.574548091905827e-08, "loss": 0.0738, "step": 8916 }, { "epoch": 2.8895009721322102, "grad_norm": 0.5079336166381836, "learning_rate": 3.553699624041951e-08, "loss": 0.0715, "step": 8917 }, { "epoch": 2.8898250162022032, "grad_norm": 0.48482534289360046, "learning_rate": 3.532911916399262e-08, "loss": 0.0709, "step": 8918 }, { "epoch": 2.890149060272197, "grad_norm": 0.4966800808906555, "learning_rate": 3.512184971522003e-08, "loss": 0.0719, "step": 8919 }, { "epoch": 2.8904731043421905, "grad_norm": 0.490303635597229, "learning_rate": 3.491518791946924e-08, "loss": 0.0677, "step": 8920 }, { "epoch": 2.890797148412184, "grad_norm": 0.4899630546569824, "learning_rate": 3.470913380203389e-08, "loss": 0.073, "step": 8921 }, { "epoch": 2.891121192482178, "grad_norm": 0.4699013829231262, "learning_rate": 3.450368738813215e-08, "loss": 0.0659, "step": 8922 }, { "epoch": 2.8914452365521712, "grad_norm": 0.4546189606189728, "learning_rate": 3.4298848702910006e-08, "loss": 0.0655, "step": 8923 }, { "epoch": 2.8917692806221647, "grad_norm": 0.5081414580345154, "learning_rate": 3.4094617771436854e-08, "loss": 0.0733, "step": 8924 }, { "epoch": 2.892093324692158, "grad_norm": 0.5270804166793823, "learning_rate": 3.389099461870882e-08, "loss": 0.0806, "step": 8925 }, { "epoch": 2.8924173687621515, "grad_norm": 0.5022570490837097, "learning_rate": 3.368797926964762e-08, "loss": 0.0727, "step": 8926 }, { "epoch": 2.8927414128321454, "grad_norm": 0.5113040804862976, "learning_rate": 3.34855717490995e-08, "loss": 0.0735, "step": 8927 }, { "epoch": 2.893065456902139, "grad_norm": 0.5260816812515259, "learning_rate": 3.3283772081838526e-08, "loss": 0.0732, "step": 8928 }, { "epoch": 2.893389500972132, "grad_norm": 0.5216476321220398, "learning_rate": 3.308258029256162e-08, "loss": 0.0672, "step": 8929 }, { "epoch": 2.8937135450421256, "grad_norm": 0.5061860680580139, "learning_rate": 3.288199640589407e-08, "loss": 0.0724, "step": 8930 }, { "epoch": 2.894037589112119, "grad_norm": 0.4715794622898102, "learning_rate": 3.268202044638458e-08, "loss": 0.0712, "step": 8931 }, { "epoch": 2.894361633182113, "grad_norm": 0.5052686333656311, "learning_rate": 3.248265243850801e-08, "loss": 0.0696, "step": 8932 }, { "epoch": 2.8946856772521063, "grad_norm": 0.5165843963623047, "learning_rate": 3.228389240666541e-08, "loss": 0.0732, "step": 8933 }, { "epoch": 2.8950097213220998, "grad_norm": 0.509536623954773, "learning_rate": 3.208574037518397e-08, "loss": 0.0747, "step": 8934 }, { "epoch": 2.895333765392093, "grad_norm": 0.5373370051383972, "learning_rate": 3.188819636831375e-08, "loss": 0.0739, "step": 8935 }, { "epoch": 2.8956578094620866, "grad_norm": 0.4681432545185089, "learning_rate": 3.1691260410234295e-08, "loss": 0.0673, "step": 8936 }, { "epoch": 2.8959818535320805, "grad_norm": 0.5055105090141296, "learning_rate": 3.1494932525046875e-08, "loss": 0.0731, "step": 8937 }, { "epoch": 2.896305897602074, "grad_norm": 0.4752630293369293, "learning_rate": 3.1299212736781156e-08, "loss": 0.0633, "step": 8938 }, { "epoch": 2.8966299416720673, "grad_norm": 0.4678106904029846, "learning_rate": 3.1104101069390766e-08, "loss": 0.0673, "step": 8939 }, { "epoch": 2.8969539857420608, "grad_norm": 0.4853931963443756, "learning_rate": 3.0909597546756046e-08, "loss": 0.0713, "step": 8940 }, { "epoch": 2.897278029812054, "grad_norm": 0.5015696287155151, "learning_rate": 3.071570219268183e-08, "loss": 0.0719, "step": 8941 }, { "epoch": 2.897602073882048, "grad_norm": 0.49477383494377136, "learning_rate": 3.05224150308997e-08, "loss": 0.0721, "step": 8942 }, { "epoch": 2.8979261179520415, "grad_norm": 0.4835866391658783, "learning_rate": 3.032973608506573e-08, "loss": 0.0705, "step": 8943 }, { "epoch": 2.898250162022035, "grad_norm": 0.5060580968856812, "learning_rate": 3.013766537876106e-08, "loss": 0.0744, "step": 8944 }, { "epoch": 2.8985742060920288, "grad_norm": 0.4551170766353607, "learning_rate": 2.9946202935495216e-08, "loss": 0.0646, "step": 8945 }, { "epoch": 2.898898250162022, "grad_norm": 0.5011014342308044, "learning_rate": 2.9755348778699457e-08, "loss": 0.07, "step": 8946 }, { "epoch": 2.8992222942320156, "grad_norm": 0.45690640807151794, "learning_rate": 2.9565102931733426e-08, "loss": 0.0669, "step": 8947 }, { "epoch": 2.899546338302009, "grad_norm": 0.46726682782173157, "learning_rate": 2.937546541788183e-08, "loss": 0.0657, "step": 8948 }, { "epoch": 2.8998703823720025, "grad_norm": 0.49788811802864075, "learning_rate": 2.9186436260353335e-08, "loss": 0.0686, "step": 8949 }, { "epoch": 2.9001944264419963, "grad_norm": 0.4802239239215851, "learning_rate": 2.899801548228387e-08, "loss": 0.0683, "step": 8950 }, { "epoch": 2.9005184705119897, "grad_norm": 0.48868319392204285, "learning_rate": 2.8810203106734436e-08, "loss": 0.0712, "step": 8951 }, { "epoch": 2.900842514581983, "grad_norm": 0.510743260383606, "learning_rate": 2.8622999156691643e-08, "loss": 0.0686, "step": 8952 }, { "epoch": 2.9011665586519766, "grad_norm": 0.45631998777389526, "learning_rate": 2.843640365506606e-08, "loss": 0.0637, "step": 8953 }, { "epoch": 2.90149060272197, "grad_norm": 0.5396420955657959, "learning_rate": 2.8250416624697186e-08, "loss": 0.0779, "step": 8954 }, { "epoch": 2.901814646791964, "grad_norm": 0.519350528717041, "learning_rate": 2.806503808834682e-08, "loss": 0.0756, "step": 8955 }, { "epoch": 2.9021386908619573, "grad_norm": 0.4911840260028839, "learning_rate": 2.7880268068703476e-08, "loss": 0.0702, "step": 8956 }, { "epoch": 2.9024627349319507, "grad_norm": 0.5336769819259644, "learning_rate": 2.7696106588381844e-08, "loss": 0.0759, "step": 8957 }, { "epoch": 2.902786779001944, "grad_norm": 0.45938992500305176, "learning_rate": 2.7512553669921117e-08, "loss": 0.0674, "step": 8958 }, { "epoch": 2.9031108230719376, "grad_norm": 0.47019660472869873, "learning_rate": 2.73296093357861e-08, "loss": 0.0696, "step": 8959 }, { "epoch": 2.9034348671419314, "grad_norm": 0.48484107851982117, "learning_rate": 2.7147273608367775e-08, "loss": 0.0688, "step": 8960 }, { "epoch": 2.903758911211925, "grad_norm": 0.4778381884098053, "learning_rate": 2.696554650998273e-08, "loss": 0.07, "step": 8961 }, { "epoch": 2.9040829552819183, "grad_norm": 0.49290984869003296, "learning_rate": 2.6784428062871514e-08, "loss": 0.0714, "step": 8962 }, { "epoch": 2.9044069993519117, "grad_norm": 0.48641347885131836, "learning_rate": 2.6603918289201948e-08, "loss": 0.07, "step": 8963 }, { "epoch": 2.904731043421905, "grad_norm": 0.46779337525367737, "learning_rate": 2.6424017211066354e-08, "loss": 0.0655, "step": 8964 }, { "epoch": 2.905055087491899, "grad_norm": 0.48739469051361084, "learning_rate": 2.624472485048324e-08, "loss": 0.0723, "step": 8965 }, { "epoch": 2.9053791315618924, "grad_norm": 0.4942921996116638, "learning_rate": 2.6066041229396156e-08, "loss": 0.0716, "step": 8966 }, { "epoch": 2.905703175631886, "grad_norm": 0.5472116470336914, "learning_rate": 2.5887966369674833e-08, "loss": 0.0757, "step": 8967 }, { "epoch": 2.9060272197018797, "grad_norm": 0.4734596014022827, "learning_rate": 2.5710500293112394e-08, "loss": 0.0676, "step": 8968 }, { "epoch": 2.906351263771873, "grad_norm": 0.4330841898918152, "learning_rate": 2.5533643021430355e-08, "loss": 0.0603, "step": 8969 }, { "epoch": 2.9066753078418666, "grad_norm": 0.48858642578125, "learning_rate": 2.5357394576273618e-08, "loss": 0.0725, "step": 8970 }, { "epoch": 2.90699935191186, "grad_norm": 0.4687802791595459, "learning_rate": 2.5181754979213823e-08, "loss": 0.0674, "step": 8971 }, { "epoch": 2.9073233959818534, "grad_norm": 0.4720984101295471, "learning_rate": 2.5006724251747104e-08, "loss": 0.0685, "step": 8972 }, { "epoch": 2.9076474400518473, "grad_norm": 0.5154309868812561, "learning_rate": 2.483230241529522e-08, "loss": 0.0749, "step": 8973 }, { "epoch": 2.9079714841218407, "grad_norm": 0.48242926597595215, "learning_rate": 2.4658489491207193e-08, "loss": 0.0701, "step": 8974 }, { "epoch": 2.908295528191834, "grad_norm": 0.49187803268432617, "learning_rate": 2.4485285500753797e-08, "loss": 0.0698, "step": 8975 }, { "epoch": 2.9086195722618275, "grad_norm": 0.4813985228538513, "learning_rate": 2.4312690465135846e-08, "loss": 0.0717, "step": 8976 }, { "epoch": 2.908943616331821, "grad_norm": 0.4330715537071228, "learning_rate": 2.4140704405475336e-08, "loss": 0.0595, "step": 8977 }, { "epoch": 2.909267660401815, "grad_norm": 0.5288068056106567, "learning_rate": 2.39693273428232e-08, "loss": 0.0798, "step": 8978 }, { "epoch": 2.9095917044718083, "grad_norm": 0.5089949369430542, "learning_rate": 2.3798559298153224e-08, "loss": 0.0753, "step": 8979 }, { "epoch": 2.9099157485418017, "grad_norm": 0.46731987595558167, "learning_rate": 2.362840029236646e-08, "loss": 0.0715, "step": 8980 }, { "epoch": 2.910239792611795, "grad_norm": 0.4632418751716614, "learning_rate": 2.345885034628792e-08, "loss": 0.0685, "step": 8981 }, { "epoch": 2.9105638366817885, "grad_norm": 0.4734707474708557, "learning_rate": 2.3289909480669892e-08, "loss": 0.0641, "step": 8982 }, { "epoch": 2.9108878807517824, "grad_norm": 0.4756975471973419, "learning_rate": 2.3121577716189168e-08, "loss": 0.0696, "step": 8983 }, { "epoch": 2.911211924821776, "grad_norm": 0.47261470556259155, "learning_rate": 2.2953855073446497e-08, "loss": 0.0648, "step": 8984 }, { "epoch": 2.9115359688917692, "grad_norm": 0.5152508616447449, "learning_rate": 2.2786741572971004e-08, "loss": 0.0744, "step": 8985 }, { "epoch": 2.9118600129617627, "grad_norm": 0.5035756826400757, "learning_rate": 2.2620237235215226e-08, "loss": 0.0723, "step": 8986 }, { "epoch": 2.912184057031756, "grad_norm": 0.48373931646347046, "learning_rate": 2.24543420805573e-08, "loss": 0.0669, "step": 8987 }, { "epoch": 2.91250810110175, "grad_norm": 0.49522700905799866, "learning_rate": 2.2289056129301545e-08, "loss": 0.0699, "step": 8988 }, { "epoch": 2.9128321451717434, "grad_norm": 0.4791494607925415, "learning_rate": 2.2124379401677888e-08, "loss": 0.0704, "step": 8989 }, { "epoch": 2.913156189241737, "grad_norm": 0.4697430431842804, "learning_rate": 2.1960311917840206e-08, "loss": 0.0697, "step": 8990 }, { "epoch": 2.9134802333117307, "grad_norm": 0.4840279221534729, "learning_rate": 2.17968536978691e-08, "loss": 0.0694, "step": 8991 }, { "epoch": 2.9138042773817237, "grad_norm": 0.4704105257987976, "learning_rate": 2.1634004761770245e-08, "loss": 0.0733, "step": 8992 }, { "epoch": 2.9141283214517175, "grad_norm": 0.49104562401771545, "learning_rate": 2.1471765129475464e-08, "loss": 0.0693, "step": 8993 }, { "epoch": 2.914452365521711, "grad_norm": 0.5004822611808777, "learning_rate": 2.131013482083999e-08, "loss": 0.0682, "step": 8994 }, { "epoch": 2.9147764095917044, "grad_norm": 0.5043914318084717, "learning_rate": 2.114911385564744e-08, "loss": 0.0714, "step": 8995 }, { "epoch": 2.9151004536616982, "grad_norm": 0.5104712247848511, "learning_rate": 2.098870225360372e-08, "loss": 0.0728, "step": 8996 }, { "epoch": 2.9154244977316917, "grad_norm": 0.4805237650871277, "learning_rate": 2.0828900034342013e-08, "loss": 0.0694, "step": 8997 }, { "epoch": 2.915748541801685, "grad_norm": 0.48914143443107605, "learning_rate": 2.0669707217421676e-08, "loss": 0.0724, "step": 8998 }, { "epoch": 2.9160725858716785, "grad_norm": 0.5172355771064758, "learning_rate": 2.0511123822324897e-08, "loss": 0.0739, "step": 8999 }, { "epoch": 2.916396629941672, "grad_norm": 0.4897490441799164, "learning_rate": 2.0353149868461708e-08, "loss": 0.0692, "step": 9000 }, { "epoch": 2.916720674011666, "grad_norm": 0.44778183102607727, "learning_rate": 2.0195785375166088e-08, "loss": 0.0662, "step": 9001 }, { "epoch": 2.917044718081659, "grad_norm": 0.48703789710998535, "learning_rate": 2.0039030361698187e-08, "loss": 0.0685, "step": 9002 }, { "epoch": 2.9173687621516526, "grad_norm": 0.45660385489463806, "learning_rate": 1.9882884847243213e-08, "loss": 0.0636, "step": 9003 }, { "epoch": 2.917692806221646, "grad_norm": 0.4920264184474945, "learning_rate": 1.9727348850911432e-08, "loss": 0.0702, "step": 9004 }, { "epoch": 2.9180168502916395, "grad_norm": 0.4781172573566437, "learning_rate": 1.957242239173984e-08, "loss": 0.0677, "step": 9005 }, { "epoch": 2.9183408943616334, "grad_norm": 0.5161155462265015, "learning_rate": 1.9418105488689388e-08, "loss": 0.0761, "step": 9006 }, { "epoch": 2.9186649384316268, "grad_norm": 0.45751678943634033, "learning_rate": 1.926439816064718e-08, "loss": 0.0665, "step": 9007 }, { "epoch": 2.91898898250162, "grad_norm": 0.4571021497249603, "learning_rate": 1.911130042642484e-08, "loss": 0.0667, "step": 9008 }, { "epoch": 2.9193130265716136, "grad_norm": 0.4877769649028778, "learning_rate": 1.8958812304761264e-08, "loss": 0.0676, "step": 9009 }, { "epoch": 2.919637070641607, "grad_norm": 0.5144590139389038, "learning_rate": 1.880693381431875e-08, "loss": 0.0726, "step": 9010 }, { "epoch": 2.919961114711601, "grad_norm": 0.5145933032035828, "learning_rate": 1.8655664973685205e-08, "loss": 0.0713, "step": 9011 }, { "epoch": 2.9202851587815943, "grad_norm": 0.5353699326515198, "learning_rate": 1.850500580137582e-08, "loss": 0.074, "step": 9012 }, { "epoch": 2.9206092028515878, "grad_norm": 0.4878300130367279, "learning_rate": 1.835495631582862e-08, "loss": 0.0665, "step": 9013 }, { "epoch": 2.920933246921581, "grad_norm": 0.4564133584499359, "learning_rate": 1.8205516535409472e-08, "loss": 0.0691, "step": 9014 }, { "epoch": 2.9212572909915746, "grad_norm": 0.5551131963729858, "learning_rate": 1.805668647840708e-08, "loss": 0.0759, "step": 9015 }, { "epoch": 2.9215813350615685, "grad_norm": 0.47328266501426697, "learning_rate": 1.7908466163036875e-08, "loss": 0.0693, "step": 9016 }, { "epoch": 2.921905379131562, "grad_norm": 0.46604713797569275, "learning_rate": 1.7760855607440453e-08, "loss": 0.0686, "step": 9017 }, { "epoch": 2.9222294232015553, "grad_norm": 0.4848031997680664, "learning_rate": 1.7613854829683917e-08, "loss": 0.066, "step": 9018 }, { "epoch": 2.922553467271549, "grad_norm": 0.4901368319988251, "learning_rate": 1.746746384775788e-08, "loss": 0.0714, "step": 9019 }, { "epoch": 2.9228775113415426, "grad_norm": 0.5199681520462036, "learning_rate": 1.7321682679579122e-08, "loss": 0.0753, "step": 9020 }, { "epoch": 2.923201555411536, "grad_norm": 0.4540145993232727, "learning_rate": 1.717651134299114e-08, "loss": 0.0639, "step": 9021 }, { "epoch": 2.9235255994815295, "grad_norm": 0.4961194097995758, "learning_rate": 1.703194985576029e-08, "loss": 0.0703, "step": 9022 }, { "epoch": 2.923849643551523, "grad_norm": 0.4682721793651581, "learning_rate": 1.6887998235580183e-08, "loss": 0.0669, "step": 9023 }, { "epoch": 2.9241736876215167, "grad_norm": 0.49916213750839233, "learning_rate": 1.67446565000684e-08, "loss": 0.0723, "step": 9024 }, { "epoch": 2.92449773169151, "grad_norm": 0.4951789081096649, "learning_rate": 1.66019246667698e-08, "loss": 0.0728, "step": 9025 }, { "epoch": 2.9248217757615036, "grad_norm": 0.48040568828582764, "learning_rate": 1.645980275315151e-08, "loss": 0.0677, "step": 9026 }, { "epoch": 2.925145819831497, "grad_norm": 0.4653061330318451, "learning_rate": 1.631829077661018e-08, "loss": 0.0656, "step": 9027 }, { "epoch": 2.9254698639014904, "grad_norm": 0.477243036031723, "learning_rate": 1.6177388754463063e-08, "loss": 0.0697, "step": 9028 }, { "epoch": 2.9257939079714843, "grad_norm": 0.4760611951351166, "learning_rate": 1.6037096703957476e-08, "loss": 0.0726, "step": 9029 }, { "epoch": 2.9261179520414777, "grad_norm": 0.5183154940605164, "learning_rate": 1.589741464226191e-08, "loss": 0.0716, "step": 9030 }, { "epoch": 2.926441996111471, "grad_norm": 0.4699106812477112, "learning_rate": 1.5758342586473242e-08, "loss": 0.0643, "step": 9031 }, { "epoch": 2.9267660401814646, "grad_norm": 0.47856253385543823, "learning_rate": 1.56198805536123e-08, "loss": 0.0713, "step": 9032 }, { "epoch": 2.927090084251458, "grad_norm": 0.4797256886959076, "learning_rate": 1.548202856062553e-08, "loss": 0.0679, "step": 9033 }, { "epoch": 2.927414128321452, "grad_norm": 0.46025872230529785, "learning_rate": 1.5344786624384435e-08, "loss": 0.0696, "step": 9034 }, { "epoch": 2.9277381723914453, "grad_norm": 0.47484642267227173, "learning_rate": 1.5208154761686135e-08, "loss": 0.0652, "step": 9035 }, { "epoch": 2.9280622164614387, "grad_norm": 0.4732259213924408, "learning_rate": 1.5072132989253362e-08, "loss": 0.0673, "step": 9036 }, { "epoch": 2.928386260531432, "grad_norm": 0.5131701231002808, "learning_rate": 1.4936721323733915e-08, "loss": 0.0725, "step": 9037 }, { "epoch": 2.9287103046014256, "grad_norm": 0.4793781638145447, "learning_rate": 1.4801919781700091e-08, "loss": 0.0702, "step": 9038 }, { "epoch": 2.9290343486714194, "grad_norm": 0.5629575252532959, "learning_rate": 1.466772837965147e-08, "loss": 0.0783, "step": 9039 }, { "epoch": 2.929358392741413, "grad_norm": 0.4751351773738861, "learning_rate": 1.4534147134010467e-08, "loss": 0.0694, "step": 9040 }, { "epoch": 2.9296824368114063, "grad_norm": 0.48215755820274353, "learning_rate": 1.4401176061127343e-08, "loss": 0.0701, "step": 9041 }, { "epoch": 2.9300064808814, "grad_norm": 0.5063463449478149, "learning_rate": 1.4268815177275741e-08, "loss": 0.0713, "step": 9042 }, { "epoch": 2.930330524951393, "grad_norm": 0.4733906686306, "learning_rate": 1.4137064498655484e-08, "loss": 0.0708, "step": 9043 }, { "epoch": 2.930654569021387, "grad_norm": 0.48800525069236755, "learning_rate": 1.400592404139145e-08, "loss": 0.0733, "step": 9044 }, { "epoch": 2.9309786130913804, "grad_norm": 0.5243503451347351, "learning_rate": 1.3875393821534133e-08, "loss": 0.0762, "step": 9045 }, { "epoch": 2.931302657161374, "grad_norm": 0.5137089490890503, "learning_rate": 1.3745473855059643e-08, "loss": 0.0728, "step": 9046 }, { "epoch": 2.9316267012313677, "grad_norm": 0.4924151301383972, "learning_rate": 1.3616164157868039e-08, "loss": 0.0704, "step": 9047 }, { "epoch": 2.931950745301361, "grad_norm": 0.4642469882965088, "learning_rate": 1.3487464745786106e-08, "loss": 0.0652, "step": 9048 }, { "epoch": 2.9322747893713546, "grad_norm": 0.44174107909202576, "learning_rate": 1.3359375634565685e-08, "loss": 0.0619, "step": 9049 }, { "epoch": 2.932598833441348, "grad_norm": 0.46095919609069824, "learning_rate": 1.323189683988313e-08, "loss": 0.0683, "step": 9050 }, { "epoch": 2.9329228775113414, "grad_norm": 0.448514461517334, "learning_rate": 1.3105028377340401e-08, "loss": 0.0646, "step": 9051 }, { "epoch": 2.9332469215813353, "grad_norm": 0.5289997458457947, "learning_rate": 1.2978770262465634e-08, "loss": 0.0824, "step": 9052 }, { "epoch": 2.9335709656513287, "grad_norm": 0.49298954010009766, "learning_rate": 1.2853122510710914e-08, "loss": 0.0714, "step": 9053 }, { "epoch": 2.933895009721322, "grad_norm": 0.4439584016799927, "learning_rate": 1.2728085137455048e-08, "loss": 0.061, "step": 9054 }, { "epoch": 2.9342190537913155, "grad_norm": 0.5283500552177429, "learning_rate": 1.2603658158000798e-08, "loss": 0.0794, "step": 9055 }, { "epoch": 2.934543097861309, "grad_norm": 0.48534640669822693, "learning_rate": 1.2479841587577091e-08, "loss": 0.0689, "step": 9056 }, { "epoch": 2.934867141931303, "grad_norm": 0.5000969767570496, "learning_rate": 1.2356635441337917e-08, "loss": 0.0745, "step": 9057 }, { "epoch": 2.9351911860012962, "grad_norm": 0.47974875569343567, "learning_rate": 1.2234039734362323e-08, "loss": 0.0691, "step": 9058 }, { "epoch": 2.9355152300712897, "grad_norm": 0.5280157923698425, "learning_rate": 1.2112054481654977e-08, "loss": 0.0778, "step": 9059 }, { "epoch": 2.935839274141283, "grad_norm": 0.49620354175567627, "learning_rate": 1.1990679698146158e-08, "loss": 0.0739, "step": 9060 }, { "epoch": 2.9361633182112765, "grad_norm": 0.5137200951576233, "learning_rate": 1.1869915398689535e-08, "loss": 0.075, "step": 9061 }, { "epoch": 2.9364873622812704, "grad_norm": 0.5006195902824402, "learning_rate": 1.1749761598067178e-08, "loss": 0.0723, "step": 9062 }, { "epoch": 2.936811406351264, "grad_norm": 0.5092834830284119, "learning_rate": 1.1630218310983432e-08, "loss": 0.0712, "step": 9063 }, { "epoch": 2.9371354504212572, "grad_norm": 0.5249757170677185, "learning_rate": 1.1511285552070483e-08, "loss": 0.0754, "step": 9064 }, { "epoch": 2.9374594944912507, "grad_norm": 0.47460222244262695, "learning_rate": 1.1392963335883356e-08, "loss": 0.0695, "step": 9065 }, { "epoch": 2.937783538561244, "grad_norm": 0.4928366243839264, "learning_rate": 1.1275251676904352e-08, "loss": 0.0769, "step": 9066 }, { "epoch": 2.938107582631238, "grad_norm": 0.5491718649864197, "learning_rate": 1.1158150589539729e-08, "loss": 0.0686, "step": 9067 }, { "epoch": 2.9384316267012314, "grad_norm": 0.4939388334751129, "learning_rate": 1.1041660088121354e-08, "loss": 0.0699, "step": 9068 }, { "epoch": 2.938755670771225, "grad_norm": 0.4884641468524933, "learning_rate": 1.092578018690782e-08, "loss": 0.0684, "step": 9069 }, { "epoch": 2.9390797148412187, "grad_norm": 0.47037163376808167, "learning_rate": 1.0810510900080006e-08, "loss": 0.0681, "step": 9070 }, { "epoch": 2.939403758911212, "grad_norm": 0.5198776721954346, "learning_rate": 1.0695852241747185e-08, "loss": 0.0753, "step": 9071 }, { "epoch": 2.9397278029812055, "grad_norm": 0.5155799388885498, "learning_rate": 1.0581804225941462e-08, "loss": 0.0741, "step": 9072 }, { "epoch": 2.940051847051199, "grad_norm": 0.49065136909484863, "learning_rate": 1.046836686662167e-08, "loss": 0.0716, "step": 9073 }, { "epoch": 2.9403758911211924, "grad_norm": 0.5279356837272644, "learning_rate": 1.0355540177671708e-08, "loss": 0.0713, "step": 9074 }, { "epoch": 2.940699935191186, "grad_norm": 0.5159909725189209, "learning_rate": 1.0243324172899416e-08, "loss": 0.0753, "step": 9075 }, { "epoch": 2.9410239792611796, "grad_norm": 0.48105043172836304, "learning_rate": 1.0131718866039919e-08, "loss": 0.071, "step": 9076 }, { "epoch": 2.941348023331173, "grad_norm": 0.48522862792015076, "learning_rate": 1.0020724270752846e-08, "loss": 0.0686, "step": 9077 }, { "epoch": 2.9416720674011665, "grad_norm": 0.5026848912239075, "learning_rate": 9.910340400621777e-09, "loss": 0.0736, "step": 9078 }, { "epoch": 2.94199611147116, "grad_norm": 0.48883044719696045, "learning_rate": 9.800567269157569e-09, "loss": 0.0701, "step": 9079 }, { "epoch": 2.942320155541154, "grad_norm": 0.4957825243473053, "learning_rate": 9.69140488979503e-09, "loss": 0.0715, "step": 9080 }, { "epoch": 2.942644199611147, "grad_norm": 0.48720213770866394, "learning_rate": 9.582853275894587e-09, "loss": 0.0683, "step": 9081 }, { "epoch": 2.9429682436811406, "grad_norm": 0.5106403231620789, "learning_rate": 9.474912440741723e-09, "loss": 0.0794, "step": 9082 }, { "epoch": 2.943292287751134, "grad_norm": 0.48066288232803345, "learning_rate": 9.367582397547536e-09, "loss": 0.0683, "step": 9083 }, { "epoch": 2.9436163318211275, "grad_norm": 0.45802873373031616, "learning_rate": 9.260863159448741e-09, "loss": 0.066, "step": 9084 }, { "epoch": 2.9439403758911213, "grad_norm": 0.4963512718677521, "learning_rate": 9.154754739505444e-09, "loss": 0.0733, "step": 9085 }, { "epoch": 2.9442644199611148, "grad_norm": 0.5030404925346375, "learning_rate": 9.049257150705592e-09, "loss": 0.0675, "step": 9086 }, { "epoch": 2.944588464031108, "grad_norm": 0.5117946267127991, "learning_rate": 8.944370405960522e-09, "loss": 0.0763, "step": 9087 }, { "epoch": 2.9449125081011016, "grad_norm": 0.4487900137901306, "learning_rate": 8.84009451810719e-09, "loss": 0.0664, "step": 9088 }, { "epoch": 2.945236552171095, "grad_norm": 0.5242525339126587, "learning_rate": 8.73642949990816e-09, "loss": 0.0773, "step": 9089 }, { "epoch": 2.945560596241089, "grad_norm": 0.48610663414001465, "learning_rate": 8.633375364050511e-09, "loss": 0.0688, "step": 9090 }, { "epoch": 2.9458846403110823, "grad_norm": 0.4557715356349945, "learning_rate": 8.53093212314804e-09, "loss": 0.0655, "step": 9091 }, { "epoch": 2.9462086843810757, "grad_norm": 0.440714955329895, "learning_rate": 8.429099789738493e-09, "loss": 0.0648, "step": 9092 }, { "epoch": 2.9465327284510696, "grad_norm": 0.47908642888069153, "learning_rate": 8.327878376284682e-09, "loss": 0.0658, "step": 9093 }, { "epoch": 2.9468567725210626, "grad_norm": 0.4888572692871094, "learning_rate": 8.227267895175584e-09, "loss": 0.0713, "step": 9094 }, { "epoch": 2.9471808165910565, "grad_norm": 0.42246827483177185, "learning_rate": 8.127268358724682e-09, "loss": 0.0571, "step": 9095 }, { "epoch": 2.94750486066105, "grad_norm": 0.49386322498321533, "learning_rate": 8.027879779171077e-09, "loss": 0.0744, "step": 9096 }, { "epoch": 2.9478289047310433, "grad_norm": 0.517785906791687, "learning_rate": 7.929102168678926e-09, "loss": 0.0779, "step": 9097 }, { "epoch": 2.948152948801037, "grad_norm": 0.4539172053337097, "learning_rate": 7.830935539337448e-09, "loss": 0.0654, "step": 9098 }, { "epoch": 2.9484769928710306, "grad_norm": 0.47268781065940857, "learning_rate": 7.733379903161475e-09, "loss": 0.0671, "step": 9099 }, { "epoch": 2.948801036941024, "grad_norm": 0.4961428642272949, "learning_rate": 7.636435272091458e-09, "loss": 0.0675, "step": 9100 }, { "epoch": 2.9491250810110174, "grad_norm": 0.48894017934799194, "learning_rate": 7.540101657991794e-09, "loss": 0.0692, "step": 9101 }, { "epoch": 2.949449125081011, "grad_norm": 0.4910293221473694, "learning_rate": 7.444379072652497e-09, "loss": 0.071, "step": 9102 }, { "epoch": 2.9497731691510047, "grad_norm": 0.49810612201690674, "learning_rate": 7.34926752778975e-09, "loss": 0.0733, "step": 9103 }, { "epoch": 2.950097213220998, "grad_norm": 0.4806065261363983, "learning_rate": 7.254767035044241e-09, "loss": 0.0667, "step": 9104 }, { "epoch": 2.9504212572909916, "grad_norm": 0.5162319540977478, "learning_rate": 7.16087760598172e-09, "loss": 0.0771, "step": 9105 }, { "epoch": 2.950745301360985, "grad_norm": 0.4736173450946808, "learning_rate": 7.067599252092994e-09, "loss": 0.0724, "step": 9106 }, { "epoch": 2.9510693454309784, "grad_norm": 0.489728182554245, "learning_rate": 6.974931984795042e-09, "loss": 0.0749, "step": 9107 }, { "epoch": 2.9513933895009723, "grad_norm": 0.49041807651519775, "learning_rate": 6.882875815429347e-09, "loss": 0.0686, "step": 9108 }, { "epoch": 2.9517174335709657, "grad_norm": 0.4952937066555023, "learning_rate": 6.791430755262451e-09, "loss": 0.0716, "step": 9109 }, { "epoch": 2.952041477640959, "grad_norm": 0.48909682035446167, "learning_rate": 6.7005968154859605e-09, "loss": 0.0706, "step": 9110 }, { "epoch": 2.9523655217109526, "grad_norm": 0.5242758989334106, "learning_rate": 6.610374007218201e-09, "loss": 0.0752, "step": 9111 }, { "epoch": 2.952689565780946, "grad_norm": 0.49710774421691895, "learning_rate": 6.520762341500342e-09, "loss": 0.0685, "step": 9112 }, { "epoch": 2.95301360985094, "grad_norm": 0.46828755736351013, "learning_rate": 6.431761829301386e-09, "loss": 0.0648, "step": 9113 }, { "epoch": 2.9533376539209333, "grad_norm": 0.5269603729248047, "learning_rate": 6.343372481512066e-09, "loss": 0.0716, "step": 9114 }, { "epoch": 2.9536616979909267, "grad_norm": 0.5137341618537903, "learning_rate": 6.2555943089526176e-09, "loss": 0.0709, "step": 9115 }, { "epoch": 2.95398574206092, "grad_norm": 0.48662707209587097, "learning_rate": 6.168427322365001e-09, "loss": 0.07, "step": 9116 }, { "epoch": 2.9543097861309136, "grad_norm": 0.49718114733695984, "learning_rate": 6.0818715324173495e-09, "loss": 0.0746, "step": 9117 }, { "epoch": 2.9546338302009074, "grad_norm": 0.46967118978500366, "learning_rate": 5.995926949704522e-09, "loss": 0.064, "step": 9118 }, { "epoch": 2.954957874270901, "grad_norm": 0.515914797782898, "learning_rate": 5.910593584744217e-09, "loss": 0.0693, "step": 9119 }, { "epoch": 2.9552819183408943, "grad_norm": 0.4651590585708618, "learning_rate": 5.825871447980303e-09, "loss": 0.0644, "step": 9120 }, { "epoch": 2.955605962410888, "grad_norm": 0.4563072621822357, "learning_rate": 5.7417605497828155e-09, "loss": 0.0655, "step": 9121 }, { "epoch": 2.9559300064808816, "grad_norm": 0.5311094522476196, "learning_rate": 5.658260900445744e-09, "loss": 0.0721, "step": 9122 }, { "epoch": 2.956254050550875, "grad_norm": 0.5263093709945679, "learning_rate": 5.575372510188137e-09, "loss": 0.0768, "step": 9123 }, { "epoch": 2.9565780946208684, "grad_norm": 0.4999168813228607, "learning_rate": 5.493095389155767e-09, "loss": 0.0699, "step": 9124 }, { "epoch": 2.956902138690862, "grad_norm": 0.4963438808917999, "learning_rate": 5.411429547417246e-09, "loss": 0.0731, "step": 9125 }, { "epoch": 2.9572261827608557, "grad_norm": 0.46697449684143066, "learning_rate": 5.330374994969023e-09, "loss": 0.0664, "step": 9126 }, { "epoch": 2.957550226830849, "grad_norm": 0.49287012219429016, "learning_rate": 5.2499317417303855e-09, "loss": 0.0714, "step": 9127 }, { "epoch": 2.9578742709008425, "grad_norm": 0.49234694242477417, "learning_rate": 5.1700997975467904e-09, "loss": 0.0727, "step": 9128 }, { "epoch": 2.958198314970836, "grad_norm": 0.5060413479804993, "learning_rate": 5.090879172189866e-09, "loss": 0.0747, "step": 9129 }, { "epoch": 2.9585223590408294, "grad_norm": 0.49898648262023926, "learning_rate": 5.012269875354636e-09, "loss": 0.0695, "step": 9130 }, { "epoch": 2.9588464031108233, "grad_norm": 0.47904735803604126, "learning_rate": 4.934271916662847e-09, "loss": 0.069, "step": 9131 }, { "epoch": 2.9591704471808167, "grad_norm": 0.508891224861145, "learning_rate": 4.8568853056596425e-09, "loss": 0.0721, "step": 9132 }, { "epoch": 2.95949449125081, "grad_norm": 0.47566792368888855, "learning_rate": 4.780110051816889e-09, "loss": 0.0692, "step": 9133 }, { "epoch": 2.9598185353208035, "grad_norm": 0.48462051153182983, "learning_rate": 4.703946164531514e-09, "loss": 0.0685, "step": 9134 }, { "epoch": 2.960142579390797, "grad_norm": 0.511398196220398, "learning_rate": 4.628393653124952e-09, "loss": 0.0727, "step": 9135 }, { "epoch": 2.960466623460791, "grad_norm": 0.4798050820827484, "learning_rate": 4.553452526843693e-09, "loss": 0.0699, "step": 9136 }, { "epoch": 2.9607906675307842, "grad_norm": 0.4587969183921814, "learning_rate": 4.479122794860402e-09, "loss": 0.0641, "step": 9137 }, { "epoch": 2.9611147116007777, "grad_norm": 0.47840559482574463, "learning_rate": 4.405404466272245e-09, "loss": 0.0677, "step": 9138 }, { "epoch": 2.961438755670771, "grad_norm": 0.4877968430519104, "learning_rate": 4.332297550100895e-09, "loss": 0.0721, "step": 9139 }, { "epoch": 2.9617627997407645, "grad_norm": 0.514536440372467, "learning_rate": 4.259802055295304e-09, "loss": 0.0727, "step": 9140 }, { "epoch": 2.9620868438107584, "grad_norm": 0.4699980318546295, "learning_rate": 4.1879179907267085e-09, "loss": 0.0661, "step": 9141 }, { "epoch": 2.962410887880752, "grad_norm": 0.5246437191963196, "learning_rate": 4.116645365194183e-09, "loss": 0.075, "step": 9142 }, { "epoch": 2.962734931950745, "grad_norm": 0.5276486873626709, "learning_rate": 4.045984187420194e-09, "loss": 0.075, "step": 9143 }, { "epoch": 2.963058976020739, "grad_norm": 0.5244945883750916, "learning_rate": 3.975934466053377e-09, "loss": 0.0703, "step": 9144 }, { "epoch": 2.963383020090732, "grad_norm": 0.5038921236991882, "learning_rate": 3.9064962096668766e-09, "loss": 0.0722, "step": 9145 }, { "epoch": 2.963707064160726, "grad_norm": 0.4944347143173218, "learning_rate": 3.837669426758894e-09, "loss": 0.0686, "step": 9146 }, { "epoch": 2.9640311082307194, "grad_norm": 0.467185378074646, "learning_rate": 3.769454125753802e-09, "loss": 0.073, "step": 9147 }, { "epoch": 2.964355152300713, "grad_norm": 0.5159200429916382, "learning_rate": 3.701850315000477e-09, "loss": 0.0746, "step": 9148 }, { "epoch": 2.9646791963707066, "grad_norm": 0.4407907724380493, "learning_rate": 3.6348580027728564e-09, "loss": 0.0651, "step": 9149 }, { "epoch": 2.9650032404407, "grad_norm": 0.47313445806503296, "learning_rate": 3.568477197269382e-09, "loss": 0.0692, "step": 9150 }, { "epoch": 2.9653272845106935, "grad_norm": 0.5187781453132629, "learning_rate": 3.5027079066157764e-09, "loss": 0.08, "step": 9151 }, { "epoch": 2.965651328580687, "grad_norm": 0.4831237494945526, "learning_rate": 3.4375501388606015e-09, "loss": 0.0681, "step": 9152 }, { "epoch": 2.9659753726506803, "grad_norm": 0.47124168276786804, "learning_rate": 3.373003901979144e-09, "loss": 0.0686, "step": 9153 }, { "epoch": 2.966299416720674, "grad_norm": 0.5350909233093262, "learning_rate": 3.3090692038700855e-09, "loss": 0.0735, "step": 9154 }, { "epoch": 2.9666234607906676, "grad_norm": 0.5062280893325806, "learning_rate": 3.2457460523599437e-09, "loss": 0.0741, "step": 9155 }, { "epoch": 2.966947504860661, "grad_norm": 0.4573480784893036, "learning_rate": 3.183034455198075e-09, "loss": 0.0628, "step": 9156 }, { "epoch": 2.9672715489306545, "grad_norm": 0.5378585457801819, "learning_rate": 3.1209344200594517e-09, "loss": 0.0761, "step": 9157 }, { "epoch": 2.967595593000648, "grad_norm": 0.47644490003585815, "learning_rate": 3.059445954545215e-09, "loss": 0.0706, "step": 9158 }, { "epoch": 2.9679196370706418, "grad_norm": 0.5022232532501221, "learning_rate": 2.998569066181012e-09, "loss": 0.071, "step": 9159 }, { "epoch": 2.968243681140635, "grad_norm": 0.46460914611816406, "learning_rate": 2.938303762416994e-09, "loss": 0.0663, "step": 9160 }, { "epoch": 2.9685677252106286, "grad_norm": 0.4863564670085907, "learning_rate": 2.8786500506289284e-09, "loss": 0.0682, "step": 9161 }, { "epoch": 2.968891769280622, "grad_norm": 0.45481154322624207, "learning_rate": 2.8196079381187513e-09, "loss": 0.0656, "step": 9162 }, { "epoch": 2.9692158133506155, "grad_norm": 0.4881567656993866, "learning_rate": 2.7611774321117947e-09, "loss": 0.0697, "step": 9163 }, { "epoch": 2.9695398574206093, "grad_norm": 0.49049463868141174, "learning_rate": 2.7033585397595595e-09, "loss": 0.0669, "step": 9164 }, { "epoch": 2.9698639014906028, "grad_norm": 0.4622054398059845, "learning_rate": 2.646151268138608e-09, "loss": 0.069, "step": 9165 }, { "epoch": 2.970187945560596, "grad_norm": 0.5025736093521118, "learning_rate": 2.5895556242511167e-09, "loss": 0.0726, "step": 9166 }, { "epoch": 2.9705119896305896, "grad_norm": 0.4582308232784271, "learning_rate": 2.5335716150226563e-09, "loss": 0.0642, "step": 9167 }, { "epoch": 2.970836033700583, "grad_norm": 0.46986815333366394, "learning_rate": 2.478199247306634e-09, "loss": 0.0704, "step": 9168 }, { "epoch": 2.971160077770577, "grad_norm": 0.4792775809764862, "learning_rate": 2.4234385278787407e-09, "loss": 0.0658, "step": 9169 }, { "epoch": 2.9714841218405703, "grad_norm": 0.4750364124774933, "learning_rate": 2.3692894634413934e-09, "loss": 0.0663, "step": 9170 }, { "epoch": 2.9718081659105637, "grad_norm": 0.5023250579833984, "learning_rate": 2.3157520606226226e-09, "loss": 0.0707, "step": 9171 }, { "epoch": 2.9721322099805576, "grad_norm": 0.4666900634765625, "learning_rate": 2.26282632597441e-09, "loss": 0.0668, "step": 9172 }, { "epoch": 2.972456254050551, "grad_norm": 0.5010154247283936, "learning_rate": 2.2105122659743515e-09, "loss": 0.0713, "step": 9173 }, { "epoch": 2.9727802981205445, "grad_norm": 0.47971847653388977, "learning_rate": 2.158809887025659e-09, "loss": 0.0685, "step": 9174 }, { "epoch": 2.973104342190538, "grad_norm": 0.4968419373035431, "learning_rate": 2.1077191954554933e-09, "loss": 0.0723, "step": 9175 }, { "epoch": 2.9734283862605313, "grad_norm": 0.48627862334251404, "learning_rate": 2.05724019751663e-09, "loss": 0.0703, "step": 9176 }, { "epoch": 2.973752430330525, "grad_norm": 0.5221262574195862, "learning_rate": 2.0073728993885712e-09, "loss": 0.0772, "step": 9177 }, { "epoch": 2.9740764744005186, "grad_norm": 0.5231309533119202, "learning_rate": 1.958117307173102e-09, "loss": 0.0728, "step": 9178 }, { "epoch": 2.974400518470512, "grad_norm": 0.47055307030677795, "learning_rate": 1.909473426899844e-09, "loss": 0.064, "step": 9179 }, { "epoch": 2.9747245625405054, "grad_norm": 0.44478094577789307, "learning_rate": 1.8614412645212575e-09, "loss": 0.0648, "step": 9180 }, { "epoch": 2.975048606610499, "grad_norm": 0.43987661600112915, "learning_rate": 1.8140208259165293e-09, "loss": 0.0659, "step": 9181 }, { "epoch": 2.9753726506804927, "grad_norm": 0.537038266658783, "learning_rate": 1.7672121168899048e-09, "loss": 0.0733, "step": 9182 }, { "epoch": 2.975696694750486, "grad_norm": 0.46762892603874207, "learning_rate": 1.72101514316958e-09, "loss": 0.0666, "step": 9183 }, { "epoch": 2.9760207388204796, "grad_norm": 0.5129542946815491, "learning_rate": 1.6754299104099204e-09, "loss": 0.0712, "step": 9184 }, { "epoch": 2.976344782890473, "grad_norm": 0.4832301735877991, "learning_rate": 1.630456424190352e-09, "loss": 0.07, "step": 9185 }, { "epoch": 2.9766688269604664, "grad_norm": 0.48235058784484863, "learning_rate": 1.5860946900148056e-09, "loss": 0.0749, "step": 9186 }, { "epoch": 2.9769928710304603, "grad_norm": 0.4909639358520508, "learning_rate": 1.5423447133128267e-09, "loss": 0.0726, "step": 9187 }, { "epoch": 2.9773169151004537, "grad_norm": 0.48089101910591125, "learning_rate": 1.499206499439021e-09, "loss": 0.0727, "step": 9188 }, { "epoch": 2.977640959170447, "grad_norm": 0.44641274213790894, "learning_rate": 1.4566800536730541e-09, "loss": 0.0635, "step": 9189 }, { "epoch": 2.9779650032404406, "grad_norm": 0.48997771739959717, "learning_rate": 1.4147653812196515e-09, "loss": 0.0711, "step": 9190 }, { "epoch": 2.978289047310434, "grad_norm": 0.46297505497932434, "learning_rate": 1.3734624872091539e-09, "loss": 0.0703, "step": 9191 }, { "epoch": 2.978613091380428, "grad_norm": 0.49232834577560425, "learning_rate": 1.3327713766964068e-09, "loss": 0.0718, "step": 9192 }, { "epoch": 2.9789371354504213, "grad_norm": 0.5025762319564819, "learning_rate": 1.2926920546613154e-09, "loss": 0.0788, "step": 9193 }, { "epoch": 2.9792611795204147, "grad_norm": 0.46873152256011963, "learning_rate": 1.2532245260099552e-09, "loss": 0.0674, "step": 9194 }, { "epoch": 2.9795852235904086, "grad_norm": 0.47924181818962097, "learning_rate": 1.2143687955723516e-09, "loss": 0.0702, "step": 9195 }, { "epoch": 2.9799092676604015, "grad_norm": 0.5147335529327393, "learning_rate": 1.1761248681035896e-09, "loss": 0.0725, "step": 9196 }, { "epoch": 2.9802333117303954, "grad_norm": 0.5970619916915894, "learning_rate": 1.138492748284925e-09, "loss": 0.0666, "step": 9197 }, { "epoch": 2.980557355800389, "grad_norm": 0.4947203993797302, "learning_rate": 1.1014724407215627e-09, "loss": 0.0714, "step": 9198 }, { "epoch": 2.9808813998703823, "grad_norm": 0.48733168840408325, "learning_rate": 1.065063949945433e-09, "loss": 0.0715, "step": 9199 }, { "epoch": 2.981205443940376, "grad_norm": 0.5004407167434692, "learning_rate": 1.0292672804118609e-09, "loss": 0.075, "step": 9200 }, { "epoch": 2.9815294880103695, "grad_norm": 0.46872594952583313, "learning_rate": 9.940824365023417e-10, "loss": 0.0662, "step": 9201 }, { "epoch": 2.981853532080363, "grad_norm": 0.4934064447879791, "learning_rate": 9.595094225228753e-10, "loss": 0.0721, "step": 9202 }, { "epoch": 2.9821775761503564, "grad_norm": 0.5108233094215393, "learning_rate": 9.255482427050766e-10, "loss": 0.0714, "step": 9203 }, { "epoch": 2.98250162022035, "grad_norm": 0.48761728405952454, "learning_rate": 8.92198901205621e-10, "loss": 0.0679, "step": 9204 }, { "epoch": 2.9828256642903437, "grad_norm": 0.4855932891368866, "learning_rate": 8.594614021051329e-10, "loss": 0.0725, "step": 9205 }, { "epoch": 2.983149708360337, "grad_norm": 0.48095831274986267, "learning_rate": 8.273357494120726e-10, "loss": 0.0707, "step": 9206 }, { "epoch": 2.9834737524303305, "grad_norm": 0.46818315982818604, "learning_rate": 7.958219470566297e-10, "loss": 0.067, "step": 9207 }, { "epoch": 2.983797796500324, "grad_norm": 0.4704946279525757, "learning_rate": 7.649199988968292e-10, "loss": 0.0665, "step": 9208 }, { "epoch": 2.9841218405703174, "grad_norm": 0.5369235873222351, "learning_rate": 7.346299087146458e-10, "loss": 0.0787, "step": 9209 }, { "epoch": 2.9844458846403112, "grad_norm": 0.510699987411499, "learning_rate": 7.049516802165591e-10, "loss": 0.0787, "step": 9210 }, { "epoch": 2.9847699287103047, "grad_norm": 0.45229703187942505, "learning_rate": 6.758853170363289e-10, "loss": 0.0655, "step": 9211 }, { "epoch": 2.985093972780298, "grad_norm": 0.5180689692497253, "learning_rate": 6.474308227299996e-10, "loss": 0.07, "step": 9212 }, { "epoch": 2.9854180168502915, "grad_norm": 0.4732065796852112, "learning_rate": 6.195882007803411e-10, "loss": 0.0659, "step": 9213 }, { "epoch": 2.985742060920285, "grad_norm": 0.49241015315055847, "learning_rate": 5.923574545957378e-10, "loss": 0.0733, "step": 9214 }, { "epoch": 2.986066104990279, "grad_norm": 0.5081177949905396, "learning_rate": 5.657385875085242e-10, "loss": 0.0702, "step": 9215 }, { "epoch": 2.9863901490602722, "grad_norm": 0.5168772339820862, "learning_rate": 5.397316027766497e-10, "loss": 0.0716, "step": 9216 }, { "epoch": 2.9867141931302656, "grad_norm": 0.4934365451335907, "learning_rate": 5.143365035831238e-10, "loss": 0.0717, "step": 9217 }, { "epoch": 2.987038237200259, "grad_norm": 0.4696829915046692, "learning_rate": 4.895532930360158e-10, "loss": 0.0642, "step": 9218 }, { "epoch": 2.9873622812702525, "grad_norm": 0.47430771589279175, "learning_rate": 4.653819741684551e-10, "loss": 0.0627, "step": 9219 }, { "epoch": 2.9876863253402464, "grad_norm": 0.45280787348747253, "learning_rate": 4.4182254993918596e-10, "loss": 0.0672, "step": 9220 }, { "epoch": 2.98801036941024, "grad_norm": 0.4795883595943451, "learning_rate": 4.1887502323090244e-10, "loss": 0.0673, "step": 9221 }, { "epoch": 2.988334413480233, "grad_norm": 0.49811699986457825, "learning_rate": 3.96539396853024e-10, "loss": 0.0723, "step": 9222 }, { "epoch": 2.988658457550227, "grad_norm": 0.45733726024627686, "learning_rate": 3.748156735389197e-10, "loss": 0.0715, "step": 9223 }, { "epoch": 2.9889825016202205, "grad_norm": 0.4766613245010376, "learning_rate": 3.537038559464634e-10, "loss": 0.0698, "step": 9224 }, { "epoch": 2.989306545690214, "grad_norm": 0.5184661149978638, "learning_rate": 3.332039466613646e-10, "loss": 0.0742, "step": 9225 }, { "epoch": 2.9896305897602073, "grad_norm": 0.4830702841281891, "learning_rate": 3.1331594819106194e-10, "loss": 0.0691, "step": 9226 }, { "epoch": 2.9899546338302008, "grad_norm": 0.5173357129096985, "learning_rate": 2.9403986296971943e-10, "loss": 0.0688, "step": 9227 }, { "epoch": 2.9902786779001946, "grad_norm": 0.48872441053390503, "learning_rate": 2.7537569335767124e-10, "loss": 0.0709, "step": 9228 }, { "epoch": 2.990602721970188, "grad_norm": 0.466548353433609, "learning_rate": 2.5732344163809096e-10, "loss": 0.0702, "step": 9229 }, { "epoch": 2.9909267660401815, "grad_norm": 0.48582354187965393, "learning_rate": 2.398831100214327e-10, "loss": 0.0695, "step": 9230 }, { "epoch": 2.991250810110175, "grad_norm": 0.5102851986885071, "learning_rate": 2.230547006415451e-10, "loss": 0.0727, "step": 9231 }, { "epoch": 2.9915748541801683, "grad_norm": 0.4786846339702606, "learning_rate": 2.0683821555789185e-10, "loss": 0.071, "step": 9232 }, { "epoch": 2.991898898250162, "grad_norm": 0.4758155643939972, "learning_rate": 1.9123365675555172e-10, "loss": 0.0665, "step": 9233 }, { "epoch": 2.9922229423201556, "grad_norm": 0.4775131046772003, "learning_rate": 1.7624102614410832e-10, "loss": 0.0699, "step": 9234 }, { "epoch": 2.992546986390149, "grad_norm": 0.5171465873718262, "learning_rate": 1.618603255587603e-10, "loss": 0.0743, "step": 9235 }, { "epoch": 2.9928710304601425, "grad_norm": 0.5166998505592346, "learning_rate": 1.4809155675976627e-10, "loss": 0.0732, "step": 9236 }, { "epoch": 2.993195074530136, "grad_norm": 0.5328329801559448, "learning_rate": 1.3493472143188968e-10, "loss": 0.0712, "step": 9237 }, { "epoch": 2.9935191186001298, "grad_norm": 0.4790295958518982, "learning_rate": 1.2238982118606412e-10, "loss": 0.0741, "step": 9238 }, { "epoch": 2.993843162670123, "grad_norm": 0.4853643476963043, "learning_rate": 1.1045685755661784e-10, "loss": 0.0687, "step": 9239 }, { "epoch": 2.9941672067401166, "grad_norm": 0.46291592717170715, "learning_rate": 9.91358320046043e-11, "loss": 0.0652, "step": 9240 }, { "epoch": 2.99449125081011, "grad_norm": 0.49624311923980713, "learning_rate": 8.842674591558187e-11, "loss": 0.0691, "step": 9241 }, { "epoch": 2.9948152948801035, "grad_norm": 0.4871693253517151, "learning_rate": 7.832960060016881e-11, "loss": 0.0697, "step": 9242 }, { "epoch": 2.9951393389500973, "grad_norm": 0.4829963445663452, "learning_rate": 6.884439729459847e-11, "loss": 0.0723, "step": 9243 }, { "epoch": 2.9954633830200907, "grad_norm": 0.4893077611923218, "learning_rate": 5.99711371590539e-11, "loss": 0.0718, "step": 9244 }, { "epoch": 2.995787427090084, "grad_norm": 0.5166180729866028, "learning_rate": 5.170982127988833e-11, "loss": 0.0742, "step": 9245 }, { "epoch": 2.996111471160078, "grad_norm": 0.4631396532058716, "learning_rate": 4.406045066851494e-11, "loss": 0.0642, "step": 9246 }, { "epoch": 2.9964355152300715, "grad_norm": 0.46219220757484436, "learning_rate": 3.7023026260296633e-11, "loss": 0.0689, "step": 9247 }, { "epoch": 2.996759559300065, "grad_norm": 0.5113931894302368, "learning_rate": 3.059754891732158e-11, "loss": 0.0783, "step": 9248 }, { "epoch": 2.9970836033700583, "grad_norm": 0.4924919307231903, "learning_rate": 2.4784019426182804e-11, "loss": 0.0701, "step": 9249 }, { "epoch": 2.9974076474400517, "grad_norm": 0.48740407824516296, "learning_rate": 1.958243849742303e-11, "loss": 0.0722, "step": 9250 }, { "epoch": 2.9977316915100456, "grad_norm": 0.492016077041626, "learning_rate": 1.4992806768310274e-11, "loss": 0.0721, "step": 9251 }, { "epoch": 2.998055735580039, "grad_norm": 0.5209946036338806, "learning_rate": 1.1015124800617395e-11, "loss": 0.0813, "step": 9252 }, { "epoch": 2.9983797796500324, "grad_norm": 0.5001586675643921, "learning_rate": 7.649393080622069e-12, "loss": 0.0705, "step": 9253 }, { "epoch": 2.998703823720026, "grad_norm": 0.4529384672641754, "learning_rate": 4.8956120207721554e-12, "loss": 0.0628, "step": 9254 }, { "epoch": 2.9990278677900193, "grad_norm": 0.4866239130496979, "learning_rate": 2.753781958575452e-12, "loss": 0.0698, "step": 9255 }, { "epoch": 2.999351911860013, "grad_norm": 0.4864363372325897, "learning_rate": 1.2239031549343693e-12, "loss": 0.0658, "step": 9256 }, { "epoch": 2.9996759559300066, "grad_norm": 0.49378660321235657, "learning_rate": 3.059757980317102e-13, "loss": 0.073, "step": 9257 }, { "epoch": 3.0, "grad_norm": 0.49170607328414917, "learning_rate": 0.0, "loss": 0.0689, "step": 9258 } ], "logging_steps": 1.0, "max_steps": 9258, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.880459018641395e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }