{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999329863515536, "eval_steps": 500, "global_step": 11191, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.9351531261867e-05, "grad_norm": 0.7860376238822937, "learning_rate": 2.976190476190476e-07, "loss": 1.8548, "step": 1 }, { "epoch": 0.000178703062523734, "grad_norm": 0.5748365521430969, "learning_rate": 5.952380952380952e-07, "loss": 1.8254, "step": 2 }, { "epoch": 0.000268054593785601, "grad_norm": 0.8621116876602173, "learning_rate": 8.928571428571428e-07, "loss": 1.8688, "step": 3 }, { "epoch": 0.000357406125047468, "grad_norm": 0.6630846261978149, "learning_rate": 1.1904761904761904e-06, "loss": 1.8504, "step": 4 }, { "epoch": 0.000446757656309335, "grad_norm": 0.7190170884132385, "learning_rate": 1.4880952380952381e-06, "loss": 1.8197, "step": 5 }, { "epoch": 0.000536109187571202, "grad_norm": 0.7450002431869507, "learning_rate": 1.7857142857142857e-06, "loss": 1.8078, "step": 6 }, { "epoch": 0.000625460718833069, "grad_norm": 0.903852641582489, "learning_rate": 2.0833333333333334e-06, "loss": 1.9407, "step": 7 }, { "epoch": 0.000714812250094936, "grad_norm": 0.8060555458068848, "learning_rate": 2.3809523809523808e-06, "loss": 1.7434, "step": 8 }, { "epoch": 0.000804163781356803, "grad_norm": 1.1107157468795776, "learning_rate": 2.6785714285714285e-06, "loss": 1.9593, "step": 9 }, { "epoch": 0.00089351531261867, "grad_norm": 0.7935706973075867, "learning_rate": 2.9761904761904763e-06, "loss": 1.8844, "step": 10 }, { "epoch": 0.000982866843880537, "grad_norm": 0.825862467288971, "learning_rate": 3.273809523809524e-06, "loss": 1.8664, "step": 11 }, { "epoch": 0.001072218375142404, "grad_norm": 0.8665326833724976, "learning_rate": 3.5714285714285714e-06, "loss": 1.8477, "step": 12 }, { "epoch": 0.001161569906404271, "grad_norm": 0.7275350689888, "learning_rate": 3.869047619047619e-06, "loss": 1.862, "step": 13 }, { "epoch": 0.001250921437666138, "grad_norm": 0.9508900046348572, "learning_rate": 4.166666666666667e-06, "loss": 1.8362, "step": 14 }, { "epoch": 0.001340272968928005, "grad_norm": 0.9345436096191406, "learning_rate": 4.464285714285715e-06, "loss": 1.9073, "step": 15 }, { "epoch": 0.001429624500189872, "grad_norm": 0.8254266977310181, "learning_rate": 4.7619047619047615e-06, "loss": 1.8047, "step": 16 }, { "epoch": 0.001518976031451739, "grad_norm": 0.8926609754562378, "learning_rate": 5.05952380952381e-06, "loss": 1.8944, "step": 17 }, { "epoch": 0.001608327562713606, "grad_norm": 0.8045806288719177, "learning_rate": 5.357142857142857e-06, "loss": 1.8269, "step": 18 }, { "epoch": 0.001697679093975473, "grad_norm": 0.7670357823371887, "learning_rate": 5.654761904761905e-06, "loss": 1.7618, "step": 19 }, { "epoch": 0.00178703062523734, "grad_norm": 0.8601244688034058, "learning_rate": 5.9523809523809525e-06, "loss": 1.8391, "step": 20 }, { "epoch": 0.001876382156499207, "grad_norm": 0.9491680264472961, "learning_rate": 6.25e-06, "loss": 1.8631, "step": 21 }, { "epoch": 0.001965733687761074, "grad_norm": 0.8884113430976868, "learning_rate": 6.547619047619048e-06, "loss": 1.786, "step": 22 }, { "epoch": 0.002055085219022941, "grad_norm": 0.8622804880142212, "learning_rate": 6.845238095238096e-06, "loss": 1.7653, "step": 23 }, { "epoch": 0.002144436750284808, "grad_norm": 0.8542262315750122, "learning_rate": 7.142857142857143e-06, "loss": 1.7977, "step": 24 }, { "epoch": 0.002233788281546675, "grad_norm": 0.6878840923309326, "learning_rate": 7.4404761904761905e-06, "loss": 1.7566, "step": 25 }, { "epoch": 0.002323139812808542, "grad_norm": 1.2377084493637085, "learning_rate": 7.738095238095238e-06, "loss": 1.7302, "step": 26 }, { "epoch": 0.002412491344070409, "grad_norm": 0.9201034903526306, "learning_rate": 8.035714285714286e-06, "loss": 1.7652, "step": 27 }, { "epoch": 0.002501842875332276, "grad_norm": 0.9445549249649048, "learning_rate": 8.333333333333334e-06, "loss": 1.7671, "step": 28 }, { "epoch": 0.002591194406594143, "grad_norm": 0.7296178340911865, "learning_rate": 8.630952380952381e-06, "loss": 1.6999, "step": 29 }, { "epoch": 0.00268054593785601, "grad_norm": 0.6263100504875183, "learning_rate": 8.92857142857143e-06, "loss": 1.7825, "step": 30 }, { "epoch": 0.002769897469117877, "grad_norm": 0.5757091045379639, "learning_rate": 9.226190476190477e-06, "loss": 1.6878, "step": 31 }, { "epoch": 0.002859249000379744, "grad_norm": 0.5896580219268799, "learning_rate": 9.523809523809523e-06, "loss": 1.658, "step": 32 }, { "epoch": 0.002948600531641611, "grad_norm": 0.4904349446296692, "learning_rate": 9.821428571428573e-06, "loss": 1.6135, "step": 33 }, { "epoch": 0.003037952062903478, "grad_norm": 0.628957211971283, "learning_rate": 1.011904761904762e-05, "loss": 1.617, "step": 34 }, { "epoch": 0.003127303594165345, "grad_norm": 0.5395067930221558, "learning_rate": 1.0416666666666668e-05, "loss": 1.6439, "step": 35 }, { "epoch": 0.003216655125427212, "grad_norm": 0.4463937282562256, "learning_rate": 1.0714285714285714e-05, "loss": 1.5962, "step": 36 }, { "epoch": 0.003306006656689079, "grad_norm": 0.4546874463558197, "learning_rate": 1.1011904761904762e-05, "loss": 1.6277, "step": 37 }, { "epoch": 0.003395358187950946, "grad_norm": 0.45821845531463623, "learning_rate": 1.130952380952381e-05, "loss": 1.6352, "step": 38 }, { "epoch": 0.003484709719212813, "grad_norm": 0.59076327085495, "learning_rate": 1.1607142857142857e-05, "loss": 1.6098, "step": 39 }, { "epoch": 0.00357406125047468, "grad_norm": 0.39401739835739136, "learning_rate": 1.1904761904761905e-05, "loss": 1.6404, "step": 40 }, { "epoch": 0.003663412781736547, "grad_norm": 0.3501727879047394, "learning_rate": 1.2202380952380953e-05, "loss": 1.6279, "step": 41 }, { "epoch": 0.003752764312998414, "grad_norm": 0.3471863269805908, "learning_rate": 1.25e-05, "loss": 1.5823, "step": 42 }, { "epoch": 0.003842115844260281, "grad_norm": 0.3801725506782532, "learning_rate": 1.2797619047619047e-05, "loss": 1.5969, "step": 43 }, { "epoch": 0.003931467375522148, "grad_norm": 0.3517632782459259, "learning_rate": 1.3095238095238096e-05, "loss": 1.6075, "step": 44 }, { "epoch": 0.004020818906784015, "grad_norm": 0.32643967866897583, "learning_rate": 1.3392857142857144e-05, "loss": 1.537, "step": 45 }, { "epoch": 0.004110170438045882, "grad_norm": 0.36205053329467773, "learning_rate": 1.3690476190476192e-05, "loss": 1.5646, "step": 46 }, { "epoch": 0.004199521969307749, "grad_norm": 0.3534471094608307, "learning_rate": 1.398809523809524e-05, "loss": 1.5713, "step": 47 }, { "epoch": 0.004288873500569616, "grad_norm": 0.349733829498291, "learning_rate": 1.4285714285714285e-05, "loss": 1.5545, "step": 48 }, { "epoch": 0.004378225031831483, "grad_norm": 0.3604637682437897, "learning_rate": 1.4583333333333335e-05, "loss": 1.569, "step": 49 }, { "epoch": 0.00446757656309335, "grad_norm": 0.3466920256614685, "learning_rate": 1.4880952380952381e-05, "loss": 1.571, "step": 50 }, { "epoch": 0.004556928094355217, "grad_norm": 0.3057480752468109, "learning_rate": 1.5178571428571429e-05, "loss": 1.4928, "step": 51 }, { "epoch": 0.004646279625617084, "grad_norm": 0.3168238401412964, "learning_rate": 1.5476190476190476e-05, "loss": 1.5952, "step": 52 }, { "epoch": 0.004735631156878951, "grad_norm": 0.33002302050590515, "learning_rate": 1.5773809523809524e-05, "loss": 1.5885, "step": 53 }, { "epoch": 0.004824982688140818, "grad_norm": 0.35049816966056824, "learning_rate": 1.6071428571428572e-05, "loss": 1.5679, "step": 54 }, { "epoch": 0.004914334219402685, "grad_norm": 0.398250937461853, "learning_rate": 1.636904761904762e-05, "loss": 1.4671, "step": 55 }, { "epoch": 0.005003685750664552, "grad_norm": 0.34120944142341614, "learning_rate": 1.6666666666666667e-05, "loss": 1.5189, "step": 56 }, { "epoch": 0.005093037281926419, "grad_norm": 0.3141593635082245, "learning_rate": 1.6964285714285715e-05, "loss": 1.5387, "step": 57 }, { "epoch": 0.005182388813188286, "grad_norm": 0.2942512035369873, "learning_rate": 1.7261904761904763e-05, "loss": 1.5823, "step": 58 }, { "epoch": 0.005271740344450153, "grad_norm": 0.3249220550060272, "learning_rate": 1.755952380952381e-05, "loss": 1.4426, "step": 59 }, { "epoch": 0.00536109187571202, "grad_norm": 0.3305152356624603, "learning_rate": 1.785714285714286e-05, "loss": 1.4518, "step": 60 }, { "epoch": 0.005450443406973887, "grad_norm": 0.31893137097358704, "learning_rate": 1.8154761904761906e-05, "loss": 1.4131, "step": 61 }, { "epoch": 0.005539794938235754, "grad_norm": 0.35763436555862427, "learning_rate": 1.8452380952380954e-05, "loss": 1.4497, "step": 62 }, { "epoch": 0.005629146469497621, "grad_norm": 0.34308096766471863, "learning_rate": 1.8750000000000002e-05, "loss": 1.445, "step": 63 }, { "epoch": 0.005718498000759488, "grad_norm": 0.3230386972427368, "learning_rate": 1.9047619047619046e-05, "loss": 1.4297, "step": 64 }, { "epoch": 0.005807849532021355, "grad_norm": 0.3689243495464325, "learning_rate": 1.9345238095238097e-05, "loss": 1.4096, "step": 65 }, { "epoch": 0.005897201063283222, "grad_norm": 0.2907058894634247, "learning_rate": 1.9642857142857145e-05, "loss": 1.49, "step": 66 }, { "epoch": 0.005986552594545089, "grad_norm": 0.3283701539039612, "learning_rate": 1.9940476190476193e-05, "loss": 1.4467, "step": 67 }, { "epoch": 0.006075904125806956, "grad_norm": 0.29510149359703064, "learning_rate": 2.023809523809524e-05, "loss": 1.4465, "step": 68 }, { "epoch": 0.006165255657068823, "grad_norm": 0.3028480112552643, "learning_rate": 2.0535714285714285e-05, "loss": 1.5013, "step": 69 }, { "epoch": 0.00625460718833069, "grad_norm": 0.30100324749946594, "learning_rate": 2.0833333333333336e-05, "loss": 1.3521, "step": 70 }, { "epoch": 0.006343958719592557, "grad_norm": 0.2899802029132843, "learning_rate": 2.113095238095238e-05, "loss": 1.4652, "step": 71 }, { "epoch": 0.006433310250854424, "grad_norm": 0.37080496549606323, "learning_rate": 2.1428571428571428e-05, "loss": 1.3875, "step": 72 }, { "epoch": 0.006522661782116291, "grad_norm": 0.3363150656223297, "learning_rate": 2.172619047619048e-05, "loss": 1.355, "step": 73 }, { "epoch": 0.006612013313378158, "grad_norm": 0.3124528229236603, "learning_rate": 2.2023809523809524e-05, "loss": 1.3817, "step": 74 }, { "epoch": 0.006701364844640025, "grad_norm": 0.293714702129364, "learning_rate": 2.2321428571428575e-05, "loss": 1.3825, "step": 75 }, { "epoch": 0.006790716375901892, "grad_norm": 0.3099130690097809, "learning_rate": 2.261904761904762e-05, "loss": 1.4276, "step": 76 }, { "epoch": 0.006880067907163759, "grad_norm": 0.32042503356933594, "learning_rate": 2.2916666666666667e-05, "loss": 1.4544, "step": 77 }, { "epoch": 0.006969419438425626, "grad_norm": 0.3349694311618805, "learning_rate": 2.3214285714285715e-05, "loss": 1.3263, "step": 78 }, { "epoch": 0.007058770969687493, "grad_norm": 0.3454136848449707, "learning_rate": 2.3511904761904762e-05, "loss": 1.3386, "step": 79 }, { "epoch": 0.00714812250094936, "grad_norm": 0.30371707677841187, "learning_rate": 2.380952380952381e-05, "loss": 1.3713, "step": 80 }, { "epoch": 0.007237474032211227, "grad_norm": 0.35178881883621216, "learning_rate": 2.4107142857142858e-05, "loss": 1.2754, "step": 81 }, { "epoch": 0.007326825563473094, "grad_norm": 0.34617844223976135, "learning_rate": 2.4404761904761906e-05, "loss": 1.4094, "step": 82 }, { "epoch": 0.007416177094734961, "grad_norm": 0.31313589215278625, "learning_rate": 2.4702380952380953e-05, "loss": 1.3358, "step": 83 }, { "epoch": 0.007505528625996828, "grad_norm": 0.3196007311344147, "learning_rate": 2.5e-05, "loss": 1.4212, "step": 84 }, { "epoch": 0.007594880157258695, "grad_norm": 0.3096368610858917, "learning_rate": 2.529761904761905e-05, "loss": 1.408, "step": 85 }, { "epoch": 0.007684231688520562, "grad_norm": 0.4201064705848694, "learning_rate": 2.5595238095238093e-05, "loss": 1.4166, "step": 86 }, { "epoch": 0.007773583219782429, "grad_norm": 0.2974705398082733, "learning_rate": 2.5892857142857148e-05, "loss": 1.4298, "step": 87 }, { "epoch": 0.007862934751044296, "grad_norm": 0.29475000500679016, "learning_rate": 2.6190476190476192e-05, "loss": 1.363, "step": 88 }, { "epoch": 0.007952286282306162, "grad_norm": 0.29409998655319214, "learning_rate": 2.648809523809524e-05, "loss": 1.3672, "step": 89 }, { "epoch": 0.00804163781356803, "grad_norm": 0.33392786979675293, "learning_rate": 2.6785714285714288e-05, "loss": 1.3839, "step": 90 }, { "epoch": 0.008130989344829897, "grad_norm": 0.35475441813468933, "learning_rate": 2.7083333333333332e-05, "loss": 1.3376, "step": 91 }, { "epoch": 0.008220340876091765, "grad_norm": 0.3339526951313019, "learning_rate": 2.7380952380952383e-05, "loss": 1.3865, "step": 92 }, { "epoch": 0.00830969240735363, "grad_norm": 0.35839566588401794, "learning_rate": 2.767857142857143e-05, "loss": 1.2942, "step": 93 }, { "epoch": 0.008399043938615498, "grad_norm": 0.3491029739379883, "learning_rate": 2.797619047619048e-05, "loss": 1.2828, "step": 94 }, { "epoch": 0.008488395469877365, "grad_norm": 0.3070695400238037, "learning_rate": 2.8273809523809523e-05, "loss": 1.3261, "step": 95 }, { "epoch": 0.008577747001139231, "grad_norm": 0.3386940360069275, "learning_rate": 2.857142857142857e-05, "loss": 1.3626, "step": 96 }, { "epoch": 0.008667098532401099, "grad_norm": 0.3212776184082031, "learning_rate": 2.886904761904762e-05, "loss": 1.3482, "step": 97 }, { "epoch": 0.008756450063662966, "grad_norm": 0.3101659119129181, "learning_rate": 2.916666666666667e-05, "loss": 1.3217, "step": 98 }, { "epoch": 0.008845801594924834, "grad_norm": 0.3669290542602539, "learning_rate": 2.9464285714285718e-05, "loss": 1.3461, "step": 99 }, { "epoch": 0.0089351531261867, "grad_norm": 0.3357386291027069, "learning_rate": 2.9761904761904762e-05, "loss": 1.3613, "step": 100 }, { "epoch": 0.009024504657448567, "grad_norm": 0.3187052607536316, "learning_rate": 3.005952380952381e-05, "loss": 1.2509, "step": 101 }, { "epoch": 0.009113856188710434, "grad_norm": 0.36679479479789734, "learning_rate": 3.0357142857142857e-05, "loss": 1.3448, "step": 102 }, { "epoch": 0.0092032077199723, "grad_norm": 0.3677942752838135, "learning_rate": 3.0654761904761905e-05, "loss": 1.3676, "step": 103 }, { "epoch": 0.009292559251234168, "grad_norm": 0.36349645256996155, "learning_rate": 3.095238095238095e-05, "loss": 1.3115, "step": 104 }, { "epoch": 0.009381910782496035, "grad_norm": 0.3469372093677521, "learning_rate": 3.125e-05, "loss": 1.3233, "step": 105 }, { "epoch": 0.009471262313757903, "grad_norm": 0.556049644947052, "learning_rate": 3.154761904761905e-05, "loss": 1.299, "step": 106 }, { "epoch": 0.009560613845019768, "grad_norm": 0.3648083508014679, "learning_rate": 3.1845238095238096e-05, "loss": 1.3484, "step": 107 }, { "epoch": 0.009649965376281636, "grad_norm": 0.3116719424724579, "learning_rate": 3.2142857142857144e-05, "loss": 1.3059, "step": 108 }, { "epoch": 0.009739316907543503, "grad_norm": 0.3249325454235077, "learning_rate": 3.244047619047619e-05, "loss": 1.2851, "step": 109 }, { "epoch": 0.00982866843880537, "grad_norm": 0.3656911551952362, "learning_rate": 3.273809523809524e-05, "loss": 1.2504, "step": 110 }, { "epoch": 0.009918019970067237, "grad_norm": 0.3634350895881653, "learning_rate": 3.303571428571429e-05, "loss": 1.4084, "step": 111 }, { "epoch": 0.010007371501329104, "grad_norm": 0.39990171790122986, "learning_rate": 3.3333333333333335e-05, "loss": 1.3367, "step": 112 }, { "epoch": 0.010096723032590972, "grad_norm": 0.4223339855670929, "learning_rate": 3.363095238095238e-05, "loss": 1.2866, "step": 113 }, { "epoch": 0.010186074563852838, "grad_norm": 0.3745315968990326, "learning_rate": 3.392857142857143e-05, "loss": 1.3021, "step": 114 }, { "epoch": 0.010275426095114705, "grad_norm": 0.4239887297153473, "learning_rate": 3.422619047619048e-05, "loss": 1.2906, "step": 115 }, { "epoch": 0.010364777626376573, "grad_norm": 0.3679508566856384, "learning_rate": 3.4523809523809526e-05, "loss": 1.2425, "step": 116 }, { "epoch": 0.010454129157638438, "grad_norm": 0.38168981671333313, "learning_rate": 3.4821428571428574e-05, "loss": 1.213, "step": 117 }, { "epoch": 0.010543480688900306, "grad_norm": 0.33976343274116516, "learning_rate": 3.511904761904762e-05, "loss": 1.2012, "step": 118 }, { "epoch": 0.010632832220162173, "grad_norm": 0.3905802369117737, "learning_rate": 3.541666666666667e-05, "loss": 1.2856, "step": 119 }, { "epoch": 0.01072218375142404, "grad_norm": 0.3847375810146332, "learning_rate": 3.571428571428572e-05, "loss": 1.3329, "step": 120 }, { "epoch": 0.010811535282685907, "grad_norm": 0.38759222626686096, "learning_rate": 3.6011904761904765e-05, "loss": 1.3153, "step": 121 }, { "epoch": 0.010900886813947774, "grad_norm": 0.36399710178375244, "learning_rate": 3.630952380952381e-05, "loss": 1.2413, "step": 122 }, { "epoch": 0.010990238345209642, "grad_norm": 0.390902042388916, "learning_rate": 3.6607142857142853e-05, "loss": 1.2319, "step": 123 }, { "epoch": 0.011079589876471507, "grad_norm": 0.37981510162353516, "learning_rate": 3.690476190476191e-05, "loss": 1.2977, "step": 124 }, { "epoch": 0.011168941407733375, "grad_norm": 0.3691035807132721, "learning_rate": 3.7202380952380956e-05, "loss": 1.2927, "step": 125 }, { "epoch": 0.011258292938995242, "grad_norm": 0.3708125948905945, "learning_rate": 3.7500000000000003e-05, "loss": 1.2989, "step": 126 }, { "epoch": 0.01134764447025711, "grad_norm": 0.3609068989753723, "learning_rate": 3.779761904761905e-05, "loss": 1.3181, "step": 127 }, { "epoch": 0.011436996001518976, "grad_norm": 0.4091266095638275, "learning_rate": 3.809523809523809e-05, "loss": 1.2894, "step": 128 }, { "epoch": 0.011526347532780843, "grad_norm": 0.41847166419029236, "learning_rate": 3.839285714285715e-05, "loss": 1.1952, "step": 129 }, { "epoch": 0.01161569906404271, "grad_norm": 0.5045241713523865, "learning_rate": 3.8690476190476195e-05, "loss": 1.2808, "step": 130 }, { "epoch": 0.011705050595304576, "grad_norm": 0.4047856032848358, "learning_rate": 3.898809523809524e-05, "loss": 1.2858, "step": 131 }, { "epoch": 0.011794402126566444, "grad_norm": 0.4096450209617615, "learning_rate": 3.928571428571429e-05, "loss": 1.3116, "step": 132 }, { "epoch": 0.011883753657828311, "grad_norm": 0.3629261553287506, "learning_rate": 3.958333333333333e-05, "loss": 1.2968, "step": 133 }, { "epoch": 0.011973105189090179, "grad_norm": 0.40072211623191833, "learning_rate": 3.9880952380952386e-05, "loss": 1.257, "step": 134 }, { "epoch": 0.012062456720352045, "grad_norm": 0.40854790806770325, "learning_rate": 4.017857142857143e-05, "loss": 1.2404, "step": 135 }, { "epoch": 0.012151808251613912, "grad_norm": 0.43366968631744385, "learning_rate": 4.047619047619048e-05, "loss": 1.2719, "step": 136 }, { "epoch": 0.01224115978287578, "grad_norm": 0.35978153347969055, "learning_rate": 4.077380952380952e-05, "loss": 1.2299, "step": 137 }, { "epoch": 0.012330511314137645, "grad_norm": 0.3845697343349457, "learning_rate": 4.107142857142857e-05, "loss": 1.2342, "step": 138 }, { "epoch": 0.012419862845399513, "grad_norm": 0.48100224137306213, "learning_rate": 4.136904761904762e-05, "loss": 1.2568, "step": 139 }, { "epoch": 0.01250921437666138, "grad_norm": 0.40048471093177795, "learning_rate": 4.166666666666667e-05, "loss": 1.2981, "step": 140 }, { "epoch": 0.012598565907923248, "grad_norm": 0.46120962500572205, "learning_rate": 4.196428571428572e-05, "loss": 1.2794, "step": 141 }, { "epoch": 0.012687917439185114, "grad_norm": 0.4530184864997864, "learning_rate": 4.226190476190476e-05, "loss": 1.2063, "step": 142 }, { "epoch": 0.012777268970446981, "grad_norm": 0.39693745970726013, "learning_rate": 4.255952380952381e-05, "loss": 1.3012, "step": 143 }, { "epoch": 0.012866620501708849, "grad_norm": 0.40875282883644104, "learning_rate": 4.2857142857142856e-05, "loss": 1.2265, "step": 144 }, { "epoch": 0.012955972032970714, "grad_norm": 0.4045838415622711, "learning_rate": 4.315476190476191e-05, "loss": 1.2228, "step": 145 }, { "epoch": 0.013045323564232582, "grad_norm": 0.4586333632469177, "learning_rate": 4.345238095238096e-05, "loss": 1.215, "step": 146 }, { "epoch": 0.01313467509549445, "grad_norm": 0.4658620059490204, "learning_rate": 4.375e-05, "loss": 1.3559, "step": 147 }, { "epoch": 0.013224026626756317, "grad_norm": 0.4266040325164795, "learning_rate": 4.404761904761905e-05, "loss": 1.2215, "step": 148 }, { "epoch": 0.013313378158018183, "grad_norm": 0.39535924792289734, "learning_rate": 4.4345238095238095e-05, "loss": 1.3024, "step": 149 }, { "epoch": 0.01340272968928005, "grad_norm": 0.4783201515674591, "learning_rate": 4.464285714285715e-05, "loss": 1.2651, "step": 150 }, { "epoch": 0.013492081220541918, "grad_norm": 0.44912487268447876, "learning_rate": 4.494047619047619e-05, "loss": 1.2107, "step": 151 }, { "epoch": 0.013581432751803783, "grad_norm": 0.40533971786499023, "learning_rate": 4.523809523809524e-05, "loss": 1.2386, "step": 152 }, { "epoch": 0.01367078428306565, "grad_norm": 0.47869575023651123, "learning_rate": 4.5535714285714286e-05, "loss": 1.2709, "step": 153 }, { "epoch": 0.013760135814327518, "grad_norm": 0.45831164717674255, "learning_rate": 4.5833333333333334e-05, "loss": 1.2435, "step": 154 }, { "epoch": 0.013849487345589386, "grad_norm": 0.5027099847793579, "learning_rate": 4.613095238095239e-05, "loss": 1.2706, "step": 155 }, { "epoch": 0.013938838876851252, "grad_norm": 0.4425562024116516, "learning_rate": 4.642857142857143e-05, "loss": 1.1868, "step": 156 }, { "epoch": 0.014028190408113119, "grad_norm": 0.5197780728340149, "learning_rate": 4.672619047619048e-05, "loss": 1.2776, "step": 157 }, { "epoch": 0.014117541939374987, "grad_norm": 0.4800921380519867, "learning_rate": 4.7023809523809525e-05, "loss": 1.2213, "step": 158 }, { "epoch": 0.014206893470636852, "grad_norm": 0.4182106852531433, "learning_rate": 4.732142857142857e-05, "loss": 1.2358, "step": 159 }, { "epoch": 0.01429624500189872, "grad_norm": 0.4423435926437378, "learning_rate": 4.761904761904762e-05, "loss": 1.263, "step": 160 }, { "epoch": 0.014385596533160587, "grad_norm": 0.5085612535476685, "learning_rate": 4.791666666666667e-05, "loss": 1.206, "step": 161 }, { "epoch": 0.014474948064422455, "grad_norm": 0.4774646461009979, "learning_rate": 4.8214285714285716e-05, "loss": 1.2295, "step": 162 }, { "epoch": 0.01456429959568432, "grad_norm": 0.4879673719406128, "learning_rate": 4.8511904761904764e-05, "loss": 1.2492, "step": 163 }, { "epoch": 0.014653651126946188, "grad_norm": 0.4564456343650818, "learning_rate": 4.880952380952381e-05, "loss": 1.196, "step": 164 }, { "epoch": 0.014743002658208056, "grad_norm": 0.4189358353614807, "learning_rate": 4.910714285714286e-05, "loss": 1.2245, "step": 165 }, { "epoch": 0.014832354189469921, "grad_norm": 0.48813703656196594, "learning_rate": 4.940476190476191e-05, "loss": 1.1321, "step": 166 }, { "epoch": 0.014921705720731789, "grad_norm": 0.5083624124526978, "learning_rate": 4.9702380952380955e-05, "loss": 1.1557, "step": 167 }, { "epoch": 0.015011057251993656, "grad_norm": 0.4980645179748535, "learning_rate": 5e-05, "loss": 1.2758, "step": 168 }, { "epoch": 0.015100408783255524, "grad_norm": 0.4460103511810303, "learning_rate": 5.029761904761905e-05, "loss": 1.2135, "step": 169 }, { "epoch": 0.01518976031451739, "grad_norm": 0.4435397684574127, "learning_rate": 5.05952380952381e-05, "loss": 1.295, "step": 170 }, { "epoch": 0.015279111845779257, "grad_norm": 0.5360916256904602, "learning_rate": 5.089285714285714e-05, "loss": 1.2055, "step": 171 }, { "epoch": 0.015368463377041125, "grad_norm": 0.5073984265327454, "learning_rate": 5.119047619047619e-05, "loss": 1.2104, "step": 172 }, { "epoch": 0.01545781490830299, "grad_norm": 0.4850315451622009, "learning_rate": 5.1488095238095234e-05, "loss": 1.2246, "step": 173 }, { "epoch": 0.015547166439564858, "grad_norm": 0.46031928062438965, "learning_rate": 5.1785714285714296e-05, "loss": 1.1649, "step": 174 }, { "epoch": 0.015636517970826724, "grad_norm": 0.4688253104686737, "learning_rate": 5.208333333333334e-05, "loss": 1.2064, "step": 175 }, { "epoch": 0.015725869502088593, "grad_norm": 0.44692277908325195, "learning_rate": 5.2380952380952384e-05, "loss": 1.2672, "step": 176 }, { "epoch": 0.01581522103335046, "grad_norm": 0.5310292840003967, "learning_rate": 5.267857142857143e-05, "loss": 1.2706, "step": 177 }, { "epoch": 0.015904572564612324, "grad_norm": 0.5005099177360535, "learning_rate": 5.297619047619048e-05, "loss": 1.2482, "step": 178 }, { "epoch": 0.015993924095874194, "grad_norm": 0.45003390312194824, "learning_rate": 5.327380952380953e-05, "loss": 1.25, "step": 179 }, { "epoch": 0.01608327562713606, "grad_norm": 0.4997723400592804, "learning_rate": 5.3571428571428575e-05, "loss": 1.1774, "step": 180 }, { "epoch": 0.01617262715839793, "grad_norm": 0.4725038409233093, "learning_rate": 5.3869047619047616e-05, "loss": 1.226, "step": 181 }, { "epoch": 0.016261978689659794, "grad_norm": 0.47115814685821533, "learning_rate": 5.4166666666666664e-05, "loss": 1.2809, "step": 182 }, { "epoch": 0.01635133022092166, "grad_norm": 0.4939117133617401, "learning_rate": 5.446428571428571e-05, "loss": 1.2195, "step": 183 }, { "epoch": 0.01644068175218353, "grad_norm": 0.4947628974914551, "learning_rate": 5.4761904761904766e-05, "loss": 1.1385, "step": 184 }, { "epoch": 0.016530033283445395, "grad_norm": 0.43742480874061584, "learning_rate": 5.5059523809523814e-05, "loss": 1.2191, "step": 185 }, { "epoch": 0.01661938481470726, "grad_norm": 0.48737379908561707, "learning_rate": 5.535714285714286e-05, "loss": 1.2033, "step": 186 }, { "epoch": 0.01670873634596913, "grad_norm": 0.46898481249809265, "learning_rate": 5.565476190476191e-05, "loss": 1.2672, "step": 187 }, { "epoch": 0.016798087877230996, "grad_norm": 0.5089002251625061, "learning_rate": 5.595238095238096e-05, "loss": 1.1853, "step": 188 }, { "epoch": 0.01688743940849286, "grad_norm": 0.5069641470909119, "learning_rate": 5.6250000000000005e-05, "loss": 1.1838, "step": 189 }, { "epoch": 0.01697679093975473, "grad_norm": 0.48622408509254456, "learning_rate": 5.6547619047619046e-05, "loss": 1.2445, "step": 190 }, { "epoch": 0.017066142471016597, "grad_norm": 0.48085153102874756, "learning_rate": 5.6845238095238094e-05, "loss": 1.1636, "step": 191 }, { "epoch": 0.017155494002278462, "grad_norm": 0.4827207326889038, "learning_rate": 5.714285714285714e-05, "loss": 1.2002, "step": 192 }, { "epoch": 0.01724484553354033, "grad_norm": 0.5341354608535767, "learning_rate": 5.744047619047619e-05, "loss": 1.2213, "step": 193 }, { "epoch": 0.017334197064802197, "grad_norm": 0.4994199872016907, "learning_rate": 5.773809523809524e-05, "loss": 1.1627, "step": 194 }, { "epoch": 0.017423548596064067, "grad_norm": 0.5125264525413513, "learning_rate": 5.803571428571429e-05, "loss": 1.2425, "step": 195 }, { "epoch": 0.017512900127325932, "grad_norm": 0.534212589263916, "learning_rate": 5.833333333333334e-05, "loss": 1.1437, "step": 196 }, { "epoch": 0.017602251658587798, "grad_norm": 0.4822109341621399, "learning_rate": 5.863095238095239e-05, "loss": 1.1876, "step": 197 }, { "epoch": 0.017691603189849667, "grad_norm": 0.4595533311367035, "learning_rate": 5.8928571428571435e-05, "loss": 1.1951, "step": 198 }, { "epoch": 0.017780954721111533, "grad_norm": 0.4773840606212616, "learning_rate": 5.922619047619048e-05, "loss": 1.2074, "step": 199 }, { "epoch": 0.0178703062523734, "grad_norm": 0.4863610565662384, "learning_rate": 5.9523809523809524e-05, "loss": 1.1874, "step": 200 }, { "epoch": 0.017959657783635268, "grad_norm": 0.5983034372329712, "learning_rate": 5.982142857142857e-05, "loss": 1.1794, "step": 201 }, { "epoch": 0.018049009314897134, "grad_norm": 0.5169399380683899, "learning_rate": 6.011904761904762e-05, "loss": 1.25, "step": 202 }, { "epoch": 0.018138360846159, "grad_norm": 0.49807149171829224, "learning_rate": 6.041666666666667e-05, "loss": 1.2069, "step": 203 }, { "epoch": 0.01822771237742087, "grad_norm": 0.488290399312973, "learning_rate": 6.0714285714285715e-05, "loss": 1.2747, "step": 204 }, { "epoch": 0.018317063908682735, "grad_norm": 0.49439024925231934, "learning_rate": 6.101190476190477e-05, "loss": 1.2401, "step": 205 }, { "epoch": 0.0184064154399446, "grad_norm": 0.5511735081672668, "learning_rate": 6.130952380952381e-05, "loss": 1.1785, "step": 206 }, { "epoch": 0.01849576697120647, "grad_norm": 0.5392850637435913, "learning_rate": 6.160714285714286e-05, "loss": 1.2753, "step": 207 }, { "epoch": 0.018585118502468335, "grad_norm": 0.4930606484413147, "learning_rate": 6.19047619047619e-05, "loss": 1.2354, "step": 208 }, { "epoch": 0.018674470033730205, "grad_norm": 0.49877017736434937, "learning_rate": 6.220238095238095e-05, "loss": 1.2199, "step": 209 }, { "epoch": 0.01876382156499207, "grad_norm": 0.5193923711776733, "learning_rate": 6.25e-05, "loss": 1.1794, "step": 210 }, { "epoch": 0.018853173096253936, "grad_norm": 0.5702071189880371, "learning_rate": 6.279761904761905e-05, "loss": 1.237, "step": 211 }, { "epoch": 0.018942524627515805, "grad_norm": 0.542413055896759, "learning_rate": 6.30952380952381e-05, "loss": 1.2469, "step": 212 }, { "epoch": 0.01903187615877767, "grad_norm": 0.518735945224762, "learning_rate": 6.339285714285714e-05, "loss": 1.1726, "step": 213 }, { "epoch": 0.019121227690039537, "grad_norm": 0.49173060059547424, "learning_rate": 6.369047619047619e-05, "loss": 1.1974, "step": 214 }, { "epoch": 0.019210579221301406, "grad_norm": 0.46574926376342773, "learning_rate": 6.398809523809524e-05, "loss": 1.2094, "step": 215 }, { "epoch": 0.019299930752563272, "grad_norm": 0.5576440691947937, "learning_rate": 6.428571428571429e-05, "loss": 1.1892, "step": 216 }, { "epoch": 0.019389282283825138, "grad_norm": 0.527204692363739, "learning_rate": 6.458333333333334e-05, "loss": 1.2032, "step": 217 }, { "epoch": 0.019478633815087007, "grad_norm": 0.5345803499221802, "learning_rate": 6.488095238095238e-05, "loss": 1.1977, "step": 218 }, { "epoch": 0.019567985346348873, "grad_norm": 0.5545653700828552, "learning_rate": 6.517857142857143e-05, "loss": 1.1988, "step": 219 }, { "epoch": 0.01965733687761074, "grad_norm": 0.5376014113426208, "learning_rate": 6.547619047619048e-05, "loss": 1.1331, "step": 220 }, { "epoch": 0.019746688408872608, "grad_norm": 0.5965554714202881, "learning_rate": 6.577380952380953e-05, "loss": 1.1333, "step": 221 }, { "epoch": 0.019836039940134473, "grad_norm": 0.48786482214927673, "learning_rate": 6.607142857142857e-05, "loss": 1.2119, "step": 222 }, { "epoch": 0.019925391471396343, "grad_norm": 0.5010102391242981, "learning_rate": 6.636904761904762e-05, "loss": 1.1576, "step": 223 }, { "epoch": 0.02001474300265821, "grad_norm": 0.5798513293266296, "learning_rate": 6.666666666666667e-05, "loss": 1.2731, "step": 224 }, { "epoch": 0.020104094533920074, "grad_norm": 0.600602388381958, "learning_rate": 6.696428571428572e-05, "loss": 1.1185, "step": 225 }, { "epoch": 0.020193446065181943, "grad_norm": 0.5265418291091919, "learning_rate": 6.726190476190477e-05, "loss": 1.2328, "step": 226 }, { "epoch": 0.02028279759644381, "grad_norm": 0.6028567552566528, "learning_rate": 6.755952380952381e-05, "loss": 1.222, "step": 227 }, { "epoch": 0.020372149127705675, "grad_norm": 0.5283799171447754, "learning_rate": 6.785714285714286e-05, "loss": 1.2209, "step": 228 }, { "epoch": 0.020461500658967544, "grad_norm": 0.5363395810127258, "learning_rate": 6.815476190476191e-05, "loss": 1.1309, "step": 229 }, { "epoch": 0.02055085219022941, "grad_norm": 0.5541881918907166, "learning_rate": 6.845238095238096e-05, "loss": 1.1629, "step": 230 }, { "epoch": 0.020640203721491276, "grad_norm": 0.5489634275436401, "learning_rate": 6.875e-05, "loss": 1.148, "step": 231 }, { "epoch": 0.020729555252753145, "grad_norm": 0.582408607006073, "learning_rate": 6.904761904761905e-05, "loss": 1.1505, "step": 232 }, { "epoch": 0.02081890678401501, "grad_norm": 0.5389354825019836, "learning_rate": 6.93452380952381e-05, "loss": 1.1644, "step": 233 }, { "epoch": 0.020908258315276877, "grad_norm": 0.6120844483375549, "learning_rate": 6.964285714285715e-05, "loss": 1.1522, "step": 234 }, { "epoch": 0.020997609846538746, "grad_norm": 0.5017001628875732, "learning_rate": 6.99404761904762e-05, "loss": 1.2153, "step": 235 }, { "epoch": 0.02108696137780061, "grad_norm": 0.5250133275985718, "learning_rate": 7.023809523809524e-05, "loss": 1.2756, "step": 236 }, { "epoch": 0.02117631290906248, "grad_norm": 0.5131645202636719, "learning_rate": 7.053571428571429e-05, "loss": 1.1909, "step": 237 }, { "epoch": 0.021265664440324347, "grad_norm": 0.5468138456344604, "learning_rate": 7.083333333333334e-05, "loss": 1.1209, "step": 238 }, { "epoch": 0.021355015971586212, "grad_norm": 0.5460432171821594, "learning_rate": 7.113095238095239e-05, "loss": 1.2472, "step": 239 }, { "epoch": 0.02144436750284808, "grad_norm": 0.5693673491477966, "learning_rate": 7.142857142857143e-05, "loss": 1.1466, "step": 240 }, { "epoch": 0.021533719034109947, "grad_norm": 0.5243647694587708, "learning_rate": 7.172619047619048e-05, "loss": 1.1108, "step": 241 }, { "epoch": 0.021623070565371813, "grad_norm": 0.51932692527771, "learning_rate": 7.202380952380953e-05, "loss": 1.2149, "step": 242 }, { "epoch": 0.021712422096633682, "grad_norm": 0.5603271722793579, "learning_rate": 7.232142857142858e-05, "loss": 1.1534, "step": 243 }, { "epoch": 0.021801773627895548, "grad_norm": 0.5533414483070374, "learning_rate": 7.261904761904762e-05, "loss": 1.1692, "step": 244 }, { "epoch": 0.021891125159157414, "grad_norm": 0.5640945434570312, "learning_rate": 7.291666666666667e-05, "loss": 1.191, "step": 245 }, { "epoch": 0.021980476690419283, "grad_norm": 0.565290629863739, "learning_rate": 7.321428571428571e-05, "loss": 1.1163, "step": 246 }, { "epoch": 0.02206982822168115, "grad_norm": 0.5642583966255188, "learning_rate": 7.351190476190477e-05, "loss": 1.1447, "step": 247 }, { "epoch": 0.022159179752943015, "grad_norm": 0.5096793174743652, "learning_rate": 7.380952380952382e-05, "loss": 1.1812, "step": 248 }, { "epoch": 0.022248531284204884, "grad_norm": 0.49320727586746216, "learning_rate": 7.410714285714286e-05, "loss": 1.1238, "step": 249 }, { "epoch": 0.02233788281546675, "grad_norm": 0.5757387280464172, "learning_rate": 7.440476190476191e-05, "loss": 1.2047, "step": 250 }, { "epoch": 0.02242723434672862, "grad_norm": 0.5943542718887329, "learning_rate": 7.470238095238096e-05, "loss": 1.146, "step": 251 }, { "epoch": 0.022516585877990485, "grad_norm": 0.5858287215232849, "learning_rate": 7.500000000000001e-05, "loss": 1.1685, "step": 252 }, { "epoch": 0.02260593740925235, "grad_norm": 0.4847318232059479, "learning_rate": 7.529761904761905e-05, "loss": 1.1975, "step": 253 }, { "epoch": 0.02269528894051422, "grad_norm": 0.5576103925704956, "learning_rate": 7.55952380952381e-05, "loss": 1.158, "step": 254 }, { "epoch": 0.022784640471776085, "grad_norm": 0.5339264869689941, "learning_rate": 7.589285714285714e-05, "loss": 1.1694, "step": 255 }, { "epoch": 0.02287399200303795, "grad_norm": 0.5801980495452881, "learning_rate": 7.619047619047618e-05, "loss": 1.1646, "step": 256 }, { "epoch": 0.02296334353429982, "grad_norm": 0.5421757698059082, "learning_rate": 7.648809523809523e-05, "loss": 1.1546, "step": 257 }, { "epoch": 0.023052695065561686, "grad_norm": 0.546415388584137, "learning_rate": 7.67857142857143e-05, "loss": 1.2262, "step": 258 }, { "epoch": 0.023142046596823552, "grad_norm": 0.584190309047699, "learning_rate": 7.708333333333334e-05, "loss": 1.1528, "step": 259 }, { "epoch": 0.02323139812808542, "grad_norm": 0.5467146635055542, "learning_rate": 7.738095238095239e-05, "loss": 1.1452, "step": 260 }, { "epoch": 0.023320749659347287, "grad_norm": 0.5554835796356201, "learning_rate": 7.767857142857144e-05, "loss": 1.1936, "step": 261 }, { "epoch": 0.023410101190609153, "grad_norm": 0.5491194725036621, "learning_rate": 7.797619047619048e-05, "loss": 1.1152, "step": 262 }, { "epoch": 0.023499452721871022, "grad_norm": 0.5814348459243774, "learning_rate": 7.827380952380953e-05, "loss": 1.1432, "step": 263 }, { "epoch": 0.023588804253132888, "grad_norm": 0.598092257976532, "learning_rate": 7.857142857142858e-05, "loss": 1.1661, "step": 264 }, { "epoch": 0.023678155784394753, "grad_norm": 0.6232405304908752, "learning_rate": 7.886904761904761e-05, "loss": 1.1545, "step": 265 }, { "epoch": 0.023767507315656623, "grad_norm": 0.53800368309021, "learning_rate": 7.916666666666666e-05, "loss": 1.2058, "step": 266 }, { "epoch": 0.02385685884691849, "grad_norm": 0.5912578105926514, "learning_rate": 7.946428571428571e-05, "loss": 1.1638, "step": 267 }, { "epoch": 0.023946210378180358, "grad_norm": 0.6035777926445007, "learning_rate": 7.976190476190477e-05, "loss": 1.1325, "step": 268 }, { "epoch": 0.024035561909442223, "grad_norm": 0.5701507925987244, "learning_rate": 8.005952380952382e-05, "loss": 1.1693, "step": 269 }, { "epoch": 0.02412491344070409, "grad_norm": 0.9521252512931824, "learning_rate": 8.035714285714287e-05, "loss": 1.1926, "step": 270 }, { "epoch": 0.02421426497196596, "grad_norm": 0.5579119324684143, "learning_rate": 8.065476190476191e-05, "loss": 1.2114, "step": 271 }, { "epoch": 0.024303616503227824, "grad_norm": 0.5646568536758423, "learning_rate": 8.095238095238096e-05, "loss": 1.1351, "step": 272 }, { "epoch": 0.02439296803448969, "grad_norm": 0.5492066740989685, "learning_rate": 8.125000000000001e-05, "loss": 1.187, "step": 273 }, { "epoch": 0.02448231956575156, "grad_norm": 0.5730810761451721, "learning_rate": 8.154761904761904e-05, "loss": 1.1687, "step": 274 }, { "epoch": 0.024571671097013425, "grad_norm": 0.5157971978187561, "learning_rate": 8.184523809523809e-05, "loss": 1.1312, "step": 275 }, { "epoch": 0.02466102262827529, "grad_norm": 0.5865328311920166, "learning_rate": 8.214285714285714e-05, "loss": 1.207, "step": 276 }, { "epoch": 0.02475037415953716, "grad_norm": 0.47492775321006775, "learning_rate": 8.244047619047619e-05, "loss": 1.1525, "step": 277 }, { "epoch": 0.024839725690799026, "grad_norm": 0.5046519041061401, "learning_rate": 8.273809523809524e-05, "loss": 1.1959, "step": 278 }, { "epoch": 0.02492907722206089, "grad_norm": 0.5498790144920349, "learning_rate": 8.30357142857143e-05, "loss": 1.1969, "step": 279 }, { "epoch": 0.02501842875332276, "grad_norm": 0.5528784394264221, "learning_rate": 8.333333333333334e-05, "loss": 1.0772, "step": 280 }, { "epoch": 0.025107780284584626, "grad_norm": 0.5229126811027527, "learning_rate": 8.363095238095239e-05, "loss": 1.1485, "step": 281 }, { "epoch": 0.025197131815846496, "grad_norm": 0.5191675424575806, "learning_rate": 8.392857142857144e-05, "loss": 1.1568, "step": 282 }, { "epoch": 0.02528648334710836, "grad_norm": 0.5272664427757263, "learning_rate": 8.422619047619049e-05, "loss": 1.1794, "step": 283 }, { "epoch": 0.025375834878370227, "grad_norm": 0.5842853784561157, "learning_rate": 8.452380952380952e-05, "loss": 1.101, "step": 284 }, { "epoch": 0.025465186409632096, "grad_norm": 0.501756489276886, "learning_rate": 8.482142857142857e-05, "loss": 1.1639, "step": 285 }, { "epoch": 0.025554537940893962, "grad_norm": 0.491220623254776, "learning_rate": 8.511904761904762e-05, "loss": 1.2092, "step": 286 }, { "epoch": 0.025643889472155828, "grad_norm": 0.5032030940055847, "learning_rate": 8.541666666666666e-05, "loss": 1.181, "step": 287 }, { "epoch": 0.025733241003417697, "grad_norm": 0.5295215249061584, "learning_rate": 8.571428571428571e-05, "loss": 1.1861, "step": 288 }, { "epoch": 0.025822592534679563, "grad_norm": 0.5146911144256592, "learning_rate": 8.601190476190477e-05, "loss": 1.1854, "step": 289 }, { "epoch": 0.02591194406594143, "grad_norm": 0.5277708172798157, "learning_rate": 8.630952380952382e-05, "loss": 1.1318, "step": 290 }, { "epoch": 0.026001295597203298, "grad_norm": 0.5158389210700989, "learning_rate": 8.660714285714287e-05, "loss": 1.1603, "step": 291 }, { "epoch": 0.026090647128465164, "grad_norm": 0.4982542395591736, "learning_rate": 8.690476190476192e-05, "loss": 1.185, "step": 292 }, { "epoch": 0.02617999865972703, "grad_norm": 0.5195929408073425, "learning_rate": 8.720238095238095e-05, "loss": 1.2306, "step": 293 }, { "epoch": 0.0262693501909889, "grad_norm": 0.48286932706832886, "learning_rate": 8.75e-05, "loss": 1.1665, "step": 294 }, { "epoch": 0.026358701722250764, "grad_norm": 0.5617235898971558, "learning_rate": 8.779761904761905e-05, "loss": 1.1484, "step": 295 }, { "epoch": 0.026448053253512634, "grad_norm": 0.5541561841964722, "learning_rate": 8.80952380952381e-05, "loss": 0.9718, "step": 296 }, { "epoch": 0.0265374047847745, "grad_norm": 0.47326454520225525, "learning_rate": 8.839285714285714e-05, "loss": 1.1289, "step": 297 }, { "epoch": 0.026626756316036365, "grad_norm": 0.5681378841400146, "learning_rate": 8.869047619047619e-05, "loss": 1.1569, "step": 298 }, { "epoch": 0.026716107847298234, "grad_norm": 0.5514600276947021, "learning_rate": 8.898809523809524e-05, "loss": 1.1495, "step": 299 }, { "epoch": 0.0268054593785601, "grad_norm": 0.5337786078453064, "learning_rate": 8.92857142857143e-05, "loss": 1.2366, "step": 300 }, { "epoch": 0.026894810909821966, "grad_norm": 0.5156247019767761, "learning_rate": 8.958333333333335e-05, "loss": 1.1969, "step": 301 }, { "epoch": 0.026984162441083835, "grad_norm": 0.5061919689178467, "learning_rate": 8.988095238095238e-05, "loss": 1.1633, "step": 302 }, { "epoch": 0.0270735139723457, "grad_norm": 0.6192631721496582, "learning_rate": 9.017857142857143e-05, "loss": 1.1552, "step": 303 }, { "epoch": 0.027162865503607567, "grad_norm": 0.5444059371948242, "learning_rate": 9.047619047619048e-05, "loss": 1.1532, "step": 304 }, { "epoch": 0.027252217034869436, "grad_norm": 0.49790215492248535, "learning_rate": 9.077380952380952e-05, "loss": 1.1339, "step": 305 }, { "epoch": 0.0273415685661313, "grad_norm": 0.5882278680801392, "learning_rate": 9.107142857142857e-05, "loss": 1.1195, "step": 306 }, { "epoch": 0.027430920097393167, "grad_norm": 0.5879011154174805, "learning_rate": 9.136904761904762e-05, "loss": 1.1341, "step": 307 }, { "epoch": 0.027520271628655037, "grad_norm": 0.5458969473838806, "learning_rate": 9.166666666666667e-05, "loss": 1.1498, "step": 308 }, { "epoch": 0.027609623159916902, "grad_norm": 0.5539296865463257, "learning_rate": 9.196428571428572e-05, "loss": 1.1261, "step": 309 }, { "epoch": 0.02769897469117877, "grad_norm": 0.508406400680542, "learning_rate": 9.226190476190478e-05, "loss": 1.1318, "step": 310 }, { "epoch": 0.027788326222440637, "grad_norm": 0.5266230702400208, "learning_rate": 9.255952380952382e-05, "loss": 1.1463, "step": 311 }, { "epoch": 0.027877677753702503, "grad_norm": 0.5170016288757324, "learning_rate": 9.285714285714286e-05, "loss": 1.1736, "step": 312 }, { "epoch": 0.027967029284964372, "grad_norm": 0.5622848272323608, "learning_rate": 9.31547619047619e-05, "loss": 1.2113, "step": 313 }, { "epoch": 0.028056380816226238, "grad_norm": 0.5831321477890015, "learning_rate": 9.345238095238095e-05, "loss": 1.1497, "step": 314 }, { "epoch": 0.028145732347488104, "grad_norm": 0.46541884541511536, "learning_rate": 9.375e-05, "loss": 1.146, "step": 315 }, { "epoch": 0.028235083878749973, "grad_norm": 0.5889435410499573, "learning_rate": 9.404761904761905e-05, "loss": 1.1694, "step": 316 }, { "epoch": 0.02832443541001184, "grad_norm": 0.49173977971076965, "learning_rate": 9.43452380952381e-05, "loss": 1.1854, "step": 317 }, { "epoch": 0.028413786941273705, "grad_norm": 0.6178017258644104, "learning_rate": 9.464285714285715e-05, "loss": 1.1072, "step": 318 }, { "epoch": 0.028503138472535574, "grad_norm": 0.5283975005149841, "learning_rate": 9.494047619047619e-05, "loss": 1.0578, "step": 319 }, { "epoch": 0.02859249000379744, "grad_norm": 0.5168601870536804, "learning_rate": 9.523809523809524e-05, "loss": 1.1003, "step": 320 }, { "epoch": 0.028681841535059305, "grad_norm": 0.4885217845439911, "learning_rate": 9.553571428571429e-05, "loss": 1.1953, "step": 321 }, { "epoch": 0.028771193066321175, "grad_norm": 0.4856249690055847, "learning_rate": 9.583333333333334e-05, "loss": 1.2207, "step": 322 }, { "epoch": 0.02886054459758304, "grad_norm": 0.5465936660766602, "learning_rate": 9.613095238095238e-05, "loss": 1.1668, "step": 323 }, { "epoch": 0.02894989612884491, "grad_norm": 0.5614081025123596, "learning_rate": 9.642857142857143e-05, "loss": 1.1091, "step": 324 }, { "epoch": 0.029039247660106775, "grad_norm": 0.5728946328163147, "learning_rate": 9.672619047619048e-05, "loss": 1.1232, "step": 325 }, { "epoch": 0.02912859919136864, "grad_norm": 0.6166178584098816, "learning_rate": 9.702380952380953e-05, "loss": 1.1756, "step": 326 }, { "epoch": 0.02921795072263051, "grad_norm": 0.5272330641746521, "learning_rate": 9.732142857142858e-05, "loss": 1.1131, "step": 327 }, { "epoch": 0.029307302253892376, "grad_norm": 0.4859834909439087, "learning_rate": 9.761904761904762e-05, "loss": 1.2047, "step": 328 }, { "epoch": 0.029396653785154242, "grad_norm": 0.5186814069747925, "learning_rate": 9.791666666666667e-05, "loss": 1.0997, "step": 329 }, { "epoch": 0.02948600531641611, "grad_norm": 0.5721623301506042, "learning_rate": 9.821428571428572e-05, "loss": 1.0986, "step": 330 }, { "epoch": 0.029575356847677977, "grad_norm": 0.5345954895019531, "learning_rate": 9.851190476190477e-05, "loss": 1.1971, "step": 331 }, { "epoch": 0.029664708378939843, "grad_norm": 0.5651283264160156, "learning_rate": 9.880952380952381e-05, "loss": 1.1753, "step": 332 }, { "epoch": 0.029754059910201712, "grad_norm": 0.6897709369659424, "learning_rate": 9.910714285714286e-05, "loss": 1.1662, "step": 333 }, { "epoch": 0.029843411441463578, "grad_norm": 0.5349520444869995, "learning_rate": 9.940476190476191e-05, "loss": 1.1446, "step": 334 }, { "epoch": 0.029932762972725444, "grad_norm": 0.5659092664718628, "learning_rate": 9.970238095238096e-05, "loss": 1.1273, "step": 335 }, { "epoch": 0.030022114503987313, "grad_norm": 0.4331722557544708, "learning_rate": 0.0001, "loss": 1.192, "step": 336 }, { "epoch": 0.03011146603524918, "grad_norm": 0.5557327270507812, "learning_rate": 9.999999790598352e-05, "loss": 1.1184, "step": 337 }, { "epoch": 0.030200817566511048, "grad_norm": 0.5717477798461914, "learning_rate": 9.999999162393425e-05, "loss": 1.1695, "step": 338 }, { "epoch": 0.030290169097772913, "grad_norm": 0.6119928359985352, "learning_rate": 9.999998115385273e-05, "loss": 1.1202, "step": 339 }, { "epoch": 0.03037952062903478, "grad_norm": 0.5220287442207336, "learning_rate": 9.999996649573982e-05, "loss": 1.0898, "step": 340 }, { "epoch": 0.03046887216029665, "grad_norm": 0.5707947015762329, "learning_rate": 9.999994764959675e-05, "loss": 1.1645, "step": 341 }, { "epoch": 0.030558223691558514, "grad_norm": 0.5686008930206299, "learning_rate": 9.99999246154251e-05, "loss": 1.1141, "step": 342 }, { "epoch": 0.03064757522282038, "grad_norm": 0.551203727722168, "learning_rate": 9.999989739322682e-05, "loss": 1.1881, "step": 343 }, { "epoch": 0.03073692675408225, "grad_norm": 0.5531251430511475, "learning_rate": 9.999986598300417e-05, "loss": 1.0881, "step": 344 }, { "epoch": 0.030826278285344115, "grad_norm": 0.5076424479484558, "learning_rate": 9.999983038475978e-05, "loss": 1.1175, "step": 345 }, { "epoch": 0.03091562981660598, "grad_norm": 0.5364487171173096, "learning_rate": 9.999979059849662e-05, "loss": 1.1934, "step": 346 }, { "epoch": 0.03100498134786785, "grad_norm": 0.557384729385376, "learning_rate": 9.999974662421805e-05, "loss": 1.1748, "step": 347 }, { "epoch": 0.031094332879129716, "grad_norm": 0.5250005125999451, "learning_rate": 9.999969846192774e-05, "loss": 1.1338, "step": 348 }, { "epoch": 0.03118368441039158, "grad_norm": 0.4806252121925354, "learning_rate": 9.999964611162974e-05, "loss": 1.1639, "step": 349 }, { "epoch": 0.03127303594165345, "grad_norm": 0.520580530166626, "learning_rate": 9.99995895733284e-05, "loss": 1.1534, "step": 350 }, { "epoch": 0.03136238747291532, "grad_norm": 0.5017713308334351, "learning_rate": 9.999952884702848e-05, "loss": 1.1473, "step": 351 }, { "epoch": 0.031451739004177186, "grad_norm": 0.5286357402801514, "learning_rate": 9.999946393273506e-05, "loss": 1.179, "step": 352 }, { "epoch": 0.03154109053543905, "grad_norm": 0.5509297251701355, "learning_rate": 9.999939483045359e-05, "loss": 1.1269, "step": 353 }, { "epoch": 0.03163044206670092, "grad_norm": 0.4697697162628174, "learning_rate": 9.999932154018983e-05, "loss": 1.1569, "step": 354 }, { "epoch": 0.031719793597962787, "grad_norm": 0.4342000484466553, "learning_rate": 9.999924406194996e-05, "loss": 1.1648, "step": 355 }, { "epoch": 0.03180914512922465, "grad_norm": 0.5409436225891113, "learning_rate": 9.999916239574043e-05, "loss": 1.0954, "step": 356 }, { "epoch": 0.03189849666048652, "grad_norm": 0.529708981513977, "learning_rate": 9.99990765415681e-05, "loss": 1.1114, "step": 357 }, { "epoch": 0.03198784819174839, "grad_norm": 0.5057774186134338, "learning_rate": 9.999898649944016e-05, "loss": 1.1176, "step": 358 }, { "epoch": 0.032077199723010257, "grad_norm": 0.4980236291885376, "learning_rate": 9.999889226936415e-05, "loss": 1.1574, "step": 359 }, { "epoch": 0.03216655125427212, "grad_norm": 0.49295690655708313, "learning_rate": 9.999879385134797e-05, "loss": 1.1922, "step": 360 }, { "epoch": 0.03225590278553399, "grad_norm": 0.4822183847427368, "learning_rate": 9.999869124539984e-05, "loss": 1.0773, "step": 361 }, { "epoch": 0.03234525431679586, "grad_norm": 0.5717249512672424, "learning_rate": 9.999858445152839e-05, "loss": 1.1254, "step": 362 }, { "epoch": 0.03243460584805772, "grad_norm": 0.49593716859817505, "learning_rate": 9.999847346974253e-05, "loss": 1.1105, "step": 363 }, { "epoch": 0.03252395737931959, "grad_norm": 0.5179683566093445, "learning_rate": 9.999835830005158e-05, "loss": 1.0686, "step": 364 }, { "epoch": 0.03261330891058146, "grad_norm": 0.5655368566513062, "learning_rate": 9.999823894246517e-05, "loss": 1.0889, "step": 365 }, { "epoch": 0.03270266044184332, "grad_norm": 0.49578356742858887, "learning_rate": 9.999811539699331e-05, "loss": 1.1124, "step": 366 }, { "epoch": 0.03279201197310519, "grad_norm": 0.5003335475921631, "learning_rate": 9.999798766364634e-05, "loss": 1.1669, "step": 367 }, { "epoch": 0.03288136350436706, "grad_norm": 0.5258259177207947, "learning_rate": 9.999785574243496e-05, "loss": 1.1333, "step": 368 }, { "epoch": 0.03297071503562892, "grad_norm": 0.5132459998130798, "learning_rate": 9.999771963337024e-05, "loss": 1.1795, "step": 369 }, { "epoch": 0.03306006656689079, "grad_norm": 0.48852595686912537, "learning_rate": 9.999757933646354e-05, "loss": 1.127, "step": 370 }, { "epoch": 0.03314941809815266, "grad_norm": 0.530531108379364, "learning_rate": 9.999743485172666e-05, "loss": 1.1424, "step": 371 }, { "epoch": 0.03323876962941452, "grad_norm": 0.45526745915412903, "learning_rate": 9.999728617917165e-05, "loss": 1.213, "step": 372 }, { "epoch": 0.03332812116067639, "grad_norm": 0.5421516299247742, "learning_rate": 9.9997133318811e-05, "loss": 1.1793, "step": 373 }, { "epoch": 0.03341747269193826, "grad_norm": 0.5866374373435974, "learning_rate": 9.999697627065752e-05, "loss": 1.0692, "step": 374 }, { "epoch": 0.03350682422320012, "grad_norm": 0.5176992416381836, "learning_rate": 9.999681503472433e-05, "loss": 1.1502, "step": 375 }, { "epoch": 0.03359617575446199, "grad_norm": 0.5438038110733032, "learning_rate": 9.999664961102495e-05, "loss": 1.1342, "step": 376 }, { "epoch": 0.03368552728572386, "grad_norm": 0.5158547759056091, "learning_rate": 9.999647999957325e-05, "loss": 1.1954, "step": 377 }, { "epoch": 0.03377487881698572, "grad_norm": 0.46927282214164734, "learning_rate": 9.999630620038343e-05, "loss": 1.1292, "step": 378 }, { "epoch": 0.03386423034824759, "grad_norm": 0.5032052397727966, "learning_rate": 9.999612821347003e-05, "loss": 1.108, "step": 379 }, { "epoch": 0.03395358187950946, "grad_norm": 0.48456865549087524, "learning_rate": 9.999594603884798e-05, "loss": 1.1719, "step": 380 }, { "epoch": 0.034042933410771324, "grad_norm": 0.49805065989494324, "learning_rate": 9.999575967653252e-05, "loss": 1.2259, "step": 381 }, { "epoch": 0.03413228494203319, "grad_norm": 0.5582330226898193, "learning_rate": 9.999556912653929e-05, "loss": 1.2235, "step": 382 }, { "epoch": 0.03422163647329506, "grad_norm": 0.5083693265914917, "learning_rate": 9.999537438888423e-05, "loss": 1.1593, "step": 383 }, { "epoch": 0.034310988004556925, "grad_norm": 0.49797725677490234, "learning_rate": 9.999517546358364e-05, "loss": 1.1255, "step": 384 }, { "epoch": 0.034400339535818794, "grad_norm": 0.5025597214698792, "learning_rate": 9.999497235065418e-05, "loss": 1.093, "step": 385 }, { "epoch": 0.03448969106708066, "grad_norm": 0.49305295944213867, "learning_rate": 9.999476505011289e-05, "loss": 1.1264, "step": 386 }, { "epoch": 0.034579042598342526, "grad_norm": 0.46636465191841125, "learning_rate": 9.999455356197713e-05, "loss": 1.0769, "step": 387 }, { "epoch": 0.034668394129604395, "grad_norm": 0.5070281028747559, "learning_rate": 9.999433788626461e-05, "loss": 1.0925, "step": 388 }, { "epoch": 0.034757745660866264, "grad_norm": 0.5060571432113647, "learning_rate": 9.999411802299339e-05, "loss": 1.2208, "step": 389 }, { "epoch": 0.03484709719212813, "grad_norm": 0.5558078289031982, "learning_rate": 9.999389397218186e-05, "loss": 1.1395, "step": 390 }, { "epoch": 0.034936448723389996, "grad_norm": 0.49151378870010376, "learning_rate": 9.999366573384884e-05, "loss": 1.1426, "step": 391 }, { "epoch": 0.035025800254651865, "grad_norm": 0.4773429036140442, "learning_rate": 9.99934333080134e-05, "loss": 1.1587, "step": 392 }, { "epoch": 0.035115151785913734, "grad_norm": 0.43944311141967773, "learning_rate": 9.999319669469505e-05, "loss": 1.2301, "step": 393 }, { "epoch": 0.035204503317175596, "grad_norm": 0.4840754270553589, "learning_rate": 9.999295589391358e-05, "loss": 1.1404, "step": 394 }, { "epoch": 0.035293854848437466, "grad_norm": 0.49783623218536377, "learning_rate": 9.999271090568918e-05, "loss": 1.0676, "step": 395 }, { "epoch": 0.035383206379699335, "grad_norm": 0.47931092977523804, "learning_rate": 9.999246173004233e-05, "loss": 1.1528, "step": 396 }, { "epoch": 0.0354725579109612, "grad_norm": 0.47932660579681396, "learning_rate": 9.999220836699395e-05, "loss": 1.1604, "step": 397 }, { "epoch": 0.035561909442223066, "grad_norm": 0.5339416861534119, "learning_rate": 9.999195081656522e-05, "loss": 1.097, "step": 398 }, { "epoch": 0.035651260973484936, "grad_norm": 0.5252090096473694, "learning_rate": 9.999168907877776e-05, "loss": 1.085, "step": 399 }, { "epoch": 0.0357406125047468, "grad_norm": 0.5167770981788635, "learning_rate": 9.999142315365345e-05, "loss": 1.1563, "step": 400 }, { "epoch": 0.03582996403600867, "grad_norm": 0.6297523975372314, "learning_rate": 9.999115304121457e-05, "loss": 1.0762, "step": 401 }, { "epoch": 0.035919315567270536, "grad_norm": 0.5326511859893799, "learning_rate": 9.999087874148379e-05, "loss": 1.1082, "step": 402 }, { "epoch": 0.0360086670985324, "grad_norm": 0.5279747843742371, "learning_rate": 9.999060025448403e-05, "loss": 1.1412, "step": 403 }, { "epoch": 0.03609801862979427, "grad_norm": 0.4827171564102173, "learning_rate": 9.999031758023863e-05, "loss": 1.1705, "step": 404 }, { "epoch": 0.03618737016105614, "grad_norm": 0.5073494911193848, "learning_rate": 9.999003071877129e-05, "loss": 1.1579, "step": 405 }, { "epoch": 0.036276721692318, "grad_norm": 0.4927610158920288, "learning_rate": 9.9989739670106e-05, "loss": 1.1158, "step": 406 }, { "epoch": 0.03636607322357987, "grad_norm": 0.45825162529945374, "learning_rate": 9.998944443426719e-05, "loss": 1.1761, "step": 407 }, { "epoch": 0.03645542475484174, "grad_norm": 0.5363956093788147, "learning_rate": 9.998914501127954e-05, "loss": 1.15, "step": 408 }, { "epoch": 0.0365447762861036, "grad_norm": 0.5403926968574524, "learning_rate": 9.998884140116816e-05, "loss": 1.0831, "step": 409 }, { "epoch": 0.03663412781736547, "grad_norm": 0.5043397545814514, "learning_rate": 9.998853360395846e-05, "loss": 1.0479, "step": 410 }, { "epoch": 0.03672347934862734, "grad_norm": 0.5072253346443176, "learning_rate": 9.998822161967623e-05, "loss": 1.0596, "step": 411 }, { "epoch": 0.0368128308798892, "grad_norm": 0.4440356492996216, "learning_rate": 9.99879054483476e-05, "loss": 1.1423, "step": 412 }, { "epoch": 0.03690218241115107, "grad_norm": 0.5258128046989441, "learning_rate": 9.998758508999906e-05, "loss": 1.1072, "step": 413 }, { "epoch": 0.03699153394241294, "grad_norm": 0.48225632309913635, "learning_rate": 9.998726054465744e-05, "loss": 1.1436, "step": 414 }, { "epoch": 0.0370808854736748, "grad_norm": 0.49311602115631104, "learning_rate": 9.998693181234992e-05, "loss": 1.0847, "step": 415 }, { "epoch": 0.03717023700493667, "grad_norm": 0.46585437655448914, "learning_rate": 9.998659889310406e-05, "loss": 1.1562, "step": 416 }, { "epoch": 0.03725958853619854, "grad_norm": 0.5066165924072266, "learning_rate": 9.99862617869477e-05, "loss": 1.1278, "step": 417 }, { "epoch": 0.03734894006746041, "grad_norm": 0.4618039131164551, "learning_rate": 9.998592049390911e-05, "loss": 1.1738, "step": 418 }, { "epoch": 0.03743829159872227, "grad_norm": 0.4884462356567383, "learning_rate": 9.998557501401687e-05, "loss": 1.1826, "step": 419 }, { "epoch": 0.03752764312998414, "grad_norm": 0.4807124137878418, "learning_rate": 9.99852253472999e-05, "loss": 1.1397, "step": 420 }, { "epoch": 0.03761699466124601, "grad_norm": 0.4974918067455292, "learning_rate": 9.998487149378752e-05, "loss": 1.1481, "step": 421 }, { "epoch": 0.03770634619250787, "grad_norm": 0.42765894532203674, "learning_rate": 9.998451345350935e-05, "loss": 1.1967, "step": 422 }, { "epoch": 0.03779569772376974, "grad_norm": 0.509459912776947, "learning_rate": 9.998415122649537e-05, "loss": 1.1086, "step": 423 }, { "epoch": 0.03788504925503161, "grad_norm": 0.4979933202266693, "learning_rate": 9.998378481277593e-05, "loss": 1.1003, "step": 424 }, { "epoch": 0.03797440078629347, "grad_norm": 0.4624142348766327, "learning_rate": 9.998341421238173e-05, "loss": 1.1323, "step": 425 }, { "epoch": 0.03806375231755534, "grad_norm": 0.4574481248855591, "learning_rate": 9.998303942534382e-05, "loss": 1.1195, "step": 426 }, { "epoch": 0.03815310384881721, "grad_norm": 0.542635977268219, "learning_rate": 9.998266045169356e-05, "loss": 1.1185, "step": 427 }, { "epoch": 0.038242455380079074, "grad_norm": 0.48302701115608215, "learning_rate": 9.99822772914627e-05, "loss": 1.13, "step": 428 }, { "epoch": 0.03833180691134094, "grad_norm": 0.4780206084251404, "learning_rate": 9.998188994468337e-05, "loss": 1.0496, "step": 429 }, { "epoch": 0.03842115844260281, "grad_norm": 0.5406165719032288, "learning_rate": 9.998149841138797e-05, "loss": 1.0936, "step": 430 }, { "epoch": 0.038510509973864675, "grad_norm": 0.4520138204097748, "learning_rate": 9.998110269160932e-05, "loss": 1.2128, "step": 431 }, { "epoch": 0.038599861505126544, "grad_norm": 0.4702879786491394, "learning_rate": 9.998070278538057e-05, "loss": 1.2563, "step": 432 }, { "epoch": 0.03868921303638841, "grad_norm": 0.5223757028579712, "learning_rate": 9.998029869273518e-05, "loss": 1.091, "step": 433 }, { "epoch": 0.038778564567650275, "grad_norm": 0.5411685705184937, "learning_rate": 9.997989041370704e-05, "loss": 1.1356, "step": 434 }, { "epoch": 0.038867916098912145, "grad_norm": 0.48428136110305786, "learning_rate": 9.997947794833034e-05, "loss": 1.2075, "step": 435 }, { "epoch": 0.038957267630174014, "grad_norm": 0.4824248254299164, "learning_rate": 9.997906129663961e-05, "loss": 1.1918, "step": 436 }, { "epoch": 0.039046619161435876, "grad_norm": 0.5200150609016418, "learning_rate": 9.997864045866975e-05, "loss": 1.1364, "step": 437 }, { "epoch": 0.039135970692697745, "grad_norm": 0.5119284391403198, "learning_rate": 9.997821543445602e-05, "loss": 1.1136, "step": 438 }, { "epoch": 0.039225322223959615, "grad_norm": 0.4953431785106659, "learning_rate": 9.997778622403402e-05, "loss": 1.2217, "step": 439 }, { "epoch": 0.03931467375522148, "grad_norm": 0.49554356932640076, "learning_rate": 9.997735282743969e-05, "loss": 1.1535, "step": 440 }, { "epoch": 0.039404025286483346, "grad_norm": 0.5138264894485474, "learning_rate": 9.997691524470936e-05, "loss": 0.9905, "step": 441 }, { "epoch": 0.039493376817745215, "grad_norm": 0.4627537727355957, "learning_rate": 9.997647347587964e-05, "loss": 1.1075, "step": 442 }, { "epoch": 0.03958272834900708, "grad_norm": 0.5191687941551208, "learning_rate": 9.997602752098758e-05, "loss": 1.172, "step": 443 }, { "epoch": 0.03967207988026895, "grad_norm": 0.4805525839328766, "learning_rate": 9.997557738007049e-05, "loss": 1.154, "step": 444 }, { "epoch": 0.039761431411530816, "grad_norm": 0.5130792260169983, "learning_rate": 9.99751230531661e-05, "loss": 1.0842, "step": 445 }, { "epoch": 0.039850782942792685, "grad_norm": 0.4610874354839325, "learning_rate": 9.997466454031246e-05, "loss": 1.0973, "step": 446 }, { "epoch": 0.03994013447405455, "grad_norm": 0.43982231616973877, "learning_rate": 9.997420184154798e-05, "loss": 1.156, "step": 447 }, { "epoch": 0.04002948600531642, "grad_norm": 0.49081990122795105, "learning_rate": 9.99737349569114e-05, "loss": 1.0796, "step": 448 }, { "epoch": 0.040118837536578286, "grad_norm": 0.5369452238082886, "learning_rate": 9.997326388644183e-05, "loss": 1.0677, "step": 449 }, { "epoch": 0.04020818906784015, "grad_norm": 0.4784664213657379, "learning_rate": 9.997278863017874e-05, "loss": 1.0792, "step": 450 }, { "epoch": 0.04029754059910202, "grad_norm": 0.53533935546875, "learning_rate": 9.997230918816191e-05, "loss": 1.0963, "step": 451 }, { "epoch": 0.04038689213036389, "grad_norm": 0.5231233239173889, "learning_rate": 9.997182556043155e-05, "loss": 1.1181, "step": 452 }, { "epoch": 0.04047624366162575, "grad_norm": 0.4834752380847931, "learning_rate": 9.997133774702812e-05, "loss": 1.1174, "step": 453 }, { "epoch": 0.04056559519288762, "grad_norm": 0.48885536193847656, "learning_rate": 9.997084574799252e-05, "loss": 1.0655, "step": 454 }, { "epoch": 0.04065494672414949, "grad_norm": 0.4479488730430603, "learning_rate": 9.99703495633659e-05, "loss": 1.1568, "step": 455 }, { "epoch": 0.04074429825541135, "grad_norm": 0.5108731985092163, "learning_rate": 9.996984919318989e-05, "loss": 1.1712, "step": 456 }, { "epoch": 0.04083364978667322, "grad_norm": 0.5030118227005005, "learning_rate": 9.996934463750636e-05, "loss": 1.0666, "step": 457 }, { "epoch": 0.04092300131793509, "grad_norm": 0.5227623581886292, "learning_rate": 9.996883589635757e-05, "loss": 1.1652, "step": 458 }, { "epoch": 0.04101235284919695, "grad_norm": 0.4760054349899292, "learning_rate": 9.996832296978616e-05, "loss": 1.151, "step": 459 }, { "epoch": 0.04110170438045882, "grad_norm": 0.42174920439720154, "learning_rate": 9.996780585783508e-05, "loss": 1.1306, "step": 460 }, { "epoch": 0.04119105591172069, "grad_norm": 0.42818742990493774, "learning_rate": 9.996728456054762e-05, "loss": 1.1775, "step": 461 }, { "epoch": 0.04128040744298255, "grad_norm": 0.48023608326911926, "learning_rate": 9.996675907796749e-05, "loss": 1.1809, "step": 462 }, { "epoch": 0.04136975897424442, "grad_norm": 0.45978084206581116, "learning_rate": 9.996622941013867e-05, "loss": 1.2312, "step": 463 }, { "epoch": 0.04145911050550629, "grad_norm": 0.4810321629047394, "learning_rate": 9.996569555710553e-05, "loss": 1.0946, "step": 464 }, { "epoch": 0.04154846203676815, "grad_norm": 0.43682757019996643, "learning_rate": 9.996515751891279e-05, "loss": 1.1556, "step": 465 }, { "epoch": 0.04163781356803002, "grad_norm": 0.4703795313835144, "learning_rate": 9.996461529560553e-05, "loss": 1.1268, "step": 466 }, { "epoch": 0.04172716509929189, "grad_norm": 0.44790583848953247, "learning_rate": 9.996406888722914e-05, "loss": 1.0907, "step": 467 }, { "epoch": 0.04181651663055375, "grad_norm": 0.5048056840896606, "learning_rate": 9.996351829382941e-05, "loss": 1.0709, "step": 468 }, { "epoch": 0.04190586816181562, "grad_norm": 0.4316783845424652, "learning_rate": 9.996296351545244e-05, "loss": 1.1355, "step": 469 }, { "epoch": 0.04199521969307749, "grad_norm": 0.4551528990268707, "learning_rate": 9.996240455214472e-05, "loss": 1.0943, "step": 470 }, { "epoch": 0.042084571224339354, "grad_norm": 0.4864242970943451, "learning_rate": 9.996184140395306e-05, "loss": 1.0194, "step": 471 }, { "epoch": 0.04217392275560122, "grad_norm": 0.5039882659912109, "learning_rate": 9.996127407092462e-05, "loss": 1.1392, "step": 472 }, { "epoch": 0.04226327428686309, "grad_norm": 0.5886643528938293, "learning_rate": 9.996070255310692e-05, "loss": 1.0797, "step": 473 }, { "epoch": 0.04235262581812496, "grad_norm": 0.5036435723304749, "learning_rate": 9.996012685054786e-05, "loss": 1.1886, "step": 474 }, { "epoch": 0.042441977349386824, "grad_norm": 0.4894576370716095, "learning_rate": 9.995954696329562e-05, "loss": 1.1631, "step": 475 }, { "epoch": 0.04253132888064869, "grad_norm": 0.4920431673526764, "learning_rate": 9.99589628913988e-05, "loss": 1.1382, "step": 476 }, { "epoch": 0.04262068041191056, "grad_norm": 0.42358994483947754, "learning_rate": 9.995837463490632e-05, "loss": 1.1086, "step": 477 }, { "epoch": 0.042710031943172425, "grad_norm": 0.4317459464073181, "learning_rate": 9.995778219386744e-05, "loss": 1.132, "step": 478 }, { "epoch": 0.042799383474434294, "grad_norm": 0.4853540062904358, "learning_rate": 9.995718556833178e-05, "loss": 1.0694, "step": 479 }, { "epoch": 0.04288873500569616, "grad_norm": 0.49975448846817017, "learning_rate": 9.995658475834933e-05, "loss": 1.1211, "step": 480 }, { "epoch": 0.042978086536958025, "grad_norm": 0.4265710115432739, "learning_rate": 9.995597976397042e-05, "loss": 1.1266, "step": 481 }, { "epoch": 0.043067438068219895, "grad_norm": 0.5126653909683228, "learning_rate": 9.995537058524569e-05, "loss": 1.1113, "step": 482 }, { "epoch": 0.043156789599481764, "grad_norm": 0.5130075812339783, "learning_rate": 9.99547572222262e-05, "loss": 1.1382, "step": 483 }, { "epoch": 0.043246141130743626, "grad_norm": 0.46531277894973755, "learning_rate": 9.995413967496333e-05, "loss": 1.0806, "step": 484 }, { "epoch": 0.043335492662005495, "grad_norm": 0.4815559983253479, "learning_rate": 9.995351794350876e-05, "loss": 1.136, "step": 485 }, { "epoch": 0.043424844193267365, "grad_norm": 0.417111873626709, "learning_rate": 9.99528920279146e-05, "loss": 1.158, "step": 486 }, { "epoch": 0.04351419572452923, "grad_norm": 0.4570912718772888, "learning_rate": 9.995226192823329e-05, "loss": 1.134, "step": 487 }, { "epoch": 0.043603547255791096, "grad_norm": 0.5166110992431641, "learning_rate": 9.995162764451758e-05, "loss": 1.1111, "step": 488 }, { "epoch": 0.043692898787052965, "grad_norm": 0.4371122419834137, "learning_rate": 9.99509891768206e-05, "loss": 1.1687, "step": 489 }, { "epoch": 0.04378225031831483, "grad_norm": 0.5123320817947388, "learning_rate": 9.995034652519586e-05, "loss": 1.1159, "step": 490 }, { "epoch": 0.0438716018495767, "grad_norm": 0.5270020961761475, "learning_rate": 9.994969968969715e-05, "loss": 1.0321, "step": 491 }, { "epoch": 0.043960953380838566, "grad_norm": 0.43343213200569153, "learning_rate": 9.994904867037867e-05, "loss": 1.1311, "step": 492 }, { "epoch": 0.04405030491210043, "grad_norm": 0.5779858827590942, "learning_rate": 9.994839346729495e-05, "loss": 1.0312, "step": 493 }, { "epoch": 0.0441396564433623, "grad_norm": 0.4757930338382721, "learning_rate": 9.994773408050084e-05, "loss": 1.1148, "step": 494 }, { "epoch": 0.04422900797462417, "grad_norm": 0.483888179063797, "learning_rate": 9.994707051005164e-05, "loss": 1.1387, "step": 495 }, { "epoch": 0.04431835950588603, "grad_norm": 0.4878624677658081, "learning_rate": 9.994640275600285e-05, "loss": 1.1294, "step": 496 }, { "epoch": 0.0444077110371479, "grad_norm": 0.5229454040527344, "learning_rate": 9.994573081841046e-05, "loss": 1.0685, "step": 497 }, { "epoch": 0.04449706256840977, "grad_norm": 0.49779602885246277, "learning_rate": 9.994505469733071e-05, "loss": 1.1596, "step": 498 }, { "epoch": 0.04458641409967163, "grad_norm": 0.5035576224327087, "learning_rate": 9.994437439282027e-05, "loss": 1.1966, "step": 499 }, { "epoch": 0.0446757656309335, "grad_norm": 0.4735001027584076, "learning_rate": 9.99436899049361e-05, "loss": 1.1213, "step": 500 }, { "epoch": 0.04476511716219537, "grad_norm": 0.5072652697563171, "learning_rate": 9.994300123373554e-05, "loss": 1.1671, "step": 501 }, { "epoch": 0.04485446869345724, "grad_norm": 0.4931294620037079, "learning_rate": 9.994230837927627e-05, "loss": 1.1065, "step": 502 }, { "epoch": 0.0449438202247191, "grad_norm": 0.4530344307422638, "learning_rate": 9.994161134161634e-05, "loss": 1.1528, "step": 503 }, { "epoch": 0.04503317175598097, "grad_norm": 0.5151768326759338, "learning_rate": 9.99409101208141e-05, "loss": 0.9932, "step": 504 }, { "epoch": 0.04512252328724284, "grad_norm": 0.5038042068481445, "learning_rate": 9.994020471692833e-05, "loss": 1.1132, "step": 505 }, { "epoch": 0.0452118748185047, "grad_norm": 0.47841110825538635, "learning_rate": 9.993949513001807e-05, "loss": 1.1563, "step": 506 }, { "epoch": 0.04530122634976657, "grad_norm": 0.5167407989501953, "learning_rate": 9.993878136014278e-05, "loss": 1.1383, "step": 507 }, { "epoch": 0.04539057788102844, "grad_norm": 0.5208450555801392, "learning_rate": 9.993806340736225e-05, "loss": 1.1225, "step": 508 }, { "epoch": 0.0454799294122903, "grad_norm": 0.5085687637329102, "learning_rate": 9.99373412717366e-05, "loss": 1.1747, "step": 509 }, { "epoch": 0.04556928094355217, "grad_norm": 0.5266485214233398, "learning_rate": 9.993661495332633e-05, "loss": 1.1311, "step": 510 }, { "epoch": 0.04565863247481404, "grad_norm": 0.5289018154144287, "learning_rate": 9.993588445219227e-05, "loss": 1.114, "step": 511 }, { "epoch": 0.0457479840060759, "grad_norm": 0.4422353208065033, "learning_rate": 9.99351497683956e-05, "loss": 1.178, "step": 512 }, { "epoch": 0.04583733553733777, "grad_norm": 0.5575171113014221, "learning_rate": 9.993441090199787e-05, "loss": 1.0279, "step": 513 }, { "epoch": 0.04592668706859964, "grad_norm": 0.53556227684021, "learning_rate": 9.993366785306097e-05, "loss": 1.1212, "step": 514 }, { "epoch": 0.0460160385998615, "grad_norm": 0.5543893575668335, "learning_rate": 9.993292062164714e-05, "loss": 1.0113, "step": 515 }, { "epoch": 0.04610539013112337, "grad_norm": 0.5223544239997864, "learning_rate": 9.993216920781894e-05, "loss": 1.0224, "step": 516 }, { "epoch": 0.04619474166238524, "grad_norm": 0.5060791969299316, "learning_rate": 9.993141361163935e-05, "loss": 1.1628, "step": 517 }, { "epoch": 0.046284093193647104, "grad_norm": 0.4724702537059784, "learning_rate": 9.993065383317163e-05, "loss": 1.1255, "step": 518 }, { "epoch": 0.04637344472490897, "grad_norm": 0.47285956144332886, "learning_rate": 9.992988987247944e-05, "loss": 1.1659, "step": 519 }, { "epoch": 0.04646279625617084, "grad_norm": 0.4982796013355255, "learning_rate": 9.992912172962674e-05, "loss": 1.1952, "step": 520 }, { "epoch": 0.046552147787432704, "grad_norm": 0.48061710596084595, "learning_rate": 9.99283494046779e-05, "loss": 1.1388, "step": 521 }, { "epoch": 0.046641499318694574, "grad_norm": 0.45872175693511963, "learning_rate": 9.99275728976976e-05, "loss": 1.1262, "step": 522 }, { "epoch": 0.04673085084995644, "grad_norm": 0.4480443000793457, "learning_rate": 9.992679220875088e-05, "loss": 1.1235, "step": 523 }, { "epoch": 0.046820202381218305, "grad_norm": 0.43175145983695984, "learning_rate": 9.992600733790314e-05, "loss": 1.1185, "step": 524 }, { "epoch": 0.046909553912480174, "grad_norm": 0.41958916187286377, "learning_rate": 9.99252182852201e-05, "loss": 1.1661, "step": 525 }, { "epoch": 0.046998905443742044, "grad_norm": 0.4344329237937927, "learning_rate": 9.992442505076787e-05, "loss": 1.1364, "step": 526 }, { "epoch": 0.047088256975003906, "grad_norm": 0.4704360067844391, "learning_rate": 9.992362763461287e-05, "loss": 1.0588, "step": 527 }, { "epoch": 0.047177608506265775, "grad_norm": 0.4403749704360962, "learning_rate": 9.992282603682192e-05, "loss": 1.1517, "step": 528 }, { "epoch": 0.047266960037527644, "grad_norm": 0.4906516969203949, "learning_rate": 9.992202025746215e-05, "loss": 1.0916, "step": 529 }, { "epoch": 0.04735631156878951, "grad_norm": 0.5123149752616882, "learning_rate": 9.992121029660106e-05, "loss": 1.0794, "step": 530 }, { "epoch": 0.047445663100051376, "grad_norm": 0.46089616417884827, "learning_rate": 9.992039615430648e-05, "loss": 1.1846, "step": 531 }, { "epoch": 0.047535014631313245, "grad_norm": 0.5317028760910034, "learning_rate": 9.99195778306466e-05, "loss": 1.1289, "step": 532 }, { "epoch": 0.047624366162575114, "grad_norm": 0.48291924595832825, "learning_rate": 9.991875532568999e-05, "loss": 1.1427, "step": 533 }, { "epoch": 0.04771371769383698, "grad_norm": 0.46745729446411133, "learning_rate": 9.991792863950552e-05, "loss": 1.1027, "step": 534 }, { "epoch": 0.047803069225098846, "grad_norm": 0.4555657207965851, "learning_rate": 9.991709777216242e-05, "loss": 1.0926, "step": 535 }, { "epoch": 0.047892420756360715, "grad_norm": 0.4779694080352783, "learning_rate": 9.991626272373033e-05, "loss": 1.0919, "step": 536 }, { "epoch": 0.04798177228762258, "grad_norm": 0.4468933045864105, "learning_rate": 9.991542349427916e-05, "loss": 1.1903, "step": 537 }, { "epoch": 0.04807112381888445, "grad_norm": 0.5170602202415466, "learning_rate": 9.99145800838792e-05, "loss": 1.0183, "step": 538 }, { "epoch": 0.048160475350146316, "grad_norm": 0.4570893347263336, "learning_rate": 9.991373249260112e-05, "loss": 1.0834, "step": 539 }, { "epoch": 0.04824982688140818, "grad_norm": 0.4547278881072998, "learning_rate": 9.99128807205159e-05, "loss": 1.1534, "step": 540 }, { "epoch": 0.04833917841267005, "grad_norm": 0.47675079107284546, "learning_rate": 9.991202476769488e-05, "loss": 1.0934, "step": 541 }, { "epoch": 0.04842852994393192, "grad_norm": 0.4400666654109955, "learning_rate": 9.991116463420976e-05, "loss": 1.0891, "step": 542 }, { "epoch": 0.04851788147519378, "grad_norm": 0.4726406931877136, "learning_rate": 9.99103003201326e-05, "loss": 1.0805, "step": 543 }, { "epoch": 0.04860723300645565, "grad_norm": 0.484070748090744, "learning_rate": 9.990943182553579e-05, "loss": 1.0694, "step": 544 }, { "epoch": 0.04869658453771752, "grad_norm": 0.5411748290061951, "learning_rate": 9.990855915049204e-05, "loss": 1.0275, "step": 545 }, { "epoch": 0.04878593606897938, "grad_norm": 0.46557214856147766, "learning_rate": 9.990768229507447e-05, "loss": 1.2306, "step": 546 }, { "epoch": 0.04887528760024125, "grad_norm": 0.5048271417617798, "learning_rate": 9.990680125935657e-05, "loss": 1.063, "step": 547 }, { "epoch": 0.04896463913150312, "grad_norm": 0.4694403409957886, "learning_rate": 9.990591604341206e-05, "loss": 1.117, "step": 548 }, { "epoch": 0.04905399066276498, "grad_norm": 0.5101834535598755, "learning_rate": 9.990502664731515e-05, "loss": 1.096, "step": 549 }, { "epoch": 0.04914334219402685, "grad_norm": 0.4212850332260132, "learning_rate": 9.99041330711403e-05, "loss": 1.2322, "step": 550 }, { "epoch": 0.04923269372528872, "grad_norm": 0.4730430245399475, "learning_rate": 9.990323531496235e-05, "loss": 1.1706, "step": 551 }, { "epoch": 0.04932204525655058, "grad_norm": 0.4167949855327606, "learning_rate": 9.990233337885652e-05, "loss": 1.1796, "step": 552 }, { "epoch": 0.04941139678781245, "grad_norm": 0.44390869140625, "learning_rate": 9.990142726289837e-05, "loss": 1.1358, "step": 553 }, { "epoch": 0.04950074831907432, "grad_norm": 0.47111496329307556, "learning_rate": 9.990051696716375e-05, "loss": 1.1188, "step": 554 }, { "epoch": 0.04959009985033618, "grad_norm": 0.40082886815071106, "learning_rate": 9.989960249172894e-05, "loss": 1.1225, "step": 555 }, { "epoch": 0.04967945138159805, "grad_norm": 0.42682352662086487, "learning_rate": 9.989868383667054e-05, "loss": 1.0989, "step": 556 }, { "epoch": 0.04976880291285992, "grad_norm": 0.45663878321647644, "learning_rate": 9.989776100206548e-05, "loss": 1.1296, "step": 557 }, { "epoch": 0.04985815444412178, "grad_norm": 0.5189902782440186, "learning_rate": 9.989683398799106e-05, "loss": 1.0356, "step": 558 }, { "epoch": 0.04994750597538365, "grad_norm": 0.41676539182662964, "learning_rate": 9.989590279452492e-05, "loss": 1.0858, "step": 559 }, { "epoch": 0.05003685750664552, "grad_norm": 0.47391507029533386, "learning_rate": 9.989496742174509e-05, "loss": 1.1989, "step": 560 }, { "epoch": 0.05012620903790739, "grad_norm": 0.48219195008277893, "learning_rate": 9.989402786972988e-05, "loss": 1.0994, "step": 561 }, { "epoch": 0.05021556056916925, "grad_norm": 0.5182317495346069, "learning_rate": 9.989308413855802e-05, "loss": 1.068, "step": 562 }, { "epoch": 0.05030491210043112, "grad_norm": 0.44291019439697266, "learning_rate": 9.989213622830853e-05, "loss": 1.0893, "step": 563 }, { "epoch": 0.05039426363169299, "grad_norm": 0.4342930018901825, "learning_rate": 9.989118413906082e-05, "loss": 1.1266, "step": 564 }, { "epoch": 0.050483615162954854, "grad_norm": 0.4785180985927582, "learning_rate": 9.989022787089463e-05, "loss": 1.1128, "step": 565 }, { "epoch": 0.05057296669421672, "grad_norm": 0.4673145115375519, "learning_rate": 9.988926742389009e-05, "loss": 1.087, "step": 566 }, { "epoch": 0.05066231822547859, "grad_norm": 0.42585358023643494, "learning_rate": 9.98883027981276e-05, "loss": 1.1635, "step": 567 }, { "epoch": 0.050751669756740454, "grad_norm": 0.4596778154373169, "learning_rate": 9.988733399368799e-05, "loss": 1.1818, "step": 568 }, { "epoch": 0.050841021288002324, "grad_norm": 0.4845656454563141, "learning_rate": 9.988636101065239e-05, "loss": 1.1218, "step": 569 }, { "epoch": 0.05093037281926419, "grad_norm": 0.4579009413719177, "learning_rate": 9.988538384910231e-05, "loss": 1.0752, "step": 570 }, { "epoch": 0.051019724350526055, "grad_norm": 0.4427371025085449, "learning_rate": 9.988440250911959e-05, "loss": 1.1323, "step": 571 }, { "epoch": 0.051109075881787924, "grad_norm": 0.4617055654525757, "learning_rate": 9.988341699078643e-05, "loss": 1.1641, "step": 572 }, { "epoch": 0.051198427413049793, "grad_norm": 0.4859091639518738, "learning_rate": 9.988242729418538e-05, "loss": 1.0782, "step": 573 }, { "epoch": 0.051287778944311656, "grad_norm": 0.4534977674484253, "learning_rate": 9.988143341939933e-05, "loss": 1.1194, "step": 574 }, { "epoch": 0.051377130475573525, "grad_norm": 0.46073511242866516, "learning_rate": 9.988043536651153e-05, "loss": 1.091, "step": 575 }, { "epoch": 0.051466482006835394, "grad_norm": 0.4721985459327698, "learning_rate": 9.98794331356056e-05, "loss": 1.1074, "step": 576 }, { "epoch": 0.05155583353809726, "grad_norm": 0.45086175203323364, "learning_rate": 9.987842672676544e-05, "loss": 1.0933, "step": 577 }, { "epoch": 0.051645185069359126, "grad_norm": 0.4174632728099823, "learning_rate": 9.98774161400754e-05, "loss": 1.1387, "step": 578 }, { "epoch": 0.051734536600620995, "grad_norm": 0.4772718548774719, "learning_rate": 9.987640137562008e-05, "loss": 1.1551, "step": 579 }, { "epoch": 0.05182388813188286, "grad_norm": 0.4615418016910553, "learning_rate": 9.987538243348453e-05, "loss": 1.1593, "step": 580 }, { "epoch": 0.05191323966314473, "grad_norm": 0.48848336935043335, "learning_rate": 9.987435931375406e-05, "loss": 1.1167, "step": 581 }, { "epoch": 0.052002591194406596, "grad_norm": 0.4644308090209961, "learning_rate": 9.987333201651436e-05, "loss": 1.0925, "step": 582 }, { "epoch": 0.05209194272566846, "grad_norm": 0.46790382266044617, "learning_rate": 9.98723005418515e-05, "loss": 1.1186, "step": 583 }, { "epoch": 0.05218129425693033, "grad_norm": 0.4854678213596344, "learning_rate": 9.987126488985188e-05, "loss": 1.1301, "step": 584 }, { "epoch": 0.0522706457881922, "grad_norm": 0.47811320424079895, "learning_rate": 9.987022506060221e-05, "loss": 1.1431, "step": 585 }, { "epoch": 0.05235999731945406, "grad_norm": 0.5481966733932495, "learning_rate": 9.986918105418963e-05, "loss": 1.0918, "step": 586 }, { "epoch": 0.05244934885071593, "grad_norm": 0.4821578562259674, "learning_rate": 9.986813287070158e-05, "loss": 1.0715, "step": 587 }, { "epoch": 0.0525387003819778, "grad_norm": 0.46433693170547485, "learning_rate": 9.986708051022583e-05, "loss": 1.0865, "step": 588 }, { "epoch": 0.052628051913239667, "grad_norm": 0.44410815834999084, "learning_rate": 9.986602397285054e-05, "loss": 1.0884, "step": 589 }, { "epoch": 0.05271740344450153, "grad_norm": 0.4573810398578644, "learning_rate": 9.986496325866422e-05, "loss": 1.1896, "step": 590 }, { "epoch": 0.0528067549757634, "grad_norm": 0.5605972409248352, "learning_rate": 9.986389836775569e-05, "loss": 1.0038, "step": 591 }, { "epoch": 0.05289610650702527, "grad_norm": 0.4885507822036743, "learning_rate": 9.986282930021418e-05, "loss": 1.1346, "step": 592 }, { "epoch": 0.05298545803828713, "grad_norm": 0.4384009838104248, "learning_rate": 9.986175605612921e-05, "loss": 1.1165, "step": 593 }, { "epoch": 0.053074809569549, "grad_norm": 0.4354461133480072, "learning_rate": 9.986067863559067e-05, "loss": 1.0599, "step": 594 }, { "epoch": 0.05316416110081087, "grad_norm": 0.503073513507843, "learning_rate": 9.985959703868884e-05, "loss": 1.1166, "step": 595 }, { "epoch": 0.05325351263207273, "grad_norm": 0.5100207924842834, "learning_rate": 9.985851126551428e-05, "loss": 1.1165, "step": 596 }, { "epoch": 0.0533428641633346, "grad_norm": 0.5106130838394165, "learning_rate": 9.985742131615794e-05, "loss": 1.0688, "step": 597 }, { "epoch": 0.05343221569459647, "grad_norm": 0.46070772409439087, "learning_rate": 9.985632719071113e-05, "loss": 1.1599, "step": 598 }, { "epoch": 0.05352156722585833, "grad_norm": 0.45849481225013733, "learning_rate": 9.985522888926549e-05, "loss": 1.0446, "step": 599 }, { "epoch": 0.0536109187571202, "grad_norm": 0.4356798827648163, "learning_rate": 9.985412641191301e-05, "loss": 1.1269, "step": 600 }, { "epoch": 0.05370027028838207, "grad_norm": 0.5090345144271851, "learning_rate": 9.985301975874604e-05, "loss": 1.0689, "step": 601 }, { "epoch": 0.05378962181964393, "grad_norm": 0.45965901017189026, "learning_rate": 9.985190892985726e-05, "loss": 1.0531, "step": 602 }, { "epoch": 0.0538789733509058, "grad_norm": 0.45686468482017517, "learning_rate": 9.985079392533974e-05, "loss": 1.0928, "step": 603 }, { "epoch": 0.05396832488216767, "grad_norm": 0.46496498584747314, "learning_rate": 9.984967474528684e-05, "loss": 0.9913, "step": 604 }, { "epoch": 0.05405767641342953, "grad_norm": 0.41646531224250793, "learning_rate": 9.984855138979233e-05, "loss": 1.1572, "step": 605 }, { "epoch": 0.0541470279446914, "grad_norm": 0.46098536252975464, "learning_rate": 9.984742385895029e-05, "loss": 1.0705, "step": 606 }, { "epoch": 0.05423637947595327, "grad_norm": 0.44525283575057983, "learning_rate": 9.984629215285516e-05, "loss": 1.1466, "step": 607 }, { "epoch": 0.05432573100721513, "grad_norm": 0.5084784030914307, "learning_rate": 9.984515627160176e-05, "loss": 1.0186, "step": 608 }, { "epoch": 0.054415082538477, "grad_norm": 0.4281807839870453, "learning_rate": 9.98440162152852e-05, "loss": 1.1293, "step": 609 }, { "epoch": 0.05450443406973887, "grad_norm": 0.44532129168510437, "learning_rate": 9.984287198400098e-05, "loss": 1.1377, "step": 610 }, { "epoch": 0.054593785601000734, "grad_norm": 0.5224083662033081, "learning_rate": 9.984172357784495e-05, "loss": 1.1273, "step": 611 }, { "epoch": 0.0546831371322626, "grad_norm": 0.5680818557739258, "learning_rate": 9.984057099691329e-05, "loss": 1.1502, "step": 612 }, { "epoch": 0.05477248866352447, "grad_norm": 0.46198832988739014, "learning_rate": 9.983941424130255e-05, "loss": 1.1101, "step": 613 }, { "epoch": 0.054861840194786335, "grad_norm": 0.4595877528190613, "learning_rate": 9.983825331110961e-05, "loss": 1.1089, "step": 614 }, { "epoch": 0.054951191726048204, "grad_norm": 0.45024290680885315, "learning_rate": 9.983708820643173e-05, "loss": 1.116, "step": 615 }, { "epoch": 0.05504054325731007, "grad_norm": 0.4610383212566376, "learning_rate": 9.983591892736647e-05, "loss": 1.0447, "step": 616 }, { "epoch": 0.05512989478857194, "grad_norm": 0.5474917888641357, "learning_rate": 9.983474547401182e-05, "loss": 1.0719, "step": 617 }, { "epoch": 0.055219246319833805, "grad_norm": 0.3890456259250641, "learning_rate": 9.9833567846466e-05, "loss": 1.0726, "step": 618 }, { "epoch": 0.055308597851095674, "grad_norm": 0.495962917804718, "learning_rate": 9.983238604482771e-05, "loss": 1.1493, "step": 619 }, { "epoch": 0.05539794938235754, "grad_norm": 0.46767839789390564, "learning_rate": 9.983120006919591e-05, "loss": 1.1153, "step": 620 }, { "epoch": 0.055487300913619406, "grad_norm": 0.49606284499168396, "learning_rate": 9.983000991966993e-05, "loss": 1.1023, "step": 621 }, { "epoch": 0.055576652444881275, "grad_norm": 0.45764997601509094, "learning_rate": 9.982881559634947e-05, "loss": 1.0941, "step": 622 }, { "epoch": 0.055666003976143144, "grad_norm": 0.5058903694152832, "learning_rate": 9.982761709933457e-05, "loss": 1.0503, "step": 623 }, { "epoch": 0.055755355507405006, "grad_norm": 0.4385363757610321, "learning_rate": 9.982641442872562e-05, "loss": 1.1275, "step": 624 }, { "epoch": 0.055844707038666876, "grad_norm": 0.43508297204971313, "learning_rate": 9.982520758462335e-05, "loss": 1.1353, "step": 625 }, { "epoch": 0.055934058569928745, "grad_norm": 0.49032166600227356, "learning_rate": 9.982399656712884e-05, "loss": 1.0699, "step": 626 }, { "epoch": 0.05602341010119061, "grad_norm": 0.5139771699905396, "learning_rate": 9.982278137634353e-05, "loss": 1.0745, "step": 627 }, { "epoch": 0.056112761632452476, "grad_norm": 0.4978711009025574, "learning_rate": 9.982156201236921e-05, "loss": 1.0771, "step": 628 }, { "epoch": 0.056202113163714346, "grad_norm": 0.45232483744621277, "learning_rate": 9.9820338475308e-05, "loss": 1.1367, "step": 629 }, { "epoch": 0.05629146469497621, "grad_norm": 0.5391058921813965, "learning_rate": 9.981911076526243e-05, "loss": 0.9913, "step": 630 }, { "epoch": 0.05638081622623808, "grad_norm": 0.4652855396270752, "learning_rate": 9.981787888233527e-05, "loss": 1.0855, "step": 631 }, { "epoch": 0.056470167757499946, "grad_norm": 0.4794527590274811, "learning_rate": 9.981664282662974e-05, "loss": 1.2006, "step": 632 }, { "epoch": 0.05655951928876181, "grad_norm": 0.4287259578704834, "learning_rate": 9.981540259824938e-05, "loss": 1.0524, "step": 633 }, { "epoch": 0.05664887082002368, "grad_norm": 0.4706929922103882, "learning_rate": 9.981415819729804e-05, "loss": 0.9986, "step": 634 }, { "epoch": 0.05673822235128555, "grad_norm": 0.5352094769477844, "learning_rate": 9.981290962387998e-05, "loss": 1.0363, "step": 635 }, { "epoch": 0.05682757388254741, "grad_norm": 0.4566076695919037, "learning_rate": 9.981165687809976e-05, "loss": 1.089, "step": 636 }, { "epoch": 0.05691692541380928, "grad_norm": 0.5029745697975159, "learning_rate": 9.981039996006234e-05, "loss": 0.9928, "step": 637 }, { "epoch": 0.05700627694507115, "grad_norm": 0.49635255336761475, "learning_rate": 9.980913886987296e-05, "loss": 1.1236, "step": 638 }, { "epoch": 0.05709562847633301, "grad_norm": 0.5090769529342651, "learning_rate": 9.98078736076373e-05, "loss": 1.0525, "step": 639 }, { "epoch": 0.05718498000759488, "grad_norm": 0.46991288661956787, "learning_rate": 9.980660417346129e-05, "loss": 1.1371, "step": 640 }, { "epoch": 0.05727433153885675, "grad_norm": 0.5006728172302246, "learning_rate": 9.980533056745128e-05, "loss": 1.0505, "step": 641 }, { "epoch": 0.05736368307011861, "grad_norm": 0.45537087321281433, "learning_rate": 9.980405278971396e-05, "loss": 1.1296, "step": 642 }, { "epoch": 0.05745303460138048, "grad_norm": 0.4972824454307556, "learning_rate": 9.980277084035634e-05, "loss": 1.1512, "step": 643 }, { "epoch": 0.05754238613264235, "grad_norm": 0.6011673212051392, "learning_rate": 9.980148471948581e-05, "loss": 1.0242, "step": 644 }, { "epoch": 0.05763173766390421, "grad_norm": 0.4560789167881012, "learning_rate": 9.980019442721008e-05, "loss": 1.1034, "step": 645 }, { "epoch": 0.05772108919516608, "grad_norm": 0.46284574270248413, "learning_rate": 9.979889996363723e-05, "loss": 1.1125, "step": 646 }, { "epoch": 0.05781044072642795, "grad_norm": 0.5078014731407166, "learning_rate": 9.97976013288757e-05, "loss": 1.0878, "step": 647 }, { "epoch": 0.05789979225768982, "grad_norm": 0.4990587830543518, "learning_rate": 9.979629852303426e-05, "loss": 1.0623, "step": 648 }, { "epoch": 0.05798914378895168, "grad_norm": 0.4506543278694153, "learning_rate": 9.979499154622201e-05, "loss": 1.1416, "step": 649 }, { "epoch": 0.05807849532021355, "grad_norm": 0.4424755573272705, "learning_rate": 9.979368039854847e-05, "loss": 1.094, "step": 650 }, { "epoch": 0.05816784685147542, "grad_norm": 0.4033477008342743, "learning_rate": 9.979236508012341e-05, "loss": 1.13, "step": 651 }, { "epoch": 0.05825719838273728, "grad_norm": 0.43958303332328796, "learning_rate": 9.979104559105703e-05, "loss": 1.1477, "step": 652 }, { "epoch": 0.05834654991399915, "grad_norm": 0.44301077723503113, "learning_rate": 9.978972193145986e-05, "loss": 1.1097, "step": 653 }, { "epoch": 0.05843590144526102, "grad_norm": 0.44206470251083374, "learning_rate": 9.978839410144274e-05, "loss": 1.0702, "step": 654 }, { "epoch": 0.05852525297652288, "grad_norm": 0.47731223702430725, "learning_rate": 9.978706210111692e-05, "loss": 1.109, "step": 655 }, { "epoch": 0.05861460450778475, "grad_norm": 0.47083383798599243, "learning_rate": 9.978572593059394e-05, "loss": 1.1559, "step": 656 }, { "epoch": 0.05870395603904662, "grad_norm": 0.5734443068504333, "learning_rate": 9.978438558998575e-05, "loss": 1.0478, "step": 657 }, { "epoch": 0.058793307570308484, "grad_norm": 0.4939190745353699, "learning_rate": 9.978304107940461e-05, "loss": 1.0989, "step": 658 }, { "epoch": 0.05888265910157035, "grad_norm": 0.42895951867103577, "learning_rate": 9.978169239896311e-05, "loss": 1.1302, "step": 659 }, { "epoch": 0.05897201063283222, "grad_norm": 0.513512372970581, "learning_rate": 9.978033954877425e-05, "loss": 1.1085, "step": 660 }, { "epoch": 0.059061362164094085, "grad_norm": 0.4882372319698334, "learning_rate": 9.977898252895134e-05, "loss": 1.0566, "step": 661 }, { "epoch": 0.059150713695355954, "grad_norm": 0.4673426151275635, "learning_rate": 9.977762133960802e-05, "loss": 1.028, "step": 662 }, { "epoch": 0.05924006522661782, "grad_norm": 0.523703932762146, "learning_rate": 9.977625598085834e-05, "loss": 1.0267, "step": 663 }, { "epoch": 0.059329416757879685, "grad_norm": 0.5089814066886902, "learning_rate": 9.977488645281662e-05, "loss": 1.1286, "step": 664 }, { "epoch": 0.059418768289141555, "grad_norm": 0.48868119716644287, "learning_rate": 9.977351275559763e-05, "loss": 1.0537, "step": 665 }, { "epoch": 0.059508119820403424, "grad_norm": 0.5465951561927795, "learning_rate": 9.977213488931638e-05, "loss": 1.1084, "step": 666 }, { "epoch": 0.059597471351665286, "grad_norm": 0.49999743700027466, "learning_rate": 9.97707528540883e-05, "loss": 1.094, "step": 667 }, { "epoch": 0.059686822882927155, "grad_norm": 0.5092505216598511, "learning_rate": 9.976936665002916e-05, "loss": 1.0616, "step": 668 }, { "epoch": 0.059776174414189025, "grad_norm": 0.5057411193847656, "learning_rate": 9.976797627725505e-05, "loss": 1.0719, "step": 669 }, { "epoch": 0.05986552594545089, "grad_norm": 0.5061649084091187, "learning_rate": 9.976658173588244e-05, "loss": 1.1306, "step": 670 }, { "epoch": 0.059954877476712756, "grad_norm": 0.4602451026439667, "learning_rate": 9.976518302602813e-05, "loss": 1.074, "step": 671 }, { "epoch": 0.060044229007974625, "grad_norm": 0.47477665543556213, "learning_rate": 9.97637801478093e-05, "loss": 1.0751, "step": 672 }, { "epoch": 0.06013358053923649, "grad_norm": 0.4398420751094818, "learning_rate": 9.976237310134342e-05, "loss": 1.1357, "step": 673 }, { "epoch": 0.06022293207049836, "grad_norm": 0.49362048506736755, "learning_rate": 9.976096188674837e-05, "loss": 1.1307, "step": 674 }, { "epoch": 0.060312283601760226, "grad_norm": 0.4223507344722748, "learning_rate": 9.975954650414236e-05, "loss": 1.122, "step": 675 }, { "epoch": 0.060401635133022095, "grad_norm": 0.4884486496448517, "learning_rate": 9.975812695364392e-05, "loss": 1.1486, "step": 676 }, { "epoch": 0.06049098666428396, "grad_norm": 0.4124647080898285, "learning_rate": 9.975670323537197e-05, "loss": 1.0908, "step": 677 }, { "epoch": 0.06058033819554583, "grad_norm": 0.5047022700309753, "learning_rate": 9.975527534944574e-05, "loss": 0.9936, "step": 678 }, { "epoch": 0.060669689726807696, "grad_norm": 0.509061872959137, "learning_rate": 9.975384329598486e-05, "loss": 1.0482, "step": 679 }, { "epoch": 0.06075904125806956, "grad_norm": 0.5170226693153381, "learning_rate": 9.975240707510926e-05, "loss": 1.0872, "step": 680 }, { "epoch": 0.06084839278933143, "grad_norm": 0.43989554047584534, "learning_rate": 9.975096668693926e-05, "loss": 1.1313, "step": 681 }, { "epoch": 0.0609377443205933, "grad_norm": 0.43840011954307556, "learning_rate": 9.974952213159547e-05, "loss": 1.1161, "step": 682 }, { "epoch": 0.06102709585185516, "grad_norm": 0.5931734442710876, "learning_rate": 9.974807340919893e-05, "loss": 0.9472, "step": 683 }, { "epoch": 0.06111644738311703, "grad_norm": 0.4824213683605194, "learning_rate": 9.974662051987096e-05, "loss": 1.0452, "step": 684 }, { "epoch": 0.0612057989143789, "grad_norm": 0.5604041218757629, "learning_rate": 9.974516346373326e-05, "loss": 1.1357, "step": 685 }, { "epoch": 0.06129515044564076, "grad_norm": 0.42983385920524597, "learning_rate": 9.974370224090788e-05, "loss": 1.166, "step": 686 }, { "epoch": 0.06138450197690263, "grad_norm": 0.4323766827583313, "learning_rate": 9.97422368515172e-05, "loss": 1.0949, "step": 687 }, { "epoch": 0.0614738535081645, "grad_norm": 0.4329332113265991, "learning_rate": 9.974076729568396e-05, "loss": 1.1318, "step": 688 }, { "epoch": 0.06156320503942636, "grad_norm": 0.48779281973838806, "learning_rate": 9.973929357353126e-05, "loss": 1.1396, "step": 689 }, { "epoch": 0.06165255657068823, "grad_norm": 0.485921710729599, "learning_rate": 9.973781568518256e-05, "loss": 1.0039, "step": 690 }, { "epoch": 0.0617419081019501, "grad_norm": 0.45170992612838745, "learning_rate": 9.973633363076163e-05, "loss": 1.0345, "step": 691 }, { "epoch": 0.06183125963321196, "grad_norm": 0.5609152317047119, "learning_rate": 9.973484741039258e-05, "loss": 1.0191, "step": 692 }, { "epoch": 0.06192061116447383, "grad_norm": 0.4308573007583618, "learning_rate": 9.973335702419995e-05, "loss": 1.0891, "step": 693 }, { "epoch": 0.0620099626957357, "grad_norm": 0.4530807137489319, "learning_rate": 9.973186247230855e-05, "loss": 1.1436, "step": 694 }, { "epoch": 0.06209931422699756, "grad_norm": 0.4708486795425415, "learning_rate": 9.973036375484354e-05, "loss": 1.0609, "step": 695 }, { "epoch": 0.06218866575825943, "grad_norm": 0.4028553068637848, "learning_rate": 9.97288608719305e-05, "loss": 1.0699, "step": 696 }, { "epoch": 0.0622780172895213, "grad_norm": 0.4526921510696411, "learning_rate": 9.97273538236953e-05, "loss": 1.1414, "step": 697 }, { "epoch": 0.06236736882078316, "grad_norm": 0.46698683500289917, "learning_rate": 9.972584261026413e-05, "loss": 1.1178, "step": 698 }, { "epoch": 0.06245672035204503, "grad_norm": 0.5231309533119202, "learning_rate": 9.972432723176361e-05, "loss": 1.1514, "step": 699 }, { "epoch": 0.0625460718833069, "grad_norm": 0.38658031821250916, "learning_rate": 9.972280768832068e-05, "loss": 1.1243, "step": 700 }, { "epoch": 0.06263542341456876, "grad_norm": 0.4694380760192871, "learning_rate": 9.972128398006259e-05, "loss": 1.1258, "step": 701 }, { "epoch": 0.06272477494583063, "grad_norm": 0.4181990623474121, "learning_rate": 9.971975610711697e-05, "loss": 1.1512, "step": 702 }, { "epoch": 0.0628141264770925, "grad_norm": 0.41654473543167114, "learning_rate": 9.971822406961179e-05, "loss": 1.1766, "step": 703 }, { "epoch": 0.06290347800835437, "grad_norm": 0.44843196868896484, "learning_rate": 9.971668786767541e-05, "loss": 1.0496, "step": 704 }, { "epoch": 0.06299282953961624, "grad_norm": 0.5500750541687012, "learning_rate": 9.971514750143647e-05, "loss": 1.0574, "step": 705 }, { "epoch": 0.0630821810708781, "grad_norm": 0.4476911127567291, "learning_rate": 9.971360297102401e-05, "loss": 1.0639, "step": 706 }, { "epoch": 0.06317153260213997, "grad_norm": 0.46156466007232666, "learning_rate": 9.971205427656738e-05, "loss": 1.1028, "step": 707 }, { "epoch": 0.06326088413340183, "grad_norm": 0.45186087489128113, "learning_rate": 9.971050141819632e-05, "loss": 1.0948, "step": 708 }, { "epoch": 0.0633502356646637, "grad_norm": 0.4499582052230835, "learning_rate": 9.970894439604088e-05, "loss": 1.0978, "step": 709 }, { "epoch": 0.06343958719592557, "grad_norm": 0.4321632385253906, "learning_rate": 9.970738321023149e-05, "loss": 1.1102, "step": 710 }, { "epoch": 0.06352893872718744, "grad_norm": 0.4565223455429077, "learning_rate": 9.970581786089891e-05, "loss": 1.0825, "step": 711 }, { "epoch": 0.0636182902584493, "grad_norm": 0.4617339074611664, "learning_rate": 9.970424834817428e-05, "loss": 1.0773, "step": 712 }, { "epoch": 0.06370764178971117, "grad_norm": 0.4778772294521332, "learning_rate": 9.970267467218904e-05, "loss": 1.0918, "step": 713 }, { "epoch": 0.06379699332097304, "grad_norm": 0.46952787041664124, "learning_rate": 9.970109683307498e-05, "loss": 1.0939, "step": 714 }, { "epoch": 0.0638863448522349, "grad_norm": 0.4276919364929199, "learning_rate": 9.969951483096429e-05, "loss": 1.0416, "step": 715 }, { "epoch": 0.06397569638349677, "grad_norm": 0.45656710863113403, "learning_rate": 9.969792866598948e-05, "loss": 1.0634, "step": 716 }, { "epoch": 0.06406504791475864, "grad_norm": 0.4644415080547333, "learning_rate": 9.96963383382834e-05, "loss": 1.1223, "step": 717 }, { "epoch": 0.06415439944602051, "grad_norm": 0.5773541331291199, "learning_rate": 9.969474384797926e-05, "loss": 1.0547, "step": 718 }, { "epoch": 0.06424375097728237, "grad_norm": 0.5227424502372742, "learning_rate": 9.969314519521063e-05, "loss": 1.1057, "step": 719 }, { "epoch": 0.06433310250854424, "grad_norm": 0.4427839517593384, "learning_rate": 9.969154238011138e-05, "loss": 1.0599, "step": 720 }, { "epoch": 0.0644224540398061, "grad_norm": 0.472834050655365, "learning_rate": 9.968993540281579e-05, "loss": 1.0368, "step": 721 }, { "epoch": 0.06451180557106798, "grad_norm": 0.4237770438194275, "learning_rate": 9.968832426345845e-05, "loss": 1.1564, "step": 722 }, { "epoch": 0.06460115710232985, "grad_norm": 0.3942003548145294, "learning_rate": 9.968670896217431e-05, "loss": 1.1415, "step": 723 }, { "epoch": 0.06469050863359171, "grad_norm": 0.43824538588523865, "learning_rate": 9.968508949909868e-05, "loss": 1.118, "step": 724 }, { "epoch": 0.06477986016485357, "grad_norm": 0.4967290461063385, "learning_rate": 9.968346587436719e-05, "loss": 1.0834, "step": 725 }, { "epoch": 0.06486921169611544, "grad_norm": 0.5201587080955505, "learning_rate": 9.968183808811586e-05, "loss": 1.1773, "step": 726 }, { "epoch": 0.06495856322737731, "grad_norm": 0.42590588331222534, "learning_rate": 9.968020614048101e-05, "loss": 1.1176, "step": 727 }, { "epoch": 0.06504791475863918, "grad_norm": 0.4532450735569, "learning_rate": 9.967857003159933e-05, "loss": 1.0961, "step": 728 }, { "epoch": 0.06513726628990105, "grad_norm": 0.49376237392425537, "learning_rate": 9.96769297616079e-05, "loss": 1.1232, "step": 729 }, { "epoch": 0.06522661782116292, "grad_norm": 0.4753684401512146, "learning_rate": 9.967528533064408e-05, "loss": 1.0848, "step": 730 }, { "epoch": 0.06531596935242477, "grad_norm": 0.478083997964859, "learning_rate": 9.96736367388456e-05, "loss": 1.1025, "step": 731 }, { "epoch": 0.06540532088368664, "grad_norm": 0.46657711267471313, "learning_rate": 9.967198398635056e-05, "loss": 1.1299, "step": 732 }, { "epoch": 0.06549467241494851, "grad_norm": 0.4510549306869507, "learning_rate": 9.96703270732974e-05, "loss": 1.1, "step": 733 }, { "epoch": 0.06558402394621038, "grad_norm": 0.4335935115814209, "learning_rate": 9.96686659998249e-05, "loss": 1.0771, "step": 734 }, { "epoch": 0.06567337547747225, "grad_norm": 0.39717864990234375, "learning_rate": 9.96670007660722e-05, "loss": 1.1836, "step": 735 }, { "epoch": 0.06576272700873412, "grad_norm": 0.5292769074440002, "learning_rate": 9.966533137217878e-05, "loss": 1.0035, "step": 736 }, { "epoch": 0.06585207853999597, "grad_norm": 0.4478752911090851, "learning_rate": 9.966365781828443e-05, "loss": 1.0829, "step": 737 }, { "epoch": 0.06594143007125784, "grad_norm": 0.4209703207015991, "learning_rate": 9.966198010452939e-05, "loss": 1.1075, "step": 738 }, { "epoch": 0.06603078160251971, "grad_norm": 0.5448471903800964, "learning_rate": 9.966029823105416e-05, "loss": 1.0948, "step": 739 }, { "epoch": 0.06612013313378158, "grad_norm": 0.40626657009124756, "learning_rate": 9.965861219799958e-05, "loss": 1.1325, "step": 740 }, { "epoch": 0.06620948466504345, "grad_norm": 0.41559332609176636, "learning_rate": 9.965692200550693e-05, "loss": 1.1513, "step": 741 }, { "epoch": 0.06629883619630532, "grad_norm": 0.4277205169200897, "learning_rate": 9.965522765371777e-05, "loss": 1.1243, "step": 742 }, { "epoch": 0.06638818772756717, "grad_norm": 0.4078103303909302, "learning_rate": 9.965352914277399e-05, "loss": 1.1174, "step": 743 }, { "epoch": 0.06647753925882904, "grad_norm": 0.4964846968650818, "learning_rate": 9.965182647281788e-05, "loss": 1.0881, "step": 744 }, { "epoch": 0.06656689079009091, "grad_norm": 0.442452609539032, "learning_rate": 9.965011964399204e-05, "loss": 1.0964, "step": 745 }, { "epoch": 0.06665624232135278, "grad_norm": 0.4539310038089752, "learning_rate": 9.964840865643948e-05, "loss": 1.0969, "step": 746 }, { "epoch": 0.06674559385261465, "grad_norm": 0.4463444948196411, "learning_rate": 9.964669351030345e-05, "loss": 1.1043, "step": 747 }, { "epoch": 0.06683494538387652, "grad_norm": 0.4720766246318817, "learning_rate": 9.964497420572765e-05, "loss": 1.0618, "step": 748 }, { "epoch": 0.06692429691513839, "grad_norm": 0.42916223406791687, "learning_rate": 9.964325074285609e-05, "loss": 1.1004, "step": 749 }, { "epoch": 0.06701364844640025, "grad_norm": 0.4502628743648529, "learning_rate": 9.96415231218331e-05, "loss": 1.0862, "step": 750 }, { "epoch": 0.06710299997766211, "grad_norm": 0.4129611551761627, "learning_rate": 9.963979134280343e-05, "loss": 1.1337, "step": 751 }, { "epoch": 0.06719235150892398, "grad_norm": 0.4612719714641571, "learning_rate": 9.963805540591211e-05, "loss": 1.1208, "step": 752 }, { "epoch": 0.06728170304018585, "grad_norm": 0.5239313840866089, "learning_rate": 9.963631531130455e-05, "loss": 1.1208, "step": 753 }, { "epoch": 0.06737105457144772, "grad_norm": 0.4685198664665222, "learning_rate": 9.963457105912647e-05, "loss": 1.0893, "step": 754 }, { "epoch": 0.06746040610270959, "grad_norm": 0.4528690278530121, "learning_rate": 9.963282264952403e-05, "loss": 1.042, "step": 755 }, { "epoch": 0.06754975763397145, "grad_norm": 0.5180427432060242, "learning_rate": 9.963107008264364e-05, "loss": 1.0768, "step": 756 }, { "epoch": 0.06763910916523332, "grad_norm": 0.4094788432121277, "learning_rate": 9.96293133586321e-05, "loss": 1.0914, "step": 757 }, { "epoch": 0.06772846069649519, "grad_norm": 0.4870153069496155, "learning_rate": 9.962755247763654e-05, "loss": 1.019, "step": 758 }, { "epoch": 0.06781781222775705, "grad_norm": 0.4720255434513092, "learning_rate": 9.962578743980449e-05, "loss": 1.0146, "step": 759 }, { "epoch": 0.06790716375901892, "grad_norm": 0.45057880878448486, "learning_rate": 9.962401824528376e-05, "loss": 1.0702, "step": 760 }, { "epoch": 0.06799651529028079, "grad_norm": 0.44907015562057495, "learning_rate": 9.962224489422254e-05, "loss": 1.0297, "step": 761 }, { "epoch": 0.06808586682154265, "grad_norm": 0.4270571172237396, "learning_rate": 9.962046738676938e-05, "loss": 1.0743, "step": 762 }, { "epoch": 0.06817521835280452, "grad_norm": 0.47250601649284363, "learning_rate": 9.961868572307315e-05, "loss": 1.1362, "step": 763 }, { "epoch": 0.06826456988406639, "grad_norm": 0.5045757293701172, "learning_rate": 9.96168999032831e-05, "loss": 1.04, "step": 764 }, { "epoch": 0.06835392141532826, "grad_norm": 0.4600796699523926, "learning_rate": 9.961510992754882e-05, "loss": 1.0663, "step": 765 }, { "epoch": 0.06844327294659013, "grad_norm": 0.4397517442703247, "learning_rate": 9.961331579602022e-05, "loss": 1.1174, "step": 766 }, { "epoch": 0.068532624477852, "grad_norm": 0.4315062165260315, "learning_rate": 9.961151750884758e-05, "loss": 1.0979, "step": 767 }, { "epoch": 0.06862197600911385, "grad_norm": 0.41421815752983093, "learning_rate": 9.960971506618152e-05, "loss": 1.0429, "step": 768 }, { "epoch": 0.06871132754037572, "grad_norm": 0.49965426325798035, "learning_rate": 9.960790846817303e-05, "loss": 1.1674, "step": 769 }, { "epoch": 0.06880067907163759, "grad_norm": 0.4487605392932892, "learning_rate": 9.960609771497341e-05, "loss": 1.1274, "step": 770 }, { "epoch": 0.06889003060289946, "grad_norm": 0.5031136870384216, "learning_rate": 9.960428280673435e-05, "loss": 1.0788, "step": 771 }, { "epoch": 0.06897938213416133, "grad_norm": 0.4574654996395111, "learning_rate": 9.960246374360787e-05, "loss": 1.1115, "step": 772 }, { "epoch": 0.0690687336654232, "grad_norm": 0.3996691405773163, "learning_rate": 9.960064052574632e-05, "loss": 1.1178, "step": 773 }, { "epoch": 0.06915808519668505, "grad_norm": 0.45163261890411377, "learning_rate": 9.959881315330241e-05, "loss": 1.1403, "step": 774 }, { "epoch": 0.06924743672794692, "grad_norm": 0.48119327425956726, "learning_rate": 9.959698162642923e-05, "loss": 1.1177, "step": 775 }, { "epoch": 0.06933678825920879, "grad_norm": 0.4419304132461548, "learning_rate": 9.959514594528018e-05, "loss": 1.0816, "step": 776 }, { "epoch": 0.06942613979047066, "grad_norm": 0.48990410566329956, "learning_rate": 9.959330611000898e-05, "loss": 1.1581, "step": 777 }, { "epoch": 0.06951549132173253, "grad_norm": 0.4900878071784973, "learning_rate": 9.95914621207698e-05, "loss": 1.0604, "step": 778 }, { "epoch": 0.0696048428529944, "grad_norm": 0.473408967256546, "learning_rate": 9.958961397771704e-05, "loss": 1.0345, "step": 779 }, { "epoch": 0.06969419438425627, "grad_norm": 0.4891606867313385, "learning_rate": 9.958776168100555e-05, "loss": 1.0432, "step": 780 }, { "epoch": 0.06978354591551812, "grad_norm": 0.5358079075813293, "learning_rate": 9.958590523079041e-05, "loss": 1.1014, "step": 781 }, { "epoch": 0.06987289744677999, "grad_norm": 0.455711305141449, "learning_rate": 9.95840446272272e-05, "loss": 1.0498, "step": 782 }, { "epoch": 0.06996224897804186, "grad_norm": 0.42493581771850586, "learning_rate": 9.958217987047169e-05, "loss": 1.0354, "step": 783 }, { "epoch": 0.07005160050930373, "grad_norm": 0.46666839718818665, "learning_rate": 9.958031096068012e-05, "loss": 1.0204, "step": 784 }, { "epoch": 0.0701409520405656, "grad_norm": 0.40961867570877075, "learning_rate": 9.957843789800902e-05, "loss": 1.1588, "step": 785 }, { "epoch": 0.07023030357182747, "grad_norm": 0.4467061758041382, "learning_rate": 9.957656068261527e-05, "loss": 1.0933, "step": 786 }, { "epoch": 0.07031965510308932, "grad_norm": 0.4334403872489929, "learning_rate": 9.957467931465613e-05, "loss": 1.1161, "step": 787 }, { "epoch": 0.07040900663435119, "grad_norm": 0.5627568364143372, "learning_rate": 9.957279379428917e-05, "loss": 1.0551, "step": 788 }, { "epoch": 0.07049835816561306, "grad_norm": 0.422397643327713, "learning_rate": 9.95709041216723e-05, "loss": 1.1089, "step": 789 }, { "epoch": 0.07058770969687493, "grad_norm": 0.5722343325614929, "learning_rate": 9.956901029696384e-05, "loss": 1.0943, "step": 790 }, { "epoch": 0.0706770612281368, "grad_norm": 0.4556920826435089, "learning_rate": 9.95671123203224e-05, "loss": 1.0813, "step": 791 }, { "epoch": 0.07076641275939867, "grad_norm": 0.43123292922973633, "learning_rate": 9.956521019190694e-05, "loss": 1.0318, "step": 792 }, { "epoch": 0.07085576429066053, "grad_norm": 0.5033695101737976, "learning_rate": 9.956330391187682e-05, "loss": 1.0647, "step": 793 }, { "epoch": 0.0709451158219224, "grad_norm": 0.45806631445884705, "learning_rate": 9.956139348039168e-05, "loss": 1.1287, "step": 794 }, { "epoch": 0.07103446735318426, "grad_norm": 0.43176013231277466, "learning_rate": 9.955947889761155e-05, "loss": 1.061, "step": 795 }, { "epoch": 0.07112381888444613, "grad_norm": 0.4724084138870239, "learning_rate": 9.95575601636968e-05, "loss": 1.0868, "step": 796 }, { "epoch": 0.071213170415708, "grad_norm": 0.4638439416885376, "learning_rate": 9.955563727880814e-05, "loss": 1.0874, "step": 797 }, { "epoch": 0.07130252194696987, "grad_norm": 0.4180169403553009, "learning_rate": 9.955371024310662e-05, "loss": 1.1168, "step": 798 }, { "epoch": 0.07139187347823173, "grad_norm": 0.455720454454422, "learning_rate": 9.955177905675367e-05, "loss": 1.1141, "step": 799 }, { "epoch": 0.0714812250094936, "grad_norm": 0.4505751132965088, "learning_rate": 9.954984371991105e-05, "loss": 1.0927, "step": 800 }, { "epoch": 0.07157057654075547, "grad_norm": 0.4087337553501129, "learning_rate": 9.954790423274085e-05, "loss": 1.104, "step": 801 }, { "epoch": 0.07165992807201733, "grad_norm": 0.46492061018943787, "learning_rate": 9.954596059540553e-05, "loss": 1.0794, "step": 802 }, { "epoch": 0.0717492796032792, "grad_norm": 0.4779999554157257, "learning_rate": 9.954401280806789e-05, "loss": 1.0552, "step": 803 }, { "epoch": 0.07183863113454107, "grad_norm": 0.480494886636734, "learning_rate": 9.954206087089107e-05, "loss": 1.0328, "step": 804 }, { "epoch": 0.07192798266580294, "grad_norm": 0.40479815006256104, "learning_rate": 9.954010478403857e-05, "loss": 1.0973, "step": 805 }, { "epoch": 0.0720173341970648, "grad_norm": 0.49492841958999634, "learning_rate": 9.953814454767423e-05, "loss": 1.0628, "step": 806 }, { "epoch": 0.07210668572832667, "grad_norm": 0.49477580189704895, "learning_rate": 9.953618016196224e-05, "loss": 1.083, "step": 807 }, { "epoch": 0.07219603725958854, "grad_norm": 0.4487774670124054, "learning_rate": 9.953421162706717e-05, "loss": 1.0459, "step": 808 }, { "epoch": 0.0722853887908504, "grad_norm": 0.4162748157978058, "learning_rate": 9.953223894315386e-05, "loss": 1.1345, "step": 809 }, { "epoch": 0.07237474032211227, "grad_norm": 0.4790276288986206, "learning_rate": 9.953026211038757e-05, "loss": 1.1452, "step": 810 }, { "epoch": 0.07246409185337414, "grad_norm": 0.4182452857494354, "learning_rate": 9.952828112893388e-05, "loss": 1.06, "step": 811 }, { "epoch": 0.072553443384636, "grad_norm": 0.4895060360431671, "learning_rate": 9.95262959989587e-05, "loss": 1.0784, "step": 812 }, { "epoch": 0.07264279491589787, "grad_norm": 0.44416457414627075, "learning_rate": 9.952430672062831e-05, "loss": 1.1275, "step": 813 }, { "epoch": 0.07273214644715974, "grad_norm": 0.3927420377731323, "learning_rate": 9.952231329410936e-05, "loss": 1.1797, "step": 814 }, { "epoch": 0.0728214979784216, "grad_norm": 0.43822696805000305, "learning_rate": 9.952031571956878e-05, "loss": 1.074, "step": 815 }, { "epoch": 0.07291084950968348, "grad_norm": 0.44089022278785706, "learning_rate": 9.951831399717394e-05, "loss": 1.0785, "step": 816 }, { "epoch": 0.07300020104094535, "grad_norm": 0.4124377369880676, "learning_rate": 9.951630812709245e-05, "loss": 1.065, "step": 817 }, { "epoch": 0.0730895525722072, "grad_norm": 0.43494531512260437, "learning_rate": 9.951429810949237e-05, "loss": 1.0736, "step": 818 }, { "epoch": 0.07317890410346907, "grad_norm": 0.4180351793766022, "learning_rate": 9.951228394454201e-05, "loss": 1.0566, "step": 819 }, { "epoch": 0.07326825563473094, "grad_norm": 0.4254007637500763, "learning_rate": 9.951026563241014e-05, "loss": 1.079, "step": 820 }, { "epoch": 0.07335760716599281, "grad_norm": 0.42448723316192627, "learning_rate": 9.950824317326577e-05, "loss": 1.1101, "step": 821 }, { "epoch": 0.07344695869725468, "grad_norm": 0.4987415075302124, "learning_rate": 9.95062165672783e-05, "loss": 1.0266, "step": 822 }, { "epoch": 0.07353631022851655, "grad_norm": 0.4084853529930115, "learning_rate": 9.95041858146175e-05, "loss": 1.0966, "step": 823 }, { "epoch": 0.0736256617597784, "grad_norm": 0.4334554374217987, "learning_rate": 9.950215091545347e-05, "loss": 1.11, "step": 824 }, { "epoch": 0.07371501329104027, "grad_norm": 0.5075578093528748, "learning_rate": 9.950011186995665e-05, "loss": 1.014, "step": 825 }, { "epoch": 0.07380436482230214, "grad_norm": 0.4870114028453827, "learning_rate": 9.94980686782978e-05, "loss": 1.0139, "step": 826 }, { "epoch": 0.07389371635356401, "grad_norm": 0.44856953620910645, "learning_rate": 9.949602134064812e-05, "loss": 1.089, "step": 827 }, { "epoch": 0.07398306788482588, "grad_norm": 0.48632118105888367, "learning_rate": 9.949396985717904e-05, "loss": 1.1153, "step": 828 }, { "epoch": 0.07407241941608775, "grad_norm": 0.44190341234207153, "learning_rate": 9.949191422806244e-05, "loss": 1.2339, "step": 829 }, { "epoch": 0.0741617709473496, "grad_norm": 0.4298502504825592, "learning_rate": 9.948985445347046e-05, "loss": 1.1638, "step": 830 }, { "epoch": 0.07425112247861147, "grad_norm": 0.4637526869773865, "learning_rate": 9.948779053357564e-05, "loss": 1.0537, "step": 831 }, { "epoch": 0.07434047400987334, "grad_norm": 0.4532431364059448, "learning_rate": 9.948572246855086e-05, "loss": 1.0724, "step": 832 }, { "epoch": 0.07442982554113521, "grad_norm": 0.52618807554245, "learning_rate": 9.948365025856936e-05, "loss": 1.0945, "step": 833 }, { "epoch": 0.07451917707239708, "grad_norm": 0.4329455494880676, "learning_rate": 9.948157390380468e-05, "loss": 1.0166, "step": 834 }, { "epoch": 0.07460852860365895, "grad_norm": 0.4594184160232544, "learning_rate": 9.947949340443076e-05, "loss": 1.1553, "step": 835 }, { "epoch": 0.07469788013492082, "grad_norm": 0.4881901443004608, "learning_rate": 9.947740876062185e-05, "loss": 1.0782, "step": 836 }, { "epoch": 0.07478723166618267, "grad_norm": 0.5016196370124817, "learning_rate": 9.947531997255256e-05, "loss": 1.0892, "step": 837 }, { "epoch": 0.07487658319744454, "grad_norm": 0.4345821738243103, "learning_rate": 9.947322704039785e-05, "loss": 1.1057, "step": 838 }, { "epoch": 0.07496593472870641, "grad_norm": 0.4423231780529022, "learning_rate": 9.947112996433305e-05, "loss": 1.0884, "step": 839 }, { "epoch": 0.07505528625996828, "grad_norm": 0.5169707536697388, "learning_rate": 9.946902874453376e-05, "loss": 1.015, "step": 840 }, { "epoch": 0.07514463779123015, "grad_norm": 0.44377315044403076, "learning_rate": 9.946692338117603e-05, "loss": 1.0713, "step": 841 }, { "epoch": 0.07523398932249202, "grad_norm": 0.4438510239124298, "learning_rate": 9.94648138744362e-05, "loss": 1.0522, "step": 842 }, { "epoch": 0.07532334085375388, "grad_norm": 0.5507922768592834, "learning_rate": 9.946270022449093e-05, "loss": 1.0294, "step": 843 }, { "epoch": 0.07541269238501574, "grad_norm": 0.48594483733177185, "learning_rate": 9.946058243151728e-05, "loss": 1.0541, "step": 844 }, { "epoch": 0.07550204391627761, "grad_norm": 0.4724047780036926, "learning_rate": 9.945846049569265e-05, "loss": 1.0744, "step": 845 }, { "epoch": 0.07559139544753948, "grad_norm": 0.5080140233039856, "learning_rate": 9.945633441719476e-05, "loss": 1.0812, "step": 846 }, { "epoch": 0.07568074697880135, "grad_norm": 0.44215860962867737, "learning_rate": 9.945420419620171e-05, "loss": 1.1124, "step": 847 }, { "epoch": 0.07577009851006322, "grad_norm": 0.43543803691864014, "learning_rate": 9.94520698328919e-05, "loss": 1.108, "step": 848 }, { "epoch": 0.07585945004132508, "grad_norm": 0.4514320194721222, "learning_rate": 9.944993132744411e-05, "loss": 1.0689, "step": 849 }, { "epoch": 0.07594880157258695, "grad_norm": 0.4318578243255615, "learning_rate": 9.94477886800375e-05, "loss": 1.0977, "step": 850 }, { "epoch": 0.07603815310384882, "grad_norm": 0.4266031086444855, "learning_rate": 9.944564189085149e-05, "loss": 1.0782, "step": 851 }, { "epoch": 0.07612750463511068, "grad_norm": 0.3994167149066925, "learning_rate": 9.944349096006593e-05, "loss": 1.1211, "step": 852 }, { "epoch": 0.07621685616637255, "grad_norm": 0.4137265980243683, "learning_rate": 9.944133588786097e-05, "loss": 1.0609, "step": 853 }, { "epoch": 0.07630620769763442, "grad_norm": 0.4087986648082733, "learning_rate": 9.943917667441712e-05, "loss": 1.0968, "step": 854 }, { "epoch": 0.07639555922889628, "grad_norm": 0.4582999646663666, "learning_rate": 9.943701331991524e-05, "loss": 1.0594, "step": 855 }, { "epoch": 0.07648491076015815, "grad_norm": 0.44180095195770264, "learning_rate": 9.943484582453653e-05, "loss": 1.0959, "step": 856 }, { "epoch": 0.07657426229142002, "grad_norm": 0.5081307291984558, "learning_rate": 9.943267418846256e-05, "loss": 1.0916, "step": 857 }, { "epoch": 0.07666361382268189, "grad_norm": 0.4701635241508484, "learning_rate": 9.94304984118752e-05, "loss": 1.0602, "step": 858 }, { "epoch": 0.07675296535394376, "grad_norm": 0.42284098267555237, "learning_rate": 9.942831849495671e-05, "loss": 1.1025, "step": 859 }, { "epoch": 0.07684231688520562, "grad_norm": 0.4433005154132843, "learning_rate": 9.942613443788967e-05, "loss": 1.1403, "step": 860 }, { "epoch": 0.0769316684164675, "grad_norm": 0.41836610436439514, "learning_rate": 9.942394624085703e-05, "loss": 1.0193, "step": 861 }, { "epoch": 0.07702101994772935, "grad_norm": 0.5095844268798828, "learning_rate": 9.942175390404208e-05, "loss": 1.0305, "step": 862 }, { "epoch": 0.07711037147899122, "grad_norm": 0.4428594410419464, "learning_rate": 9.941955742762843e-05, "loss": 1.0768, "step": 863 }, { "epoch": 0.07719972301025309, "grad_norm": 0.5194300413131714, "learning_rate": 9.941735681180009e-05, "loss": 1.0837, "step": 864 }, { "epoch": 0.07728907454151496, "grad_norm": 0.5022509098052979, "learning_rate": 9.941515205674134e-05, "loss": 1.0751, "step": 865 }, { "epoch": 0.07737842607277683, "grad_norm": 0.45329055190086365, "learning_rate": 9.94129431626369e-05, "loss": 1.1309, "step": 866 }, { "epoch": 0.0774677776040387, "grad_norm": 0.44875457882881165, "learning_rate": 9.941073012967174e-05, "loss": 0.9794, "step": 867 }, { "epoch": 0.07755712913530055, "grad_norm": 0.5031179189682007, "learning_rate": 9.940851295803128e-05, "loss": 1.086, "step": 868 }, { "epoch": 0.07764648066656242, "grad_norm": 0.42640596628189087, "learning_rate": 9.94062916479012e-05, "loss": 1.068, "step": 869 }, { "epoch": 0.07773583219782429, "grad_norm": 0.39963454008102417, "learning_rate": 9.940406619946754e-05, "loss": 1.1031, "step": 870 }, { "epoch": 0.07782518372908616, "grad_norm": 0.42264753580093384, "learning_rate": 9.940183661291674e-05, "loss": 1.0831, "step": 871 }, { "epoch": 0.07791453526034803, "grad_norm": 0.45042434334754944, "learning_rate": 9.939960288843553e-05, "loss": 1.0708, "step": 872 }, { "epoch": 0.0780038867916099, "grad_norm": 0.41273242235183716, "learning_rate": 9.939736502621104e-05, "loss": 1.122, "step": 873 }, { "epoch": 0.07809323832287175, "grad_norm": 0.43225687742233276, "learning_rate": 9.939512302643066e-05, "loss": 1.1367, "step": 874 }, { "epoch": 0.07818258985413362, "grad_norm": 0.4413902759552002, "learning_rate": 9.939287688928223e-05, "loss": 1.0814, "step": 875 }, { "epoch": 0.07827194138539549, "grad_norm": 0.4078238308429718, "learning_rate": 9.939062661495386e-05, "loss": 1.0772, "step": 876 }, { "epoch": 0.07836129291665736, "grad_norm": 0.3733663260936737, "learning_rate": 9.938837220363406e-05, "loss": 1.1371, "step": 877 }, { "epoch": 0.07845064444791923, "grad_norm": 0.39089709520339966, "learning_rate": 9.938611365551164e-05, "loss": 1.0742, "step": 878 }, { "epoch": 0.0785399959791811, "grad_norm": 0.41924816370010376, "learning_rate": 9.938385097077579e-05, "loss": 1.1673, "step": 879 }, { "epoch": 0.07862934751044295, "grad_norm": 0.5209245681762695, "learning_rate": 9.9381584149616e-05, "loss": 1.0325, "step": 880 }, { "epoch": 0.07871869904170482, "grad_norm": 0.38127946853637695, "learning_rate": 9.937931319222218e-05, "loss": 1.1191, "step": 881 }, { "epoch": 0.07880805057296669, "grad_norm": 0.4516022503376007, "learning_rate": 9.937703809878455e-05, "loss": 1.0864, "step": 882 }, { "epoch": 0.07889740210422856, "grad_norm": 0.5131936073303223, "learning_rate": 9.937475886949364e-05, "loss": 1.0033, "step": 883 }, { "epoch": 0.07898675363549043, "grad_norm": 0.3989183008670807, "learning_rate": 9.937247550454039e-05, "loss": 1.1386, "step": 884 }, { "epoch": 0.0790761051667523, "grad_norm": 0.4165286123752594, "learning_rate": 9.937018800411604e-05, "loss": 1.131, "step": 885 }, { "epoch": 0.07916545669801416, "grad_norm": 0.49382612109184265, "learning_rate": 9.936789636841219e-05, "loss": 1.0344, "step": 886 }, { "epoch": 0.07925480822927602, "grad_norm": 0.4288516938686371, "learning_rate": 9.93656005976208e-05, "loss": 1.0715, "step": 887 }, { "epoch": 0.0793441597605379, "grad_norm": 0.5689799785614014, "learning_rate": 9.936330069193415e-05, "loss": 1.0104, "step": 888 }, { "epoch": 0.07943351129179976, "grad_norm": 0.40826788544654846, "learning_rate": 9.936099665154491e-05, "loss": 1.1411, "step": 889 }, { "epoch": 0.07952286282306163, "grad_norm": 0.4648611843585968, "learning_rate": 9.935868847664605e-05, "loss": 1.1341, "step": 890 }, { "epoch": 0.0796122143543235, "grad_norm": 0.45681998133659363, "learning_rate": 9.935637616743089e-05, "loss": 1.1331, "step": 891 }, { "epoch": 0.07970156588558537, "grad_norm": 0.4700930416584015, "learning_rate": 9.935405972409313e-05, "loss": 1.0357, "step": 892 }, { "epoch": 0.07979091741684723, "grad_norm": 0.42509207129478455, "learning_rate": 9.93517391468268e-05, "loss": 1.1024, "step": 893 }, { "epoch": 0.0798802689481091, "grad_norm": 0.46535390615463257, "learning_rate": 9.934941443582626e-05, "loss": 1.1248, "step": 894 }, { "epoch": 0.07996962047937096, "grad_norm": 0.5609625577926636, "learning_rate": 9.934708559128622e-05, "loss": 0.947, "step": 895 }, { "epoch": 0.08005897201063283, "grad_norm": 0.5007188320159912, "learning_rate": 9.934475261340177e-05, "loss": 1.1, "step": 896 }, { "epoch": 0.0801483235418947, "grad_norm": 0.5065818428993225, "learning_rate": 9.934241550236831e-05, "loss": 1.0299, "step": 897 }, { "epoch": 0.08023767507315657, "grad_norm": 0.44581353664398193, "learning_rate": 9.934007425838161e-05, "loss": 1.1251, "step": 898 }, { "epoch": 0.08032702660441843, "grad_norm": 0.4115321636199951, "learning_rate": 9.933772888163776e-05, "loss": 1.1807, "step": 899 }, { "epoch": 0.0804163781356803, "grad_norm": 0.47303566336631775, "learning_rate": 9.933537937233321e-05, "loss": 1.1472, "step": 900 }, { "epoch": 0.08050572966694217, "grad_norm": 0.4314580261707306, "learning_rate": 9.933302573066477e-05, "loss": 1.089, "step": 901 }, { "epoch": 0.08059508119820404, "grad_norm": 0.47710415720939636, "learning_rate": 9.933066795682955e-05, "loss": 1.0226, "step": 902 }, { "epoch": 0.0806844327294659, "grad_norm": 0.4161222577095032, "learning_rate": 9.932830605102508e-05, "loss": 1.0913, "step": 903 }, { "epoch": 0.08077378426072777, "grad_norm": 0.485124409198761, "learning_rate": 9.932594001344918e-05, "loss": 1.0963, "step": 904 }, { "epoch": 0.08086313579198963, "grad_norm": 0.47519686818122864, "learning_rate": 9.93235698443e-05, "loss": 1.0747, "step": 905 }, { "epoch": 0.0809524873232515, "grad_norm": 0.4680500328540802, "learning_rate": 9.932119554377611e-05, "loss": 1.1009, "step": 906 }, { "epoch": 0.08104183885451337, "grad_norm": 0.49420708417892456, "learning_rate": 9.931881711207638e-05, "loss": 1.0773, "step": 907 }, { "epoch": 0.08113119038577524, "grad_norm": 0.3618301749229431, "learning_rate": 9.93164345494e-05, "loss": 1.0759, "step": 908 }, { "epoch": 0.0812205419170371, "grad_norm": 0.4082967936992645, "learning_rate": 9.931404785594656e-05, "loss": 1.133, "step": 909 }, { "epoch": 0.08130989344829898, "grad_norm": 0.5318341851234436, "learning_rate": 9.931165703191595e-05, "loss": 1.0725, "step": 910 }, { "epoch": 0.08139924497956083, "grad_norm": 0.46658989787101746, "learning_rate": 9.930926207750845e-05, "loss": 1.0376, "step": 911 }, { "epoch": 0.0814885965108227, "grad_norm": 0.5218292474746704, "learning_rate": 9.930686299292464e-05, "loss": 1.0949, "step": 912 }, { "epoch": 0.08157794804208457, "grad_norm": 0.48732948303222656, "learning_rate": 9.930445977836548e-05, "loss": 1.0491, "step": 913 }, { "epoch": 0.08166729957334644, "grad_norm": 0.40489867329597473, "learning_rate": 9.930205243403229e-05, "loss": 1.0856, "step": 914 }, { "epoch": 0.08175665110460831, "grad_norm": 0.5249372720718384, "learning_rate": 9.929964096012668e-05, "loss": 1.0358, "step": 915 }, { "epoch": 0.08184600263587018, "grad_norm": 0.4804689288139343, "learning_rate": 9.929722535685062e-05, "loss": 1.0793, "step": 916 }, { "epoch": 0.08193535416713203, "grad_norm": 0.43911850452423096, "learning_rate": 9.929480562440649e-05, "loss": 1.1816, "step": 917 }, { "epoch": 0.0820247056983939, "grad_norm": 0.4654462933540344, "learning_rate": 9.929238176299693e-05, "loss": 1.1595, "step": 918 }, { "epoch": 0.08211405722965577, "grad_norm": 0.44041964411735535, "learning_rate": 9.928995377282498e-05, "loss": 1.0732, "step": 919 }, { "epoch": 0.08220340876091764, "grad_norm": 0.4381254315376282, "learning_rate": 9.928752165409401e-05, "loss": 1.0728, "step": 920 }, { "epoch": 0.08229276029217951, "grad_norm": 0.5617743730545044, "learning_rate": 9.928508540700774e-05, "loss": 1.0313, "step": 921 }, { "epoch": 0.08238211182344138, "grad_norm": 0.4664022922515869, "learning_rate": 9.928264503177023e-05, "loss": 1.0456, "step": 922 }, { "epoch": 0.08247146335470325, "grad_norm": 0.4458063542842865, "learning_rate": 9.928020052858587e-05, "loss": 1.0782, "step": 923 }, { "epoch": 0.0825608148859651, "grad_norm": 0.44314441084861755, "learning_rate": 9.927775189765943e-05, "loss": 1.0905, "step": 924 }, { "epoch": 0.08265016641722697, "grad_norm": 0.4562947154045105, "learning_rate": 9.927529913919601e-05, "loss": 1.0686, "step": 925 }, { "epoch": 0.08273951794848884, "grad_norm": 0.4639774560928345, "learning_rate": 9.927284225340105e-05, "loss": 1.0761, "step": 926 }, { "epoch": 0.08282886947975071, "grad_norm": 0.4482592046260834, "learning_rate": 9.927038124048034e-05, "loss": 1.0584, "step": 927 }, { "epoch": 0.08291822101101258, "grad_norm": 0.4356086254119873, "learning_rate": 9.926791610064002e-05, "loss": 1.0996, "step": 928 }, { "epoch": 0.08300757254227445, "grad_norm": 0.3952994644641876, "learning_rate": 9.926544683408656e-05, "loss": 1.1176, "step": 929 }, { "epoch": 0.0830969240735363, "grad_norm": 0.4380575716495514, "learning_rate": 9.92629734410268e-05, "loss": 1.0432, "step": 930 }, { "epoch": 0.08318627560479817, "grad_norm": 0.54874587059021, "learning_rate": 9.92604959216679e-05, "loss": 0.9305, "step": 931 }, { "epoch": 0.08327562713606004, "grad_norm": 0.5379965901374817, "learning_rate": 9.925801427621739e-05, "loss": 1.1302, "step": 932 }, { "epoch": 0.08336497866732191, "grad_norm": 0.4604237377643585, "learning_rate": 9.925552850488314e-05, "loss": 1.0884, "step": 933 }, { "epoch": 0.08345433019858378, "grad_norm": 0.4671541750431061, "learning_rate": 9.925303860787335e-05, "loss": 0.9864, "step": 934 }, { "epoch": 0.08354368172984565, "grad_norm": 0.48653197288513184, "learning_rate": 9.925054458539658e-05, "loss": 1.1002, "step": 935 }, { "epoch": 0.0836330332611075, "grad_norm": 0.4400951564311981, "learning_rate": 9.924804643766172e-05, "loss": 1.1034, "step": 936 }, { "epoch": 0.08372238479236938, "grad_norm": 0.4999461770057678, "learning_rate": 9.924554416487802e-05, "loss": 1.1624, "step": 937 }, { "epoch": 0.08381173632363124, "grad_norm": 0.45174461603164673, "learning_rate": 9.92430377672551e-05, "loss": 1.0431, "step": 938 }, { "epoch": 0.08390108785489311, "grad_norm": 0.4591819643974304, "learning_rate": 9.924052724500284e-05, "loss": 1.0554, "step": 939 }, { "epoch": 0.08399043938615498, "grad_norm": 0.4611966609954834, "learning_rate": 9.923801259833159e-05, "loss": 1.0504, "step": 940 }, { "epoch": 0.08407979091741685, "grad_norm": 0.3897397518157959, "learning_rate": 9.923549382745192e-05, "loss": 1.0838, "step": 941 }, { "epoch": 0.08416914244867871, "grad_norm": 0.3815537095069885, "learning_rate": 9.923297093257485e-05, "loss": 1.095, "step": 942 }, { "epoch": 0.08425849397994058, "grad_norm": 0.4189860224723816, "learning_rate": 9.923044391391165e-05, "loss": 1.0924, "step": 943 }, { "epoch": 0.08434784551120245, "grad_norm": 0.45325958728790283, "learning_rate": 9.922791277167404e-05, "loss": 1.0615, "step": 944 }, { "epoch": 0.08443719704246432, "grad_norm": 0.4996306300163269, "learning_rate": 9.9225377506074e-05, "loss": 1.0995, "step": 945 }, { "epoch": 0.08452654857372618, "grad_norm": 0.4180799424648285, "learning_rate": 9.922283811732388e-05, "loss": 1.1514, "step": 946 }, { "epoch": 0.08461590010498805, "grad_norm": 0.433929443359375, "learning_rate": 9.92202946056364e-05, "loss": 1.056, "step": 947 }, { "epoch": 0.08470525163624992, "grad_norm": 0.479571133852005, "learning_rate": 9.921774697122459e-05, "loss": 1.0351, "step": 948 }, { "epoch": 0.08479460316751178, "grad_norm": 0.46040210127830505, "learning_rate": 9.921519521430185e-05, "loss": 0.9939, "step": 949 }, { "epoch": 0.08488395469877365, "grad_norm": 0.43157997727394104, "learning_rate": 9.92126393350819e-05, "loss": 1.0588, "step": 950 }, { "epoch": 0.08497330623003552, "grad_norm": 0.4109559655189514, "learning_rate": 9.921007933377887e-05, "loss": 1.1222, "step": 951 }, { "epoch": 0.08506265776129739, "grad_norm": 0.4423241913318634, "learning_rate": 9.920751521060712e-05, "loss": 1.0276, "step": 952 }, { "epoch": 0.08515200929255926, "grad_norm": 0.454683393239975, "learning_rate": 9.920494696578146e-05, "loss": 1.0437, "step": 953 }, { "epoch": 0.08524136082382112, "grad_norm": 0.47767403721809387, "learning_rate": 9.920237459951702e-05, "loss": 1.0884, "step": 954 }, { "epoch": 0.08533071235508298, "grad_norm": 0.47979000210762024, "learning_rate": 9.919979811202923e-05, "loss": 1.0598, "step": 955 }, { "epoch": 0.08542006388634485, "grad_norm": 0.5115445852279663, "learning_rate": 9.919721750353395e-05, "loss": 1.024, "step": 956 }, { "epoch": 0.08550941541760672, "grad_norm": 0.4317951798439026, "learning_rate": 9.919463277424727e-05, "loss": 1.1506, "step": 957 }, { "epoch": 0.08559876694886859, "grad_norm": 0.4962863624095917, "learning_rate": 9.919204392438573e-05, "loss": 1.0262, "step": 958 }, { "epoch": 0.08568811848013046, "grad_norm": 0.44821789860725403, "learning_rate": 9.918945095416616e-05, "loss": 1.0722, "step": 959 }, { "epoch": 0.08577747001139233, "grad_norm": 0.44719305634498596, "learning_rate": 9.918685386380573e-05, "loss": 1.0362, "step": 960 }, { "epoch": 0.08586682154265418, "grad_norm": 0.4721403121948242, "learning_rate": 9.918425265352202e-05, "loss": 0.972, "step": 961 }, { "epoch": 0.08595617307391605, "grad_norm": 0.43076246976852417, "learning_rate": 9.918164732353288e-05, "loss": 1.0746, "step": 962 }, { "epoch": 0.08604552460517792, "grad_norm": 0.4410285949707031, "learning_rate": 9.917903787405653e-05, "loss": 1.0802, "step": 963 }, { "epoch": 0.08613487613643979, "grad_norm": 0.4716387689113617, "learning_rate": 9.917642430531155e-05, "loss": 1.165, "step": 964 }, { "epoch": 0.08622422766770166, "grad_norm": 0.38331499695777893, "learning_rate": 9.917380661751685e-05, "loss": 1.0947, "step": 965 }, { "epoch": 0.08631357919896353, "grad_norm": 0.47889336943626404, "learning_rate": 9.917118481089169e-05, "loss": 1.0482, "step": 966 }, { "epoch": 0.08640293073022538, "grad_norm": 0.4541834890842438, "learning_rate": 9.916855888565569e-05, "loss": 1.0942, "step": 967 }, { "epoch": 0.08649228226148725, "grad_norm": 0.4245148003101349, "learning_rate": 9.916592884202878e-05, "loss": 1.0635, "step": 968 }, { "epoch": 0.08658163379274912, "grad_norm": 0.40217721462249756, "learning_rate": 9.916329468023124e-05, "loss": 1.0662, "step": 969 }, { "epoch": 0.08667098532401099, "grad_norm": 0.4782769978046417, "learning_rate": 9.916065640048374e-05, "loss": 1.0232, "step": 970 }, { "epoch": 0.08676033685527286, "grad_norm": 0.4472961127758026, "learning_rate": 9.915801400300727e-05, "loss": 1.0723, "step": 971 }, { "epoch": 0.08684968838653473, "grad_norm": 0.4397634267807007, "learning_rate": 9.91553674880231e-05, "loss": 1.1551, "step": 972 }, { "epoch": 0.08693903991779658, "grad_norm": 0.481342077255249, "learning_rate": 9.915271685575297e-05, "loss": 1.1133, "step": 973 }, { "epoch": 0.08702839144905845, "grad_norm": 0.3950006365776062, "learning_rate": 9.915006210641886e-05, "loss": 1.0542, "step": 974 }, { "epoch": 0.08711774298032032, "grad_norm": 0.46636098623275757, "learning_rate": 9.914740324024316e-05, "loss": 1.0893, "step": 975 }, { "epoch": 0.08720709451158219, "grad_norm": 0.48313936591148376, "learning_rate": 9.914474025744856e-05, "loss": 1.0318, "step": 976 }, { "epoch": 0.08729644604284406, "grad_norm": 0.480648934841156, "learning_rate": 9.914207315825812e-05, "loss": 1.1249, "step": 977 }, { "epoch": 0.08738579757410593, "grad_norm": 0.4614575207233429, "learning_rate": 9.913940194289524e-05, "loss": 1.133, "step": 978 }, { "epoch": 0.0874751491053678, "grad_norm": 0.4390462040901184, "learning_rate": 9.913672661158364e-05, "loss": 1.1004, "step": 979 }, { "epoch": 0.08756450063662966, "grad_norm": 0.5013588070869446, "learning_rate": 9.913404716454744e-05, "loss": 1.1423, "step": 980 }, { "epoch": 0.08765385216789152, "grad_norm": 0.40994441509246826, "learning_rate": 9.913136360201106e-05, "loss": 1.058, "step": 981 }, { "epoch": 0.0877432036991534, "grad_norm": 0.4297054409980774, "learning_rate": 9.912867592419928e-05, "loss": 1.0135, "step": 982 }, { "epoch": 0.08783255523041526, "grad_norm": 0.4328972399234772, "learning_rate": 9.91259841313372e-05, "loss": 1.1261, "step": 983 }, { "epoch": 0.08792190676167713, "grad_norm": 0.42971837520599365, "learning_rate": 9.912328822365033e-05, "loss": 1.1578, "step": 984 }, { "epoch": 0.088011258292939, "grad_norm": 0.4185878336429596, "learning_rate": 9.912058820136443e-05, "loss": 1.0351, "step": 985 }, { "epoch": 0.08810060982420086, "grad_norm": 0.4461815655231476, "learning_rate": 9.911788406470569e-05, "loss": 1.0864, "step": 986 }, { "epoch": 0.08818996135546273, "grad_norm": 0.402701199054718, "learning_rate": 9.911517581390059e-05, "loss": 1.1004, "step": 987 }, { "epoch": 0.0882793128867246, "grad_norm": 0.4615456163883209, "learning_rate": 9.9112463449176e-05, "loss": 1.0427, "step": 988 }, { "epoch": 0.08836866441798646, "grad_norm": 0.49826958775520325, "learning_rate": 9.91097469707591e-05, "loss": 1.0445, "step": 989 }, { "epoch": 0.08845801594924833, "grad_norm": 0.5109001994132996, "learning_rate": 9.91070263788774e-05, "loss": 1.0151, "step": 990 }, { "epoch": 0.0885473674805102, "grad_norm": 0.4601687788963318, "learning_rate": 9.910430167375881e-05, "loss": 0.9897, "step": 991 }, { "epoch": 0.08863671901177206, "grad_norm": 0.48032835125923157, "learning_rate": 9.910157285563154e-05, "loss": 1.1166, "step": 992 }, { "epoch": 0.08872607054303393, "grad_norm": 0.3616820275783539, "learning_rate": 9.909883992472415e-05, "loss": 1.1109, "step": 993 }, { "epoch": 0.0888154220742958, "grad_norm": 0.5358753204345703, "learning_rate": 9.909610288126557e-05, "loss": 1.0476, "step": 994 }, { "epoch": 0.08890477360555767, "grad_norm": 0.4149110019207001, "learning_rate": 9.909336172548505e-05, "loss": 1.068, "step": 995 }, { "epoch": 0.08899412513681954, "grad_norm": 0.43261730670928955, "learning_rate": 9.909061645761217e-05, "loss": 1.0922, "step": 996 }, { "epoch": 0.0890834766680814, "grad_norm": 0.504966139793396, "learning_rate": 9.90878670778769e-05, "loss": 1.0179, "step": 997 }, { "epoch": 0.08917282819934326, "grad_norm": 0.42445361614227295, "learning_rate": 9.908511358650953e-05, "loss": 1.0927, "step": 998 }, { "epoch": 0.08926217973060513, "grad_norm": 0.4488779306411743, "learning_rate": 9.908235598374069e-05, "loss": 1.0652, "step": 999 }, { "epoch": 0.089351531261867, "grad_norm": 0.4371225833892822, "learning_rate": 9.907959426980136e-05, "loss": 1.0874, "step": 1000 }, { "epoch": 0.08944088279312887, "grad_norm": 0.3913322389125824, "learning_rate": 9.907682844492284e-05, "loss": 1.0903, "step": 1001 }, { "epoch": 0.08953023432439074, "grad_norm": 0.38679686188697815, "learning_rate": 9.907405850933681e-05, "loss": 1.0884, "step": 1002 }, { "epoch": 0.0896195858556526, "grad_norm": 0.4290361702442169, "learning_rate": 9.907128446327531e-05, "loss": 1.087, "step": 1003 }, { "epoch": 0.08970893738691448, "grad_norm": 0.42051953077316284, "learning_rate": 9.906850630697068e-05, "loss": 1.1921, "step": 1004 }, { "epoch": 0.08979828891817633, "grad_norm": 0.4305054247379303, "learning_rate": 9.90657240406556e-05, "loss": 1.1087, "step": 1005 }, { "epoch": 0.0898876404494382, "grad_norm": 0.4126817286014557, "learning_rate": 9.906293766456312e-05, "loss": 1.0889, "step": 1006 }, { "epoch": 0.08997699198070007, "grad_norm": 0.4437747895717621, "learning_rate": 9.906014717892666e-05, "loss": 1.0852, "step": 1007 }, { "epoch": 0.09006634351196194, "grad_norm": 0.40008100867271423, "learning_rate": 9.905735258397993e-05, "loss": 1.0639, "step": 1008 }, { "epoch": 0.09015569504322381, "grad_norm": 0.4763084650039673, "learning_rate": 9.905455387995699e-05, "loss": 0.9696, "step": 1009 }, { "epoch": 0.09024504657448568, "grad_norm": 0.5551483631134033, "learning_rate": 9.905175106709228e-05, "loss": 1.0921, "step": 1010 }, { "epoch": 0.09033439810574753, "grad_norm": 0.398017555475235, "learning_rate": 9.904894414562056e-05, "loss": 1.0868, "step": 1011 }, { "epoch": 0.0904237496370094, "grad_norm": 0.41361182928085327, "learning_rate": 9.904613311577695e-05, "loss": 1.0827, "step": 1012 }, { "epoch": 0.09051310116827127, "grad_norm": 0.43735066056251526, "learning_rate": 9.90433179777969e-05, "loss": 1.0752, "step": 1013 }, { "epoch": 0.09060245269953314, "grad_norm": 0.43464162945747375, "learning_rate": 9.904049873191621e-05, "loss": 1.1453, "step": 1014 }, { "epoch": 0.09069180423079501, "grad_norm": 0.42488962411880493, "learning_rate": 9.903767537837101e-05, "loss": 1.0359, "step": 1015 }, { "epoch": 0.09078115576205688, "grad_norm": 0.4419018626213074, "learning_rate": 9.90348479173978e-05, "loss": 1.0873, "step": 1016 }, { "epoch": 0.09087050729331873, "grad_norm": 0.45977696776390076, "learning_rate": 9.903201634923338e-05, "loss": 1.0001, "step": 1017 }, { "epoch": 0.0909598588245806, "grad_norm": 0.4522443115711212, "learning_rate": 9.902918067411497e-05, "loss": 1.0555, "step": 1018 }, { "epoch": 0.09104921035584247, "grad_norm": 0.43287041783332825, "learning_rate": 9.902634089228007e-05, "loss": 1.0233, "step": 1019 }, { "epoch": 0.09113856188710434, "grad_norm": 0.5111227035522461, "learning_rate": 9.902349700396651e-05, "loss": 0.9717, "step": 1020 }, { "epoch": 0.09122791341836621, "grad_norm": 0.44105076789855957, "learning_rate": 9.902064900941255e-05, "loss": 0.9953, "step": 1021 }, { "epoch": 0.09131726494962808, "grad_norm": 0.46010032296180725, "learning_rate": 9.90177969088567e-05, "loss": 1.0938, "step": 1022 }, { "epoch": 0.09140661648088994, "grad_norm": 0.4434414803981781, "learning_rate": 9.901494070253788e-05, "loss": 1.1415, "step": 1023 }, { "epoch": 0.0914959680121518, "grad_norm": 0.4490472972393036, "learning_rate": 9.90120803906953e-05, "loss": 1.0611, "step": 1024 }, { "epoch": 0.09158531954341367, "grad_norm": 0.3900289833545685, "learning_rate": 9.900921597356856e-05, "loss": 1.1096, "step": 1025 }, { "epoch": 0.09167467107467554, "grad_norm": 0.478982150554657, "learning_rate": 9.900634745139758e-05, "loss": 1.0677, "step": 1026 }, { "epoch": 0.09176402260593741, "grad_norm": 0.37474942207336426, "learning_rate": 9.900347482442262e-05, "loss": 1.0386, "step": 1027 }, { "epoch": 0.09185337413719928, "grad_norm": 0.5137802958488464, "learning_rate": 9.900059809288431e-05, "loss": 1.0332, "step": 1028 }, { "epoch": 0.09194272566846114, "grad_norm": 0.4314192831516266, "learning_rate": 9.899771725702362e-05, "loss": 1.0801, "step": 1029 }, { "epoch": 0.092032077199723, "grad_norm": 0.48988327383995056, "learning_rate": 9.899483231708181e-05, "loss": 1.0857, "step": 1030 }, { "epoch": 0.09212142873098488, "grad_norm": 0.4243643283843994, "learning_rate": 9.899194327330056e-05, "loss": 1.096, "step": 1031 }, { "epoch": 0.09221078026224674, "grad_norm": 0.4380730986595154, "learning_rate": 9.898905012592183e-05, "loss": 1.0709, "step": 1032 }, { "epoch": 0.09230013179350861, "grad_norm": 0.4659455716609955, "learning_rate": 9.898615287518798e-05, "loss": 1.0621, "step": 1033 }, { "epoch": 0.09238948332477048, "grad_norm": 0.46971428394317627, "learning_rate": 9.898325152134167e-05, "loss": 1.1165, "step": 1034 }, { "epoch": 0.09247883485603235, "grad_norm": 0.47089582681655884, "learning_rate": 9.898034606462592e-05, "loss": 1.1352, "step": 1035 }, { "epoch": 0.09256818638729421, "grad_norm": 0.44664955139160156, "learning_rate": 9.897743650528408e-05, "loss": 1.0728, "step": 1036 }, { "epoch": 0.09265753791855608, "grad_norm": 0.4320451617240906, "learning_rate": 9.897452284355989e-05, "loss": 1.1007, "step": 1037 }, { "epoch": 0.09274688944981795, "grad_norm": 0.4943344295024872, "learning_rate": 9.897160507969738e-05, "loss": 1.0915, "step": 1038 }, { "epoch": 0.09283624098107982, "grad_norm": 0.43696850538253784, "learning_rate": 9.896868321394093e-05, "loss": 1.1057, "step": 1039 }, { "epoch": 0.09292559251234168, "grad_norm": 0.39653998613357544, "learning_rate": 9.896575724653529e-05, "loss": 1.0422, "step": 1040 }, { "epoch": 0.09301494404360355, "grad_norm": 0.4009700417518616, "learning_rate": 9.896282717772556e-05, "loss": 1.0917, "step": 1041 }, { "epoch": 0.09310429557486541, "grad_norm": 0.4337768852710724, "learning_rate": 9.895989300775714e-05, "loss": 1.089, "step": 1042 }, { "epoch": 0.09319364710612728, "grad_norm": 0.4776664078235626, "learning_rate": 9.895695473687581e-05, "loss": 1.0316, "step": 1043 }, { "epoch": 0.09328299863738915, "grad_norm": 0.4160580337047577, "learning_rate": 9.895401236532769e-05, "loss": 1.0437, "step": 1044 }, { "epoch": 0.09337235016865102, "grad_norm": 0.4131687879562378, "learning_rate": 9.895106589335919e-05, "loss": 1.0765, "step": 1045 }, { "epoch": 0.09346170169991289, "grad_norm": 0.39853063225746155, "learning_rate": 9.894811532121716e-05, "loss": 1.0726, "step": 1046 }, { "epoch": 0.09355105323117476, "grad_norm": 0.46733424067497253, "learning_rate": 9.894516064914871e-05, "loss": 1.0561, "step": 1047 }, { "epoch": 0.09364040476243661, "grad_norm": 0.3866161108016968, "learning_rate": 9.894220187740135e-05, "loss": 1.0816, "step": 1048 }, { "epoch": 0.09372975629369848, "grad_norm": 0.42076459527015686, "learning_rate": 9.89392390062229e-05, "loss": 0.9866, "step": 1049 }, { "epoch": 0.09381910782496035, "grad_norm": 0.4648950695991516, "learning_rate": 9.893627203586152e-05, "loss": 1.0732, "step": 1050 }, { "epoch": 0.09390845935622222, "grad_norm": 0.4551086723804474, "learning_rate": 9.893330096656574e-05, "loss": 1.1067, "step": 1051 }, { "epoch": 0.09399781088748409, "grad_norm": 0.43443188071250916, "learning_rate": 9.893032579858442e-05, "loss": 1.0777, "step": 1052 }, { "epoch": 0.09408716241874596, "grad_norm": 0.4357977509498596, "learning_rate": 9.892734653216673e-05, "loss": 0.995, "step": 1053 }, { "epoch": 0.09417651395000781, "grad_norm": 0.4856785833835602, "learning_rate": 9.892436316756226e-05, "loss": 1.0041, "step": 1054 }, { "epoch": 0.09426586548126968, "grad_norm": 0.4476493299007416, "learning_rate": 9.892137570502087e-05, "loss": 1.0459, "step": 1055 }, { "epoch": 0.09435521701253155, "grad_norm": 0.4645354449748993, "learning_rate": 9.89183841447928e-05, "loss": 1.1058, "step": 1056 }, { "epoch": 0.09444456854379342, "grad_norm": 0.45503050088882446, "learning_rate": 9.891538848712863e-05, "loss": 1.0721, "step": 1057 }, { "epoch": 0.09453392007505529, "grad_norm": 0.4932888448238373, "learning_rate": 9.891238873227925e-05, "loss": 1.0064, "step": 1058 }, { "epoch": 0.09462327160631716, "grad_norm": 0.43276292085647583, "learning_rate": 9.890938488049597e-05, "loss": 1.1481, "step": 1059 }, { "epoch": 0.09471262313757901, "grad_norm": 0.4331585764884949, "learning_rate": 9.890637693203038e-05, "loss": 1.0468, "step": 1060 }, { "epoch": 0.09480197466884088, "grad_norm": 0.5495041608810425, "learning_rate": 9.89033648871344e-05, "loss": 1.0436, "step": 1061 }, { "epoch": 0.09489132620010275, "grad_norm": 0.47634175419807434, "learning_rate": 9.890034874606033e-05, "loss": 1.0175, "step": 1062 }, { "epoch": 0.09498067773136462, "grad_norm": 0.400700181722641, "learning_rate": 9.889732850906083e-05, "loss": 1.1379, "step": 1063 }, { "epoch": 0.09507002926262649, "grad_norm": 0.49066829681396484, "learning_rate": 9.889430417638885e-05, "loss": 1.115, "step": 1064 }, { "epoch": 0.09515938079388836, "grad_norm": 0.3815631568431854, "learning_rate": 9.889127574829773e-05, "loss": 1.1274, "step": 1065 }, { "epoch": 0.09524873232515023, "grad_norm": 0.5222175717353821, "learning_rate": 9.88882432250411e-05, "loss": 1.0232, "step": 1066 }, { "epoch": 0.09533808385641208, "grad_norm": 0.4751056134700775, "learning_rate": 9.888520660687302e-05, "loss": 1.1066, "step": 1067 }, { "epoch": 0.09542743538767395, "grad_norm": 0.5163382887840271, "learning_rate": 9.888216589404779e-05, "loss": 1.0504, "step": 1068 }, { "epoch": 0.09551678691893582, "grad_norm": 0.5940535068511963, "learning_rate": 9.887912108682011e-05, "loss": 1.0346, "step": 1069 }, { "epoch": 0.09560613845019769, "grad_norm": 0.38179200887680054, "learning_rate": 9.887607218544503e-05, "loss": 1.1381, "step": 1070 }, { "epoch": 0.09569548998145956, "grad_norm": 0.3916855454444885, "learning_rate": 9.887301919017794e-05, "loss": 1.1062, "step": 1071 }, { "epoch": 0.09578484151272143, "grad_norm": 0.48447951674461365, "learning_rate": 9.886996210127452e-05, "loss": 1.0219, "step": 1072 }, { "epoch": 0.09587419304398329, "grad_norm": 0.4082842171192169, "learning_rate": 9.886690091899088e-05, "loss": 1.0546, "step": 1073 }, { "epoch": 0.09596354457524515, "grad_norm": 0.4271855354309082, "learning_rate": 9.886383564358339e-05, "loss": 1.05, "step": 1074 }, { "epoch": 0.09605289610650702, "grad_norm": 0.37129080295562744, "learning_rate": 9.886076627530883e-05, "loss": 1.0979, "step": 1075 }, { "epoch": 0.0961422476377689, "grad_norm": 0.4285633862018585, "learning_rate": 9.885769281442426e-05, "loss": 1.0915, "step": 1076 }, { "epoch": 0.09623159916903076, "grad_norm": 0.5488273501396179, "learning_rate": 9.885461526118713e-05, "loss": 0.9937, "step": 1077 }, { "epoch": 0.09632095070029263, "grad_norm": 0.42456749081611633, "learning_rate": 9.885153361585523e-05, "loss": 1.1105, "step": 1078 }, { "epoch": 0.09641030223155449, "grad_norm": 0.38298559188842773, "learning_rate": 9.884844787868667e-05, "loss": 1.1525, "step": 1079 }, { "epoch": 0.09649965376281636, "grad_norm": 0.46398019790649414, "learning_rate": 9.884535804993991e-05, "loss": 1.0575, "step": 1080 }, { "epoch": 0.09658900529407823, "grad_norm": 0.47431719303131104, "learning_rate": 9.884226412987375e-05, "loss": 1.0777, "step": 1081 }, { "epoch": 0.0966783568253401, "grad_norm": 0.4811946451663971, "learning_rate": 9.883916611874735e-05, "loss": 1.0186, "step": 1082 }, { "epoch": 0.09676770835660196, "grad_norm": 0.4855489730834961, "learning_rate": 9.883606401682022e-05, "loss": 1.1432, "step": 1083 }, { "epoch": 0.09685705988786383, "grad_norm": 0.423043817281723, "learning_rate": 9.883295782435216e-05, "loss": 1.0769, "step": 1084 }, { "epoch": 0.09694641141912569, "grad_norm": 0.5006414651870728, "learning_rate": 9.882984754160334e-05, "loss": 1.0417, "step": 1085 }, { "epoch": 0.09703576295038756, "grad_norm": 0.41438305377960205, "learning_rate": 9.882673316883432e-05, "loss": 1.091, "step": 1086 }, { "epoch": 0.09712511448164943, "grad_norm": 0.4482746124267578, "learning_rate": 9.882361470630594e-05, "loss": 1.0692, "step": 1087 }, { "epoch": 0.0972144660129113, "grad_norm": 0.43151143193244934, "learning_rate": 9.882049215427941e-05, "loss": 1.0767, "step": 1088 }, { "epoch": 0.09730381754417317, "grad_norm": 0.47808754444122314, "learning_rate": 9.881736551301627e-05, "loss": 1.1291, "step": 1089 }, { "epoch": 0.09739316907543503, "grad_norm": 0.3970921039581299, "learning_rate": 9.88142347827784e-05, "loss": 1.0847, "step": 1090 }, { "epoch": 0.0974825206066969, "grad_norm": 0.4331444501876831, "learning_rate": 9.881109996382807e-05, "loss": 1.0468, "step": 1091 }, { "epoch": 0.09757187213795876, "grad_norm": 0.48726001381874084, "learning_rate": 9.880796105642782e-05, "loss": 1.0272, "step": 1092 }, { "epoch": 0.09766122366922063, "grad_norm": 0.39687567949295044, "learning_rate": 9.880481806084057e-05, "loss": 1.0803, "step": 1093 }, { "epoch": 0.0977505752004825, "grad_norm": 0.510676920413971, "learning_rate": 9.880167097732957e-05, "loss": 0.9923, "step": 1094 }, { "epoch": 0.09783992673174437, "grad_norm": 0.4363247752189636, "learning_rate": 9.879851980615847e-05, "loss": 1.0961, "step": 1095 }, { "epoch": 0.09792927826300624, "grad_norm": 0.4566675126552582, "learning_rate": 9.879536454759115e-05, "loss": 0.9882, "step": 1096 }, { "epoch": 0.0980186297942681, "grad_norm": 0.3779030442237854, "learning_rate": 9.879220520189195e-05, "loss": 1.1436, "step": 1097 }, { "epoch": 0.09810798132552996, "grad_norm": 0.45519158244132996, "learning_rate": 9.878904176932546e-05, "loss": 1.0497, "step": 1098 }, { "epoch": 0.09819733285679183, "grad_norm": 0.3910900056362152, "learning_rate": 9.878587425015668e-05, "loss": 1.0654, "step": 1099 }, { "epoch": 0.0982866843880537, "grad_norm": 0.45868462324142456, "learning_rate": 9.878270264465091e-05, "loss": 1.0361, "step": 1100 }, { "epoch": 0.09837603591931557, "grad_norm": 0.435067743062973, "learning_rate": 9.877952695307382e-05, "loss": 1.0632, "step": 1101 }, { "epoch": 0.09846538745057744, "grad_norm": 0.5049869418144226, "learning_rate": 9.877634717569137e-05, "loss": 0.9519, "step": 1102 }, { "epoch": 0.09855473898183931, "grad_norm": 0.48381614685058594, "learning_rate": 9.877316331276995e-05, "loss": 1.0368, "step": 1103 }, { "epoch": 0.09864409051310116, "grad_norm": 0.4501636326313019, "learning_rate": 9.876997536457619e-05, "loss": 0.9595, "step": 1104 }, { "epoch": 0.09873344204436303, "grad_norm": 0.4924948811531067, "learning_rate": 9.876678333137716e-05, "loss": 1.0592, "step": 1105 }, { "epoch": 0.0988227935756249, "grad_norm": 0.387658029794693, "learning_rate": 9.876358721344022e-05, "loss": 1.0821, "step": 1106 }, { "epoch": 0.09891214510688677, "grad_norm": 0.5033579468727112, "learning_rate": 9.876038701103305e-05, "loss": 1.0504, "step": 1107 }, { "epoch": 0.09900149663814864, "grad_norm": 0.47466421127319336, "learning_rate": 9.875718272442373e-05, "loss": 1.009, "step": 1108 }, { "epoch": 0.09909084816941051, "grad_norm": 0.43417561054229736, "learning_rate": 9.875397435388063e-05, "loss": 1.0584, "step": 1109 }, { "epoch": 0.09918019970067236, "grad_norm": 0.4432922899723053, "learning_rate": 9.875076189967252e-05, "loss": 1.0067, "step": 1110 }, { "epoch": 0.09926955123193423, "grad_norm": 0.45428282022476196, "learning_rate": 9.874754536206843e-05, "loss": 1.1137, "step": 1111 }, { "epoch": 0.0993589027631961, "grad_norm": 0.4388342797756195, "learning_rate": 9.874432474133781e-05, "loss": 1.0513, "step": 1112 }, { "epoch": 0.09944825429445797, "grad_norm": 0.42276880145072937, "learning_rate": 9.874110003775043e-05, "loss": 1.0552, "step": 1113 }, { "epoch": 0.09953760582571984, "grad_norm": 0.4311312735080719, "learning_rate": 9.873787125157636e-05, "loss": 1.0679, "step": 1114 }, { "epoch": 0.09962695735698171, "grad_norm": 0.38914674520492554, "learning_rate": 9.873463838308608e-05, "loss": 1.0837, "step": 1115 }, { "epoch": 0.09971630888824357, "grad_norm": 0.46259498596191406, "learning_rate": 9.873140143255036e-05, "loss": 1.0772, "step": 1116 }, { "epoch": 0.09980566041950543, "grad_norm": 0.4010103642940521, "learning_rate": 9.872816040024034e-05, "loss": 1.0801, "step": 1117 }, { "epoch": 0.0998950119507673, "grad_norm": 0.3827916383743286, "learning_rate": 9.872491528642746e-05, "loss": 1.1128, "step": 1118 }, { "epoch": 0.09998436348202917, "grad_norm": 0.41605910658836365, "learning_rate": 9.872166609138355e-05, "loss": 1.0274, "step": 1119 }, { "epoch": 0.10007371501329104, "grad_norm": 0.44363489747047424, "learning_rate": 9.87184128153808e-05, "loss": 1.0553, "step": 1120 }, { "epoch": 0.10016306654455291, "grad_norm": 0.4547342360019684, "learning_rate": 9.871515545869166e-05, "loss": 1.0827, "step": 1121 }, { "epoch": 0.10025241807581478, "grad_norm": 0.4331105649471283, "learning_rate": 9.871189402158898e-05, "loss": 1.0772, "step": 1122 }, { "epoch": 0.10034176960707664, "grad_norm": 0.47358986735343933, "learning_rate": 9.870862850434593e-05, "loss": 1.124, "step": 1123 }, { "epoch": 0.1004311211383385, "grad_norm": 0.4455796480178833, "learning_rate": 9.870535890723607e-05, "loss": 1.0435, "step": 1124 }, { "epoch": 0.10052047266960037, "grad_norm": 0.42114776372909546, "learning_rate": 9.870208523053323e-05, "loss": 1.1176, "step": 1125 }, { "epoch": 0.10060982420086224, "grad_norm": 0.4320701062679291, "learning_rate": 9.869880747451164e-05, "loss": 1.0214, "step": 1126 }, { "epoch": 0.10069917573212411, "grad_norm": 0.41799092292785645, "learning_rate": 9.86955256394458e-05, "loss": 1.0928, "step": 1127 }, { "epoch": 0.10078852726338598, "grad_norm": 0.4450794458389282, "learning_rate": 9.869223972561066e-05, "loss": 1.1019, "step": 1128 }, { "epoch": 0.10087787879464784, "grad_norm": 0.42303231358528137, "learning_rate": 9.86889497332814e-05, "loss": 1.0618, "step": 1129 }, { "epoch": 0.10096723032590971, "grad_norm": 0.5273473858833313, "learning_rate": 9.868565566273363e-05, "loss": 1.0161, "step": 1130 }, { "epoch": 0.10105658185717158, "grad_norm": 0.4437713921070099, "learning_rate": 9.868235751424324e-05, "loss": 1.0227, "step": 1131 }, { "epoch": 0.10114593338843345, "grad_norm": 0.41443753242492676, "learning_rate": 9.86790552880865e-05, "loss": 1.036, "step": 1132 }, { "epoch": 0.10123528491969531, "grad_norm": 0.5034554600715637, "learning_rate": 9.867574898453998e-05, "loss": 0.9698, "step": 1133 }, { "epoch": 0.10132463645095718, "grad_norm": 0.4698033630847931, "learning_rate": 9.867243860388065e-05, "loss": 1.0825, "step": 1134 }, { "epoch": 0.10141398798221904, "grad_norm": 0.48983025550842285, "learning_rate": 9.866912414638577e-05, "loss": 1.0424, "step": 1135 }, { "epoch": 0.10150333951348091, "grad_norm": 0.5005711317062378, "learning_rate": 9.866580561233297e-05, "loss": 1.0549, "step": 1136 }, { "epoch": 0.10159269104474278, "grad_norm": 0.44725582003593445, "learning_rate": 9.866248300200021e-05, "loss": 1.0877, "step": 1137 }, { "epoch": 0.10168204257600465, "grad_norm": 0.4655127227306366, "learning_rate": 9.865915631566579e-05, "loss": 1.1502, "step": 1138 }, { "epoch": 0.10177139410726652, "grad_norm": 0.4426875114440918, "learning_rate": 9.865582555360837e-05, "loss": 1.1273, "step": 1139 }, { "epoch": 0.10186074563852839, "grad_norm": 0.4345450699329376, "learning_rate": 9.865249071610691e-05, "loss": 1.1256, "step": 1140 }, { "epoch": 0.10195009716979024, "grad_norm": 0.4371299743652344, "learning_rate": 9.864915180344076e-05, "loss": 1.0296, "step": 1141 }, { "epoch": 0.10203944870105211, "grad_norm": 0.44082698225975037, "learning_rate": 9.864580881588959e-05, "loss": 1.0628, "step": 1142 }, { "epoch": 0.10212880023231398, "grad_norm": 0.5174263715744019, "learning_rate": 9.864246175373339e-05, "loss": 0.9576, "step": 1143 }, { "epoch": 0.10221815176357585, "grad_norm": 0.42104923725128174, "learning_rate": 9.863911061725256e-05, "loss": 1.017, "step": 1144 }, { "epoch": 0.10230750329483772, "grad_norm": 0.39812588691711426, "learning_rate": 9.863575540672772e-05, "loss": 1.0241, "step": 1145 }, { "epoch": 0.10239685482609959, "grad_norm": 0.5662054419517517, "learning_rate": 9.863239612243997e-05, "loss": 1.0541, "step": 1146 }, { "epoch": 0.10248620635736144, "grad_norm": 0.4463253319263458, "learning_rate": 9.862903276467066e-05, "loss": 1.0583, "step": 1147 }, { "epoch": 0.10257555788862331, "grad_norm": 0.3903212249279022, "learning_rate": 9.86256653337015e-05, "loss": 1.1279, "step": 1148 }, { "epoch": 0.10266490941988518, "grad_norm": 0.518621563911438, "learning_rate": 9.862229382981454e-05, "loss": 1.0526, "step": 1149 }, { "epoch": 0.10275426095114705, "grad_norm": 0.5142053365707397, "learning_rate": 9.861891825329222e-05, "loss": 1.0619, "step": 1150 }, { "epoch": 0.10284361248240892, "grad_norm": 0.45500150322914124, "learning_rate": 9.861553860441725e-05, "loss": 1.0992, "step": 1151 }, { "epoch": 0.10293296401367079, "grad_norm": 0.4826822578907013, "learning_rate": 9.861215488347272e-05, "loss": 1.0001, "step": 1152 }, { "epoch": 0.10302231554493266, "grad_norm": 0.44567134976387024, "learning_rate": 9.860876709074204e-05, "loss": 1.1272, "step": 1153 }, { "epoch": 0.10311166707619451, "grad_norm": 0.4271107614040375, "learning_rate": 9.860537522650898e-05, "loss": 1.0903, "step": 1154 }, { "epoch": 0.10320101860745638, "grad_norm": 0.4095713198184967, "learning_rate": 9.860197929105768e-05, "loss": 1.0451, "step": 1155 }, { "epoch": 0.10329037013871825, "grad_norm": 0.42766252160072327, "learning_rate": 9.859857928467254e-05, "loss": 1.0661, "step": 1156 }, { "epoch": 0.10337972166998012, "grad_norm": 0.4731602072715759, "learning_rate": 9.859517520763834e-05, "loss": 1.0923, "step": 1157 }, { "epoch": 0.10346907320124199, "grad_norm": 0.5062028169631958, "learning_rate": 9.859176706024024e-05, "loss": 0.9769, "step": 1158 }, { "epoch": 0.10355842473250386, "grad_norm": 0.433315634727478, "learning_rate": 9.858835484276369e-05, "loss": 1.0987, "step": 1159 }, { "epoch": 0.10364777626376571, "grad_norm": 0.4237388074398041, "learning_rate": 9.858493855549453e-05, "loss": 1.0466, "step": 1160 }, { "epoch": 0.10373712779502758, "grad_norm": 0.45744040608406067, "learning_rate": 9.858151819871888e-05, "loss": 1.039, "step": 1161 }, { "epoch": 0.10382647932628945, "grad_norm": 0.5162191390991211, "learning_rate": 9.857809377272323e-05, "loss": 1.0173, "step": 1162 }, { "epoch": 0.10391583085755132, "grad_norm": 0.4239135682582855, "learning_rate": 9.857466527779444e-05, "loss": 1.044, "step": 1163 }, { "epoch": 0.10400518238881319, "grad_norm": 0.5142119526863098, "learning_rate": 9.857123271421964e-05, "loss": 1.0666, "step": 1164 }, { "epoch": 0.10409453392007506, "grad_norm": 0.49715521931648254, "learning_rate": 9.856779608228638e-05, "loss": 1.0999, "step": 1165 }, { "epoch": 0.10418388545133692, "grad_norm": 0.4236762523651123, "learning_rate": 9.856435538228251e-05, "loss": 1.0383, "step": 1166 }, { "epoch": 0.10427323698259879, "grad_norm": 0.4198687970638275, "learning_rate": 9.85609106144962e-05, "loss": 1.0291, "step": 1167 }, { "epoch": 0.10436258851386065, "grad_norm": 0.4170401990413666, "learning_rate": 9.8557461779216e-05, "loss": 1.056, "step": 1168 }, { "epoch": 0.10445194004512252, "grad_norm": 0.4177343547344208, "learning_rate": 9.855400887673082e-05, "loss": 1.0697, "step": 1169 }, { "epoch": 0.1045412915763844, "grad_norm": 0.4959930181503296, "learning_rate": 9.855055190732983e-05, "loss": 1.1104, "step": 1170 }, { "epoch": 0.10463064310764626, "grad_norm": 0.4635033905506134, "learning_rate": 9.85470908713026e-05, "loss": 0.9893, "step": 1171 }, { "epoch": 0.10471999463890812, "grad_norm": 0.42118409276008606, "learning_rate": 9.854362576893905e-05, "loss": 1.0985, "step": 1172 }, { "epoch": 0.10480934617016999, "grad_norm": 0.41519343852996826, "learning_rate": 9.85401566005294e-05, "loss": 1.0862, "step": 1173 }, { "epoch": 0.10489869770143186, "grad_norm": 0.42314785718917847, "learning_rate": 9.853668336636422e-05, "loss": 1.1004, "step": 1174 }, { "epoch": 0.10498804923269373, "grad_norm": 0.4355357885360718, "learning_rate": 9.853320606673446e-05, "loss": 1.0732, "step": 1175 }, { "epoch": 0.1050774007639556, "grad_norm": 0.3647025525569916, "learning_rate": 9.852972470193136e-05, "loss": 1.0505, "step": 1176 }, { "epoch": 0.10516675229521746, "grad_norm": 0.4869689643383026, "learning_rate": 9.852623927224653e-05, "loss": 1.0354, "step": 1177 }, { "epoch": 0.10525610382647933, "grad_norm": 0.38133373856544495, "learning_rate": 9.85227497779719e-05, "loss": 1.0863, "step": 1178 }, { "epoch": 0.10534545535774119, "grad_norm": 0.5018585920333862, "learning_rate": 9.851925621939976e-05, "loss": 1.093, "step": 1179 }, { "epoch": 0.10543480688900306, "grad_norm": 0.44408929347991943, "learning_rate": 9.851575859682274e-05, "loss": 1.0619, "step": 1180 }, { "epoch": 0.10552415842026493, "grad_norm": 0.4483299255371094, "learning_rate": 9.85122569105338e-05, "loss": 1.0438, "step": 1181 }, { "epoch": 0.1056135099515268, "grad_norm": 0.3859767019748688, "learning_rate": 9.850875116082623e-05, "loss": 1.1045, "step": 1182 }, { "epoch": 0.10570286148278867, "grad_norm": 0.4310990571975708, "learning_rate": 9.850524134799371e-05, "loss": 1.0604, "step": 1183 }, { "epoch": 0.10579221301405053, "grad_norm": 0.5007196664810181, "learning_rate": 9.850172747233018e-05, "loss": 0.9943, "step": 1184 }, { "epoch": 0.10588156454531239, "grad_norm": 0.4328349530696869, "learning_rate": 9.849820953412997e-05, "loss": 1.0766, "step": 1185 }, { "epoch": 0.10597091607657426, "grad_norm": 0.4742787480354309, "learning_rate": 9.849468753368777e-05, "loss": 1.0821, "step": 1186 }, { "epoch": 0.10606026760783613, "grad_norm": 0.43033096194267273, "learning_rate": 9.849116147129857e-05, "loss": 1.0192, "step": 1187 }, { "epoch": 0.106149619139098, "grad_norm": 0.416620671749115, "learning_rate": 9.848763134725771e-05, "loss": 1.1052, "step": 1188 }, { "epoch": 0.10623897067035987, "grad_norm": 0.44439324736595154, "learning_rate": 9.848409716186091e-05, "loss": 1.0822, "step": 1189 }, { "epoch": 0.10632832220162174, "grad_norm": 0.4544062316417694, "learning_rate": 9.848055891540416e-05, "loss": 1.0642, "step": 1190 }, { "epoch": 0.10641767373288359, "grad_norm": 0.5025431513786316, "learning_rate": 9.847701660818381e-05, "loss": 0.9832, "step": 1191 }, { "epoch": 0.10650702526414546, "grad_norm": 0.4252472519874573, "learning_rate": 9.847347024049662e-05, "loss": 1.0422, "step": 1192 }, { "epoch": 0.10659637679540733, "grad_norm": 0.43434327840805054, "learning_rate": 9.84699198126396e-05, "loss": 1.1426, "step": 1193 }, { "epoch": 0.1066857283266692, "grad_norm": 0.40715593099594116, "learning_rate": 9.846636532491014e-05, "loss": 1.0338, "step": 1194 }, { "epoch": 0.10677507985793107, "grad_norm": 0.47930577397346497, "learning_rate": 9.846280677760596e-05, "loss": 1.1011, "step": 1195 }, { "epoch": 0.10686443138919294, "grad_norm": 0.45403239130973816, "learning_rate": 9.845924417102514e-05, "loss": 1.0613, "step": 1196 }, { "epoch": 0.10695378292045479, "grad_norm": 0.3767177164554596, "learning_rate": 9.84556775054661e-05, "loss": 1.1216, "step": 1197 }, { "epoch": 0.10704313445171666, "grad_norm": 0.41710513830184937, "learning_rate": 9.845210678122756e-05, "loss": 1.0588, "step": 1198 }, { "epoch": 0.10713248598297853, "grad_norm": 0.49159350991249084, "learning_rate": 9.844853199860861e-05, "loss": 1.0458, "step": 1199 }, { "epoch": 0.1072218375142404, "grad_norm": 0.4163805842399597, "learning_rate": 9.844495315790869e-05, "loss": 1.0478, "step": 1200 }, { "epoch": 0.10731118904550227, "grad_norm": 0.3860858678817749, "learning_rate": 9.844137025942754e-05, "loss": 1.0728, "step": 1201 }, { "epoch": 0.10740054057676414, "grad_norm": 0.44660601019859314, "learning_rate": 9.84377833034653e-05, "loss": 1.1211, "step": 1202 }, { "epoch": 0.107489892108026, "grad_norm": 0.4287334382534027, "learning_rate": 9.843419229032238e-05, "loss": 1.0144, "step": 1203 }, { "epoch": 0.10757924363928786, "grad_norm": 0.4286378026008606, "learning_rate": 9.843059722029959e-05, "loss": 1.1338, "step": 1204 }, { "epoch": 0.10766859517054973, "grad_norm": 0.3777160048484802, "learning_rate": 9.842699809369806e-05, "loss": 1.0877, "step": 1205 }, { "epoch": 0.1077579467018116, "grad_norm": 0.40929606556892395, "learning_rate": 9.842339491081924e-05, "loss": 1.0229, "step": 1206 }, { "epoch": 0.10784729823307347, "grad_norm": 0.40165361762046814, "learning_rate": 9.841978767196495e-05, "loss": 1.042, "step": 1207 }, { "epoch": 0.10793664976433534, "grad_norm": 0.4323919713497162, "learning_rate": 9.841617637743731e-05, "loss": 1.0781, "step": 1208 }, { "epoch": 0.10802600129559721, "grad_norm": 0.5180752873420715, "learning_rate": 9.841256102753882e-05, "loss": 1.1395, "step": 1209 }, { "epoch": 0.10811535282685907, "grad_norm": 0.40795522928237915, "learning_rate": 9.84089416225723e-05, "loss": 1.0676, "step": 1210 }, { "epoch": 0.10820470435812093, "grad_norm": 0.4201282560825348, "learning_rate": 9.840531816284093e-05, "loss": 1.0493, "step": 1211 }, { "epoch": 0.1082940558893828, "grad_norm": 0.4624415636062622, "learning_rate": 9.84016906486482e-05, "loss": 1.0656, "step": 1212 }, { "epoch": 0.10838340742064467, "grad_norm": 0.415123850107193, "learning_rate": 9.839805908029795e-05, "loss": 1.0556, "step": 1213 }, { "epoch": 0.10847275895190654, "grad_norm": 0.41990217566490173, "learning_rate": 9.839442345809435e-05, "loss": 1.0851, "step": 1214 }, { "epoch": 0.10856211048316841, "grad_norm": 0.43788111209869385, "learning_rate": 9.839078378234196e-05, "loss": 1.106, "step": 1215 }, { "epoch": 0.10865146201443027, "grad_norm": 0.4749810993671417, "learning_rate": 9.838714005334562e-05, "loss": 1.0649, "step": 1216 }, { "epoch": 0.10874081354569214, "grad_norm": 0.4938737750053406, "learning_rate": 9.838349227141051e-05, "loss": 1.0409, "step": 1217 }, { "epoch": 0.108830165076954, "grad_norm": 0.48466798663139343, "learning_rate": 9.83798404368422e-05, "loss": 1.1215, "step": 1218 }, { "epoch": 0.10891951660821587, "grad_norm": 0.3890851140022278, "learning_rate": 9.837618454994657e-05, "loss": 1.0944, "step": 1219 }, { "epoch": 0.10900886813947774, "grad_norm": 0.4145745635032654, "learning_rate": 9.837252461102981e-05, "loss": 1.1748, "step": 1220 }, { "epoch": 0.10909821967073961, "grad_norm": 0.4121316969394684, "learning_rate": 9.836886062039853e-05, "loss": 1.0474, "step": 1221 }, { "epoch": 0.10918757120200147, "grad_norm": 0.3703002333641052, "learning_rate": 9.836519257835957e-05, "loss": 1.1139, "step": 1222 }, { "epoch": 0.10927692273326334, "grad_norm": 0.44502654671669006, "learning_rate": 9.836152048522022e-05, "loss": 1.0527, "step": 1223 }, { "epoch": 0.1093662742645252, "grad_norm": 0.41378626227378845, "learning_rate": 9.835784434128802e-05, "loss": 1.1426, "step": 1224 }, { "epoch": 0.10945562579578708, "grad_norm": 0.5465456247329712, "learning_rate": 9.83541641468709e-05, "loss": 1.057, "step": 1225 }, { "epoch": 0.10954497732704895, "grad_norm": 0.49641984701156616, "learning_rate": 9.835047990227712e-05, "loss": 1.0913, "step": 1226 }, { "epoch": 0.10963432885831081, "grad_norm": 0.5275992155075073, "learning_rate": 9.834679160781526e-05, "loss": 0.987, "step": 1227 }, { "epoch": 0.10972368038957267, "grad_norm": 0.43388912081718445, "learning_rate": 9.834309926379426e-05, "loss": 1.0185, "step": 1228 }, { "epoch": 0.10981303192083454, "grad_norm": 0.5272001028060913, "learning_rate": 9.833940287052341e-05, "loss": 1.0355, "step": 1229 }, { "epoch": 0.10990238345209641, "grad_norm": 0.39785364270210266, "learning_rate": 9.833570242831229e-05, "loss": 1.0603, "step": 1230 }, { "epoch": 0.10999173498335828, "grad_norm": 0.5039787292480469, "learning_rate": 9.833199793747089e-05, "loss": 1.0754, "step": 1231 }, { "epoch": 0.11008108651462015, "grad_norm": 0.5028790831565857, "learning_rate": 9.832828939830947e-05, "loss": 1.0872, "step": 1232 }, { "epoch": 0.11017043804588202, "grad_norm": 0.4891465902328491, "learning_rate": 9.832457681113866e-05, "loss": 0.9673, "step": 1233 }, { "epoch": 0.11025978957714389, "grad_norm": 0.5017561316490173, "learning_rate": 9.832086017626947e-05, "loss": 1.0323, "step": 1234 }, { "epoch": 0.11034914110840574, "grad_norm": 0.41430050134658813, "learning_rate": 9.831713949401316e-05, "loss": 1.0685, "step": 1235 }, { "epoch": 0.11043849263966761, "grad_norm": 0.4018838703632355, "learning_rate": 9.831341476468139e-05, "loss": 1.0245, "step": 1236 }, { "epoch": 0.11052784417092948, "grad_norm": 0.48818472027778625, "learning_rate": 9.830968598858614e-05, "loss": 1.0407, "step": 1237 }, { "epoch": 0.11061719570219135, "grad_norm": 0.45621582865715027, "learning_rate": 9.830595316603976e-05, "loss": 1.0304, "step": 1238 }, { "epoch": 0.11070654723345322, "grad_norm": 0.5320197343826294, "learning_rate": 9.83022162973549e-05, "loss": 1.0693, "step": 1239 }, { "epoch": 0.11079589876471509, "grad_norm": 0.4279773533344269, "learning_rate": 9.829847538284455e-05, "loss": 1.1345, "step": 1240 }, { "epoch": 0.11088525029597694, "grad_norm": 0.4108751118183136, "learning_rate": 9.829473042282207e-05, "loss": 1.1014, "step": 1241 }, { "epoch": 0.11097460182723881, "grad_norm": 0.5319353938102722, "learning_rate": 9.829098141760111e-05, "loss": 0.9735, "step": 1242 }, { "epoch": 0.11106395335850068, "grad_norm": 0.42923703789711, "learning_rate": 9.828722836749575e-05, "loss": 1.0158, "step": 1243 }, { "epoch": 0.11115330488976255, "grad_norm": 0.4357326924800873, "learning_rate": 9.828347127282027e-05, "loss": 1.0393, "step": 1244 }, { "epoch": 0.11124265642102442, "grad_norm": 0.3578936755657196, "learning_rate": 9.827971013388944e-05, "loss": 1.0764, "step": 1245 }, { "epoch": 0.11133200795228629, "grad_norm": 0.35362938046455383, "learning_rate": 9.827594495101823e-05, "loss": 1.1115, "step": 1246 }, { "epoch": 0.11142135948354814, "grad_norm": 0.41567638516426086, "learning_rate": 9.827217572452208e-05, "loss": 1.1021, "step": 1247 }, { "epoch": 0.11151071101481001, "grad_norm": 0.40736740827560425, "learning_rate": 9.826840245471665e-05, "loss": 1.03, "step": 1248 }, { "epoch": 0.11160006254607188, "grad_norm": 0.4363911747932434, "learning_rate": 9.826462514191801e-05, "loss": 1.0462, "step": 1249 }, { "epoch": 0.11168941407733375, "grad_norm": 0.4702802896499634, "learning_rate": 9.826084378644254e-05, "loss": 1.029, "step": 1250 }, { "epoch": 0.11177876560859562, "grad_norm": 0.45278483629226685, "learning_rate": 9.8257058388607e-05, "loss": 1.1249, "step": 1251 }, { "epoch": 0.11186811713985749, "grad_norm": 0.47644123435020447, "learning_rate": 9.825326894872842e-05, "loss": 1.0096, "step": 1252 }, { "epoch": 0.11195746867111935, "grad_norm": 0.48887526988983154, "learning_rate": 9.824947546712424e-05, "loss": 1.0465, "step": 1253 }, { "epoch": 0.11204682020238121, "grad_norm": 0.4377509355545044, "learning_rate": 9.824567794411216e-05, "loss": 1.049, "step": 1254 }, { "epoch": 0.11213617173364308, "grad_norm": 0.5497344732284546, "learning_rate": 9.824187638001032e-05, "loss": 0.9813, "step": 1255 }, { "epoch": 0.11222552326490495, "grad_norm": 0.39412158727645874, "learning_rate": 9.82380707751371e-05, "loss": 1.0328, "step": 1256 }, { "epoch": 0.11231487479616682, "grad_norm": 0.5611281394958496, "learning_rate": 9.823426112981126e-05, "loss": 0.9994, "step": 1257 }, { "epoch": 0.11240422632742869, "grad_norm": 0.4828655421733856, "learning_rate": 9.823044744435193e-05, "loss": 1.062, "step": 1258 }, { "epoch": 0.11249357785869055, "grad_norm": 0.3836156129837036, "learning_rate": 9.822662971907852e-05, "loss": 1.1001, "step": 1259 }, { "epoch": 0.11258292938995242, "grad_norm": 0.42271852493286133, "learning_rate": 9.822280795431082e-05, "loss": 1.0411, "step": 1260 }, { "epoch": 0.11267228092121429, "grad_norm": 0.4254275858402252, "learning_rate": 9.821898215036891e-05, "loss": 1.0963, "step": 1261 }, { "epoch": 0.11276163245247615, "grad_norm": 0.46899744868278503, "learning_rate": 9.821515230757329e-05, "loss": 1.054, "step": 1262 }, { "epoch": 0.11285098398373802, "grad_norm": 0.4628446400165558, "learning_rate": 9.821131842624471e-05, "loss": 1.0433, "step": 1263 }, { "epoch": 0.11294033551499989, "grad_norm": 0.4287131726741791, "learning_rate": 9.820748050670433e-05, "loss": 1.0086, "step": 1264 }, { "epoch": 0.11302968704626176, "grad_norm": 0.4528293013572693, "learning_rate": 9.820363854927362e-05, "loss": 1.0296, "step": 1265 }, { "epoch": 0.11311903857752362, "grad_norm": 0.5807567834854126, "learning_rate": 9.819979255427434e-05, "loss": 1.0242, "step": 1266 }, { "epoch": 0.11320839010878549, "grad_norm": 0.4787788391113281, "learning_rate": 9.819594252202866e-05, "loss": 1.0764, "step": 1267 }, { "epoch": 0.11329774164004736, "grad_norm": 0.4397558271884918, "learning_rate": 9.819208845285908e-05, "loss": 1.0668, "step": 1268 }, { "epoch": 0.11338709317130923, "grad_norm": 0.5197393894195557, "learning_rate": 9.81882303470884e-05, "loss": 1.0614, "step": 1269 }, { "epoch": 0.1134764447025711, "grad_norm": 0.4312942326068878, "learning_rate": 9.818436820503976e-05, "loss": 1.1124, "step": 1270 }, { "epoch": 0.11356579623383296, "grad_norm": 0.36695683002471924, "learning_rate": 9.818050202703668e-05, "loss": 1.0542, "step": 1271 }, { "epoch": 0.11365514776509482, "grad_norm": 0.47956258058547974, "learning_rate": 9.817663181340299e-05, "loss": 1.0063, "step": 1272 }, { "epoch": 0.11374449929635669, "grad_norm": 0.40785545110702515, "learning_rate": 9.817275756446287e-05, "loss": 1.0145, "step": 1273 }, { "epoch": 0.11383385082761856, "grad_norm": 0.5532562732696533, "learning_rate": 9.816887928054082e-05, "loss": 1.0415, "step": 1274 }, { "epoch": 0.11392320235888043, "grad_norm": 0.4029194414615631, "learning_rate": 9.816499696196167e-05, "loss": 1.0276, "step": 1275 }, { "epoch": 0.1140125538901423, "grad_norm": 0.38297927379608154, "learning_rate": 9.816111060905062e-05, "loss": 1.0914, "step": 1276 }, { "epoch": 0.11410190542140417, "grad_norm": 0.44156894087791443, "learning_rate": 9.815722022213322e-05, "loss": 1.0469, "step": 1277 }, { "epoch": 0.11419125695266602, "grad_norm": 0.44452789425849915, "learning_rate": 9.81533258015353e-05, "loss": 0.9441, "step": 1278 }, { "epoch": 0.11428060848392789, "grad_norm": 0.4572380483150482, "learning_rate": 9.814942734758306e-05, "loss": 1.0353, "step": 1279 }, { "epoch": 0.11436996001518976, "grad_norm": 0.39455848932266235, "learning_rate": 9.814552486060305e-05, "loss": 1.0873, "step": 1280 }, { "epoch": 0.11445931154645163, "grad_norm": 0.3743246793746948, "learning_rate": 9.814161834092212e-05, "loss": 1.0684, "step": 1281 }, { "epoch": 0.1145486630777135, "grad_norm": 0.4070795774459839, "learning_rate": 9.81377077888675e-05, "loss": 1.143, "step": 1282 }, { "epoch": 0.11463801460897537, "grad_norm": 0.4575822353363037, "learning_rate": 9.813379320476677e-05, "loss": 1.0032, "step": 1283 }, { "epoch": 0.11472736614023722, "grad_norm": 0.4624904692173004, "learning_rate": 9.812987458894778e-05, "loss": 1.1421, "step": 1284 }, { "epoch": 0.11481671767149909, "grad_norm": 0.45701712369918823, "learning_rate": 9.812595194173875e-05, "loss": 1.0939, "step": 1285 }, { "epoch": 0.11490606920276096, "grad_norm": 0.4664194583892822, "learning_rate": 9.812202526346827e-05, "loss": 1.0381, "step": 1286 }, { "epoch": 0.11499542073402283, "grad_norm": 0.42624375224113464, "learning_rate": 9.811809455446523e-05, "loss": 1.0903, "step": 1287 }, { "epoch": 0.1150847722652847, "grad_norm": 0.40351542830467224, "learning_rate": 9.811415981505887e-05, "loss": 1.0591, "step": 1288 }, { "epoch": 0.11517412379654657, "grad_norm": 0.46651527285575867, "learning_rate": 9.811022104557877e-05, "loss": 1.0647, "step": 1289 }, { "epoch": 0.11526347532780842, "grad_norm": 0.4483588635921478, "learning_rate": 9.810627824635483e-05, "loss": 1.0734, "step": 1290 }, { "epoch": 0.11535282685907029, "grad_norm": 0.45187729597091675, "learning_rate": 9.810233141771732e-05, "loss": 1.0024, "step": 1291 }, { "epoch": 0.11544217839033216, "grad_norm": 0.43645814061164856, "learning_rate": 9.809838055999681e-05, "loss": 1.1, "step": 1292 }, { "epoch": 0.11553152992159403, "grad_norm": 0.42564859986305237, "learning_rate": 9.809442567352425e-05, "loss": 1.0762, "step": 1293 }, { "epoch": 0.1156208814528559, "grad_norm": 0.419881135225296, "learning_rate": 9.809046675863087e-05, "loss": 1.1093, "step": 1294 }, { "epoch": 0.11571023298411777, "grad_norm": 0.46777603030204773, "learning_rate": 9.808650381564831e-05, "loss": 1.0415, "step": 1295 }, { "epoch": 0.11579958451537964, "grad_norm": 0.4529683589935303, "learning_rate": 9.80825368449085e-05, "loss": 1.0256, "step": 1296 }, { "epoch": 0.1158889360466415, "grad_norm": 0.3957824110984802, "learning_rate": 9.807856584674368e-05, "loss": 1.1027, "step": 1297 }, { "epoch": 0.11597828757790336, "grad_norm": 0.3844730854034424, "learning_rate": 9.807459082148648e-05, "loss": 1.0713, "step": 1298 }, { "epoch": 0.11606763910916523, "grad_norm": 0.3445946276187897, "learning_rate": 9.80706117694699e-05, "loss": 1.1058, "step": 1299 }, { "epoch": 0.1161569906404271, "grad_norm": 0.39715394377708435, "learning_rate": 9.806662869102717e-05, "loss": 1.0711, "step": 1300 }, { "epoch": 0.11624634217168897, "grad_norm": 0.5070072412490845, "learning_rate": 9.806264158649193e-05, "loss": 1.0142, "step": 1301 }, { "epoch": 0.11633569370295084, "grad_norm": 0.4780667722225189, "learning_rate": 9.805865045619813e-05, "loss": 1.0362, "step": 1302 }, { "epoch": 0.1164250452342127, "grad_norm": 0.45890098810195923, "learning_rate": 9.80546553004801e-05, "loss": 0.9997, "step": 1303 }, { "epoch": 0.11651439676547456, "grad_norm": 0.46433109045028687, "learning_rate": 9.805065611967248e-05, "loss": 0.9268, "step": 1304 }, { "epoch": 0.11660374829673643, "grad_norm": 0.43207046389579773, "learning_rate": 9.804665291411022e-05, "loss": 1.0814, "step": 1305 }, { "epoch": 0.1166930998279983, "grad_norm": 0.3997035622596741, "learning_rate": 9.804264568412862e-05, "loss": 1.0454, "step": 1306 }, { "epoch": 0.11678245135926017, "grad_norm": 0.4062361717224121, "learning_rate": 9.803863443006336e-05, "loss": 1.1057, "step": 1307 }, { "epoch": 0.11687180289052204, "grad_norm": 0.3779062032699585, "learning_rate": 9.80346191522504e-05, "loss": 1.1132, "step": 1308 }, { "epoch": 0.1169611544217839, "grad_norm": 0.4400861859321594, "learning_rate": 9.803059985102609e-05, "loss": 1.0412, "step": 1309 }, { "epoch": 0.11705050595304577, "grad_norm": 0.48002249002456665, "learning_rate": 9.802657652672706e-05, "loss": 0.9414, "step": 1310 }, { "epoch": 0.11713985748430764, "grad_norm": 0.5008269548416138, "learning_rate": 9.802254917969032e-05, "loss": 1.0162, "step": 1311 }, { "epoch": 0.1172292090155695, "grad_norm": 0.4323752224445343, "learning_rate": 9.801851781025322e-05, "loss": 1.1209, "step": 1312 }, { "epoch": 0.11731856054683137, "grad_norm": 0.5104453563690186, "learning_rate": 9.80144824187534e-05, "loss": 0.9974, "step": 1313 }, { "epoch": 0.11740791207809324, "grad_norm": 0.4443637430667877, "learning_rate": 9.801044300552887e-05, "loss": 1.0159, "step": 1314 }, { "epoch": 0.1174972636093551, "grad_norm": 0.44141674041748047, "learning_rate": 9.800639957091799e-05, "loss": 1.0203, "step": 1315 }, { "epoch": 0.11758661514061697, "grad_norm": 0.44127362966537476, "learning_rate": 9.800235211525945e-05, "loss": 1.1218, "step": 1316 }, { "epoch": 0.11767596667187884, "grad_norm": 0.401253342628479, "learning_rate": 9.799830063889223e-05, "loss": 1.1097, "step": 1317 }, { "epoch": 0.1177653182031407, "grad_norm": 0.5445295572280884, "learning_rate": 9.799424514215572e-05, "loss": 1.0315, "step": 1318 }, { "epoch": 0.11785466973440258, "grad_norm": 0.3961075246334076, "learning_rate": 9.79901856253896e-05, "loss": 1.0794, "step": 1319 }, { "epoch": 0.11794402126566444, "grad_norm": 0.4312506318092346, "learning_rate": 9.798612208893389e-05, "loss": 1.0732, "step": 1320 }, { "epoch": 0.11803337279692631, "grad_norm": 0.5154565572738647, "learning_rate": 9.798205453312895e-05, "loss": 1.1045, "step": 1321 }, { "epoch": 0.11812272432818817, "grad_norm": 0.4929812550544739, "learning_rate": 9.79779829583155e-05, "loss": 1.009, "step": 1322 }, { "epoch": 0.11821207585945004, "grad_norm": 0.39952999353408813, "learning_rate": 9.797390736483459e-05, "loss": 1.0558, "step": 1323 }, { "epoch": 0.11830142739071191, "grad_norm": 0.47065678238868713, "learning_rate": 9.796982775302755e-05, "loss": 1.0119, "step": 1324 }, { "epoch": 0.11839077892197378, "grad_norm": 0.39249664545059204, "learning_rate": 9.796574412323611e-05, "loss": 1.0805, "step": 1325 }, { "epoch": 0.11848013045323565, "grad_norm": 0.38709941506385803, "learning_rate": 9.796165647580233e-05, "loss": 1.0919, "step": 1326 }, { "epoch": 0.11856948198449752, "grad_norm": 0.4576520323753357, "learning_rate": 9.795756481106857e-05, "loss": 1.082, "step": 1327 }, { "epoch": 0.11865883351575937, "grad_norm": 0.42923304438591003, "learning_rate": 9.795346912937757e-05, "loss": 0.9918, "step": 1328 }, { "epoch": 0.11874818504702124, "grad_norm": 0.38031134009361267, "learning_rate": 9.79493694310724e-05, "loss": 1.0198, "step": 1329 }, { "epoch": 0.11883753657828311, "grad_norm": 0.39930686354637146, "learning_rate": 9.794526571649643e-05, "loss": 1.0082, "step": 1330 }, { "epoch": 0.11892688810954498, "grad_norm": 0.463544636964798, "learning_rate": 9.794115798599339e-05, "loss": 1.1003, "step": 1331 }, { "epoch": 0.11901623964080685, "grad_norm": 0.42002353072166443, "learning_rate": 9.793704623990736e-05, "loss": 1.0518, "step": 1332 }, { "epoch": 0.11910559117206872, "grad_norm": 0.39078399538993835, "learning_rate": 9.793293047858274e-05, "loss": 1.0775, "step": 1333 }, { "epoch": 0.11919494270333057, "grad_norm": 0.44137701392173767, "learning_rate": 9.792881070236426e-05, "loss": 1.0588, "step": 1334 }, { "epoch": 0.11928429423459244, "grad_norm": 0.40128040313720703, "learning_rate": 9.7924686911597e-05, "loss": 1.0676, "step": 1335 }, { "epoch": 0.11937364576585431, "grad_norm": 0.46440789103507996, "learning_rate": 9.792055910662636e-05, "loss": 1.1613, "step": 1336 }, { "epoch": 0.11946299729711618, "grad_norm": 0.36990535259246826, "learning_rate": 9.791642728779811e-05, "loss": 1.0511, "step": 1337 }, { "epoch": 0.11955234882837805, "grad_norm": 0.47823598980903625, "learning_rate": 9.791229145545831e-05, "loss": 0.9977, "step": 1338 }, { "epoch": 0.11964170035963992, "grad_norm": 0.3946368992328644, "learning_rate": 9.790815160995342e-05, "loss": 1.0871, "step": 1339 }, { "epoch": 0.11973105189090177, "grad_norm": 0.5002843141555786, "learning_rate": 9.790400775163014e-05, "loss": 0.9361, "step": 1340 }, { "epoch": 0.11982040342216364, "grad_norm": 0.4414635896682739, "learning_rate": 9.789985988083558e-05, "loss": 1.0857, "step": 1341 }, { "epoch": 0.11990975495342551, "grad_norm": 0.4567340314388275, "learning_rate": 9.789570799791721e-05, "loss": 0.9954, "step": 1342 }, { "epoch": 0.11999910648468738, "grad_norm": 0.46271204948425293, "learning_rate": 9.789155210322276e-05, "loss": 0.9978, "step": 1343 }, { "epoch": 0.12008845801594925, "grad_norm": 0.45726093649864197, "learning_rate": 9.788739219710032e-05, "loss": 1.0101, "step": 1344 }, { "epoch": 0.12017780954721112, "grad_norm": 0.4213901162147522, "learning_rate": 9.788322827989836e-05, "loss": 1.061, "step": 1345 }, { "epoch": 0.12026716107847298, "grad_norm": 0.5426849126815796, "learning_rate": 9.787906035196562e-05, "loss": 1.0625, "step": 1346 }, { "epoch": 0.12035651260973484, "grad_norm": 0.4493926167488098, "learning_rate": 9.787488841365122e-05, "loss": 1.021, "step": 1347 }, { "epoch": 0.12044586414099671, "grad_norm": 0.47702398896217346, "learning_rate": 9.787071246530459e-05, "loss": 1.0161, "step": 1348 }, { "epoch": 0.12053521567225858, "grad_norm": 0.5239232778549194, "learning_rate": 9.786653250727555e-05, "loss": 1.0321, "step": 1349 }, { "epoch": 0.12062456720352045, "grad_norm": 0.4591730237007141, "learning_rate": 9.786234853991418e-05, "loss": 0.9884, "step": 1350 }, { "epoch": 0.12071391873478232, "grad_norm": 0.39837637543678284, "learning_rate": 9.785816056357095e-05, "loss": 1.0764, "step": 1351 }, { "epoch": 0.12080327026604419, "grad_norm": 0.42193183302879333, "learning_rate": 9.785396857859664e-05, "loss": 1.0636, "step": 1352 }, { "epoch": 0.12089262179730605, "grad_norm": 0.48460525274276733, "learning_rate": 9.784977258534239e-05, "loss": 1.0683, "step": 1353 }, { "epoch": 0.12098197332856792, "grad_norm": 0.45954951643943787, "learning_rate": 9.784557258415963e-05, "loss": 1.0613, "step": 1354 }, { "epoch": 0.12107132485982978, "grad_norm": 0.4340897500514984, "learning_rate": 9.784136857540015e-05, "loss": 0.9937, "step": 1355 }, { "epoch": 0.12116067639109165, "grad_norm": 0.39842215180397034, "learning_rate": 9.783716055941612e-05, "loss": 1.0619, "step": 1356 }, { "epoch": 0.12125002792235352, "grad_norm": 0.38093817234039307, "learning_rate": 9.783294853655999e-05, "loss": 1.0357, "step": 1357 }, { "epoch": 0.12133937945361539, "grad_norm": 0.5032150149345398, "learning_rate": 9.782873250718455e-05, "loss": 0.914, "step": 1358 }, { "epoch": 0.12142873098487725, "grad_norm": 0.4284474551677704, "learning_rate": 9.782451247164295e-05, "loss": 1.0058, "step": 1359 }, { "epoch": 0.12151808251613912, "grad_norm": 0.40385428071022034, "learning_rate": 9.782028843028865e-05, "loss": 1.0313, "step": 1360 }, { "epoch": 0.12160743404740099, "grad_norm": 0.48033663630485535, "learning_rate": 9.781606038347547e-05, "loss": 1.005, "step": 1361 }, { "epoch": 0.12169678557866286, "grad_norm": 0.3936779201030731, "learning_rate": 9.781182833155755e-05, "loss": 1.0181, "step": 1362 }, { "epoch": 0.12178613710992472, "grad_norm": 0.46711423993110657, "learning_rate": 9.780759227488936e-05, "loss": 1.0816, "step": 1363 }, { "epoch": 0.1218754886411866, "grad_norm": 0.43602418899536133, "learning_rate": 9.780335221382574e-05, "loss": 1.1094, "step": 1364 }, { "epoch": 0.12196484017244845, "grad_norm": 0.3990572690963745, "learning_rate": 9.779910814872182e-05, "loss": 0.9995, "step": 1365 }, { "epoch": 0.12205419170371032, "grad_norm": 0.5048510432243347, "learning_rate": 9.77948600799331e-05, "loss": 1.0082, "step": 1366 }, { "epoch": 0.12214354323497219, "grad_norm": 0.3975992202758789, "learning_rate": 9.779060800781537e-05, "loss": 1.0578, "step": 1367 }, { "epoch": 0.12223289476623406, "grad_norm": 0.4395469129085541, "learning_rate": 9.778635193272483e-05, "loss": 1.0318, "step": 1368 }, { "epoch": 0.12232224629749593, "grad_norm": 0.4118629992008209, "learning_rate": 9.778209185501794e-05, "loss": 1.0436, "step": 1369 }, { "epoch": 0.1224115978287578, "grad_norm": 0.4724205732345581, "learning_rate": 9.777782777505153e-05, "loss": 0.9656, "step": 1370 }, { "epoch": 0.12250094936001965, "grad_norm": 0.49588900804519653, "learning_rate": 9.777355969318278e-05, "loss": 0.9988, "step": 1371 }, { "epoch": 0.12259030089128152, "grad_norm": 0.4462149441242218, "learning_rate": 9.776928760976918e-05, "loss": 1.0914, "step": 1372 }, { "epoch": 0.12267965242254339, "grad_norm": 0.3685830533504486, "learning_rate": 9.776501152516855e-05, "loss": 1.0973, "step": 1373 }, { "epoch": 0.12276900395380526, "grad_norm": 0.5166415572166443, "learning_rate": 9.776073143973904e-05, "loss": 0.9822, "step": 1374 }, { "epoch": 0.12285835548506713, "grad_norm": 0.4535563588142395, "learning_rate": 9.775644735383922e-05, "loss": 1.0124, "step": 1375 }, { "epoch": 0.122947707016329, "grad_norm": 0.40610021352767944, "learning_rate": 9.775215926782788e-05, "loss": 1.0909, "step": 1376 }, { "epoch": 0.12303705854759087, "grad_norm": 0.4109049141407013, "learning_rate": 9.774786718206419e-05, "loss": 1.0908, "step": 1377 }, { "epoch": 0.12312641007885272, "grad_norm": 0.4295797348022461, "learning_rate": 9.774357109690767e-05, "loss": 1.0351, "step": 1378 }, { "epoch": 0.12321576161011459, "grad_norm": 0.49584078788757324, "learning_rate": 9.773927101271816e-05, "loss": 1.0216, "step": 1379 }, { "epoch": 0.12330511314137646, "grad_norm": 0.44206714630126953, "learning_rate": 9.773496692985584e-05, "loss": 0.9854, "step": 1380 }, { "epoch": 0.12339446467263833, "grad_norm": 0.39388856291770935, "learning_rate": 9.773065884868122e-05, "loss": 1.0904, "step": 1381 }, { "epoch": 0.1234838162039002, "grad_norm": 0.4499436914920807, "learning_rate": 9.772634676955515e-05, "loss": 1.0563, "step": 1382 }, { "epoch": 0.12357316773516207, "grad_norm": 0.48682132363319397, "learning_rate": 9.772203069283881e-05, "loss": 1.0727, "step": 1383 }, { "epoch": 0.12366251926642392, "grad_norm": 0.38047704100608826, "learning_rate": 9.771771061889373e-05, "loss": 1.0871, "step": 1384 }, { "epoch": 0.12375187079768579, "grad_norm": 0.46172595024108887, "learning_rate": 9.771338654808173e-05, "loss": 1.0829, "step": 1385 }, { "epoch": 0.12384122232894766, "grad_norm": 0.44145411252975464, "learning_rate": 9.770905848076504e-05, "loss": 1.0631, "step": 1386 }, { "epoch": 0.12393057386020953, "grad_norm": 0.4803364872932434, "learning_rate": 9.770472641730615e-05, "loss": 0.9805, "step": 1387 }, { "epoch": 0.1240199253914714, "grad_norm": 0.4117061197757721, "learning_rate": 9.770039035806792e-05, "loss": 1.023, "step": 1388 }, { "epoch": 0.12410927692273327, "grad_norm": 0.4862026572227478, "learning_rate": 9.769605030341357e-05, "loss": 1.0725, "step": 1389 }, { "epoch": 0.12419862845399512, "grad_norm": 0.4829283654689789, "learning_rate": 9.769170625370658e-05, "loss": 0.993, "step": 1390 }, { "epoch": 0.124287979985257, "grad_norm": 0.4735516309738159, "learning_rate": 9.768735820931085e-05, "loss": 1.059, "step": 1391 }, { "epoch": 0.12437733151651886, "grad_norm": 0.4585529565811157, "learning_rate": 9.768300617059055e-05, "loss": 1.0455, "step": 1392 }, { "epoch": 0.12446668304778073, "grad_norm": 0.40741658210754395, "learning_rate": 9.767865013791022e-05, "loss": 0.9718, "step": 1393 }, { "epoch": 0.1245560345790426, "grad_norm": 0.4379764497280121, "learning_rate": 9.767429011163473e-05, "loss": 1.0147, "step": 1394 }, { "epoch": 0.12464538611030447, "grad_norm": 0.46133920550346375, "learning_rate": 9.766992609212926e-05, "loss": 1.037, "step": 1395 }, { "epoch": 0.12473473764156633, "grad_norm": 0.4180471897125244, "learning_rate": 9.766555807975936e-05, "loss": 1.0846, "step": 1396 }, { "epoch": 0.1248240891728282, "grad_norm": 0.5262375473976135, "learning_rate": 9.76611860748909e-05, "loss": 1.0615, "step": 1397 }, { "epoch": 0.12491344070409006, "grad_norm": 0.4973163604736328, "learning_rate": 9.765681007789008e-05, "loss": 1.0329, "step": 1398 }, { "epoch": 0.12500279223535193, "grad_norm": 0.48663395643234253, "learning_rate": 9.765243008912342e-05, "loss": 1.011, "step": 1399 }, { "epoch": 0.1250921437666138, "grad_norm": 0.52272629737854, "learning_rate": 9.76480461089578e-05, "loss": 1.0271, "step": 1400 }, { "epoch": 0.12518149529787567, "grad_norm": 0.5239830613136292, "learning_rate": 9.764365813776042e-05, "loss": 1.061, "step": 1401 }, { "epoch": 0.12527084682913753, "grad_norm": 0.44980114698410034, "learning_rate": 9.763926617589883e-05, "loss": 1.0671, "step": 1402 }, { "epoch": 0.1253601983603994, "grad_norm": 0.4098733961582184, "learning_rate": 9.763487022374092e-05, "loss": 0.9897, "step": 1403 }, { "epoch": 0.12544954989166127, "grad_norm": 0.360730916261673, "learning_rate": 9.763047028165484e-05, "loss": 1.0978, "step": 1404 }, { "epoch": 0.12553890142292312, "grad_norm": 0.518429160118103, "learning_rate": 9.762606635000919e-05, "loss": 0.9659, "step": 1405 }, { "epoch": 0.125628252954185, "grad_norm": 0.4658845365047455, "learning_rate": 9.762165842917283e-05, "loss": 1.0005, "step": 1406 }, { "epoch": 0.12571760448544686, "grad_norm": 0.3965921700000763, "learning_rate": 9.761724651951498e-05, "loss": 1.029, "step": 1407 }, { "epoch": 0.12580695601670874, "grad_norm": 0.4184513986110687, "learning_rate": 9.761283062140514e-05, "loss": 0.9864, "step": 1408 }, { "epoch": 0.1258963075479706, "grad_norm": 0.37843790650367737, "learning_rate": 9.760841073521323e-05, "loss": 1.1095, "step": 1409 }, { "epoch": 0.12598565907923248, "grad_norm": 0.4006551504135132, "learning_rate": 9.760398686130946e-05, "loss": 1.0732, "step": 1410 }, { "epoch": 0.12607501061049434, "grad_norm": 0.407956600189209, "learning_rate": 9.759955900006436e-05, "loss": 1.064, "step": 1411 }, { "epoch": 0.1261643621417562, "grad_norm": 0.42004308104515076, "learning_rate": 9.759512715184881e-05, "loss": 1.0558, "step": 1412 }, { "epoch": 0.12625371367301808, "grad_norm": 0.411745548248291, "learning_rate": 9.759069131703406e-05, "loss": 1.0318, "step": 1413 }, { "epoch": 0.12634306520427993, "grad_norm": 0.40602388978004456, "learning_rate": 9.75862514959916e-05, "loss": 1.0533, "step": 1414 }, { "epoch": 0.1264324167355418, "grad_norm": 0.4616459906101227, "learning_rate": 9.758180768909337e-05, "loss": 1.0691, "step": 1415 }, { "epoch": 0.12652176826680367, "grad_norm": 0.5043428540229797, "learning_rate": 9.757735989671156e-05, "loss": 1.0047, "step": 1416 }, { "epoch": 0.12661111979806555, "grad_norm": 0.4009447693824768, "learning_rate": 9.75729081192187e-05, "loss": 1.0422, "step": 1417 }, { "epoch": 0.1267004713293274, "grad_norm": 0.3939031660556793, "learning_rate": 9.756845235698772e-05, "loss": 1.0726, "step": 1418 }, { "epoch": 0.12678982286058926, "grad_norm": 0.42009732127189636, "learning_rate": 9.756399261039179e-05, "loss": 1.0992, "step": 1419 }, { "epoch": 0.12687917439185115, "grad_norm": 0.475175142288208, "learning_rate": 9.75595288798045e-05, "loss": 1.079, "step": 1420 }, { "epoch": 0.126968525923113, "grad_norm": 0.42921051383018494, "learning_rate": 9.755506116559971e-05, "loss": 0.999, "step": 1421 }, { "epoch": 0.12705787745437488, "grad_norm": 0.4029146730899811, "learning_rate": 9.755058946815164e-05, "loss": 1.0903, "step": 1422 }, { "epoch": 0.12714722898563674, "grad_norm": 0.4076593518257141, "learning_rate": 9.754611378783486e-05, "loss": 1.1189, "step": 1423 }, { "epoch": 0.1272365805168986, "grad_norm": 0.41739514470100403, "learning_rate": 9.754163412502424e-05, "loss": 1.0114, "step": 1424 }, { "epoch": 0.12732593204816048, "grad_norm": 0.4175979197025299, "learning_rate": 9.7537150480095e-05, "loss": 1.0852, "step": 1425 }, { "epoch": 0.12741528357942233, "grad_norm": 0.5235275030136108, "learning_rate": 9.75326628534227e-05, "loss": 1.0744, "step": 1426 }, { "epoch": 0.12750463511068422, "grad_norm": 0.41696855425834656, "learning_rate": 9.752817124538324e-05, "loss": 1.0345, "step": 1427 }, { "epoch": 0.12759398664194607, "grad_norm": 0.4305160641670227, "learning_rate": 9.752367565635281e-05, "loss": 1.0633, "step": 1428 }, { "epoch": 0.12768333817320796, "grad_norm": 0.4274474084377289, "learning_rate": 9.751917608670797e-05, "loss": 1.0398, "step": 1429 }, { "epoch": 0.1277726897044698, "grad_norm": 0.41223591566085815, "learning_rate": 9.751467253682563e-05, "loss": 1.0496, "step": 1430 }, { "epoch": 0.12786204123573167, "grad_norm": 0.4205789566040039, "learning_rate": 9.751016500708298e-05, "loss": 0.9926, "step": 1431 }, { "epoch": 0.12795139276699355, "grad_norm": 0.3847178816795349, "learning_rate": 9.75056534978576e-05, "loss": 1.0912, "step": 1432 }, { "epoch": 0.1280407442982554, "grad_norm": 0.3665478527545929, "learning_rate": 9.750113800952738e-05, "loss": 1.0538, "step": 1433 }, { "epoch": 0.1281300958295173, "grad_norm": 0.4411013126373291, "learning_rate": 9.74966185424705e-05, "loss": 1.0351, "step": 1434 }, { "epoch": 0.12821944736077914, "grad_norm": 0.5092450380325317, "learning_rate": 9.749209509706555e-05, "loss": 1.0013, "step": 1435 }, { "epoch": 0.12830879889204103, "grad_norm": 0.39857053756713867, "learning_rate": 9.74875676736914e-05, "loss": 1.0148, "step": 1436 }, { "epoch": 0.12839815042330288, "grad_norm": 0.4596467912197113, "learning_rate": 9.74830362727273e-05, "loss": 1.0708, "step": 1437 }, { "epoch": 0.12848750195456474, "grad_norm": 0.4768296182155609, "learning_rate": 9.747850089455275e-05, "loss": 1.0696, "step": 1438 }, { "epoch": 0.12857685348582662, "grad_norm": 0.39800745248794556, "learning_rate": 9.747396153954767e-05, "loss": 1.0498, "step": 1439 }, { "epoch": 0.12866620501708848, "grad_norm": 0.36220765113830566, "learning_rate": 9.746941820809229e-05, "loss": 1.0571, "step": 1440 }, { "epoch": 0.12875555654835036, "grad_norm": 0.4107576012611389, "learning_rate": 9.746487090056713e-05, "loss": 1.0934, "step": 1441 }, { "epoch": 0.1288449080796122, "grad_norm": 0.5009557604789734, "learning_rate": 9.746031961735311e-05, "loss": 1.021, "step": 1442 }, { "epoch": 0.12893425961087407, "grad_norm": 0.43069615960121155, "learning_rate": 9.745576435883142e-05, "loss": 1.0339, "step": 1443 }, { "epoch": 0.12902361114213595, "grad_norm": 0.515500009059906, "learning_rate": 9.745120512538362e-05, "loss": 0.9844, "step": 1444 }, { "epoch": 0.1291129626733978, "grad_norm": 0.49051618576049805, "learning_rate": 9.744664191739161e-05, "loss": 1.0346, "step": 1445 }, { "epoch": 0.1292023142046597, "grad_norm": 0.4764910042285919, "learning_rate": 9.744207473523759e-05, "loss": 1.0409, "step": 1446 }, { "epoch": 0.12929166573592155, "grad_norm": 0.37674903869628906, "learning_rate": 9.74375035793041e-05, "loss": 1.0546, "step": 1447 }, { "epoch": 0.12938101726718343, "grad_norm": 0.524577796459198, "learning_rate": 9.743292844997407e-05, "loss": 1.0141, "step": 1448 }, { "epoch": 0.12947036879844528, "grad_norm": 0.43543311953544617, "learning_rate": 9.742834934763066e-05, "loss": 0.9927, "step": 1449 }, { "epoch": 0.12955972032970714, "grad_norm": 0.4244259297847748, "learning_rate": 9.742376627265745e-05, "loss": 1.0312, "step": 1450 }, { "epoch": 0.12964907186096902, "grad_norm": 0.41922134160995483, "learning_rate": 9.74191792254383e-05, "loss": 1.0319, "step": 1451 }, { "epoch": 0.12973842339223088, "grad_norm": 0.38788026571273804, "learning_rate": 9.741458820635745e-05, "loss": 1.1334, "step": 1452 }, { "epoch": 0.12982777492349276, "grad_norm": 0.42001378536224365, "learning_rate": 9.740999321579943e-05, "loss": 1.0905, "step": 1453 }, { "epoch": 0.12991712645475462, "grad_norm": 0.41198793053627014, "learning_rate": 9.740539425414912e-05, "loss": 1.0529, "step": 1454 }, { "epoch": 0.13000647798601647, "grad_norm": 0.36228737235069275, "learning_rate": 9.740079132179175e-05, "loss": 1.0103, "step": 1455 }, { "epoch": 0.13009582951727836, "grad_norm": 0.48605382442474365, "learning_rate": 9.739618441911285e-05, "loss": 1.0234, "step": 1456 }, { "epoch": 0.1301851810485402, "grad_norm": 0.49182093143463135, "learning_rate": 9.739157354649829e-05, "loss": 0.9932, "step": 1457 }, { "epoch": 0.1302745325798021, "grad_norm": 0.3926076591014862, "learning_rate": 9.738695870433428e-05, "loss": 1.0766, "step": 1458 }, { "epoch": 0.13036388411106395, "grad_norm": 0.5217881202697754, "learning_rate": 9.738233989300739e-05, "loss": 1.0239, "step": 1459 }, { "epoch": 0.13045323564232583, "grad_norm": 0.4107765555381775, "learning_rate": 9.737771711290447e-05, "loss": 1.0517, "step": 1460 }, { "epoch": 0.1305425871735877, "grad_norm": 0.48638463020324707, "learning_rate": 9.737309036441271e-05, "loss": 0.9408, "step": 1461 }, { "epoch": 0.13063193870484954, "grad_norm": 0.4553143084049225, "learning_rate": 9.736845964791968e-05, "loss": 1.0842, "step": 1462 }, { "epoch": 0.13072129023611143, "grad_norm": 0.42393583059310913, "learning_rate": 9.736382496381325e-05, "loss": 1.0693, "step": 1463 }, { "epoch": 0.13081064176737328, "grad_norm": 0.43372994661331177, "learning_rate": 9.735918631248162e-05, "loss": 1.0923, "step": 1464 }, { "epoch": 0.13089999329863516, "grad_norm": 0.42476969957351685, "learning_rate": 9.735454369431332e-05, "loss": 1.1099, "step": 1465 }, { "epoch": 0.13098934482989702, "grad_norm": 0.4615803360939026, "learning_rate": 9.734989710969722e-05, "loss": 1.0546, "step": 1466 }, { "epoch": 0.1310786963611589, "grad_norm": 0.3829108774662018, "learning_rate": 9.734524655902253e-05, "loss": 1.0674, "step": 1467 }, { "epoch": 0.13116804789242076, "grad_norm": 0.4632830321788788, "learning_rate": 9.734059204267878e-05, "loss": 1.0149, "step": 1468 }, { "epoch": 0.1312573994236826, "grad_norm": 0.4512673020362854, "learning_rate": 9.733593356105581e-05, "loss": 1.018, "step": 1469 }, { "epoch": 0.1313467509549445, "grad_norm": 0.5254853963851929, "learning_rate": 9.733127111454385e-05, "loss": 0.9973, "step": 1470 }, { "epoch": 0.13143610248620635, "grad_norm": 0.4638168215751648, "learning_rate": 9.732660470353343e-05, "loss": 1.0137, "step": 1471 }, { "epoch": 0.13152545401746824, "grad_norm": 0.40837833285331726, "learning_rate": 9.732193432841539e-05, "loss": 1.1323, "step": 1472 }, { "epoch": 0.1316148055487301, "grad_norm": 0.39538437128067017, "learning_rate": 9.731725998958095e-05, "loss": 1.0323, "step": 1473 }, { "epoch": 0.13170415707999195, "grad_norm": 0.429143488407135, "learning_rate": 9.73125816874216e-05, "loss": 1.0897, "step": 1474 }, { "epoch": 0.13179350861125383, "grad_norm": 0.48212477564811707, "learning_rate": 9.730789942232923e-05, "loss": 0.9875, "step": 1475 }, { "epoch": 0.13188286014251568, "grad_norm": 0.37595105171203613, "learning_rate": 9.730321319469601e-05, "loss": 1.0909, "step": 1476 }, { "epoch": 0.13197221167377757, "grad_norm": 0.4069534242153168, "learning_rate": 9.729852300491447e-05, "loss": 1.0292, "step": 1477 }, { "epoch": 0.13206156320503942, "grad_norm": 0.4816598892211914, "learning_rate": 9.729382885337747e-05, "loss": 1.0329, "step": 1478 }, { "epoch": 0.1321509147363013, "grad_norm": 0.4182417392730713, "learning_rate": 9.728913074047819e-05, "loss": 1.0831, "step": 1479 }, { "epoch": 0.13224026626756316, "grad_norm": 0.4294276535511017, "learning_rate": 9.728442866661013e-05, "loss": 1.0979, "step": 1480 }, { "epoch": 0.13232961779882502, "grad_norm": 0.4855419099330902, "learning_rate": 9.727972263216716e-05, "loss": 0.994, "step": 1481 }, { "epoch": 0.1324189693300869, "grad_norm": 0.3743133544921875, "learning_rate": 9.727501263754346e-05, "loss": 1.1104, "step": 1482 }, { "epoch": 0.13250832086134876, "grad_norm": 0.47284814715385437, "learning_rate": 9.727029868313352e-05, "loss": 1.0346, "step": 1483 }, { "epoch": 0.13259767239261064, "grad_norm": 0.4814388155937195, "learning_rate": 9.726558076933221e-05, "loss": 1.0394, "step": 1484 }, { "epoch": 0.1326870239238725, "grad_norm": 0.48564615845680237, "learning_rate": 9.726085889653469e-05, "loss": 1.0232, "step": 1485 }, { "epoch": 0.13277637545513435, "grad_norm": 0.4509584605693817, "learning_rate": 9.725613306513648e-05, "loss": 1.0218, "step": 1486 }, { "epoch": 0.13286572698639623, "grad_norm": 0.45788347721099854, "learning_rate": 9.725140327553342e-05, "loss": 1.0633, "step": 1487 }, { "epoch": 0.1329550785176581, "grad_norm": 0.5591961741447449, "learning_rate": 9.724666952812166e-05, "loss": 1.0504, "step": 1488 }, { "epoch": 0.13304443004891997, "grad_norm": 0.49788787961006165, "learning_rate": 9.724193182329772e-05, "loss": 0.9796, "step": 1489 }, { "epoch": 0.13313378158018183, "grad_norm": 0.5562415719032288, "learning_rate": 9.723719016145843e-05, "loss": 0.9633, "step": 1490 }, { "epoch": 0.1332231331114437, "grad_norm": 0.49074721336364746, "learning_rate": 9.723244454300093e-05, "loss": 1.0881, "step": 1491 }, { "epoch": 0.13331248464270556, "grad_norm": 0.42609742283821106, "learning_rate": 9.722769496832275e-05, "loss": 1.0958, "step": 1492 }, { "epoch": 0.13340183617396742, "grad_norm": 0.5812770128250122, "learning_rate": 9.722294143782171e-05, "loss": 1.1263, "step": 1493 }, { "epoch": 0.1334911877052293, "grad_norm": 0.4071921408176422, "learning_rate": 9.721818395189597e-05, "loss": 1.0965, "step": 1494 }, { "epoch": 0.13358053923649116, "grad_norm": 0.4325934648513794, "learning_rate": 9.7213422510944e-05, "loss": 1.0716, "step": 1495 }, { "epoch": 0.13366989076775304, "grad_norm": 0.4646300673484802, "learning_rate": 9.720865711536464e-05, "loss": 1.0785, "step": 1496 }, { "epoch": 0.1337592422990149, "grad_norm": 0.4847221076488495, "learning_rate": 9.720388776555704e-05, "loss": 1.0767, "step": 1497 }, { "epoch": 0.13384859383027678, "grad_norm": 0.3830418884754181, "learning_rate": 9.71991144619207e-05, "loss": 1.0585, "step": 1498 }, { "epoch": 0.13393794536153864, "grad_norm": 0.4215502142906189, "learning_rate": 9.719433720485539e-05, "loss": 1.0535, "step": 1499 }, { "epoch": 0.1340272968928005, "grad_norm": 0.4030102789402008, "learning_rate": 9.718955599476129e-05, "loss": 1.0467, "step": 1500 }, { "epoch": 0.13411664842406237, "grad_norm": 0.4001905024051666, "learning_rate": 9.718477083203887e-05, "loss": 1.0354, "step": 1501 }, { "epoch": 0.13420599995532423, "grad_norm": 0.6067075133323669, "learning_rate": 9.717998171708895e-05, "loss": 0.9813, "step": 1502 }, { "epoch": 0.1342953514865861, "grad_norm": 0.4722452163696289, "learning_rate": 9.717518865031266e-05, "loss": 1.0531, "step": 1503 }, { "epoch": 0.13438470301784797, "grad_norm": 0.45017173886299133, "learning_rate": 9.717039163211146e-05, "loss": 0.9219, "step": 1504 }, { "epoch": 0.13447405454910982, "grad_norm": 0.5280361771583557, "learning_rate": 9.716559066288715e-05, "loss": 0.9697, "step": 1505 }, { "epoch": 0.1345634060803717, "grad_norm": 0.4430614709854126, "learning_rate": 9.716078574304189e-05, "loss": 1.0229, "step": 1506 }, { "epoch": 0.13465275761163356, "grad_norm": 0.4364091157913208, "learning_rate": 9.715597687297813e-05, "loss": 1.0449, "step": 1507 }, { "epoch": 0.13474210914289544, "grad_norm": 0.43157270550727844, "learning_rate": 9.715116405309865e-05, "loss": 1.0936, "step": 1508 }, { "epoch": 0.1348314606741573, "grad_norm": 0.439143568277359, "learning_rate": 9.714634728380658e-05, "loss": 1.0145, "step": 1509 }, { "epoch": 0.13492081220541918, "grad_norm": 0.3858429789543152, "learning_rate": 9.714152656550539e-05, "loss": 1.1219, "step": 1510 }, { "epoch": 0.13501016373668104, "grad_norm": 0.4329967200756073, "learning_rate": 9.713670189859887e-05, "loss": 0.9739, "step": 1511 }, { "epoch": 0.1350995152679429, "grad_norm": 0.4508189260959625, "learning_rate": 9.713187328349111e-05, "loss": 1.0605, "step": 1512 }, { "epoch": 0.13518886679920478, "grad_norm": 0.4645385444164276, "learning_rate": 9.712704072058656e-05, "loss": 0.9873, "step": 1513 }, { "epoch": 0.13527821833046663, "grad_norm": 0.5293644666671753, "learning_rate": 9.712220421029003e-05, "loss": 1.0814, "step": 1514 }, { "epoch": 0.13536756986172852, "grad_norm": 0.4389038681983948, "learning_rate": 9.711736375300661e-05, "loss": 1.0325, "step": 1515 }, { "epoch": 0.13545692139299037, "grad_norm": 0.349090039730072, "learning_rate": 9.711251934914174e-05, "loss": 1.1654, "step": 1516 }, { "epoch": 0.13554627292425223, "grad_norm": 0.5453407168388367, "learning_rate": 9.710767099910119e-05, "loss": 1.0776, "step": 1517 }, { "epoch": 0.1356356244555141, "grad_norm": 0.4334562420845032, "learning_rate": 9.710281870329105e-05, "loss": 1.0226, "step": 1518 }, { "epoch": 0.13572497598677596, "grad_norm": 0.3693782687187195, "learning_rate": 9.709796246211777e-05, "loss": 1.0437, "step": 1519 }, { "epoch": 0.13581432751803785, "grad_norm": 0.49413368105888367, "learning_rate": 9.709310227598811e-05, "loss": 1.0567, "step": 1520 }, { "epoch": 0.1359036790492997, "grad_norm": 0.4487200081348419, "learning_rate": 9.708823814530917e-05, "loss": 0.981, "step": 1521 }, { "epoch": 0.13599303058056159, "grad_norm": 0.38888999819755554, "learning_rate": 9.708337007048834e-05, "loss": 1.0563, "step": 1522 }, { "epoch": 0.13608238211182344, "grad_norm": 0.3906208276748657, "learning_rate": 9.70784980519334e-05, "loss": 1.0467, "step": 1523 }, { "epoch": 0.1361717336430853, "grad_norm": 0.3793841302394867, "learning_rate": 9.707362209005244e-05, "loss": 1.0433, "step": 1524 }, { "epoch": 0.13626108517434718, "grad_norm": 0.38040420413017273, "learning_rate": 9.706874218525385e-05, "loss": 1.0493, "step": 1525 }, { "epoch": 0.13635043670560903, "grad_norm": 0.5357340574264526, "learning_rate": 9.706385833794638e-05, "loss": 0.9978, "step": 1526 }, { "epoch": 0.13643978823687092, "grad_norm": 0.45937198400497437, "learning_rate": 9.705897054853912e-05, "loss": 1.0814, "step": 1527 }, { "epoch": 0.13652913976813277, "grad_norm": 0.41274699568748474, "learning_rate": 9.705407881744146e-05, "loss": 1.0937, "step": 1528 }, { "epoch": 0.13661849129939466, "grad_norm": 0.4878060519695282, "learning_rate": 9.704918314506313e-05, "loss": 1.0107, "step": 1529 }, { "epoch": 0.1367078428306565, "grad_norm": 0.4097362756729126, "learning_rate": 9.704428353181421e-05, "loss": 1.1267, "step": 1530 }, { "epoch": 0.13679719436191837, "grad_norm": 0.40500837564468384, "learning_rate": 9.703937997810511e-05, "loss": 1.0243, "step": 1531 }, { "epoch": 0.13688654589318025, "grad_norm": 0.4004482626914978, "learning_rate": 9.70344724843465e-05, "loss": 1.0469, "step": 1532 }, { "epoch": 0.1369758974244421, "grad_norm": 0.5236450433731079, "learning_rate": 9.702956105094948e-05, "loss": 0.9977, "step": 1533 }, { "epoch": 0.137065248955704, "grad_norm": 0.4368230998516083, "learning_rate": 9.702464567832543e-05, "loss": 1.0354, "step": 1534 }, { "epoch": 0.13715460048696584, "grad_norm": 0.4011850357055664, "learning_rate": 9.701972636688606e-05, "loss": 1.0555, "step": 1535 }, { "epoch": 0.1372439520182277, "grad_norm": 0.4206826984882355, "learning_rate": 9.701480311704339e-05, "loss": 1.007, "step": 1536 }, { "epoch": 0.13733330354948958, "grad_norm": 0.45997175574302673, "learning_rate": 9.700987592920983e-05, "loss": 1.0538, "step": 1537 }, { "epoch": 0.13742265508075144, "grad_norm": 0.4319978356361389, "learning_rate": 9.700494480379807e-05, "loss": 0.9962, "step": 1538 }, { "epoch": 0.13751200661201332, "grad_norm": 0.4265231490135193, "learning_rate": 9.700000974122115e-05, "loss": 0.9786, "step": 1539 }, { "epoch": 0.13760135814327518, "grad_norm": 0.4962487518787384, "learning_rate": 9.699507074189242e-05, "loss": 1.0389, "step": 1540 }, { "epoch": 0.13769070967453706, "grad_norm": 0.40575921535491943, "learning_rate": 9.699012780622561e-05, "loss": 1.0018, "step": 1541 }, { "epoch": 0.13778006120579891, "grad_norm": 0.4042010009288788, "learning_rate": 9.698518093463469e-05, "loss": 1.0876, "step": 1542 }, { "epoch": 0.13786941273706077, "grad_norm": 0.37715786695480347, "learning_rate": 9.698023012753405e-05, "loss": 1.0111, "step": 1543 }, { "epoch": 0.13795876426832265, "grad_norm": 0.4954730272293091, "learning_rate": 9.697527538533837e-05, "loss": 0.9983, "step": 1544 }, { "epoch": 0.1380481157995845, "grad_norm": 0.41885560750961304, "learning_rate": 9.697031670846265e-05, "loss": 1.042, "step": 1545 }, { "epoch": 0.1381374673308464, "grad_norm": 0.3857840895652771, "learning_rate": 9.696535409732224e-05, "loss": 1.0516, "step": 1546 }, { "epoch": 0.13822681886210825, "grad_norm": 0.3953034281730652, "learning_rate": 9.696038755233282e-05, "loss": 1.1183, "step": 1547 }, { "epoch": 0.1383161703933701, "grad_norm": 0.4239901006221771, "learning_rate": 9.695541707391036e-05, "loss": 1.0304, "step": 1548 }, { "epoch": 0.13840552192463199, "grad_norm": 0.4670591652393341, "learning_rate": 9.695044266247122e-05, "loss": 1.0292, "step": 1549 }, { "epoch": 0.13849487345589384, "grad_norm": 0.40943849086761475, "learning_rate": 9.694546431843205e-05, "loss": 1.0806, "step": 1550 }, { "epoch": 0.13858422498715572, "grad_norm": 0.4126596748828888, "learning_rate": 9.694048204220985e-05, "loss": 1.0379, "step": 1551 }, { "epoch": 0.13867357651841758, "grad_norm": 0.4415280222892761, "learning_rate": 9.693549583422191e-05, "loss": 1.0358, "step": 1552 }, { "epoch": 0.13876292804967946, "grad_norm": 0.4215603768825531, "learning_rate": 9.69305056948859e-05, "loss": 1.0874, "step": 1553 }, { "epoch": 0.13885227958094132, "grad_norm": 0.42005017399787903, "learning_rate": 9.692551162461981e-05, "loss": 1.0552, "step": 1554 }, { "epoch": 0.13894163111220317, "grad_norm": 0.4029698669910431, "learning_rate": 9.692051362384193e-05, "loss": 1.0081, "step": 1555 }, { "epoch": 0.13903098264346506, "grad_norm": 0.5039706826210022, "learning_rate": 9.691551169297089e-05, "loss": 1.0135, "step": 1556 }, { "epoch": 0.1391203341747269, "grad_norm": 0.48726481199264526, "learning_rate": 9.691050583242567e-05, "loss": 1.011, "step": 1557 }, { "epoch": 0.1392096857059888, "grad_norm": 0.4722976088523865, "learning_rate": 9.690549604262555e-05, "loss": 0.9555, "step": 1558 }, { "epoch": 0.13929903723725065, "grad_norm": 0.44578781723976135, "learning_rate": 9.690048232399017e-05, "loss": 1.0367, "step": 1559 }, { "epoch": 0.13938838876851253, "grad_norm": 0.47405532002449036, "learning_rate": 9.689546467693946e-05, "loss": 1.0977, "step": 1560 }, { "epoch": 0.1394777402997744, "grad_norm": 0.4186593294143677, "learning_rate": 9.689044310189371e-05, "loss": 1.0139, "step": 1561 }, { "epoch": 0.13956709183103624, "grad_norm": 0.3761426508426666, "learning_rate": 9.688541759927354e-05, "loss": 1.0328, "step": 1562 }, { "epoch": 0.13965644336229813, "grad_norm": 0.4144037365913391, "learning_rate": 9.688038816949989e-05, "loss": 1.0543, "step": 1563 }, { "epoch": 0.13974579489355998, "grad_norm": 0.5622879862785339, "learning_rate": 9.687535481299402e-05, "loss": 0.9816, "step": 1564 }, { "epoch": 0.13983514642482187, "grad_norm": 0.39679154753685, "learning_rate": 9.687031753017753e-05, "loss": 1.075, "step": 1565 }, { "epoch": 0.13992449795608372, "grad_norm": 0.48569121956825256, "learning_rate": 9.686527632147234e-05, "loss": 0.9407, "step": 1566 }, { "epoch": 0.14001384948734558, "grad_norm": 0.43438124656677246, "learning_rate": 9.68602311873007e-05, "loss": 1.0061, "step": 1567 }, { "epoch": 0.14010320101860746, "grad_norm": 0.4626932144165039, "learning_rate": 9.685518212808522e-05, "loss": 0.9583, "step": 1568 }, { "epoch": 0.14019255254986931, "grad_norm": 0.5100006461143494, "learning_rate": 9.685012914424878e-05, "loss": 0.9871, "step": 1569 }, { "epoch": 0.1402819040811312, "grad_norm": 0.4893263280391693, "learning_rate": 9.684507223621465e-05, "loss": 0.9678, "step": 1570 }, { "epoch": 0.14037125561239305, "grad_norm": 0.40761256217956543, "learning_rate": 9.684001140440639e-05, "loss": 1.0929, "step": 1571 }, { "epoch": 0.14046060714365494, "grad_norm": 0.4098682403564453, "learning_rate": 9.68349466492479e-05, "loss": 1.1117, "step": 1572 }, { "epoch": 0.1405499586749168, "grad_norm": 0.36809906363487244, "learning_rate": 9.682987797116339e-05, "loss": 1.0983, "step": 1573 }, { "epoch": 0.14063931020617865, "grad_norm": 0.4545081853866577, "learning_rate": 9.682480537057743e-05, "loss": 1.0238, "step": 1574 }, { "epoch": 0.14072866173744053, "grad_norm": 0.46472567319869995, "learning_rate": 9.681972884791492e-05, "loss": 1.0105, "step": 1575 }, { "epoch": 0.14081801326870239, "grad_norm": 0.4712905287742615, "learning_rate": 9.681464840360103e-05, "loss": 0.9822, "step": 1576 }, { "epoch": 0.14090736479996427, "grad_norm": 0.4782291352748871, "learning_rate": 9.680956403806135e-05, "loss": 1.0426, "step": 1577 }, { "epoch": 0.14099671633122612, "grad_norm": 0.4774461090564728, "learning_rate": 9.680447575172173e-05, "loss": 1.0958, "step": 1578 }, { "epoch": 0.141086067862488, "grad_norm": 0.41189044713974, "learning_rate": 9.679938354500835e-05, "loss": 1.0193, "step": 1579 }, { "epoch": 0.14117541939374986, "grad_norm": 0.45908036828041077, "learning_rate": 9.679428741834776e-05, "loss": 1.0789, "step": 1580 }, { "epoch": 0.14126477092501172, "grad_norm": 0.439585417509079, "learning_rate": 9.67891873721668e-05, "loss": 1.047, "step": 1581 }, { "epoch": 0.1413541224562736, "grad_norm": 0.3884674906730652, "learning_rate": 9.678408340689267e-05, "loss": 1.0571, "step": 1582 }, { "epoch": 0.14144347398753546, "grad_norm": 0.46724075078964233, "learning_rate": 9.677897552295288e-05, "loss": 1.0396, "step": 1583 }, { "epoch": 0.14153282551879734, "grad_norm": 0.4285435378551483, "learning_rate": 9.677386372077524e-05, "loss": 0.9729, "step": 1584 }, { "epoch": 0.1416221770500592, "grad_norm": 0.41435936093330383, "learning_rate": 9.676874800078796e-05, "loss": 1.0059, "step": 1585 }, { "epoch": 0.14171152858132105, "grad_norm": 0.4586467146873474, "learning_rate": 9.67636283634195e-05, "loss": 1.0339, "step": 1586 }, { "epoch": 0.14180088011258293, "grad_norm": 0.5122067928314209, "learning_rate": 9.675850480909872e-05, "loss": 1.001, "step": 1587 }, { "epoch": 0.1418902316438448, "grad_norm": 0.42933905124664307, "learning_rate": 9.675337733825473e-05, "loss": 1.0386, "step": 1588 }, { "epoch": 0.14197958317510667, "grad_norm": 0.4264965355396271, "learning_rate": 9.674824595131704e-05, "loss": 1.0794, "step": 1589 }, { "epoch": 0.14206893470636853, "grad_norm": 0.4125423729419708, "learning_rate": 9.674311064871547e-05, "loss": 1.1203, "step": 1590 }, { "epoch": 0.1421582862376304, "grad_norm": 0.3605044484138489, "learning_rate": 9.67379714308801e-05, "loss": 1.0368, "step": 1591 }, { "epoch": 0.14224763776889227, "grad_norm": 0.45730555057525635, "learning_rate": 9.673282829824146e-05, "loss": 1.0428, "step": 1592 }, { "epoch": 0.14233698930015412, "grad_norm": 0.45879489183425903, "learning_rate": 9.672768125123031e-05, "loss": 1.055, "step": 1593 }, { "epoch": 0.142426340831416, "grad_norm": 0.4261813461780548, "learning_rate": 9.672253029027777e-05, "loss": 1.0486, "step": 1594 }, { "epoch": 0.14251569236267786, "grad_norm": 0.3854468762874603, "learning_rate": 9.671737541581529e-05, "loss": 1.0092, "step": 1595 }, { "epoch": 0.14260504389393974, "grad_norm": 0.4169517457485199, "learning_rate": 9.671221662827465e-05, "loss": 1.063, "step": 1596 }, { "epoch": 0.1426943954252016, "grad_norm": 0.41241368651390076, "learning_rate": 9.670705392808796e-05, "loss": 1.0649, "step": 1597 }, { "epoch": 0.14278374695646345, "grad_norm": 0.3985016644001007, "learning_rate": 9.670188731568764e-05, "loss": 1.0126, "step": 1598 }, { "epoch": 0.14287309848772534, "grad_norm": 0.395868182182312, "learning_rate": 9.669671679150642e-05, "loss": 1.0801, "step": 1599 }, { "epoch": 0.1429624500189872, "grad_norm": 0.3894529938697815, "learning_rate": 9.669154235597746e-05, "loss": 1.0315, "step": 1600 }, { "epoch": 0.14305180155024907, "grad_norm": 0.4293975234031677, "learning_rate": 9.66863640095341e-05, "loss": 1.0163, "step": 1601 }, { "epoch": 0.14314115308151093, "grad_norm": 0.3886623978614807, "learning_rate": 9.668118175261015e-05, "loss": 1.0487, "step": 1602 }, { "epoch": 0.1432305046127728, "grad_norm": 0.46701672673225403, "learning_rate": 9.66759955856396e-05, "loss": 1.0463, "step": 1603 }, { "epoch": 0.14331985614403467, "grad_norm": 0.4126945436000824, "learning_rate": 9.667080550905691e-05, "loss": 1.0268, "step": 1604 }, { "epoch": 0.14340920767529652, "grad_norm": 0.4913039207458496, "learning_rate": 9.666561152329679e-05, "loss": 0.9639, "step": 1605 }, { "epoch": 0.1434985592065584, "grad_norm": 0.3920893967151642, "learning_rate": 9.666041362879427e-05, "loss": 1.0301, "step": 1606 }, { "epoch": 0.14358791073782026, "grad_norm": 0.38557082414627075, "learning_rate": 9.665521182598476e-05, "loss": 1.0137, "step": 1607 }, { "epoch": 0.14367726226908215, "grad_norm": 0.532264769077301, "learning_rate": 9.665000611530392e-05, "loss": 0.9705, "step": 1608 }, { "epoch": 0.143766613800344, "grad_norm": 0.42515647411346436, "learning_rate": 9.664479649718784e-05, "loss": 1.0133, "step": 1609 }, { "epoch": 0.14385596533160588, "grad_norm": 0.42825859785079956, "learning_rate": 9.663958297207286e-05, "loss": 1.0272, "step": 1610 }, { "epoch": 0.14394531686286774, "grad_norm": 0.3822021782398224, "learning_rate": 9.663436554039567e-05, "loss": 1.0168, "step": 1611 }, { "epoch": 0.1440346683941296, "grad_norm": 0.43384209275245667, "learning_rate": 9.662914420259325e-05, "loss": 1.031, "step": 1612 }, { "epoch": 0.14412401992539148, "grad_norm": 0.4819833040237427, "learning_rate": 9.662391895910299e-05, "loss": 1.011, "step": 1613 }, { "epoch": 0.14421337145665333, "grad_norm": 0.38827648758888245, "learning_rate": 9.661868981036255e-05, "loss": 1.0561, "step": 1614 }, { "epoch": 0.14430272298791522, "grad_norm": 0.5991657376289368, "learning_rate": 9.661345675680991e-05, "loss": 0.9748, "step": 1615 }, { "epoch": 0.14439207451917707, "grad_norm": 0.568602442741394, "learning_rate": 9.660821979888339e-05, "loss": 1.0853, "step": 1616 }, { "epoch": 0.14448142605043893, "grad_norm": 0.4651549458503723, "learning_rate": 9.660297893702168e-05, "loss": 1.0078, "step": 1617 }, { "epoch": 0.1445707775817008, "grad_norm": 0.426548570394516, "learning_rate": 9.659773417166371e-05, "loss": 0.9844, "step": 1618 }, { "epoch": 0.14466012911296267, "grad_norm": 0.4951413571834564, "learning_rate": 9.659248550324882e-05, "loss": 0.8695, "step": 1619 }, { "epoch": 0.14474948064422455, "grad_norm": 0.4711915850639343, "learning_rate": 9.658723293221661e-05, "loss": 1.03, "step": 1620 }, { "epoch": 0.1448388321754864, "grad_norm": 0.38477903604507446, "learning_rate": 9.658197645900708e-05, "loss": 1.0262, "step": 1621 }, { "epoch": 0.1449281837067483, "grad_norm": 0.5240241885185242, "learning_rate": 9.657671608406047e-05, "loss": 0.9587, "step": 1622 }, { "epoch": 0.14501753523801014, "grad_norm": 0.4152171015739441, "learning_rate": 9.657145180781743e-05, "loss": 1.0534, "step": 1623 }, { "epoch": 0.145106886769272, "grad_norm": 0.43236833810806274, "learning_rate": 9.65661836307189e-05, "loss": 1.0896, "step": 1624 }, { "epoch": 0.14519623830053388, "grad_norm": 0.5443985462188721, "learning_rate": 9.656091155320611e-05, "loss": 1.0644, "step": 1625 }, { "epoch": 0.14528558983179574, "grad_norm": 0.4063294529914856, "learning_rate": 9.655563557572069e-05, "loss": 1.0385, "step": 1626 }, { "epoch": 0.14537494136305762, "grad_norm": 0.5186159610748291, "learning_rate": 9.655035569870452e-05, "loss": 1.0842, "step": 1627 }, { "epoch": 0.14546429289431947, "grad_norm": 0.4194406569004059, "learning_rate": 9.654507192259989e-05, "loss": 1.0013, "step": 1628 }, { "epoch": 0.14555364442558133, "grad_norm": 0.4991320073604584, "learning_rate": 9.653978424784934e-05, "loss": 1.096, "step": 1629 }, { "epoch": 0.1456429959568432, "grad_norm": 0.4811127185821533, "learning_rate": 9.653449267489579e-05, "loss": 0.9865, "step": 1630 }, { "epoch": 0.14573234748810507, "grad_norm": 0.4587719440460205, "learning_rate": 9.652919720418245e-05, "loss": 1.0453, "step": 1631 }, { "epoch": 0.14582169901936695, "grad_norm": 0.40233114361763, "learning_rate": 9.652389783615287e-05, "loss": 1.1234, "step": 1632 }, { "epoch": 0.1459110505506288, "grad_norm": 0.4691876471042633, "learning_rate": 9.651859457125096e-05, "loss": 0.9824, "step": 1633 }, { "epoch": 0.1460004020818907, "grad_norm": 0.41825005412101746, "learning_rate": 9.651328740992088e-05, "loss": 1.0688, "step": 1634 }, { "epoch": 0.14608975361315255, "grad_norm": 0.4713682532310486, "learning_rate": 9.65079763526072e-05, "loss": 1.0345, "step": 1635 }, { "epoch": 0.1461791051444144, "grad_norm": 0.41411858797073364, "learning_rate": 9.650266139975474e-05, "loss": 1.0163, "step": 1636 }, { "epoch": 0.14626845667567628, "grad_norm": 0.3815430700778961, "learning_rate": 9.649734255180873e-05, "loss": 1.0626, "step": 1637 }, { "epoch": 0.14635780820693814, "grad_norm": 0.41789570450782776, "learning_rate": 9.649201980921465e-05, "loss": 0.9784, "step": 1638 }, { "epoch": 0.14644715973820002, "grad_norm": 0.43766146898269653, "learning_rate": 9.648669317241833e-05, "loss": 1.0253, "step": 1639 }, { "epoch": 0.14653651126946188, "grad_norm": 0.43024829030036926, "learning_rate": 9.648136264186595e-05, "loss": 1.0291, "step": 1640 }, { "epoch": 0.14662586280072376, "grad_norm": 0.39453184604644775, "learning_rate": 9.647602821800399e-05, "loss": 1.0444, "step": 1641 }, { "epoch": 0.14671521433198562, "grad_norm": 0.44370511174201965, "learning_rate": 9.647068990127928e-05, "loss": 1.0723, "step": 1642 }, { "epoch": 0.14680456586324747, "grad_norm": 0.46351152658462524, "learning_rate": 9.646534769213893e-05, "loss": 1.0513, "step": 1643 }, { "epoch": 0.14689391739450935, "grad_norm": 0.4535273313522339, "learning_rate": 9.646000159103043e-05, "loss": 1.0497, "step": 1644 }, { "epoch": 0.1469832689257712, "grad_norm": 0.43159547448158264, "learning_rate": 9.645465159840157e-05, "loss": 1.0305, "step": 1645 }, { "epoch": 0.1470726204570331, "grad_norm": 0.4147747755050659, "learning_rate": 9.644929771470048e-05, "loss": 1.0223, "step": 1646 }, { "epoch": 0.14716197198829495, "grad_norm": 0.45313653349876404, "learning_rate": 9.64439399403756e-05, "loss": 0.9908, "step": 1647 }, { "epoch": 0.1472513235195568, "grad_norm": 0.3770694434642792, "learning_rate": 9.643857827587566e-05, "loss": 1.0579, "step": 1648 }, { "epoch": 0.1473406750508187, "grad_norm": 0.3648872971534729, "learning_rate": 9.643321272164981e-05, "loss": 1.0841, "step": 1649 }, { "epoch": 0.14743002658208054, "grad_norm": 0.48402151465415955, "learning_rate": 9.642784327814744e-05, "loss": 1.0109, "step": 1650 }, { "epoch": 0.14751937811334243, "grad_norm": 0.42030149698257446, "learning_rate": 9.642246994581832e-05, "loss": 1.0677, "step": 1651 }, { "epoch": 0.14760872964460428, "grad_norm": 0.46909406781196594, "learning_rate": 9.641709272511252e-05, "loss": 0.9945, "step": 1652 }, { "epoch": 0.14769808117586616, "grad_norm": 0.4196245074272156, "learning_rate": 9.641171161648043e-05, "loss": 1.1265, "step": 1653 }, { "epoch": 0.14778743270712802, "grad_norm": 0.3893241286277771, "learning_rate": 9.640632662037278e-05, "loss": 1.0552, "step": 1654 }, { "epoch": 0.14787678423838987, "grad_norm": 0.44918498396873474, "learning_rate": 9.640093773724061e-05, "loss": 1.0326, "step": 1655 }, { "epoch": 0.14796613576965176, "grad_norm": 0.5244982242584229, "learning_rate": 9.63955449675353e-05, "loss": 1.0296, "step": 1656 }, { "epoch": 0.1480554873009136, "grad_norm": 0.4323536157608032, "learning_rate": 9.639014831170858e-05, "loss": 1.0664, "step": 1657 }, { "epoch": 0.1481448388321755, "grad_norm": 0.437716007232666, "learning_rate": 9.638474777021244e-05, "loss": 1.0745, "step": 1658 }, { "epoch": 0.14823419036343735, "grad_norm": 0.39507436752319336, "learning_rate": 9.637934334349927e-05, "loss": 1.0435, "step": 1659 }, { "epoch": 0.1483235418946992, "grad_norm": 0.422969251871109, "learning_rate": 9.63739350320217e-05, "loss": 1.0611, "step": 1660 }, { "epoch": 0.1484128934259611, "grad_norm": 0.3984132409095764, "learning_rate": 9.636852283623276e-05, "loss": 1.0717, "step": 1661 }, { "epoch": 0.14850224495722295, "grad_norm": 0.4262081980705261, "learning_rate": 9.63631067565858e-05, "loss": 1.0278, "step": 1662 }, { "epoch": 0.14859159648848483, "grad_norm": 0.43201524019241333, "learning_rate": 9.635768679353445e-05, "loss": 0.9729, "step": 1663 }, { "epoch": 0.14868094801974668, "grad_norm": 0.4402812719345093, "learning_rate": 9.635226294753267e-05, "loss": 1.0292, "step": 1664 }, { "epoch": 0.14877029955100857, "grad_norm": 0.41984882950782776, "learning_rate": 9.634683521903483e-05, "loss": 1.036, "step": 1665 }, { "epoch": 0.14885965108227042, "grad_norm": 0.38695448637008667, "learning_rate": 9.634140360849548e-05, "loss": 1.0454, "step": 1666 }, { "epoch": 0.14894900261353228, "grad_norm": 0.44035449624061584, "learning_rate": 9.633596811636964e-05, "loss": 1.0825, "step": 1667 }, { "epoch": 0.14903835414479416, "grad_norm": 0.4655371904373169, "learning_rate": 9.633052874311255e-05, "loss": 1.0182, "step": 1668 }, { "epoch": 0.14912770567605602, "grad_norm": 0.42064300179481506, "learning_rate": 9.632508548917983e-05, "loss": 1.0113, "step": 1669 }, { "epoch": 0.1492170572073179, "grad_norm": 0.4009134769439697, "learning_rate": 9.631963835502742e-05, "loss": 1.0587, "step": 1670 }, { "epoch": 0.14930640873857975, "grad_norm": 0.3674651086330414, "learning_rate": 9.631418734111155e-05, "loss": 1.0356, "step": 1671 }, { "epoch": 0.14939576026984164, "grad_norm": 0.416415810585022, "learning_rate": 9.630873244788883e-05, "loss": 1.0074, "step": 1672 }, { "epoch": 0.1494851118011035, "grad_norm": 0.5505238771438599, "learning_rate": 9.630327367581616e-05, "loss": 1.0533, "step": 1673 }, { "epoch": 0.14957446333236535, "grad_norm": 0.42817139625549316, "learning_rate": 9.629781102535074e-05, "loss": 0.9811, "step": 1674 }, { "epoch": 0.14966381486362723, "grad_norm": 0.4695313274860382, "learning_rate": 9.629234449695015e-05, "loss": 1.1321, "step": 1675 }, { "epoch": 0.1497531663948891, "grad_norm": 0.5143523216247559, "learning_rate": 9.628687409107229e-05, "loss": 0.9853, "step": 1676 }, { "epoch": 0.14984251792615097, "grad_norm": 0.4100930392742157, "learning_rate": 9.628139980817532e-05, "loss": 1.1377, "step": 1677 }, { "epoch": 0.14993186945741283, "grad_norm": 0.41417646408081055, "learning_rate": 9.627592164871779e-05, "loss": 0.9765, "step": 1678 }, { "epoch": 0.15002122098867468, "grad_norm": 0.3846787214279175, "learning_rate": 9.627043961315856e-05, "loss": 1.0585, "step": 1679 }, { "epoch": 0.15011057251993656, "grad_norm": 0.4256773889064789, "learning_rate": 9.626495370195683e-05, "loss": 1.0301, "step": 1680 }, { "epoch": 0.15019992405119842, "grad_norm": 0.5811467170715332, "learning_rate": 9.625946391557204e-05, "loss": 1.0538, "step": 1681 }, { "epoch": 0.1502892755824603, "grad_norm": 0.4346529543399811, "learning_rate": 9.625397025446408e-05, "loss": 1.0657, "step": 1682 }, { "epoch": 0.15037862711372216, "grad_norm": 0.3570214509963989, "learning_rate": 9.624847271909308e-05, "loss": 1.0752, "step": 1683 }, { "epoch": 0.15046797864498404, "grad_norm": 0.4259663224220276, "learning_rate": 9.62429713099195e-05, "loss": 1.1056, "step": 1684 }, { "epoch": 0.1505573301762459, "grad_norm": 0.3912825882434845, "learning_rate": 9.623746602740417e-05, "loss": 1.0072, "step": 1685 }, { "epoch": 0.15064668170750775, "grad_norm": 0.41152167320251465, "learning_rate": 9.623195687200822e-05, "loss": 1.0224, "step": 1686 }, { "epoch": 0.15073603323876963, "grad_norm": 0.397203266620636, "learning_rate": 9.622644384419306e-05, "loss": 1.0702, "step": 1687 }, { "epoch": 0.1508253847700315, "grad_norm": 0.4435591399669647, "learning_rate": 9.62209269444205e-05, "loss": 0.9521, "step": 1688 }, { "epoch": 0.15091473630129337, "grad_norm": 0.36750173568725586, "learning_rate": 9.621540617315262e-05, "loss": 0.9843, "step": 1689 }, { "epoch": 0.15100408783255523, "grad_norm": 0.4787936210632324, "learning_rate": 9.620988153085187e-05, "loss": 1.0932, "step": 1690 }, { "epoch": 0.15109343936381708, "grad_norm": 0.44318270683288574, "learning_rate": 9.620435301798097e-05, "loss": 1.0223, "step": 1691 }, { "epoch": 0.15118279089507897, "grad_norm": 0.47096431255340576, "learning_rate": 9.619882063500299e-05, "loss": 1.0623, "step": 1692 }, { "epoch": 0.15127214242634082, "grad_norm": 0.44428104162216187, "learning_rate": 9.619328438238135e-05, "loss": 1.0413, "step": 1693 }, { "epoch": 0.1513614939576027, "grad_norm": 0.4424484670162201, "learning_rate": 9.618774426057975e-05, "loss": 1.0759, "step": 1694 }, { "epoch": 0.15145084548886456, "grad_norm": 0.3779531717300415, "learning_rate": 9.618220027006225e-05, "loss": 1.0476, "step": 1695 }, { "epoch": 0.15154019702012644, "grad_norm": 0.40432384610176086, "learning_rate": 9.617665241129321e-05, "loss": 1.1215, "step": 1696 }, { "epoch": 0.1516295485513883, "grad_norm": 0.48599207401275635, "learning_rate": 9.617110068473732e-05, "loss": 1.0024, "step": 1697 }, { "epoch": 0.15171890008265015, "grad_norm": 0.49614617228507996, "learning_rate": 9.61655450908596e-05, "loss": 0.9352, "step": 1698 }, { "epoch": 0.15180825161391204, "grad_norm": 0.5036540627479553, "learning_rate": 9.615998563012538e-05, "loss": 0.9844, "step": 1699 }, { "epoch": 0.1518976031451739, "grad_norm": 0.3981391191482544, "learning_rate": 9.615442230300036e-05, "loss": 1.0222, "step": 1700 }, { "epoch": 0.15198695467643578, "grad_norm": 0.4211319386959076, "learning_rate": 9.614885510995047e-05, "loss": 1.0769, "step": 1701 }, { "epoch": 0.15207630620769763, "grad_norm": 0.4759863317012787, "learning_rate": 9.614328405144207e-05, "loss": 0.999, "step": 1702 }, { "epoch": 0.15216565773895951, "grad_norm": 0.48188042640686035, "learning_rate": 9.613770912794178e-05, "loss": 0.9254, "step": 1703 }, { "epoch": 0.15225500927022137, "grad_norm": 0.38263580203056335, "learning_rate": 9.613213033991655e-05, "loss": 1.0517, "step": 1704 }, { "epoch": 0.15234436080148323, "grad_norm": 0.44071927666664124, "learning_rate": 9.612654768783368e-05, "loss": 1.0019, "step": 1705 }, { "epoch": 0.1524337123327451, "grad_norm": 0.44804859161376953, "learning_rate": 9.612096117216076e-05, "loss": 1.062, "step": 1706 }, { "epoch": 0.15252306386400696, "grad_norm": 0.4194650948047638, "learning_rate": 9.611537079336574e-05, "loss": 1.0101, "step": 1707 }, { "epoch": 0.15261241539526885, "grad_norm": 0.4241783618927002, "learning_rate": 9.610977655191684e-05, "loss": 1.0197, "step": 1708 }, { "epoch": 0.1527017669265307, "grad_norm": 0.48272955417633057, "learning_rate": 9.610417844828268e-05, "loss": 1.014, "step": 1709 }, { "epoch": 0.15279111845779256, "grad_norm": 0.4096718728542328, "learning_rate": 9.609857648293212e-05, "loss": 1.0106, "step": 1710 }, { "epoch": 0.15288046998905444, "grad_norm": 0.3911011815071106, "learning_rate": 9.609297065633443e-05, "loss": 1.0542, "step": 1711 }, { "epoch": 0.1529698215203163, "grad_norm": 0.43495795130729675, "learning_rate": 9.60873609689591e-05, "loss": 1.0039, "step": 1712 }, { "epoch": 0.15305917305157818, "grad_norm": 0.4525046646595001, "learning_rate": 9.608174742127605e-05, "loss": 0.9883, "step": 1713 }, { "epoch": 0.15314852458284003, "grad_norm": 0.3769172430038452, "learning_rate": 9.607613001375546e-05, "loss": 1.1064, "step": 1714 }, { "epoch": 0.15323787611410192, "grad_norm": 0.435320645570755, "learning_rate": 9.607050874686784e-05, "loss": 1.0457, "step": 1715 }, { "epoch": 0.15332722764536377, "grad_norm": 0.38679543137550354, "learning_rate": 9.606488362108404e-05, "loss": 1.0426, "step": 1716 }, { "epoch": 0.15341657917662563, "grad_norm": 0.446005254983902, "learning_rate": 9.605925463687522e-05, "loss": 1.041, "step": 1717 }, { "epoch": 0.1535059307078875, "grad_norm": 0.4369790554046631, "learning_rate": 9.605362179471287e-05, "loss": 0.9566, "step": 1718 }, { "epoch": 0.15359528223914937, "grad_norm": 0.3627423942089081, "learning_rate": 9.604798509506879e-05, "loss": 1.0247, "step": 1719 }, { "epoch": 0.15368463377041125, "grad_norm": 0.40787839889526367, "learning_rate": 9.604234453841512e-05, "loss": 1.1035, "step": 1720 }, { "epoch": 0.1537739853016731, "grad_norm": 0.4434973895549774, "learning_rate": 9.603670012522432e-05, "loss": 1.0767, "step": 1721 }, { "epoch": 0.153863336832935, "grad_norm": 0.4459492564201355, "learning_rate": 9.603105185596917e-05, "loss": 0.9853, "step": 1722 }, { "epoch": 0.15395268836419684, "grad_norm": 0.45601245760917664, "learning_rate": 9.602539973112278e-05, "loss": 1.0228, "step": 1723 }, { "epoch": 0.1540420398954587, "grad_norm": 0.44149455428123474, "learning_rate": 9.601974375115856e-05, "loss": 1.0443, "step": 1724 }, { "epoch": 0.15413139142672058, "grad_norm": 0.5166652798652649, "learning_rate": 9.601408391655027e-05, "loss": 0.9864, "step": 1725 }, { "epoch": 0.15422074295798244, "grad_norm": 0.5009913444519043, "learning_rate": 9.600842022777198e-05, "loss": 1.0516, "step": 1726 }, { "epoch": 0.15431009448924432, "grad_norm": 0.4483872652053833, "learning_rate": 9.600275268529807e-05, "loss": 0.9622, "step": 1727 }, { "epoch": 0.15439944602050618, "grad_norm": 0.4618902802467346, "learning_rate": 9.599708128960327e-05, "loss": 0.9304, "step": 1728 }, { "epoch": 0.15448879755176803, "grad_norm": 0.39204496145248413, "learning_rate": 9.599140604116263e-05, "loss": 1.0674, "step": 1729 }, { "epoch": 0.15457814908302991, "grad_norm": 0.44558286666870117, "learning_rate": 9.598572694045149e-05, "loss": 1.0133, "step": 1730 }, { "epoch": 0.15466750061429177, "grad_norm": 0.39136767387390137, "learning_rate": 9.598004398794556e-05, "loss": 0.9855, "step": 1731 }, { "epoch": 0.15475685214555365, "grad_norm": 0.47076520323753357, "learning_rate": 9.597435718412082e-05, "loss": 1.0118, "step": 1732 }, { "epoch": 0.1548462036768155, "grad_norm": 0.373670369386673, "learning_rate": 9.596866652945362e-05, "loss": 1.0864, "step": 1733 }, { "epoch": 0.1549355552080774, "grad_norm": 0.3965423107147217, "learning_rate": 9.59629720244206e-05, "loss": 0.9902, "step": 1734 }, { "epoch": 0.15502490673933925, "grad_norm": 0.4660866856575012, "learning_rate": 9.595727366949875e-05, "loss": 1.0477, "step": 1735 }, { "epoch": 0.1551142582706011, "grad_norm": 0.4512030780315399, "learning_rate": 9.595157146516535e-05, "loss": 1.058, "step": 1736 }, { "epoch": 0.15520360980186299, "grad_norm": 0.4368734359741211, "learning_rate": 9.594586541189804e-05, "loss": 1.0307, "step": 1737 }, { "epoch": 0.15529296133312484, "grad_norm": 0.5563750863075256, "learning_rate": 9.594015551017477e-05, "loss": 0.9361, "step": 1738 }, { "epoch": 0.15538231286438672, "grad_norm": 0.43998295068740845, "learning_rate": 9.593444176047378e-05, "loss": 1.041, "step": 1739 }, { "epoch": 0.15547166439564858, "grad_norm": 0.6449422836303711, "learning_rate": 9.592872416327365e-05, "loss": 0.9818, "step": 1740 }, { "epoch": 0.15556101592691043, "grad_norm": 0.40925419330596924, "learning_rate": 9.592300271905332e-05, "loss": 1.0854, "step": 1741 }, { "epoch": 0.15565036745817232, "grad_norm": 0.3952295780181885, "learning_rate": 9.591727742829199e-05, "loss": 1.1511, "step": 1742 }, { "epoch": 0.15573971898943417, "grad_norm": 0.49510762095451355, "learning_rate": 9.591154829146927e-05, "loss": 0.9822, "step": 1743 }, { "epoch": 0.15582907052069606, "grad_norm": 0.43776780366897583, "learning_rate": 9.590581530906497e-05, "loss": 1.0781, "step": 1744 }, { "epoch": 0.1559184220519579, "grad_norm": 0.40279847383499146, "learning_rate": 9.590007848155932e-05, "loss": 1.0741, "step": 1745 }, { "epoch": 0.1560077735832198, "grad_norm": 0.36434587836265564, "learning_rate": 9.589433780943284e-05, "loss": 1.0873, "step": 1746 }, { "epoch": 0.15609712511448165, "grad_norm": 0.3859378695487976, "learning_rate": 9.588859329316637e-05, "loss": 1.0719, "step": 1747 }, { "epoch": 0.1561864766457435, "grad_norm": 0.34931498765945435, "learning_rate": 9.588284493324106e-05, "loss": 1.0645, "step": 1748 }, { "epoch": 0.1562758281770054, "grad_norm": 0.3696591556072235, "learning_rate": 9.587709273013845e-05, "loss": 1.0927, "step": 1749 }, { "epoch": 0.15636517970826724, "grad_norm": 0.4064505100250244, "learning_rate": 9.587133668434027e-05, "loss": 1.0693, "step": 1750 }, { "epoch": 0.15645453123952913, "grad_norm": 0.5394856333732605, "learning_rate": 9.586557679632871e-05, "loss": 1.0407, "step": 1751 }, { "epoch": 0.15654388277079098, "grad_norm": 0.47162312269210815, "learning_rate": 9.58598130665862e-05, "loss": 1.0118, "step": 1752 }, { "epoch": 0.15663323430205287, "grad_norm": 0.46519139409065247, "learning_rate": 9.585404549559551e-05, "loss": 1.0815, "step": 1753 }, { "epoch": 0.15672258583331472, "grad_norm": 0.4289345443248749, "learning_rate": 9.584827408383974e-05, "loss": 1.039, "step": 1754 }, { "epoch": 0.15681193736457658, "grad_norm": 0.4443482756614685, "learning_rate": 9.58424988318023e-05, "loss": 1.0424, "step": 1755 }, { "epoch": 0.15690128889583846, "grad_norm": 0.4442220628261566, "learning_rate": 9.583671973996695e-05, "loss": 1.0615, "step": 1756 }, { "epoch": 0.15699064042710031, "grad_norm": 0.4155905544757843, "learning_rate": 9.583093680881774e-05, "loss": 1.0328, "step": 1757 }, { "epoch": 0.1570799919583622, "grad_norm": 0.40333032608032227, "learning_rate": 9.582515003883904e-05, "loss": 1.0237, "step": 1758 }, { "epoch": 0.15716934348962405, "grad_norm": 0.4274482727050781, "learning_rate": 9.581935943051557e-05, "loss": 1.0246, "step": 1759 }, { "epoch": 0.1572586950208859, "grad_norm": 0.47601258754730225, "learning_rate": 9.581356498433233e-05, "loss": 1.0917, "step": 1760 }, { "epoch": 0.1573480465521478, "grad_norm": 0.37168294191360474, "learning_rate": 9.580776670077471e-05, "loss": 1.0442, "step": 1761 }, { "epoch": 0.15743739808340965, "grad_norm": 0.48139792680740356, "learning_rate": 9.580196458032836e-05, "loss": 1.1097, "step": 1762 }, { "epoch": 0.15752674961467153, "grad_norm": 0.49613094329833984, "learning_rate": 9.579615862347924e-05, "loss": 1.0116, "step": 1763 }, { "epoch": 0.15761610114593338, "grad_norm": 0.3607613742351532, "learning_rate": 9.579034883071368e-05, "loss": 1.0563, "step": 1764 }, { "epoch": 0.15770545267719527, "grad_norm": 0.5845707654953003, "learning_rate": 9.578453520251833e-05, "loss": 1.0129, "step": 1765 }, { "epoch": 0.15779480420845712, "grad_norm": 0.4461520314216614, "learning_rate": 9.577871773938011e-05, "loss": 0.99, "step": 1766 }, { "epoch": 0.15788415573971898, "grad_norm": 0.4064408242702484, "learning_rate": 9.577289644178634e-05, "loss": 1.0397, "step": 1767 }, { "epoch": 0.15797350727098086, "grad_norm": 0.475175678730011, "learning_rate": 9.576707131022458e-05, "loss": 0.9823, "step": 1768 }, { "epoch": 0.15806285880224272, "grad_norm": 0.5856608152389526, "learning_rate": 9.576124234518275e-05, "loss": 0.9467, "step": 1769 }, { "epoch": 0.1581522103335046, "grad_norm": 0.44236186146736145, "learning_rate": 9.57554095471491e-05, "loss": 0.9745, "step": 1770 }, { "epoch": 0.15824156186476646, "grad_norm": 0.38273531198501587, "learning_rate": 9.574957291661218e-05, "loss": 1.0545, "step": 1771 }, { "epoch": 0.1583309133960283, "grad_norm": 0.37142929434776306, "learning_rate": 9.574373245406086e-05, "loss": 0.9557, "step": 1772 }, { "epoch": 0.1584202649272902, "grad_norm": 0.4939393997192383, "learning_rate": 9.573788815998437e-05, "loss": 1.027, "step": 1773 }, { "epoch": 0.15850961645855205, "grad_norm": 0.5472790598869324, "learning_rate": 9.573204003487221e-05, "loss": 0.987, "step": 1774 }, { "epoch": 0.15859896798981393, "grad_norm": 0.44142383337020874, "learning_rate": 9.572618807921423e-05, "loss": 1.0675, "step": 1775 }, { "epoch": 0.1586883195210758, "grad_norm": 0.4367615878582001, "learning_rate": 9.572033229350059e-05, "loss": 1.0437, "step": 1776 }, { "epoch": 0.15877767105233767, "grad_norm": 0.38631755113601685, "learning_rate": 9.571447267822178e-05, "loss": 1.0157, "step": 1777 }, { "epoch": 0.15886702258359953, "grad_norm": 0.5570868849754333, "learning_rate": 9.57086092338686e-05, "loss": 0.9977, "step": 1778 }, { "epoch": 0.15895637411486138, "grad_norm": 0.4448052942752838, "learning_rate": 9.570274196093217e-05, "loss": 1.0383, "step": 1779 }, { "epoch": 0.15904572564612326, "grad_norm": 0.46690064668655396, "learning_rate": 9.569687085990395e-05, "loss": 1.0864, "step": 1780 }, { "epoch": 0.15913507717738512, "grad_norm": 0.411737322807312, "learning_rate": 9.569099593127571e-05, "loss": 0.9839, "step": 1781 }, { "epoch": 0.159224428708647, "grad_norm": 0.4243115484714508, "learning_rate": 9.568511717553952e-05, "loss": 0.9728, "step": 1782 }, { "epoch": 0.15931378023990886, "grad_norm": 0.36506015062332153, "learning_rate": 9.567923459318782e-05, "loss": 1.0646, "step": 1783 }, { "epoch": 0.15940313177117074, "grad_norm": 0.47241315245628357, "learning_rate": 9.56733481847133e-05, "loss": 1.0306, "step": 1784 }, { "epoch": 0.1594924833024326, "grad_norm": 0.4867679178714752, "learning_rate": 9.566745795060901e-05, "loss": 0.9727, "step": 1785 }, { "epoch": 0.15958183483369445, "grad_norm": 0.47150614857673645, "learning_rate": 9.566156389136835e-05, "loss": 1.0212, "step": 1786 }, { "epoch": 0.15967118636495634, "grad_norm": 0.4018579423427582, "learning_rate": 9.5655666007485e-05, "loss": 1.0142, "step": 1787 }, { "epoch": 0.1597605378962182, "grad_norm": 0.4174135625362396, "learning_rate": 9.564976429945298e-05, "loss": 1.0768, "step": 1788 }, { "epoch": 0.15984988942748007, "grad_norm": 0.5170420408248901, "learning_rate": 9.56438587677666e-05, "loss": 0.9997, "step": 1789 }, { "epoch": 0.15993924095874193, "grad_norm": 0.4216405153274536, "learning_rate": 9.563794941292051e-05, "loss": 1.0355, "step": 1790 }, { "epoch": 0.16002859249000378, "grad_norm": 0.431983083486557, "learning_rate": 9.563203623540969e-05, "loss": 1.0086, "step": 1791 }, { "epoch": 0.16011794402126567, "grad_norm": 0.45916739106178284, "learning_rate": 9.562611923572944e-05, "loss": 1.0688, "step": 1792 }, { "epoch": 0.16020729555252752, "grad_norm": 0.45291900634765625, "learning_rate": 9.562019841437537e-05, "loss": 1.057, "step": 1793 }, { "epoch": 0.1602966470837894, "grad_norm": 0.4525638520717621, "learning_rate": 9.56142737718434e-05, "loss": 1.1029, "step": 1794 }, { "epoch": 0.16038599861505126, "grad_norm": 0.4891865849494934, "learning_rate": 9.560834530862979e-05, "loss": 1.0798, "step": 1795 }, { "epoch": 0.16047535014631314, "grad_norm": 0.3669840097427368, "learning_rate": 9.560241302523109e-05, "loss": 1.0062, "step": 1796 }, { "epoch": 0.160564701677575, "grad_norm": 0.4095466732978821, "learning_rate": 9.559647692214424e-05, "loss": 0.9993, "step": 1797 }, { "epoch": 0.16065405320883686, "grad_norm": 0.5019549131393433, "learning_rate": 9.559053699986642e-05, "loss": 1.0768, "step": 1798 }, { "epoch": 0.16074340474009874, "grad_norm": 0.40123990178108215, "learning_rate": 9.558459325889515e-05, "loss": 1.0714, "step": 1799 }, { "epoch": 0.1608327562713606, "grad_norm": 0.4176787734031677, "learning_rate": 9.557864569972832e-05, "loss": 1.0862, "step": 1800 }, { "epoch": 0.16092210780262248, "grad_norm": 0.4730657935142517, "learning_rate": 9.557269432286407e-05, "loss": 1.0686, "step": 1801 }, { "epoch": 0.16101145933388433, "grad_norm": 0.4306131899356842, "learning_rate": 9.55667391288009e-05, "loss": 0.9852, "step": 1802 }, { "epoch": 0.1611008108651462, "grad_norm": 0.40398168563842773, "learning_rate": 9.556078011803762e-05, "loss": 1.003, "step": 1803 }, { "epoch": 0.16119016239640807, "grad_norm": 0.3656451106071472, "learning_rate": 9.555481729107336e-05, "loss": 1.0516, "step": 1804 }, { "epoch": 0.16127951392766993, "grad_norm": 0.5433897376060486, "learning_rate": 9.554885064840758e-05, "loss": 0.9463, "step": 1805 }, { "epoch": 0.1613688654589318, "grad_norm": 0.3712410628795624, "learning_rate": 9.554288019054003e-05, "loss": 1.0896, "step": 1806 }, { "epoch": 0.16145821699019366, "grad_norm": 0.4026222825050354, "learning_rate": 9.553690591797082e-05, "loss": 1.1181, "step": 1807 }, { "epoch": 0.16154756852145555, "grad_norm": 0.45911315083503723, "learning_rate": 9.553092783120034e-05, "loss": 0.9981, "step": 1808 }, { "epoch": 0.1616369200527174, "grad_norm": 0.44033879041671753, "learning_rate": 9.552494593072935e-05, "loss": 1.0534, "step": 1809 }, { "epoch": 0.16172627158397926, "grad_norm": 0.42678460478782654, "learning_rate": 9.551896021705886e-05, "loss": 1.0123, "step": 1810 }, { "epoch": 0.16181562311524114, "grad_norm": 0.48469212651252747, "learning_rate": 9.551297069069027e-05, "loss": 0.9846, "step": 1811 }, { "epoch": 0.161904974646503, "grad_norm": 0.4134121537208557, "learning_rate": 9.550697735212523e-05, "loss": 1.1542, "step": 1812 }, { "epoch": 0.16199432617776488, "grad_norm": 0.44429683685302734, "learning_rate": 9.550098020186579e-05, "loss": 1.0126, "step": 1813 }, { "epoch": 0.16208367770902674, "grad_norm": 0.4219866096973419, "learning_rate": 9.549497924041424e-05, "loss": 1.0309, "step": 1814 }, { "epoch": 0.16217302924028862, "grad_norm": 0.38047802448272705, "learning_rate": 9.548897446827322e-05, "loss": 1.0875, "step": 1815 }, { "epoch": 0.16226238077155047, "grad_norm": 0.37720173597335815, "learning_rate": 9.548296588594575e-05, "loss": 1.0008, "step": 1816 }, { "epoch": 0.16235173230281233, "grad_norm": 0.4290127754211426, "learning_rate": 9.547695349393504e-05, "loss": 1.0353, "step": 1817 }, { "epoch": 0.1624410838340742, "grad_norm": 0.4377609193325043, "learning_rate": 9.547093729274474e-05, "loss": 1.0352, "step": 1818 }, { "epoch": 0.16253043536533607, "grad_norm": 0.3721230924129486, "learning_rate": 9.546491728287876e-05, "loss": 1.0833, "step": 1819 }, { "epoch": 0.16261978689659795, "grad_norm": 0.43184563517570496, "learning_rate": 9.545889346484134e-05, "loss": 0.9966, "step": 1820 }, { "epoch": 0.1627091384278598, "grad_norm": 0.41010618209838867, "learning_rate": 9.545286583913702e-05, "loss": 0.9989, "step": 1821 }, { "epoch": 0.16279848995912166, "grad_norm": 0.5316981077194214, "learning_rate": 9.54468344062707e-05, "loss": 0.9844, "step": 1822 }, { "epoch": 0.16288784149038354, "grad_norm": 0.35896286368370056, "learning_rate": 9.544079916674757e-05, "loss": 1.0765, "step": 1823 }, { "epoch": 0.1629771930216454, "grad_norm": 0.45125991106033325, "learning_rate": 9.543476012107313e-05, "loss": 1.1044, "step": 1824 }, { "epoch": 0.16306654455290728, "grad_norm": 0.4255354404449463, "learning_rate": 9.542871726975327e-05, "loss": 1.0465, "step": 1825 }, { "epoch": 0.16315589608416914, "grad_norm": 0.4363675117492676, "learning_rate": 9.542267061329406e-05, "loss": 1.0555, "step": 1826 }, { "epoch": 0.16324524761543102, "grad_norm": 0.3892308175563812, "learning_rate": 9.541662015220205e-05, "loss": 1.055, "step": 1827 }, { "epoch": 0.16333459914669288, "grad_norm": 0.43629783391952515, "learning_rate": 9.541056588698397e-05, "loss": 1.0117, "step": 1828 }, { "epoch": 0.16342395067795473, "grad_norm": 0.454475998878479, "learning_rate": 9.540450781814696e-05, "loss": 1.0849, "step": 1829 }, { "epoch": 0.16351330220921662, "grad_norm": 0.41698095202445984, "learning_rate": 9.539844594619845e-05, "loss": 1.0137, "step": 1830 }, { "epoch": 0.16360265374047847, "grad_norm": 0.41076821088790894, "learning_rate": 9.539238027164619e-05, "loss": 0.9891, "step": 1831 }, { "epoch": 0.16369200527174035, "grad_norm": 0.4605240821838379, "learning_rate": 9.538631079499823e-05, "loss": 0.9447, "step": 1832 }, { "epoch": 0.1637813568030022, "grad_norm": 0.36705470085144043, "learning_rate": 9.538023751676294e-05, "loss": 1.023, "step": 1833 }, { "epoch": 0.16387070833426406, "grad_norm": 0.44232022762298584, "learning_rate": 9.537416043744905e-05, "loss": 0.9757, "step": 1834 }, { "epoch": 0.16396005986552595, "grad_norm": 0.41922441124916077, "learning_rate": 9.536807955756557e-05, "loss": 1.0834, "step": 1835 }, { "epoch": 0.1640494113967878, "grad_norm": 0.44340333342552185, "learning_rate": 9.536199487762182e-05, "loss": 0.9799, "step": 1836 }, { "epoch": 0.1641387629280497, "grad_norm": 0.5596421360969543, "learning_rate": 9.53559063981275e-05, "loss": 0.921, "step": 1837 }, { "epoch": 0.16422811445931154, "grad_norm": 0.3959425091743469, "learning_rate": 9.534981411959255e-05, "loss": 1.0426, "step": 1838 }, { "epoch": 0.16431746599057342, "grad_norm": 0.431082546710968, "learning_rate": 9.534371804252728e-05, "loss": 1.1178, "step": 1839 }, { "epoch": 0.16440681752183528, "grad_norm": 0.42235735058784485, "learning_rate": 9.533761816744228e-05, "loss": 1.0315, "step": 1840 }, { "epoch": 0.16449616905309714, "grad_norm": 0.4385235905647278, "learning_rate": 9.533151449484851e-05, "loss": 1.0135, "step": 1841 }, { "epoch": 0.16458552058435902, "grad_norm": 0.41497084498405457, "learning_rate": 9.53254070252572e-05, "loss": 1.0006, "step": 1842 }, { "epoch": 0.16467487211562087, "grad_norm": 0.4509193003177643, "learning_rate": 9.531929575917991e-05, "loss": 0.9943, "step": 1843 }, { "epoch": 0.16476422364688276, "grad_norm": 0.45146405696868896, "learning_rate": 9.531318069712854e-05, "loss": 0.9926, "step": 1844 }, { "epoch": 0.1648535751781446, "grad_norm": 0.43658965826034546, "learning_rate": 9.530706183961526e-05, "loss": 0.9999, "step": 1845 }, { "epoch": 0.1649429267094065, "grad_norm": 0.4209139347076416, "learning_rate": 9.530093918715264e-05, "loss": 1.0795, "step": 1846 }, { "epoch": 0.16503227824066835, "grad_norm": 0.39283883571624756, "learning_rate": 9.529481274025347e-05, "loss": 1.101, "step": 1847 }, { "epoch": 0.1651216297719302, "grad_norm": 0.5160419344902039, "learning_rate": 9.528868249943095e-05, "loss": 0.9985, "step": 1848 }, { "epoch": 0.1652109813031921, "grad_norm": 0.5293583869934082, "learning_rate": 9.528254846519851e-05, "loss": 0.9466, "step": 1849 }, { "epoch": 0.16530033283445394, "grad_norm": 0.4382696747779846, "learning_rate": 9.527641063806996e-05, "loss": 1.0173, "step": 1850 }, { "epoch": 0.16538968436571583, "grad_norm": 0.4551234841346741, "learning_rate": 9.52702690185594e-05, "loss": 0.9932, "step": 1851 }, { "epoch": 0.16547903589697768, "grad_norm": 0.42946553230285645, "learning_rate": 9.526412360718127e-05, "loss": 1.0688, "step": 1852 }, { "epoch": 0.16556838742823954, "grad_norm": 0.5565560460090637, "learning_rate": 9.525797440445031e-05, "loss": 1.024, "step": 1853 }, { "epoch": 0.16565773895950142, "grad_norm": 0.483884334564209, "learning_rate": 9.525182141088159e-05, "loss": 0.9546, "step": 1854 }, { "epoch": 0.16574709049076328, "grad_norm": 0.4862859547138214, "learning_rate": 9.524566462699045e-05, "loss": 0.9695, "step": 1855 }, { "epoch": 0.16583644202202516, "grad_norm": 0.5029091238975525, "learning_rate": 9.523950405329262e-05, "loss": 0.9478, "step": 1856 }, { "epoch": 0.16592579355328702, "grad_norm": 0.6389296650886536, "learning_rate": 9.523333969030413e-05, "loss": 1.0239, "step": 1857 }, { "epoch": 0.1660151450845489, "grad_norm": 0.39252007007598877, "learning_rate": 9.522717153854125e-05, "loss": 1.1161, "step": 1858 }, { "epoch": 0.16610449661581075, "grad_norm": 0.45961377024650574, "learning_rate": 9.522099959852071e-05, "loss": 1.0234, "step": 1859 }, { "epoch": 0.1661938481470726, "grad_norm": 0.3859967291355133, "learning_rate": 9.52148238707594e-05, "loss": 1.0622, "step": 1860 }, { "epoch": 0.1662831996783345, "grad_norm": 0.48423507809638977, "learning_rate": 9.520864435577466e-05, "loss": 1.0117, "step": 1861 }, { "epoch": 0.16637255120959635, "grad_norm": 0.4309901297092438, "learning_rate": 9.520246105408403e-05, "loss": 1.0523, "step": 1862 }, { "epoch": 0.16646190274085823, "grad_norm": 0.487351655960083, "learning_rate": 9.519627396620549e-05, "loss": 0.9941, "step": 1863 }, { "epoch": 0.1665512542721201, "grad_norm": 0.44148099422454834, "learning_rate": 9.519008309265724e-05, "loss": 1.0301, "step": 1864 }, { "epoch": 0.16664060580338197, "grad_norm": 0.4186917245388031, "learning_rate": 9.518388843395786e-05, "loss": 0.996, "step": 1865 }, { "epoch": 0.16672995733464382, "grad_norm": 0.4065192937850952, "learning_rate": 9.517768999062617e-05, "loss": 1.0351, "step": 1866 }, { "epoch": 0.16681930886590568, "grad_norm": 0.402235209941864, "learning_rate": 9.51714877631814e-05, "loss": 1.0227, "step": 1867 }, { "epoch": 0.16690866039716756, "grad_norm": 0.4134058952331543, "learning_rate": 9.516528175214303e-05, "loss": 1.0518, "step": 1868 }, { "epoch": 0.16699801192842942, "grad_norm": 0.39137229323387146, "learning_rate": 9.515907195803088e-05, "loss": 1.0648, "step": 1869 }, { "epoch": 0.1670873634596913, "grad_norm": 0.43055975437164307, "learning_rate": 9.51528583813651e-05, "loss": 0.9951, "step": 1870 }, { "epoch": 0.16717671499095316, "grad_norm": 0.45151713490486145, "learning_rate": 9.514664102266615e-05, "loss": 1.0613, "step": 1871 }, { "epoch": 0.167266066522215, "grad_norm": 0.45818963646888733, "learning_rate": 9.514041988245477e-05, "loss": 1.0874, "step": 1872 }, { "epoch": 0.1673554180534769, "grad_norm": 0.38174670934677124, "learning_rate": 9.513419496125206e-05, "loss": 1.0888, "step": 1873 }, { "epoch": 0.16744476958473875, "grad_norm": 0.4613979160785675, "learning_rate": 9.512796625957943e-05, "loss": 0.9875, "step": 1874 }, { "epoch": 0.16753412111600063, "grad_norm": 0.49428126215934753, "learning_rate": 9.512173377795859e-05, "loss": 0.9716, "step": 1875 }, { "epoch": 0.1676234726472625, "grad_norm": 0.5154160261154175, "learning_rate": 9.511549751691159e-05, "loss": 0.9448, "step": 1876 }, { "epoch": 0.16771282417852437, "grad_norm": 0.4421161711215973, "learning_rate": 9.510925747696077e-05, "loss": 1.009, "step": 1877 }, { "epoch": 0.16780217570978623, "grad_norm": 0.5333375930786133, "learning_rate": 9.510301365862882e-05, "loss": 1.0682, "step": 1878 }, { "epoch": 0.16789152724104808, "grad_norm": 0.35133177042007446, "learning_rate": 9.509676606243869e-05, "loss": 1.0646, "step": 1879 }, { "epoch": 0.16798087877230997, "grad_norm": 0.39709219336509705, "learning_rate": 9.509051468891372e-05, "loss": 1.0559, "step": 1880 }, { "epoch": 0.16807023030357182, "grad_norm": 0.45971524715423584, "learning_rate": 9.508425953857752e-05, "loss": 1.0145, "step": 1881 }, { "epoch": 0.1681595818348337, "grad_norm": 0.44881150126457214, "learning_rate": 9.507800061195401e-05, "loss": 1.0404, "step": 1882 }, { "epoch": 0.16824893336609556, "grad_norm": 0.5401568412780762, "learning_rate": 9.507173790956746e-05, "loss": 0.9762, "step": 1883 }, { "epoch": 0.16833828489735742, "grad_norm": 0.48548710346221924, "learning_rate": 9.506547143194242e-05, "loss": 1.0197, "step": 1884 }, { "epoch": 0.1684276364286193, "grad_norm": 0.4435642659664154, "learning_rate": 9.505920117960379e-05, "loss": 1.0289, "step": 1885 }, { "epoch": 0.16851698795988115, "grad_norm": 0.4363146722316742, "learning_rate": 9.505292715307676e-05, "loss": 1.0346, "step": 1886 }, { "epoch": 0.16860633949114304, "grad_norm": 0.45172473788261414, "learning_rate": 9.504664935288685e-05, "loss": 1.0222, "step": 1887 }, { "epoch": 0.1686956910224049, "grad_norm": 0.4839716851711273, "learning_rate": 9.504036777955991e-05, "loss": 1.0304, "step": 1888 }, { "epoch": 0.16878504255366678, "grad_norm": 0.4805682599544525, "learning_rate": 9.503408243362206e-05, "loss": 1.0433, "step": 1889 }, { "epoch": 0.16887439408492863, "grad_norm": 0.4286407232284546, "learning_rate": 9.502779331559977e-05, "loss": 1.059, "step": 1890 }, { "epoch": 0.16896374561619049, "grad_norm": 0.39787065982818604, "learning_rate": 9.502150042601985e-05, "loss": 1.1139, "step": 1891 }, { "epoch": 0.16905309714745237, "grad_norm": 0.42047420144081116, "learning_rate": 9.501520376540936e-05, "loss": 1.0147, "step": 1892 }, { "epoch": 0.16914244867871422, "grad_norm": 0.4411332309246063, "learning_rate": 9.500890333429573e-05, "loss": 1.0576, "step": 1893 }, { "epoch": 0.1692318002099761, "grad_norm": 0.43769532442092896, "learning_rate": 9.500259913320668e-05, "loss": 1.0455, "step": 1894 }, { "epoch": 0.16932115174123796, "grad_norm": 0.4141598343849182, "learning_rate": 9.499629116267026e-05, "loss": 1.0782, "step": 1895 }, { "epoch": 0.16941050327249985, "grad_norm": 0.4736109972000122, "learning_rate": 9.498997942321483e-05, "loss": 0.9884, "step": 1896 }, { "epoch": 0.1694998548037617, "grad_norm": 0.38666075468063354, "learning_rate": 9.498366391536907e-05, "loss": 1.0701, "step": 1897 }, { "epoch": 0.16958920633502356, "grad_norm": 0.4399631917476654, "learning_rate": 9.497734463966196e-05, "loss": 0.996, "step": 1898 }, { "epoch": 0.16967855786628544, "grad_norm": 0.3586561977863312, "learning_rate": 9.497102159662281e-05, "loss": 1.0366, "step": 1899 }, { "epoch": 0.1697679093975473, "grad_norm": 0.3949701189994812, "learning_rate": 9.496469478678126e-05, "loss": 1.0616, "step": 1900 }, { "epoch": 0.16985726092880918, "grad_norm": 0.38532134890556335, "learning_rate": 9.495836421066722e-05, "loss": 1.0576, "step": 1901 }, { "epoch": 0.16994661246007103, "grad_norm": 0.4000793993473053, "learning_rate": 9.495202986881095e-05, "loss": 1.0262, "step": 1902 }, { "epoch": 0.1700359639913329, "grad_norm": 0.3810192048549652, "learning_rate": 9.494569176174304e-05, "loss": 1.0097, "step": 1903 }, { "epoch": 0.17012531552259477, "grad_norm": 0.4256387948989868, "learning_rate": 9.493934988999436e-05, "loss": 1.0186, "step": 1904 }, { "epoch": 0.17021466705385663, "grad_norm": 0.4718528389930725, "learning_rate": 9.493300425409609e-05, "loss": 0.9866, "step": 1905 }, { "epoch": 0.1703040185851185, "grad_norm": 0.3986504077911377, "learning_rate": 9.492665485457976e-05, "loss": 1.0735, "step": 1906 }, { "epoch": 0.17039337011638037, "grad_norm": 0.4253017008304596, "learning_rate": 9.492030169197722e-05, "loss": 1.0168, "step": 1907 }, { "epoch": 0.17048272164764225, "grad_norm": 0.34923726320266724, "learning_rate": 9.49139447668206e-05, "loss": 1.1098, "step": 1908 }, { "epoch": 0.1705720731789041, "grad_norm": 0.46308934688568115, "learning_rate": 9.490758407964234e-05, "loss": 0.9807, "step": 1909 }, { "epoch": 0.17066142471016596, "grad_norm": 0.5132005214691162, "learning_rate": 9.490121963097525e-05, "loss": 1.0316, "step": 1910 }, { "epoch": 0.17075077624142784, "grad_norm": 0.4535066485404968, "learning_rate": 9.489485142135238e-05, "loss": 0.9653, "step": 1911 }, { "epoch": 0.1708401277726897, "grad_norm": 0.41993486881256104, "learning_rate": 9.488847945130718e-05, "loss": 1.0554, "step": 1912 }, { "epoch": 0.17092947930395158, "grad_norm": 0.46821948885917664, "learning_rate": 9.488210372137335e-05, "loss": 0.9511, "step": 1913 }, { "epoch": 0.17101883083521344, "grad_norm": 0.5106322765350342, "learning_rate": 9.487572423208491e-05, "loss": 0.9633, "step": 1914 }, { "epoch": 0.1711081823664753, "grad_norm": 0.4303719997406006, "learning_rate": 9.486934098397622e-05, "loss": 1.0664, "step": 1915 }, { "epoch": 0.17119753389773718, "grad_norm": 0.3766723573207855, "learning_rate": 9.486295397758196e-05, "loss": 1.0226, "step": 1916 }, { "epoch": 0.17128688542899903, "grad_norm": 0.4431716799736023, "learning_rate": 9.48565632134371e-05, "loss": 1.0226, "step": 1917 }, { "epoch": 0.1713762369602609, "grad_norm": 0.41526105999946594, "learning_rate": 9.485016869207695e-05, "loss": 1.0072, "step": 1918 }, { "epoch": 0.17146558849152277, "grad_norm": 0.4168999195098877, "learning_rate": 9.484377041403706e-05, "loss": 1.0846, "step": 1919 }, { "epoch": 0.17155494002278465, "grad_norm": 0.4972972571849823, "learning_rate": 9.483736837985344e-05, "loss": 1.0172, "step": 1920 }, { "epoch": 0.1716442915540465, "grad_norm": 0.4009465277194977, "learning_rate": 9.483096259006228e-05, "loss": 1.04, "step": 1921 }, { "epoch": 0.17173364308530836, "grad_norm": 0.37553495168685913, "learning_rate": 9.482455304520013e-05, "loss": 1.0406, "step": 1922 }, { "epoch": 0.17182299461657025, "grad_norm": 0.37648624181747437, "learning_rate": 9.481813974580386e-05, "loss": 1.0371, "step": 1923 }, { "epoch": 0.1719123461478321, "grad_norm": 0.3838346302509308, "learning_rate": 9.481172269241067e-05, "loss": 0.9912, "step": 1924 }, { "epoch": 0.17200169767909398, "grad_norm": 0.4350638687610626, "learning_rate": 9.480530188555805e-05, "loss": 1.0651, "step": 1925 }, { "epoch": 0.17209104921035584, "grad_norm": 0.46230390667915344, "learning_rate": 9.47988773257838e-05, "loss": 0.9876, "step": 1926 }, { "epoch": 0.17218040074161772, "grad_norm": 0.41337013244628906, "learning_rate": 9.479244901362605e-05, "loss": 1.09, "step": 1927 }, { "epoch": 0.17226975227287958, "grad_norm": 0.4298297166824341, "learning_rate": 9.478601694962323e-05, "loss": 1.1069, "step": 1928 }, { "epoch": 0.17235910380414143, "grad_norm": 0.4469965398311615, "learning_rate": 9.477958113431413e-05, "loss": 1.0354, "step": 1929 }, { "epoch": 0.17244845533540332, "grad_norm": 0.4125996530056, "learning_rate": 9.477314156823779e-05, "loss": 1.0402, "step": 1930 }, { "epoch": 0.17253780686666517, "grad_norm": 0.44828668236732483, "learning_rate": 9.476669825193359e-05, "loss": 1.0471, "step": 1931 }, { "epoch": 0.17262715839792706, "grad_norm": 0.4745740592479706, "learning_rate": 9.476025118594124e-05, "loss": 1.01, "step": 1932 }, { "epoch": 0.1727165099291889, "grad_norm": 0.46582111716270447, "learning_rate": 9.475380037080073e-05, "loss": 1.0378, "step": 1933 }, { "epoch": 0.17280586146045077, "grad_norm": 0.45007333159446716, "learning_rate": 9.47473458070524e-05, "loss": 1.0021, "step": 1934 }, { "epoch": 0.17289521299171265, "grad_norm": 0.3929324448108673, "learning_rate": 9.474088749523689e-05, "loss": 0.9972, "step": 1935 }, { "epoch": 0.1729845645229745, "grad_norm": 0.42300376296043396, "learning_rate": 9.473442543589515e-05, "loss": 1.0603, "step": 1936 }, { "epoch": 0.1730739160542364, "grad_norm": 0.5826932191848755, "learning_rate": 9.472795962956844e-05, "loss": 0.9595, "step": 1937 }, { "epoch": 0.17316326758549824, "grad_norm": 0.5283807516098022, "learning_rate": 9.472149007679836e-05, "loss": 0.9726, "step": 1938 }, { "epoch": 0.17325261911676013, "grad_norm": 0.4322817921638489, "learning_rate": 9.471501677812677e-05, "loss": 0.9882, "step": 1939 }, { "epoch": 0.17334197064802198, "grad_norm": 0.42819416522979736, "learning_rate": 9.47085397340959e-05, "loss": 1.0776, "step": 1940 }, { "epoch": 0.17343132217928384, "grad_norm": 0.379272997379303, "learning_rate": 9.470205894524829e-05, "loss": 1.0924, "step": 1941 }, { "epoch": 0.17352067371054572, "grad_norm": 0.4515071213245392, "learning_rate": 9.469557441212674e-05, "loss": 1.0024, "step": 1942 }, { "epoch": 0.17361002524180758, "grad_norm": 0.4782482981681824, "learning_rate": 9.468908613527441e-05, "loss": 0.9794, "step": 1943 }, { "epoch": 0.17369937677306946, "grad_norm": 0.3846750855445862, "learning_rate": 9.468259411523476e-05, "loss": 0.9786, "step": 1944 }, { "epoch": 0.1737887283043313, "grad_norm": 0.4089057743549347, "learning_rate": 9.467609835255158e-05, "loss": 1.1039, "step": 1945 }, { "epoch": 0.17387807983559317, "grad_norm": 0.5269849896430969, "learning_rate": 9.466959884776894e-05, "loss": 1.0726, "step": 1946 }, { "epoch": 0.17396743136685505, "grad_norm": 0.43416985869407654, "learning_rate": 9.466309560143126e-05, "loss": 1.0341, "step": 1947 }, { "epoch": 0.1740567828981169, "grad_norm": 0.41887369751930237, "learning_rate": 9.465658861408324e-05, "loss": 1.0519, "step": 1948 }, { "epoch": 0.1741461344293788, "grad_norm": 0.47824710607528687, "learning_rate": 9.465007788626993e-05, "loss": 1.0775, "step": 1949 }, { "epoch": 0.17423548596064065, "grad_norm": 0.3974583148956299, "learning_rate": 9.464356341853666e-05, "loss": 1.026, "step": 1950 }, { "epoch": 0.17432483749190253, "grad_norm": 0.4442320764064789, "learning_rate": 9.463704521142909e-05, "loss": 1.0781, "step": 1951 }, { "epoch": 0.17441418902316438, "grad_norm": 0.4253045618534088, "learning_rate": 9.463052326549317e-05, "loss": 1.0449, "step": 1952 }, { "epoch": 0.17450354055442624, "grad_norm": 0.40720415115356445, "learning_rate": 9.462399758127521e-05, "loss": 1.0281, "step": 1953 }, { "epoch": 0.17459289208568812, "grad_norm": 0.46845048666000366, "learning_rate": 9.46174681593218e-05, "loss": 1.0313, "step": 1954 }, { "epoch": 0.17468224361694998, "grad_norm": 0.4253866374492645, "learning_rate": 9.461093500017984e-05, "loss": 1.0191, "step": 1955 }, { "epoch": 0.17477159514821186, "grad_norm": 0.41982749104499817, "learning_rate": 9.460439810439655e-05, "loss": 1.0332, "step": 1956 }, { "epoch": 0.17486094667947372, "grad_norm": 0.45301470160484314, "learning_rate": 9.459785747251948e-05, "loss": 0.9582, "step": 1957 }, { "epoch": 0.1749502982107356, "grad_norm": 0.44690030813217163, "learning_rate": 9.459131310509646e-05, "loss": 1.0285, "step": 1958 }, { "epoch": 0.17503964974199746, "grad_norm": 0.5572661757469177, "learning_rate": 9.458476500267566e-05, "loss": 0.9837, "step": 1959 }, { "epoch": 0.1751290012732593, "grad_norm": 0.3876509666442871, "learning_rate": 9.457821316580555e-05, "loss": 1.058, "step": 1960 }, { "epoch": 0.1752183528045212, "grad_norm": 0.4823840856552124, "learning_rate": 9.457165759503493e-05, "loss": 0.9689, "step": 1961 }, { "epoch": 0.17530770433578305, "grad_norm": 0.4866883456707001, "learning_rate": 9.456509829091287e-05, "loss": 1.0991, "step": 1962 }, { "epoch": 0.17539705586704493, "grad_norm": 0.47182294726371765, "learning_rate": 9.455853525398881e-05, "loss": 0.898, "step": 1963 }, { "epoch": 0.1754864073983068, "grad_norm": 0.4766868054866791, "learning_rate": 9.455196848481244e-05, "loss": 1.017, "step": 1964 }, { "epoch": 0.17557575892956864, "grad_norm": 0.47620826959609985, "learning_rate": 9.454539798393385e-05, "loss": 1.117, "step": 1965 }, { "epoch": 0.17566511046083053, "grad_norm": 0.391517698764801, "learning_rate": 9.453882375190335e-05, "loss": 1.0444, "step": 1966 }, { "epoch": 0.17575446199209238, "grad_norm": 0.4250124394893646, "learning_rate": 9.45322457892716e-05, "loss": 1.1342, "step": 1967 }, { "epoch": 0.17584381352335426, "grad_norm": 0.4004350006580353, "learning_rate": 9.45256640965896e-05, "loss": 1.0022, "step": 1968 }, { "epoch": 0.17593316505461612, "grad_norm": 0.4620151221752167, "learning_rate": 9.451907867440862e-05, "loss": 1.0139, "step": 1969 }, { "epoch": 0.176022516585878, "grad_norm": 0.43265095353126526, "learning_rate": 9.451248952328025e-05, "loss": 1.1093, "step": 1970 }, { "epoch": 0.17611186811713986, "grad_norm": 0.5694372653961182, "learning_rate": 9.450589664375643e-05, "loss": 0.9312, "step": 1971 }, { "epoch": 0.1762012196484017, "grad_norm": 0.4211016297340393, "learning_rate": 9.449930003638935e-05, "loss": 1.0137, "step": 1972 }, { "epoch": 0.1762905711796636, "grad_norm": 0.44613519310951233, "learning_rate": 9.449269970173158e-05, "loss": 0.969, "step": 1973 }, { "epoch": 0.17637992271092545, "grad_norm": 0.3986136019229889, "learning_rate": 9.448609564033593e-05, "loss": 1.1211, "step": 1974 }, { "epoch": 0.17646927424218734, "grad_norm": 0.5278311967849731, "learning_rate": 9.44794878527556e-05, "loss": 0.9798, "step": 1975 }, { "epoch": 0.1765586257734492, "grad_norm": 0.5130774974822998, "learning_rate": 9.447287633954405e-05, "loss": 1.0396, "step": 1976 }, { "epoch": 0.17664797730471105, "grad_norm": 0.483941912651062, "learning_rate": 9.446626110125505e-05, "loss": 0.9771, "step": 1977 }, { "epoch": 0.17673732883597293, "grad_norm": 0.46051788330078125, "learning_rate": 9.445964213844269e-05, "loss": 1.0256, "step": 1978 }, { "epoch": 0.17682668036723478, "grad_norm": 0.5272265076637268, "learning_rate": 9.445301945166143e-05, "loss": 0.9884, "step": 1979 }, { "epoch": 0.17691603189849667, "grad_norm": 0.4559291899204254, "learning_rate": 9.444639304146593e-05, "loss": 0.9899, "step": 1980 }, { "epoch": 0.17700538342975852, "grad_norm": 0.42420053482055664, "learning_rate": 9.443976290841126e-05, "loss": 0.9969, "step": 1981 }, { "epoch": 0.1770947349610204, "grad_norm": 0.5223949551582336, "learning_rate": 9.443312905305274e-05, "loss": 1.0163, "step": 1982 }, { "epoch": 0.17718408649228226, "grad_norm": 0.4252959191799164, "learning_rate": 9.442649147594606e-05, "loss": 1.0653, "step": 1983 }, { "epoch": 0.17727343802354412, "grad_norm": 0.40141063928604126, "learning_rate": 9.441985017764715e-05, "loss": 1.0184, "step": 1984 }, { "epoch": 0.177362789554806, "grad_norm": 0.4690384566783905, "learning_rate": 9.44132051587123e-05, "loss": 0.984, "step": 1985 }, { "epoch": 0.17745214108606785, "grad_norm": 0.412387490272522, "learning_rate": 9.440655641969814e-05, "loss": 1.0184, "step": 1986 }, { "epoch": 0.17754149261732974, "grad_norm": 0.37352073192596436, "learning_rate": 9.43999039611615e-05, "loss": 1.0471, "step": 1987 }, { "epoch": 0.1776308441485916, "grad_norm": 0.4083310663700104, "learning_rate": 9.439324778365965e-05, "loss": 1.043, "step": 1988 }, { "epoch": 0.17772019567985348, "grad_norm": 0.41002601385116577, "learning_rate": 9.43865878877501e-05, "loss": 0.9932, "step": 1989 }, { "epoch": 0.17780954721111533, "grad_norm": 0.5643458962440491, "learning_rate": 9.437992427399069e-05, "loss": 0.9207, "step": 1990 }, { "epoch": 0.1778988987423772, "grad_norm": 0.43547579646110535, "learning_rate": 9.437325694293957e-05, "loss": 1.0288, "step": 1991 }, { "epoch": 0.17798825027363907, "grad_norm": 0.41671237349510193, "learning_rate": 9.43665858951552e-05, "loss": 1.0792, "step": 1992 }, { "epoch": 0.17807760180490093, "grad_norm": 0.3886220455169678, "learning_rate": 9.435991113119634e-05, "loss": 1.0739, "step": 1993 }, { "epoch": 0.1781669533361628, "grad_norm": 0.42051705718040466, "learning_rate": 9.435323265162207e-05, "loss": 0.9779, "step": 1994 }, { "epoch": 0.17825630486742466, "grad_norm": 0.4543735086917877, "learning_rate": 9.43465504569918e-05, "loss": 0.9808, "step": 1995 }, { "epoch": 0.17834565639868652, "grad_norm": 0.43816715478897095, "learning_rate": 9.433986454786523e-05, "loss": 1.028, "step": 1996 }, { "epoch": 0.1784350079299484, "grad_norm": 0.3944636881351471, "learning_rate": 9.433317492480238e-05, "loss": 1.0616, "step": 1997 }, { "epoch": 0.17852435946121026, "grad_norm": 0.39649829268455505, "learning_rate": 9.432648158836357e-05, "loss": 0.9368, "step": 1998 }, { "epoch": 0.17861371099247214, "grad_norm": 0.422755628824234, "learning_rate": 9.431978453910943e-05, "loss": 1.065, "step": 1999 }, { "epoch": 0.178703062523734, "grad_norm": 0.40051138401031494, "learning_rate": 9.431308377760094e-05, "loss": 1.0022, "step": 2000 }, { "epoch": 0.17879241405499588, "grad_norm": 0.514773428440094, "learning_rate": 9.430637930439933e-05, "loss": 0.9755, "step": 2001 }, { "epoch": 0.17888176558625773, "grad_norm": 0.36524417996406555, "learning_rate": 9.429967112006619e-05, "loss": 1.0829, "step": 2002 }, { "epoch": 0.1789711171175196, "grad_norm": 0.3744485378265381, "learning_rate": 9.429295922516337e-05, "loss": 1.0275, "step": 2003 }, { "epoch": 0.17906046864878147, "grad_norm": 0.4923553168773651, "learning_rate": 9.42862436202531e-05, "loss": 0.9636, "step": 2004 }, { "epoch": 0.17914982018004333, "grad_norm": 0.3743354082107544, "learning_rate": 9.427952430589789e-05, "loss": 1.0459, "step": 2005 }, { "epoch": 0.1792391717113052, "grad_norm": 0.4594390392303467, "learning_rate": 9.42728012826605e-05, "loss": 1.0414, "step": 2006 }, { "epoch": 0.17932852324256707, "grad_norm": 0.4210093915462494, "learning_rate": 9.42660745511041e-05, "loss": 1.088, "step": 2007 }, { "epoch": 0.17941787477382895, "grad_norm": 0.4122314751148224, "learning_rate": 9.425934411179211e-05, "loss": 0.9287, "step": 2008 }, { "epoch": 0.1795072263050908, "grad_norm": 0.41803067922592163, "learning_rate": 9.425260996528829e-05, "loss": 1.0501, "step": 2009 }, { "epoch": 0.17959657783635266, "grad_norm": 0.4542330503463745, "learning_rate": 9.424587211215669e-05, "loss": 1.0908, "step": 2010 }, { "epoch": 0.17968592936761454, "grad_norm": 0.4137341380119324, "learning_rate": 9.423913055296165e-05, "loss": 0.9796, "step": 2011 }, { "epoch": 0.1797752808988764, "grad_norm": 0.3948180079460144, "learning_rate": 9.42323852882679e-05, "loss": 1.0888, "step": 2012 }, { "epoch": 0.17986463243013828, "grad_norm": 0.398710161447525, "learning_rate": 9.422563631864038e-05, "loss": 1.0466, "step": 2013 }, { "epoch": 0.17995398396140014, "grad_norm": 0.42401716113090515, "learning_rate": 9.421888364464442e-05, "loss": 1.0015, "step": 2014 }, { "epoch": 0.180043335492662, "grad_norm": 0.45904120802879333, "learning_rate": 9.42121272668456e-05, "loss": 0.9718, "step": 2015 }, { "epoch": 0.18013268702392388, "grad_norm": 0.45379403233528137, "learning_rate": 9.420536718580986e-05, "loss": 1.0008, "step": 2016 }, { "epoch": 0.18022203855518573, "grad_norm": 0.4098423421382904, "learning_rate": 9.419860340210342e-05, "loss": 1.1183, "step": 2017 }, { "epoch": 0.18031139008644761, "grad_norm": 0.3899945616722107, "learning_rate": 9.419183591629284e-05, "loss": 1.0653, "step": 2018 }, { "epoch": 0.18040074161770947, "grad_norm": 0.44282397627830505, "learning_rate": 9.418506472894492e-05, "loss": 1.0232, "step": 2019 }, { "epoch": 0.18049009314897135, "grad_norm": 0.3919029235839844, "learning_rate": 9.417828984062687e-05, "loss": 1.031, "step": 2020 }, { "epoch": 0.1805794446802332, "grad_norm": 0.3580346703529358, "learning_rate": 9.417151125190614e-05, "loss": 1.1478, "step": 2021 }, { "epoch": 0.18066879621149506, "grad_norm": 0.4946131706237793, "learning_rate": 9.416472896335051e-05, "loss": 0.9887, "step": 2022 }, { "epoch": 0.18075814774275695, "grad_norm": 0.4798043966293335, "learning_rate": 9.415794297552805e-05, "loss": 0.9341, "step": 2023 }, { "epoch": 0.1808474992740188, "grad_norm": 0.42261967062950134, "learning_rate": 9.415115328900719e-05, "loss": 1.063, "step": 2024 }, { "epoch": 0.18093685080528069, "grad_norm": 0.4271705448627472, "learning_rate": 9.414435990435663e-05, "loss": 1.0327, "step": 2025 }, { "epoch": 0.18102620233654254, "grad_norm": 0.394653856754303, "learning_rate": 9.413756282214537e-05, "loss": 1.1644, "step": 2026 }, { "epoch": 0.1811155538678044, "grad_norm": 0.43346062302589417, "learning_rate": 9.413076204294275e-05, "loss": 1.0556, "step": 2027 }, { "epoch": 0.18120490539906628, "grad_norm": 0.390903115272522, "learning_rate": 9.412395756731843e-05, "loss": 1.0185, "step": 2028 }, { "epoch": 0.18129425693032813, "grad_norm": 0.3963910937309265, "learning_rate": 9.41171493958423e-05, "loss": 1.0377, "step": 2029 }, { "epoch": 0.18138360846159002, "grad_norm": 0.4594999849796295, "learning_rate": 9.41103375290847e-05, "loss": 1.0494, "step": 2030 }, { "epoch": 0.18147295999285187, "grad_norm": 0.42716097831726074, "learning_rate": 9.410352196761612e-05, "loss": 0.9836, "step": 2031 }, { "epoch": 0.18156231152411376, "grad_norm": 0.43300801515579224, "learning_rate": 9.409670271200745e-05, "loss": 1.0358, "step": 2032 }, { "epoch": 0.1816516630553756, "grad_norm": 0.3748253583908081, "learning_rate": 9.408987976282993e-05, "loss": 1.0185, "step": 2033 }, { "epoch": 0.18174101458663747, "grad_norm": 0.4405686855316162, "learning_rate": 9.408305312065498e-05, "loss": 1.0303, "step": 2034 }, { "epoch": 0.18183036611789935, "grad_norm": 0.4379626512527466, "learning_rate": 9.407622278605445e-05, "loss": 0.984, "step": 2035 }, { "epoch": 0.1819197176491612, "grad_norm": 0.39666110277175903, "learning_rate": 9.406938875960045e-05, "loss": 1.1574, "step": 2036 }, { "epoch": 0.1820090691804231, "grad_norm": 0.39308610558509827, "learning_rate": 9.406255104186541e-05, "loss": 1.0372, "step": 2037 }, { "epoch": 0.18209842071168494, "grad_norm": 0.4402790069580078, "learning_rate": 9.405570963342203e-05, "loss": 1.102, "step": 2038 }, { "epoch": 0.18218777224294683, "grad_norm": 0.3761218190193176, "learning_rate": 9.404886453484336e-05, "loss": 1.0392, "step": 2039 }, { "epoch": 0.18227712377420868, "grad_norm": 0.62852942943573, "learning_rate": 9.404201574670278e-05, "loss": 0.9496, "step": 2040 }, { "epoch": 0.18236647530547054, "grad_norm": 0.4357655644416809, "learning_rate": 9.40351632695739e-05, "loss": 1.0507, "step": 2041 }, { "epoch": 0.18245582683673242, "grad_norm": 0.4296508729457855, "learning_rate": 9.402830710403074e-05, "loss": 1.0769, "step": 2042 }, { "epoch": 0.18254517836799428, "grad_norm": 0.4572366774082184, "learning_rate": 9.402144725064753e-05, "loss": 0.9844, "step": 2043 }, { "epoch": 0.18263452989925616, "grad_norm": 0.48715341091156006, "learning_rate": 9.40145837099989e-05, "loss": 0.9948, "step": 2044 }, { "epoch": 0.18272388143051801, "grad_norm": 0.4071628749370575, "learning_rate": 9.40077164826597e-05, "loss": 1.0108, "step": 2045 }, { "epoch": 0.18281323296177987, "grad_norm": 0.4973810017108917, "learning_rate": 9.400084556920517e-05, "loss": 1.0148, "step": 2046 }, { "epoch": 0.18290258449304175, "grad_norm": 0.4013235867023468, "learning_rate": 9.399397097021082e-05, "loss": 0.9818, "step": 2047 }, { "epoch": 0.1829919360243036, "grad_norm": 0.4264853596687317, "learning_rate": 9.398709268625244e-05, "loss": 1.0554, "step": 2048 }, { "epoch": 0.1830812875555655, "grad_norm": 0.5234047174453735, "learning_rate": 9.398021071790617e-05, "loss": 1.0184, "step": 2049 }, { "epoch": 0.18317063908682735, "grad_norm": 0.4011201560497284, "learning_rate": 9.397332506574848e-05, "loss": 1.0643, "step": 2050 }, { "epoch": 0.18325999061808923, "grad_norm": 0.4270339906215668, "learning_rate": 9.396643573035608e-05, "loss": 1.0095, "step": 2051 }, { "epoch": 0.18334934214935109, "grad_norm": 0.3894960880279541, "learning_rate": 9.395954271230604e-05, "loss": 1.0769, "step": 2052 }, { "epoch": 0.18343869368061294, "grad_norm": 0.3954775035381317, "learning_rate": 9.395264601217573e-05, "loss": 1.0095, "step": 2053 }, { "epoch": 0.18352804521187482, "grad_norm": 0.4338105618953705, "learning_rate": 9.394574563054282e-05, "loss": 0.9931, "step": 2054 }, { "epoch": 0.18361739674313668, "grad_norm": 0.4074452519416809, "learning_rate": 9.393884156798527e-05, "loss": 1.0317, "step": 2055 }, { "epoch": 0.18370674827439856, "grad_norm": 0.39676398038864136, "learning_rate": 9.393193382508138e-05, "loss": 0.9515, "step": 2056 }, { "epoch": 0.18379609980566042, "grad_norm": 0.42293867468833923, "learning_rate": 9.392502240240977e-05, "loss": 1.1357, "step": 2057 }, { "epoch": 0.18388545133692227, "grad_norm": 0.44433289766311646, "learning_rate": 9.391810730054932e-05, "loss": 1.0308, "step": 2058 }, { "epoch": 0.18397480286818416, "grad_norm": 0.4930141270160675, "learning_rate": 9.391118852007926e-05, "loss": 1.0061, "step": 2059 }, { "epoch": 0.184064154399446, "grad_norm": 0.3936423361301422, "learning_rate": 9.390426606157907e-05, "loss": 1.0441, "step": 2060 }, { "epoch": 0.1841535059307079, "grad_norm": 0.5013602375984192, "learning_rate": 9.389733992562863e-05, "loss": 1.0648, "step": 2061 }, { "epoch": 0.18424285746196975, "grad_norm": 0.4181208610534668, "learning_rate": 9.389041011280808e-05, "loss": 1.0172, "step": 2062 }, { "epoch": 0.18433220899323163, "grad_norm": 0.3877794146537781, "learning_rate": 9.388347662369782e-05, "loss": 1.0403, "step": 2063 }, { "epoch": 0.1844215605244935, "grad_norm": 0.4322667419910431, "learning_rate": 9.387653945887864e-05, "loss": 1.0691, "step": 2064 }, { "epoch": 0.18451091205575534, "grad_norm": 0.4271637201309204, "learning_rate": 9.386959861893158e-05, "loss": 1.0122, "step": 2065 }, { "epoch": 0.18460026358701723, "grad_norm": 0.3978729248046875, "learning_rate": 9.386265410443802e-05, "loss": 1.0317, "step": 2066 }, { "epoch": 0.18468961511827908, "grad_norm": 0.40776076912879944, "learning_rate": 9.385570591597966e-05, "loss": 0.9875, "step": 2067 }, { "epoch": 0.18477896664954097, "grad_norm": 0.3995235860347748, "learning_rate": 9.384875405413843e-05, "loss": 0.9726, "step": 2068 }, { "epoch": 0.18486831818080282, "grad_norm": 0.3666049540042877, "learning_rate": 9.384179851949666e-05, "loss": 1.0619, "step": 2069 }, { "epoch": 0.1849576697120647, "grad_norm": 0.37846672534942627, "learning_rate": 9.383483931263695e-05, "loss": 1.0445, "step": 2070 }, { "epoch": 0.18504702124332656, "grad_norm": 0.44983261823654175, "learning_rate": 9.382787643414221e-05, "loss": 0.9651, "step": 2071 }, { "epoch": 0.18513637277458841, "grad_norm": 0.46223461627960205, "learning_rate": 9.382090988459564e-05, "loss": 0.9688, "step": 2072 }, { "epoch": 0.1852257243058503, "grad_norm": 0.3715665638446808, "learning_rate": 9.381393966458077e-05, "loss": 1.0355, "step": 2073 }, { "epoch": 0.18531507583711215, "grad_norm": 0.45313575863838196, "learning_rate": 9.380696577468142e-05, "loss": 0.9863, "step": 2074 }, { "epoch": 0.18540442736837404, "grad_norm": 0.45069244503974915, "learning_rate": 9.379998821548175e-05, "loss": 1.0141, "step": 2075 }, { "epoch": 0.1854937788996359, "grad_norm": 0.3656700551509857, "learning_rate": 9.37930069875662e-05, "loss": 1.0549, "step": 2076 }, { "epoch": 0.18558313043089775, "grad_norm": 0.44431185722351074, "learning_rate": 9.37860220915195e-05, "loss": 1.0339, "step": 2077 }, { "epoch": 0.18567248196215963, "grad_norm": 0.5284201502799988, "learning_rate": 9.377903352792672e-05, "loss": 0.9624, "step": 2078 }, { "epoch": 0.18576183349342149, "grad_norm": 0.3846912682056427, "learning_rate": 9.377204129737325e-05, "loss": 1.0481, "step": 2079 }, { "epoch": 0.18585118502468337, "grad_norm": 0.41815292835235596, "learning_rate": 9.376504540044472e-05, "loss": 1.0371, "step": 2080 }, { "epoch": 0.18594053655594522, "grad_norm": 0.38547441363334656, "learning_rate": 9.375804583772716e-05, "loss": 1.0483, "step": 2081 }, { "epoch": 0.1860298880872071, "grad_norm": 0.4458550810813904, "learning_rate": 9.375104260980683e-05, "loss": 1.0046, "step": 2082 }, { "epoch": 0.18611923961846896, "grad_norm": 0.5194621086120605, "learning_rate": 9.374403571727032e-05, "loss": 1.0876, "step": 2083 }, { "epoch": 0.18620859114973082, "grad_norm": 0.41328129172325134, "learning_rate": 9.373702516070453e-05, "loss": 1.0493, "step": 2084 }, { "epoch": 0.1862979426809927, "grad_norm": 0.4268978536128998, "learning_rate": 9.373001094069671e-05, "loss": 0.998, "step": 2085 }, { "epoch": 0.18638729421225456, "grad_norm": 0.4924493134021759, "learning_rate": 9.372299305783432e-05, "loss": 1.0129, "step": 2086 }, { "epoch": 0.18647664574351644, "grad_norm": 0.49052175879478455, "learning_rate": 9.371597151270521e-05, "loss": 0.9506, "step": 2087 }, { "epoch": 0.1865659972747783, "grad_norm": 0.44871586561203003, "learning_rate": 9.370894630589753e-05, "loss": 0.9917, "step": 2088 }, { "epoch": 0.18665534880604015, "grad_norm": 0.4465029239654541, "learning_rate": 9.370191743799968e-05, "loss": 1.0708, "step": 2089 }, { "epoch": 0.18674470033730203, "grad_norm": 0.42947274446487427, "learning_rate": 9.369488490960042e-05, "loss": 1.0145, "step": 2090 }, { "epoch": 0.1868340518685639, "grad_norm": 0.43883195519447327, "learning_rate": 9.368784872128878e-05, "loss": 1.0238, "step": 2091 }, { "epoch": 0.18692340339982577, "grad_norm": 0.4147821068763733, "learning_rate": 9.368080887365413e-05, "loss": 1.019, "step": 2092 }, { "epoch": 0.18701275493108763, "grad_norm": 0.4450010061264038, "learning_rate": 9.367376536728613e-05, "loss": 0.9557, "step": 2093 }, { "epoch": 0.1871021064623495, "grad_norm": 0.5454333424568176, "learning_rate": 9.366671820277477e-05, "loss": 1.013, "step": 2094 }, { "epoch": 0.18719145799361137, "grad_norm": 0.41965803503990173, "learning_rate": 9.36596673807103e-05, "loss": 0.9919, "step": 2095 }, { "epoch": 0.18728080952487322, "grad_norm": 0.4064487814903259, "learning_rate": 9.365261290168331e-05, "loss": 0.9643, "step": 2096 }, { "epoch": 0.1873701610561351, "grad_norm": 0.46642303466796875, "learning_rate": 9.364555476628467e-05, "loss": 0.9944, "step": 2097 }, { "epoch": 0.18745951258739696, "grad_norm": 0.41478562355041504, "learning_rate": 9.36384929751056e-05, "loss": 1.0993, "step": 2098 }, { "epoch": 0.18754886411865884, "grad_norm": 0.3898516893386841, "learning_rate": 9.36314275287376e-05, "loss": 1.0821, "step": 2099 }, { "epoch": 0.1876382156499207, "grad_norm": 0.5555103421211243, "learning_rate": 9.362435842777246e-05, "loss": 0.8719, "step": 2100 }, { "epoch": 0.18772756718118258, "grad_norm": 0.38932761549949646, "learning_rate": 9.36172856728023e-05, "loss": 1.0611, "step": 2101 }, { "epoch": 0.18781691871244444, "grad_norm": 0.411034494638443, "learning_rate": 9.361020926441955e-05, "loss": 0.9853, "step": 2102 }, { "epoch": 0.1879062702437063, "grad_norm": 0.47794967889785767, "learning_rate": 9.36031292032169e-05, "loss": 1.0755, "step": 2103 }, { "epoch": 0.18799562177496817, "grad_norm": 0.40055814385414124, "learning_rate": 9.359604548978742e-05, "loss": 1.072, "step": 2104 }, { "epoch": 0.18808497330623003, "grad_norm": 0.4187031686306, "learning_rate": 9.358895812472442e-05, "loss": 1.0448, "step": 2105 }, { "epoch": 0.1881743248374919, "grad_norm": 0.39350834488868713, "learning_rate": 9.358186710862156e-05, "loss": 1.0356, "step": 2106 }, { "epoch": 0.18826367636875377, "grad_norm": 0.36614513397216797, "learning_rate": 9.357477244207278e-05, "loss": 1.0865, "step": 2107 }, { "epoch": 0.18835302790001562, "grad_norm": 0.4072873890399933, "learning_rate": 9.356767412567234e-05, "loss": 1.043, "step": 2108 }, { "epoch": 0.1884423794312775, "grad_norm": 0.3540877103805542, "learning_rate": 9.356057216001477e-05, "loss": 1.0828, "step": 2109 }, { "epoch": 0.18853173096253936, "grad_norm": 0.41696932911872864, "learning_rate": 9.355346654569497e-05, "loss": 1.0219, "step": 2110 }, { "epoch": 0.18862108249380125, "grad_norm": 0.5428374409675598, "learning_rate": 9.354635728330811e-05, "loss": 1.0378, "step": 2111 }, { "epoch": 0.1887104340250631, "grad_norm": 0.3925424814224243, "learning_rate": 9.353924437344966e-05, "loss": 1.0572, "step": 2112 }, { "epoch": 0.18879978555632498, "grad_norm": 0.3727796971797943, "learning_rate": 9.353212781671538e-05, "loss": 1.0736, "step": 2113 }, { "epoch": 0.18888913708758684, "grad_norm": 0.4229578971862793, "learning_rate": 9.352500761370139e-05, "loss": 1.0785, "step": 2114 }, { "epoch": 0.1889784886188487, "grad_norm": 0.4206151068210602, "learning_rate": 9.351788376500406e-05, "loss": 1.0187, "step": 2115 }, { "epoch": 0.18906784015011058, "grad_norm": 0.4051399230957031, "learning_rate": 9.35107562712201e-05, "loss": 1.0228, "step": 2116 }, { "epoch": 0.18915719168137243, "grad_norm": 0.3675979971885681, "learning_rate": 9.350362513294651e-05, "loss": 1.024, "step": 2117 }, { "epoch": 0.18924654321263432, "grad_norm": 0.43674102425575256, "learning_rate": 9.349649035078061e-05, "loss": 1.0003, "step": 2118 }, { "epoch": 0.18933589474389617, "grad_norm": 0.4083952009677887, "learning_rate": 9.348935192532001e-05, "loss": 1.0628, "step": 2119 }, { "epoch": 0.18942524627515803, "grad_norm": 0.4974057972431183, "learning_rate": 9.34822098571626e-05, "loss": 1.0283, "step": 2120 }, { "epoch": 0.1895145978064199, "grad_norm": 0.39554762840270996, "learning_rate": 9.347506414690663e-05, "loss": 1.0382, "step": 2121 }, { "epoch": 0.18960394933768177, "grad_norm": 0.40948179364204407, "learning_rate": 9.346791479515063e-05, "loss": 1.0227, "step": 2122 }, { "epoch": 0.18969330086894365, "grad_norm": 0.42662203311920166, "learning_rate": 9.346076180249345e-05, "loss": 1.0179, "step": 2123 }, { "epoch": 0.1897826524002055, "grad_norm": 0.40149882435798645, "learning_rate": 9.345360516953418e-05, "loss": 1.0281, "step": 2124 }, { "epoch": 0.1898720039314674, "grad_norm": 0.49369674921035767, "learning_rate": 9.344644489687233e-05, "loss": 0.9582, "step": 2125 }, { "epoch": 0.18996135546272924, "grad_norm": 0.43468207120895386, "learning_rate": 9.343928098510759e-05, "loss": 1.0257, "step": 2126 }, { "epoch": 0.1900507069939911, "grad_norm": 0.5939388275146484, "learning_rate": 9.343211343484004e-05, "loss": 1.0418, "step": 2127 }, { "epoch": 0.19014005852525298, "grad_norm": 0.42721638083457947, "learning_rate": 9.342494224667004e-05, "loss": 0.9871, "step": 2128 }, { "epoch": 0.19022941005651484, "grad_norm": 0.4589712917804718, "learning_rate": 9.341776742119825e-05, "loss": 0.9894, "step": 2129 }, { "epoch": 0.19031876158777672, "grad_norm": 0.3878852128982544, "learning_rate": 9.341058895902563e-05, "loss": 1.0514, "step": 2130 }, { "epoch": 0.19040811311903857, "grad_norm": 0.47722190618515015, "learning_rate": 9.340340686075349e-05, "loss": 0.9354, "step": 2131 }, { "epoch": 0.19049746465030046, "grad_norm": 0.4715014398097992, "learning_rate": 9.339622112698334e-05, "loss": 1.0487, "step": 2132 }, { "epoch": 0.1905868161815623, "grad_norm": 0.44013863801956177, "learning_rate": 9.338903175831712e-05, "loss": 1.0149, "step": 2133 }, { "epoch": 0.19067616771282417, "grad_norm": 0.4588448107242584, "learning_rate": 9.3381838755357e-05, "loss": 1.0304, "step": 2134 }, { "epoch": 0.19076551924408605, "grad_norm": 0.47280123829841614, "learning_rate": 9.337464211870546e-05, "loss": 1.0329, "step": 2135 }, { "epoch": 0.1908548707753479, "grad_norm": 0.4202229380607605, "learning_rate": 9.336744184896531e-05, "loss": 1.1011, "step": 2136 }, { "epoch": 0.1909442223066098, "grad_norm": 0.3864685297012329, "learning_rate": 9.336023794673962e-05, "loss": 1.0398, "step": 2137 }, { "epoch": 0.19103357383787165, "grad_norm": 0.3656110465526581, "learning_rate": 9.335303041263183e-05, "loss": 0.9896, "step": 2138 }, { "epoch": 0.1911229253691335, "grad_norm": 0.3890793025493622, "learning_rate": 9.334581924724564e-05, "loss": 1.0371, "step": 2139 }, { "epoch": 0.19121227690039538, "grad_norm": 0.4095704257488251, "learning_rate": 9.333860445118505e-05, "loss": 1.0538, "step": 2140 }, { "epoch": 0.19130162843165724, "grad_norm": 0.3988470733165741, "learning_rate": 9.333138602505437e-05, "loss": 0.9512, "step": 2141 }, { "epoch": 0.19139097996291912, "grad_norm": 0.42787396907806396, "learning_rate": 9.332416396945824e-05, "loss": 1.0253, "step": 2142 }, { "epoch": 0.19148033149418098, "grad_norm": 0.44595029950141907, "learning_rate": 9.331693828500159e-05, "loss": 1.005, "step": 2143 }, { "epoch": 0.19156968302544286, "grad_norm": 0.44697070121765137, "learning_rate": 9.33097089722896e-05, "loss": 0.9655, "step": 2144 }, { "epoch": 0.19165903455670472, "grad_norm": 0.45841777324676514, "learning_rate": 9.330247603192786e-05, "loss": 1.0242, "step": 2145 }, { "epoch": 0.19174838608796657, "grad_norm": 0.4379693865776062, "learning_rate": 9.32952394645222e-05, "loss": 1.0129, "step": 2146 }, { "epoch": 0.19183773761922845, "grad_norm": 0.3796353042125702, "learning_rate": 9.32879992706787e-05, "loss": 1.0516, "step": 2147 }, { "epoch": 0.1919270891504903, "grad_norm": 0.5354682207107544, "learning_rate": 9.328075545100385e-05, "loss": 0.85, "step": 2148 }, { "epoch": 0.1920164406817522, "grad_norm": 0.5834673643112183, "learning_rate": 9.32735080061044e-05, "loss": 1.0113, "step": 2149 }, { "epoch": 0.19210579221301405, "grad_norm": 0.46997979283332825, "learning_rate": 9.326625693658739e-05, "loss": 1.0355, "step": 2150 }, { "epoch": 0.19219514374427593, "grad_norm": 0.47971826791763306, "learning_rate": 9.325900224306019e-05, "loss": 0.9207, "step": 2151 }, { "epoch": 0.1922844952755378, "grad_norm": 0.49349215626716614, "learning_rate": 9.325174392613043e-05, "loss": 0.9519, "step": 2152 }, { "epoch": 0.19237384680679964, "grad_norm": 0.603679895401001, "learning_rate": 9.32444819864061e-05, "loss": 0.9713, "step": 2153 }, { "epoch": 0.19246319833806153, "grad_norm": 0.44981256127357483, "learning_rate": 9.323721642449543e-05, "loss": 0.9577, "step": 2154 }, { "epoch": 0.19255254986932338, "grad_norm": 0.3963879644870758, "learning_rate": 9.322994724100702e-05, "loss": 1.0324, "step": 2155 }, { "epoch": 0.19264190140058526, "grad_norm": 0.4557402729988098, "learning_rate": 9.322267443654972e-05, "loss": 1.042, "step": 2156 }, { "epoch": 0.19273125293184712, "grad_norm": 0.4886689782142639, "learning_rate": 9.321539801173274e-05, "loss": 1.01, "step": 2157 }, { "epoch": 0.19282060446310897, "grad_norm": 0.416227787733078, "learning_rate": 9.320811796716552e-05, "loss": 0.9619, "step": 2158 }, { "epoch": 0.19290995599437086, "grad_norm": 0.44444751739501953, "learning_rate": 9.320083430345785e-05, "loss": 1.0472, "step": 2159 }, { "epoch": 0.1929993075256327, "grad_norm": 0.4092031717300415, "learning_rate": 9.319354702121981e-05, "loss": 1.0692, "step": 2160 }, { "epoch": 0.1930886590568946, "grad_norm": 0.3944936692714691, "learning_rate": 9.318625612106182e-05, "loss": 1.0244, "step": 2161 }, { "epoch": 0.19317801058815645, "grad_norm": 0.457409530878067, "learning_rate": 9.317896160359454e-05, "loss": 1.1001, "step": 2162 }, { "epoch": 0.19326736211941833, "grad_norm": 0.4606925845146179, "learning_rate": 9.317166346942897e-05, "loss": 1.0092, "step": 2163 }, { "epoch": 0.1933567136506802, "grad_norm": 0.458124041557312, "learning_rate": 9.31643617191764e-05, "loss": 1.0144, "step": 2164 }, { "epoch": 0.19344606518194205, "grad_norm": 0.3879139721393585, "learning_rate": 9.315705635344844e-05, "loss": 0.9889, "step": 2165 }, { "epoch": 0.19353541671320393, "grad_norm": 0.3774491548538208, "learning_rate": 9.314974737285699e-05, "loss": 1.0031, "step": 2166 }, { "epoch": 0.19362476824446578, "grad_norm": 0.38061612844467163, "learning_rate": 9.314243477801425e-05, "loss": 0.9962, "step": 2167 }, { "epoch": 0.19371411977572767, "grad_norm": 0.4952597916126251, "learning_rate": 9.313511856953274e-05, "loss": 1.0836, "step": 2168 }, { "epoch": 0.19380347130698952, "grad_norm": 0.41984468698501587, "learning_rate": 9.312779874802526e-05, "loss": 1.0027, "step": 2169 }, { "epoch": 0.19389282283825138, "grad_norm": 0.47165659070014954, "learning_rate": 9.312047531410493e-05, "loss": 1.0573, "step": 2170 }, { "epoch": 0.19398217436951326, "grad_norm": 0.39716780185699463, "learning_rate": 9.311314826838515e-05, "loss": 1.0426, "step": 2171 }, { "epoch": 0.19407152590077512, "grad_norm": 0.4184114336967468, "learning_rate": 9.310581761147966e-05, "loss": 1.0309, "step": 2172 }, { "epoch": 0.194160877432037, "grad_norm": 0.4498063027858734, "learning_rate": 9.309848334400246e-05, "loss": 0.9592, "step": 2173 }, { "epoch": 0.19425022896329885, "grad_norm": 0.4691358208656311, "learning_rate": 9.30911454665679e-05, "loss": 1.0557, "step": 2174 }, { "epoch": 0.19433958049456074, "grad_norm": 0.4362775981426239, "learning_rate": 9.308380397979057e-05, "loss": 1.1005, "step": 2175 }, { "epoch": 0.1944289320258226, "grad_norm": 0.39508792757987976, "learning_rate": 9.307645888428542e-05, "loss": 1.0745, "step": 2176 }, { "epoch": 0.19451828355708445, "grad_norm": 0.4104553461074829, "learning_rate": 9.306911018066769e-05, "loss": 1.0801, "step": 2177 }, { "epoch": 0.19460763508834633, "grad_norm": 0.4288639426231384, "learning_rate": 9.306175786955289e-05, "loss": 1.1468, "step": 2178 }, { "epoch": 0.1946969866196082, "grad_norm": 0.4734762907028198, "learning_rate": 9.305440195155686e-05, "loss": 1.0742, "step": 2179 }, { "epoch": 0.19478633815087007, "grad_norm": 0.45509761571884155, "learning_rate": 9.304704242729575e-05, "loss": 0.9943, "step": 2180 }, { "epoch": 0.19487568968213193, "grad_norm": 0.412614643573761, "learning_rate": 9.303967929738598e-05, "loss": 1.0259, "step": 2181 }, { "epoch": 0.1949650412133938, "grad_norm": 0.38879382610321045, "learning_rate": 9.30323125624443e-05, "loss": 1.0245, "step": 2182 }, { "epoch": 0.19505439274465566, "grad_norm": 0.459376722574234, "learning_rate": 9.302494222308774e-05, "loss": 1.0003, "step": 2183 }, { "epoch": 0.19514374427591752, "grad_norm": 0.41094890236854553, "learning_rate": 9.301756827993367e-05, "loss": 0.9735, "step": 2184 }, { "epoch": 0.1952330958071794, "grad_norm": 0.4334050118923187, "learning_rate": 9.301019073359972e-05, "loss": 0.9722, "step": 2185 }, { "epoch": 0.19532244733844126, "grad_norm": 0.43469616770744324, "learning_rate": 9.300280958470384e-05, "loss": 1.0839, "step": 2186 }, { "epoch": 0.19541179886970314, "grad_norm": 0.45029255747795105, "learning_rate": 9.299542483386428e-05, "loss": 0.9987, "step": 2187 }, { "epoch": 0.195501150400965, "grad_norm": 0.42933982610702515, "learning_rate": 9.298803648169958e-05, "loss": 1.0202, "step": 2188 }, { "epoch": 0.19559050193222685, "grad_norm": 0.37860357761383057, "learning_rate": 9.298064452882862e-05, "loss": 1.053, "step": 2189 }, { "epoch": 0.19567985346348873, "grad_norm": 0.43233370780944824, "learning_rate": 9.297324897587054e-05, "loss": 1.07, "step": 2190 }, { "epoch": 0.1957692049947506, "grad_norm": 0.4745257794857025, "learning_rate": 9.296584982344478e-05, "loss": 0.951, "step": 2191 }, { "epoch": 0.19585855652601247, "grad_norm": 0.453050434589386, "learning_rate": 9.295844707217114e-05, "loss": 0.9839, "step": 2192 }, { "epoch": 0.19594790805727433, "grad_norm": 0.4505593776702881, "learning_rate": 9.295104072266965e-05, "loss": 1.0556, "step": 2193 }, { "epoch": 0.1960372595885362, "grad_norm": 0.41230955719947815, "learning_rate": 9.294363077556066e-05, "loss": 1.0569, "step": 2194 }, { "epoch": 0.19612661111979807, "grad_norm": 0.38011908531188965, "learning_rate": 9.293621723146485e-05, "loss": 1.0311, "step": 2195 }, { "epoch": 0.19621596265105992, "grad_norm": 0.37792640924453735, "learning_rate": 9.292880009100318e-05, "loss": 1.0998, "step": 2196 }, { "epoch": 0.1963053141823218, "grad_norm": 0.42640724778175354, "learning_rate": 9.292137935479692e-05, "loss": 1.0311, "step": 2197 }, { "epoch": 0.19639466571358366, "grad_norm": 0.39782777428627014, "learning_rate": 9.291395502346763e-05, "loss": 1.035, "step": 2198 }, { "epoch": 0.19648401724484554, "grad_norm": 0.39150485396385193, "learning_rate": 9.290652709763717e-05, "loss": 1.0524, "step": 2199 }, { "epoch": 0.1965733687761074, "grad_norm": 0.41855573654174805, "learning_rate": 9.289909557792771e-05, "loss": 1.05, "step": 2200 }, { "epoch": 0.19666272030736925, "grad_norm": 0.4688778817653656, "learning_rate": 9.289166046496172e-05, "loss": 1.0219, "step": 2201 }, { "epoch": 0.19675207183863114, "grad_norm": 0.4490063488483429, "learning_rate": 9.2884221759362e-05, "loss": 0.9859, "step": 2202 }, { "epoch": 0.196841423369893, "grad_norm": 0.37221378087997437, "learning_rate": 9.287677946175157e-05, "loss": 1.0359, "step": 2203 }, { "epoch": 0.19693077490115488, "grad_norm": 0.5122733116149902, "learning_rate": 9.286933357275385e-05, "loss": 0.9099, "step": 2204 }, { "epoch": 0.19702012643241673, "grad_norm": 0.48560839891433716, "learning_rate": 9.286188409299246e-05, "loss": 0.9573, "step": 2205 }, { "epoch": 0.19710947796367861, "grad_norm": 0.42828473448753357, "learning_rate": 9.285443102309142e-05, "loss": 1.0508, "step": 2206 }, { "epoch": 0.19719882949494047, "grad_norm": 0.417341947555542, "learning_rate": 9.284697436367497e-05, "loss": 1.085, "step": 2207 }, { "epoch": 0.19728818102620232, "grad_norm": 0.4165228307247162, "learning_rate": 9.283951411536773e-05, "loss": 1.1126, "step": 2208 }, { "epoch": 0.1973775325574642, "grad_norm": 0.4660586714744568, "learning_rate": 9.283205027879454e-05, "loss": 0.9542, "step": 2209 }, { "epoch": 0.19746688408872606, "grad_norm": 0.41941720247268677, "learning_rate": 9.282458285458055e-05, "loss": 1.0903, "step": 2210 }, { "epoch": 0.19755623561998795, "grad_norm": 0.4800897538661957, "learning_rate": 9.281711184335131e-05, "loss": 0.9575, "step": 2211 }, { "epoch": 0.1976455871512498, "grad_norm": 0.39509686827659607, "learning_rate": 9.280963724573253e-05, "loss": 1.0522, "step": 2212 }, { "epoch": 0.19773493868251169, "grad_norm": 0.4314636290073395, "learning_rate": 9.280215906235032e-05, "loss": 1.0846, "step": 2213 }, { "epoch": 0.19782429021377354, "grad_norm": 0.39220911264419556, "learning_rate": 9.279467729383105e-05, "loss": 0.9884, "step": 2214 }, { "epoch": 0.1979136417450354, "grad_norm": 0.45912548899650574, "learning_rate": 9.27871919408014e-05, "loss": 0.984, "step": 2215 }, { "epoch": 0.19800299327629728, "grad_norm": 0.48036208748817444, "learning_rate": 9.277970300388834e-05, "loss": 0.9564, "step": 2216 }, { "epoch": 0.19809234480755913, "grad_norm": 0.5072821974754333, "learning_rate": 9.277221048371917e-05, "loss": 0.911, "step": 2217 }, { "epoch": 0.19818169633882102, "grad_norm": 0.48071086406707764, "learning_rate": 9.276471438092145e-05, "loss": 1.0484, "step": 2218 }, { "epoch": 0.19827104787008287, "grad_norm": 0.5344045758247375, "learning_rate": 9.275721469612304e-05, "loss": 0.8887, "step": 2219 }, { "epoch": 0.19836039940134473, "grad_norm": 0.43411949276924133, "learning_rate": 9.274971142995216e-05, "loss": 1.0001, "step": 2220 }, { "epoch": 0.1984497509326066, "grad_norm": 0.4209814667701721, "learning_rate": 9.274220458303727e-05, "loss": 1.0129, "step": 2221 }, { "epoch": 0.19853910246386847, "grad_norm": 0.33118653297424316, "learning_rate": 9.273469415600713e-05, "loss": 1.0303, "step": 2222 }, { "epoch": 0.19862845399513035, "grad_norm": 0.4255508780479431, "learning_rate": 9.272718014949085e-05, "loss": 0.9976, "step": 2223 }, { "epoch": 0.1987178055263922, "grad_norm": 0.4046938121318817, "learning_rate": 9.271966256411779e-05, "loss": 1.0221, "step": 2224 }, { "epoch": 0.1988071570576541, "grad_norm": 0.4717459976673126, "learning_rate": 9.271214140051763e-05, "loss": 0.9862, "step": 2225 }, { "epoch": 0.19889650858891594, "grad_norm": 0.4104228615760803, "learning_rate": 9.270461665932034e-05, "loss": 1.024, "step": 2226 }, { "epoch": 0.1989858601201778, "grad_norm": 0.41744041442871094, "learning_rate": 9.269708834115622e-05, "loss": 1.0642, "step": 2227 }, { "epoch": 0.19907521165143968, "grad_norm": 0.4957825541496277, "learning_rate": 9.268955644665582e-05, "loss": 0.978, "step": 2228 }, { "epoch": 0.19916456318270154, "grad_norm": 0.4245834946632385, "learning_rate": 9.268202097645005e-05, "loss": 0.9519, "step": 2229 }, { "epoch": 0.19925391471396342, "grad_norm": 0.4112485647201538, "learning_rate": 9.267448193117005e-05, "loss": 1.0033, "step": 2230 }, { "epoch": 0.19934326624522528, "grad_norm": 0.45706307888031006, "learning_rate": 9.266693931144732e-05, "loss": 1.0904, "step": 2231 }, { "epoch": 0.19943261777648713, "grad_norm": 0.44358548521995544, "learning_rate": 9.265939311791362e-05, "loss": 0.9685, "step": 2232 }, { "epoch": 0.19952196930774901, "grad_norm": 0.4372340440750122, "learning_rate": 9.265184335120103e-05, "loss": 1.0331, "step": 2233 }, { "epoch": 0.19961132083901087, "grad_norm": 0.5012243986129761, "learning_rate": 9.264429001194193e-05, "loss": 0.9479, "step": 2234 }, { "epoch": 0.19970067237027275, "grad_norm": 0.45009467005729675, "learning_rate": 9.263673310076897e-05, "loss": 0.9737, "step": 2235 }, { "epoch": 0.1997900239015346, "grad_norm": 0.3963136076927185, "learning_rate": 9.262917261831515e-05, "loss": 1.0775, "step": 2236 }, { "epoch": 0.1998793754327965, "grad_norm": 0.4253182113170624, "learning_rate": 9.262160856521372e-05, "loss": 1.013, "step": 2237 }, { "epoch": 0.19996872696405835, "grad_norm": 0.40562012791633606, "learning_rate": 9.261404094209827e-05, "loss": 1.0651, "step": 2238 }, { "epoch": 0.2000580784953202, "grad_norm": 0.42175614833831787, "learning_rate": 9.260646974960265e-05, "loss": 1.052, "step": 2239 }, { "epoch": 0.20014743002658208, "grad_norm": 0.3817167580127716, "learning_rate": 9.259889498836105e-05, "loss": 1.0804, "step": 2240 }, { "epoch": 0.20023678155784394, "grad_norm": 0.42921021580696106, "learning_rate": 9.259131665900792e-05, "loss": 0.9756, "step": 2241 }, { "epoch": 0.20032613308910582, "grad_norm": 0.4635266363620758, "learning_rate": 9.258373476217801e-05, "loss": 0.9926, "step": 2242 }, { "epoch": 0.20041548462036768, "grad_norm": 0.45946842432022095, "learning_rate": 9.257614929850642e-05, "loss": 0.9873, "step": 2243 }, { "epoch": 0.20050483615162956, "grad_norm": 0.4113311767578125, "learning_rate": 9.256856026862847e-05, "loss": 1.0364, "step": 2244 }, { "epoch": 0.20059418768289142, "grad_norm": 0.38544759154319763, "learning_rate": 9.256096767317989e-05, "loss": 1.0698, "step": 2245 }, { "epoch": 0.20068353921415327, "grad_norm": 0.483548104763031, "learning_rate": 9.255337151279658e-05, "loss": 0.9635, "step": 2246 }, { "epoch": 0.20077289074541516, "grad_norm": 0.41710737347602844, "learning_rate": 9.254577178811482e-05, "loss": 0.9602, "step": 2247 }, { "epoch": 0.200862242276677, "grad_norm": 0.41707658767700195, "learning_rate": 9.253816849977117e-05, "loss": 1.0529, "step": 2248 }, { "epoch": 0.2009515938079389, "grad_norm": 0.49464061856269836, "learning_rate": 9.253056164840248e-05, "loss": 1.0603, "step": 2249 }, { "epoch": 0.20104094533920075, "grad_norm": 0.4921278655529022, "learning_rate": 9.252295123464592e-05, "loss": 1.0333, "step": 2250 }, { "epoch": 0.2011302968704626, "grad_norm": 0.41099363565444946, "learning_rate": 9.251533725913893e-05, "loss": 0.9884, "step": 2251 }, { "epoch": 0.2012196484017245, "grad_norm": 0.40399447083473206, "learning_rate": 9.250771972251925e-05, "loss": 0.9832, "step": 2252 }, { "epoch": 0.20130899993298634, "grad_norm": 0.39591044187545776, "learning_rate": 9.250009862542495e-05, "loss": 1.0229, "step": 2253 }, { "epoch": 0.20139835146424823, "grad_norm": 0.4617488980293274, "learning_rate": 9.249247396849437e-05, "loss": 0.9989, "step": 2254 }, { "epoch": 0.20148770299551008, "grad_norm": 0.3816708028316498, "learning_rate": 9.248484575236616e-05, "loss": 1.1098, "step": 2255 }, { "epoch": 0.20157705452677196, "grad_norm": 0.43088236451148987, "learning_rate": 9.247721397767926e-05, "loss": 0.9852, "step": 2256 }, { "epoch": 0.20166640605803382, "grad_norm": 0.39949992299079895, "learning_rate": 9.246957864507292e-05, "loss": 1.0444, "step": 2257 }, { "epoch": 0.20175575758929568, "grad_norm": 0.5272142291069031, "learning_rate": 9.246193975518667e-05, "loss": 0.9864, "step": 2258 }, { "epoch": 0.20184510912055756, "grad_norm": 0.44182446599006653, "learning_rate": 9.245429730866035e-05, "loss": 0.9412, "step": 2259 }, { "epoch": 0.20193446065181941, "grad_norm": 0.4208958148956299, "learning_rate": 9.244665130613411e-05, "loss": 1.0687, "step": 2260 }, { "epoch": 0.2020238121830813, "grad_norm": 0.5523428916931152, "learning_rate": 9.243900174824838e-05, "loss": 0.9329, "step": 2261 }, { "epoch": 0.20211316371434315, "grad_norm": 0.3945762813091278, "learning_rate": 9.243134863564387e-05, "loss": 1.0095, "step": 2262 }, { "epoch": 0.202202515245605, "grad_norm": 0.5793361067771912, "learning_rate": 9.242369196896163e-05, "loss": 0.9279, "step": 2263 }, { "epoch": 0.2022918667768669, "grad_norm": 0.4429938495159149, "learning_rate": 9.241603174884299e-05, "loss": 1.0672, "step": 2264 }, { "epoch": 0.20238121830812875, "grad_norm": 0.4614643454551697, "learning_rate": 9.240836797592958e-05, "loss": 1.0062, "step": 2265 }, { "epoch": 0.20247056983939063, "grad_norm": 0.4438598155975342, "learning_rate": 9.240070065086328e-05, "loss": 0.9787, "step": 2266 }, { "epoch": 0.20255992137065248, "grad_norm": 0.3669438362121582, "learning_rate": 9.239302977428637e-05, "loss": 1.0367, "step": 2267 }, { "epoch": 0.20264927290191437, "grad_norm": 0.43813052773475647, "learning_rate": 9.238535534684131e-05, "loss": 0.9767, "step": 2268 }, { "epoch": 0.20273862443317622, "grad_norm": 0.4687936007976532, "learning_rate": 9.237767736917098e-05, "loss": 1.0335, "step": 2269 }, { "epoch": 0.20282797596443808, "grad_norm": 0.3983701467514038, "learning_rate": 9.236999584191843e-05, "loss": 1.0086, "step": 2270 }, { "epoch": 0.20291732749569996, "grad_norm": 0.4753643870353699, "learning_rate": 9.23623107657271e-05, "loss": 0.9726, "step": 2271 }, { "epoch": 0.20300667902696182, "grad_norm": 0.4003112316131592, "learning_rate": 9.235462214124071e-05, "loss": 1.0838, "step": 2272 }, { "epoch": 0.2030960305582237, "grad_norm": 0.4177161157131195, "learning_rate": 9.234692996910323e-05, "loss": 1.0637, "step": 2273 }, { "epoch": 0.20318538208948556, "grad_norm": 0.48143237829208374, "learning_rate": 9.233923424995899e-05, "loss": 0.9929, "step": 2274 }, { "epoch": 0.20327473362074744, "grad_norm": 0.4232881963253021, "learning_rate": 9.233153498445258e-05, "loss": 1.0583, "step": 2275 }, { "epoch": 0.2033640851520093, "grad_norm": 0.47992992401123047, "learning_rate": 9.232383217322889e-05, "loss": 0.9638, "step": 2276 }, { "epoch": 0.20345343668327115, "grad_norm": 0.4345097839832306, "learning_rate": 9.23161258169331e-05, "loss": 1.0855, "step": 2277 }, { "epoch": 0.20354278821453303, "grad_norm": 0.45901912450790405, "learning_rate": 9.230841591621073e-05, "loss": 1.0315, "step": 2278 }, { "epoch": 0.2036321397457949, "grad_norm": 0.45470988750457764, "learning_rate": 9.230070247170755e-05, "loss": 1.0359, "step": 2279 }, { "epoch": 0.20372149127705677, "grad_norm": 0.4291987419128418, "learning_rate": 9.229298548406964e-05, "loss": 1.0296, "step": 2280 }, { "epoch": 0.20381084280831863, "grad_norm": 0.3951917588710785, "learning_rate": 9.228526495394339e-05, "loss": 0.9465, "step": 2281 }, { "epoch": 0.20390019433958048, "grad_norm": 0.3695099353790283, "learning_rate": 9.227754088197548e-05, "loss": 1.03, "step": 2282 }, { "epoch": 0.20398954587084236, "grad_norm": 0.40235844254493713, "learning_rate": 9.226981326881286e-05, "loss": 1.0398, "step": 2283 }, { "epoch": 0.20407889740210422, "grad_norm": 0.42109382152557373, "learning_rate": 9.226208211510282e-05, "loss": 1.0524, "step": 2284 }, { "epoch": 0.2041682489333661, "grad_norm": 0.4391646981239319, "learning_rate": 9.225434742149293e-05, "loss": 1.0501, "step": 2285 }, { "epoch": 0.20425760046462796, "grad_norm": 0.45787525177001953, "learning_rate": 9.224660918863104e-05, "loss": 0.9669, "step": 2286 }, { "epoch": 0.20434695199588984, "grad_norm": 0.3951761722564697, "learning_rate": 9.22388674171653e-05, "loss": 0.9985, "step": 2287 }, { "epoch": 0.2044363035271517, "grad_norm": 0.5199114680290222, "learning_rate": 9.22311221077442e-05, "loss": 0.946, "step": 2288 }, { "epoch": 0.20452565505841355, "grad_norm": 0.41517964005470276, "learning_rate": 9.222337326101647e-05, "loss": 1.0267, "step": 2289 }, { "epoch": 0.20461500658967544, "grad_norm": 0.45859935879707336, "learning_rate": 9.221562087763114e-05, "loss": 0.9967, "step": 2290 }, { "epoch": 0.2047043581209373, "grad_norm": 0.4338325560092926, "learning_rate": 9.220786495823758e-05, "loss": 1.023, "step": 2291 }, { "epoch": 0.20479370965219917, "grad_norm": 0.41822728514671326, "learning_rate": 9.220010550348544e-05, "loss": 1.0318, "step": 2292 }, { "epoch": 0.20488306118346103, "grad_norm": 0.4380994141101837, "learning_rate": 9.219234251402464e-05, "loss": 1.0465, "step": 2293 }, { "epoch": 0.20497241271472288, "grad_norm": 0.4628269672393799, "learning_rate": 9.218457599050542e-05, "loss": 0.9637, "step": 2294 }, { "epoch": 0.20506176424598477, "grad_norm": 0.41425222158432007, "learning_rate": 9.217680593357829e-05, "loss": 1.0134, "step": 2295 }, { "epoch": 0.20515111577724662, "grad_norm": 0.33953461050987244, "learning_rate": 9.216903234389412e-05, "loss": 1.0589, "step": 2296 }, { "epoch": 0.2052404673085085, "grad_norm": 0.42335033416748047, "learning_rate": 9.216125522210398e-05, "loss": 0.9652, "step": 2297 }, { "epoch": 0.20532981883977036, "grad_norm": 0.39172297716140747, "learning_rate": 9.21534745688593e-05, "loss": 1.0338, "step": 2298 }, { "epoch": 0.20541917037103224, "grad_norm": 0.4049052298069, "learning_rate": 9.214569038481183e-05, "loss": 0.9537, "step": 2299 }, { "epoch": 0.2055085219022941, "grad_norm": 0.4806058406829834, "learning_rate": 9.213790267061352e-05, "loss": 0.9846, "step": 2300 }, { "epoch": 0.20559787343355596, "grad_norm": 0.437541663646698, "learning_rate": 9.213011142691671e-05, "loss": 0.9776, "step": 2301 }, { "epoch": 0.20568722496481784, "grad_norm": 0.42535480856895447, "learning_rate": 9.2122316654374e-05, "loss": 0.9601, "step": 2302 }, { "epoch": 0.2057765764960797, "grad_norm": 0.3970091640949249, "learning_rate": 9.211451835363828e-05, "loss": 0.9903, "step": 2303 }, { "epoch": 0.20586592802734158, "grad_norm": 0.4786219894886017, "learning_rate": 9.210671652536274e-05, "loss": 0.9416, "step": 2304 }, { "epoch": 0.20595527955860343, "grad_norm": 0.4173687696456909, "learning_rate": 9.209891117020087e-05, "loss": 1.0231, "step": 2305 }, { "epoch": 0.20604463108986532, "grad_norm": 0.49292880296707153, "learning_rate": 9.209110228880642e-05, "loss": 0.8597, "step": 2306 }, { "epoch": 0.20613398262112717, "grad_norm": 0.4839416742324829, "learning_rate": 9.208328988183352e-05, "loss": 1.0004, "step": 2307 }, { "epoch": 0.20622333415238903, "grad_norm": 0.5510335564613342, "learning_rate": 9.207547394993651e-05, "loss": 0.9456, "step": 2308 }, { "epoch": 0.2063126856836509, "grad_norm": 0.3770674169063568, "learning_rate": 9.206765449377006e-05, "loss": 1.0004, "step": 2309 }, { "epoch": 0.20640203721491276, "grad_norm": 0.43098023533821106, "learning_rate": 9.205983151398915e-05, "loss": 1.0377, "step": 2310 }, { "epoch": 0.20649138874617465, "grad_norm": 0.4793153405189514, "learning_rate": 9.205200501124902e-05, "loss": 0.9746, "step": 2311 }, { "epoch": 0.2065807402774365, "grad_norm": 0.5100858807563782, "learning_rate": 9.204417498620522e-05, "loss": 1.021, "step": 2312 }, { "epoch": 0.20667009180869836, "grad_norm": 0.42289164662361145, "learning_rate": 9.203634143951361e-05, "loss": 0.9899, "step": 2313 }, { "epoch": 0.20675944333996024, "grad_norm": 0.4890194237232208, "learning_rate": 9.202850437183033e-05, "loss": 0.94, "step": 2314 }, { "epoch": 0.2068487948712221, "grad_norm": 0.3784908652305603, "learning_rate": 9.202066378381183e-05, "loss": 1.0157, "step": 2315 }, { "epoch": 0.20693814640248398, "grad_norm": 0.4029442369937897, "learning_rate": 9.201281967611481e-05, "loss": 1.0325, "step": 2316 }, { "epoch": 0.20702749793374584, "grad_norm": 0.413496732711792, "learning_rate": 9.200497204939633e-05, "loss": 1.0768, "step": 2317 }, { "epoch": 0.20711684946500772, "grad_norm": 0.4051424562931061, "learning_rate": 9.19971209043137e-05, "loss": 1.0192, "step": 2318 }, { "epoch": 0.20720620099626957, "grad_norm": 0.399827241897583, "learning_rate": 9.198926624152453e-05, "loss": 1.0137, "step": 2319 }, { "epoch": 0.20729555252753143, "grad_norm": 0.41111016273498535, "learning_rate": 9.198140806168673e-05, "loss": 1.0135, "step": 2320 }, { "epoch": 0.2073849040587933, "grad_norm": 0.49021974205970764, "learning_rate": 9.197354636545853e-05, "loss": 1.0126, "step": 2321 }, { "epoch": 0.20747425559005517, "grad_norm": 0.37917208671569824, "learning_rate": 9.196568115349842e-05, "loss": 1.0455, "step": 2322 }, { "epoch": 0.20756360712131705, "grad_norm": 0.4736994802951813, "learning_rate": 9.195781242646517e-05, "loss": 1.0129, "step": 2323 }, { "epoch": 0.2076529586525789, "grad_norm": 0.444503515958786, "learning_rate": 9.194994018501793e-05, "loss": 0.998, "step": 2324 }, { "epoch": 0.2077423101838408, "grad_norm": 0.4442484378814697, "learning_rate": 9.194206442981601e-05, "loss": 0.9962, "step": 2325 }, { "epoch": 0.20783166171510264, "grad_norm": 0.48136886954307556, "learning_rate": 9.193418516151912e-05, "loss": 1.0792, "step": 2326 }, { "epoch": 0.2079210132463645, "grad_norm": 0.4007897675037384, "learning_rate": 9.192630238078725e-05, "loss": 0.9787, "step": 2327 }, { "epoch": 0.20801036477762638, "grad_norm": 0.42562204599380493, "learning_rate": 9.191841608828066e-05, "loss": 1.0511, "step": 2328 }, { "epoch": 0.20809971630888824, "grad_norm": 0.4017636179924011, "learning_rate": 9.19105262846599e-05, "loss": 1.0076, "step": 2329 }, { "epoch": 0.20818906784015012, "grad_norm": 0.4797520637512207, "learning_rate": 9.190263297058583e-05, "loss": 1.0116, "step": 2330 }, { "epoch": 0.20827841937141198, "grad_norm": 0.37784191966056824, "learning_rate": 9.189473614671959e-05, "loss": 1.0355, "step": 2331 }, { "epoch": 0.20836777090267383, "grad_norm": 0.43021056056022644, "learning_rate": 9.188683581372264e-05, "loss": 1.0424, "step": 2332 }, { "epoch": 0.20845712243393572, "grad_norm": 0.3950195610523224, "learning_rate": 9.187893197225672e-05, "loss": 1.0493, "step": 2333 }, { "epoch": 0.20854647396519757, "grad_norm": 0.40289580821990967, "learning_rate": 9.187102462298384e-05, "loss": 1.0145, "step": 2334 }, { "epoch": 0.20863582549645945, "grad_norm": 0.3758835196495056, "learning_rate": 9.186311376656633e-05, "loss": 1.0352, "step": 2335 }, { "epoch": 0.2087251770277213, "grad_norm": 0.44723740220069885, "learning_rate": 9.185519940366682e-05, "loss": 1.0553, "step": 2336 }, { "epoch": 0.2088145285589832, "grad_norm": 0.4580692946910858, "learning_rate": 9.18472815349482e-05, "loss": 1.0657, "step": 2337 }, { "epoch": 0.20890388009024505, "grad_norm": 0.5509561896324158, "learning_rate": 9.183936016107371e-05, "loss": 0.9759, "step": 2338 }, { "epoch": 0.2089932316215069, "grad_norm": 0.5175040364265442, "learning_rate": 9.183143528270682e-05, "loss": 1.0432, "step": 2339 }, { "epoch": 0.2090825831527688, "grad_norm": 0.40656107664108276, "learning_rate": 9.182350690051133e-05, "loss": 1.0077, "step": 2340 }, { "epoch": 0.20917193468403064, "grad_norm": 0.44654014706611633, "learning_rate": 9.181557501515134e-05, "loss": 1.0218, "step": 2341 }, { "epoch": 0.20926128621529252, "grad_norm": 0.4651091396808624, "learning_rate": 9.180763962729123e-05, "loss": 1.0464, "step": 2342 }, { "epoch": 0.20935063774655438, "grad_norm": 0.4404200613498688, "learning_rate": 9.179970073759565e-05, "loss": 1.0241, "step": 2343 }, { "epoch": 0.20943998927781624, "grad_norm": 0.5655425190925598, "learning_rate": 9.17917583467296e-05, "loss": 0.9452, "step": 2344 }, { "epoch": 0.20952934080907812, "grad_norm": 0.442924827337265, "learning_rate": 9.178381245535829e-05, "loss": 1.0046, "step": 2345 }, { "epoch": 0.20961869234033997, "grad_norm": 0.40375635027885437, "learning_rate": 9.177586306414731e-05, "loss": 1.0673, "step": 2346 }, { "epoch": 0.20970804387160186, "grad_norm": 0.46348804235458374, "learning_rate": 9.176791017376252e-05, "loss": 1.0156, "step": 2347 }, { "epoch": 0.2097973954028637, "grad_norm": 0.41273656487464905, "learning_rate": 9.175995378487003e-05, "loss": 1.0553, "step": 2348 }, { "epoch": 0.2098867469341256, "grad_norm": 0.37597787380218506, "learning_rate": 9.175199389813627e-05, "loss": 0.9665, "step": 2349 }, { "epoch": 0.20997609846538745, "grad_norm": 0.4291061758995056, "learning_rate": 9.174403051422798e-05, "loss": 0.9993, "step": 2350 }, { "epoch": 0.2100654499966493, "grad_norm": 0.363323837518692, "learning_rate": 9.173606363381219e-05, "loss": 1.0601, "step": 2351 }, { "epoch": 0.2101548015279112, "grad_norm": 0.4082275629043579, "learning_rate": 9.172809325755618e-05, "loss": 1.0879, "step": 2352 }, { "epoch": 0.21024415305917304, "grad_norm": 0.41467830538749695, "learning_rate": 9.172011938612757e-05, "loss": 1.0513, "step": 2353 }, { "epoch": 0.21033350459043493, "grad_norm": 0.40125423669815063, "learning_rate": 9.171214202019428e-05, "loss": 1.0362, "step": 2354 }, { "epoch": 0.21042285612169678, "grad_norm": 0.38793444633483887, "learning_rate": 9.170416116042444e-05, "loss": 0.9959, "step": 2355 }, { "epoch": 0.21051220765295867, "grad_norm": 0.4493177533149719, "learning_rate": 9.169617680748659e-05, "loss": 1.004, "step": 2356 }, { "epoch": 0.21060155918422052, "grad_norm": 0.39111337065696716, "learning_rate": 9.168818896204948e-05, "loss": 0.9994, "step": 2357 }, { "epoch": 0.21069091071548238, "grad_norm": 0.37774431705474854, "learning_rate": 9.168019762478218e-05, "loss": 1.0901, "step": 2358 }, { "epoch": 0.21078026224674426, "grad_norm": 0.3941672444343567, "learning_rate": 9.167220279635406e-05, "loss": 1.0383, "step": 2359 }, { "epoch": 0.21086961377800612, "grad_norm": 0.39567404985427856, "learning_rate": 9.166420447743475e-05, "loss": 0.986, "step": 2360 }, { "epoch": 0.210958965309268, "grad_norm": 0.5099696516990662, "learning_rate": 9.165620266869421e-05, "loss": 0.9966, "step": 2361 }, { "epoch": 0.21104831684052985, "grad_norm": 0.4867641031742096, "learning_rate": 9.164819737080267e-05, "loss": 1.037, "step": 2362 }, { "epoch": 0.2111376683717917, "grad_norm": 0.41066691279411316, "learning_rate": 9.164018858443066e-05, "loss": 1.0574, "step": 2363 }, { "epoch": 0.2112270199030536, "grad_norm": 0.4427512586116791, "learning_rate": 9.163217631024901e-05, "loss": 0.989, "step": 2364 }, { "epoch": 0.21131637143431545, "grad_norm": 0.4066883325576782, "learning_rate": 9.162416054892882e-05, "loss": 0.979, "step": 2365 }, { "epoch": 0.21140572296557733, "grad_norm": 0.4055411219596863, "learning_rate": 9.161614130114151e-05, "loss": 1.0365, "step": 2366 }, { "epoch": 0.21149507449683919, "grad_norm": 0.4873175621032715, "learning_rate": 9.160811856755877e-05, "loss": 0.9807, "step": 2367 }, { "epoch": 0.21158442602810107, "grad_norm": 0.4377719461917877, "learning_rate": 9.160009234885258e-05, "loss": 1.0499, "step": 2368 }, { "epoch": 0.21167377755936292, "grad_norm": 0.4379160702228546, "learning_rate": 9.159206264569524e-05, "loss": 1.0161, "step": 2369 }, { "epoch": 0.21176312909062478, "grad_norm": 0.41256406903266907, "learning_rate": 9.158402945875932e-05, "loss": 0.9943, "step": 2370 }, { "epoch": 0.21185248062188666, "grad_norm": 0.3979935646057129, "learning_rate": 9.157599278871767e-05, "loss": 1.0566, "step": 2371 }, { "epoch": 0.21194183215314852, "grad_norm": 0.4434359073638916, "learning_rate": 9.156795263624345e-05, "loss": 0.9642, "step": 2372 }, { "epoch": 0.2120311836844104, "grad_norm": 0.42993849515914917, "learning_rate": 9.155990900201012e-05, "loss": 0.9881, "step": 2373 }, { "epoch": 0.21212053521567226, "grad_norm": 0.4635986387729645, "learning_rate": 9.155186188669143e-05, "loss": 0.988, "step": 2374 }, { "epoch": 0.2122098867469341, "grad_norm": 0.3796444535255432, "learning_rate": 9.154381129096137e-05, "loss": 1.0141, "step": 2375 }, { "epoch": 0.212299238278196, "grad_norm": 0.468389093875885, "learning_rate": 9.15357572154943e-05, "loss": 0.9412, "step": 2376 }, { "epoch": 0.21238858980945785, "grad_norm": 0.410773366689682, "learning_rate": 9.152769966096482e-05, "loss": 1.0115, "step": 2377 }, { "epoch": 0.21247794134071973, "grad_norm": 0.41390907764434814, "learning_rate": 9.151963862804784e-05, "loss": 1.0499, "step": 2378 }, { "epoch": 0.2125672928719816, "grad_norm": 0.4567076563835144, "learning_rate": 9.151157411741858e-05, "loss": 0.9748, "step": 2379 }, { "epoch": 0.21265664440324347, "grad_norm": 0.4838281273841858, "learning_rate": 9.150350612975247e-05, "loss": 1.0076, "step": 2380 }, { "epoch": 0.21274599593450533, "grad_norm": 0.4495866000652313, "learning_rate": 9.149543466572535e-05, "loss": 0.959, "step": 2381 }, { "epoch": 0.21283534746576718, "grad_norm": 0.4260130226612091, "learning_rate": 9.148735972601326e-05, "loss": 1.0522, "step": 2382 }, { "epoch": 0.21292469899702907, "grad_norm": 0.3835292160511017, "learning_rate": 9.147928131129256e-05, "loss": 1.0342, "step": 2383 }, { "epoch": 0.21301405052829092, "grad_norm": 0.4930969774723053, "learning_rate": 9.147119942223993e-05, "loss": 0.9595, "step": 2384 }, { "epoch": 0.2131034020595528, "grad_norm": 0.6031138896942139, "learning_rate": 9.14631140595323e-05, "loss": 1.0145, "step": 2385 }, { "epoch": 0.21319275359081466, "grad_norm": 0.5279069542884827, "learning_rate": 9.145502522384688e-05, "loss": 0.9652, "step": 2386 }, { "epoch": 0.21328210512207654, "grad_norm": 0.4844546616077423, "learning_rate": 9.144693291586124e-05, "loss": 1.0302, "step": 2387 }, { "epoch": 0.2133714566533384, "grad_norm": 0.4257798194885254, "learning_rate": 9.143883713625317e-05, "loss": 1.0513, "step": 2388 }, { "epoch": 0.21346080818460025, "grad_norm": 0.41385725140571594, "learning_rate": 9.143073788570077e-05, "loss": 0.9939, "step": 2389 }, { "epoch": 0.21355015971586214, "grad_norm": 0.3999519944190979, "learning_rate": 9.142263516488245e-05, "loss": 0.9706, "step": 2390 }, { "epoch": 0.213639511247124, "grad_norm": 0.3895846903324127, "learning_rate": 9.141452897447692e-05, "loss": 1.074, "step": 2391 }, { "epoch": 0.21372886277838588, "grad_norm": 0.44434553384780884, "learning_rate": 9.140641931516314e-05, "loss": 1.0137, "step": 2392 }, { "epoch": 0.21381821430964773, "grad_norm": 0.3749370574951172, "learning_rate": 9.139830618762038e-05, "loss": 1.0124, "step": 2393 }, { "epoch": 0.21390756584090959, "grad_norm": 0.37694430351257324, "learning_rate": 9.139018959252819e-05, "loss": 1.0097, "step": 2394 }, { "epoch": 0.21399691737217147, "grad_norm": 0.3813773989677429, "learning_rate": 9.138206953056644e-05, "loss": 1.0833, "step": 2395 }, { "epoch": 0.21408626890343332, "grad_norm": 0.4502156972885132, "learning_rate": 9.137394600241527e-05, "loss": 0.9692, "step": 2396 }, { "epoch": 0.2141756204346952, "grad_norm": 0.41509172320365906, "learning_rate": 9.136581900875512e-05, "loss": 0.9825, "step": 2397 }, { "epoch": 0.21426497196595706, "grad_norm": 0.4003329277038574, "learning_rate": 9.135768855026668e-05, "loss": 1.0834, "step": 2398 }, { "epoch": 0.21435432349721895, "grad_norm": 0.4339703917503357, "learning_rate": 9.1349554627631e-05, "loss": 1.0946, "step": 2399 }, { "epoch": 0.2144436750284808, "grad_norm": 0.43198466300964355, "learning_rate": 9.134141724152934e-05, "loss": 0.9449, "step": 2400 }, { "epoch": 0.21453302655974266, "grad_norm": 0.44103294610977173, "learning_rate": 9.133327639264332e-05, "loss": 0.9653, "step": 2401 }, { "epoch": 0.21462237809100454, "grad_norm": 0.4824722707271576, "learning_rate": 9.132513208165486e-05, "loss": 1.0428, "step": 2402 }, { "epoch": 0.2147117296222664, "grad_norm": 0.49322792887687683, "learning_rate": 9.131698430924605e-05, "loss": 1.0646, "step": 2403 }, { "epoch": 0.21480108115352828, "grad_norm": 0.4989064931869507, "learning_rate": 9.130883307609942e-05, "loss": 1.0013, "step": 2404 }, { "epoch": 0.21489043268479013, "grad_norm": 0.459974467754364, "learning_rate": 9.130067838289769e-05, "loss": 1.0519, "step": 2405 }, { "epoch": 0.214979784216052, "grad_norm": 0.44831836223602295, "learning_rate": 9.129252023032391e-05, "loss": 1.055, "step": 2406 }, { "epoch": 0.21506913574731387, "grad_norm": 0.4193100929260254, "learning_rate": 9.128435861906142e-05, "loss": 0.9937, "step": 2407 }, { "epoch": 0.21515848727857573, "grad_norm": 0.4522268772125244, "learning_rate": 9.127619354979384e-05, "loss": 1.076, "step": 2408 }, { "epoch": 0.2152478388098376, "grad_norm": 0.4380955100059509, "learning_rate": 9.126802502320509e-05, "loss": 0.9917, "step": 2409 }, { "epoch": 0.21533719034109947, "grad_norm": 0.45167332887649536, "learning_rate": 9.125985303997933e-05, "loss": 1.0884, "step": 2410 }, { "epoch": 0.21542654187236135, "grad_norm": 0.4360027611255646, "learning_rate": 9.125167760080108e-05, "loss": 0.9882, "step": 2411 }, { "epoch": 0.2155158934036232, "grad_norm": 0.390898197889328, "learning_rate": 9.124349870635515e-05, "loss": 1.0076, "step": 2412 }, { "epoch": 0.21560524493488506, "grad_norm": 0.4726807475090027, "learning_rate": 9.123531635732656e-05, "loss": 0.9496, "step": 2413 }, { "epoch": 0.21569459646614694, "grad_norm": 0.4268726706504822, "learning_rate": 9.122713055440069e-05, "loss": 1.0008, "step": 2414 }, { "epoch": 0.2157839479974088, "grad_norm": 0.43627238273620605, "learning_rate": 9.121894129826318e-05, "loss": 0.9642, "step": 2415 }, { "epoch": 0.21587329952867068, "grad_norm": 0.4274177849292755, "learning_rate": 9.121074858959997e-05, "loss": 1.0273, "step": 2416 }, { "epoch": 0.21596265105993254, "grad_norm": 0.4479106068611145, "learning_rate": 9.12025524290973e-05, "loss": 0.9998, "step": 2417 }, { "epoch": 0.21605200259119442, "grad_norm": 0.45606303215026855, "learning_rate": 9.119435281744169e-05, "loss": 0.9753, "step": 2418 }, { "epoch": 0.21614135412245628, "grad_norm": 0.40159735083580017, "learning_rate": 9.118614975531991e-05, "loss": 1.011, "step": 2419 }, { "epoch": 0.21623070565371813, "grad_norm": 0.45181921124458313, "learning_rate": 9.117794324341908e-05, "loss": 1.0253, "step": 2420 }, { "epoch": 0.21632005718498, "grad_norm": 0.41297322511672974, "learning_rate": 9.116973328242658e-05, "loss": 1.001, "step": 2421 }, { "epoch": 0.21640940871624187, "grad_norm": 0.39003559947013855, "learning_rate": 9.116151987303007e-05, "loss": 1.0044, "step": 2422 }, { "epoch": 0.21649876024750375, "grad_norm": 0.45136478543281555, "learning_rate": 9.115330301591753e-05, "loss": 0.9569, "step": 2423 }, { "epoch": 0.2165881117787656, "grad_norm": 0.4354073107242584, "learning_rate": 9.114508271177722e-05, "loss": 0.9862, "step": 2424 }, { "epoch": 0.21667746331002746, "grad_norm": 0.3921179175376892, "learning_rate": 9.113685896129763e-05, "loss": 0.9894, "step": 2425 }, { "epoch": 0.21676681484128935, "grad_norm": 0.42139732837677, "learning_rate": 9.112863176516762e-05, "loss": 0.9777, "step": 2426 }, { "epoch": 0.2168561663725512, "grad_norm": 0.5038270950317383, "learning_rate": 9.11204011240763e-05, "loss": 1.1157, "step": 2427 }, { "epoch": 0.21694551790381308, "grad_norm": 0.5017008781433105, "learning_rate": 9.111216703871308e-05, "loss": 0.9634, "step": 2428 }, { "epoch": 0.21703486943507494, "grad_norm": 0.5621154308319092, "learning_rate": 9.110392950976764e-05, "loss": 1.0222, "step": 2429 }, { "epoch": 0.21712422096633682, "grad_norm": 0.43633437156677246, "learning_rate": 9.109568853792998e-05, "loss": 1.1109, "step": 2430 }, { "epoch": 0.21721357249759868, "grad_norm": 0.48714277148246765, "learning_rate": 9.108744412389034e-05, "loss": 0.9751, "step": 2431 }, { "epoch": 0.21730292402886053, "grad_norm": 0.4233207702636719, "learning_rate": 9.107919626833931e-05, "loss": 1.0543, "step": 2432 }, { "epoch": 0.21739227556012242, "grad_norm": 0.3775826096534729, "learning_rate": 9.107094497196771e-05, "loss": 1.0672, "step": 2433 }, { "epoch": 0.21748162709138427, "grad_norm": 0.47611549496650696, "learning_rate": 9.106269023546667e-05, "loss": 1.0062, "step": 2434 }, { "epoch": 0.21757097862264616, "grad_norm": 0.40070483088493347, "learning_rate": 9.105443205952765e-05, "loss": 0.9908, "step": 2435 }, { "epoch": 0.217660330153908, "grad_norm": 0.4242134392261505, "learning_rate": 9.104617044484233e-05, "loss": 1.0447, "step": 2436 }, { "epoch": 0.21774968168516987, "grad_norm": 0.4634241461753845, "learning_rate": 9.103790539210271e-05, "loss": 1.0203, "step": 2437 }, { "epoch": 0.21783903321643175, "grad_norm": 0.46753108501434326, "learning_rate": 9.102963690200108e-05, "loss": 0.892, "step": 2438 }, { "epoch": 0.2179283847476936, "grad_norm": 0.47275984287261963, "learning_rate": 9.102136497523002e-05, "loss": 1.0528, "step": 2439 }, { "epoch": 0.2180177362789555, "grad_norm": 0.4334014058113098, "learning_rate": 9.101308961248238e-05, "loss": 0.9923, "step": 2440 }, { "epoch": 0.21810708781021734, "grad_norm": 0.41837531328201294, "learning_rate": 9.100481081445132e-05, "loss": 0.9946, "step": 2441 }, { "epoch": 0.21819643934147923, "grad_norm": 0.4113021194934845, "learning_rate": 9.099652858183028e-05, "loss": 1.0441, "step": 2442 }, { "epoch": 0.21828579087274108, "grad_norm": 0.4260026514530182, "learning_rate": 9.098824291531296e-05, "loss": 1.0988, "step": 2443 }, { "epoch": 0.21837514240400294, "grad_norm": 0.4235369563102722, "learning_rate": 9.097995381559341e-05, "loss": 1.0867, "step": 2444 }, { "epoch": 0.21846449393526482, "grad_norm": 0.4195026755332947, "learning_rate": 9.097166128336592e-05, "loss": 1.0309, "step": 2445 }, { "epoch": 0.21855384546652667, "grad_norm": 0.4182673990726471, "learning_rate": 9.096336531932506e-05, "loss": 1.0566, "step": 2446 }, { "epoch": 0.21864319699778856, "grad_norm": 0.40884897112846375, "learning_rate": 9.095506592416572e-05, "loss": 1.0218, "step": 2447 }, { "epoch": 0.2187325485290504, "grad_norm": 0.4604688584804535, "learning_rate": 9.094676309858305e-05, "loss": 1.0578, "step": 2448 }, { "epoch": 0.2188219000603123, "grad_norm": 0.4072805941104889, "learning_rate": 9.093845684327251e-05, "loss": 1.0117, "step": 2449 }, { "epoch": 0.21891125159157415, "grad_norm": 0.3600502014160156, "learning_rate": 9.093014715892984e-05, "loss": 1.0826, "step": 2450 }, { "epoch": 0.219000603122836, "grad_norm": 0.421089231967926, "learning_rate": 9.092183404625107e-05, "loss": 1.0072, "step": 2451 }, { "epoch": 0.2190899546540979, "grad_norm": 0.4491601586341858, "learning_rate": 9.09135175059325e-05, "loss": 1.0409, "step": 2452 }, { "epoch": 0.21917930618535975, "grad_norm": 0.4364774823188782, "learning_rate": 9.090519753867072e-05, "loss": 1.0142, "step": 2453 }, { "epoch": 0.21926865771662163, "grad_norm": 0.4364645779132843, "learning_rate": 9.089687414516265e-05, "loss": 1.0297, "step": 2454 }, { "epoch": 0.21935800924788348, "grad_norm": 0.4864216148853302, "learning_rate": 9.088854732610543e-05, "loss": 1.0635, "step": 2455 }, { "epoch": 0.21944736077914534, "grad_norm": 0.4514232873916626, "learning_rate": 9.088021708219652e-05, "loss": 1.0224, "step": 2456 }, { "epoch": 0.21953671231040722, "grad_norm": 0.3978760540485382, "learning_rate": 9.087188341413369e-05, "loss": 1.0442, "step": 2457 }, { "epoch": 0.21962606384166908, "grad_norm": 0.36813050508499146, "learning_rate": 9.086354632261496e-05, "loss": 1.0114, "step": 2458 }, { "epoch": 0.21971541537293096, "grad_norm": 0.4238460063934326, "learning_rate": 9.085520580833866e-05, "loss": 1.0065, "step": 2459 }, { "epoch": 0.21980476690419282, "grad_norm": 0.57405024766922, "learning_rate": 9.084686187200338e-05, "loss": 0.9361, "step": 2460 }, { "epoch": 0.2198941184354547, "grad_norm": 0.5002081990242004, "learning_rate": 9.0838514514308e-05, "loss": 1.0109, "step": 2461 }, { "epoch": 0.21998346996671655, "grad_norm": 0.397339403629303, "learning_rate": 9.083016373595174e-05, "loss": 1.0188, "step": 2462 }, { "epoch": 0.2200728214979784, "grad_norm": 0.4922274947166443, "learning_rate": 9.082180953763406e-05, "loss": 0.9552, "step": 2463 }, { "epoch": 0.2201621730292403, "grad_norm": 0.49494630098342896, "learning_rate": 9.08134519200547e-05, "loss": 1.0058, "step": 2464 }, { "epoch": 0.22025152456050215, "grad_norm": 0.44430068135261536, "learning_rate": 9.080509088391369e-05, "loss": 1.0486, "step": 2465 }, { "epoch": 0.22034087609176403, "grad_norm": 0.4087202548980713, "learning_rate": 9.079672642991137e-05, "loss": 1.0006, "step": 2466 }, { "epoch": 0.2204302276230259, "grad_norm": 0.49828004837036133, "learning_rate": 9.078835855874835e-05, "loss": 1.0229, "step": 2467 }, { "epoch": 0.22051957915428777, "grad_norm": 0.42710915207862854, "learning_rate": 9.077998727112554e-05, "loss": 1.0254, "step": 2468 }, { "epoch": 0.22060893068554963, "grad_norm": 0.40693971514701843, "learning_rate": 9.077161256774409e-05, "loss": 1.0551, "step": 2469 }, { "epoch": 0.22069828221681148, "grad_norm": 0.4143005609512329, "learning_rate": 9.076323444930551e-05, "loss": 0.9634, "step": 2470 }, { "epoch": 0.22078763374807336, "grad_norm": 0.4316956102848053, "learning_rate": 9.075485291651154e-05, "loss": 0.9414, "step": 2471 }, { "epoch": 0.22087698527933522, "grad_norm": 0.42413267493247986, "learning_rate": 9.07464679700642e-05, "loss": 0.9748, "step": 2472 }, { "epoch": 0.2209663368105971, "grad_norm": 0.4333771765232086, "learning_rate": 9.073807961066588e-05, "loss": 0.9875, "step": 2473 }, { "epoch": 0.22105568834185896, "grad_norm": 0.4280893802642822, "learning_rate": 9.072968783901913e-05, "loss": 0.972, "step": 2474 }, { "epoch": 0.2211450398731208, "grad_norm": 0.4792003333568573, "learning_rate": 9.072129265582689e-05, "loss": 1.0064, "step": 2475 }, { "epoch": 0.2212343914043827, "grad_norm": 0.44787389039993286, "learning_rate": 9.071289406179231e-05, "loss": 0.9806, "step": 2476 }, { "epoch": 0.22132374293564455, "grad_norm": 0.43630531430244446, "learning_rate": 9.070449205761891e-05, "loss": 0.9906, "step": 2477 }, { "epoch": 0.22141309446690643, "grad_norm": 0.46949827671051025, "learning_rate": 9.069608664401041e-05, "loss": 1.0052, "step": 2478 }, { "epoch": 0.2215024459981683, "grad_norm": 0.37613534927368164, "learning_rate": 9.068767782167086e-05, "loss": 0.9737, "step": 2479 }, { "epoch": 0.22159179752943017, "grad_norm": 0.4791412353515625, "learning_rate": 9.06792655913046e-05, "loss": 0.9505, "step": 2480 }, { "epoch": 0.22168114906069203, "grad_norm": 0.4607903063297272, "learning_rate": 9.067084995361623e-05, "loss": 1.1162, "step": 2481 }, { "epoch": 0.22177050059195388, "grad_norm": 0.40373310446739197, "learning_rate": 9.066243090931066e-05, "loss": 0.9888, "step": 2482 }, { "epoch": 0.22185985212321577, "grad_norm": 0.40429624915122986, "learning_rate": 9.065400845909308e-05, "loss": 1.0024, "step": 2483 }, { "epoch": 0.22194920365447762, "grad_norm": 0.40631914138793945, "learning_rate": 9.064558260366893e-05, "loss": 0.9719, "step": 2484 }, { "epoch": 0.2220385551857395, "grad_norm": 0.43395867943763733, "learning_rate": 9.063715334374401e-05, "loss": 0.9634, "step": 2485 }, { "epoch": 0.22212790671700136, "grad_norm": 0.4469437003135681, "learning_rate": 9.062872068002432e-05, "loss": 0.9666, "step": 2486 }, { "epoch": 0.22221725824826322, "grad_norm": 0.5009754300117493, "learning_rate": 9.062028461321621e-05, "loss": 0.9931, "step": 2487 }, { "epoch": 0.2223066097795251, "grad_norm": 0.4312589764595032, "learning_rate": 9.061184514402627e-05, "loss": 1.0201, "step": 2488 }, { "epoch": 0.22239596131078695, "grad_norm": 0.45867425203323364, "learning_rate": 9.060340227316142e-05, "loss": 1.0177, "step": 2489 }, { "epoch": 0.22248531284204884, "grad_norm": 0.39511245489120483, "learning_rate": 9.059495600132883e-05, "loss": 1.0383, "step": 2490 }, { "epoch": 0.2225746643733107, "grad_norm": 0.4442897439002991, "learning_rate": 9.058650632923595e-05, "loss": 0.9576, "step": 2491 }, { "epoch": 0.22266401590457258, "grad_norm": 0.556317925453186, "learning_rate": 9.057805325759057e-05, "loss": 0.9748, "step": 2492 }, { "epoch": 0.22275336743583443, "grad_norm": 0.49973076581954956, "learning_rate": 9.056959678710067e-05, "loss": 0.8914, "step": 2493 }, { "epoch": 0.2228427189670963, "grad_norm": 0.5200818181037903, "learning_rate": 9.056113691847461e-05, "loss": 0.9384, "step": 2494 }, { "epoch": 0.22293207049835817, "grad_norm": 0.4469231963157654, "learning_rate": 9.055267365242099e-05, "loss": 0.9844, "step": 2495 }, { "epoch": 0.22302142202962003, "grad_norm": 0.4275433123111725, "learning_rate": 9.054420698964868e-05, "loss": 1.0414, "step": 2496 }, { "epoch": 0.2231107735608819, "grad_norm": 0.36767879128456116, "learning_rate": 9.053573693086687e-05, "loss": 1.026, "step": 2497 }, { "epoch": 0.22320012509214376, "grad_norm": 0.3746047019958496, "learning_rate": 9.052726347678502e-05, "loss": 1.0667, "step": 2498 }, { "epoch": 0.22328947662340565, "grad_norm": 0.44350770115852356, "learning_rate": 9.051878662811286e-05, "loss": 1.0337, "step": 2499 }, { "epoch": 0.2233788281546675, "grad_norm": 0.42601439356803894, "learning_rate": 9.051030638556041e-05, "loss": 1.0673, "step": 2500 }, { "epoch": 0.22346817968592936, "grad_norm": 0.4272880554199219, "learning_rate": 9.050182274983798e-05, "loss": 1.0239, "step": 2501 }, { "epoch": 0.22355753121719124, "grad_norm": 0.4652038812637329, "learning_rate": 9.04933357216562e-05, "loss": 0.959, "step": 2502 }, { "epoch": 0.2236468827484531, "grad_norm": 0.4481859505176544, "learning_rate": 9.048484530172592e-05, "loss": 1.0601, "step": 2503 }, { "epoch": 0.22373623427971498, "grad_norm": 0.4447591304779053, "learning_rate": 9.04763514907583e-05, "loss": 1.0246, "step": 2504 }, { "epoch": 0.22382558581097683, "grad_norm": 0.37949299812316895, "learning_rate": 9.046785428946481e-05, "loss": 1.0761, "step": 2505 }, { "epoch": 0.2239149373422387, "grad_norm": 0.44117259979248047, "learning_rate": 9.045935369855716e-05, "loss": 1.0082, "step": 2506 }, { "epoch": 0.22400428887350057, "grad_norm": 0.4014883041381836, "learning_rate": 9.045084971874738e-05, "loss": 1.0867, "step": 2507 }, { "epoch": 0.22409364040476243, "grad_norm": 0.5892843008041382, "learning_rate": 9.044234235074775e-05, "loss": 0.9446, "step": 2508 }, { "epoch": 0.2241829919360243, "grad_norm": 0.4554404318332672, "learning_rate": 9.043383159527087e-05, "loss": 0.939, "step": 2509 }, { "epoch": 0.22427234346728617, "grad_norm": 0.3739197552204132, "learning_rate": 9.04253174530296e-05, "loss": 1.0603, "step": 2510 }, { "epoch": 0.22436169499854805, "grad_norm": 0.48913314938545227, "learning_rate": 9.041679992473708e-05, "loss": 0.9852, "step": 2511 }, { "epoch": 0.2244510465298099, "grad_norm": 0.3724241256713867, "learning_rate": 9.040827901110676e-05, "loss": 1.0422, "step": 2512 }, { "epoch": 0.22454039806107176, "grad_norm": 0.4462158679962158, "learning_rate": 9.039975471285235e-05, "loss": 1.0337, "step": 2513 }, { "epoch": 0.22462974959233364, "grad_norm": 0.4481222629547119, "learning_rate": 9.039122703068785e-05, "loss": 1.0114, "step": 2514 }, { "epoch": 0.2247191011235955, "grad_norm": 0.4646155536174774, "learning_rate": 9.038269596532755e-05, "loss": 1.0595, "step": 2515 }, { "epoch": 0.22480845265485738, "grad_norm": 0.4418215751647949, "learning_rate": 9.037416151748603e-05, "loss": 0.9829, "step": 2516 }, { "epoch": 0.22489780418611924, "grad_norm": 0.4447876811027527, "learning_rate": 9.036562368787811e-05, "loss": 0.9914, "step": 2517 }, { "epoch": 0.2249871557173811, "grad_norm": 0.47525134682655334, "learning_rate": 9.035708247721895e-05, "loss": 1.0124, "step": 2518 }, { "epoch": 0.22507650724864298, "grad_norm": 0.5248557925224304, "learning_rate": 9.034853788622393e-05, "loss": 0.9379, "step": 2519 }, { "epoch": 0.22516585877990483, "grad_norm": 0.4031184911727905, "learning_rate": 9.033998991560881e-05, "loss": 0.9834, "step": 2520 }, { "epoch": 0.22525521031116671, "grad_norm": 0.38345515727996826, "learning_rate": 9.033143856608952e-05, "loss": 1.043, "step": 2521 }, { "epoch": 0.22534456184242857, "grad_norm": 0.40574783086776733, "learning_rate": 9.032288383838236e-05, "loss": 1.0062, "step": 2522 }, { "epoch": 0.22543391337369045, "grad_norm": 0.5696341395378113, "learning_rate": 9.031432573320387e-05, "loss": 0.9409, "step": 2523 }, { "epoch": 0.2255232649049523, "grad_norm": 0.39741194248199463, "learning_rate": 9.030576425127087e-05, "loss": 1.0018, "step": 2524 }, { "epoch": 0.22561261643621416, "grad_norm": 0.41686055064201355, "learning_rate": 9.029719939330047e-05, "loss": 0.9734, "step": 2525 }, { "epoch": 0.22570196796747605, "grad_norm": 0.39542368054389954, "learning_rate": 9.028863116001012e-05, "loss": 0.9796, "step": 2526 }, { "epoch": 0.2257913194987379, "grad_norm": 0.4378524720668793, "learning_rate": 9.028005955211744e-05, "loss": 0.9701, "step": 2527 }, { "epoch": 0.22588067102999979, "grad_norm": 0.5109492540359497, "learning_rate": 9.027148457034043e-05, "loss": 1.0008, "step": 2528 }, { "epoch": 0.22597002256126164, "grad_norm": 0.35746294260025024, "learning_rate": 9.02629062153973e-05, "loss": 1.037, "step": 2529 }, { "epoch": 0.22605937409252352, "grad_norm": 0.44290691614151, "learning_rate": 9.025432448800662e-05, "loss": 1.0122, "step": 2530 }, { "epoch": 0.22614872562378538, "grad_norm": 0.4638045132160187, "learning_rate": 9.02457393888872e-05, "loss": 1.0214, "step": 2531 }, { "epoch": 0.22623807715504723, "grad_norm": 0.41130268573760986, "learning_rate": 9.023715091875809e-05, "loss": 1.0102, "step": 2532 }, { "epoch": 0.22632742868630912, "grad_norm": 0.4205174446105957, "learning_rate": 9.022855907833871e-05, "loss": 0.9937, "step": 2533 }, { "epoch": 0.22641678021757097, "grad_norm": 0.4878970980644226, "learning_rate": 9.02199638683487e-05, "loss": 0.9084, "step": 2534 }, { "epoch": 0.22650613174883286, "grad_norm": 0.44246265292167664, "learning_rate": 9.0211365289508e-05, "loss": 0.946, "step": 2535 }, { "epoch": 0.2265954832800947, "grad_norm": 0.4307198226451874, "learning_rate": 9.020276334253683e-05, "loss": 1.0387, "step": 2536 }, { "epoch": 0.22668483481135657, "grad_norm": 0.45680615305900574, "learning_rate": 9.019415802815569e-05, "loss": 1.0461, "step": 2537 }, { "epoch": 0.22677418634261845, "grad_norm": 0.3967481851577759, "learning_rate": 9.01855493470854e-05, "loss": 1.0014, "step": 2538 }, { "epoch": 0.2268635378738803, "grad_norm": 0.49700799584388733, "learning_rate": 9.0176937300047e-05, "loss": 1.0862, "step": 2539 }, { "epoch": 0.2269528894051422, "grad_norm": 0.4415007531642914, "learning_rate": 9.016832188776183e-05, "loss": 1.049, "step": 2540 }, { "epoch": 0.22704224093640404, "grad_norm": 0.39791885018348694, "learning_rate": 9.015970311095156e-05, "loss": 0.9441, "step": 2541 }, { "epoch": 0.22713159246766593, "grad_norm": 0.41884180903434753, "learning_rate": 9.015108097033806e-05, "loss": 1.0447, "step": 2542 }, { "epoch": 0.22722094399892778, "grad_norm": 0.4121531546115875, "learning_rate": 9.014245546664357e-05, "loss": 1.0104, "step": 2543 }, { "epoch": 0.22731029553018964, "grad_norm": 0.43119367957115173, "learning_rate": 9.013382660059053e-05, "loss": 0.9879, "step": 2544 }, { "epoch": 0.22739964706145152, "grad_norm": 0.4344024360179901, "learning_rate": 9.012519437290172e-05, "loss": 1.0233, "step": 2545 }, { "epoch": 0.22748899859271338, "grad_norm": 0.49549728631973267, "learning_rate": 9.011655878430019e-05, "loss": 0.9671, "step": 2546 }, { "epoch": 0.22757835012397526, "grad_norm": 0.4547901153564453, "learning_rate": 9.010791983550923e-05, "loss": 0.9462, "step": 2547 }, { "epoch": 0.22766770165523711, "grad_norm": 0.40302136540412903, "learning_rate": 9.009927752725247e-05, "loss": 1.0536, "step": 2548 }, { "epoch": 0.22775705318649897, "grad_norm": 0.4038864076137543, "learning_rate": 9.009063186025379e-05, "loss": 1.0665, "step": 2549 }, { "epoch": 0.22784640471776085, "grad_norm": 0.4060629606246948, "learning_rate": 9.008198283523737e-05, "loss": 0.9799, "step": 2550 }, { "epoch": 0.2279357562490227, "grad_norm": 0.49202004075050354, "learning_rate": 9.007333045292764e-05, "loss": 1.0176, "step": 2551 }, { "epoch": 0.2280251077802846, "grad_norm": 0.45856913924217224, "learning_rate": 9.006467471404932e-05, "loss": 0.9464, "step": 2552 }, { "epoch": 0.22811445931154645, "grad_norm": 0.44552847743034363, "learning_rate": 9.005601561932745e-05, "loss": 1.0197, "step": 2553 }, { "epoch": 0.22820381084280833, "grad_norm": 0.40756505727767944, "learning_rate": 9.00473531694873e-05, "loss": 0.9694, "step": 2554 }, { "epoch": 0.22829316237407019, "grad_norm": 0.4505196511745453, "learning_rate": 9.003868736525444e-05, "loss": 1.0085, "step": 2555 }, { "epoch": 0.22838251390533204, "grad_norm": 0.40980064868927, "learning_rate": 9.003001820735474e-05, "loss": 1.0383, "step": 2556 }, { "epoch": 0.22847186543659392, "grad_norm": 0.37999671697616577, "learning_rate": 9.002134569651433e-05, "loss": 1.026, "step": 2557 }, { "epoch": 0.22856121696785578, "grad_norm": 0.4312988817691803, "learning_rate": 9.00126698334596e-05, "loss": 1.0029, "step": 2558 }, { "epoch": 0.22865056849911766, "grad_norm": 0.45107796788215637, "learning_rate": 9.000399061891728e-05, "loss": 1.0555, "step": 2559 }, { "epoch": 0.22873992003037952, "grad_norm": 0.39357370138168335, "learning_rate": 8.999530805361434e-05, "loss": 1.0421, "step": 2560 }, { "epoch": 0.2288292715616414, "grad_norm": 0.4320610761642456, "learning_rate": 8.998662213827802e-05, "loss": 1.0491, "step": 2561 }, { "epoch": 0.22891862309290326, "grad_norm": 0.4132126271724701, "learning_rate": 8.997793287363588e-05, "loss": 0.9682, "step": 2562 }, { "epoch": 0.2290079746241651, "grad_norm": 0.38975366950035095, "learning_rate": 8.996924026041573e-05, "loss": 1.0131, "step": 2563 }, { "epoch": 0.229097326155427, "grad_norm": 0.42048487067222595, "learning_rate": 8.996054429934567e-05, "loss": 1.0186, "step": 2564 }, { "epoch": 0.22918667768668885, "grad_norm": 0.40763533115386963, "learning_rate": 8.995184499115405e-05, "loss": 0.9722, "step": 2565 }, { "epoch": 0.22927602921795073, "grad_norm": 0.47947773337364197, "learning_rate": 8.994314233656958e-05, "loss": 0.9427, "step": 2566 }, { "epoch": 0.2293653807492126, "grad_norm": 0.4561311602592468, "learning_rate": 8.993443633632116e-05, "loss": 0.9793, "step": 2567 }, { "epoch": 0.22945473228047444, "grad_norm": 0.3957562744617462, "learning_rate": 8.992572699113804e-05, "loss": 1.0227, "step": 2568 }, { "epoch": 0.22954408381173633, "grad_norm": 0.3969632685184479, "learning_rate": 8.99170143017497e-05, "loss": 1.001, "step": 2569 }, { "epoch": 0.22963343534299818, "grad_norm": 0.4251578748226166, "learning_rate": 8.990829826888592e-05, "loss": 1.0243, "step": 2570 }, { "epoch": 0.22972278687426007, "grad_norm": 0.48977455496788025, "learning_rate": 8.989957889327678e-05, "loss": 0.9637, "step": 2571 }, { "epoch": 0.22981213840552192, "grad_norm": 0.42375892400741577, "learning_rate": 8.98908561756526e-05, "loss": 0.9662, "step": 2572 }, { "epoch": 0.2299014899367838, "grad_norm": 0.3831101655960083, "learning_rate": 8.988213011674402e-05, "loss": 1.0247, "step": 2573 }, { "epoch": 0.22999084146804566, "grad_norm": 0.4255763590335846, "learning_rate": 8.987340071728192e-05, "loss": 1.0308, "step": 2574 }, { "epoch": 0.23008019299930751, "grad_norm": 0.4933546185493469, "learning_rate": 8.986466797799749e-05, "loss": 0.9334, "step": 2575 }, { "epoch": 0.2301695445305694, "grad_norm": 0.4253925085067749, "learning_rate": 8.985593189962221e-05, "loss": 1.0554, "step": 2576 }, { "epoch": 0.23025889606183125, "grad_norm": 0.4756745398044586, "learning_rate": 8.984719248288778e-05, "loss": 0.9267, "step": 2577 }, { "epoch": 0.23034824759309314, "grad_norm": 0.41270750761032104, "learning_rate": 8.983844972852625e-05, "loss": 0.9618, "step": 2578 }, { "epoch": 0.230437599124355, "grad_norm": 0.4245428144931793, "learning_rate": 8.982970363726989e-05, "loss": 1.0846, "step": 2579 }, { "epoch": 0.23052695065561685, "grad_norm": 0.595262348651886, "learning_rate": 8.98209542098513e-05, "loss": 0.9651, "step": 2580 }, { "epoch": 0.23061630218687873, "grad_norm": 0.4939884543418884, "learning_rate": 8.981220144700335e-05, "loss": 1.0355, "step": 2581 }, { "epoch": 0.23070565371814059, "grad_norm": 0.430448979139328, "learning_rate": 8.980344534945915e-05, "loss": 1.0304, "step": 2582 }, { "epoch": 0.23079500524940247, "grad_norm": 0.4117549657821655, "learning_rate": 8.979468591795213e-05, "loss": 1.024, "step": 2583 }, { "epoch": 0.23088435678066432, "grad_norm": 0.3863102197647095, "learning_rate": 8.978592315321597e-05, "loss": 1.0221, "step": 2584 }, { "epoch": 0.2309737083119262, "grad_norm": 0.37732282280921936, "learning_rate": 8.977715705598469e-05, "loss": 1.0347, "step": 2585 }, { "epoch": 0.23106305984318806, "grad_norm": 0.3825008273124695, "learning_rate": 8.976838762699249e-05, "loss": 1.1106, "step": 2586 }, { "epoch": 0.23115241137444992, "grad_norm": 0.38581812381744385, "learning_rate": 8.975961486697392e-05, "loss": 0.9931, "step": 2587 }, { "epoch": 0.2312417629057118, "grad_norm": 0.48402634263038635, "learning_rate": 8.975083877666382e-05, "loss": 0.9774, "step": 2588 }, { "epoch": 0.23133111443697366, "grad_norm": 0.4185434877872467, "learning_rate": 8.974205935679725e-05, "loss": 1.0566, "step": 2589 }, { "epoch": 0.23142046596823554, "grad_norm": 0.37079721689224243, "learning_rate": 8.973327660810958e-05, "loss": 1.0591, "step": 2590 }, { "epoch": 0.2315098174994974, "grad_norm": 0.44048118591308594, "learning_rate": 8.972449053133647e-05, "loss": 1.0344, "step": 2591 }, { "epoch": 0.23159916903075928, "grad_norm": 0.3690267503261566, "learning_rate": 8.971570112721385e-05, "loss": 1.0284, "step": 2592 }, { "epoch": 0.23168852056202113, "grad_norm": 0.38595226407051086, "learning_rate": 8.970690839647792e-05, "loss": 1.0338, "step": 2593 }, { "epoch": 0.231777872093283, "grad_norm": 0.427207350730896, "learning_rate": 8.969811233986519e-05, "loss": 0.9762, "step": 2594 }, { "epoch": 0.23186722362454487, "grad_norm": 0.4339447617530823, "learning_rate": 8.968931295811236e-05, "loss": 0.9732, "step": 2595 }, { "epoch": 0.23195657515580673, "grad_norm": 0.5293307304382324, "learning_rate": 8.968051025195653e-05, "loss": 0.9787, "step": 2596 }, { "epoch": 0.2320459266870686, "grad_norm": 0.44257649779319763, "learning_rate": 8.9671704222135e-05, "loss": 0.9879, "step": 2597 }, { "epoch": 0.23213527821833047, "grad_norm": 0.4917474091053009, "learning_rate": 8.966289486938538e-05, "loss": 1.0853, "step": 2598 }, { "epoch": 0.23222462974959232, "grad_norm": 0.41678449511528015, "learning_rate": 8.965408219444554e-05, "loss": 1.0143, "step": 2599 }, { "epoch": 0.2323139812808542, "grad_norm": 0.49743688106536865, "learning_rate": 8.964526619805362e-05, "loss": 0.9425, "step": 2600 }, { "epoch": 0.23240333281211606, "grad_norm": 0.4517749845981598, "learning_rate": 8.963644688094807e-05, "loss": 1.0329, "step": 2601 }, { "epoch": 0.23249268434337794, "grad_norm": 0.48534515500068665, "learning_rate": 8.96276242438676e-05, "loss": 0.9492, "step": 2602 }, { "epoch": 0.2325820358746398, "grad_norm": 0.44116154313087463, "learning_rate": 8.96187982875512e-05, "loss": 0.9327, "step": 2603 }, { "epoch": 0.23267138740590168, "grad_norm": 0.377189576625824, "learning_rate": 8.960996901273815e-05, "loss": 1.0739, "step": 2604 }, { "epoch": 0.23276073893716354, "grad_norm": 0.40555354952812195, "learning_rate": 8.960113642016797e-05, "loss": 1.0098, "step": 2605 }, { "epoch": 0.2328500904684254, "grad_norm": 0.64093416929245, "learning_rate": 8.95923005105805e-05, "loss": 0.8996, "step": 2606 }, { "epoch": 0.23293944199968727, "grad_norm": 0.4546898305416107, "learning_rate": 8.958346128471584e-05, "loss": 0.979, "step": 2607 }, { "epoch": 0.23302879353094913, "grad_norm": 0.4061562120914459, "learning_rate": 8.957461874331436e-05, "loss": 1.0129, "step": 2608 }, { "epoch": 0.233118145062211, "grad_norm": 0.45059117674827576, "learning_rate": 8.956577288711673e-05, "loss": 1.0107, "step": 2609 }, { "epoch": 0.23320749659347287, "grad_norm": 0.4028307795524597, "learning_rate": 8.955692371686388e-05, "loss": 1.0118, "step": 2610 }, { "epoch": 0.23329684812473475, "grad_norm": 0.5479282736778259, "learning_rate": 8.954807123329704e-05, "loss": 0.9751, "step": 2611 }, { "epoch": 0.2333861996559966, "grad_norm": 0.4075985550880432, "learning_rate": 8.953921543715767e-05, "loss": 1.0192, "step": 2612 }, { "epoch": 0.23347555118725846, "grad_norm": 0.41962021589279175, "learning_rate": 8.953035632918754e-05, "loss": 0.9887, "step": 2613 }, { "epoch": 0.23356490271852035, "grad_norm": 0.46383902430534363, "learning_rate": 8.952149391012872e-05, "loss": 1.0025, "step": 2614 }, { "epoch": 0.2336542542497822, "grad_norm": 0.51430743932724, "learning_rate": 8.95126281807235e-05, "loss": 1.0262, "step": 2615 }, { "epoch": 0.23374360578104408, "grad_norm": 0.45170411467552185, "learning_rate": 8.95037591417145e-05, "loss": 0.9814, "step": 2616 }, { "epoch": 0.23383295731230594, "grad_norm": 0.4477570652961731, "learning_rate": 8.94948867938446e-05, "loss": 1.0245, "step": 2617 }, { "epoch": 0.2339223088435678, "grad_norm": 0.3824307918548584, "learning_rate": 8.948601113785693e-05, "loss": 1.0084, "step": 2618 }, { "epoch": 0.23401166037482968, "grad_norm": 0.5118735432624817, "learning_rate": 8.947713217449495e-05, "loss": 0.9164, "step": 2619 }, { "epoch": 0.23410101190609153, "grad_norm": 0.4788876175880432, "learning_rate": 8.946824990450236e-05, "loss": 1.0392, "step": 2620 }, { "epoch": 0.23419036343735342, "grad_norm": 0.43572187423706055, "learning_rate": 8.945936432862312e-05, "loss": 0.9694, "step": 2621 }, { "epoch": 0.23427971496861527, "grad_norm": 0.437338650226593, "learning_rate": 8.945047544760153e-05, "loss": 0.9954, "step": 2622 }, { "epoch": 0.23436906649987715, "grad_norm": 0.40690669417381287, "learning_rate": 8.944158326218208e-05, "loss": 1.0038, "step": 2623 }, { "epoch": 0.234458418031139, "grad_norm": 0.40458807349205017, "learning_rate": 8.943268777310964e-05, "loss": 0.9944, "step": 2624 }, { "epoch": 0.23454776956240087, "grad_norm": 0.396119624376297, "learning_rate": 8.942378898112928e-05, "loss": 0.9914, "step": 2625 }, { "epoch": 0.23463712109366275, "grad_norm": 0.4142768085002899, "learning_rate": 8.941488688698634e-05, "loss": 1.1052, "step": 2626 }, { "epoch": 0.2347264726249246, "grad_norm": 0.516160786151886, "learning_rate": 8.940598149142652e-05, "loss": 1.0511, "step": 2627 }, { "epoch": 0.2348158241561865, "grad_norm": 0.41457611322402954, "learning_rate": 8.93970727951957e-05, "loss": 0.9767, "step": 2628 }, { "epoch": 0.23490517568744834, "grad_norm": 0.41176801919937134, "learning_rate": 8.938816079904009e-05, "loss": 1.0566, "step": 2629 }, { "epoch": 0.2349945272187102, "grad_norm": 0.42432960867881775, "learning_rate": 8.937924550370618e-05, "loss": 1.0717, "step": 2630 }, { "epoch": 0.23508387874997208, "grad_norm": 0.5278024077415466, "learning_rate": 8.937032690994068e-05, "loss": 0.9483, "step": 2631 }, { "epoch": 0.23517323028123394, "grad_norm": 0.39076733589172363, "learning_rate": 8.936140501849066e-05, "loss": 1.0294, "step": 2632 }, { "epoch": 0.23526258181249582, "grad_norm": 0.41586795449256897, "learning_rate": 8.935247983010339e-05, "loss": 0.9949, "step": 2633 }, { "epoch": 0.23535193334375767, "grad_norm": 0.4252259135246277, "learning_rate": 8.93435513455265e-05, "loss": 0.9886, "step": 2634 }, { "epoch": 0.23544128487501956, "grad_norm": 0.41890689730644226, "learning_rate": 8.93346195655078e-05, "loss": 1.0233, "step": 2635 }, { "epoch": 0.2355306364062814, "grad_norm": 0.38035479187965393, "learning_rate": 8.932568449079541e-05, "loss": 1.0627, "step": 2636 }, { "epoch": 0.23561998793754327, "grad_norm": 0.414369136095047, "learning_rate": 8.931674612213778e-05, "loss": 0.974, "step": 2637 }, { "epoch": 0.23570933946880515, "grad_norm": 0.3929893374443054, "learning_rate": 8.930780446028359e-05, "loss": 1.031, "step": 2638 }, { "epoch": 0.235798691000067, "grad_norm": 0.45517128705978394, "learning_rate": 8.929885950598177e-05, "loss": 0.996, "step": 2639 }, { "epoch": 0.2358880425313289, "grad_norm": 0.39736053347587585, "learning_rate": 8.928991125998157e-05, "loss": 1.001, "step": 2640 }, { "epoch": 0.23597739406259075, "grad_norm": 0.38671961426734924, "learning_rate": 8.92809597230325e-05, "loss": 1.0809, "step": 2641 }, { "epoch": 0.23606674559385263, "grad_norm": 0.4453861117362976, "learning_rate": 8.927200489588435e-05, "loss": 1.0721, "step": 2642 }, { "epoch": 0.23615609712511448, "grad_norm": 0.4946734309196472, "learning_rate": 8.926304677928718e-05, "loss": 0.9619, "step": 2643 }, { "epoch": 0.23624544865637634, "grad_norm": 0.34986773133277893, "learning_rate": 8.925408537399133e-05, "loss": 1.0727, "step": 2644 }, { "epoch": 0.23633480018763822, "grad_norm": 0.5045349597930908, "learning_rate": 8.924512068074742e-05, "loss": 0.9678, "step": 2645 }, { "epoch": 0.23642415171890008, "grad_norm": 0.4377862215042114, "learning_rate": 8.923615270030632e-05, "loss": 0.9723, "step": 2646 }, { "epoch": 0.23651350325016196, "grad_norm": 0.5501147508621216, "learning_rate": 8.922718143341921e-05, "loss": 0.9724, "step": 2647 }, { "epoch": 0.23660285478142382, "grad_norm": 0.42605921626091003, "learning_rate": 8.921820688083753e-05, "loss": 1.0548, "step": 2648 }, { "epoch": 0.23669220631268567, "grad_norm": 0.36013084650039673, "learning_rate": 8.920922904331297e-05, "loss": 1.0721, "step": 2649 }, { "epoch": 0.23678155784394755, "grad_norm": 0.4354332685470581, "learning_rate": 8.920024792159754e-05, "loss": 0.9691, "step": 2650 }, { "epoch": 0.2368709093752094, "grad_norm": 0.45081332325935364, "learning_rate": 8.919126351644351e-05, "loss": 0.8911, "step": 2651 }, { "epoch": 0.2369602609064713, "grad_norm": 0.41693955659866333, "learning_rate": 8.918227582860341e-05, "loss": 1.0437, "step": 2652 }, { "epoch": 0.23704961243773315, "grad_norm": 0.4205388128757477, "learning_rate": 8.917328485883005e-05, "loss": 1.0024, "step": 2653 }, { "epoch": 0.23713896396899503, "grad_norm": 0.3820997476577759, "learning_rate": 8.916429060787654e-05, "loss": 1.0151, "step": 2654 }, { "epoch": 0.2372283155002569, "grad_norm": 0.5409601330757141, "learning_rate": 8.915529307649621e-05, "loss": 0.931, "step": 2655 }, { "epoch": 0.23731766703151874, "grad_norm": 0.40477868914604187, "learning_rate": 8.914629226544273e-05, "loss": 1.0052, "step": 2656 }, { "epoch": 0.23740701856278063, "grad_norm": 0.5156925320625305, "learning_rate": 8.913728817547002e-05, "loss": 0.9712, "step": 2657 }, { "epoch": 0.23749637009404248, "grad_norm": 0.4694739282131195, "learning_rate": 8.912828080733223e-05, "loss": 0.9844, "step": 2658 }, { "epoch": 0.23758572162530436, "grad_norm": 0.520553708076477, "learning_rate": 8.911927016178385e-05, "loss": 0.9508, "step": 2659 }, { "epoch": 0.23767507315656622, "grad_norm": 0.4804941415786743, "learning_rate": 8.911025623957961e-05, "loss": 0.9502, "step": 2660 }, { "epoch": 0.23776442468782807, "grad_norm": 0.49052369594573975, "learning_rate": 8.910123904147452e-05, "loss": 0.9866, "step": 2661 }, { "epoch": 0.23785377621908996, "grad_norm": 0.45931488275527954, "learning_rate": 8.909221856822388e-05, "loss": 0.9628, "step": 2662 }, { "epoch": 0.2379431277503518, "grad_norm": 0.43489325046539307, "learning_rate": 8.908319482058325e-05, "loss": 1.0695, "step": 2663 }, { "epoch": 0.2380324792816137, "grad_norm": 0.47782567143440247, "learning_rate": 8.907416779930843e-05, "loss": 0.9529, "step": 2664 }, { "epoch": 0.23812183081287555, "grad_norm": 0.4324779808521271, "learning_rate": 8.906513750515559e-05, "loss": 0.9556, "step": 2665 }, { "epoch": 0.23821118234413743, "grad_norm": 0.4681794047355652, "learning_rate": 8.905610393888106e-05, "loss": 0.9987, "step": 2666 }, { "epoch": 0.2383005338753993, "grad_norm": 0.443891704082489, "learning_rate": 8.904706710124152e-05, "loss": 1.0109, "step": 2667 }, { "epoch": 0.23838988540666115, "grad_norm": 0.46368691325187683, "learning_rate": 8.90380269929939e-05, "loss": 0.9511, "step": 2668 }, { "epoch": 0.23847923693792303, "grad_norm": 0.5165659189224243, "learning_rate": 8.90289836148954e-05, "loss": 0.9759, "step": 2669 }, { "epoch": 0.23856858846918488, "grad_norm": 0.42744940519332886, "learning_rate": 8.90199369677035e-05, "loss": 0.9876, "step": 2670 }, { "epoch": 0.23865794000044677, "grad_norm": 0.41597381234169006, "learning_rate": 8.901088705217598e-05, "loss": 1.0021, "step": 2671 }, { "epoch": 0.23874729153170862, "grad_norm": 0.3914920687675476, "learning_rate": 8.900183386907082e-05, "loss": 1.1311, "step": 2672 }, { "epoch": 0.2388366430629705, "grad_norm": 0.49854883551597595, "learning_rate": 8.899277741914633e-05, "loss": 0.9727, "step": 2673 }, { "epoch": 0.23892599459423236, "grad_norm": 0.4740012586116791, "learning_rate": 8.898371770316111e-05, "loss": 0.9915, "step": 2674 }, { "epoch": 0.23901534612549422, "grad_norm": 0.4568859040737152, "learning_rate": 8.897465472187401e-05, "loss": 0.981, "step": 2675 }, { "epoch": 0.2391046976567561, "grad_norm": 0.4102030098438263, "learning_rate": 8.896558847604414e-05, "loss": 0.9928, "step": 2676 }, { "epoch": 0.23919404918801795, "grad_norm": 0.50264573097229, "learning_rate": 8.895651896643088e-05, "loss": 0.9527, "step": 2677 }, { "epoch": 0.23928340071927984, "grad_norm": 0.4080445170402527, "learning_rate": 8.894744619379391e-05, "loss": 1.1131, "step": 2678 }, { "epoch": 0.2393727522505417, "grad_norm": 0.45948582887649536, "learning_rate": 8.893837015889317e-05, "loss": 0.9385, "step": 2679 }, { "epoch": 0.23946210378180355, "grad_norm": 0.4374150335788727, "learning_rate": 8.892929086248888e-05, "loss": 0.9693, "step": 2680 }, { "epoch": 0.23955145531306543, "grad_norm": 0.4575091302394867, "learning_rate": 8.892020830534152e-05, "loss": 0.9821, "step": 2681 }, { "epoch": 0.2396408068443273, "grad_norm": 0.39326122403144836, "learning_rate": 8.891112248821186e-05, "loss": 1.0787, "step": 2682 }, { "epoch": 0.23973015837558917, "grad_norm": 0.4379619061946869, "learning_rate": 8.890203341186092e-05, "loss": 1.0188, "step": 2683 }, { "epoch": 0.23981950990685102, "grad_norm": 0.47353631258010864, "learning_rate": 8.889294107705002e-05, "loss": 1.0197, "step": 2684 }, { "epoch": 0.2399088614381129, "grad_norm": 0.4937237799167633, "learning_rate": 8.888384548454075e-05, "loss": 0.9871, "step": 2685 }, { "epoch": 0.23999821296937476, "grad_norm": 0.39890146255493164, "learning_rate": 8.887474663509493e-05, "loss": 1.0274, "step": 2686 }, { "epoch": 0.24008756450063662, "grad_norm": 0.3992265462875366, "learning_rate": 8.886564452947471e-05, "loss": 1.0258, "step": 2687 }, { "epoch": 0.2401769160318985, "grad_norm": 0.4458499550819397, "learning_rate": 8.885653916844248e-05, "loss": 0.9608, "step": 2688 }, { "epoch": 0.24026626756316036, "grad_norm": 0.39621952176094055, "learning_rate": 8.884743055276092e-05, "loss": 0.9997, "step": 2689 }, { "epoch": 0.24035561909442224, "grad_norm": 0.44387656450271606, "learning_rate": 8.883831868319297e-05, "loss": 1.0045, "step": 2690 }, { "epoch": 0.2404449706256841, "grad_norm": 0.38763174414634705, "learning_rate": 8.882920356050184e-05, "loss": 1.0502, "step": 2691 }, { "epoch": 0.24053432215694595, "grad_norm": 0.4442698657512665, "learning_rate": 8.882008518545101e-05, "loss": 0.9964, "step": 2692 }, { "epoch": 0.24062367368820783, "grad_norm": 0.39915651082992554, "learning_rate": 8.881096355880428e-05, "loss": 1.0732, "step": 2693 }, { "epoch": 0.2407130252194697, "grad_norm": 0.458362877368927, "learning_rate": 8.880183868132563e-05, "loss": 1.0264, "step": 2694 }, { "epoch": 0.24080237675073157, "grad_norm": 0.4793623685836792, "learning_rate": 8.87927105537794e-05, "loss": 0.931, "step": 2695 }, { "epoch": 0.24089172828199343, "grad_norm": 0.4859834909439087, "learning_rate": 8.878357917693016e-05, "loss": 0.9891, "step": 2696 }, { "epoch": 0.2409810798132553, "grad_norm": 0.4228796362876892, "learning_rate": 8.877444455154278e-05, "loss": 1.0693, "step": 2697 }, { "epoch": 0.24107043134451717, "grad_norm": 0.37542724609375, "learning_rate": 8.876530667838234e-05, "loss": 1.0717, "step": 2698 }, { "epoch": 0.24115978287577902, "grad_norm": 0.44685491919517517, "learning_rate": 8.875616555821426e-05, "loss": 1.0261, "step": 2699 }, { "epoch": 0.2412491344070409, "grad_norm": 0.37416496872901917, "learning_rate": 8.874702119180421e-05, "loss": 1.1008, "step": 2700 }, { "epoch": 0.24133848593830276, "grad_norm": 0.3862450420856476, "learning_rate": 8.873787357991812e-05, "loss": 1.0433, "step": 2701 }, { "epoch": 0.24142783746956464, "grad_norm": 0.4019640386104584, "learning_rate": 8.87287227233222e-05, "loss": 1.0231, "step": 2702 }, { "epoch": 0.2415171890008265, "grad_norm": 0.4376417398452759, "learning_rate": 8.871956862278294e-05, "loss": 1.0422, "step": 2703 }, { "epoch": 0.24160654053208838, "grad_norm": 0.39731988310813904, "learning_rate": 8.871041127906707e-05, "loss": 1.0429, "step": 2704 }, { "epoch": 0.24169589206335024, "grad_norm": 0.44788575172424316, "learning_rate": 8.870125069294166e-05, "loss": 1.0384, "step": 2705 }, { "epoch": 0.2417852435946121, "grad_norm": 0.43335989117622375, "learning_rate": 8.869208686517395e-05, "loss": 1.0378, "step": 2706 }, { "epoch": 0.24187459512587398, "grad_norm": 0.4897671639919281, "learning_rate": 8.868291979653154e-05, "loss": 0.9621, "step": 2707 }, { "epoch": 0.24196394665713583, "grad_norm": 0.4613479971885681, "learning_rate": 8.867374948778228e-05, "loss": 0.9983, "step": 2708 }, { "epoch": 0.24205329818839771, "grad_norm": 0.5041788220405579, "learning_rate": 8.866457593969427e-05, "loss": 1.0287, "step": 2709 }, { "epoch": 0.24214264971965957, "grad_norm": 0.45073777437210083, "learning_rate": 8.865539915303588e-05, "loss": 0.9705, "step": 2710 }, { "epoch": 0.24223200125092142, "grad_norm": 0.39561158418655396, "learning_rate": 8.864621912857578e-05, "loss": 1.0513, "step": 2711 }, { "epoch": 0.2423213527821833, "grad_norm": 0.40363892912864685, "learning_rate": 8.86370358670829e-05, "loss": 0.9696, "step": 2712 }, { "epoch": 0.24241070431344516, "grad_norm": 0.4564259648323059, "learning_rate": 8.86278493693264e-05, "loss": 1.0501, "step": 2713 }, { "epoch": 0.24250005584470705, "grad_norm": 0.42725691199302673, "learning_rate": 8.861865963607578e-05, "loss": 0.9827, "step": 2714 }, { "epoch": 0.2425894073759689, "grad_norm": 0.4821705222129822, "learning_rate": 8.860946666810079e-05, "loss": 0.952, "step": 2715 }, { "epoch": 0.24267875890723078, "grad_norm": 0.42694029211997986, "learning_rate": 8.86002704661714e-05, "loss": 0.9642, "step": 2716 }, { "epoch": 0.24276811043849264, "grad_norm": 0.45525282621383667, "learning_rate": 8.85910710310579e-05, "loss": 0.9894, "step": 2717 }, { "epoch": 0.2428574619697545, "grad_norm": 0.4108658730983734, "learning_rate": 8.858186836353087e-05, "loss": 0.966, "step": 2718 }, { "epoch": 0.24294681350101638, "grad_norm": 0.40456750988960266, "learning_rate": 8.857266246436111e-05, "loss": 1.0883, "step": 2719 }, { "epoch": 0.24303616503227823, "grad_norm": 0.4102852940559387, "learning_rate": 8.856345333431971e-05, "loss": 1.0069, "step": 2720 }, { "epoch": 0.24312551656354012, "grad_norm": 0.40076547861099243, "learning_rate": 8.855424097417802e-05, "loss": 1.0423, "step": 2721 }, { "epoch": 0.24321486809480197, "grad_norm": 0.3830002248287201, "learning_rate": 8.854502538470771e-05, "loss": 1.003, "step": 2722 }, { "epoch": 0.24330421962606383, "grad_norm": 0.4300539493560791, "learning_rate": 8.853580656668065e-05, "loss": 1.0192, "step": 2723 }, { "epoch": 0.2433935711573257, "grad_norm": 0.3924980163574219, "learning_rate": 8.852658452086904e-05, "loss": 1.021, "step": 2724 }, { "epoch": 0.24348292268858757, "grad_norm": 0.3826366364955902, "learning_rate": 8.851735924804531e-05, "loss": 1.0014, "step": 2725 }, { "epoch": 0.24357227421984945, "grad_norm": 0.48502808809280396, "learning_rate": 8.850813074898217e-05, "loss": 1.0423, "step": 2726 }, { "epoch": 0.2436616257511113, "grad_norm": 0.40080320835113525, "learning_rate": 8.849889902445263e-05, "loss": 1.0273, "step": 2727 }, { "epoch": 0.2437509772823732, "grad_norm": 0.4349658489227295, "learning_rate": 8.848966407522992e-05, "loss": 1.0189, "step": 2728 }, { "epoch": 0.24384032881363504, "grad_norm": 0.4866269826889038, "learning_rate": 8.848042590208756e-05, "loss": 0.9429, "step": 2729 }, { "epoch": 0.2439296803448969, "grad_norm": 0.38999444246292114, "learning_rate": 8.847118450579937e-05, "loss": 0.964, "step": 2730 }, { "epoch": 0.24401903187615878, "grad_norm": 0.39741405844688416, "learning_rate": 8.84619398871394e-05, "loss": 1.038, "step": 2731 }, { "epoch": 0.24410838340742064, "grad_norm": 0.42767640948295593, "learning_rate": 8.845269204688199e-05, "loss": 1.041, "step": 2732 }, { "epoch": 0.24419773493868252, "grad_norm": 0.42031916975975037, "learning_rate": 8.844344098580176e-05, "loss": 0.9304, "step": 2733 }, { "epoch": 0.24428708646994438, "grad_norm": 0.34102049469947815, "learning_rate": 8.843418670467353e-05, "loss": 1.021, "step": 2734 }, { "epoch": 0.24437643800120626, "grad_norm": 0.387407511472702, "learning_rate": 8.842492920427252e-05, "loss": 1.0539, "step": 2735 }, { "epoch": 0.24446578953246811, "grad_norm": 0.373221218585968, "learning_rate": 8.84156684853741e-05, "loss": 1.0037, "step": 2736 }, { "epoch": 0.24455514106372997, "grad_norm": 0.3756154477596283, "learning_rate": 8.840640454875396e-05, "loss": 1.0619, "step": 2737 }, { "epoch": 0.24464449259499185, "grad_norm": 0.47706297039985657, "learning_rate": 8.839713739518807e-05, "loss": 0.8904, "step": 2738 }, { "epoch": 0.2447338441262537, "grad_norm": 0.49002113938331604, "learning_rate": 8.838786702545262e-05, "loss": 1.0568, "step": 2739 }, { "epoch": 0.2448231956575156, "grad_norm": 0.4314875304698944, "learning_rate": 8.837859344032413e-05, "loss": 0.9789, "step": 2740 }, { "epoch": 0.24491254718877745, "grad_norm": 0.4264450967311859, "learning_rate": 8.836931664057935e-05, "loss": 0.9799, "step": 2741 }, { "epoch": 0.2450018987200393, "grad_norm": 0.4409767985343933, "learning_rate": 8.836003662699533e-05, "loss": 0.9571, "step": 2742 }, { "epoch": 0.24509125025130118, "grad_norm": 0.47590428590774536, "learning_rate": 8.835075340034933e-05, "loss": 0.9076, "step": 2743 }, { "epoch": 0.24518060178256304, "grad_norm": 0.4312697947025299, "learning_rate": 8.834146696141895e-05, "loss": 0.9393, "step": 2744 }, { "epoch": 0.24526995331382492, "grad_norm": 0.42560774087905884, "learning_rate": 8.833217731098203e-05, "loss": 1.0549, "step": 2745 }, { "epoch": 0.24535930484508678, "grad_norm": 0.44607481360435486, "learning_rate": 8.832288444981666e-05, "loss": 0.9967, "step": 2746 }, { "epoch": 0.24544865637634866, "grad_norm": 0.49672216176986694, "learning_rate": 8.831358837870122e-05, "loss": 1.0662, "step": 2747 }, { "epoch": 0.24553800790761052, "grad_norm": 0.44877761602401733, "learning_rate": 8.830428909841437e-05, "loss": 1.0782, "step": 2748 }, { "epoch": 0.24562735943887237, "grad_norm": 0.5102913975715637, "learning_rate": 8.829498660973501e-05, "loss": 0.9089, "step": 2749 }, { "epoch": 0.24571671097013426, "grad_norm": 0.4264692962169647, "learning_rate": 8.828568091344234e-05, "loss": 1.003, "step": 2750 }, { "epoch": 0.2458060625013961, "grad_norm": 0.45728904008865356, "learning_rate": 8.827637201031577e-05, "loss": 0.9578, "step": 2751 }, { "epoch": 0.245895414032658, "grad_norm": 0.4567210078239441, "learning_rate": 8.826705990113506e-05, "loss": 1.0127, "step": 2752 }, { "epoch": 0.24598476556391985, "grad_norm": 0.4181562662124634, "learning_rate": 8.825774458668019e-05, "loss": 1.0222, "step": 2753 }, { "epoch": 0.24607411709518173, "grad_norm": 0.415363609790802, "learning_rate": 8.824842606773142e-05, "loss": 0.9997, "step": 2754 }, { "epoch": 0.2461634686264436, "grad_norm": 0.43965113162994385, "learning_rate": 8.823910434506925e-05, "loss": 1.0537, "step": 2755 }, { "epoch": 0.24625282015770544, "grad_norm": 0.36847561597824097, "learning_rate": 8.82297794194745e-05, "loss": 1.0743, "step": 2756 }, { "epoch": 0.24634217168896733, "grad_norm": 0.5313237309455872, "learning_rate": 8.822045129172822e-05, "loss": 1.0059, "step": 2757 }, { "epoch": 0.24643152322022918, "grad_norm": 0.41933882236480713, "learning_rate": 8.821111996261176e-05, "loss": 0.9882, "step": 2758 }, { "epoch": 0.24652087475149106, "grad_norm": 0.43498140573501587, "learning_rate": 8.820178543290668e-05, "loss": 1.0393, "step": 2759 }, { "epoch": 0.24661022628275292, "grad_norm": 0.5251235365867615, "learning_rate": 8.819244770339488e-05, "loss": 1.0354, "step": 2760 }, { "epoch": 0.24669957781401478, "grad_norm": 0.4627592861652374, "learning_rate": 8.818310677485848e-05, "loss": 0.9657, "step": 2761 }, { "epoch": 0.24678892934527666, "grad_norm": 0.4479570984840393, "learning_rate": 8.817376264807989e-05, "loss": 0.9823, "step": 2762 }, { "epoch": 0.24687828087653851, "grad_norm": 0.40790751576423645, "learning_rate": 8.816441532384177e-05, "loss": 1.0679, "step": 2763 }, { "epoch": 0.2469676324078004, "grad_norm": 0.5192294120788574, "learning_rate": 8.815506480292706e-05, "loss": 1.0415, "step": 2764 }, { "epoch": 0.24705698393906225, "grad_norm": 0.5589134097099304, "learning_rate": 8.814571108611896e-05, "loss": 0.9845, "step": 2765 }, { "epoch": 0.24714633547032414, "grad_norm": 0.38889801502227783, "learning_rate": 8.813635417420096e-05, "loss": 1.066, "step": 2766 }, { "epoch": 0.247235687001586, "grad_norm": 0.41053298115730286, "learning_rate": 8.812699406795682e-05, "loss": 0.99, "step": 2767 }, { "epoch": 0.24732503853284785, "grad_norm": 0.4181362986564636, "learning_rate": 8.81176307681705e-05, "loss": 0.9906, "step": 2768 }, { "epoch": 0.24741439006410973, "grad_norm": 0.42989280819892883, "learning_rate": 8.810826427562629e-05, "loss": 0.9261, "step": 2769 }, { "epoch": 0.24750374159537158, "grad_norm": 0.3895682096481323, "learning_rate": 8.809889459110875e-05, "loss": 0.9863, "step": 2770 }, { "epoch": 0.24759309312663347, "grad_norm": 0.4124111235141754, "learning_rate": 8.808952171540268e-05, "loss": 0.9897, "step": 2771 }, { "epoch": 0.24768244465789532, "grad_norm": 0.4256143271923065, "learning_rate": 8.808014564929316e-05, "loss": 0.9599, "step": 2772 }, { "epoch": 0.24777179618915718, "grad_norm": 0.4850587248802185, "learning_rate": 8.807076639356556e-05, "loss": 0.9145, "step": 2773 }, { "epoch": 0.24786114772041906, "grad_norm": 0.46698129177093506, "learning_rate": 8.806138394900544e-05, "loss": 1.0141, "step": 2774 }, { "epoch": 0.24795049925168092, "grad_norm": 0.4754186272621155, "learning_rate": 8.805199831639872e-05, "loss": 1.0272, "step": 2775 }, { "epoch": 0.2480398507829428, "grad_norm": 0.4320179224014282, "learning_rate": 8.804260949653154e-05, "loss": 0.9598, "step": 2776 }, { "epoch": 0.24812920231420466, "grad_norm": 0.5230510830879211, "learning_rate": 8.80332174901903e-05, "loss": 0.9854, "step": 2777 }, { "epoch": 0.24821855384546654, "grad_norm": 0.4366409182548523, "learning_rate": 8.80238222981617e-05, "loss": 1.0322, "step": 2778 }, { "epoch": 0.2483079053767284, "grad_norm": 0.46669307351112366, "learning_rate": 8.801442392123267e-05, "loss": 1.0078, "step": 2779 }, { "epoch": 0.24839725690799025, "grad_norm": 0.570253849029541, "learning_rate": 8.800502236019044e-05, "loss": 1.0106, "step": 2780 }, { "epoch": 0.24848660843925213, "grad_norm": 0.4147552251815796, "learning_rate": 8.799561761582247e-05, "loss": 1.0206, "step": 2781 }, { "epoch": 0.248575959970514, "grad_norm": 0.39246666431427, "learning_rate": 8.798620968891653e-05, "loss": 1.0486, "step": 2782 }, { "epoch": 0.24866531150177587, "grad_norm": 0.4565700590610504, "learning_rate": 8.797679858026062e-05, "loss": 0.9924, "step": 2783 }, { "epoch": 0.24875466303303773, "grad_norm": 0.38775768876075745, "learning_rate": 8.796738429064303e-05, "loss": 1.0482, "step": 2784 }, { "epoch": 0.2488440145642996, "grad_norm": 0.4036676287651062, "learning_rate": 8.795796682085231e-05, "loss": 1.0342, "step": 2785 }, { "epoch": 0.24893336609556146, "grad_norm": 0.5216470956802368, "learning_rate": 8.794854617167725e-05, "loss": 1.0855, "step": 2786 }, { "epoch": 0.24902271762682332, "grad_norm": 0.4413949251174927, "learning_rate": 8.793912234390695e-05, "loss": 0.9762, "step": 2787 }, { "epoch": 0.2491120691580852, "grad_norm": 0.4308202862739563, "learning_rate": 8.792969533833076e-05, "loss": 0.9775, "step": 2788 }, { "epoch": 0.24920142068934706, "grad_norm": 0.5357450246810913, "learning_rate": 8.792026515573828e-05, "loss": 1.0041, "step": 2789 }, { "epoch": 0.24929077222060894, "grad_norm": 0.35964709520339966, "learning_rate": 8.791083179691939e-05, "loss": 1.012, "step": 2790 }, { "epoch": 0.2493801237518708, "grad_norm": 0.535650908946991, "learning_rate": 8.790139526266423e-05, "loss": 1.0273, "step": 2791 }, { "epoch": 0.24946947528313265, "grad_norm": 0.4403007924556732, "learning_rate": 8.789195555376323e-05, "loss": 0.9549, "step": 2792 }, { "epoch": 0.24955882681439454, "grad_norm": 0.3707777261734009, "learning_rate": 8.788251267100704e-05, "loss": 1.0049, "step": 2793 }, { "epoch": 0.2496481783456564, "grad_norm": 0.4524572491645813, "learning_rate": 8.787306661518662e-05, "loss": 1.0156, "step": 2794 }, { "epoch": 0.24973752987691827, "grad_norm": 0.4172338545322418, "learning_rate": 8.786361738709319e-05, "loss": 0.9847, "step": 2795 }, { "epoch": 0.24982688140818013, "grad_norm": 0.40239232778549194, "learning_rate": 8.78541649875182e-05, "loss": 1.0954, "step": 2796 }, { "epoch": 0.249916232939442, "grad_norm": 0.44025593996047974, "learning_rate": 8.784470941725338e-05, "loss": 1.0388, "step": 2797 }, { "epoch": 0.25000558447070387, "grad_norm": 0.3682102859020233, "learning_rate": 8.783525067709075e-05, "loss": 1.0017, "step": 2798 }, { "epoch": 0.25009493600196575, "grad_norm": 0.5301668643951416, "learning_rate": 8.782578876782259e-05, "loss": 0.8846, "step": 2799 }, { "epoch": 0.2501842875332276, "grad_norm": 0.5186197757720947, "learning_rate": 8.781632369024141e-05, "loss": 0.9255, "step": 2800 }, { "epoch": 0.25027363906448946, "grad_norm": 0.4258555769920349, "learning_rate": 8.780685544514006e-05, "loss": 0.9937, "step": 2801 }, { "epoch": 0.25036299059575134, "grad_norm": 0.44377055764198303, "learning_rate": 8.779738403331157e-05, "loss": 1.0151, "step": 2802 }, { "epoch": 0.2504523421270132, "grad_norm": 0.3976726830005646, "learning_rate": 8.778790945554926e-05, "loss": 1.0148, "step": 2803 }, { "epoch": 0.25054169365827506, "grad_norm": 0.34984302520751953, "learning_rate": 8.777843171264675e-05, "loss": 1.0027, "step": 2804 }, { "epoch": 0.25063104518953694, "grad_norm": 0.36528366804122925, "learning_rate": 8.776895080539789e-05, "loss": 1.0559, "step": 2805 }, { "epoch": 0.2507203967207988, "grad_norm": 0.48383477330207825, "learning_rate": 8.775946673459681e-05, "loss": 1.0472, "step": 2806 }, { "epoch": 0.25080974825206065, "grad_norm": 0.3872027099132538, "learning_rate": 8.774997950103791e-05, "loss": 1.0441, "step": 2807 }, { "epoch": 0.25089909978332253, "grad_norm": 0.45994165539741516, "learning_rate": 8.774048910551584e-05, "loss": 0.9451, "step": 2808 }, { "epoch": 0.2509884513145844, "grad_norm": 0.39448827505111694, "learning_rate": 8.773099554882552e-05, "loss": 1.0241, "step": 2809 }, { "epoch": 0.25107780284584624, "grad_norm": 0.39023977518081665, "learning_rate": 8.772149883176215e-05, "loss": 0.9925, "step": 2810 }, { "epoch": 0.2511671543771081, "grad_norm": 0.49682340025901794, "learning_rate": 8.771199895512115e-05, "loss": 0.9678, "step": 2811 }, { "epoch": 0.25125650590837, "grad_norm": 0.5279167890548706, "learning_rate": 8.770249591969829e-05, "loss": 0.9971, "step": 2812 }, { "epoch": 0.2513458574396319, "grad_norm": 0.46056196093559265, "learning_rate": 8.769298972628948e-05, "loss": 1.0181, "step": 2813 }, { "epoch": 0.2514352089708937, "grad_norm": 0.40414974093437195, "learning_rate": 8.768348037569102e-05, "loss": 0.9936, "step": 2814 }, { "epoch": 0.2515245605021556, "grad_norm": 0.45471900701522827, "learning_rate": 8.76739678686994e-05, "loss": 1.0091, "step": 2815 }, { "epoch": 0.2516139120334175, "grad_norm": 0.4152718186378479, "learning_rate": 8.766445220611139e-05, "loss": 1.0165, "step": 2816 }, { "epoch": 0.2517032635646793, "grad_norm": 0.3915504515171051, "learning_rate": 8.765493338872403e-05, "loss": 1.028, "step": 2817 }, { "epoch": 0.2517926150959412, "grad_norm": 0.4392106533050537, "learning_rate": 8.764541141733464e-05, "loss": 0.9371, "step": 2818 }, { "epoch": 0.2518819666272031, "grad_norm": 0.39547622203826904, "learning_rate": 8.763588629274077e-05, "loss": 1.018, "step": 2819 }, { "epoch": 0.25197131815846496, "grad_norm": 0.35008326172828674, "learning_rate": 8.762635801574025e-05, "loss": 0.9658, "step": 2820 }, { "epoch": 0.2520606696897268, "grad_norm": 0.4563378393650055, "learning_rate": 8.761682658713119e-05, "loss": 0.966, "step": 2821 }, { "epoch": 0.2521500212209887, "grad_norm": 0.3921094536781311, "learning_rate": 8.760729200771192e-05, "loss": 1.0282, "step": 2822 }, { "epoch": 0.25223937275225056, "grad_norm": 0.4152204990386963, "learning_rate": 8.759775427828108e-05, "loss": 1.0107, "step": 2823 }, { "epoch": 0.2523287242835124, "grad_norm": 0.47061243653297424, "learning_rate": 8.758821339963756e-05, "loss": 1.0095, "step": 2824 }, { "epoch": 0.25241807581477427, "grad_norm": 0.49046018719673157, "learning_rate": 8.75786693725805e-05, "loss": 1.0251, "step": 2825 }, { "epoch": 0.25250742734603615, "grad_norm": 0.43877169489860535, "learning_rate": 8.756912219790933e-05, "loss": 0.9794, "step": 2826 }, { "epoch": 0.25259677887729803, "grad_norm": 0.4174705445766449, "learning_rate": 8.755957187642372e-05, "loss": 0.9845, "step": 2827 }, { "epoch": 0.25268613040855986, "grad_norm": 0.3626020550727844, "learning_rate": 8.755001840892361e-05, "loss": 1.0018, "step": 2828 }, { "epoch": 0.25277548193982174, "grad_norm": 0.48767632246017456, "learning_rate": 8.754046179620919e-05, "loss": 1.0055, "step": 2829 }, { "epoch": 0.2528648334710836, "grad_norm": 0.4239792823791504, "learning_rate": 8.753090203908095e-05, "loss": 0.9721, "step": 2830 }, { "epoch": 0.25295418500234546, "grad_norm": 0.4804183542728424, "learning_rate": 8.752133913833962e-05, "loss": 0.9585, "step": 2831 }, { "epoch": 0.25304353653360734, "grad_norm": 0.45902127027511597, "learning_rate": 8.751177309478618e-05, "loss": 1.0059, "step": 2832 }, { "epoch": 0.2531328880648692, "grad_norm": 0.473166823387146, "learning_rate": 8.750220390922188e-05, "loss": 0.9744, "step": 2833 }, { "epoch": 0.2532222395961311, "grad_norm": 0.475212037563324, "learning_rate": 8.74926315824483e-05, "loss": 1.0587, "step": 2834 }, { "epoch": 0.25331159112739293, "grad_norm": 0.4180943965911865, "learning_rate": 8.748305611526715e-05, "loss": 0.9937, "step": 2835 }, { "epoch": 0.2534009426586548, "grad_norm": 0.4551735520362854, "learning_rate": 8.747347750848052e-05, "loss": 0.968, "step": 2836 }, { "epoch": 0.2534902941899167, "grad_norm": 0.3868304491043091, "learning_rate": 8.74638957628907e-05, "loss": 0.9912, "step": 2837 }, { "epoch": 0.2535796457211785, "grad_norm": 0.43097737431526184, "learning_rate": 8.745431087930028e-05, "loss": 1.0214, "step": 2838 }, { "epoch": 0.2536689972524404, "grad_norm": 0.37458547949790955, "learning_rate": 8.74447228585121e-05, "loss": 1.017, "step": 2839 }, { "epoch": 0.2537583487837023, "grad_norm": 0.5047030448913574, "learning_rate": 8.743513170132924e-05, "loss": 0.9561, "step": 2840 }, { "epoch": 0.2538477003149642, "grad_norm": 0.3879890441894531, "learning_rate": 8.742553740855506e-05, "loss": 1.0405, "step": 2841 }, { "epoch": 0.253937051846226, "grad_norm": 0.44273656606674194, "learning_rate": 8.74159399809932e-05, "loss": 1.0599, "step": 2842 }, { "epoch": 0.2540264033774879, "grad_norm": 0.4389507472515106, "learning_rate": 8.740633941944754e-05, "loss": 1.0179, "step": 2843 }, { "epoch": 0.25411575490874977, "grad_norm": 0.44529905915260315, "learning_rate": 8.739673572472225e-05, "loss": 1.0689, "step": 2844 }, { "epoch": 0.2542051064400116, "grad_norm": 0.4119149148464203, "learning_rate": 8.738712889762171e-05, "loss": 0.9636, "step": 2845 }, { "epoch": 0.2542944579712735, "grad_norm": 0.4760536551475525, "learning_rate": 8.73775189389506e-05, "loss": 0.963, "step": 2846 }, { "epoch": 0.25438380950253536, "grad_norm": 0.4643342196941376, "learning_rate": 8.736790584951387e-05, "loss": 0.9906, "step": 2847 }, { "epoch": 0.2544731610337972, "grad_norm": 0.402329683303833, "learning_rate": 8.735828963011671e-05, "loss": 1.0063, "step": 2848 }, { "epoch": 0.2545625125650591, "grad_norm": 0.41884222626686096, "learning_rate": 8.734867028156458e-05, "loss": 1.0401, "step": 2849 }, { "epoch": 0.25465186409632096, "grad_norm": 0.3897888660430908, "learning_rate": 8.733904780466321e-05, "loss": 1.057, "step": 2850 }, { "epoch": 0.25474121562758284, "grad_norm": 0.4296242296695709, "learning_rate": 8.732942220021858e-05, "loss": 0.9684, "step": 2851 }, { "epoch": 0.25483056715884467, "grad_norm": 0.47800928354263306, "learning_rate": 8.731979346903693e-05, "loss": 1.0239, "step": 2852 }, { "epoch": 0.25491991869010655, "grad_norm": 0.4393937587738037, "learning_rate": 8.731016161192479e-05, "loss": 1.0716, "step": 2853 }, { "epoch": 0.25500927022136843, "grad_norm": 0.46051809191703796, "learning_rate": 8.730052662968891e-05, "loss": 1.0035, "step": 2854 }, { "epoch": 0.25509862175263026, "grad_norm": 0.3697504997253418, "learning_rate": 8.729088852313633e-05, "loss": 0.9667, "step": 2855 }, { "epoch": 0.25518797328389214, "grad_norm": 0.4968481659889221, "learning_rate": 8.728124729307434e-05, "loss": 0.9228, "step": 2856 }, { "epoch": 0.255277324815154, "grad_norm": 0.46425577998161316, "learning_rate": 8.727160294031051e-05, "loss": 1.0125, "step": 2857 }, { "epoch": 0.2553666763464159, "grad_norm": 0.40039297938346863, "learning_rate": 8.726195546565263e-05, "loss": 1.0081, "step": 2858 }, { "epoch": 0.25545602787767774, "grad_norm": 0.48892778158187866, "learning_rate": 8.725230486990882e-05, "loss": 0.9115, "step": 2859 }, { "epoch": 0.2555453794089396, "grad_norm": 0.3827580511569977, "learning_rate": 8.724265115388739e-05, "loss": 1.0488, "step": 2860 }, { "epoch": 0.2556347309402015, "grad_norm": 0.45204082131385803, "learning_rate": 8.723299431839693e-05, "loss": 1.0264, "step": 2861 }, { "epoch": 0.25572408247146333, "grad_norm": 0.46111056208610535, "learning_rate": 8.722333436424633e-05, "loss": 0.9979, "step": 2862 }, { "epoch": 0.2558134340027252, "grad_norm": 0.40219980478286743, "learning_rate": 8.721367129224471e-05, "loss": 1.0384, "step": 2863 }, { "epoch": 0.2559027855339871, "grad_norm": 0.37858057022094727, "learning_rate": 8.720400510320146e-05, "loss": 1.0113, "step": 2864 }, { "epoch": 0.255992137065249, "grad_norm": 0.4286115765571594, "learning_rate": 8.71943357979262e-05, "loss": 0.9691, "step": 2865 }, { "epoch": 0.2560814885965108, "grad_norm": 0.4207289218902588, "learning_rate": 8.718466337722885e-05, "loss": 0.9316, "step": 2866 }, { "epoch": 0.2561708401277727, "grad_norm": 0.4365525245666504, "learning_rate": 8.717498784191958e-05, "loss": 0.99, "step": 2867 }, { "epoch": 0.2562601916590346, "grad_norm": 0.4580113887786865, "learning_rate": 8.716530919280883e-05, "loss": 0.9109, "step": 2868 }, { "epoch": 0.2563495431902964, "grad_norm": 0.4263405203819275, "learning_rate": 8.715562743070729e-05, "loss": 0.9391, "step": 2869 }, { "epoch": 0.2564388947215583, "grad_norm": 0.4391098916530609, "learning_rate": 8.71459425564259e-05, "loss": 1.0104, "step": 2870 }, { "epoch": 0.25652824625282017, "grad_norm": 0.45276468992233276, "learning_rate": 8.713625457077585e-05, "loss": 1.1036, "step": 2871 }, { "epoch": 0.25661759778408205, "grad_norm": 0.45625758171081543, "learning_rate": 8.712656347456867e-05, "loss": 0.9503, "step": 2872 }, { "epoch": 0.2567069493153439, "grad_norm": 0.40368926525115967, "learning_rate": 8.711686926861604e-05, "loss": 1.0154, "step": 2873 }, { "epoch": 0.25679630084660576, "grad_norm": 0.43693703413009644, "learning_rate": 8.710717195372997e-05, "loss": 1.0454, "step": 2874 }, { "epoch": 0.25688565237786765, "grad_norm": 0.38676273822784424, "learning_rate": 8.709747153072272e-05, "loss": 1.0724, "step": 2875 }, { "epoch": 0.2569750039091295, "grad_norm": 0.441709041595459, "learning_rate": 8.708776800040679e-05, "loss": 0.9994, "step": 2876 }, { "epoch": 0.25706435544039136, "grad_norm": 0.40171146392822266, "learning_rate": 8.707806136359497e-05, "loss": 1.0599, "step": 2877 }, { "epoch": 0.25715370697165324, "grad_norm": 0.40110501646995544, "learning_rate": 8.706835162110028e-05, "loss": 1.015, "step": 2878 }, { "epoch": 0.25724305850291507, "grad_norm": 0.4818125069141388, "learning_rate": 8.705863877373603e-05, "loss": 1.0023, "step": 2879 }, { "epoch": 0.25733241003417695, "grad_norm": 0.3713032901287079, "learning_rate": 8.704892282231575e-05, "loss": 1.0398, "step": 2880 }, { "epoch": 0.25742176156543883, "grad_norm": 0.509074330329895, "learning_rate": 8.70392037676533e-05, "loss": 0.9623, "step": 2881 }, { "epoch": 0.2575111130967007, "grad_norm": 0.4063872694969177, "learning_rate": 8.70294816105627e-05, "loss": 0.9864, "step": 2882 }, { "epoch": 0.25760046462796254, "grad_norm": 0.4200826585292816, "learning_rate": 8.701975635185833e-05, "loss": 0.9905, "step": 2883 }, { "epoch": 0.2576898161592244, "grad_norm": 0.44294077157974243, "learning_rate": 8.701002799235475e-05, "loss": 0.9429, "step": 2884 }, { "epoch": 0.2577791676904863, "grad_norm": 0.3688490092754364, "learning_rate": 8.700029653286684e-05, "loss": 1.0168, "step": 2885 }, { "epoch": 0.25786851922174814, "grad_norm": 0.381213903427124, "learning_rate": 8.699056197420967e-05, "loss": 1.0477, "step": 2886 }, { "epoch": 0.25795787075301, "grad_norm": 0.4321887195110321, "learning_rate": 8.698082431719867e-05, "loss": 0.9478, "step": 2887 }, { "epoch": 0.2580472222842719, "grad_norm": 0.39009973406791687, "learning_rate": 8.697108356264944e-05, "loss": 0.9456, "step": 2888 }, { "epoch": 0.2581365738155338, "grad_norm": 0.39922964572906494, "learning_rate": 8.696133971137788e-05, "loss": 1.0529, "step": 2889 }, { "epoch": 0.2582259253467956, "grad_norm": 0.45770078897476196, "learning_rate": 8.695159276420013e-05, "loss": 0.9618, "step": 2890 }, { "epoch": 0.2583152768780575, "grad_norm": 0.4154913127422333, "learning_rate": 8.694184272193262e-05, "loss": 1.016, "step": 2891 }, { "epoch": 0.2584046284093194, "grad_norm": 0.3861916661262512, "learning_rate": 8.6932089585392e-05, "loss": 1.0038, "step": 2892 }, { "epoch": 0.2584939799405812, "grad_norm": 0.39484497904777527, "learning_rate": 8.692233335539521e-05, "loss": 0.9882, "step": 2893 }, { "epoch": 0.2585833314718431, "grad_norm": 0.36382368206977844, "learning_rate": 8.691257403275945e-05, "loss": 1.087, "step": 2894 }, { "epoch": 0.258672683003105, "grad_norm": 0.42892515659332275, "learning_rate": 8.690281161830216e-05, "loss": 1.1341, "step": 2895 }, { "epoch": 0.25876203453436686, "grad_norm": 0.42802634835243225, "learning_rate": 8.689304611284103e-05, "loss": 0.993, "step": 2896 }, { "epoch": 0.2588513860656287, "grad_norm": 0.6133880019187927, "learning_rate": 8.688327751719403e-05, "loss": 0.9809, "step": 2897 }, { "epoch": 0.25894073759689057, "grad_norm": 0.4578530788421631, "learning_rate": 8.68735058321794e-05, "loss": 0.9413, "step": 2898 }, { "epoch": 0.25903008912815245, "grad_norm": 0.4118945896625519, "learning_rate": 8.68637310586156e-05, "loss": 0.9512, "step": 2899 }, { "epoch": 0.2591194406594143, "grad_norm": 0.4931046962738037, "learning_rate": 8.685395319732141e-05, "loss": 1.0285, "step": 2900 }, { "epoch": 0.25920879219067616, "grad_norm": 0.4491409361362457, "learning_rate": 8.684417224911578e-05, "loss": 1.0044, "step": 2901 }, { "epoch": 0.25929814372193805, "grad_norm": 0.37248972058296204, "learning_rate": 8.683438821481802e-05, "loss": 1.0201, "step": 2902 }, { "epoch": 0.25938749525319993, "grad_norm": 0.46489718556404114, "learning_rate": 8.68246010952476e-05, "loss": 0.9569, "step": 2903 }, { "epoch": 0.25947684678446176, "grad_norm": 0.4184110164642334, "learning_rate": 8.681481089122432e-05, "loss": 0.9632, "step": 2904 }, { "epoch": 0.25956619831572364, "grad_norm": 0.38718414306640625, "learning_rate": 8.68050176035682e-05, "loss": 1.0251, "step": 2905 }, { "epoch": 0.2596555498469855, "grad_norm": 0.3973325490951538, "learning_rate": 8.679522123309956e-05, "loss": 1.0239, "step": 2906 }, { "epoch": 0.25974490137824735, "grad_norm": 0.37302878499031067, "learning_rate": 8.678542178063893e-05, "loss": 1.0898, "step": 2907 }, { "epoch": 0.25983425290950923, "grad_norm": 0.46653488278388977, "learning_rate": 8.677561924700713e-05, "loss": 1.0144, "step": 2908 }, { "epoch": 0.2599236044407711, "grad_norm": 0.43336376547813416, "learning_rate": 8.676581363302518e-05, "loss": 0.9507, "step": 2909 }, { "epoch": 0.26001295597203294, "grad_norm": 0.5737956762313843, "learning_rate": 8.675600493951448e-05, "loss": 0.9631, "step": 2910 }, { "epoch": 0.2601023075032948, "grad_norm": 0.460018128156662, "learning_rate": 8.674619316729657e-05, "loss": 0.9619, "step": 2911 }, { "epoch": 0.2601916590345567, "grad_norm": 0.420095831155777, "learning_rate": 8.673637831719328e-05, "loss": 0.9587, "step": 2912 }, { "epoch": 0.2602810105658186, "grad_norm": 0.46309584379196167, "learning_rate": 8.672656039002674e-05, "loss": 0.9664, "step": 2913 }, { "epoch": 0.2603703620970804, "grad_norm": 0.5068766474723816, "learning_rate": 8.671673938661929e-05, "loss": 1.0086, "step": 2914 }, { "epoch": 0.2604597136283423, "grad_norm": 0.46311527490615845, "learning_rate": 8.670691530779354e-05, "loss": 0.9858, "step": 2915 }, { "epoch": 0.2605490651596042, "grad_norm": 0.4441559612751007, "learning_rate": 8.669708815437237e-05, "loss": 0.9832, "step": 2916 }, { "epoch": 0.260638416690866, "grad_norm": 0.4474044442176819, "learning_rate": 8.668725792717889e-05, "loss": 1.0313, "step": 2917 }, { "epoch": 0.2607277682221279, "grad_norm": 0.5184552669525146, "learning_rate": 8.667742462703649e-05, "loss": 1.0102, "step": 2918 }, { "epoch": 0.2608171197533898, "grad_norm": 0.4744894206523895, "learning_rate": 8.666758825476886e-05, "loss": 0.9695, "step": 2919 }, { "epoch": 0.26090647128465166, "grad_norm": 0.40858200192451477, "learning_rate": 8.665774881119985e-05, "loss": 0.9797, "step": 2920 }, { "epoch": 0.2609958228159135, "grad_norm": 0.394593745470047, "learning_rate": 8.664790629715363e-05, "loss": 1.0465, "step": 2921 }, { "epoch": 0.2610851743471754, "grad_norm": 0.4352075457572937, "learning_rate": 8.663806071345462e-05, "loss": 0.9832, "step": 2922 }, { "epoch": 0.26117452587843726, "grad_norm": 0.4699738323688507, "learning_rate": 8.662821206092748e-05, "loss": 1.064, "step": 2923 }, { "epoch": 0.2612638774096991, "grad_norm": 0.4367484450340271, "learning_rate": 8.661836034039717e-05, "loss": 0.9967, "step": 2924 }, { "epoch": 0.26135322894096097, "grad_norm": 0.3603065609931946, "learning_rate": 8.660850555268886e-05, "loss": 1.0077, "step": 2925 }, { "epoch": 0.26144258047222285, "grad_norm": 0.3636876344680786, "learning_rate": 8.659864769862798e-05, "loss": 1.0128, "step": 2926 }, { "epoch": 0.26153193200348474, "grad_norm": 0.37722858786582947, "learning_rate": 8.658878677904024e-05, "loss": 0.9895, "step": 2927 }, { "epoch": 0.26162128353474656, "grad_norm": 0.4058395028114319, "learning_rate": 8.65789227947516e-05, "loss": 0.965, "step": 2928 }, { "epoch": 0.26171063506600845, "grad_norm": 0.3944026529788971, "learning_rate": 8.656905574658829e-05, "loss": 1.0081, "step": 2929 }, { "epoch": 0.26179998659727033, "grad_norm": 0.5204667448997498, "learning_rate": 8.655918563537675e-05, "loss": 1.0273, "step": 2930 }, { "epoch": 0.26188933812853216, "grad_norm": 0.466569721698761, "learning_rate": 8.654931246194372e-05, "loss": 1.0126, "step": 2931 }, { "epoch": 0.26197868965979404, "grad_norm": 0.49643585085868835, "learning_rate": 8.653943622711618e-05, "loss": 0.9925, "step": 2932 }, { "epoch": 0.2620680411910559, "grad_norm": 0.42305490374565125, "learning_rate": 8.652955693172137e-05, "loss": 0.9461, "step": 2933 }, { "epoch": 0.2621573927223178, "grad_norm": 0.5041871666908264, "learning_rate": 8.65196745765868e-05, "loss": 0.9714, "step": 2934 }, { "epoch": 0.26224674425357963, "grad_norm": 0.4066472351551056, "learning_rate": 8.65097891625402e-05, "loss": 1.0175, "step": 2935 }, { "epoch": 0.2623360957848415, "grad_norm": 0.41952818632125854, "learning_rate": 8.649990069040961e-05, "loss": 0.998, "step": 2936 }, { "epoch": 0.2624254473161034, "grad_norm": 0.44271522760391235, "learning_rate": 8.649000916102325e-05, "loss": 0.9336, "step": 2937 }, { "epoch": 0.2625147988473652, "grad_norm": 0.47163864970207214, "learning_rate": 8.64801145752097e-05, "loss": 1.0735, "step": 2938 }, { "epoch": 0.2626041503786271, "grad_norm": 0.40220436453819275, "learning_rate": 8.647021693379768e-05, "loss": 0.99, "step": 2939 }, { "epoch": 0.262693501909889, "grad_norm": 0.37812304496765137, "learning_rate": 8.646031623761626e-05, "loss": 1.0802, "step": 2940 }, { "epoch": 0.2627828534411508, "grad_norm": 0.4696109890937805, "learning_rate": 8.645041248749471e-05, "loss": 0.9638, "step": 2941 }, { "epoch": 0.2628722049724127, "grad_norm": 0.4705543518066406, "learning_rate": 8.644050568426259e-05, "loss": 0.9096, "step": 2942 }, { "epoch": 0.2629615565036746, "grad_norm": 0.4492562413215637, "learning_rate": 8.643059582874969e-05, "loss": 0.9718, "step": 2943 }, { "epoch": 0.26305090803493647, "grad_norm": 0.44293731451034546, "learning_rate": 8.642068292178605e-05, "loss": 0.9978, "step": 2944 }, { "epoch": 0.2631402595661983, "grad_norm": 0.4119532108306885, "learning_rate": 8.641076696420201e-05, "loss": 1.0805, "step": 2945 }, { "epoch": 0.2632296110974602, "grad_norm": 0.5227283239364624, "learning_rate": 8.640084795682813e-05, "loss": 0.9369, "step": 2946 }, { "epoch": 0.26331896262872206, "grad_norm": 0.3896709382534027, "learning_rate": 8.639092590049521e-05, "loss": 0.9635, "step": 2947 }, { "epoch": 0.2634083141599839, "grad_norm": 0.3982143998146057, "learning_rate": 8.638100079603437e-05, "loss": 1.0144, "step": 2948 }, { "epoch": 0.2634976656912458, "grad_norm": 0.40874722599983215, "learning_rate": 8.63710726442769e-05, "loss": 1.0396, "step": 2949 }, { "epoch": 0.26358701722250766, "grad_norm": 0.4392448365688324, "learning_rate": 8.636114144605442e-05, "loss": 1.0296, "step": 2950 }, { "epoch": 0.26367636875376954, "grad_norm": 0.44011494517326355, "learning_rate": 8.635120720219876e-05, "loss": 1.0233, "step": 2951 }, { "epoch": 0.26376572028503137, "grad_norm": 0.4407230615615845, "learning_rate": 8.634126991354202e-05, "loss": 1.0963, "step": 2952 }, { "epoch": 0.26385507181629325, "grad_norm": 0.562527596950531, "learning_rate": 8.633132958091655e-05, "loss": 0.9788, "step": 2953 }, { "epoch": 0.26394442334755513, "grad_norm": 0.44428539276123047, "learning_rate": 8.632138620515498e-05, "loss": 1.0182, "step": 2954 }, { "epoch": 0.26403377487881696, "grad_norm": 0.4478207230567932, "learning_rate": 8.631143978709013e-05, "loss": 0.981, "step": 2955 }, { "epoch": 0.26412312641007885, "grad_norm": 0.48832935094833374, "learning_rate": 8.630149032755517e-05, "loss": 1.0158, "step": 2956 }, { "epoch": 0.26421247794134073, "grad_norm": 0.47887471318244934, "learning_rate": 8.629153782738344e-05, "loss": 0.9785, "step": 2957 }, { "epoch": 0.2643018294726026, "grad_norm": 0.481168657541275, "learning_rate": 8.628158228740857e-05, "loss": 0.949, "step": 2958 }, { "epoch": 0.26439118100386444, "grad_norm": 0.42248255014419556, "learning_rate": 8.627162370846446e-05, "loss": 1.0021, "step": 2959 }, { "epoch": 0.2644805325351263, "grad_norm": 0.3783293664455414, "learning_rate": 8.626166209138524e-05, "loss": 1.0025, "step": 2960 }, { "epoch": 0.2645698840663882, "grad_norm": 0.40361276268959045, "learning_rate": 8.62516974370053e-05, "loss": 0.9737, "step": 2961 }, { "epoch": 0.26465923559765003, "grad_norm": 0.4194972813129425, "learning_rate": 8.624172974615926e-05, "loss": 1.0377, "step": 2962 }, { "epoch": 0.2647485871289119, "grad_norm": 0.4278354346752167, "learning_rate": 8.623175901968206e-05, "loss": 1.0373, "step": 2963 }, { "epoch": 0.2648379386601738, "grad_norm": 0.430337518453598, "learning_rate": 8.622178525840885e-05, "loss": 0.9437, "step": 2964 }, { "epoch": 0.2649272901914357, "grad_norm": 0.39128750562667847, "learning_rate": 8.6211808463175e-05, "loss": 1.03, "step": 2965 }, { "epoch": 0.2650166417226975, "grad_norm": 0.4273848235607147, "learning_rate": 8.620182863481622e-05, "loss": 0.967, "step": 2966 }, { "epoch": 0.2651059932539594, "grad_norm": 0.43095484375953674, "learning_rate": 8.619184577416842e-05, "loss": 1.0008, "step": 2967 }, { "epoch": 0.2651953447852213, "grad_norm": 0.4028542935848236, "learning_rate": 8.618185988206775e-05, "loss": 1.0687, "step": 2968 }, { "epoch": 0.2652846963164831, "grad_norm": 0.45364266633987427, "learning_rate": 8.617187095935065e-05, "loss": 0.9845, "step": 2969 }, { "epoch": 0.265374047847745, "grad_norm": 0.4723089039325714, "learning_rate": 8.616187900685377e-05, "loss": 1.0094, "step": 2970 }, { "epoch": 0.26546339937900687, "grad_norm": 0.45047450065612793, "learning_rate": 8.615188402541408e-05, "loss": 1.025, "step": 2971 }, { "epoch": 0.2655527509102687, "grad_norm": 0.44591468572616577, "learning_rate": 8.614188601586875e-05, "loss": 0.9725, "step": 2972 }, { "epoch": 0.2656421024415306, "grad_norm": 0.5437367558479309, "learning_rate": 8.613188497905523e-05, "loss": 0.9072, "step": 2973 }, { "epoch": 0.26573145397279246, "grad_norm": 0.36033257842063904, "learning_rate": 8.612188091581119e-05, "loss": 0.9875, "step": 2974 }, { "epoch": 0.26582080550405435, "grad_norm": 0.4065442979335785, "learning_rate": 8.611187382697458e-05, "loss": 1.0593, "step": 2975 }, { "epoch": 0.2659101570353162, "grad_norm": 0.42632168531417847, "learning_rate": 8.610186371338365e-05, "loss": 1.0284, "step": 2976 }, { "epoch": 0.26599950856657806, "grad_norm": 0.45668208599090576, "learning_rate": 8.609185057587678e-05, "loss": 1.0186, "step": 2977 }, { "epoch": 0.26608886009783994, "grad_norm": 0.4407256245613098, "learning_rate": 8.608183441529274e-05, "loss": 0.9508, "step": 2978 }, { "epoch": 0.26617821162910177, "grad_norm": 0.4113801419734955, "learning_rate": 8.607181523247045e-05, "loss": 1.0019, "step": 2979 }, { "epoch": 0.26626756316036365, "grad_norm": 0.5497756004333496, "learning_rate": 8.606179302824914e-05, "loss": 1.0021, "step": 2980 }, { "epoch": 0.26635691469162553, "grad_norm": 0.3906475901603699, "learning_rate": 8.605176780346826e-05, "loss": 1.003, "step": 2981 }, { "epoch": 0.2664462662228874, "grad_norm": 0.4320390820503235, "learning_rate": 8.604173955896756e-05, "loss": 0.9869, "step": 2982 }, { "epoch": 0.26653561775414925, "grad_norm": 0.47781896591186523, "learning_rate": 8.6031708295587e-05, "loss": 0.9723, "step": 2983 }, { "epoch": 0.26662496928541113, "grad_norm": 0.4359282851219177, "learning_rate": 8.602167401416678e-05, "loss": 1.0909, "step": 2984 }, { "epoch": 0.266714320816673, "grad_norm": 0.3747998774051666, "learning_rate": 8.601163671554739e-05, "loss": 0.9894, "step": 2985 }, { "epoch": 0.26680367234793484, "grad_norm": 0.4070686101913452, "learning_rate": 8.60015964005696e-05, "loss": 0.9766, "step": 2986 }, { "epoch": 0.2668930238791967, "grad_norm": 0.43301576375961304, "learning_rate": 8.599155307007434e-05, "loss": 0.9774, "step": 2987 }, { "epoch": 0.2669823754104586, "grad_norm": 0.436482697725296, "learning_rate": 8.598150672490288e-05, "loss": 1.0462, "step": 2988 }, { "epoch": 0.2670717269417205, "grad_norm": 0.5423337817192078, "learning_rate": 8.59714573658967e-05, "loss": 0.9865, "step": 2989 }, { "epoch": 0.2671610784729823, "grad_norm": 0.42116260528564453, "learning_rate": 8.596140499389751e-05, "loss": 1.0272, "step": 2990 }, { "epoch": 0.2672504300042442, "grad_norm": 0.4866194725036621, "learning_rate": 8.595134960974735e-05, "loss": 1.0235, "step": 2991 }, { "epoch": 0.2673397815355061, "grad_norm": 0.39662399888038635, "learning_rate": 8.594129121428844e-05, "loss": 0.9877, "step": 2992 }, { "epoch": 0.2674291330667679, "grad_norm": 0.48044952750205994, "learning_rate": 8.59312298083633e-05, "loss": 0.9672, "step": 2993 }, { "epoch": 0.2675184845980298, "grad_norm": 0.40628960728645325, "learning_rate": 8.592116539281466e-05, "loss": 1.0034, "step": 2994 }, { "epoch": 0.2676078361292917, "grad_norm": 0.3931500017642975, "learning_rate": 8.591109796848551e-05, "loss": 1.0643, "step": 2995 }, { "epoch": 0.26769718766055356, "grad_norm": 0.4383373260498047, "learning_rate": 8.590102753621913e-05, "loss": 0.943, "step": 2996 }, { "epoch": 0.2677865391918154, "grad_norm": 0.41193974018096924, "learning_rate": 8.5890954096859e-05, "loss": 1.0634, "step": 2997 }, { "epoch": 0.26787589072307727, "grad_norm": 0.480523020029068, "learning_rate": 8.588087765124893e-05, "loss": 0.9083, "step": 2998 }, { "epoch": 0.26796524225433915, "grad_norm": 0.592286229133606, "learning_rate": 8.587079820023287e-05, "loss": 0.9664, "step": 2999 }, { "epoch": 0.268054593785601, "grad_norm": 0.361767053604126, "learning_rate": 8.586071574465511e-05, "loss": 1.0011, "step": 3000 }, { "epoch": 0.26814394531686286, "grad_norm": 0.38865599036216736, "learning_rate": 8.585063028536016e-05, "loss": 1.0153, "step": 3001 }, { "epoch": 0.26823329684812475, "grad_norm": 0.4458223879337311, "learning_rate": 8.584054182319279e-05, "loss": 1.1077, "step": 3002 }, { "epoch": 0.2683226483793866, "grad_norm": 0.4879177212715149, "learning_rate": 8.583045035899799e-05, "loss": 0.9969, "step": 3003 }, { "epoch": 0.26841199991064846, "grad_norm": 0.47598427534103394, "learning_rate": 8.582035589362107e-05, "loss": 0.9383, "step": 3004 }, { "epoch": 0.26850135144191034, "grad_norm": 0.5034103393554688, "learning_rate": 8.581025842790751e-05, "loss": 1.068, "step": 3005 }, { "epoch": 0.2685907029731722, "grad_norm": 0.3905804753303528, "learning_rate": 8.58001579627031e-05, "loss": 1.0858, "step": 3006 }, { "epoch": 0.26868005450443405, "grad_norm": 0.3951498866081238, "learning_rate": 8.579005449885385e-05, "loss": 0.9513, "step": 3007 }, { "epoch": 0.26876940603569593, "grad_norm": 0.44809505343437195, "learning_rate": 8.577994803720606e-05, "loss": 0.9417, "step": 3008 }, { "epoch": 0.2688587575669578, "grad_norm": 0.39570125937461853, "learning_rate": 8.576983857860624e-05, "loss": 1.0068, "step": 3009 }, { "epoch": 0.26894810909821965, "grad_norm": 0.4790659546852112, "learning_rate": 8.575972612390114e-05, "loss": 0.9794, "step": 3010 }, { "epoch": 0.26903746062948153, "grad_norm": 0.4047996699810028, "learning_rate": 8.57496106739378e-05, "loss": 0.9618, "step": 3011 }, { "epoch": 0.2691268121607434, "grad_norm": 0.4825955629348755, "learning_rate": 8.573949222956352e-05, "loss": 0.9828, "step": 3012 }, { "epoch": 0.2692161636920053, "grad_norm": 0.4488469362258911, "learning_rate": 8.57293707916258e-05, "loss": 0.9346, "step": 3013 }, { "epoch": 0.2693055152232671, "grad_norm": 0.41295936703681946, "learning_rate": 8.571924636097243e-05, "loss": 1.007, "step": 3014 }, { "epoch": 0.269394866754529, "grad_norm": 0.43881046772003174, "learning_rate": 8.570911893845144e-05, "loss": 1.061, "step": 3015 }, { "epoch": 0.2694842182857909, "grad_norm": 0.386459618806839, "learning_rate": 8.569898852491112e-05, "loss": 1.0555, "step": 3016 }, { "epoch": 0.2695735698170527, "grad_norm": 0.3662417531013489, "learning_rate": 8.568885512119997e-05, "loss": 1.056, "step": 3017 }, { "epoch": 0.2696629213483146, "grad_norm": 0.38702377676963806, "learning_rate": 8.56787187281668e-05, "loss": 1.0434, "step": 3018 }, { "epoch": 0.2697522728795765, "grad_norm": 0.40131479501724243, "learning_rate": 8.566857934666062e-05, "loss": 0.9943, "step": 3019 }, { "epoch": 0.26984162441083837, "grad_norm": 0.47031551599502563, "learning_rate": 8.565843697753072e-05, "loss": 0.973, "step": 3020 }, { "epoch": 0.2699309759421002, "grad_norm": 0.47709816694259644, "learning_rate": 8.564829162162664e-05, "loss": 0.9718, "step": 3021 }, { "epoch": 0.2700203274733621, "grad_norm": 0.4619588553905487, "learning_rate": 8.563814327979814e-05, "loss": 0.979, "step": 3022 }, { "epoch": 0.27010967900462396, "grad_norm": 0.4841311573982239, "learning_rate": 8.562799195289527e-05, "loss": 0.9617, "step": 3023 }, { "epoch": 0.2701990305358858, "grad_norm": 0.385013222694397, "learning_rate": 8.561783764176832e-05, "loss": 1.0135, "step": 3024 }, { "epoch": 0.27028838206714767, "grad_norm": 0.48148104548454285, "learning_rate": 8.560768034726778e-05, "loss": 0.9748, "step": 3025 }, { "epoch": 0.27037773359840955, "grad_norm": 0.42841196060180664, "learning_rate": 8.559752007024448e-05, "loss": 0.9614, "step": 3026 }, { "epoch": 0.27046708512967144, "grad_norm": 0.42978012561798096, "learning_rate": 8.558735681154943e-05, "loss": 1.0068, "step": 3027 }, { "epoch": 0.27055643666093326, "grad_norm": 0.4296395778656006, "learning_rate": 8.557719057203391e-05, "loss": 1.0504, "step": 3028 }, { "epoch": 0.27064578819219515, "grad_norm": 0.41547369956970215, "learning_rate": 8.556702135254946e-05, "loss": 1.0337, "step": 3029 }, { "epoch": 0.27073513972345703, "grad_norm": 0.445972204208374, "learning_rate": 8.555684915394786e-05, "loss": 0.9933, "step": 3030 }, { "epoch": 0.27082449125471886, "grad_norm": 0.5036845207214355, "learning_rate": 8.554667397708112e-05, "loss": 0.9972, "step": 3031 }, { "epoch": 0.27091384278598074, "grad_norm": 0.4610268175601959, "learning_rate": 8.553649582280155e-05, "loss": 0.9926, "step": 3032 }, { "epoch": 0.2710031943172426, "grad_norm": 0.5067682266235352, "learning_rate": 8.552631469196164e-05, "loss": 1.0285, "step": 3033 }, { "epoch": 0.27109254584850445, "grad_norm": 0.3850218951702118, "learning_rate": 8.551613058541421e-05, "loss": 1.001, "step": 3034 }, { "epoch": 0.27118189737976633, "grad_norm": 0.39576455950737, "learning_rate": 8.550594350401225e-05, "loss": 0.9882, "step": 3035 }, { "epoch": 0.2712712489110282, "grad_norm": 0.4152897894382477, "learning_rate": 8.549575344860907e-05, "loss": 0.9561, "step": 3036 }, { "epoch": 0.2713606004422901, "grad_norm": 0.6863245964050293, "learning_rate": 8.548556042005819e-05, "loss": 0.9862, "step": 3037 }, { "epoch": 0.27144995197355193, "grad_norm": 0.3862869441509247, "learning_rate": 8.547536441921336e-05, "loss": 1.0157, "step": 3038 }, { "epoch": 0.2715393035048138, "grad_norm": 0.37958061695098877, "learning_rate": 8.546516544692861e-05, "loss": 1.0548, "step": 3039 }, { "epoch": 0.2716286550360757, "grad_norm": 0.4158911108970642, "learning_rate": 8.545496350405825e-05, "loss": 1.0377, "step": 3040 }, { "epoch": 0.2717180065673375, "grad_norm": 0.43958452343940735, "learning_rate": 8.544475859145676e-05, "loss": 1.0113, "step": 3041 }, { "epoch": 0.2718073580985994, "grad_norm": 0.4888957440853119, "learning_rate": 8.543455070997892e-05, "loss": 1.0082, "step": 3042 }, { "epoch": 0.2718967096298613, "grad_norm": 0.4653918147087097, "learning_rate": 8.542433986047977e-05, "loss": 0.9827, "step": 3043 }, { "epoch": 0.27198606116112317, "grad_norm": 0.4065740704536438, "learning_rate": 8.541412604381454e-05, "loss": 1.0425, "step": 3044 }, { "epoch": 0.272075412692385, "grad_norm": 0.3832918703556061, "learning_rate": 8.540390926083876e-05, "loss": 0.9736, "step": 3045 }, { "epoch": 0.2721647642236469, "grad_norm": 0.42073532938957214, "learning_rate": 8.539368951240823e-05, "loss": 1.0916, "step": 3046 }, { "epoch": 0.27225411575490877, "grad_norm": 0.4821849763393402, "learning_rate": 8.538346679937891e-05, "loss": 1.0015, "step": 3047 }, { "epoch": 0.2723434672861706, "grad_norm": 0.3975672125816345, "learning_rate": 8.53732411226071e-05, "loss": 0.9954, "step": 3048 }, { "epoch": 0.2724328188174325, "grad_norm": 0.4945456385612488, "learning_rate": 8.536301248294929e-05, "loss": 1.0665, "step": 3049 }, { "epoch": 0.27252217034869436, "grad_norm": 0.451886922121048, "learning_rate": 8.535278088126225e-05, "loss": 0.9445, "step": 3050 }, { "epoch": 0.27261152187995624, "grad_norm": 0.4293072819709778, "learning_rate": 8.534254631840296e-05, "loss": 0.9888, "step": 3051 }, { "epoch": 0.27270087341121807, "grad_norm": 0.4669989347457886, "learning_rate": 8.53323087952287e-05, "loss": 0.9923, "step": 3052 }, { "epoch": 0.27279022494247995, "grad_norm": 0.4209103286266327, "learning_rate": 8.532206831259696e-05, "loss": 0.9666, "step": 3053 }, { "epoch": 0.27287957647374184, "grad_norm": 0.41052526235580444, "learning_rate": 8.531182487136549e-05, "loss": 1.0202, "step": 3054 }, { "epoch": 0.27296892800500366, "grad_norm": 0.46915799379348755, "learning_rate": 8.53015784723923e-05, "loss": 0.8682, "step": 3055 }, { "epoch": 0.27305827953626555, "grad_norm": 0.3582940697669983, "learning_rate": 8.529132911653563e-05, "loss": 0.9678, "step": 3056 }, { "epoch": 0.27314763106752743, "grad_norm": 0.4747898280620575, "learning_rate": 8.528107680465394e-05, "loss": 0.9526, "step": 3057 }, { "epoch": 0.2732369825987893, "grad_norm": 0.43291470408439636, "learning_rate": 8.527082153760601e-05, "loss": 0.9883, "step": 3058 }, { "epoch": 0.27332633413005114, "grad_norm": 0.4209512770175934, "learning_rate": 8.526056331625083e-05, "loss": 0.9941, "step": 3059 }, { "epoch": 0.273415685661313, "grad_norm": 0.4599602520465851, "learning_rate": 8.525030214144763e-05, "loss": 1.0283, "step": 3060 }, { "epoch": 0.2735050371925749, "grad_norm": 0.3656606078147888, "learning_rate": 8.524003801405587e-05, "loss": 1.017, "step": 3061 }, { "epoch": 0.27359438872383673, "grad_norm": 0.43189749121665955, "learning_rate": 8.522977093493528e-05, "loss": 0.9632, "step": 3062 }, { "epoch": 0.2736837402550986, "grad_norm": 0.5547308325767517, "learning_rate": 8.521950090494587e-05, "loss": 1.0423, "step": 3063 }, { "epoch": 0.2737730917863605, "grad_norm": 0.48824217915534973, "learning_rate": 8.520922792494783e-05, "loss": 0.919, "step": 3064 }, { "epoch": 0.27386244331762233, "grad_norm": 0.4252563416957855, "learning_rate": 8.519895199580168e-05, "loss": 0.9322, "step": 3065 }, { "epoch": 0.2739517948488842, "grad_norm": 0.40307125449180603, "learning_rate": 8.518867311836808e-05, "loss": 1.0316, "step": 3066 }, { "epoch": 0.2740411463801461, "grad_norm": 0.38896623253822327, "learning_rate": 8.517839129350802e-05, "loss": 1.0407, "step": 3067 }, { "epoch": 0.274130497911408, "grad_norm": 0.40794113278388977, "learning_rate": 8.516810652208272e-05, "loss": 0.9766, "step": 3068 }, { "epoch": 0.2742198494426698, "grad_norm": 0.4464515745639801, "learning_rate": 8.515781880495363e-05, "loss": 0.9997, "step": 3069 }, { "epoch": 0.2743092009739317, "grad_norm": 0.3915344476699829, "learning_rate": 8.514752814298248e-05, "loss": 0.9563, "step": 3070 }, { "epoch": 0.27439855250519357, "grad_norm": 0.3987552225589752, "learning_rate": 8.513723453703119e-05, "loss": 1.0145, "step": 3071 }, { "epoch": 0.2744879040364554, "grad_norm": 0.3828818202018738, "learning_rate": 8.512693798796196e-05, "loss": 0.993, "step": 3072 }, { "epoch": 0.2745772555677173, "grad_norm": 0.47670143842697144, "learning_rate": 8.511663849663727e-05, "loss": 0.9994, "step": 3073 }, { "epoch": 0.27466660709897917, "grad_norm": 0.5165907144546509, "learning_rate": 8.510633606391977e-05, "loss": 0.974, "step": 3074 }, { "epoch": 0.27475595863024105, "grad_norm": 0.49067389965057373, "learning_rate": 8.509603069067243e-05, "loss": 0.9133, "step": 3075 }, { "epoch": 0.2748453101615029, "grad_norm": 0.3810344636440277, "learning_rate": 8.50857223777584e-05, "loss": 0.988, "step": 3076 }, { "epoch": 0.27493466169276476, "grad_norm": 0.4502268135547638, "learning_rate": 8.507541112604116e-05, "loss": 0.9268, "step": 3077 }, { "epoch": 0.27502401322402664, "grad_norm": 0.42537322640419006, "learning_rate": 8.506509693638435e-05, "loss": 1.0575, "step": 3078 }, { "epoch": 0.27511336475528847, "grad_norm": 0.37440937757492065, "learning_rate": 8.505477980965191e-05, "loss": 1.0067, "step": 3079 }, { "epoch": 0.27520271628655035, "grad_norm": 0.4221092164516449, "learning_rate": 8.5044459746708e-05, "loss": 0.9788, "step": 3080 }, { "epoch": 0.27529206781781224, "grad_norm": 0.5111864805221558, "learning_rate": 8.503413674841703e-05, "loss": 0.9639, "step": 3081 }, { "epoch": 0.2753814193490741, "grad_norm": 0.4051772654056549, "learning_rate": 8.502381081564369e-05, "loss": 1.0491, "step": 3082 }, { "epoch": 0.27547077088033595, "grad_norm": 0.4286561608314514, "learning_rate": 8.501348194925285e-05, "loss": 0.9884, "step": 3083 }, { "epoch": 0.27556012241159783, "grad_norm": 0.4711834490299225, "learning_rate": 8.500315015010968e-05, "loss": 1.025, "step": 3084 }, { "epoch": 0.2756494739428597, "grad_norm": 0.43955665826797485, "learning_rate": 8.499281541907959e-05, "loss": 0.9961, "step": 3085 }, { "epoch": 0.27573882547412154, "grad_norm": 0.38096296787261963, "learning_rate": 8.498247775702821e-05, "loss": 1.0081, "step": 3086 }, { "epoch": 0.2758281770053834, "grad_norm": 0.40169036388397217, "learning_rate": 8.497213716482142e-05, "loss": 1.0185, "step": 3087 }, { "epoch": 0.2759175285366453, "grad_norm": 0.4468224346637726, "learning_rate": 8.496179364332539e-05, "loss": 1.0679, "step": 3088 }, { "epoch": 0.2760068800679072, "grad_norm": 0.5090979337692261, "learning_rate": 8.495144719340646e-05, "loss": 1.0041, "step": 3089 }, { "epoch": 0.276096231599169, "grad_norm": 0.4413253366947174, "learning_rate": 8.494109781593127e-05, "loss": 0.998, "step": 3090 }, { "epoch": 0.2761855831304309, "grad_norm": 0.5128616094589233, "learning_rate": 8.49307455117667e-05, "loss": 0.9282, "step": 3091 }, { "epoch": 0.2762749346616928, "grad_norm": 0.4610481858253479, "learning_rate": 8.492039028177986e-05, "loss": 0.9687, "step": 3092 }, { "epoch": 0.2763642861929546, "grad_norm": 0.42069047689437866, "learning_rate": 8.491003212683811e-05, "loss": 0.9726, "step": 3093 }, { "epoch": 0.2764536377242165, "grad_norm": 0.42152658104896545, "learning_rate": 8.489967104780902e-05, "loss": 0.968, "step": 3094 }, { "epoch": 0.2765429892554784, "grad_norm": 0.40881800651550293, "learning_rate": 8.48893070455605e-05, "loss": 1.0844, "step": 3095 }, { "epoch": 0.2766323407867402, "grad_norm": 0.4400191009044647, "learning_rate": 8.487894012096065e-05, "loss": 0.9969, "step": 3096 }, { "epoch": 0.2767216923180021, "grad_norm": 0.4783930480480194, "learning_rate": 8.486857027487775e-05, "loss": 0.9458, "step": 3097 }, { "epoch": 0.27681104384926397, "grad_norm": 0.35160624980926514, "learning_rate": 8.485819750818043e-05, "loss": 1.0798, "step": 3098 }, { "epoch": 0.27690039538052585, "grad_norm": 0.4336118996143341, "learning_rate": 8.484782182173749e-05, "loss": 1.0089, "step": 3099 }, { "epoch": 0.2769897469117877, "grad_norm": 0.3933138847351074, "learning_rate": 8.483744321641805e-05, "loss": 1.0987, "step": 3100 }, { "epoch": 0.27707909844304957, "grad_norm": 0.44518598914146423, "learning_rate": 8.482706169309138e-05, "loss": 0.9875, "step": 3101 }, { "epoch": 0.27716844997431145, "grad_norm": 0.4272666275501251, "learning_rate": 8.481667725262708e-05, "loss": 0.986, "step": 3102 }, { "epoch": 0.2772578015055733, "grad_norm": 0.40855729579925537, "learning_rate": 8.480628989589491e-05, "loss": 0.9967, "step": 3103 }, { "epoch": 0.27734715303683516, "grad_norm": 0.42171216011047363, "learning_rate": 8.479589962376497e-05, "loss": 1.0869, "step": 3104 }, { "epoch": 0.27743650456809704, "grad_norm": 0.39265212416648865, "learning_rate": 8.478550643710754e-05, "loss": 1.0718, "step": 3105 }, { "epoch": 0.2775258560993589, "grad_norm": 0.4548582136631012, "learning_rate": 8.477511033679317e-05, "loss": 0.9926, "step": 3106 }, { "epoch": 0.27761520763062075, "grad_norm": 0.37569287419319153, "learning_rate": 8.476471132369262e-05, "loss": 1.022, "step": 3107 }, { "epoch": 0.27770455916188264, "grad_norm": 0.3934466242790222, "learning_rate": 8.475430939867693e-05, "loss": 0.9714, "step": 3108 }, { "epoch": 0.2777939106931445, "grad_norm": 0.4344901144504547, "learning_rate": 8.474390456261738e-05, "loss": 0.9954, "step": 3109 }, { "epoch": 0.27788326222440635, "grad_norm": 0.4985685646533966, "learning_rate": 8.473349681638547e-05, "loss": 0.949, "step": 3110 }, { "epoch": 0.27797261375566823, "grad_norm": 0.40631502866744995, "learning_rate": 8.472308616085298e-05, "loss": 1.0339, "step": 3111 }, { "epoch": 0.2780619652869301, "grad_norm": 0.4710947871208191, "learning_rate": 8.47126725968919e-05, "loss": 0.9509, "step": 3112 }, { "epoch": 0.278151316818192, "grad_norm": 0.4414302110671997, "learning_rate": 8.470225612537448e-05, "loss": 0.9992, "step": 3113 }, { "epoch": 0.2782406683494538, "grad_norm": 0.3889710605144501, "learning_rate": 8.46918367471732e-05, "loss": 0.976, "step": 3114 }, { "epoch": 0.2783300198807157, "grad_norm": 0.41074416041374207, "learning_rate": 8.468141446316082e-05, "loss": 1.0087, "step": 3115 }, { "epoch": 0.2784193714119776, "grad_norm": 0.47910332679748535, "learning_rate": 8.46709892742103e-05, "loss": 0.9827, "step": 3116 }, { "epoch": 0.2785087229432394, "grad_norm": 0.3920195400714874, "learning_rate": 8.466056118119485e-05, "loss": 1.0017, "step": 3117 }, { "epoch": 0.2785980744745013, "grad_norm": 0.4296383261680603, "learning_rate": 8.465013018498795e-05, "loss": 0.9948, "step": 3118 }, { "epoch": 0.2786874260057632, "grad_norm": 0.408699631690979, "learning_rate": 8.463969628646332e-05, "loss": 1.0166, "step": 3119 }, { "epoch": 0.27877677753702507, "grad_norm": 0.46718454360961914, "learning_rate": 8.462925948649488e-05, "loss": 0.9799, "step": 3120 }, { "epoch": 0.2788661290682869, "grad_norm": 0.40248745679855347, "learning_rate": 8.461881978595683e-05, "loss": 1.0439, "step": 3121 }, { "epoch": 0.2789554805995488, "grad_norm": 0.4227806329727173, "learning_rate": 8.460837718572361e-05, "loss": 1.0268, "step": 3122 }, { "epoch": 0.27904483213081066, "grad_norm": 0.44259488582611084, "learning_rate": 8.45979316866699e-05, "loss": 1.0697, "step": 3123 }, { "epoch": 0.2791341836620725, "grad_norm": 0.43050849437713623, "learning_rate": 8.458748328967065e-05, "loss": 1.0158, "step": 3124 }, { "epoch": 0.27922353519333437, "grad_norm": 0.4049340486526489, "learning_rate": 8.457703199560098e-05, "loss": 0.9342, "step": 3125 }, { "epoch": 0.27931288672459625, "grad_norm": 0.43839579820632935, "learning_rate": 8.456657780533632e-05, "loss": 0.951, "step": 3126 }, { "epoch": 0.27940223825585814, "grad_norm": 0.44916754961013794, "learning_rate": 8.45561207197523e-05, "loss": 0.9717, "step": 3127 }, { "epoch": 0.27949158978711997, "grad_norm": 0.4199538230895996, "learning_rate": 8.454566073972485e-05, "loss": 0.9371, "step": 3128 }, { "epoch": 0.27958094131838185, "grad_norm": 0.4070294201374054, "learning_rate": 8.453519786613007e-05, "loss": 0.9972, "step": 3129 }, { "epoch": 0.27967029284964373, "grad_norm": 0.4184653162956238, "learning_rate": 8.452473209984435e-05, "loss": 0.9807, "step": 3130 }, { "epoch": 0.27975964438090556, "grad_norm": 0.4994652271270752, "learning_rate": 8.451426344174433e-05, "loss": 1.0829, "step": 3131 }, { "epoch": 0.27984899591216744, "grad_norm": 0.4521782696247101, "learning_rate": 8.450379189270683e-05, "loss": 1.0126, "step": 3132 }, { "epoch": 0.2799383474434293, "grad_norm": 0.43609222769737244, "learning_rate": 8.449331745360898e-05, "loss": 1.0078, "step": 3133 }, { "epoch": 0.28002769897469115, "grad_norm": 0.4149281680583954, "learning_rate": 8.448284012532812e-05, "loss": 0.9348, "step": 3134 }, { "epoch": 0.28011705050595304, "grad_norm": 0.4595349431037903, "learning_rate": 8.447235990874182e-05, "loss": 1.0162, "step": 3135 }, { "epoch": 0.2802064020372149, "grad_norm": 0.48324936628341675, "learning_rate": 8.446187680472797e-05, "loss": 0.9531, "step": 3136 }, { "epoch": 0.2802957535684768, "grad_norm": 0.40329238772392273, "learning_rate": 8.445139081416458e-05, "loss": 0.977, "step": 3137 }, { "epoch": 0.28038510509973863, "grad_norm": 0.5319374203681946, "learning_rate": 8.444090193792997e-05, "loss": 0.9266, "step": 3138 }, { "epoch": 0.2804744566310005, "grad_norm": 0.43720632791519165, "learning_rate": 8.443041017690271e-05, "loss": 1.049, "step": 3139 }, { "epoch": 0.2805638081622624, "grad_norm": 0.4096292555332184, "learning_rate": 8.441991553196162e-05, "loss": 1.027, "step": 3140 }, { "epoch": 0.2806531596935242, "grad_norm": 0.4749426543712616, "learning_rate": 8.440941800398571e-05, "loss": 0.9608, "step": 3141 }, { "epoch": 0.2807425112247861, "grad_norm": 0.40563464164733887, "learning_rate": 8.439891759385425e-05, "loss": 1.025, "step": 3142 }, { "epoch": 0.280831862756048, "grad_norm": 0.4449627101421356, "learning_rate": 8.438841430244678e-05, "loss": 0.9698, "step": 3143 }, { "epoch": 0.2809212142873099, "grad_norm": 0.4511934816837311, "learning_rate": 8.437790813064305e-05, "loss": 0.9556, "step": 3144 }, { "epoch": 0.2810105658185717, "grad_norm": 0.4390813112258911, "learning_rate": 8.43673990793231e-05, "loss": 1.0457, "step": 3145 }, { "epoch": 0.2810999173498336, "grad_norm": 0.4567374885082245, "learning_rate": 8.435688714936711e-05, "loss": 0.9987, "step": 3146 }, { "epoch": 0.28118926888109547, "grad_norm": 0.39277079701423645, "learning_rate": 8.434637234165563e-05, "loss": 1.0383, "step": 3147 }, { "epoch": 0.2812786204123573, "grad_norm": 0.36896324157714844, "learning_rate": 8.433585465706934e-05, "loss": 0.9997, "step": 3148 }, { "epoch": 0.2813679719436192, "grad_norm": 0.40107041597366333, "learning_rate": 8.432533409648925e-05, "loss": 1.0192, "step": 3149 }, { "epoch": 0.28145732347488106, "grad_norm": 0.5373698472976685, "learning_rate": 8.431481066079654e-05, "loss": 0.9916, "step": 3150 }, { "epoch": 0.28154667500614294, "grad_norm": 0.4341030418872833, "learning_rate": 8.430428435087267e-05, "loss": 1.0095, "step": 3151 }, { "epoch": 0.28163602653740477, "grad_norm": 0.4856875240802765, "learning_rate": 8.429375516759933e-05, "loss": 0.9375, "step": 3152 }, { "epoch": 0.28172537806866665, "grad_norm": 0.4683944880962372, "learning_rate": 8.428322311185844e-05, "loss": 1.0203, "step": 3153 }, { "epoch": 0.28181472959992854, "grad_norm": 0.39955028891563416, "learning_rate": 8.427268818453218e-05, "loss": 1.0851, "step": 3154 }, { "epoch": 0.28190408113119036, "grad_norm": 0.4985107481479645, "learning_rate": 8.426215038650296e-05, "loss": 0.9497, "step": 3155 }, { "epoch": 0.28199343266245225, "grad_norm": 0.40328237414360046, "learning_rate": 8.425160971865345e-05, "loss": 1.019, "step": 3156 }, { "epoch": 0.28208278419371413, "grad_norm": 0.4598018527030945, "learning_rate": 8.424106618186653e-05, "loss": 0.8932, "step": 3157 }, { "epoch": 0.282172135724976, "grad_norm": 0.46386003494262695, "learning_rate": 8.423051977702532e-05, "loss": 1.0043, "step": 3158 }, { "epoch": 0.28226148725623784, "grad_norm": 0.4086954593658447, "learning_rate": 8.421997050501323e-05, "loss": 1.0223, "step": 3159 }, { "epoch": 0.2823508387874997, "grad_norm": 0.4212207794189453, "learning_rate": 8.420941836671384e-05, "loss": 0.9869, "step": 3160 }, { "epoch": 0.2824401903187616, "grad_norm": 0.3890816271305084, "learning_rate": 8.419886336301099e-05, "loss": 1.0268, "step": 3161 }, { "epoch": 0.28252954185002344, "grad_norm": 0.39444929361343384, "learning_rate": 8.418830549478885e-05, "loss": 1.0987, "step": 3162 }, { "epoch": 0.2826188933812853, "grad_norm": 0.43861931562423706, "learning_rate": 8.417774476293167e-05, "loss": 0.922, "step": 3163 }, { "epoch": 0.2827082449125472, "grad_norm": 0.42500317096710205, "learning_rate": 8.416718116832406e-05, "loss": 0.9877, "step": 3164 }, { "epoch": 0.28279759644380903, "grad_norm": 0.4124399721622467, "learning_rate": 8.415661471185084e-05, "loss": 1.0055, "step": 3165 }, { "epoch": 0.2828869479750709, "grad_norm": 0.41332152485847473, "learning_rate": 8.414604539439704e-05, "loss": 1.0422, "step": 3166 }, { "epoch": 0.2829762995063328, "grad_norm": 0.42248642444610596, "learning_rate": 8.413547321684798e-05, "loss": 1.0358, "step": 3167 }, { "epoch": 0.2830656510375947, "grad_norm": 0.42159736156463623, "learning_rate": 8.412489818008918e-05, "loss": 1.0113, "step": 3168 }, { "epoch": 0.2831550025688565, "grad_norm": 0.41881147027015686, "learning_rate": 8.41143202850064e-05, "loss": 1.0388, "step": 3169 }, { "epoch": 0.2832443541001184, "grad_norm": 0.4449400007724762, "learning_rate": 8.41037395324857e-05, "loss": 1.0147, "step": 3170 }, { "epoch": 0.2833337056313803, "grad_norm": 0.4245634377002716, "learning_rate": 8.409315592341324e-05, "loss": 1.0661, "step": 3171 }, { "epoch": 0.2834230571626421, "grad_norm": 0.4041271209716797, "learning_rate": 8.408256945867561e-05, "loss": 1.0927, "step": 3172 }, { "epoch": 0.283512408693904, "grad_norm": 0.5533398389816284, "learning_rate": 8.407198013915947e-05, "loss": 0.9166, "step": 3173 }, { "epoch": 0.28360176022516587, "grad_norm": 0.39345189929008484, "learning_rate": 8.406138796575183e-05, "loss": 0.9596, "step": 3174 }, { "epoch": 0.28369111175642775, "grad_norm": 0.39271482825279236, "learning_rate": 8.405079293933986e-05, "loss": 1.0173, "step": 3175 }, { "epoch": 0.2837804632876896, "grad_norm": 0.38316190242767334, "learning_rate": 8.404019506081104e-05, "loss": 0.9547, "step": 3176 }, { "epoch": 0.28386981481895146, "grad_norm": 0.3933804929256439, "learning_rate": 8.402959433105303e-05, "loss": 0.9311, "step": 3177 }, { "epoch": 0.28395916635021334, "grad_norm": 0.39881816506385803, "learning_rate": 8.401899075095378e-05, "loss": 0.9956, "step": 3178 }, { "epoch": 0.28404851788147517, "grad_norm": 0.4697539806365967, "learning_rate": 8.400838432140142e-05, "loss": 0.935, "step": 3179 }, { "epoch": 0.28413786941273705, "grad_norm": 0.4180028438568115, "learning_rate": 8.399777504328438e-05, "loss": 1.0263, "step": 3180 }, { "epoch": 0.28422722094399894, "grad_norm": 0.4734386205673218, "learning_rate": 8.398716291749129e-05, "loss": 0.911, "step": 3181 }, { "epoch": 0.2843165724752608, "grad_norm": 0.41403621435165405, "learning_rate": 8.397654794491103e-05, "loss": 1.0165, "step": 3182 }, { "epoch": 0.28440592400652265, "grad_norm": 0.5192162990570068, "learning_rate": 8.396593012643272e-05, "loss": 0.9693, "step": 3183 }, { "epoch": 0.28449527553778453, "grad_norm": 0.40429311990737915, "learning_rate": 8.39553094629457e-05, "loss": 1.0161, "step": 3184 }, { "epoch": 0.2845846270690464, "grad_norm": 0.43155255913734436, "learning_rate": 8.394468595533958e-05, "loss": 1.0094, "step": 3185 }, { "epoch": 0.28467397860030824, "grad_norm": 0.4038214683532715, "learning_rate": 8.393405960450418e-05, "loss": 1.0635, "step": 3186 }, { "epoch": 0.2847633301315701, "grad_norm": 0.5239593982696533, "learning_rate": 8.392343041132959e-05, "loss": 0.9088, "step": 3187 }, { "epoch": 0.284852681662832, "grad_norm": 0.4169578552246094, "learning_rate": 8.391279837670609e-05, "loss": 0.9349, "step": 3188 }, { "epoch": 0.2849420331940939, "grad_norm": 0.5473026633262634, "learning_rate": 8.390216350152425e-05, "loss": 1.0211, "step": 3189 }, { "epoch": 0.2850313847253557, "grad_norm": 0.45344579219818115, "learning_rate": 8.389152578667485e-05, "loss": 1.0067, "step": 3190 }, { "epoch": 0.2851207362566176, "grad_norm": 0.4618963599205017, "learning_rate": 8.38808852330489e-05, "loss": 0.996, "step": 3191 }, { "epoch": 0.2852100877878795, "grad_norm": 0.5052065253257751, "learning_rate": 8.387024184153766e-05, "loss": 0.9071, "step": 3192 }, { "epoch": 0.2852994393191413, "grad_norm": 0.413125604391098, "learning_rate": 8.385959561303265e-05, "loss": 1.022, "step": 3193 }, { "epoch": 0.2853887908504032, "grad_norm": 0.3971850574016571, "learning_rate": 8.384894654842559e-05, "loss": 1.0205, "step": 3194 }, { "epoch": 0.2854781423816651, "grad_norm": 0.3980804681777954, "learning_rate": 8.383829464860844e-05, "loss": 1.0146, "step": 3195 }, { "epoch": 0.2855674939129269, "grad_norm": 0.46941179037094116, "learning_rate": 8.382763991447344e-05, "loss": 1.0008, "step": 3196 }, { "epoch": 0.2856568454441888, "grad_norm": 0.38151049613952637, "learning_rate": 8.381698234691301e-05, "loss": 1.0283, "step": 3197 }, { "epoch": 0.2857461969754507, "grad_norm": 0.5456905364990234, "learning_rate": 8.380632194681986e-05, "loss": 0.92, "step": 3198 }, { "epoch": 0.28583554850671256, "grad_norm": 0.3786112368106842, "learning_rate": 8.379565871508688e-05, "loss": 0.9825, "step": 3199 }, { "epoch": 0.2859249000379744, "grad_norm": 0.48761385679244995, "learning_rate": 8.378499265260724e-05, "loss": 0.9437, "step": 3200 }, { "epoch": 0.28601425156923627, "grad_norm": 0.4335952401161194, "learning_rate": 8.377432376027437e-05, "loss": 0.9783, "step": 3201 }, { "epoch": 0.28610360310049815, "grad_norm": 0.44660720229148865, "learning_rate": 8.376365203898184e-05, "loss": 1.0364, "step": 3202 }, { "epoch": 0.28619295463176, "grad_norm": 0.4328174889087677, "learning_rate": 8.375297748962358e-05, "loss": 1.002, "step": 3203 }, { "epoch": 0.28628230616302186, "grad_norm": 0.4029334783554077, "learning_rate": 8.374230011309368e-05, "loss": 0.9958, "step": 3204 }, { "epoch": 0.28637165769428374, "grad_norm": 0.49282363057136536, "learning_rate": 8.373161991028646e-05, "loss": 0.8783, "step": 3205 }, { "epoch": 0.2864610092255456, "grad_norm": 0.4617169499397278, "learning_rate": 8.372093688209655e-05, "loss": 1.0666, "step": 3206 }, { "epoch": 0.28655036075680745, "grad_norm": 0.44560667872428894, "learning_rate": 8.371025102941872e-05, "loss": 0.9826, "step": 3207 }, { "epoch": 0.28663971228806934, "grad_norm": 0.4756043255329132, "learning_rate": 8.369956235314802e-05, "loss": 0.9956, "step": 3208 }, { "epoch": 0.2867290638193312, "grad_norm": 0.4700027108192444, "learning_rate": 8.368887085417977e-05, "loss": 1.0006, "step": 3209 }, { "epoch": 0.28681841535059305, "grad_norm": 0.47527703642845154, "learning_rate": 8.367817653340951e-05, "loss": 0.9828, "step": 3210 }, { "epoch": 0.28690776688185493, "grad_norm": 0.5730937719345093, "learning_rate": 8.366747939173297e-05, "loss": 0.9436, "step": 3211 }, { "epoch": 0.2869971184131168, "grad_norm": 0.47630396485328674, "learning_rate": 8.365677943004617e-05, "loss": 0.9992, "step": 3212 }, { "epoch": 0.2870864699443787, "grad_norm": 0.40669214725494385, "learning_rate": 8.364607664924533e-05, "loss": 1.015, "step": 3213 }, { "epoch": 0.2871758214756405, "grad_norm": 0.3980100750923157, "learning_rate": 8.363537105022692e-05, "loss": 0.968, "step": 3214 }, { "epoch": 0.2872651730069024, "grad_norm": 0.4197221100330353, "learning_rate": 8.362466263388766e-05, "loss": 1.0064, "step": 3215 }, { "epoch": 0.2873545245381643, "grad_norm": 0.42029672861099243, "learning_rate": 8.36139514011245e-05, "loss": 1.0118, "step": 3216 }, { "epoch": 0.2874438760694261, "grad_norm": 0.3882405757904053, "learning_rate": 8.36032373528346e-05, "loss": 1.0255, "step": 3217 }, { "epoch": 0.287533227600688, "grad_norm": 0.4027383625507355, "learning_rate": 8.359252048991539e-05, "loss": 1.013, "step": 3218 }, { "epoch": 0.2876225791319499, "grad_norm": 0.4605361819267273, "learning_rate": 8.358180081326454e-05, "loss": 0.9371, "step": 3219 }, { "epoch": 0.28771193066321177, "grad_norm": 0.4364441931247711, "learning_rate": 8.357107832377988e-05, "loss": 0.9737, "step": 3220 }, { "epoch": 0.2878012821944736, "grad_norm": 0.4793910086154938, "learning_rate": 8.356035302235959e-05, "loss": 0.9819, "step": 3221 }, { "epoch": 0.2878906337257355, "grad_norm": 0.39052850008010864, "learning_rate": 8.354962490990202e-05, "loss": 1.0593, "step": 3222 }, { "epoch": 0.28797998525699736, "grad_norm": 0.4997557997703552, "learning_rate": 8.353889398730572e-05, "loss": 1.0613, "step": 3223 }, { "epoch": 0.2880693367882592, "grad_norm": 0.398324191570282, "learning_rate": 8.352816025546957e-05, "loss": 0.9801, "step": 3224 }, { "epoch": 0.2881586883195211, "grad_norm": 0.5084810256958008, "learning_rate": 8.351742371529261e-05, "loss": 1.0013, "step": 3225 }, { "epoch": 0.28824803985078296, "grad_norm": 0.4650806486606598, "learning_rate": 8.350668436767413e-05, "loss": 0.9618, "step": 3226 }, { "epoch": 0.2883373913820448, "grad_norm": 0.43210747838020325, "learning_rate": 8.349594221351368e-05, "loss": 0.9195, "step": 3227 }, { "epoch": 0.28842674291330667, "grad_norm": 0.4068807363510132, "learning_rate": 8.348519725371105e-05, "loss": 1.1076, "step": 3228 }, { "epoch": 0.28851609444456855, "grad_norm": 0.4721238911151886, "learning_rate": 8.347444948916622e-05, "loss": 1.0048, "step": 3229 }, { "epoch": 0.28860544597583043, "grad_norm": 0.4541776776313782, "learning_rate": 8.346369892077942e-05, "loss": 0.9736, "step": 3230 }, { "epoch": 0.28869479750709226, "grad_norm": 0.5041652917861938, "learning_rate": 8.345294554945113e-05, "loss": 0.9833, "step": 3231 }, { "epoch": 0.28878414903835414, "grad_norm": 0.48984166979789734, "learning_rate": 8.344218937608211e-05, "loss": 0.9144, "step": 3232 }, { "epoch": 0.288873500569616, "grad_norm": 0.3938770294189453, "learning_rate": 8.343143040157322e-05, "loss": 0.9965, "step": 3233 }, { "epoch": 0.28896285210087785, "grad_norm": 0.36259424686431885, "learning_rate": 8.34206686268257e-05, "loss": 1.0121, "step": 3234 }, { "epoch": 0.28905220363213974, "grad_norm": 0.46610236167907715, "learning_rate": 8.340990405274091e-05, "loss": 0.9109, "step": 3235 }, { "epoch": 0.2891415551634016, "grad_norm": 0.39891326427459717, "learning_rate": 8.339913668022057e-05, "loss": 0.9904, "step": 3236 }, { "epoch": 0.2892309066946635, "grad_norm": 0.4756152629852295, "learning_rate": 8.338836651016652e-05, "loss": 1.0002, "step": 3237 }, { "epoch": 0.28932025822592533, "grad_norm": 0.42526543140411377, "learning_rate": 8.337759354348087e-05, "loss": 1.0233, "step": 3238 }, { "epoch": 0.2894096097571872, "grad_norm": 0.4663248360157013, "learning_rate": 8.336681778106598e-05, "loss": 1.0187, "step": 3239 }, { "epoch": 0.2894989612884491, "grad_norm": 0.39710041880607605, "learning_rate": 8.335603922382444e-05, "loss": 1.0259, "step": 3240 }, { "epoch": 0.2895883128197109, "grad_norm": 0.4082708954811096, "learning_rate": 8.334525787265905e-05, "loss": 0.9772, "step": 3241 }, { "epoch": 0.2896776643509728, "grad_norm": 0.41001391410827637, "learning_rate": 8.333447372847291e-05, "loss": 0.987, "step": 3242 }, { "epoch": 0.2897670158822347, "grad_norm": 0.39033588767051697, "learning_rate": 8.332368679216927e-05, "loss": 0.9732, "step": 3243 }, { "epoch": 0.2898563674134966, "grad_norm": 0.4427189230918884, "learning_rate": 8.331289706465162e-05, "loss": 1.0081, "step": 3244 }, { "epoch": 0.2899457189447584, "grad_norm": 0.3782840669155121, "learning_rate": 8.330210454682379e-05, "loss": 1.0221, "step": 3245 }, { "epoch": 0.2900350704760203, "grad_norm": 0.460595041513443, "learning_rate": 8.32913092395897e-05, "loss": 1.0137, "step": 3246 }, { "epoch": 0.29012442200728217, "grad_norm": 0.45908665657043457, "learning_rate": 8.328051114385362e-05, "loss": 0.9745, "step": 3247 }, { "epoch": 0.290213773538544, "grad_norm": 0.5565299987792969, "learning_rate": 8.326971026052e-05, "loss": 0.87, "step": 3248 }, { "epoch": 0.2903031250698059, "grad_norm": 0.37024036049842834, "learning_rate": 8.32589065904935e-05, "loss": 1.031, "step": 3249 }, { "epoch": 0.29039247660106776, "grad_norm": 0.42501765489578247, "learning_rate": 8.324810013467905e-05, "loss": 0.9865, "step": 3250 }, { "epoch": 0.29048182813232964, "grad_norm": 0.38723427057266235, "learning_rate": 8.323729089398181e-05, "loss": 1.0355, "step": 3251 }, { "epoch": 0.2905711796635915, "grad_norm": 0.37960848212242126, "learning_rate": 8.322647886930718e-05, "loss": 0.955, "step": 3252 }, { "epoch": 0.29066053119485336, "grad_norm": 0.3721432089805603, "learning_rate": 8.32156640615608e-05, "loss": 0.982, "step": 3253 }, { "epoch": 0.29074988272611524, "grad_norm": 0.43282589316368103, "learning_rate": 8.320484647164848e-05, "loss": 1.0314, "step": 3254 }, { "epoch": 0.29083923425737707, "grad_norm": 0.4642455577850342, "learning_rate": 8.319402610047633e-05, "loss": 0.9311, "step": 3255 }, { "epoch": 0.29092858578863895, "grad_norm": 0.3716782331466675, "learning_rate": 8.318320294895067e-05, "loss": 1.0325, "step": 3256 }, { "epoch": 0.29101793731990083, "grad_norm": 0.4215312600135803, "learning_rate": 8.317237701797807e-05, "loss": 0.9463, "step": 3257 }, { "epoch": 0.29110728885116266, "grad_norm": 0.49139776825904846, "learning_rate": 8.316154830846528e-05, "loss": 0.9384, "step": 3258 }, { "epoch": 0.29119664038242454, "grad_norm": 0.4280232787132263, "learning_rate": 8.315071682131936e-05, "loss": 1.0093, "step": 3259 }, { "epoch": 0.2912859919136864, "grad_norm": 0.42184942960739136, "learning_rate": 8.313988255744754e-05, "loss": 0.9151, "step": 3260 }, { "epoch": 0.2913753434449483, "grad_norm": 0.44842901825904846, "learning_rate": 8.312904551775731e-05, "loss": 1.0307, "step": 3261 }, { "epoch": 0.29146469497621014, "grad_norm": 0.39945071935653687, "learning_rate": 8.311820570315639e-05, "loss": 1.0681, "step": 3262 }, { "epoch": 0.291554046507472, "grad_norm": 0.4484541416168213, "learning_rate": 8.310736311455271e-05, "loss": 0.9827, "step": 3263 }, { "epoch": 0.2916433980387339, "grad_norm": 0.39152559638023376, "learning_rate": 8.30965177528545e-05, "loss": 1.0095, "step": 3264 }, { "epoch": 0.29173274956999573, "grad_norm": 0.4719206392765045, "learning_rate": 8.308566961897012e-05, "loss": 0.9007, "step": 3265 }, { "epoch": 0.2918221011012576, "grad_norm": 0.46025243401527405, "learning_rate": 8.307481871380824e-05, "loss": 1.0282, "step": 3266 }, { "epoch": 0.2919114526325195, "grad_norm": 0.41656240820884705, "learning_rate": 8.306396503827775e-05, "loss": 0.9827, "step": 3267 }, { "epoch": 0.2920008041637814, "grad_norm": 0.440974622964859, "learning_rate": 8.305310859328777e-05, "loss": 0.9863, "step": 3268 }, { "epoch": 0.2920901556950432, "grad_norm": 0.3946766257286072, "learning_rate": 8.30422493797476e-05, "loss": 0.9565, "step": 3269 }, { "epoch": 0.2921795072263051, "grad_norm": 0.47271499037742615, "learning_rate": 8.303138739856684e-05, "loss": 0.9492, "step": 3270 }, { "epoch": 0.292268858757567, "grad_norm": 0.4621809124946594, "learning_rate": 8.302052265065531e-05, "loss": 0.926, "step": 3271 }, { "epoch": 0.2923582102888288, "grad_norm": 0.4402875006198883, "learning_rate": 8.300965513692303e-05, "loss": 1.0155, "step": 3272 }, { "epoch": 0.2924475618200907, "grad_norm": 0.4282824695110321, "learning_rate": 8.299878485828028e-05, "loss": 1.0171, "step": 3273 }, { "epoch": 0.29253691335135257, "grad_norm": 0.47499823570251465, "learning_rate": 8.298791181563754e-05, "loss": 0.9616, "step": 3274 }, { "epoch": 0.29262626488261445, "grad_norm": 0.4334074556827545, "learning_rate": 8.297703600990556e-05, "loss": 0.9689, "step": 3275 }, { "epoch": 0.2927156164138763, "grad_norm": 0.42571699619293213, "learning_rate": 8.296615744199532e-05, "loss": 1.152, "step": 3276 }, { "epoch": 0.29280496794513816, "grad_norm": 0.473748117685318, "learning_rate": 8.295527611281799e-05, "loss": 0.9805, "step": 3277 }, { "epoch": 0.29289431947640004, "grad_norm": 0.4350266456604004, "learning_rate": 8.2944392023285e-05, "loss": 1.0552, "step": 3278 }, { "epoch": 0.29298367100766187, "grad_norm": 0.43737518787384033, "learning_rate": 8.293350517430805e-05, "loss": 0.9802, "step": 3279 }, { "epoch": 0.29307302253892376, "grad_norm": 0.45549145340919495, "learning_rate": 8.292261556679897e-05, "loss": 0.9499, "step": 3280 }, { "epoch": 0.29316237407018564, "grad_norm": 0.4989168345928192, "learning_rate": 8.29117232016699e-05, "loss": 0.9162, "step": 3281 }, { "epoch": 0.2932517256014475, "grad_norm": 0.4217113256454468, "learning_rate": 8.29008280798332e-05, "loss": 0.9838, "step": 3282 }, { "epoch": 0.29334107713270935, "grad_norm": 0.382791668176651, "learning_rate": 8.288993020220147e-05, "loss": 1.0894, "step": 3283 }, { "epoch": 0.29343042866397123, "grad_norm": 0.4287773072719574, "learning_rate": 8.287902956968748e-05, "loss": 0.9894, "step": 3284 }, { "epoch": 0.2935197801952331, "grad_norm": 0.4374977648258209, "learning_rate": 8.286812618320431e-05, "loss": 0.9899, "step": 3285 }, { "epoch": 0.29360913172649494, "grad_norm": 0.37822476029396057, "learning_rate": 8.285722004366522e-05, "loss": 1.0091, "step": 3286 }, { "epoch": 0.2936984832577568, "grad_norm": 0.4243577718734741, "learning_rate": 8.284631115198371e-05, "loss": 0.9796, "step": 3287 }, { "epoch": 0.2937878347890187, "grad_norm": 0.556516706943512, "learning_rate": 8.283539950907356e-05, "loss": 0.9766, "step": 3288 }, { "epoch": 0.29387718632028054, "grad_norm": 0.4003793001174927, "learning_rate": 8.282448511584866e-05, "loss": 0.9797, "step": 3289 }, { "epoch": 0.2939665378515424, "grad_norm": 0.39269542694091797, "learning_rate": 8.281356797322327e-05, "loss": 0.966, "step": 3290 }, { "epoch": 0.2940558893828043, "grad_norm": 0.4365537464618683, "learning_rate": 8.280264808211179e-05, "loss": 1.0566, "step": 3291 }, { "epoch": 0.2941452409140662, "grad_norm": 0.42626434564590454, "learning_rate": 8.279172544342888e-05, "loss": 0.9358, "step": 3292 }, { "epoch": 0.294234592445328, "grad_norm": 0.5638636946678162, "learning_rate": 8.278080005808943e-05, "loss": 0.9842, "step": 3293 }, { "epoch": 0.2943239439765899, "grad_norm": 0.3928448259830475, "learning_rate": 8.276987192700856e-05, "loss": 1.0545, "step": 3294 }, { "epoch": 0.2944132955078518, "grad_norm": 0.4263659119606018, "learning_rate": 8.275894105110161e-05, "loss": 0.9396, "step": 3295 }, { "epoch": 0.2945026470391136, "grad_norm": 0.4779483377933502, "learning_rate": 8.274800743128417e-05, "loss": 1.0356, "step": 3296 }, { "epoch": 0.2945919985703755, "grad_norm": 0.3762441575527191, "learning_rate": 8.273707106847202e-05, "loss": 1.0131, "step": 3297 }, { "epoch": 0.2946813501016374, "grad_norm": 0.4361181855201721, "learning_rate": 8.272613196358124e-05, "loss": 1.0383, "step": 3298 }, { "epoch": 0.29477070163289926, "grad_norm": 0.3914470076560974, "learning_rate": 8.271519011752807e-05, "loss": 0.9641, "step": 3299 }, { "epoch": 0.2948600531641611, "grad_norm": 0.4699605703353882, "learning_rate": 8.270424553122899e-05, "loss": 0.9384, "step": 3300 }, { "epoch": 0.29494940469542297, "grad_norm": 0.4258545935153961, "learning_rate": 8.269329820560075e-05, "loss": 1.0225, "step": 3301 }, { "epoch": 0.29503875622668485, "grad_norm": 0.4467248320579529, "learning_rate": 8.26823481415603e-05, "loss": 0.9794, "step": 3302 }, { "epoch": 0.2951281077579467, "grad_norm": 0.49532341957092285, "learning_rate": 8.267139534002483e-05, "loss": 0.9664, "step": 3303 }, { "epoch": 0.29521745928920856, "grad_norm": 0.5230326056480408, "learning_rate": 8.266043980191175e-05, "loss": 1.001, "step": 3304 }, { "epoch": 0.29530681082047044, "grad_norm": 0.46619758009910583, "learning_rate": 8.26494815281387e-05, "loss": 0.8984, "step": 3305 }, { "epoch": 0.2953961623517323, "grad_norm": 0.514583170413971, "learning_rate": 8.263852051962356e-05, "loss": 0.9236, "step": 3306 }, { "epoch": 0.29548551388299416, "grad_norm": 0.48377975821495056, "learning_rate": 8.26275567772844e-05, "loss": 1.0234, "step": 3307 }, { "epoch": 0.29557486541425604, "grad_norm": 0.3904837667942047, "learning_rate": 8.261659030203961e-05, "loss": 1.072, "step": 3308 }, { "epoch": 0.2956642169455179, "grad_norm": 0.3960415720939636, "learning_rate": 8.260562109480768e-05, "loss": 0.9875, "step": 3309 }, { "epoch": 0.29575356847677975, "grad_norm": 0.4395782947540283, "learning_rate": 8.259464915650745e-05, "loss": 0.9658, "step": 3310 }, { "epoch": 0.29584292000804163, "grad_norm": 0.548772394657135, "learning_rate": 8.258367448805791e-05, "loss": 0.9851, "step": 3311 }, { "epoch": 0.2959322715393035, "grad_norm": 0.38434407114982605, "learning_rate": 8.257269709037832e-05, "loss": 1.0152, "step": 3312 }, { "epoch": 0.2960216230705654, "grad_norm": 0.44232288002967834, "learning_rate": 8.256171696438817e-05, "loss": 0.9907, "step": 3313 }, { "epoch": 0.2961109746018272, "grad_norm": 0.4280986487865448, "learning_rate": 8.25507341110071e-05, "loss": 0.9695, "step": 3314 }, { "epoch": 0.2962003261330891, "grad_norm": 0.5346389412879944, "learning_rate": 8.253974853115511e-05, "loss": 0.9704, "step": 3315 }, { "epoch": 0.296289677664351, "grad_norm": 0.43099406361579895, "learning_rate": 8.252876022575234e-05, "loss": 1.0432, "step": 3316 }, { "epoch": 0.2963790291956128, "grad_norm": 0.412399560213089, "learning_rate": 8.251776919571915e-05, "loss": 1.0178, "step": 3317 }, { "epoch": 0.2964683807268747, "grad_norm": 0.40774041414260864, "learning_rate": 8.250677544197619e-05, "loss": 1.0056, "step": 3318 }, { "epoch": 0.2965577322581366, "grad_norm": 0.4034601151943207, "learning_rate": 8.249577896544427e-05, "loss": 1.0198, "step": 3319 }, { "epoch": 0.2966470837893984, "grad_norm": 0.46452754735946655, "learning_rate": 8.248477976704449e-05, "loss": 1.0138, "step": 3320 }, { "epoch": 0.2967364353206603, "grad_norm": 0.512606143951416, "learning_rate": 8.247377784769814e-05, "loss": 1.0394, "step": 3321 }, { "epoch": 0.2968257868519222, "grad_norm": 0.557276725769043, "learning_rate": 8.246277320832676e-05, "loss": 1.0412, "step": 3322 }, { "epoch": 0.29691513838318406, "grad_norm": 0.4517113268375397, "learning_rate": 8.245176584985208e-05, "loss": 0.9373, "step": 3323 }, { "epoch": 0.2970044899144459, "grad_norm": 0.3471103608608246, "learning_rate": 8.24407557731961e-05, "loss": 1.0308, "step": 3324 }, { "epoch": 0.2970938414457078, "grad_norm": 0.48082980513572693, "learning_rate": 8.242974297928105e-05, "loss": 0.9768, "step": 3325 }, { "epoch": 0.29718319297696966, "grad_norm": 0.47311216592788696, "learning_rate": 8.241872746902935e-05, "loss": 0.9338, "step": 3326 }, { "epoch": 0.2972725445082315, "grad_norm": 0.5065577626228333, "learning_rate": 8.240770924336364e-05, "loss": 0.9229, "step": 3327 }, { "epoch": 0.29736189603949337, "grad_norm": 0.45983678102493286, "learning_rate": 8.239668830320686e-05, "loss": 1.0353, "step": 3328 }, { "epoch": 0.29745124757075525, "grad_norm": 0.5233861804008484, "learning_rate": 8.23856646494821e-05, "loss": 0.9412, "step": 3329 }, { "epoch": 0.29754059910201713, "grad_norm": 0.4303194284439087, "learning_rate": 8.237463828311272e-05, "loss": 1.0683, "step": 3330 }, { "epoch": 0.29762995063327896, "grad_norm": 0.4418349862098694, "learning_rate": 8.23636092050223e-05, "loss": 0.9342, "step": 3331 }, { "epoch": 0.29771930216454084, "grad_norm": 0.41626089811325073, "learning_rate": 8.235257741613463e-05, "loss": 0.9939, "step": 3332 }, { "epoch": 0.2978086536958027, "grad_norm": 0.41136887669563293, "learning_rate": 8.234154291737375e-05, "loss": 1.0507, "step": 3333 }, { "epoch": 0.29789800522706456, "grad_norm": 0.4649127125740051, "learning_rate": 8.233050570966392e-05, "loss": 1.0003, "step": 3334 }, { "epoch": 0.29798735675832644, "grad_norm": 0.43239861726760864, "learning_rate": 8.231946579392961e-05, "loss": 0.9848, "step": 3335 }, { "epoch": 0.2980767082895883, "grad_norm": 0.395386278629303, "learning_rate": 8.230842317109555e-05, "loss": 1.0269, "step": 3336 }, { "epoch": 0.2981660598208502, "grad_norm": 0.5006843209266663, "learning_rate": 8.229737784208667e-05, "loss": 1.0536, "step": 3337 }, { "epoch": 0.29825541135211203, "grad_norm": 0.39409273862838745, "learning_rate": 8.22863298078281e-05, "loss": 0.9827, "step": 3338 }, { "epoch": 0.2983447628833739, "grad_norm": 0.4018579125404358, "learning_rate": 8.22752790692453e-05, "loss": 1.0009, "step": 3339 }, { "epoch": 0.2984341144146358, "grad_norm": 0.4127073585987091, "learning_rate": 8.226422562726383e-05, "loss": 1.0012, "step": 3340 }, { "epoch": 0.2985234659458976, "grad_norm": 0.4360758662223816, "learning_rate": 8.225316948280955e-05, "loss": 1.0098, "step": 3341 }, { "epoch": 0.2986128174771595, "grad_norm": 0.4666980803012848, "learning_rate": 8.224211063680853e-05, "loss": 1.0034, "step": 3342 }, { "epoch": 0.2987021690084214, "grad_norm": 0.4008931517601013, "learning_rate": 8.223104909018707e-05, "loss": 1.0339, "step": 3343 }, { "epoch": 0.2987915205396833, "grad_norm": 0.38966014981269836, "learning_rate": 8.22199848438717e-05, "loss": 1.0023, "step": 3344 }, { "epoch": 0.2988808720709451, "grad_norm": 0.48258697986602783, "learning_rate": 8.220891789878915e-05, "loss": 0.9615, "step": 3345 }, { "epoch": 0.298970223602207, "grad_norm": 0.4819931387901306, "learning_rate": 8.21978482558664e-05, "loss": 0.9779, "step": 3346 }, { "epoch": 0.29905957513346887, "grad_norm": 0.48947110772132874, "learning_rate": 8.218677591603066e-05, "loss": 0.9124, "step": 3347 }, { "epoch": 0.2991489266647307, "grad_norm": 0.39361852407455444, "learning_rate": 8.217570088020936e-05, "loss": 1.079, "step": 3348 }, { "epoch": 0.2992382781959926, "grad_norm": 0.5385176539421082, "learning_rate": 8.216462314933013e-05, "loss": 0.9939, "step": 3349 }, { "epoch": 0.29932762972725446, "grad_norm": 0.4513779878616333, "learning_rate": 8.215354272432086e-05, "loss": 0.9908, "step": 3350 }, { "epoch": 0.2994169812585163, "grad_norm": 0.4059743285179138, "learning_rate": 8.214245960610967e-05, "loss": 1.02, "step": 3351 }, { "epoch": 0.2995063327897782, "grad_norm": 0.43754130601882935, "learning_rate": 8.213137379562485e-05, "loss": 0.9945, "step": 3352 }, { "epoch": 0.29959568432104006, "grad_norm": 0.414631724357605, "learning_rate": 8.2120285293795e-05, "loss": 1.0089, "step": 3353 }, { "epoch": 0.29968503585230194, "grad_norm": 0.518393337726593, "learning_rate": 8.210919410154888e-05, "loss": 1.0141, "step": 3354 }, { "epoch": 0.29977438738356377, "grad_norm": 0.3928556442260742, "learning_rate": 8.209810021981548e-05, "loss": 1.0099, "step": 3355 }, { "epoch": 0.29986373891482565, "grad_norm": 0.4404885470867157, "learning_rate": 8.208700364952404e-05, "loss": 1.0929, "step": 3356 }, { "epoch": 0.29995309044608753, "grad_norm": 0.5233496427536011, "learning_rate": 8.207590439160404e-05, "loss": 0.9228, "step": 3357 }, { "epoch": 0.30004244197734936, "grad_norm": 0.35965949296951294, "learning_rate": 8.206480244698514e-05, "loss": 1.0296, "step": 3358 }, { "epoch": 0.30013179350861124, "grad_norm": 0.45257434248924255, "learning_rate": 8.205369781659724e-05, "loss": 0.9396, "step": 3359 }, { "epoch": 0.3002211450398731, "grad_norm": 0.4101848304271698, "learning_rate": 8.204259050137048e-05, "loss": 1.0811, "step": 3360 }, { "epoch": 0.300310496571135, "grad_norm": 0.48168009519577026, "learning_rate": 8.203148050223521e-05, "loss": 0.9708, "step": 3361 }, { "epoch": 0.30039984810239684, "grad_norm": 0.434770405292511, "learning_rate": 8.202036782012203e-05, "loss": 0.9487, "step": 3362 }, { "epoch": 0.3004891996336587, "grad_norm": 0.3673984706401825, "learning_rate": 8.200925245596174e-05, "loss": 1.0221, "step": 3363 }, { "epoch": 0.3005785511649206, "grad_norm": 0.4212033152580261, "learning_rate": 8.199813441068535e-05, "loss": 1.0128, "step": 3364 }, { "epoch": 0.30066790269618243, "grad_norm": 0.42803311347961426, "learning_rate": 8.198701368522412e-05, "loss": 1.0614, "step": 3365 }, { "epoch": 0.3007572542274443, "grad_norm": 0.48348864912986755, "learning_rate": 8.197589028050956e-05, "loss": 0.9918, "step": 3366 }, { "epoch": 0.3008466057587062, "grad_norm": 0.455049067735672, "learning_rate": 8.196476419747333e-05, "loss": 1.0028, "step": 3367 }, { "epoch": 0.3009359572899681, "grad_norm": 0.397243857383728, "learning_rate": 8.195363543704739e-05, "loss": 0.9751, "step": 3368 }, { "epoch": 0.3010253088212299, "grad_norm": 0.3902703523635864, "learning_rate": 8.194250400016388e-05, "loss": 1.0078, "step": 3369 }, { "epoch": 0.3011146603524918, "grad_norm": 0.41560593247413635, "learning_rate": 8.193136988775516e-05, "loss": 0.9526, "step": 3370 }, { "epoch": 0.3012040118837537, "grad_norm": 0.44474300742149353, "learning_rate": 8.192023310075387e-05, "loss": 0.8866, "step": 3371 }, { "epoch": 0.3012933634150155, "grad_norm": 0.34557124972343445, "learning_rate": 8.190909364009281e-05, "loss": 1.0232, "step": 3372 }, { "epoch": 0.3013827149462774, "grad_norm": 0.4132451117038727, "learning_rate": 8.1897951506705e-05, "loss": 1.0186, "step": 3373 }, { "epoch": 0.30147206647753927, "grad_norm": 0.4821149706840515, "learning_rate": 8.188680670152379e-05, "loss": 0.9414, "step": 3374 }, { "epoch": 0.30156141800880115, "grad_norm": 0.43094414472579956, "learning_rate": 8.18756592254826e-05, "loss": 0.9497, "step": 3375 }, { "epoch": 0.301650769540063, "grad_norm": 0.3759251832962036, "learning_rate": 8.18645090795152e-05, "loss": 1.0405, "step": 3376 }, { "epoch": 0.30174012107132486, "grad_norm": 0.5242640376091003, "learning_rate": 8.185335626455549e-05, "loss": 0.9464, "step": 3377 }, { "epoch": 0.30182947260258675, "grad_norm": 0.4868447780609131, "learning_rate": 8.184220078153768e-05, "loss": 0.9646, "step": 3378 }, { "epoch": 0.3019188241338486, "grad_norm": 0.441510945558548, "learning_rate": 8.183104263139613e-05, "loss": 0.9802, "step": 3379 }, { "epoch": 0.30200817566511046, "grad_norm": 0.39666980504989624, "learning_rate": 8.181988181506546e-05, "loss": 1.0346, "step": 3380 }, { "epoch": 0.30209752719637234, "grad_norm": 0.3387872874736786, "learning_rate": 8.180871833348052e-05, "loss": 0.9721, "step": 3381 }, { "epoch": 0.30218687872763417, "grad_norm": 0.4515991508960724, "learning_rate": 8.179755218757636e-05, "loss": 0.9902, "step": 3382 }, { "epoch": 0.30227623025889605, "grad_norm": 0.4831897020339966, "learning_rate": 8.178638337828828e-05, "loss": 0.9039, "step": 3383 }, { "epoch": 0.30236558179015793, "grad_norm": 0.44388824701309204, "learning_rate": 8.177521190655178e-05, "loss": 0.9844, "step": 3384 }, { "epoch": 0.3024549333214198, "grad_norm": 0.495235800743103, "learning_rate": 8.176403777330255e-05, "loss": 0.9928, "step": 3385 }, { "epoch": 0.30254428485268164, "grad_norm": 0.5274266600608826, "learning_rate": 8.17528609794766e-05, "loss": 1.0191, "step": 3386 }, { "epoch": 0.3026336363839435, "grad_norm": 0.3730257749557495, "learning_rate": 8.174168152601007e-05, "loss": 0.9684, "step": 3387 }, { "epoch": 0.3027229879152054, "grad_norm": 0.45568758249282837, "learning_rate": 8.173049941383938e-05, "loss": 0.9999, "step": 3388 }, { "epoch": 0.30281233944646724, "grad_norm": 0.4109109342098236, "learning_rate": 8.171931464390115e-05, "loss": 1.0108, "step": 3389 }, { "epoch": 0.3029016909777291, "grad_norm": 0.4820617735385895, "learning_rate": 8.170812721713218e-05, "loss": 1.0419, "step": 3390 }, { "epoch": 0.302991042508991, "grad_norm": 0.4437454044818878, "learning_rate": 8.169693713446959e-05, "loss": 0.9256, "step": 3391 }, { "epoch": 0.3030803940402529, "grad_norm": 0.4973864257335663, "learning_rate": 8.168574439685067e-05, "loss": 0.9822, "step": 3392 }, { "epoch": 0.3031697455715147, "grad_norm": 0.41152751445770264, "learning_rate": 8.167454900521289e-05, "loss": 1.02, "step": 3393 }, { "epoch": 0.3032590971027766, "grad_norm": 0.42877259850502014, "learning_rate": 8.1663350960494e-05, "loss": 1.0445, "step": 3394 }, { "epoch": 0.3033484486340385, "grad_norm": 0.5001733899116516, "learning_rate": 8.165215026363198e-05, "loss": 0.996, "step": 3395 }, { "epoch": 0.3034378001653003, "grad_norm": 0.43617990612983704, "learning_rate": 8.164094691556496e-05, "loss": 0.9811, "step": 3396 }, { "epoch": 0.3035271516965622, "grad_norm": 0.4765497148036957, "learning_rate": 8.162974091723139e-05, "loss": 0.976, "step": 3397 }, { "epoch": 0.3036165032278241, "grad_norm": 0.45245134830474854, "learning_rate": 8.161853226956985e-05, "loss": 1.0029, "step": 3398 }, { "epoch": 0.30370585475908596, "grad_norm": 0.4027976989746094, "learning_rate": 8.160732097351924e-05, "loss": 1.0238, "step": 3399 }, { "epoch": 0.3037952062903478, "grad_norm": 0.4003666341304779, "learning_rate": 8.159610703001857e-05, "loss": 1.0662, "step": 3400 }, { "epoch": 0.30388455782160967, "grad_norm": 0.4242064952850342, "learning_rate": 8.158489044000713e-05, "loss": 1.0077, "step": 3401 }, { "epoch": 0.30397390935287155, "grad_norm": 0.3776360750198364, "learning_rate": 8.157367120442445e-05, "loss": 1.0848, "step": 3402 }, { "epoch": 0.3040632608841334, "grad_norm": 0.40885689854621887, "learning_rate": 8.156244932421026e-05, "loss": 0.9957, "step": 3403 }, { "epoch": 0.30415261241539526, "grad_norm": 0.44386911392211914, "learning_rate": 8.155122480030453e-05, "loss": 1.0783, "step": 3404 }, { "epoch": 0.30424196394665715, "grad_norm": 0.38162335753440857, "learning_rate": 8.153999763364738e-05, "loss": 1.0232, "step": 3405 }, { "epoch": 0.30433131547791903, "grad_norm": 0.3886288106441498, "learning_rate": 8.152876782517925e-05, "loss": 1.0595, "step": 3406 }, { "epoch": 0.30442066700918086, "grad_norm": 0.43489503860473633, "learning_rate": 8.151753537584073e-05, "loss": 1.0397, "step": 3407 }, { "epoch": 0.30451001854044274, "grad_norm": 0.4794921278953552, "learning_rate": 8.150630028657267e-05, "loss": 0.9464, "step": 3408 }, { "epoch": 0.3045993700717046, "grad_norm": 0.5062465071678162, "learning_rate": 8.149506255831614e-05, "loss": 0.9363, "step": 3409 }, { "epoch": 0.30468872160296645, "grad_norm": 0.4547816514968872, "learning_rate": 8.148382219201241e-05, "loss": 0.981, "step": 3410 }, { "epoch": 0.30477807313422833, "grad_norm": 0.43893638253211975, "learning_rate": 8.147257918860296e-05, "loss": 1.0097, "step": 3411 }, { "epoch": 0.3048674246654902, "grad_norm": 0.4645942151546478, "learning_rate": 8.146133354902954e-05, "loss": 1.0032, "step": 3412 }, { "epoch": 0.3049567761967521, "grad_norm": 0.39374154806137085, "learning_rate": 8.145008527423409e-05, "loss": 1.0049, "step": 3413 }, { "epoch": 0.3050461277280139, "grad_norm": 0.4022867977619171, "learning_rate": 8.143883436515875e-05, "loss": 0.9876, "step": 3414 }, { "epoch": 0.3051354792592758, "grad_norm": 0.4411511719226837, "learning_rate": 8.142758082274593e-05, "loss": 0.9858, "step": 3415 }, { "epoch": 0.3052248307905377, "grad_norm": 0.4204576909542084, "learning_rate": 8.141632464793822e-05, "loss": 0.9826, "step": 3416 }, { "epoch": 0.3053141823217995, "grad_norm": 0.40963998436927795, "learning_rate": 8.140506584167845e-05, "loss": 0.9313, "step": 3417 }, { "epoch": 0.3054035338530614, "grad_norm": 0.40934035181999207, "learning_rate": 8.139380440490965e-05, "loss": 1.037, "step": 3418 }, { "epoch": 0.3054928853843233, "grad_norm": 0.3806384801864624, "learning_rate": 8.13825403385751e-05, "loss": 0.9968, "step": 3419 }, { "epoch": 0.3055822369155851, "grad_norm": 0.3986847400665283, "learning_rate": 8.137127364361828e-05, "loss": 0.9992, "step": 3420 }, { "epoch": 0.305671588446847, "grad_norm": 0.44412630796432495, "learning_rate": 8.136000432098291e-05, "loss": 1.0355, "step": 3421 }, { "epoch": 0.3057609399781089, "grad_norm": 0.43642017245292664, "learning_rate": 8.13487323716129e-05, "loss": 1.0421, "step": 3422 }, { "epoch": 0.30585029150937076, "grad_norm": 0.39217737317085266, "learning_rate": 8.133745779645238e-05, "loss": 1.0206, "step": 3423 }, { "epoch": 0.3059396430406326, "grad_norm": 0.456626832485199, "learning_rate": 8.132618059644577e-05, "loss": 1.0341, "step": 3424 }, { "epoch": 0.3060289945718945, "grad_norm": 0.5178848505020142, "learning_rate": 8.13149007725376e-05, "loss": 0.9424, "step": 3425 }, { "epoch": 0.30611834610315636, "grad_norm": 0.49753326177597046, "learning_rate": 8.13036183256727e-05, "loss": 1.0006, "step": 3426 }, { "epoch": 0.3062076976344182, "grad_norm": 0.48190826177597046, "learning_rate": 8.129233325679611e-05, "loss": 0.9446, "step": 3427 }, { "epoch": 0.30629704916568007, "grad_norm": 0.431306928396225, "learning_rate": 8.128104556685305e-05, "loss": 0.9653, "step": 3428 }, { "epoch": 0.30638640069694195, "grad_norm": 0.44903165102005005, "learning_rate": 8.126975525678898e-05, "loss": 1.0018, "step": 3429 }, { "epoch": 0.30647575222820383, "grad_norm": 0.4540453553199768, "learning_rate": 8.12584623275496e-05, "loss": 0.9977, "step": 3430 }, { "epoch": 0.30656510375946566, "grad_norm": 0.40107932686805725, "learning_rate": 8.124716678008082e-05, "loss": 1.058, "step": 3431 }, { "epoch": 0.30665445529072755, "grad_norm": 0.3945641815662384, "learning_rate": 8.123586861532873e-05, "loss": 1.0387, "step": 3432 }, { "epoch": 0.30674380682198943, "grad_norm": 0.45806485414505005, "learning_rate": 8.12245678342397e-05, "loss": 1.0011, "step": 3433 }, { "epoch": 0.30683315835325126, "grad_norm": 0.49401962757110596, "learning_rate": 8.12132644377603e-05, "loss": 1.005, "step": 3434 }, { "epoch": 0.30692250988451314, "grad_norm": 0.44378378987312317, "learning_rate": 8.120195842683728e-05, "loss": 0.9761, "step": 3435 }, { "epoch": 0.307011861415775, "grad_norm": 0.4618094563484192, "learning_rate": 8.119064980241766e-05, "loss": 0.9972, "step": 3436 }, { "epoch": 0.3071012129470369, "grad_norm": 0.4815685749053955, "learning_rate": 8.117933856544864e-05, "loss": 0.9633, "step": 3437 }, { "epoch": 0.30719056447829873, "grad_norm": 0.43568116426467896, "learning_rate": 8.116802471687768e-05, "loss": 0.9158, "step": 3438 }, { "epoch": 0.3072799160095606, "grad_norm": 0.4017315208911896, "learning_rate": 8.11567082576524e-05, "loss": 1.0004, "step": 3439 }, { "epoch": 0.3073692675408225, "grad_norm": 0.43449345231056213, "learning_rate": 8.114538918872072e-05, "loss": 1.0021, "step": 3440 }, { "epoch": 0.3074586190720843, "grad_norm": 0.37431442737579346, "learning_rate": 8.11340675110307e-05, "loss": 1.0407, "step": 3441 }, { "epoch": 0.3075479706033462, "grad_norm": 0.42223483324050903, "learning_rate": 8.112274322553067e-05, "loss": 0.9816, "step": 3442 }, { "epoch": 0.3076373221346081, "grad_norm": 0.5260857939720154, "learning_rate": 8.111141633316914e-05, "loss": 0.9194, "step": 3443 }, { "epoch": 0.30772667366587, "grad_norm": 0.4374569356441498, "learning_rate": 8.110008683489487e-05, "loss": 1.0595, "step": 3444 }, { "epoch": 0.3078160251971318, "grad_norm": 0.45745593309402466, "learning_rate": 8.108875473165683e-05, "loss": 0.988, "step": 3445 }, { "epoch": 0.3079053767283937, "grad_norm": 0.42707595229148865, "learning_rate": 8.10774200244042e-05, "loss": 1.0022, "step": 3446 }, { "epoch": 0.30799472825965557, "grad_norm": 0.4703771471977234, "learning_rate": 8.106608271408638e-05, "loss": 1.0088, "step": 3447 }, { "epoch": 0.3080840797909174, "grad_norm": 0.5333291292190552, "learning_rate": 8.105474280165298e-05, "loss": 0.994, "step": 3448 }, { "epoch": 0.3081734313221793, "grad_norm": 0.3782300055027008, "learning_rate": 8.104340028805386e-05, "loss": 1.0284, "step": 3449 }, { "epoch": 0.30826278285344116, "grad_norm": 0.5105333924293518, "learning_rate": 8.103205517423906e-05, "loss": 0.9954, "step": 3450 }, { "epoch": 0.308352134384703, "grad_norm": 0.37891459465026855, "learning_rate": 8.102070746115888e-05, "loss": 0.9678, "step": 3451 }, { "epoch": 0.3084414859159649, "grad_norm": 0.47059276700019836, "learning_rate": 8.100935714976377e-05, "loss": 0.9004, "step": 3452 }, { "epoch": 0.30853083744722676, "grad_norm": 0.38149774074554443, "learning_rate": 8.099800424100448e-05, "loss": 0.9746, "step": 3453 }, { "epoch": 0.30862018897848864, "grad_norm": 0.4379188120365143, "learning_rate": 8.09866487358319e-05, "loss": 1.02, "step": 3454 }, { "epoch": 0.30870954050975047, "grad_norm": 0.4322841465473175, "learning_rate": 8.097529063519723e-05, "loss": 0.9619, "step": 3455 }, { "epoch": 0.30879889204101235, "grad_norm": 0.4443778097629547, "learning_rate": 8.096392994005177e-05, "loss": 0.9386, "step": 3456 }, { "epoch": 0.30888824357227423, "grad_norm": 0.4804965853691101, "learning_rate": 8.095256665134712e-05, "loss": 0.9817, "step": 3457 }, { "epoch": 0.30897759510353606, "grad_norm": 0.5008002519607544, "learning_rate": 8.094120077003509e-05, "loss": 0.9661, "step": 3458 }, { "epoch": 0.30906694663479795, "grad_norm": 0.4188991189002991, "learning_rate": 8.092983229706767e-05, "loss": 0.9905, "step": 3459 }, { "epoch": 0.30915629816605983, "grad_norm": 0.3990739583969116, "learning_rate": 8.091846123339715e-05, "loss": 1.0172, "step": 3460 }, { "epoch": 0.3092456496973217, "grad_norm": 0.43267446756362915, "learning_rate": 8.090708757997591e-05, "loss": 1.0371, "step": 3461 }, { "epoch": 0.30933500122858354, "grad_norm": 0.42393234372138977, "learning_rate": 8.089571133775663e-05, "loss": 1.0194, "step": 3462 }, { "epoch": 0.3094243527598454, "grad_norm": 0.4767492711544037, "learning_rate": 8.08843325076922e-05, "loss": 1.0495, "step": 3463 }, { "epoch": 0.3095137042911073, "grad_norm": 0.42636582255363464, "learning_rate": 8.087295109073574e-05, "loss": 1.0434, "step": 3464 }, { "epoch": 0.30960305582236913, "grad_norm": 0.49471211433410645, "learning_rate": 8.086156708784054e-05, "loss": 0.9261, "step": 3465 }, { "epoch": 0.309692407353631, "grad_norm": 0.4437471628189087, "learning_rate": 8.085018049996011e-05, "loss": 1.0086, "step": 3466 }, { "epoch": 0.3097817588848929, "grad_norm": 0.44580212235450745, "learning_rate": 8.083879132804826e-05, "loss": 0.9378, "step": 3467 }, { "epoch": 0.3098711104161548, "grad_norm": 0.4966757595539093, "learning_rate": 8.082739957305889e-05, "loss": 0.9868, "step": 3468 }, { "epoch": 0.3099604619474166, "grad_norm": 0.5008419752120972, "learning_rate": 8.081600523594622e-05, "loss": 0.9713, "step": 3469 }, { "epoch": 0.3100498134786785, "grad_norm": 0.44220054149627686, "learning_rate": 8.080460831766464e-05, "loss": 0.9581, "step": 3470 }, { "epoch": 0.3101391650099404, "grad_norm": 0.47498950362205505, "learning_rate": 8.079320881916877e-05, "loss": 1.0412, "step": 3471 }, { "epoch": 0.3102285165412022, "grad_norm": 0.48593392968177795, "learning_rate": 8.07818067414134e-05, "loss": 0.9469, "step": 3472 }, { "epoch": 0.3103178680724641, "grad_norm": 0.3928394019603729, "learning_rate": 8.077040208535362e-05, "loss": 0.9524, "step": 3473 }, { "epoch": 0.31040721960372597, "grad_norm": 0.38281822204589844, "learning_rate": 8.075899485194467e-05, "loss": 1.0437, "step": 3474 }, { "epoch": 0.31049657113498785, "grad_norm": 0.408634752035141, "learning_rate": 8.074758504214206e-05, "loss": 0.9995, "step": 3475 }, { "epoch": 0.3105859226662497, "grad_norm": 0.4135921001434326, "learning_rate": 8.073617265690144e-05, "loss": 1.0512, "step": 3476 }, { "epoch": 0.31067527419751156, "grad_norm": 0.45093652606010437, "learning_rate": 8.072475769717872e-05, "loss": 0.9089, "step": 3477 }, { "epoch": 0.31076462572877345, "grad_norm": 0.38925284147262573, "learning_rate": 8.071334016393006e-05, "loss": 0.958, "step": 3478 }, { "epoch": 0.3108539772600353, "grad_norm": 0.4077800512313843, "learning_rate": 8.070192005811177e-05, "loss": 1.0573, "step": 3479 }, { "epoch": 0.31094332879129716, "grad_norm": 0.48132577538490295, "learning_rate": 8.069049738068041e-05, "loss": 0.899, "step": 3480 }, { "epoch": 0.31103268032255904, "grad_norm": 0.4954333007335663, "learning_rate": 8.067907213259278e-05, "loss": 0.9735, "step": 3481 }, { "epoch": 0.31112203185382087, "grad_norm": 0.3865114152431488, "learning_rate": 8.066764431480583e-05, "loss": 0.9941, "step": 3482 }, { "epoch": 0.31121138338508275, "grad_norm": 0.3530360460281372, "learning_rate": 8.065621392827678e-05, "loss": 1.0592, "step": 3483 }, { "epoch": 0.31130073491634463, "grad_norm": 0.4171468913555145, "learning_rate": 8.064478097396304e-05, "loss": 0.9654, "step": 3484 }, { "epoch": 0.3113900864476065, "grad_norm": 0.508972704410553, "learning_rate": 8.063334545282224e-05, "loss": 0.8936, "step": 3485 }, { "epoch": 0.31147943797886835, "grad_norm": 0.3663281500339508, "learning_rate": 8.062190736581223e-05, "loss": 1.0223, "step": 3486 }, { "epoch": 0.31156878951013023, "grad_norm": 0.4794585704803467, "learning_rate": 8.061046671389107e-05, "loss": 0.9199, "step": 3487 }, { "epoch": 0.3116581410413921, "grad_norm": 0.4471113383769989, "learning_rate": 8.059902349801704e-05, "loss": 0.9633, "step": 3488 }, { "epoch": 0.31174749257265394, "grad_norm": 0.3974318504333496, "learning_rate": 8.058757771914865e-05, "loss": 0.9769, "step": 3489 }, { "epoch": 0.3118368441039158, "grad_norm": 0.4705379605293274, "learning_rate": 8.057612937824456e-05, "loss": 0.9843, "step": 3490 }, { "epoch": 0.3119261956351777, "grad_norm": 0.3952665328979492, "learning_rate": 8.056467847626373e-05, "loss": 1.0787, "step": 3491 }, { "epoch": 0.3120155471664396, "grad_norm": 0.451646089553833, "learning_rate": 8.055322501416527e-05, "loss": 1.0125, "step": 3492 }, { "epoch": 0.3121048986977014, "grad_norm": 0.4705079197883606, "learning_rate": 8.054176899290855e-05, "loss": 0.9875, "step": 3493 }, { "epoch": 0.3121942502289633, "grad_norm": 0.4389439523220062, "learning_rate": 8.053031041345312e-05, "loss": 1.0343, "step": 3494 }, { "epoch": 0.3122836017602252, "grad_norm": 0.513843834400177, "learning_rate": 8.051884927675877e-05, "loss": 0.9618, "step": 3495 }, { "epoch": 0.312372953291487, "grad_norm": 0.4346451163291931, "learning_rate": 8.050738558378549e-05, "loss": 1.0277, "step": 3496 }, { "epoch": 0.3124623048227489, "grad_norm": 0.5713686347007751, "learning_rate": 8.049591933549347e-05, "loss": 0.9842, "step": 3497 }, { "epoch": 0.3125516563540108, "grad_norm": 0.4521034359931946, "learning_rate": 8.048445053284315e-05, "loss": 0.9809, "step": 3498 }, { "epoch": 0.31264100788527266, "grad_norm": 0.4172121584415436, "learning_rate": 8.047297917679515e-05, "loss": 1.0247, "step": 3499 }, { "epoch": 0.3127303594165345, "grad_norm": 0.4868249297142029, "learning_rate": 8.046150526831033e-05, "loss": 0.9791, "step": 3500 }, { "epoch": 0.31281971094779637, "grad_norm": 0.34857162833213806, "learning_rate": 8.045002880834975e-05, "loss": 1.0506, "step": 3501 }, { "epoch": 0.31290906247905825, "grad_norm": 0.4393348693847656, "learning_rate": 8.043854979787467e-05, "loss": 1.0026, "step": 3502 }, { "epoch": 0.3129984140103201, "grad_norm": 0.38047170639038086, "learning_rate": 8.04270682378466e-05, "loss": 1.0297, "step": 3503 }, { "epoch": 0.31308776554158196, "grad_norm": 0.39295217394828796, "learning_rate": 8.041558412922724e-05, "loss": 0.997, "step": 3504 }, { "epoch": 0.31317711707284385, "grad_norm": 0.4318108856678009, "learning_rate": 8.04040974729785e-05, "loss": 0.9394, "step": 3505 }, { "epoch": 0.31326646860410573, "grad_norm": 0.41878437995910645, "learning_rate": 8.039260827006252e-05, "loss": 1.012, "step": 3506 }, { "epoch": 0.31335582013536756, "grad_norm": 0.3880411982536316, "learning_rate": 8.038111652144163e-05, "loss": 0.962, "step": 3507 }, { "epoch": 0.31344517166662944, "grad_norm": 0.39992988109588623, "learning_rate": 8.036962222807838e-05, "loss": 0.9838, "step": 3508 }, { "epoch": 0.3135345231978913, "grad_norm": 0.3804689347743988, "learning_rate": 8.035812539093557e-05, "loss": 1.0372, "step": 3509 }, { "epoch": 0.31362387472915315, "grad_norm": 0.4325745403766632, "learning_rate": 8.034662601097615e-05, "loss": 1.032, "step": 3510 }, { "epoch": 0.31371322626041503, "grad_norm": 0.46068939566612244, "learning_rate": 8.033512408916334e-05, "loss": 0.9907, "step": 3511 }, { "epoch": 0.3138025777916769, "grad_norm": 0.5449123382568359, "learning_rate": 8.032361962646053e-05, "loss": 0.9458, "step": 3512 }, { "epoch": 0.31389192932293875, "grad_norm": 0.4366675913333893, "learning_rate": 8.031211262383136e-05, "loss": 1.0291, "step": 3513 }, { "epoch": 0.31398128085420063, "grad_norm": 0.4750640392303467, "learning_rate": 8.030060308223964e-05, "loss": 0.9661, "step": 3514 }, { "epoch": 0.3140706323854625, "grad_norm": 0.365343302488327, "learning_rate": 8.028909100264943e-05, "loss": 1.0437, "step": 3515 }, { "epoch": 0.3141599839167244, "grad_norm": 0.580810546875, "learning_rate": 8.0277576386025e-05, "loss": 0.9558, "step": 3516 }, { "epoch": 0.3142493354479862, "grad_norm": 0.5106691718101501, "learning_rate": 8.026605923333081e-05, "loss": 0.9663, "step": 3517 }, { "epoch": 0.3143386869792481, "grad_norm": 0.48174557089805603, "learning_rate": 8.025453954553155e-05, "loss": 0.9992, "step": 3518 }, { "epoch": 0.31442803851051, "grad_norm": 0.41393786668777466, "learning_rate": 8.024301732359212e-05, "loss": 1.0187, "step": 3519 }, { "epoch": 0.3145173900417718, "grad_norm": 0.4682253897190094, "learning_rate": 8.02314925684776e-05, "loss": 0.9622, "step": 3520 }, { "epoch": 0.3146067415730337, "grad_norm": 0.4221019446849823, "learning_rate": 8.021996528115335e-05, "loss": 1.0434, "step": 3521 }, { "epoch": 0.3146960931042956, "grad_norm": 0.3874817490577698, "learning_rate": 8.020843546258487e-05, "loss": 0.9968, "step": 3522 }, { "epoch": 0.31478544463555747, "grad_norm": 0.49449774622917175, "learning_rate": 8.019690311373793e-05, "loss": 0.9772, "step": 3523 }, { "epoch": 0.3148747961668193, "grad_norm": 0.4564272463321686, "learning_rate": 8.018536823557848e-05, "loss": 0.9736, "step": 3524 }, { "epoch": 0.3149641476980812, "grad_norm": 0.3975536525249481, "learning_rate": 8.017383082907269e-05, "loss": 0.988, "step": 3525 }, { "epoch": 0.31505349922934306, "grad_norm": 0.42857542634010315, "learning_rate": 8.016229089518694e-05, "loss": 0.9846, "step": 3526 }, { "epoch": 0.3151428507606049, "grad_norm": 0.34974923729896545, "learning_rate": 8.01507484348878e-05, "loss": 0.9828, "step": 3527 }, { "epoch": 0.31523220229186677, "grad_norm": 0.39053013920783997, "learning_rate": 8.013920344914212e-05, "loss": 1.0341, "step": 3528 }, { "epoch": 0.31532155382312865, "grad_norm": 0.44937631487846375, "learning_rate": 8.012765593891688e-05, "loss": 0.9921, "step": 3529 }, { "epoch": 0.31541090535439054, "grad_norm": 0.5252249836921692, "learning_rate": 8.011610590517932e-05, "loss": 0.9491, "step": 3530 }, { "epoch": 0.31550025688565236, "grad_norm": 0.3755653500556946, "learning_rate": 8.010455334889689e-05, "loss": 1.0336, "step": 3531 }, { "epoch": 0.31558960841691425, "grad_norm": 0.47862106561660767, "learning_rate": 8.00929982710372e-05, "loss": 1.0282, "step": 3532 }, { "epoch": 0.31567895994817613, "grad_norm": 0.47411367297172546, "learning_rate": 8.008144067256815e-05, "loss": 0.8744, "step": 3533 }, { "epoch": 0.31576831147943796, "grad_norm": 0.41446375846862793, "learning_rate": 8.006988055445778e-05, "loss": 0.9799, "step": 3534 }, { "epoch": 0.31585766301069984, "grad_norm": 0.45246684551239014, "learning_rate": 8.00583179176744e-05, "loss": 0.9723, "step": 3535 }, { "epoch": 0.3159470145419617, "grad_norm": 0.46888962388038635, "learning_rate": 8.004675276318651e-05, "loss": 0.9581, "step": 3536 }, { "epoch": 0.3160363660732236, "grad_norm": 0.4019709527492523, "learning_rate": 8.00351850919628e-05, "loss": 0.982, "step": 3537 }, { "epoch": 0.31612571760448543, "grad_norm": 0.4450971186161041, "learning_rate": 8.002361490497217e-05, "loss": 0.9854, "step": 3538 }, { "epoch": 0.3162150691357473, "grad_norm": 0.5295263528823853, "learning_rate": 8.001204220318377e-05, "loss": 0.9173, "step": 3539 }, { "epoch": 0.3163044206670092, "grad_norm": 0.47927385568618774, "learning_rate": 8.000046698756694e-05, "loss": 0.8995, "step": 3540 }, { "epoch": 0.31639377219827103, "grad_norm": 0.6096744537353516, "learning_rate": 7.99888892590912e-05, "loss": 0.9354, "step": 3541 }, { "epoch": 0.3164831237295329, "grad_norm": 0.40292373299598694, "learning_rate": 7.997730901872635e-05, "loss": 1.0007, "step": 3542 }, { "epoch": 0.3165724752607948, "grad_norm": 0.532189667224884, "learning_rate": 7.996572626744232e-05, "loss": 1.0307, "step": 3543 }, { "epoch": 0.3166618267920566, "grad_norm": 0.5631407499313354, "learning_rate": 7.99541410062093e-05, "loss": 0.9184, "step": 3544 }, { "epoch": 0.3167511783233185, "grad_norm": 0.4307250678539276, "learning_rate": 7.994255323599769e-05, "loss": 0.9451, "step": 3545 }, { "epoch": 0.3168405298545804, "grad_norm": 0.4229245185852051, "learning_rate": 7.993096295777807e-05, "loss": 1.0172, "step": 3546 }, { "epoch": 0.31692988138584227, "grad_norm": 0.4741484522819519, "learning_rate": 7.991937017252126e-05, "loss": 0.9643, "step": 3547 }, { "epoch": 0.3170192329171041, "grad_norm": 0.46456822752952576, "learning_rate": 7.990777488119829e-05, "loss": 1.0751, "step": 3548 }, { "epoch": 0.317108584448366, "grad_norm": 0.4328577518463135, "learning_rate": 7.989617708478039e-05, "loss": 0.9881, "step": 3549 }, { "epoch": 0.31719793597962787, "grad_norm": 0.48607033491134644, "learning_rate": 7.988457678423898e-05, "loss": 1.0788, "step": 3550 }, { "epoch": 0.3172872875108897, "grad_norm": 0.4115293622016907, "learning_rate": 7.987297398054572e-05, "loss": 1.005, "step": 3551 }, { "epoch": 0.3173766390421516, "grad_norm": 0.4423523247241974, "learning_rate": 7.986136867467247e-05, "loss": 1.0001, "step": 3552 }, { "epoch": 0.31746599057341346, "grad_norm": 0.4115248918533325, "learning_rate": 7.984976086759128e-05, "loss": 1.009, "step": 3553 }, { "epoch": 0.31755534210467534, "grad_norm": 0.4350251853466034, "learning_rate": 7.983815056027444e-05, "loss": 0.9741, "step": 3554 }, { "epoch": 0.31764469363593717, "grad_norm": 0.42540842294692993, "learning_rate": 7.982653775369444e-05, "loss": 0.9493, "step": 3555 }, { "epoch": 0.31773404516719905, "grad_norm": 0.4265209436416626, "learning_rate": 7.981492244882398e-05, "loss": 0.9707, "step": 3556 }, { "epoch": 0.31782339669846094, "grad_norm": 0.4104164242744446, "learning_rate": 7.980330464663597e-05, "loss": 1.0317, "step": 3557 }, { "epoch": 0.31791274822972276, "grad_norm": 0.39542466402053833, "learning_rate": 7.979168434810352e-05, "loss": 0.9764, "step": 3558 }, { "epoch": 0.31800209976098465, "grad_norm": 0.3717448115348816, "learning_rate": 7.978006155419993e-05, "loss": 0.9869, "step": 3559 }, { "epoch": 0.31809145129224653, "grad_norm": 0.42824071645736694, "learning_rate": 7.976843626589875e-05, "loss": 0.9522, "step": 3560 }, { "epoch": 0.3181808028235084, "grad_norm": 0.5165953636169434, "learning_rate": 7.975680848417373e-05, "loss": 1.0895, "step": 3561 }, { "epoch": 0.31827015435477024, "grad_norm": 0.4496411681175232, "learning_rate": 7.974517820999883e-05, "loss": 0.9996, "step": 3562 }, { "epoch": 0.3183595058860321, "grad_norm": 0.41469454765319824, "learning_rate": 7.973354544434818e-05, "loss": 0.9418, "step": 3563 }, { "epoch": 0.318448857417294, "grad_norm": 0.4479805529117584, "learning_rate": 7.972191018819615e-05, "loss": 0.986, "step": 3564 }, { "epoch": 0.31853820894855583, "grad_norm": 0.5563523173332214, "learning_rate": 7.971027244251734e-05, "loss": 0.9081, "step": 3565 }, { "epoch": 0.3186275604798177, "grad_norm": 0.44090694189071655, "learning_rate": 7.969863220828654e-05, "loss": 0.968, "step": 3566 }, { "epoch": 0.3187169120110796, "grad_norm": 0.4855956435203552, "learning_rate": 7.968698948647872e-05, "loss": 0.8986, "step": 3567 }, { "epoch": 0.3188062635423415, "grad_norm": 0.5776668787002563, "learning_rate": 7.967534427806909e-05, "loss": 1.0122, "step": 3568 }, { "epoch": 0.3188956150736033, "grad_norm": 0.487651526927948, "learning_rate": 7.966369658403305e-05, "loss": 0.9403, "step": 3569 }, { "epoch": 0.3189849666048652, "grad_norm": 0.5066558122634888, "learning_rate": 7.965204640534623e-05, "loss": 0.9553, "step": 3570 }, { "epoch": 0.3190743181361271, "grad_norm": 0.47500649094581604, "learning_rate": 7.964039374298447e-05, "loss": 0.9324, "step": 3571 }, { "epoch": 0.3191636696673889, "grad_norm": 0.3772512376308441, "learning_rate": 7.962873859792377e-05, "loss": 0.9869, "step": 3572 }, { "epoch": 0.3192530211986508, "grad_norm": 0.44486117362976074, "learning_rate": 7.96170809711404e-05, "loss": 1.0367, "step": 3573 }, { "epoch": 0.31934237272991267, "grad_norm": 0.6170856356620789, "learning_rate": 7.96054208636108e-05, "loss": 1.091, "step": 3574 }, { "epoch": 0.3194317242611745, "grad_norm": 0.46623945236206055, "learning_rate": 7.959375827631165e-05, "loss": 0.9985, "step": 3575 }, { "epoch": 0.3195210757924364, "grad_norm": 0.38250869512557983, "learning_rate": 7.958209321021979e-05, "loss": 1.027, "step": 3576 }, { "epoch": 0.31961042732369827, "grad_norm": 0.36755305528640747, "learning_rate": 7.95704256663123e-05, "loss": 1.0075, "step": 3577 }, { "epoch": 0.31969977885496015, "grad_norm": 0.47335994243621826, "learning_rate": 7.955875564556645e-05, "loss": 0.9888, "step": 3578 }, { "epoch": 0.319789130386222, "grad_norm": 0.4479726254940033, "learning_rate": 7.954708314895975e-05, "loss": 0.9571, "step": 3579 }, { "epoch": 0.31987848191748386, "grad_norm": 0.4270458519458771, "learning_rate": 7.953540817746988e-05, "loss": 0.9431, "step": 3580 }, { "epoch": 0.31996783344874574, "grad_norm": 0.42351189255714417, "learning_rate": 7.952373073207478e-05, "loss": 0.9591, "step": 3581 }, { "epoch": 0.32005718498000757, "grad_norm": 0.44711586833000183, "learning_rate": 7.951205081375249e-05, "loss": 0.9685, "step": 3582 }, { "epoch": 0.32014653651126945, "grad_norm": 0.44724342226982117, "learning_rate": 7.950036842348139e-05, "loss": 0.9795, "step": 3583 }, { "epoch": 0.32023588804253134, "grad_norm": 0.4775746762752533, "learning_rate": 7.948868356223997e-05, "loss": 0.9054, "step": 3584 }, { "epoch": 0.3203252395737932, "grad_norm": 0.4386691451072693, "learning_rate": 7.9476996231007e-05, "loss": 0.9949, "step": 3585 }, { "epoch": 0.32041459110505505, "grad_norm": 0.40129563212394714, "learning_rate": 7.946530643076138e-05, "loss": 1.0254, "step": 3586 }, { "epoch": 0.32050394263631693, "grad_norm": 0.4904455542564392, "learning_rate": 7.945361416248226e-05, "loss": 0.8934, "step": 3587 }, { "epoch": 0.3205932941675788, "grad_norm": 0.4553501009941101, "learning_rate": 7.9441919427149e-05, "loss": 0.9707, "step": 3588 }, { "epoch": 0.32068264569884064, "grad_norm": 0.38946229219436646, "learning_rate": 7.943022222574116e-05, "loss": 1.0248, "step": 3589 }, { "epoch": 0.3207719972301025, "grad_norm": 0.39546647667884827, "learning_rate": 7.941852255923852e-05, "loss": 1.0132, "step": 3590 }, { "epoch": 0.3208613487613644, "grad_norm": 0.4963313937187195, "learning_rate": 7.940682042862104e-05, "loss": 0.9254, "step": 3591 }, { "epoch": 0.3209507002926263, "grad_norm": 0.42194679379463196, "learning_rate": 7.939511583486887e-05, "loss": 0.9982, "step": 3592 }, { "epoch": 0.3210400518238881, "grad_norm": 0.4145607054233551, "learning_rate": 7.938340877896244e-05, "loss": 1.0461, "step": 3593 }, { "epoch": 0.32112940335515, "grad_norm": 0.5402034521102905, "learning_rate": 7.937169926188232e-05, "loss": 1.0905, "step": 3594 }, { "epoch": 0.3212187548864119, "grad_norm": 0.394927978515625, "learning_rate": 7.935998728460929e-05, "loss": 0.9712, "step": 3595 }, { "epoch": 0.3213081064176737, "grad_norm": 0.393349289894104, "learning_rate": 7.934827284812438e-05, "loss": 0.9541, "step": 3596 }, { "epoch": 0.3213974579489356, "grad_norm": 0.4421713352203369, "learning_rate": 7.93365559534088e-05, "loss": 1.0013, "step": 3597 }, { "epoch": 0.3214868094801975, "grad_norm": 0.46195080876350403, "learning_rate": 7.932483660144394e-05, "loss": 0.9468, "step": 3598 }, { "epoch": 0.32157616101145936, "grad_norm": 0.419318825006485, "learning_rate": 7.931311479321144e-05, "loss": 0.9858, "step": 3599 }, { "epoch": 0.3216655125427212, "grad_norm": 0.4154645800590515, "learning_rate": 7.93013905296931e-05, "loss": 0.9703, "step": 3600 }, { "epoch": 0.32175486407398307, "grad_norm": 0.5366727709770203, "learning_rate": 7.9289663811871e-05, "loss": 0.9062, "step": 3601 }, { "epoch": 0.32184421560524495, "grad_norm": 0.48142287135124207, "learning_rate": 7.927793464072734e-05, "loss": 1.0538, "step": 3602 }, { "epoch": 0.3219335671365068, "grad_norm": 0.4503115713596344, "learning_rate": 7.926620301724459e-05, "loss": 1.0272, "step": 3603 }, { "epoch": 0.32202291866776867, "grad_norm": 0.536191999912262, "learning_rate": 7.925446894240536e-05, "loss": 0.9567, "step": 3604 }, { "epoch": 0.32211227019903055, "grad_norm": 0.38099348545074463, "learning_rate": 7.924273241719254e-05, "loss": 0.9871, "step": 3605 }, { "epoch": 0.3222016217302924, "grad_norm": 0.5245529413223267, "learning_rate": 7.923099344258915e-05, "loss": 0.9687, "step": 3606 }, { "epoch": 0.32229097326155426, "grad_norm": 0.424612432718277, "learning_rate": 7.921925201957851e-05, "loss": 1.0479, "step": 3607 }, { "epoch": 0.32238032479281614, "grad_norm": 0.4393298625946045, "learning_rate": 7.920750814914404e-05, "loss": 1.0369, "step": 3608 }, { "epoch": 0.322469676324078, "grad_norm": 0.4350713789463043, "learning_rate": 7.919576183226945e-05, "loss": 1.0051, "step": 3609 }, { "epoch": 0.32255902785533985, "grad_norm": 0.40692925453186035, "learning_rate": 7.918401306993858e-05, "loss": 1.0296, "step": 3610 }, { "epoch": 0.32264837938660174, "grad_norm": 0.4172888696193695, "learning_rate": 7.917226186313554e-05, "loss": 0.9977, "step": 3611 }, { "epoch": 0.3227377309178636, "grad_norm": 0.46099647879600525, "learning_rate": 7.916050821284462e-05, "loss": 0.9395, "step": 3612 }, { "epoch": 0.32282708244912545, "grad_norm": 0.44035103917121887, "learning_rate": 7.914875212005032e-05, "loss": 0.9543, "step": 3613 }, { "epoch": 0.32291643398038733, "grad_norm": 0.3710711896419525, "learning_rate": 7.913699358573732e-05, "loss": 0.9894, "step": 3614 }, { "epoch": 0.3230057855116492, "grad_norm": 0.5159255862236023, "learning_rate": 7.912523261089051e-05, "loss": 0.9674, "step": 3615 }, { "epoch": 0.3230951370429111, "grad_norm": 0.5290977954864502, "learning_rate": 7.911346919649504e-05, "loss": 1.0115, "step": 3616 }, { "epoch": 0.3231844885741729, "grad_norm": 0.4615987241268158, "learning_rate": 7.910170334353619e-05, "loss": 0.9218, "step": 3617 }, { "epoch": 0.3232738401054348, "grad_norm": 0.41924357414245605, "learning_rate": 7.908993505299948e-05, "loss": 1.0448, "step": 3618 }, { "epoch": 0.3233631916366967, "grad_norm": 0.45864051580429077, "learning_rate": 7.907816432587062e-05, "loss": 1.0082, "step": 3619 }, { "epoch": 0.3234525431679585, "grad_norm": 0.3755020797252655, "learning_rate": 7.906639116313558e-05, "loss": 0.9703, "step": 3620 }, { "epoch": 0.3235418946992204, "grad_norm": 0.40216493606567383, "learning_rate": 7.905461556578043e-05, "loss": 1.0177, "step": 3621 }, { "epoch": 0.3236312462304823, "grad_norm": 0.42985835671424866, "learning_rate": 7.904283753479154e-05, "loss": 0.9477, "step": 3622 }, { "epoch": 0.32372059776174417, "grad_norm": 0.4799264371395111, "learning_rate": 7.903105707115543e-05, "loss": 1.0422, "step": 3623 }, { "epoch": 0.323809949293006, "grad_norm": 0.3927200734615326, "learning_rate": 7.901927417585884e-05, "loss": 0.9533, "step": 3624 }, { "epoch": 0.3238993008242679, "grad_norm": 0.4355437159538269, "learning_rate": 7.90074888498887e-05, "loss": 0.9825, "step": 3625 }, { "epoch": 0.32398865235552976, "grad_norm": 0.40045395493507385, "learning_rate": 7.899570109423217e-05, "loss": 1.0001, "step": 3626 }, { "epoch": 0.3240780038867916, "grad_norm": 0.4917450249195099, "learning_rate": 7.898391090987662e-05, "loss": 0.9796, "step": 3627 }, { "epoch": 0.32416735541805347, "grad_norm": 0.46011456847190857, "learning_rate": 7.897211829780959e-05, "loss": 0.9242, "step": 3628 }, { "epoch": 0.32425670694931535, "grad_norm": 0.4157116711139679, "learning_rate": 7.896032325901883e-05, "loss": 1.0034, "step": 3629 }, { "epoch": 0.32434605848057724, "grad_norm": 0.3974458575248718, "learning_rate": 7.894852579449227e-05, "loss": 1.0186, "step": 3630 }, { "epoch": 0.32443541001183906, "grad_norm": 0.38465315103530884, "learning_rate": 7.893672590521814e-05, "loss": 0.9477, "step": 3631 }, { "epoch": 0.32452476154310095, "grad_norm": 0.4720938801765442, "learning_rate": 7.892492359218477e-05, "loss": 0.9311, "step": 3632 }, { "epoch": 0.32461411307436283, "grad_norm": 0.4968278408050537, "learning_rate": 7.89131188563807e-05, "loss": 0.9531, "step": 3633 }, { "epoch": 0.32470346460562466, "grad_norm": 0.43185052275657654, "learning_rate": 7.890131169879477e-05, "loss": 1.0749, "step": 3634 }, { "epoch": 0.32479281613688654, "grad_norm": 0.4034889042377472, "learning_rate": 7.888950212041591e-05, "loss": 1.0049, "step": 3635 }, { "epoch": 0.3248821676681484, "grad_norm": 0.4653837978839874, "learning_rate": 7.88776901222333e-05, "loss": 0.9604, "step": 3636 }, { "epoch": 0.32497151919941025, "grad_norm": 0.4228241741657257, "learning_rate": 7.886587570523634e-05, "loss": 0.9343, "step": 3637 }, { "epoch": 0.32506087073067214, "grad_norm": 0.3874700665473938, "learning_rate": 7.88540588704146e-05, "loss": 1.0345, "step": 3638 }, { "epoch": 0.325150222261934, "grad_norm": 0.501092791557312, "learning_rate": 7.884223961875785e-05, "loss": 0.9159, "step": 3639 }, { "epoch": 0.3252395737931959, "grad_norm": 0.6081518530845642, "learning_rate": 7.88304179512561e-05, "loss": 0.8758, "step": 3640 }, { "epoch": 0.32532892532445773, "grad_norm": 0.4688172936439514, "learning_rate": 7.881859386889954e-05, "loss": 0.9757, "step": 3641 }, { "epoch": 0.3254182768557196, "grad_norm": 0.4610939025878906, "learning_rate": 7.880676737267857e-05, "loss": 1.0032, "step": 3642 }, { "epoch": 0.3255076283869815, "grad_norm": 0.49634119868278503, "learning_rate": 7.879493846358377e-05, "loss": 0.9689, "step": 3643 }, { "epoch": 0.3255969799182433, "grad_norm": 0.4354616105556488, "learning_rate": 7.878310714260593e-05, "loss": 0.9444, "step": 3644 }, { "epoch": 0.3256863314495052, "grad_norm": 0.4469475746154785, "learning_rate": 7.877127341073606e-05, "loss": 1.0258, "step": 3645 }, { "epoch": 0.3257756829807671, "grad_norm": 0.4689541459083557, "learning_rate": 7.875943726896538e-05, "loss": 0.8939, "step": 3646 }, { "epoch": 0.325865034512029, "grad_norm": 0.39675360918045044, "learning_rate": 7.874759871828527e-05, "loss": 0.9616, "step": 3647 }, { "epoch": 0.3259543860432908, "grad_norm": 0.4091286361217499, "learning_rate": 7.873575775968734e-05, "loss": 0.8918, "step": 3648 }, { "epoch": 0.3260437375745527, "grad_norm": 0.3711673319339752, "learning_rate": 7.872391439416339e-05, "loss": 1.0246, "step": 3649 }, { "epoch": 0.32613308910581457, "grad_norm": 0.4348282217979431, "learning_rate": 7.871206862270543e-05, "loss": 0.9172, "step": 3650 }, { "epoch": 0.3262224406370764, "grad_norm": 0.46988198161125183, "learning_rate": 7.870022044630569e-05, "loss": 0.9724, "step": 3651 }, { "epoch": 0.3263117921683383, "grad_norm": 0.42179208993911743, "learning_rate": 7.868836986595656e-05, "loss": 0.9622, "step": 3652 }, { "epoch": 0.32640114369960016, "grad_norm": 0.507832407951355, "learning_rate": 7.867651688265066e-05, "loss": 0.9428, "step": 3653 }, { "epoch": 0.32649049523086204, "grad_norm": 0.458981990814209, "learning_rate": 7.866466149738079e-05, "loss": 1.0245, "step": 3654 }, { "epoch": 0.32657984676212387, "grad_norm": 0.47192487120628357, "learning_rate": 7.865280371113998e-05, "loss": 0.9683, "step": 3655 }, { "epoch": 0.32666919829338575, "grad_norm": 0.4830261170864105, "learning_rate": 7.864094352492143e-05, "loss": 1.0502, "step": 3656 }, { "epoch": 0.32675854982464764, "grad_norm": 0.43681490421295166, "learning_rate": 7.862908093971859e-05, "loss": 0.9709, "step": 3657 }, { "epoch": 0.32684790135590946, "grad_norm": 0.4974532127380371, "learning_rate": 7.861721595652507e-05, "loss": 1.012, "step": 3658 }, { "epoch": 0.32693725288717135, "grad_norm": 0.43522706627845764, "learning_rate": 7.860534857633464e-05, "loss": 0.9935, "step": 3659 }, { "epoch": 0.32702660441843323, "grad_norm": 0.3843841254711151, "learning_rate": 7.859347880014138e-05, "loss": 1.01, "step": 3660 }, { "epoch": 0.3271159559496951, "grad_norm": 0.38631847500801086, "learning_rate": 7.858160662893948e-05, "loss": 1.0152, "step": 3661 }, { "epoch": 0.32720530748095694, "grad_norm": 0.4131559431552887, "learning_rate": 7.856973206372336e-05, "loss": 0.9994, "step": 3662 }, { "epoch": 0.3272946590122188, "grad_norm": 0.6184050440788269, "learning_rate": 7.855785510548765e-05, "loss": 0.97, "step": 3663 }, { "epoch": 0.3273840105434807, "grad_norm": 0.4109181761741638, "learning_rate": 7.854597575522717e-05, "loss": 1.0286, "step": 3664 }, { "epoch": 0.32747336207474254, "grad_norm": 0.46452227234840393, "learning_rate": 7.853409401393694e-05, "loss": 1.0194, "step": 3665 }, { "epoch": 0.3275627136060044, "grad_norm": 0.44786307215690613, "learning_rate": 7.85222098826122e-05, "loss": 0.9468, "step": 3666 }, { "epoch": 0.3276520651372663, "grad_norm": 0.4409159719944, "learning_rate": 7.851032336224835e-05, "loss": 0.9174, "step": 3667 }, { "epoch": 0.32774141666852813, "grad_norm": 0.35179105401039124, "learning_rate": 7.849843445384102e-05, "loss": 1.0162, "step": 3668 }, { "epoch": 0.32783076819979, "grad_norm": 0.45857298374176025, "learning_rate": 7.848654315838603e-05, "loss": 0.9139, "step": 3669 }, { "epoch": 0.3279201197310519, "grad_norm": 0.3982926309108734, "learning_rate": 7.84746494768794e-05, "loss": 1.0394, "step": 3670 }, { "epoch": 0.3280094712623138, "grad_norm": 0.48336413502693176, "learning_rate": 7.846275341031736e-05, "loss": 0.9504, "step": 3671 }, { "epoch": 0.3280988227935756, "grad_norm": 0.46158286929130554, "learning_rate": 7.845085495969635e-05, "loss": 0.9069, "step": 3672 }, { "epoch": 0.3281881743248375, "grad_norm": 0.5317728519439697, "learning_rate": 7.843895412601296e-05, "loss": 0.9455, "step": 3673 }, { "epoch": 0.3282775258560994, "grad_norm": 0.4177683889865875, "learning_rate": 7.842705091026403e-05, "loss": 0.9303, "step": 3674 }, { "epoch": 0.3283668773873612, "grad_norm": 0.4196977913379669, "learning_rate": 7.841514531344655e-05, "loss": 0.9551, "step": 3675 }, { "epoch": 0.3284562289186231, "grad_norm": 0.48437947034835815, "learning_rate": 7.840323733655778e-05, "loss": 0.8823, "step": 3676 }, { "epoch": 0.32854558044988497, "grad_norm": 0.38968583941459656, "learning_rate": 7.839132698059515e-05, "loss": 0.9749, "step": 3677 }, { "epoch": 0.32863493198114685, "grad_norm": 0.41708287596702576, "learning_rate": 7.837941424655624e-05, "loss": 0.9639, "step": 3678 }, { "epoch": 0.3287242835124087, "grad_norm": 0.43757346272468567, "learning_rate": 7.836749913543888e-05, "loss": 0.8771, "step": 3679 }, { "epoch": 0.32881363504367056, "grad_norm": 0.513954758644104, "learning_rate": 7.835558164824108e-05, "loss": 0.9956, "step": 3680 }, { "epoch": 0.32890298657493244, "grad_norm": 0.3900381624698639, "learning_rate": 7.834366178596109e-05, "loss": 1.0153, "step": 3681 }, { "epoch": 0.32899233810619427, "grad_norm": 0.45596617460250854, "learning_rate": 7.83317395495973e-05, "loss": 1.1101, "step": 3682 }, { "epoch": 0.32908168963745615, "grad_norm": 0.4868778586387634, "learning_rate": 7.831981494014833e-05, "loss": 1.0163, "step": 3683 }, { "epoch": 0.32917104116871804, "grad_norm": 0.40496212244033813, "learning_rate": 7.830788795861296e-05, "loss": 0.9321, "step": 3684 }, { "epoch": 0.3292603926999799, "grad_norm": 0.42820194363594055, "learning_rate": 7.829595860599026e-05, "loss": 1.0163, "step": 3685 }, { "epoch": 0.32934974423124175, "grad_norm": 0.42636728286743164, "learning_rate": 7.828402688327941e-05, "loss": 0.9583, "step": 3686 }, { "epoch": 0.32943909576250363, "grad_norm": 0.39956799149513245, "learning_rate": 7.827209279147982e-05, "loss": 1.0322, "step": 3687 }, { "epoch": 0.3295284472937655, "grad_norm": 0.4089909791946411, "learning_rate": 7.826015633159112e-05, "loss": 1.0177, "step": 3688 }, { "epoch": 0.32961779882502734, "grad_norm": 0.4677819013595581, "learning_rate": 7.824821750461308e-05, "loss": 0.9698, "step": 3689 }, { "epoch": 0.3297071503562892, "grad_norm": 0.4185117781162262, "learning_rate": 7.823627631154571e-05, "loss": 1.001, "step": 3690 }, { "epoch": 0.3297965018875511, "grad_norm": 0.3975558280944824, "learning_rate": 7.822433275338923e-05, "loss": 0.9953, "step": 3691 }, { "epoch": 0.329885853418813, "grad_norm": 0.3640524744987488, "learning_rate": 7.821238683114404e-05, "loss": 1.0064, "step": 3692 }, { "epoch": 0.3299752049500748, "grad_norm": 0.4032001197338104, "learning_rate": 7.820043854581071e-05, "loss": 1.01, "step": 3693 }, { "epoch": 0.3300645564813367, "grad_norm": 0.45397523045539856, "learning_rate": 7.818848789839008e-05, "loss": 0.9609, "step": 3694 }, { "epoch": 0.3301539080125986, "grad_norm": 0.3662114143371582, "learning_rate": 7.81765348898831e-05, "loss": 1.0157, "step": 3695 }, { "epoch": 0.3302432595438604, "grad_norm": 0.38710111379623413, "learning_rate": 7.816457952129099e-05, "loss": 0.9811, "step": 3696 }, { "epoch": 0.3303326110751223, "grad_norm": 0.40280595421791077, "learning_rate": 7.815262179361514e-05, "loss": 0.9899, "step": 3697 }, { "epoch": 0.3304219626063842, "grad_norm": 0.4115069806575775, "learning_rate": 7.814066170785714e-05, "loss": 0.9829, "step": 3698 }, { "epoch": 0.330511314137646, "grad_norm": 0.39041948318481445, "learning_rate": 7.812869926501874e-05, "loss": 0.9653, "step": 3699 }, { "epoch": 0.3306006656689079, "grad_norm": 0.40543365478515625, "learning_rate": 7.811673446610195e-05, "loss": 1.001, "step": 3700 }, { "epoch": 0.3306900172001698, "grad_norm": 0.36765506863594055, "learning_rate": 7.810476731210896e-05, "loss": 1.0908, "step": 3701 }, { "epoch": 0.33077936873143166, "grad_norm": 0.45462504029273987, "learning_rate": 7.809279780404212e-05, "loss": 1.0355, "step": 3702 }, { "epoch": 0.3308687202626935, "grad_norm": 0.5238141417503357, "learning_rate": 7.808082594290402e-05, "loss": 0.9617, "step": 3703 }, { "epoch": 0.33095807179395537, "grad_norm": 0.4108685851097107, "learning_rate": 7.806885172969742e-05, "loss": 0.9806, "step": 3704 }, { "epoch": 0.33104742332521725, "grad_norm": 0.320278525352478, "learning_rate": 7.80568751654253e-05, "loss": 1.0341, "step": 3705 }, { "epoch": 0.3311367748564791, "grad_norm": 0.3879075050354004, "learning_rate": 7.804489625109083e-05, "loss": 0.9905, "step": 3706 }, { "epoch": 0.33122612638774096, "grad_norm": 0.4919257164001465, "learning_rate": 7.803291498769735e-05, "loss": 0.9408, "step": 3707 }, { "epoch": 0.33131547791900284, "grad_norm": 0.4680798053741455, "learning_rate": 7.802093137624844e-05, "loss": 0.8917, "step": 3708 }, { "epoch": 0.3314048294502647, "grad_norm": 0.35378363728523254, "learning_rate": 7.800894541774783e-05, "loss": 1.0099, "step": 3709 }, { "epoch": 0.33149418098152655, "grad_norm": 0.3704555928707123, "learning_rate": 7.79969571131995e-05, "loss": 0.9949, "step": 3710 }, { "epoch": 0.33158353251278844, "grad_norm": 0.39882001280784607, "learning_rate": 7.798496646360758e-05, "loss": 0.9924, "step": 3711 }, { "epoch": 0.3316728840440503, "grad_norm": 0.4005809724330902, "learning_rate": 7.797297346997643e-05, "loss": 0.9349, "step": 3712 }, { "epoch": 0.33176223557531215, "grad_norm": 0.4036948084831238, "learning_rate": 7.796097813331059e-05, "loss": 0.9319, "step": 3713 }, { "epoch": 0.33185158710657403, "grad_norm": 0.46617838740348816, "learning_rate": 7.794898045461476e-05, "loss": 0.904, "step": 3714 }, { "epoch": 0.3319409386378359, "grad_norm": 0.38140738010406494, "learning_rate": 7.79369804348939e-05, "loss": 0.9657, "step": 3715 }, { "epoch": 0.3320302901690978, "grad_norm": 0.46043315529823303, "learning_rate": 7.792497807515317e-05, "loss": 1.045, "step": 3716 }, { "epoch": 0.3321196417003596, "grad_norm": 0.4285655617713928, "learning_rate": 7.791297337639784e-05, "loss": 1.0517, "step": 3717 }, { "epoch": 0.3322089932316215, "grad_norm": 0.43057459592819214, "learning_rate": 7.790096633963348e-05, "loss": 0.9226, "step": 3718 }, { "epoch": 0.3322983447628834, "grad_norm": 0.5528749823570251, "learning_rate": 7.788895696586577e-05, "loss": 0.9763, "step": 3719 }, { "epoch": 0.3323876962941452, "grad_norm": 0.49909940361976624, "learning_rate": 7.787694525610066e-05, "loss": 1.0276, "step": 3720 }, { "epoch": 0.3324770478254071, "grad_norm": 0.458188533782959, "learning_rate": 7.786493121134423e-05, "loss": 0.9415, "step": 3721 }, { "epoch": 0.332566399356669, "grad_norm": 0.4523871839046478, "learning_rate": 7.785291483260278e-05, "loss": 0.9684, "step": 3722 }, { "epoch": 0.33265575088793087, "grad_norm": 0.4228318929672241, "learning_rate": 7.784089612088283e-05, "loss": 1.042, "step": 3723 }, { "epoch": 0.3327451024191927, "grad_norm": 0.3901155889034271, "learning_rate": 7.782887507719108e-05, "loss": 1.0081, "step": 3724 }, { "epoch": 0.3328344539504546, "grad_norm": 0.39497750997543335, "learning_rate": 7.781685170253439e-05, "loss": 1.0522, "step": 3725 }, { "epoch": 0.33292380548171646, "grad_norm": 0.43867218494415283, "learning_rate": 7.780482599791987e-05, "loss": 0.9623, "step": 3726 }, { "epoch": 0.3330131570129783, "grad_norm": 0.43389105796813965, "learning_rate": 7.779279796435479e-05, "loss": 1.0464, "step": 3727 }, { "epoch": 0.3331025085442402, "grad_norm": 0.4637124538421631, "learning_rate": 7.778076760284665e-05, "loss": 0.9743, "step": 3728 }, { "epoch": 0.33319186007550206, "grad_norm": 0.48631882667541504, "learning_rate": 7.776873491440307e-05, "loss": 1.0403, "step": 3729 }, { "epoch": 0.33328121160676394, "grad_norm": 0.4544772505760193, "learning_rate": 7.775669990003197e-05, "loss": 1.0384, "step": 3730 }, { "epoch": 0.33337056313802577, "grad_norm": 0.41751065850257874, "learning_rate": 7.774466256074137e-05, "loss": 0.9482, "step": 3731 }, { "epoch": 0.33345991466928765, "grad_norm": 0.42404597997665405, "learning_rate": 7.773262289753956e-05, "loss": 0.9436, "step": 3732 }, { "epoch": 0.33354926620054953, "grad_norm": 0.400020033121109, "learning_rate": 7.772058091143497e-05, "loss": 0.9523, "step": 3733 }, { "epoch": 0.33363861773181136, "grad_norm": 0.4418000876903534, "learning_rate": 7.770853660343625e-05, "loss": 1.032, "step": 3734 }, { "epoch": 0.33372796926307324, "grad_norm": 0.48235222697257996, "learning_rate": 7.769648997455223e-05, "loss": 0.9437, "step": 3735 }, { "epoch": 0.3338173207943351, "grad_norm": 0.40464332699775696, "learning_rate": 7.768444102579196e-05, "loss": 0.9569, "step": 3736 }, { "epoch": 0.33390667232559695, "grad_norm": 0.5996637940406799, "learning_rate": 7.767238975816465e-05, "loss": 0.9467, "step": 3737 }, { "epoch": 0.33399602385685884, "grad_norm": 0.38040077686309814, "learning_rate": 7.766033617267975e-05, "loss": 0.9954, "step": 3738 }, { "epoch": 0.3340853753881207, "grad_norm": 0.4809688925743103, "learning_rate": 7.764828027034685e-05, "loss": 0.8698, "step": 3739 }, { "epoch": 0.3341747269193826, "grad_norm": 0.4978516399860382, "learning_rate": 7.763622205217576e-05, "loss": 0.9686, "step": 3740 }, { "epoch": 0.33426407845064443, "grad_norm": 0.41253742575645447, "learning_rate": 7.762416151917648e-05, "loss": 0.9883, "step": 3741 }, { "epoch": 0.3343534299819063, "grad_norm": 0.35575124621391296, "learning_rate": 7.761209867235924e-05, "loss": 1.0379, "step": 3742 }, { "epoch": 0.3344427815131682, "grad_norm": 0.3860713541507721, "learning_rate": 7.760003351273442e-05, "loss": 0.9995, "step": 3743 }, { "epoch": 0.33453213304443, "grad_norm": 0.3986268639564514, "learning_rate": 7.758796604131258e-05, "loss": 0.986, "step": 3744 }, { "epoch": 0.3346214845756919, "grad_norm": 0.4240453243255615, "learning_rate": 7.757589625910452e-05, "loss": 0.9722, "step": 3745 }, { "epoch": 0.3347108361069538, "grad_norm": 0.5454365015029907, "learning_rate": 7.75638241671212e-05, "loss": 0.8707, "step": 3746 }, { "epoch": 0.3348001876382157, "grad_norm": 0.4972113370895386, "learning_rate": 7.755174976637381e-05, "loss": 0.978, "step": 3747 }, { "epoch": 0.3348895391694775, "grad_norm": 0.4331071972846985, "learning_rate": 7.753967305787371e-05, "loss": 0.9539, "step": 3748 }, { "epoch": 0.3349788907007394, "grad_norm": 0.5387925505638123, "learning_rate": 7.752759404263242e-05, "loss": 0.8865, "step": 3749 }, { "epoch": 0.33506824223200127, "grad_norm": 0.4231729805469513, "learning_rate": 7.751551272166171e-05, "loss": 0.9758, "step": 3750 }, { "epoch": 0.3351575937632631, "grad_norm": 0.4942473769187927, "learning_rate": 7.750342909597352e-05, "loss": 0.9943, "step": 3751 }, { "epoch": 0.335246945294525, "grad_norm": 0.4464167356491089, "learning_rate": 7.749134316657997e-05, "loss": 1.0107, "step": 3752 }, { "epoch": 0.33533629682578686, "grad_norm": 0.4313907027244568, "learning_rate": 7.747925493449342e-05, "loss": 1.0031, "step": 3753 }, { "epoch": 0.33542564835704874, "grad_norm": 0.42571744322776794, "learning_rate": 7.746716440072632e-05, "loss": 1.003, "step": 3754 }, { "epoch": 0.33551499988831057, "grad_norm": 0.3863215446472168, "learning_rate": 7.745507156629145e-05, "loss": 1.0386, "step": 3755 }, { "epoch": 0.33560435141957246, "grad_norm": 0.4166812002658844, "learning_rate": 7.744297643220168e-05, "loss": 0.9372, "step": 3756 }, { "epoch": 0.33569370295083434, "grad_norm": 0.4798368215560913, "learning_rate": 7.74308789994701e-05, "loss": 0.9966, "step": 3757 }, { "epoch": 0.33578305448209617, "grad_norm": 0.3768024146556854, "learning_rate": 7.741877926911003e-05, "loss": 0.9845, "step": 3758 }, { "epoch": 0.33587240601335805, "grad_norm": 0.46540406346321106, "learning_rate": 7.740667724213493e-05, "loss": 0.9618, "step": 3759 }, { "epoch": 0.33596175754461993, "grad_norm": 0.5055358409881592, "learning_rate": 7.739457291955847e-05, "loss": 1.0283, "step": 3760 }, { "epoch": 0.3360511090758818, "grad_norm": 0.5068111419677734, "learning_rate": 7.738246630239452e-05, "loss": 0.9803, "step": 3761 }, { "epoch": 0.33614046060714364, "grad_norm": 0.43414464592933655, "learning_rate": 7.737035739165715e-05, "loss": 1.0182, "step": 3762 }, { "epoch": 0.3362298121384055, "grad_norm": 0.47884586453437805, "learning_rate": 7.73582461883606e-05, "loss": 1.0077, "step": 3763 }, { "epoch": 0.3363191636696674, "grad_norm": 0.3981468677520752, "learning_rate": 7.734613269351931e-05, "loss": 1.0856, "step": 3764 }, { "epoch": 0.33640851520092924, "grad_norm": 0.4908856153488159, "learning_rate": 7.733401690814793e-05, "loss": 0.8595, "step": 3765 }, { "epoch": 0.3364978667321911, "grad_norm": 0.3917779326438904, "learning_rate": 7.732189883326125e-05, "loss": 0.9844, "step": 3766 }, { "epoch": 0.336587218263453, "grad_norm": 0.40213218331336975, "learning_rate": 7.730977846987433e-05, "loss": 0.968, "step": 3767 }, { "epoch": 0.33667656979471483, "grad_norm": 0.3646174967288971, "learning_rate": 7.729765581900235e-05, "loss": 1.0258, "step": 3768 }, { "epoch": 0.3367659213259767, "grad_norm": 0.41250017285346985, "learning_rate": 7.728553088166075e-05, "loss": 0.9605, "step": 3769 }, { "epoch": 0.3368552728572386, "grad_norm": 0.384792685508728, "learning_rate": 7.727340365886506e-05, "loss": 1.0067, "step": 3770 }, { "epoch": 0.3369446243885005, "grad_norm": 0.5328003168106079, "learning_rate": 7.726127415163113e-05, "loss": 0.9602, "step": 3771 }, { "epoch": 0.3370339759197623, "grad_norm": 0.4145788550376892, "learning_rate": 7.724914236097489e-05, "loss": 1.0511, "step": 3772 }, { "epoch": 0.3371233274510242, "grad_norm": 0.37632668018341064, "learning_rate": 7.723700828791252e-05, "loss": 0.972, "step": 3773 }, { "epoch": 0.3372126789822861, "grad_norm": 0.37736862897872925, "learning_rate": 7.722487193346039e-05, "loss": 0.99, "step": 3774 }, { "epoch": 0.3373020305135479, "grad_norm": 0.4212090075016022, "learning_rate": 7.721273329863504e-05, "loss": 0.9789, "step": 3775 }, { "epoch": 0.3373913820448098, "grad_norm": 0.5166639089584351, "learning_rate": 7.72005923844532e-05, "loss": 0.9856, "step": 3776 }, { "epoch": 0.33748073357607167, "grad_norm": 0.46971988677978516, "learning_rate": 7.71884491919318e-05, "loss": 1.048, "step": 3777 }, { "epoch": 0.33757008510733355, "grad_norm": 0.4689841568470001, "learning_rate": 7.7176303722088e-05, "loss": 0.9496, "step": 3778 }, { "epoch": 0.3376594366385954, "grad_norm": 0.5772603154182434, "learning_rate": 7.716415597593907e-05, "loss": 0.8967, "step": 3779 }, { "epoch": 0.33774878816985726, "grad_norm": 0.4483826160430908, "learning_rate": 7.715200595450253e-05, "loss": 1.0697, "step": 3780 }, { "epoch": 0.33783813970111914, "grad_norm": 0.4816476106643677, "learning_rate": 7.713985365879606e-05, "loss": 0.912, "step": 3781 }, { "epoch": 0.33792749123238097, "grad_norm": 0.38734710216522217, "learning_rate": 7.712769908983757e-05, "loss": 0.9535, "step": 3782 }, { "epoch": 0.33801684276364286, "grad_norm": 0.4487532079219818, "learning_rate": 7.711554224864511e-05, "loss": 0.9674, "step": 3783 }, { "epoch": 0.33810619429490474, "grad_norm": 0.4268551170825958, "learning_rate": 7.710338313623697e-05, "loss": 0.9257, "step": 3784 }, { "epoch": 0.3381955458261666, "grad_norm": 0.44561639428138733, "learning_rate": 7.709122175363158e-05, "loss": 0.9981, "step": 3785 }, { "epoch": 0.33828489735742845, "grad_norm": 0.44499921798706055, "learning_rate": 7.707905810184762e-05, "loss": 0.9353, "step": 3786 }, { "epoch": 0.33837424888869033, "grad_norm": 0.4151676595211029, "learning_rate": 7.706689218190386e-05, "loss": 0.9364, "step": 3787 }, { "epoch": 0.3384636004199522, "grad_norm": 0.4505821764469147, "learning_rate": 7.705472399481939e-05, "loss": 0.9725, "step": 3788 }, { "epoch": 0.33855295195121404, "grad_norm": 0.410693883895874, "learning_rate": 7.704255354161341e-05, "loss": 1.0559, "step": 3789 }, { "epoch": 0.3386423034824759, "grad_norm": 0.38344308733940125, "learning_rate": 7.703038082330532e-05, "loss": 0.9554, "step": 3790 }, { "epoch": 0.3387316550137378, "grad_norm": 0.3579278588294983, "learning_rate": 7.70182058409147e-05, "loss": 0.9861, "step": 3791 }, { "epoch": 0.3388210065449997, "grad_norm": 0.4280867278575897, "learning_rate": 7.700602859546134e-05, "loss": 0.991, "step": 3792 }, { "epoch": 0.3389103580762615, "grad_norm": 0.4199950695037842, "learning_rate": 7.699384908796523e-05, "loss": 0.9886, "step": 3793 }, { "epoch": 0.3389997096075234, "grad_norm": 0.43950459361076355, "learning_rate": 7.698166731944654e-05, "loss": 0.9472, "step": 3794 }, { "epoch": 0.3390890611387853, "grad_norm": 0.43579909205436707, "learning_rate": 7.696948329092559e-05, "loss": 0.955, "step": 3795 }, { "epoch": 0.3391784126700471, "grad_norm": 0.4635678231716156, "learning_rate": 7.695729700342294e-05, "loss": 0.9009, "step": 3796 }, { "epoch": 0.339267764201309, "grad_norm": 0.4732118248939514, "learning_rate": 7.694510845795933e-05, "loss": 0.9975, "step": 3797 }, { "epoch": 0.3393571157325709, "grad_norm": 0.4001818299293518, "learning_rate": 7.693291765555567e-05, "loss": 0.9729, "step": 3798 }, { "epoch": 0.3394464672638327, "grad_norm": 0.39679357409477234, "learning_rate": 7.692072459723307e-05, "loss": 1.0314, "step": 3799 }, { "epoch": 0.3395358187950946, "grad_norm": 0.40091925859451294, "learning_rate": 7.690852928401285e-05, "loss": 0.9526, "step": 3800 }, { "epoch": 0.3396251703263565, "grad_norm": 0.46458113193511963, "learning_rate": 7.689633171691644e-05, "loss": 0.9608, "step": 3801 }, { "epoch": 0.33971452185761836, "grad_norm": 0.5359793305397034, "learning_rate": 7.688413189696559e-05, "loss": 0.9501, "step": 3802 }, { "epoch": 0.3398038733888802, "grad_norm": 0.43518051505088806, "learning_rate": 7.68719298251821e-05, "loss": 0.9698, "step": 3803 }, { "epoch": 0.33989322492014207, "grad_norm": 0.42377930879592896, "learning_rate": 7.685972550258809e-05, "loss": 0.974, "step": 3804 }, { "epoch": 0.33998257645140395, "grad_norm": 0.40546149015426636, "learning_rate": 7.684751893020574e-05, "loss": 0.9878, "step": 3805 }, { "epoch": 0.3400719279826658, "grad_norm": 0.43093767762184143, "learning_rate": 7.683531010905748e-05, "loss": 0.9468, "step": 3806 }, { "epoch": 0.34016127951392766, "grad_norm": 0.4087904095649719, "learning_rate": 7.682309904016601e-05, "loss": 0.9218, "step": 3807 }, { "epoch": 0.34025063104518954, "grad_norm": 0.4545688033103943, "learning_rate": 7.681088572455405e-05, "loss": 0.9562, "step": 3808 }, { "epoch": 0.3403399825764514, "grad_norm": 0.4550299644470215, "learning_rate": 7.679867016324465e-05, "loss": 0.9438, "step": 3809 }, { "epoch": 0.34042933410771326, "grad_norm": 0.4195230305194855, "learning_rate": 7.678645235726094e-05, "loss": 0.9424, "step": 3810 }, { "epoch": 0.34051868563897514, "grad_norm": 0.5254753232002258, "learning_rate": 7.677423230762632e-05, "loss": 0.9047, "step": 3811 }, { "epoch": 0.340608037170237, "grad_norm": 0.5194946527481079, "learning_rate": 7.676201001536439e-05, "loss": 0.9891, "step": 3812 }, { "epoch": 0.34069738870149885, "grad_norm": 0.4409390985965729, "learning_rate": 7.674978548149882e-05, "loss": 1.0141, "step": 3813 }, { "epoch": 0.34078674023276073, "grad_norm": 0.4274226725101471, "learning_rate": 7.67375587070536e-05, "loss": 1.0241, "step": 3814 }, { "epoch": 0.3408760917640226, "grad_norm": 0.41632789373397827, "learning_rate": 7.672532969305284e-05, "loss": 1.006, "step": 3815 }, { "epoch": 0.3409654432952845, "grad_norm": 0.41912585496902466, "learning_rate": 7.671309844052084e-05, "loss": 1.036, "step": 3816 }, { "epoch": 0.3410547948265463, "grad_norm": 0.44865861535072327, "learning_rate": 7.67008649504821e-05, "loss": 0.9867, "step": 3817 }, { "epoch": 0.3411441463578082, "grad_norm": 0.4446162283420563, "learning_rate": 7.668862922396131e-05, "loss": 0.9555, "step": 3818 }, { "epoch": 0.3412334978890701, "grad_norm": 0.468001127243042, "learning_rate": 7.667639126198337e-05, "loss": 1.0173, "step": 3819 }, { "epoch": 0.3413228494203319, "grad_norm": 0.38588184118270874, "learning_rate": 7.666415106557327e-05, "loss": 1.0348, "step": 3820 }, { "epoch": 0.3414122009515938, "grad_norm": 0.4341317415237427, "learning_rate": 7.665190863575633e-05, "loss": 1.0196, "step": 3821 }, { "epoch": 0.3415015524828557, "grad_norm": 0.4301292598247528, "learning_rate": 7.663966397355793e-05, "loss": 0.9469, "step": 3822 }, { "epoch": 0.34159090401411757, "grad_norm": 0.49630647897720337, "learning_rate": 7.662741708000374e-05, "loss": 0.9003, "step": 3823 }, { "epoch": 0.3416802555453794, "grad_norm": 0.46921518445014954, "learning_rate": 7.661516795611951e-05, "loss": 0.9352, "step": 3824 }, { "epoch": 0.3417696070766413, "grad_norm": 0.43845120072364807, "learning_rate": 7.66029166029313e-05, "loss": 0.9933, "step": 3825 }, { "epoch": 0.34185895860790316, "grad_norm": 0.3999157249927521, "learning_rate": 7.659066302146524e-05, "loss": 1.0288, "step": 3826 }, { "epoch": 0.341948310139165, "grad_norm": 0.38439029455184937, "learning_rate": 7.657840721274772e-05, "loss": 1.0599, "step": 3827 }, { "epoch": 0.3420376616704269, "grad_norm": 0.4651052951812744, "learning_rate": 7.656614917780527e-05, "loss": 1.0204, "step": 3828 }, { "epoch": 0.34212701320168876, "grad_norm": 0.41157326102256775, "learning_rate": 7.655388891766468e-05, "loss": 0.9886, "step": 3829 }, { "epoch": 0.3422163647329506, "grad_norm": 0.4303523302078247, "learning_rate": 7.654162643335283e-05, "loss": 0.9888, "step": 3830 }, { "epoch": 0.34230571626421247, "grad_norm": 0.3977055549621582, "learning_rate": 7.652936172589686e-05, "loss": 1.0203, "step": 3831 }, { "epoch": 0.34239506779547435, "grad_norm": 0.44146913290023804, "learning_rate": 7.651709479632406e-05, "loss": 1.0111, "step": 3832 }, { "epoch": 0.34248441932673623, "grad_norm": 0.532141923904419, "learning_rate": 7.650482564566193e-05, "loss": 0.9072, "step": 3833 }, { "epoch": 0.34257377085799806, "grad_norm": 0.49662163853645325, "learning_rate": 7.649255427493812e-05, "loss": 1.0138, "step": 3834 }, { "epoch": 0.34266312238925994, "grad_norm": 0.4919273555278778, "learning_rate": 7.64802806851805e-05, "loss": 0.9425, "step": 3835 }, { "epoch": 0.3427524739205218, "grad_norm": 0.4210042655467987, "learning_rate": 7.646800487741711e-05, "loss": 0.9302, "step": 3836 }, { "epoch": 0.34284182545178365, "grad_norm": 0.45006346702575684, "learning_rate": 7.645572685267619e-05, "loss": 0.9837, "step": 3837 }, { "epoch": 0.34293117698304554, "grad_norm": 0.5554122924804688, "learning_rate": 7.644344661198615e-05, "loss": 0.9685, "step": 3838 }, { "epoch": 0.3430205285143074, "grad_norm": 0.4323464035987854, "learning_rate": 7.643116415637559e-05, "loss": 1.0146, "step": 3839 }, { "epoch": 0.3431098800455693, "grad_norm": 0.5363249182701111, "learning_rate": 7.64188794868733e-05, "loss": 0.9732, "step": 3840 }, { "epoch": 0.34319923157683113, "grad_norm": 0.4187726378440857, "learning_rate": 7.640659260450823e-05, "loss": 0.9552, "step": 3841 }, { "epoch": 0.343288583108093, "grad_norm": 0.5245155692100525, "learning_rate": 7.639430351030958e-05, "loss": 0.8949, "step": 3842 }, { "epoch": 0.3433779346393549, "grad_norm": 0.4653611481189728, "learning_rate": 7.638201220530665e-05, "loss": 0.9063, "step": 3843 }, { "epoch": 0.3434672861706167, "grad_norm": 0.42201900482177734, "learning_rate": 7.636971869052899e-05, "loss": 0.9947, "step": 3844 }, { "epoch": 0.3435566377018786, "grad_norm": 0.5015077590942383, "learning_rate": 7.63574229670063e-05, "loss": 0.9122, "step": 3845 }, { "epoch": 0.3436459892331405, "grad_norm": 0.40728330612182617, "learning_rate": 7.63451250357685e-05, "loss": 0.9819, "step": 3846 }, { "epoch": 0.3437353407644024, "grad_norm": 0.508651852607727, "learning_rate": 7.633282489784564e-05, "loss": 0.9947, "step": 3847 }, { "epoch": 0.3438246922956642, "grad_norm": 0.47697994112968445, "learning_rate": 7.632052255426803e-05, "loss": 0.9991, "step": 3848 }, { "epoch": 0.3439140438269261, "grad_norm": 0.566102147102356, "learning_rate": 7.63082180060661e-05, "loss": 0.9842, "step": 3849 }, { "epoch": 0.34400339535818797, "grad_norm": 0.4584532380104065, "learning_rate": 7.629591125427047e-05, "loss": 0.9289, "step": 3850 }, { "epoch": 0.3440927468894498, "grad_norm": 0.42579421401023865, "learning_rate": 7.628360229991199e-05, "loss": 0.9618, "step": 3851 }, { "epoch": 0.3441820984207117, "grad_norm": 0.4023285508155823, "learning_rate": 7.627129114402164e-05, "loss": 1.0386, "step": 3852 }, { "epoch": 0.34427144995197356, "grad_norm": 0.5076940655708313, "learning_rate": 7.625897778763062e-05, "loss": 0.9791, "step": 3853 }, { "epoch": 0.34436080148323545, "grad_norm": 0.4704766273498535, "learning_rate": 7.624666223177033e-05, "loss": 0.9195, "step": 3854 }, { "epoch": 0.3444501530144973, "grad_norm": 0.4094794690608978, "learning_rate": 7.62343444774723e-05, "loss": 0.9469, "step": 3855 }, { "epoch": 0.34453950454575916, "grad_norm": 0.41486209630966187, "learning_rate": 7.62220245257683e-05, "loss": 1.0922, "step": 3856 }, { "epoch": 0.34462885607702104, "grad_norm": 0.45725998282432556, "learning_rate": 7.620970237769022e-05, "loss": 0.9506, "step": 3857 }, { "epoch": 0.34471820760828287, "grad_norm": 0.416765034198761, "learning_rate": 7.619737803427019e-05, "loss": 0.9818, "step": 3858 }, { "epoch": 0.34480755913954475, "grad_norm": 0.3775465190410614, "learning_rate": 7.618505149654051e-05, "loss": 0.9946, "step": 3859 }, { "epoch": 0.34489691067080663, "grad_norm": 0.43782955408096313, "learning_rate": 7.617272276553366e-05, "loss": 0.983, "step": 3860 }, { "epoch": 0.34498626220206846, "grad_norm": 0.40604960918426514, "learning_rate": 7.61603918422823e-05, "loss": 1.008, "step": 3861 }, { "epoch": 0.34507561373333034, "grad_norm": 0.4615291357040405, "learning_rate": 7.614805872781926e-05, "loss": 1.0312, "step": 3862 }, { "epoch": 0.3451649652645922, "grad_norm": 0.41947072744369507, "learning_rate": 7.613572342317758e-05, "loss": 0.9949, "step": 3863 }, { "epoch": 0.3452543167958541, "grad_norm": 0.4784375727176666, "learning_rate": 7.612338592939049e-05, "loss": 0.9847, "step": 3864 }, { "epoch": 0.34534366832711594, "grad_norm": 0.38383907079696655, "learning_rate": 7.611104624749137e-05, "loss": 1.0202, "step": 3865 }, { "epoch": 0.3454330198583778, "grad_norm": 0.4683041572570801, "learning_rate": 7.609870437851381e-05, "loss": 1.0224, "step": 3866 }, { "epoch": 0.3455223713896397, "grad_norm": 0.35911422967910767, "learning_rate": 7.608636032349155e-05, "loss": 1.0381, "step": 3867 }, { "epoch": 0.34561172292090153, "grad_norm": 0.4072626829147339, "learning_rate": 7.607401408345855e-05, "loss": 1.119, "step": 3868 }, { "epoch": 0.3457010744521634, "grad_norm": 0.4566546082496643, "learning_rate": 7.606166565944895e-05, "loss": 0.9207, "step": 3869 }, { "epoch": 0.3457904259834253, "grad_norm": 0.41566282510757446, "learning_rate": 7.604931505249706e-05, "loss": 0.9994, "step": 3870 }, { "epoch": 0.3458797775146872, "grad_norm": 0.46840545535087585, "learning_rate": 7.603696226363737e-05, "loss": 1.0202, "step": 3871 }, { "epoch": 0.345969129045949, "grad_norm": 0.5090840458869934, "learning_rate": 7.602460729390455e-05, "loss": 0.8464, "step": 3872 }, { "epoch": 0.3460584805772109, "grad_norm": 0.39953356981277466, "learning_rate": 7.601225014433346e-05, "loss": 1.0418, "step": 3873 }, { "epoch": 0.3461478321084728, "grad_norm": 0.48468056321144104, "learning_rate": 7.599989081595915e-05, "loss": 0.9746, "step": 3874 }, { "epoch": 0.3462371836397346, "grad_norm": 0.45679304003715515, "learning_rate": 7.598752930981686e-05, "loss": 0.9614, "step": 3875 }, { "epoch": 0.3463265351709965, "grad_norm": 0.39945322275161743, "learning_rate": 7.597516562694197e-05, "loss": 1.0286, "step": 3876 }, { "epoch": 0.34641588670225837, "grad_norm": 0.37428709864616394, "learning_rate": 7.59627997683701e-05, "loss": 1.0062, "step": 3877 }, { "epoch": 0.34650523823352025, "grad_norm": 0.36209729313850403, "learning_rate": 7.595043173513698e-05, "loss": 1.0071, "step": 3878 }, { "epoch": 0.3465945897647821, "grad_norm": 0.3938021659851074, "learning_rate": 7.593806152827861e-05, "loss": 0.9865, "step": 3879 }, { "epoch": 0.34668394129604396, "grad_norm": 0.3960947096347809, "learning_rate": 7.592568914883112e-05, "loss": 0.9646, "step": 3880 }, { "epoch": 0.34677329282730585, "grad_norm": 0.4837634563446045, "learning_rate": 7.591331459783078e-05, "loss": 0.9538, "step": 3881 }, { "epoch": 0.3468626443585677, "grad_norm": 0.44596779346466064, "learning_rate": 7.590093787631414e-05, "loss": 0.9657, "step": 3882 }, { "epoch": 0.34695199588982956, "grad_norm": 0.39485910534858704, "learning_rate": 7.588855898531787e-05, "loss": 1.047, "step": 3883 }, { "epoch": 0.34704134742109144, "grad_norm": 0.4514664113521576, "learning_rate": 7.587617792587884e-05, "loss": 1.0095, "step": 3884 }, { "epoch": 0.3471306989523533, "grad_norm": 0.46183550357818604, "learning_rate": 7.586379469903408e-05, "loss": 0.8961, "step": 3885 }, { "epoch": 0.34722005048361515, "grad_norm": 0.4102190136909485, "learning_rate": 7.585140930582085e-05, "loss": 0.8931, "step": 3886 }, { "epoch": 0.34730940201487703, "grad_norm": 0.42231065034866333, "learning_rate": 7.583902174727651e-05, "loss": 0.9373, "step": 3887 }, { "epoch": 0.3473987535461389, "grad_norm": 0.39157602190971375, "learning_rate": 7.582663202443867e-05, "loss": 1.0546, "step": 3888 }, { "epoch": 0.34748810507740074, "grad_norm": 0.46377474069595337, "learning_rate": 7.581424013834511e-05, "loss": 0.928, "step": 3889 }, { "epoch": 0.3475774566086626, "grad_norm": 0.3647017776966095, "learning_rate": 7.580184609003378e-05, "loss": 1.0289, "step": 3890 }, { "epoch": 0.3476668081399245, "grad_norm": 0.46764108538627625, "learning_rate": 7.578944988054281e-05, "loss": 0.9646, "step": 3891 }, { "epoch": 0.34775615967118634, "grad_norm": 0.37693339586257935, "learning_rate": 7.577705151091053e-05, "loss": 1.0112, "step": 3892 }, { "epoch": 0.3478455112024482, "grad_norm": 0.39555221796035767, "learning_rate": 7.576465098217542e-05, "loss": 0.9854, "step": 3893 }, { "epoch": 0.3479348627337101, "grad_norm": 0.5179402828216553, "learning_rate": 7.575224829537615e-05, "loss": 0.9444, "step": 3894 }, { "epoch": 0.348024214264972, "grad_norm": 0.42474082112312317, "learning_rate": 7.573984345155159e-05, "loss": 1.0315, "step": 3895 }, { "epoch": 0.3481135657962338, "grad_norm": 0.46355873346328735, "learning_rate": 7.572743645174077e-05, "loss": 0.9543, "step": 3896 }, { "epoch": 0.3482029173274957, "grad_norm": 0.4335034489631653, "learning_rate": 7.571502729698293e-05, "loss": 0.9763, "step": 3897 }, { "epoch": 0.3482922688587576, "grad_norm": 0.4546475112438202, "learning_rate": 7.570261598831743e-05, "loss": 0.9754, "step": 3898 }, { "epoch": 0.3483816203900194, "grad_norm": 0.4445870518684387, "learning_rate": 7.569020252678387e-05, "loss": 0.9814, "step": 3899 }, { "epoch": 0.3484709719212813, "grad_norm": 0.39527517557144165, "learning_rate": 7.567778691342203e-05, "loss": 1.0592, "step": 3900 }, { "epoch": 0.3485603234525432, "grad_norm": 0.442655473947525, "learning_rate": 7.566536914927181e-05, "loss": 0.9154, "step": 3901 }, { "epoch": 0.34864967498380506, "grad_norm": 0.40641334652900696, "learning_rate": 7.565294923537336e-05, "loss": 0.9864, "step": 3902 }, { "epoch": 0.3487390265150669, "grad_norm": 0.42094290256500244, "learning_rate": 7.564052717276696e-05, "loss": 0.9796, "step": 3903 }, { "epoch": 0.34882837804632877, "grad_norm": 0.4103735089302063, "learning_rate": 7.56281029624931e-05, "loss": 1.0549, "step": 3904 }, { "epoch": 0.34891772957759065, "grad_norm": 0.5410390496253967, "learning_rate": 7.561567660559246e-05, "loss": 0.9967, "step": 3905 }, { "epoch": 0.3490070811088525, "grad_norm": 0.4145830273628235, "learning_rate": 7.560324810310586e-05, "loss": 0.9267, "step": 3906 }, { "epoch": 0.34909643264011436, "grad_norm": 0.47264495491981506, "learning_rate": 7.559081745607431e-05, "loss": 0.9579, "step": 3907 }, { "epoch": 0.34918578417137625, "grad_norm": 0.5321443676948547, "learning_rate": 7.557838466553902e-05, "loss": 0.9407, "step": 3908 }, { "epoch": 0.34927513570263813, "grad_norm": 0.465622216463089, "learning_rate": 7.556594973254136e-05, "loss": 0.9588, "step": 3909 }, { "epoch": 0.34936448723389996, "grad_norm": 0.463840514421463, "learning_rate": 7.555351265812292e-05, "loss": 0.9034, "step": 3910 }, { "epoch": 0.34945383876516184, "grad_norm": 0.4932054281234741, "learning_rate": 7.55410734433254e-05, "loss": 0.9503, "step": 3911 }, { "epoch": 0.3495431902964237, "grad_norm": 0.3836749494075775, "learning_rate": 7.552863208919073e-05, "loss": 0.9432, "step": 3912 }, { "epoch": 0.34963254182768555, "grad_norm": 0.43824368715286255, "learning_rate": 7.551618859676101e-05, "loss": 1.047, "step": 3913 }, { "epoch": 0.34972189335894743, "grad_norm": 0.5028628706932068, "learning_rate": 7.550374296707851e-05, "loss": 0.9571, "step": 3914 }, { "epoch": 0.3498112448902093, "grad_norm": 0.4159904718399048, "learning_rate": 7.54912952011857e-05, "loss": 0.9978, "step": 3915 }, { "epoch": 0.3499005964214712, "grad_norm": 0.42895132303237915, "learning_rate": 7.547884530012517e-05, "loss": 0.9415, "step": 3916 }, { "epoch": 0.349989947952733, "grad_norm": 0.4712909162044525, "learning_rate": 7.546639326493978e-05, "loss": 0.971, "step": 3917 }, { "epoch": 0.3500792994839949, "grad_norm": 0.41875845193862915, "learning_rate": 7.545393909667249e-05, "loss": 1.0202, "step": 3918 }, { "epoch": 0.3501686510152568, "grad_norm": 0.32883596420288086, "learning_rate": 7.54414827963665e-05, "loss": 1.0405, "step": 3919 }, { "epoch": 0.3502580025465186, "grad_norm": 0.5156275033950806, "learning_rate": 7.542902436506514e-05, "loss": 0.8699, "step": 3920 }, { "epoch": 0.3503473540777805, "grad_norm": 0.3887585997581482, "learning_rate": 7.541656380381192e-05, "loss": 0.9707, "step": 3921 }, { "epoch": 0.3504367056090424, "grad_norm": 0.4152442216873169, "learning_rate": 7.540410111365055e-05, "loss": 0.9871, "step": 3922 }, { "epoch": 0.3505260571403042, "grad_norm": 0.4276559054851532, "learning_rate": 7.539163629562494e-05, "loss": 0.9426, "step": 3923 }, { "epoch": 0.3506154086715661, "grad_norm": 0.5117713212966919, "learning_rate": 7.537916935077914e-05, "loss": 0.9238, "step": 3924 }, { "epoch": 0.350704760202828, "grad_norm": 0.4770560562610626, "learning_rate": 7.536670028015737e-05, "loss": 0.9661, "step": 3925 }, { "epoch": 0.35079411173408986, "grad_norm": 0.49051031470298767, "learning_rate": 7.535422908480408e-05, "loss": 1.0228, "step": 3926 }, { "epoch": 0.3508834632653517, "grad_norm": 0.3898465037345886, "learning_rate": 7.534175576576384e-05, "loss": 1.01, "step": 3927 }, { "epoch": 0.3509728147966136, "grad_norm": 0.48158106207847595, "learning_rate": 7.532928032408142e-05, "loss": 1.0054, "step": 3928 }, { "epoch": 0.35106216632787546, "grad_norm": 0.43918222188949585, "learning_rate": 7.53168027608018e-05, "loss": 0.9429, "step": 3929 }, { "epoch": 0.3511515178591373, "grad_norm": 0.4521641433238983, "learning_rate": 7.530432307697007e-05, "loss": 0.8965, "step": 3930 }, { "epoch": 0.35124086939039917, "grad_norm": 0.4660329520702362, "learning_rate": 7.529184127363158e-05, "loss": 0.9271, "step": 3931 }, { "epoch": 0.35133022092166105, "grad_norm": 0.4383411705493927, "learning_rate": 7.527935735183177e-05, "loss": 1.0393, "step": 3932 }, { "epoch": 0.35141957245292293, "grad_norm": 0.36236387491226196, "learning_rate": 7.526687131261634e-05, "loss": 1.0835, "step": 3933 }, { "epoch": 0.35150892398418476, "grad_norm": 0.3911649286746979, "learning_rate": 7.52543831570311e-05, "loss": 1.0008, "step": 3934 }, { "epoch": 0.35159827551544665, "grad_norm": 0.43021535873413086, "learning_rate": 7.524189288612209e-05, "loss": 1.0463, "step": 3935 }, { "epoch": 0.35168762704670853, "grad_norm": 0.5328584909439087, "learning_rate": 7.522940050093547e-05, "loss": 1.0456, "step": 3936 }, { "epoch": 0.35177697857797036, "grad_norm": 0.41291365027427673, "learning_rate": 7.521690600251766e-05, "loss": 0.9843, "step": 3937 }, { "epoch": 0.35186633010923224, "grad_norm": 0.44431254267692566, "learning_rate": 7.520440939191515e-05, "loss": 0.9654, "step": 3938 }, { "epoch": 0.3519556816404941, "grad_norm": 0.40238481760025024, "learning_rate": 7.519191067017472e-05, "loss": 1.0108, "step": 3939 }, { "epoch": 0.352045033171756, "grad_norm": 0.41174423694610596, "learning_rate": 7.517940983834323e-05, "loss": 1.0082, "step": 3940 }, { "epoch": 0.35213438470301783, "grad_norm": 0.44105100631713867, "learning_rate": 7.516690689746779e-05, "loss": 0.9419, "step": 3941 }, { "epoch": 0.3522237362342797, "grad_norm": 0.4259895980358124, "learning_rate": 7.515440184859561e-05, "loss": 1.0009, "step": 3942 }, { "epoch": 0.3523130877655416, "grad_norm": 0.45526599884033203, "learning_rate": 7.514189469277418e-05, "loss": 1.019, "step": 3943 }, { "epoch": 0.3524024392968034, "grad_norm": 0.4207375943660736, "learning_rate": 7.512938543105105e-05, "loss": 0.9647, "step": 3944 }, { "epoch": 0.3524917908280653, "grad_norm": 0.4691575765609741, "learning_rate": 7.511687406447406e-05, "loss": 0.984, "step": 3945 }, { "epoch": 0.3525811423593272, "grad_norm": 0.4423462152481079, "learning_rate": 7.51043605940911e-05, "loss": 0.9842, "step": 3946 }, { "epoch": 0.3526704938905891, "grad_norm": 0.44663453102111816, "learning_rate": 7.509184502095038e-05, "loss": 0.9933, "step": 3947 }, { "epoch": 0.3527598454218509, "grad_norm": 0.4556397795677185, "learning_rate": 7.507932734610017e-05, "loss": 0.9362, "step": 3948 }, { "epoch": 0.3528491969531128, "grad_norm": 0.3916218876838684, "learning_rate": 7.506680757058896e-05, "loss": 1.0302, "step": 3949 }, { "epoch": 0.35293854848437467, "grad_norm": 0.39909112453460693, "learning_rate": 7.505428569546542e-05, "loss": 1.0345, "step": 3950 }, { "epoch": 0.3530279000156365, "grad_norm": 0.4075121283531189, "learning_rate": 7.504176172177841e-05, "loss": 0.9996, "step": 3951 }, { "epoch": 0.3531172515468984, "grad_norm": 0.46926334500312805, "learning_rate": 7.502923565057692e-05, "loss": 0.9457, "step": 3952 }, { "epoch": 0.35320660307816026, "grad_norm": 0.4146152436733246, "learning_rate": 7.501670748291016e-05, "loss": 1.0144, "step": 3953 }, { "epoch": 0.3532959546094221, "grad_norm": 0.4583793878555298, "learning_rate": 7.500417721982748e-05, "loss": 0.9683, "step": 3954 }, { "epoch": 0.353385306140684, "grad_norm": 0.42835697531700134, "learning_rate": 7.499164486237844e-05, "loss": 0.9714, "step": 3955 }, { "epoch": 0.35347465767194586, "grad_norm": 0.4047943949699402, "learning_rate": 7.497911041161274e-05, "loss": 1.0431, "step": 3956 }, { "epoch": 0.35356400920320774, "grad_norm": 0.3960307836532593, "learning_rate": 7.496657386858029e-05, "loss": 0.9681, "step": 3957 }, { "epoch": 0.35365336073446957, "grad_norm": 0.37818729877471924, "learning_rate": 7.495403523433116e-05, "loss": 1.0108, "step": 3958 }, { "epoch": 0.35374271226573145, "grad_norm": 0.44863516092300415, "learning_rate": 7.494149450991557e-05, "loss": 0.9547, "step": 3959 }, { "epoch": 0.35383206379699333, "grad_norm": 0.38477665185928345, "learning_rate": 7.492895169638397e-05, "loss": 1.0091, "step": 3960 }, { "epoch": 0.35392141532825516, "grad_norm": 0.45500656962394714, "learning_rate": 7.491640679478696e-05, "loss": 0.9557, "step": 3961 }, { "epoch": 0.35401076685951705, "grad_norm": 0.3944813013076782, "learning_rate": 7.490385980617527e-05, "loss": 1.0622, "step": 3962 }, { "epoch": 0.35410011839077893, "grad_norm": 0.3850362300872803, "learning_rate": 7.489131073159987e-05, "loss": 0.96, "step": 3963 }, { "epoch": 0.3541894699220408, "grad_norm": 0.402127742767334, "learning_rate": 7.487875957211188e-05, "loss": 0.8966, "step": 3964 }, { "epoch": 0.35427882145330264, "grad_norm": 0.39359238743782043, "learning_rate": 7.486620632876257e-05, "loss": 1.0262, "step": 3965 }, { "epoch": 0.3543681729845645, "grad_norm": 0.4493246078491211, "learning_rate": 7.485365100260345e-05, "loss": 0.9622, "step": 3966 }, { "epoch": 0.3544575245158264, "grad_norm": 0.38160157203674316, "learning_rate": 7.484109359468612e-05, "loss": 0.9997, "step": 3967 }, { "epoch": 0.35454687604708823, "grad_norm": 0.5229949355125427, "learning_rate": 7.482853410606242e-05, "loss": 0.8508, "step": 3968 }, { "epoch": 0.3546362275783501, "grad_norm": 0.41521862149238586, "learning_rate": 7.481597253778434e-05, "loss": 1.0109, "step": 3969 }, { "epoch": 0.354725579109612, "grad_norm": 0.4313722252845764, "learning_rate": 7.480340889090403e-05, "loss": 0.9793, "step": 3970 }, { "epoch": 0.3548149306408739, "grad_norm": 0.47400006651878357, "learning_rate": 7.479084316647385e-05, "loss": 1.0458, "step": 3971 }, { "epoch": 0.3549042821721357, "grad_norm": 0.44894692301750183, "learning_rate": 7.477827536554629e-05, "loss": 0.9617, "step": 3972 }, { "epoch": 0.3549936337033976, "grad_norm": 0.40421560406684875, "learning_rate": 7.476570548917406e-05, "loss": 1.0115, "step": 3973 }, { "epoch": 0.3550829852346595, "grad_norm": 0.4186607897281647, "learning_rate": 7.475313353841e-05, "loss": 1.0278, "step": 3974 }, { "epoch": 0.3551723367659213, "grad_norm": 0.36971282958984375, "learning_rate": 7.474055951430717e-05, "loss": 0.9399, "step": 3975 }, { "epoch": 0.3552616882971832, "grad_norm": 0.4785158634185791, "learning_rate": 7.472798341791877e-05, "loss": 0.9303, "step": 3976 }, { "epoch": 0.35535103982844507, "grad_norm": 0.38244813680648804, "learning_rate": 7.471540525029817e-05, "loss": 0.9782, "step": 3977 }, { "epoch": 0.35544039135970695, "grad_norm": 0.5407021641731262, "learning_rate": 7.470282501249893e-05, "loss": 0.948, "step": 3978 }, { "epoch": 0.3555297428909688, "grad_norm": 0.4248984158039093, "learning_rate": 7.469024270557477e-05, "loss": 1.0419, "step": 3979 }, { "epoch": 0.35561909442223066, "grad_norm": 0.38939395546913147, "learning_rate": 7.467765833057964e-05, "loss": 1.0081, "step": 3980 }, { "epoch": 0.35570844595349255, "grad_norm": 0.45824161171913147, "learning_rate": 7.466507188856755e-05, "loss": 0.9602, "step": 3981 }, { "epoch": 0.3557977974847544, "grad_norm": 0.45186036825180054, "learning_rate": 7.46524833805928e-05, "loss": 0.9459, "step": 3982 }, { "epoch": 0.35588714901601626, "grad_norm": 0.4205573499202728, "learning_rate": 7.463989280770978e-05, "loss": 1.0347, "step": 3983 }, { "epoch": 0.35597650054727814, "grad_norm": 0.5025462508201599, "learning_rate": 7.462730017097308e-05, "loss": 0.9495, "step": 3984 }, { "epoch": 0.35606585207853997, "grad_norm": 0.45681893825531006, "learning_rate": 7.46147054714375e-05, "loss": 0.9172, "step": 3985 }, { "epoch": 0.35615520360980185, "grad_norm": 0.43965548276901245, "learning_rate": 7.460210871015796e-05, "loss": 1.0745, "step": 3986 }, { "epoch": 0.35624455514106373, "grad_norm": 0.4385167062282562, "learning_rate": 7.458950988818957e-05, "loss": 1.0192, "step": 3987 }, { "epoch": 0.3563339066723256, "grad_norm": 0.4278493821620941, "learning_rate": 7.457690900658762e-05, "loss": 0.9996, "step": 3988 }, { "epoch": 0.35642325820358745, "grad_norm": 0.45512884855270386, "learning_rate": 7.456430606640757e-05, "loss": 0.9825, "step": 3989 }, { "epoch": 0.35651260973484933, "grad_norm": 0.4201413094997406, "learning_rate": 7.455170106870505e-05, "loss": 0.949, "step": 3990 }, { "epoch": 0.3566019612661112, "grad_norm": 0.39953503012657166, "learning_rate": 7.453909401453589e-05, "loss": 0.9882, "step": 3991 }, { "epoch": 0.35669131279737304, "grad_norm": 0.4012105464935303, "learning_rate": 7.452648490495602e-05, "loss": 0.9762, "step": 3992 }, { "epoch": 0.3567806643286349, "grad_norm": 0.39870861172676086, "learning_rate": 7.451387374102159e-05, "loss": 0.9714, "step": 3993 }, { "epoch": 0.3568700158598968, "grad_norm": 0.3774680197238922, "learning_rate": 7.450126052378894e-05, "loss": 0.9913, "step": 3994 }, { "epoch": 0.3569593673911587, "grad_norm": 0.4141833186149597, "learning_rate": 7.448864525431457e-05, "loss": 0.9495, "step": 3995 }, { "epoch": 0.3570487189224205, "grad_norm": 0.44811514019966125, "learning_rate": 7.447602793365514e-05, "loss": 1.0848, "step": 3996 }, { "epoch": 0.3571380704536824, "grad_norm": 0.44921422004699707, "learning_rate": 7.446340856286744e-05, "loss": 1.0281, "step": 3997 }, { "epoch": 0.3572274219849443, "grad_norm": 0.4648860991001129, "learning_rate": 7.445078714300855e-05, "loss": 1.0197, "step": 3998 }, { "epoch": 0.3573167735162061, "grad_norm": 0.4424198865890503, "learning_rate": 7.443816367513559e-05, "loss": 1.0069, "step": 3999 }, { "epoch": 0.357406125047468, "grad_norm": 0.5149994492530823, "learning_rate": 7.442553816030592e-05, "loss": 0.9001, "step": 4000 }, { "epoch": 0.3574954765787299, "grad_norm": 0.3967181444168091, "learning_rate": 7.441291059957708e-05, "loss": 1.0558, "step": 4001 }, { "epoch": 0.35758482810999176, "grad_norm": 0.4318191707134247, "learning_rate": 7.440028099400677e-05, "loss": 0.9383, "step": 4002 }, { "epoch": 0.3576741796412536, "grad_norm": 0.4305358827114105, "learning_rate": 7.438764934465283e-05, "loss": 1.0499, "step": 4003 }, { "epoch": 0.35776353117251547, "grad_norm": 0.38480517268180847, "learning_rate": 7.437501565257329e-05, "loss": 0.9881, "step": 4004 }, { "epoch": 0.35785288270377735, "grad_norm": 0.5233545303344727, "learning_rate": 7.436237991882637e-05, "loss": 0.969, "step": 4005 }, { "epoch": 0.3579422342350392, "grad_norm": 0.5495209693908691, "learning_rate": 7.434974214447047e-05, "loss": 0.9978, "step": 4006 }, { "epoch": 0.35803158576630106, "grad_norm": 0.46904364228248596, "learning_rate": 7.43371023305641e-05, "loss": 0.9741, "step": 4007 }, { "epoch": 0.35812093729756295, "grad_norm": 0.4199860990047455, "learning_rate": 7.432446047816599e-05, "loss": 0.9811, "step": 4008 }, { "epoch": 0.35821028882882483, "grad_norm": 0.3669317364692688, "learning_rate": 7.431181658833504e-05, "loss": 1.07, "step": 4009 }, { "epoch": 0.35829964036008666, "grad_norm": 0.41901111602783203, "learning_rate": 7.42991706621303e-05, "loss": 0.896, "step": 4010 }, { "epoch": 0.35838899189134854, "grad_norm": 0.47777092456817627, "learning_rate": 7.428652270061102e-05, "loss": 1.0107, "step": 4011 }, { "epoch": 0.3584783434226104, "grad_norm": 0.4377864897251129, "learning_rate": 7.427387270483659e-05, "loss": 0.9397, "step": 4012 }, { "epoch": 0.35856769495387225, "grad_norm": 0.45756855607032776, "learning_rate": 7.426122067586656e-05, "loss": 0.9688, "step": 4013 }, { "epoch": 0.35865704648513413, "grad_norm": 0.4156295359134674, "learning_rate": 7.424856661476071e-05, "loss": 1.0963, "step": 4014 }, { "epoch": 0.358746398016396, "grad_norm": 0.43841129541397095, "learning_rate": 7.423591052257893e-05, "loss": 0.9589, "step": 4015 }, { "epoch": 0.3588357495476579, "grad_norm": 0.35517293214797974, "learning_rate": 7.42232524003813e-05, "loss": 1.0089, "step": 4016 }, { "epoch": 0.35892510107891973, "grad_norm": 0.5077205300331116, "learning_rate": 7.42105922492281e-05, "loss": 0.9804, "step": 4017 }, { "epoch": 0.3590144526101816, "grad_norm": 0.480881929397583, "learning_rate": 7.419793007017972e-05, "loss": 1.0214, "step": 4018 }, { "epoch": 0.3591038041414435, "grad_norm": 0.4301677942276001, "learning_rate": 7.418526586429676e-05, "loss": 1.0346, "step": 4019 }, { "epoch": 0.3591931556727053, "grad_norm": 0.4578787684440613, "learning_rate": 7.417259963263999e-05, "loss": 0.9741, "step": 4020 }, { "epoch": 0.3592825072039672, "grad_norm": 0.5144799947738647, "learning_rate": 7.415993137627036e-05, "loss": 0.9486, "step": 4021 }, { "epoch": 0.3593718587352291, "grad_norm": 0.37972408533096313, "learning_rate": 7.414726109624892e-05, "loss": 1.0567, "step": 4022 }, { "epoch": 0.3594612102664909, "grad_norm": 0.40619876980781555, "learning_rate": 7.413458879363698e-05, "loss": 0.9922, "step": 4023 }, { "epoch": 0.3595505617977528, "grad_norm": 0.5421142578125, "learning_rate": 7.412191446949598e-05, "loss": 0.9754, "step": 4024 }, { "epoch": 0.3596399133290147, "grad_norm": 0.46210673451423645, "learning_rate": 7.410923812488752e-05, "loss": 1.0145, "step": 4025 }, { "epoch": 0.35972926486027657, "grad_norm": 0.45515942573547363, "learning_rate": 7.409655976087339e-05, "loss": 0.9441, "step": 4026 }, { "epoch": 0.3598186163915384, "grad_norm": 0.40571707487106323, "learning_rate": 7.408387937851551e-05, "loss": 0.9343, "step": 4027 }, { "epoch": 0.3599079679228003, "grad_norm": 0.5472614765167236, "learning_rate": 7.407119697887602e-05, "loss": 0.964, "step": 4028 }, { "epoch": 0.35999731945406216, "grad_norm": 0.4565272629261017, "learning_rate": 7.405851256301722e-05, "loss": 0.9862, "step": 4029 }, { "epoch": 0.360086670985324, "grad_norm": 0.5199412703514099, "learning_rate": 7.404582613200153e-05, "loss": 0.9162, "step": 4030 }, { "epoch": 0.36017602251658587, "grad_norm": 0.38596537709236145, "learning_rate": 7.403313768689159e-05, "loss": 1.0374, "step": 4031 }, { "epoch": 0.36026537404784775, "grad_norm": 0.4285348653793335, "learning_rate": 7.402044722875021e-05, "loss": 0.9533, "step": 4032 }, { "epoch": 0.36035472557910964, "grad_norm": 0.4435160756111145, "learning_rate": 7.400775475864032e-05, "loss": 0.9475, "step": 4033 }, { "epoch": 0.36044407711037146, "grad_norm": 0.40904703736305237, "learning_rate": 7.399506027762507e-05, "loss": 0.9486, "step": 4034 }, { "epoch": 0.36053342864163335, "grad_norm": 0.42833709716796875, "learning_rate": 7.398236378676776e-05, "loss": 0.9862, "step": 4035 }, { "epoch": 0.36062278017289523, "grad_norm": 0.44428741931915283, "learning_rate": 7.396966528713184e-05, "loss": 0.9491, "step": 4036 }, { "epoch": 0.36071213170415706, "grad_norm": 0.37404051423072815, "learning_rate": 7.395696477978096e-05, "loss": 1.0652, "step": 4037 }, { "epoch": 0.36080148323541894, "grad_norm": 0.5555472373962402, "learning_rate": 7.394426226577891e-05, "loss": 0.8922, "step": 4038 }, { "epoch": 0.3608908347666808, "grad_norm": 0.40428173542022705, "learning_rate": 7.393155774618967e-05, "loss": 0.9652, "step": 4039 }, { "epoch": 0.3609801862979427, "grad_norm": 0.5697705149650574, "learning_rate": 7.391885122207738e-05, "loss": 0.9272, "step": 4040 }, { "epoch": 0.36106953782920453, "grad_norm": 0.38609790802001953, "learning_rate": 7.390614269450634e-05, "loss": 0.9702, "step": 4041 }, { "epoch": 0.3611588893604664, "grad_norm": 0.401506245136261, "learning_rate": 7.389343216454103e-05, "loss": 1.0075, "step": 4042 }, { "epoch": 0.3612482408917283, "grad_norm": 0.43169543147087097, "learning_rate": 7.388071963324609e-05, "loss": 0.9761, "step": 4043 }, { "epoch": 0.36133759242299013, "grad_norm": 0.4852740168571472, "learning_rate": 7.386800510168632e-05, "loss": 0.9143, "step": 4044 }, { "epoch": 0.361426943954252, "grad_norm": 0.4311829209327698, "learning_rate": 7.385528857092672e-05, "loss": 0.9728, "step": 4045 }, { "epoch": 0.3615162954855139, "grad_norm": 0.4241611063480377, "learning_rate": 7.384257004203242e-05, "loss": 0.9631, "step": 4046 }, { "epoch": 0.3616056470167758, "grad_norm": 0.435428649187088, "learning_rate": 7.382984951606875e-05, "loss": 0.9813, "step": 4047 }, { "epoch": 0.3616949985480376, "grad_norm": 0.4283429980278015, "learning_rate": 7.381712699410116e-05, "loss": 0.9696, "step": 4048 }, { "epoch": 0.3617843500792995, "grad_norm": 0.43169790506362915, "learning_rate": 7.380440247719532e-05, "loss": 0.968, "step": 4049 }, { "epoch": 0.36187370161056137, "grad_norm": 0.44879093766212463, "learning_rate": 7.379167596641702e-05, "loss": 0.9826, "step": 4050 }, { "epoch": 0.3619630531418232, "grad_norm": 0.3845369219779968, "learning_rate": 7.377894746283227e-05, "loss": 0.9373, "step": 4051 }, { "epoch": 0.3620524046730851, "grad_norm": 0.42470452189445496, "learning_rate": 7.37662169675072e-05, "loss": 1.0286, "step": 4052 }, { "epoch": 0.36214175620434697, "grad_norm": 0.5062727332115173, "learning_rate": 7.375348448150814e-05, "loss": 0.9409, "step": 4053 }, { "epoch": 0.3622311077356088, "grad_norm": 0.41713082790374756, "learning_rate": 7.374075000590155e-05, "loss": 0.9857, "step": 4054 }, { "epoch": 0.3623204592668707, "grad_norm": 0.5149965286254883, "learning_rate": 7.372801354175409e-05, "loss": 0.8709, "step": 4055 }, { "epoch": 0.36240981079813256, "grad_norm": 0.49608248472213745, "learning_rate": 7.371527509013257e-05, "loss": 0.9298, "step": 4056 }, { "epoch": 0.36249916232939444, "grad_norm": 0.5133218169212341, "learning_rate": 7.370253465210398e-05, "loss": 0.9234, "step": 4057 }, { "epoch": 0.36258851386065627, "grad_norm": 0.4780612587928772, "learning_rate": 7.368979222873547e-05, "loss": 0.9725, "step": 4058 }, { "epoch": 0.36267786539191815, "grad_norm": 0.5128650665283203, "learning_rate": 7.367704782109433e-05, "loss": 0.9635, "step": 4059 }, { "epoch": 0.36276721692318004, "grad_norm": 0.41586834192276, "learning_rate": 7.366430143024805e-05, "loss": 0.9352, "step": 4060 }, { "epoch": 0.36285656845444186, "grad_norm": 0.3711674213409424, "learning_rate": 7.36515530572643e-05, "loss": 1.0388, "step": 4061 }, { "epoch": 0.36294591998570375, "grad_norm": 0.5341949462890625, "learning_rate": 7.363880270321087e-05, "loss": 0.9257, "step": 4062 }, { "epoch": 0.36303527151696563, "grad_norm": 0.43725699186325073, "learning_rate": 7.362605036915574e-05, "loss": 0.9485, "step": 4063 }, { "epoch": 0.3631246230482275, "grad_norm": 0.47559094429016113, "learning_rate": 7.361329605616705e-05, "loss": 1.0375, "step": 4064 }, { "epoch": 0.36321397457948934, "grad_norm": 0.3746141493320465, "learning_rate": 7.360053976531312e-05, "loss": 0.9983, "step": 4065 }, { "epoch": 0.3633033261107512, "grad_norm": 0.4194895625114441, "learning_rate": 7.358778149766244e-05, "loss": 0.9532, "step": 4066 }, { "epoch": 0.3633926776420131, "grad_norm": 0.42524465918540955, "learning_rate": 7.35750212542836e-05, "loss": 1.0122, "step": 4067 }, { "epoch": 0.36348202917327493, "grad_norm": 0.5586007237434387, "learning_rate": 7.356225903624545e-05, "loss": 0.9938, "step": 4068 }, { "epoch": 0.3635713807045368, "grad_norm": 0.36692580580711365, "learning_rate": 7.354949484461697e-05, "loss": 1.0279, "step": 4069 }, { "epoch": 0.3636607322357987, "grad_norm": 0.40066802501678467, "learning_rate": 7.353672868046725e-05, "loss": 1.0642, "step": 4070 }, { "epoch": 0.3637500837670606, "grad_norm": 0.4100087285041809, "learning_rate": 7.352396054486562e-05, "loss": 1.0454, "step": 4071 }, { "epoch": 0.3638394352983224, "grad_norm": 0.39286187291145325, "learning_rate": 7.351119043888158e-05, "loss": 1.0109, "step": 4072 }, { "epoch": 0.3639287868295843, "grad_norm": 0.39239948987960815, "learning_rate": 7.349841836358468e-05, "loss": 1.0396, "step": 4073 }, { "epoch": 0.3640181383608462, "grad_norm": 0.3697894513607025, "learning_rate": 7.34856443200448e-05, "loss": 1.02, "step": 4074 }, { "epoch": 0.364107489892108, "grad_norm": 0.44793811440467834, "learning_rate": 7.347286830933187e-05, "loss": 0.9778, "step": 4075 }, { "epoch": 0.3641968414233699, "grad_norm": 0.46395543217658997, "learning_rate": 7.3460090332516e-05, "loss": 0.9936, "step": 4076 }, { "epoch": 0.36428619295463177, "grad_norm": 0.4393361806869507, "learning_rate": 7.344731039066752e-05, "loss": 0.9751, "step": 4077 }, { "epoch": 0.36437554448589365, "grad_norm": 0.3656950294971466, "learning_rate": 7.343452848485683e-05, "loss": 1.0026, "step": 4078 }, { "epoch": 0.3644648960171555, "grad_norm": 0.43100160360336304, "learning_rate": 7.342174461615461e-05, "loss": 0.9219, "step": 4079 }, { "epoch": 0.36455424754841737, "grad_norm": 0.37089818716049194, "learning_rate": 7.340895878563162e-05, "loss": 1.0019, "step": 4080 }, { "epoch": 0.36464359907967925, "grad_norm": 0.4475706219673157, "learning_rate": 7.339617099435881e-05, "loss": 0.9677, "step": 4081 }, { "epoch": 0.3647329506109411, "grad_norm": 0.3892630636692047, "learning_rate": 7.338338124340728e-05, "loss": 1.0284, "step": 4082 }, { "epoch": 0.36482230214220296, "grad_norm": 0.4408927857875824, "learning_rate": 7.337058953384834e-05, "loss": 0.9299, "step": 4083 }, { "epoch": 0.36491165367346484, "grad_norm": 0.5656498670578003, "learning_rate": 7.335779586675341e-05, "loss": 0.9921, "step": 4084 }, { "epoch": 0.36500100520472667, "grad_norm": 0.40620046854019165, "learning_rate": 7.334500024319409e-05, "loss": 1.0244, "step": 4085 }, { "epoch": 0.36509035673598855, "grad_norm": 0.5039829611778259, "learning_rate": 7.333220266424217e-05, "loss": 0.944, "step": 4086 }, { "epoch": 0.36517970826725044, "grad_norm": 0.4727097451686859, "learning_rate": 7.331940313096957e-05, "loss": 0.8922, "step": 4087 }, { "epoch": 0.3652690597985123, "grad_norm": 0.44061312079429626, "learning_rate": 7.33066016444484e-05, "loss": 0.9798, "step": 4088 }, { "epoch": 0.36535841132977415, "grad_norm": 0.45610326528549194, "learning_rate": 7.329379820575089e-05, "loss": 1.004, "step": 4089 }, { "epoch": 0.36544776286103603, "grad_norm": 0.4043094515800476, "learning_rate": 7.32809928159495e-05, "loss": 1.0491, "step": 4090 }, { "epoch": 0.3655371143922979, "grad_norm": 0.4471263885498047, "learning_rate": 7.32681854761168e-05, "loss": 0.9727, "step": 4091 }, { "epoch": 0.36562646592355974, "grad_norm": 0.5790581703186035, "learning_rate": 7.325537618732557e-05, "loss": 0.9889, "step": 4092 }, { "epoch": 0.3657158174548216, "grad_norm": 0.4110223650932312, "learning_rate": 7.324256495064867e-05, "loss": 1.0602, "step": 4093 }, { "epoch": 0.3658051689860835, "grad_norm": 0.5571956634521484, "learning_rate": 7.322975176715921e-05, "loss": 1.0389, "step": 4094 }, { "epoch": 0.3658945205173454, "grad_norm": 0.3689397871494293, "learning_rate": 7.321693663793044e-05, "loss": 0.9932, "step": 4095 }, { "epoch": 0.3659838720486072, "grad_norm": 0.37520068883895874, "learning_rate": 7.320411956403573e-05, "loss": 1.0419, "step": 4096 }, { "epoch": 0.3660732235798691, "grad_norm": 0.5426008105278015, "learning_rate": 7.319130054654869e-05, "loss": 0.9726, "step": 4097 }, { "epoch": 0.366162575111131, "grad_norm": 0.41847026348114014, "learning_rate": 7.317847958654303e-05, "loss": 0.9663, "step": 4098 }, { "epoch": 0.3662519266423928, "grad_norm": 0.39576879143714905, "learning_rate": 7.316565668509262e-05, "loss": 0.9269, "step": 4099 }, { "epoch": 0.3663412781736547, "grad_norm": 0.46187660098075867, "learning_rate": 7.315283184327156e-05, "loss": 1.0216, "step": 4100 }, { "epoch": 0.3664306297049166, "grad_norm": 0.4226139187812805, "learning_rate": 7.314000506215402e-05, "loss": 0.9553, "step": 4101 }, { "epoch": 0.36651998123617846, "grad_norm": 0.41681933403015137, "learning_rate": 7.312717634281441e-05, "loss": 1.0133, "step": 4102 }, { "epoch": 0.3666093327674403, "grad_norm": 0.43406426906585693, "learning_rate": 7.311434568632725e-05, "loss": 1.0148, "step": 4103 }, { "epoch": 0.36669868429870217, "grad_norm": 0.6190657615661621, "learning_rate": 7.310151309376728e-05, "loss": 0.8836, "step": 4104 }, { "epoch": 0.36678803582996405, "grad_norm": 0.43368059396743774, "learning_rate": 7.308867856620933e-05, "loss": 0.9831, "step": 4105 }, { "epoch": 0.3668773873612259, "grad_norm": 0.41560328006744385, "learning_rate": 7.307584210472844e-05, "loss": 1.0052, "step": 4106 }, { "epoch": 0.36696673889248776, "grad_norm": 0.45990949869155884, "learning_rate": 7.306300371039983e-05, "loss": 0.9839, "step": 4107 }, { "epoch": 0.36705609042374965, "grad_norm": 0.4657072126865387, "learning_rate": 7.30501633842988e-05, "loss": 0.9505, "step": 4108 }, { "epoch": 0.36714544195501153, "grad_norm": 0.39380112290382385, "learning_rate": 7.303732112750089e-05, "loss": 1.0012, "step": 4109 }, { "epoch": 0.36723479348627336, "grad_norm": 0.4520123600959778, "learning_rate": 7.302447694108177e-05, "loss": 0.9982, "step": 4110 }, { "epoch": 0.36732414501753524, "grad_norm": 0.5289416909217834, "learning_rate": 7.301163082611729e-05, "loss": 0.9575, "step": 4111 }, { "epoch": 0.3674134965487971, "grad_norm": 0.5018066167831421, "learning_rate": 7.299878278368345e-05, "loss": 0.8618, "step": 4112 }, { "epoch": 0.36750284808005895, "grad_norm": 0.3886435031890869, "learning_rate": 7.29859328148564e-05, "loss": 0.9727, "step": 4113 }, { "epoch": 0.36759219961132084, "grad_norm": 0.45843201875686646, "learning_rate": 7.297308092071245e-05, "loss": 0.9968, "step": 4114 }, { "epoch": 0.3676815511425827, "grad_norm": 0.4753422141075134, "learning_rate": 7.296022710232812e-05, "loss": 0.9434, "step": 4115 }, { "epoch": 0.36777090267384455, "grad_norm": 0.4317519962787628, "learning_rate": 7.294737136078001e-05, "loss": 0.9388, "step": 4116 }, { "epoch": 0.36786025420510643, "grad_norm": 0.388031005859375, "learning_rate": 7.293451369714495e-05, "loss": 1.079, "step": 4117 }, { "epoch": 0.3679496057363683, "grad_norm": 0.42603445053100586, "learning_rate": 7.292165411249993e-05, "loss": 0.9291, "step": 4118 }, { "epoch": 0.3680389572676302, "grad_norm": 0.4375905990600586, "learning_rate": 7.290879260792203e-05, "loss": 0.9836, "step": 4119 }, { "epoch": 0.368128308798892, "grad_norm": 0.40802040696144104, "learning_rate": 7.289592918448856e-05, "loss": 0.9789, "step": 4120 }, { "epoch": 0.3682176603301539, "grad_norm": 0.3883795142173767, "learning_rate": 7.288306384327696e-05, "loss": 1.0521, "step": 4121 }, { "epoch": 0.3683070118614158, "grad_norm": 0.42812490463256836, "learning_rate": 7.287019658536486e-05, "loss": 1.0214, "step": 4122 }, { "epoch": 0.3683963633926776, "grad_norm": 0.4556853771209717, "learning_rate": 7.285732741183003e-05, "loss": 0.9778, "step": 4123 }, { "epoch": 0.3684857149239395, "grad_norm": 0.43323996663093567, "learning_rate": 7.284445632375035e-05, "loss": 0.9807, "step": 4124 }, { "epoch": 0.3685750664552014, "grad_norm": 0.4459212124347687, "learning_rate": 7.283158332220397e-05, "loss": 0.9184, "step": 4125 }, { "epoch": 0.36866441798646327, "grad_norm": 0.43171730637550354, "learning_rate": 7.281870840826912e-05, "loss": 0.9214, "step": 4126 }, { "epoch": 0.3687537695177251, "grad_norm": 0.4195806086063385, "learning_rate": 7.280583158302421e-05, "loss": 0.9684, "step": 4127 }, { "epoch": 0.368843121048987, "grad_norm": 0.4562622606754303, "learning_rate": 7.279295284754782e-05, "loss": 0.9569, "step": 4128 }, { "epoch": 0.36893247258024886, "grad_norm": 0.5500267148017883, "learning_rate": 7.278007220291866e-05, "loss": 1.0506, "step": 4129 }, { "epoch": 0.3690218241115107, "grad_norm": 0.5012460947036743, "learning_rate": 7.276718965021563e-05, "loss": 0.9673, "step": 4130 }, { "epoch": 0.36911117564277257, "grad_norm": 0.46749043464660645, "learning_rate": 7.27543051905178e-05, "loss": 0.9777, "step": 4131 }, { "epoch": 0.36920052717403445, "grad_norm": 0.49202150106430054, "learning_rate": 7.274141882490435e-05, "loss": 0.9335, "step": 4132 }, { "epoch": 0.36928987870529634, "grad_norm": 0.47457700967788696, "learning_rate": 7.27285305544547e-05, "loss": 0.9617, "step": 4133 }, { "epoch": 0.36937923023655816, "grad_norm": 0.46560418605804443, "learning_rate": 7.271564038024831e-05, "loss": 1.0336, "step": 4134 }, { "epoch": 0.36946858176782005, "grad_norm": 0.41487690806388855, "learning_rate": 7.270274830336493e-05, "loss": 0.9539, "step": 4135 }, { "epoch": 0.36955793329908193, "grad_norm": 0.4482000470161438, "learning_rate": 7.268985432488438e-05, "loss": 0.9841, "step": 4136 }, { "epoch": 0.36964728483034376, "grad_norm": 0.44116854667663574, "learning_rate": 7.267695844588668e-05, "loss": 0.9894, "step": 4137 }, { "epoch": 0.36973663636160564, "grad_norm": 0.3913993537425995, "learning_rate": 7.266406066745199e-05, "loss": 0.9203, "step": 4138 }, { "epoch": 0.3698259878928675, "grad_norm": 0.43621063232421875, "learning_rate": 7.265116099066063e-05, "loss": 1.0129, "step": 4139 }, { "epoch": 0.3699153394241294, "grad_norm": 0.5264103412628174, "learning_rate": 7.26382594165931e-05, "loss": 0.9673, "step": 4140 }, { "epoch": 0.37000469095539124, "grad_norm": 0.393413782119751, "learning_rate": 7.262535594633002e-05, "loss": 1.0037, "step": 4141 }, { "epoch": 0.3700940424866531, "grad_norm": 0.5121841430664062, "learning_rate": 7.261245058095223e-05, "loss": 0.8443, "step": 4142 }, { "epoch": 0.370183394017915, "grad_norm": 0.4309183359146118, "learning_rate": 7.259954332154066e-05, "loss": 1.0241, "step": 4143 }, { "epoch": 0.37027274554917683, "grad_norm": 0.3869653046131134, "learning_rate": 7.258663416917645e-05, "loss": 1.0111, "step": 4144 }, { "epoch": 0.3703620970804387, "grad_norm": 0.5161389112472534, "learning_rate": 7.257372312494088e-05, "loss": 1.0608, "step": 4145 }, { "epoch": 0.3704514486117006, "grad_norm": 0.48384639620780945, "learning_rate": 7.256081018991536e-05, "loss": 0.9671, "step": 4146 }, { "epoch": 0.3705408001429624, "grad_norm": 0.4092333912849426, "learning_rate": 7.254789536518151e-05, "loss": 1.0206, "step": 4147 }, { "epoch": 0.3706301516742243, "grad_norm": 0.4120895266532898, "learning_rate": 7.25349786518211e-05, "loss": 1.0183, "step": 4148 }, { "epoch": 0.3707195032054862, "grad_norm": 0.47029179334640503, "learning_rate": 7.2522060050916e-05, "loss": 0.9786, "step": 4149 }, { "epoch": 0.3708088547367481, "grad_norm": 0.38169971108436584, "learning_rate": 7.25091395635483e-05, "loss": 0.9691, "step": 4150 }, { "epoch": 0.3708982062680099, "grad_norm": 0.48329901695251465, "learning_rate": 7.249621719080025e-05, "loss": 0.9664, "step": 4151 }, { "epoch": 0.3709875577992718, "grad_norm": 0.4072484076023102, "learning_rate": 7.248329293375422e-05, "loss": 1.0322, "step": 4152 }, { "epoch": 0.37107690933053367, "grad_norm": 0.4380795955657959, "learning_rate": 7.247036679349274e-05, "loss": 0.9546, "step": 4153 }, { "epoch": 0.3711662608617955, "grad_norm": 0.515524685382843, "learning_rate": 7.245743877109852e-05, "loss": 0.9816, "step": 4154 }, { "epoch": 0.3712556123930574, "grad_norm": 0.42287132143974304, "learning_rate": 7.244450886765443e-05, "loss": 0.9431, "step": 4155 }, { "epoch": 0.37134496392431926, "grad_norm": 0.4340779483318329, "learning_rate": 7.243157708424348e-05, "loss": 0.9488, "step": 4156 }, { "epoch": 0.37143431545558114, "grad_norm": 0.524341344833374, "learning_rate": 7.241864342194886e-05, "loss": 0.9756, "step": 4157 }, { "epoch": 0.37152366698684297, "grad_norm": 0.6122943758964539, "learning_rate": 7.240570788185388e-05, "loss": 0.9382, "step": 4158 }, { "epoch": 0.37161301851810485, "grad_norm": 0.5012384653091431, "learning_rate": 7.239277046504202e-05, "loss": 0.9809, "step": 4159 }, { "epoch": 0.37170237004936674, "grad_norm": 0.3928990364074707, "learning_rate": 7.237983117259696e-05, "loss": 0.9839, "step": 4160 }, { "epoch": 0.37179172158062856, "grad_norm": 0.4313662350177765, "learning_rate": 7.236689000560248e-05, "loss": 1.0517, "step": 4161 }, { "epoch": 0.37188107311189045, "grad_norm": 0.3812869191169739, "learning_rate": 7.235394696514255e-05, "loss": 1.0109, "step": 4162 }, { "epoch": 0.37197042464315233, "grad_norm": 0.3870796263217926, "learning_rate": 7.23410020523013e-05, "loss": 1.0234, "step": 4163 }, { "epoch": 0.3720597761744142, "grad_norm": 0.4438193142414093, "learning_rate": 7.232805526816297e-05, "loss": 0.9647, "step": 4164 }, { "epoch": 0.37214912770567604, "grad_norm": 0.375704288482666, "learning_rate": 7.231510661381202e-05, "loss": 0.9969, "step": 4165 }, { "epoch": 0.3722384792369379, "grad_norm": 0.38540956377983093, "learning_rate": 7.230215609033301e-05, "loss": 1.0308, "step": 4166 }, { "epoch": 0.3723278307681998, "grad_norm": 0.3983640670776367, "learning_rate": 7.228920369881073e-05, "loss": 0.9823, "step": 4167 }, { "epoch": 0.37241718229946164, "grad_norm": 0.4259108901023865, "learning_rate": 7.227624944033006e-05, "loss": 0.9739, "step": 4168 }, { "epoch": 0.3725065338307235, "grad_norm": 0.4795960783958435, "learning_rate": 7.226329331597604e-05, "loss": 0.9372, "step": 4169 }, { "epoch": 0.3725958853619854, "grad_norm": 0.4027024209499359, "learning_rate": 7.225033532683388e-05, "loss": 0.9707, "step": 4170 }, { "epoch": 0.3726852368932473, "grad_norm": 0.42690253257751465, "learning_rate": 7.223737547398898e-05, "loss": 0.973, "step": 4171 }, { "epoch": 0.3727745884245091, "grad_norm": 0.42597901821136475, "learning_rate": 7.222441375852685e-05, "loss": 0.9487, "step": 4172 }, { "epoch": 0.372863939955771, "grad_norm": 0.43113502860069275, "learning_rate": 7.221145018153317e-05, "loss": 1.0139, "step": 4173 }, { "epoch": 0.3729532914870329, "grad_norm": 0.4594292938709259, "learning_rate": 7.219848474409378e-05, "loss": 1.0409, "step": 4174 }, { "epoch": 0.3730426430182947, "grad_norm": 0.39709535241127014, "learning_rate": 7.218551744729468e-05, "loss": 0.9839, "step": 4175 }, { "epoch": 0.3731319945495566, "grad_norm": 0.38931846618652344, "learning_rate": 7.217254829222201e-05, "loss": 0.9697, "step": 4176 }, { "epoch": 0.3732213460808185, "grad_norm": 0.3806284964084625, "learning_rate": 7.215957727996207e-05, "loss": 1.0805, "step": 4177 }, { "epoch": 0.3733106976120803, "grad_norm": 0.4208787977695465, "learning_rate": 7.214660441160134e-05, "loss": 0.9579, "step": 4178 }, { "epoch": 0.3734000491433422, "grad_norm": 0.4032423198223114, "learning_rate": 7.213362968822643e-05, "loss": 0.985, "step": 4179 }, { "epoch": 0.37348940067460407, "grad_norm": 0.4312584400177002, "learning_rate": 7.212065311092409e-05, "loss": 0.9404, "step": 4180 }, { "epoch": 0.37357875220586595, "grad_norm": 0.39697664976119995, "learning_rate": 7.210767468078126e-05, "loss": 0.9926, "step": 4181 }, { "epoch": 0.3736681037371278, "grad_norm": 0.38140472769737244, "learning_rate": 7.209469439888504e-05, "loss": 0.9736, "step": 4182 }, { "epoch": 0.37375745526838966, "grad_norm": 0.41029414534568787, "learning_rate": 7.208171226632264e-05, "loss": 1.0136, "step": 4183 }, { "epoch": 0.37384680679965154, "grad_norm": 0.44529950618743896, "learning_rate": 7.206872828418146e-05, "loss": 0.9878, "step": 4184 }, { "epoch": 0.37393615833091337, "grad_norm": 0.4840013086795807, "learning_rate": 7.205574245354907e-05, "loss": 0.9829, "step": 4185 }, { "epoch": 0.37402550986217525, "grad_norm": 0.41455405950546265, "learning_rate": 7.204275477551314e-05, "loss": 0.9755, "step": 4186 }, { "epoch": 0.37411486139343714, "grad_norm": 0.5246483087539673, "learning_rate": 7.202976525116154e-05, "loss": 0.9764, "step": 4187 }, { "epoch": 0.374204212924699, "grad_norm": 0.484429270029068, "learning_rate": 7.201677388158228e-05, "loss": 0.9466, "step": 4188 }, { "epoch": 0.37429356445596085, "grad_norm": 0.39618611335754395, "learning_rate": 7.200378066786352e-05, "loss": 1.0202, "step": 4189 }, { "epoch": 0.37438291598722273, "grad_norm": 0.4614309072494507, "learning_rate": 7.199078561109359e-05, "loss": 0.9347, "step": 4190 }, { "epoch": 0.3744722675184846, "grad_norm": 0.46450915932655334, "learning_rate": 7.197778871236096e-05, "loss": 0.9394, "step": 4191 }, { "epoch": 0.37456161904974644, "grad_norm": 0.37364086508750916, "learning_rate": 7.196478997275426e-05, "loss": 1.0168, "step": 4192 }, { "epoch": 0.3746509705810083, "grad_norm": 0.4108889400959015, "learning_rate": 7.195178939336228e-05, "loss": 0.9738, "step": 4193 }, { "epoch": 0.3747403221122702, "grad_norm": 0.43093374371528625, "learning_rate": 7.193878697527394e-05, "loss": 0.9225, "step": 4194 }, { "epoch": 0.3748296736435321, "grad_norm": 0.5023407340049744, "learning_rate": 7.192578271957833e-05, "loss": 0.9292, "step": 4195 }, { "epoch": 0.3749190251747939, "grad_norm": 0.42425450682640076, "learning_rate": 7.191277662736473e-05, "loss": 0.9801, "step": 4196 }, { "epoch": 0.3750083767060558, "grad_norm": 0.46366748213768005, "learning_rate": 7.189976869972248e-05, "loss": 0.9504, "step": 4197 }, { "epoch": 0.3750977282373177, "grad_norm": 0.4702743589878082, "learning_rate": 7.18867589377412e-05, "loss": 0.9922, "step": 4198 }, { "epoch": 0.3751870797685795, "grad_norm": 0.4722293019294739, "learning_rate": 7.187374734251054e-05, "loss": 1.0015, "step": 4199 }, { "epoch": 0.3752764312998414, "grad_norm": 0.4281826615333557, "learning_rate": 7.186073391512039e-05, "loss": 0.9522, "step": 4200 }, { "epoch": 0.3753657828311033, "grad_norm": 0.3947566747665405, "learning_rate": 7.184771865666076e-05, "loss": 1.0434, "step": 4201 }, { "epoch": 0.37545513436236516, "grad_norm": 0.43979522585868835, "learning_rate": 7.18347015682218e-05, "loss": 0.9935, "step": 4202 }, { "epoch": 0.375544485893627, "grad_norm": 0.41721320152282715, "learning_rate": 7.182168265089386e-05, "loss": 0.9861, "step": 4203 }, { "epoch": 0.3756338374248889, "grad_norm": 0.46523186564445496, "learning_rate": 7.18086619057674e-05, "loss": 0.9951, "step": 4204 }, { "epoch": 0.37572318895615076, "grad_norm": 0.43534207344055176, "learning_rate": 7.179563933393303e-05, "loss": 0.9288, "step": 4205 }, { "epoch": 0.3758125404874126, "grad_norm": 0.4193892776966095, "learning_rate": 7.178261493648154e-05, "loss": 1.0575, "step": 4206 }, { "epoch": 0.37590189201867447, "grad_norm": 0.48996466398239136, "learning_rate": 7.176958871450386e-05, "loss": 0.9262, "step": 4207 }, { "epoch": 0.37599124354993635, "grad_norm": 0.4287664294242859, "learning_rate": 7.175656066909109e-05, "loss": 1.031, "step": 4208 }, { "epoch": 0.3760805950811982, "grad_norm": 0.4364438056945801, "learning_rate": 7.174353080133445e-05, "loss": 1.0015, "step": 4209 }, { "epoch": 0.37616994661246006, "grad_norm": 0.4443535804748535, "learning_rate": 7.173049911232533e-05, "loss": 0.9699, "step": 4210 }, { "epoch": 0.37625929814372194, "grad_norm": 0.4485486149787903, "learning_rate": 7.171746560315529e-05, "loss": 0.996, "step": 4211 }, { "epoch": 0.3763486496749838, "grad_norm": 0.43986693024635315, "learning_rate": 7.1704430274916e-05, "loss": 0.9912, "step": 4212 }, { "epoch": 0.37643800120624565, "grad_norm": 0.42477118968963623, "learning_rate": 7.169139312869933e-05, "loss": 1.0281, "step": 4213 }, { "epoch": 0.37652735273750754, "grad_norm": 0.35374677181243896, "learning_rate": 7.167835416559727e-05, "loss": 1.0282, "step": 4214 }, { "epoch": 0.3766167042687694, "grad_norm": 0.5305443406105042, "learning_rate": 7.166531338670198e-05, "loss": 0.8789, "step": 4215 }, { "epoch": 0.37670605580003125, "grad_norm": 0.4482033848762512, "learning_rate": 7.165227079310575e-05, "loss": 0.9928, "step": 4216 }, { "epoch": 0.37679540733129313, "grad_norm": 0.4617825150489807, "learning_rate": 7.163922638590104e-05, "loss": 0.9478, "step": 4217 }, { "epoch": 0.376884758862555, "grad_norm": 0.4732501804828644, "learning_rate": 7.162618016618047e-05, "loss": 0.9714, "step": 4218 }, { "epoch": 0.3769741103938169, "grad_norm": 0.40239885449409485, "learning_rate": 7.161313213503679e-05, "loss": 0.9664, "step": 4219 }, { "epoch": 0.3770634619250787, "grad_norm": 0.4415636658668518, "learning_rate": 7.160008229356292e-05, "loss": 1.0054, "step": 4220 }, { "epoch": 0.3771528134563406, "grad_norm": 0.47628524899482727, "learning_rate": 7.158703064285192e-05, "loss": 0.9021, "step": 4221 }, { "epoch": 0.3772421649876025, "grad_norm": 0.3998652398586273, "learning_rate": 7.157397718399698e-05, "loss": 1.046, "step": 4222 }, { "epoch": 0.3773315165188643, "grad_norm": 0.3887675702571869, "learning_rate": 7.156092191809152e-05, "loss": 1.0419, "step": 4223 }, { "epoch": 0.3774208680501262, "grad_norm": 0.3833785951137543, "learning_rate": 7.154786484622901e-05, "loss": 0.9874, "step": 4224 }, { "epoch": 0.3775102195813881, "grad_norm": 0.4666437804698944, "learning_rate": 7.153480596950314e-05, "loss": 0.967, "step": 4225 }, { "epoch": 0.37759957111264997, "grad_norm": 0.37928861379623413, "learning_rate": 7.152174528900772e-05, "loss": 0.9859, "step": 4226 }, { "epoch": 0.3776889226439118, "grad_norm": 0.5285788178443909, "learning_rate": 7.150868280583674e-05, "loss": 0.9279, "step": 4227 }, { "epoch": 0.3777782741751737, "grad_norm": 0.4263918697834015, "learning_rate": 7.149561852108429e-05, "loss": 0.9441, "step": 4228 }, { "epoch": 0.37786762570643556, "grad_norm": 0.40622586011886597, "learning_rate": 7.148255243584467e-05, "loss": 0.9801, "step": 4229 }, { "epoch": 0.3779569772376974, "grad_norm": 0.3894538879394531, "learning_rate": 7.146948455121231e-05, "loss": 1.0106, "step": 4230 }, { "epoch": 0.37804632876895927, "grad_norm": 0.4317224621772766, "learning_rate": 7.145641486828175e-05, "loss": 0.941, "step": 4231 }, { "epoch": 0.37813568030022116, "grad_norm": 0.4164870083332062, "learning_rate": 7.144334338814774e-05, "loss": 0.9747, "step": 4232 }, { "epoch": 0.37822503183148304, "grad_norm": 0.5142220258712769, "learning_rate": 7.143027011190515e-05, "loss": 0.9172, "step": 4233 }, { "epoch": 0.37831438336274487, "grad_norm": 0.3964780569076538, "learning_rate": 7.141719504064902e-05, "loss": 0.963, "step": 4234 }, { "epoch": 0.37840373489400675, "grad_norm": 0.532341480255127, "learning_rate": 7.140411817547452e-05, "loss": 0.977, "step": 4235 }, { "epoch": 0.37849308642526863, "grad_norm": 0.39835068583488464, "learning_rate": 7.139103951747695e-05, "loss": 0.9591, "step": 4236 }, { "epoch": 0.37858243795653046, "grad_norm": 0.41592469811439514, "learning_rate": 7.137795906775182e-05, "loss": 0.9929, "step": 4237 }, { "epoch": 0.37867178948779234, "grad_norm": 0.43503326177597046, "learning_rate": 7.136487682739472e-05, "loss": 0.9984, "step": 4238 }, { "epoch": 0.3787611410190542, "grad_norm": 0.531616747379303, "learning_rate": 7.135179279750149e-05, "loss": 0.9221, "step": 4239 }, { "epoch": 0.37885049255031605, "grad_norm": 0.4244007170200348, "learning_rate": 7.1338706979168e-05, "loss": 0.9679, "step": 4240 }, { "epoch": 0.37893984408157794, "grad_norm": 0.4253380000591278, "learning_rate": 7.132561937349035e-05, "loss": 0.9562, "step": 4241 }, { "epoch": 0.3790291956128398, "grad_norm": 0.4492589235305786, "learning_rate": 7.131252998156476e-05, "loss": 0.9848, "step": 4242 }, { "epoch": 0.3791185471441017, "grad_norm": 0.41047170758247375, "learning_rate": 7.129943880448762e-05, "loss": 1.0768, "step": 4243 }, { "epoch": 0.37920789867536353, "grad_norm": 0.43270984292030334, "learning_rate": 7.128634584335545e-05, "loss": 0.9937, "step": 4244 }, { "epoch": 0.3792972502066254, "grad_norm": 0.45858004689216614, "learning_rate": 7.127325109926491e-05, "loss": 0.9881, "step": 4245 }, { "epoch": 0.3793866017378873, "grad_norm": 0.4487616717815399, "learning_rate": 7.126015457331281e-05, "loss": 1.0136, "step": 4246 }, { "epoch": 0.3794759532691491, "grad_norm": 0.41824638843536377, "learning_rate": 7.124705626659616e-05, "loss": 0.9809, "step": 4247 }, { "epoch": 0.379565304800411, "grad_norm": 0.4919416606426239, "learning_rate": 7.123395618021208e-05, "loss": 0.9974, "step": 4248 }, { "epoch": 0.3796546563316729, "grad_norm": 0.4910551905632019, "learning_rate": 7.122085431525785e-05, "loss": 1.0215, "step": 4249 }, { "epoch": 0.3797440078629348, "grad_norm": 0.46813416481018066, "learning_rate": 7.120775067283085e-05, "loss": 0.951, "step": 4250 }, { "epoch": 0.3798333593941966, "grad_norm": 0.4559837579727173, "learning_rate": 7.119464525402867e-05, "loss": 0.9815, "step": 4251 }, { "epoch": 0.3799227109254585, "grad_norm": 0.38314858078956604, "learning_rate": 7.118153805994902e-05, "loss": 1.0214, "step": 4252 }, { "epoch": 0.38001206245672037, "grad_norm": 0.4613505005836487, "learning_rate": 7.11684290916898e-05, "loss": 0.9771, "step": 4253 }, { "epoch": 0.3801014139879822, "grad_norm": 0.43904268741607666, "learning_rate": 7.1155318350349e-05, "loss": 0.9689, "step": 4254 }, { "epoch": 0.3801907655192441, "grad_norm": 0.4177344739437103, "learning_rate": 7.11422058370248e-05, "loss": 0.9386, "step": 4255 }, { "epoch": 0.38028011705050596, "grad_norm": 0.44413110613822937, "learning_rate": 7.112909155281549e-05, "loss": 0.9454, "step": 4256 }, { "epoch": 0.38036946858176784, "grad_norm": 0.4826868772506714, "learning_rate": 7.111597549881953e-05, "loss": 0.8906, "step": 4257 }, { "epoch": 0.38045882011302967, "grad_norm": 0.517841100692749, "learning_rate": 7.110285767613555e-05, "loss": 0.9733, "step": 4258 }, { "epoch": 0.38054817164429156, "grad_norm": 0.4552189111709595, "learning_rate": 7.108973808586231e-05, "loss": 0.969, "step": 4259 }, { "epoch": 0.38063752317555344, "grad_norm": 0.40824398398399353, "learning_rate": 7.107661672909871e-05, "loss": 0.9928, "step": 4260 }, { "epoch": 0.38072687470681527, "grad_norm": 0.45659610629081726, "learning_rate": 7.106349360694379e-05, "loss": 0.9898, "step": 4261 }, { "epoch": 0.38081622623807715, "grad_norm": 0.39422810077667236, "learning_rate": 7.105036872049675e-05, "loss": 1.005, "step": 4262 }, { "epoch": 0.38090557776933903, "grad_norm": 0.47162938117980957, "learning_rate": 7.103724207085696e-05, "loss": 0.995, "step": 4263 }, { "epoch": 0.3809949293006009, "grad_norm": 0.45578041672706604, "learning_rate": 7.10241136591239e-05, "loss": 0.9659, "step": 4264 }, { "epoch": 0.38108428083186274, "grad_norm": 0.35482344031333923, "learning_rate": 7.101098348639722e-05, "loss": 1.0144, "step": 4265 }, { "epoch": 0.3811736323631246, "grad_norm": 0.4190642237663269, "learning_rate": 7.099785155377672e-05, "loss": 0.9442, "step": 4266 }, { "epoch": 0.3812629838943865, "grad_norm": 0.4424975514411926, "learning_rate": 7.098471786236231e-05, "loss": 0.9564, "step": 4267 }, { "epoch": 0.38135233542564834, "grad_norm": 0.4281081259250641, "learning_rate": 7.097158241325411e-05, "loss": 0.9608, "step": 4268 }, { "epoch": 0.3814416869569102, "grad_norm": 0.46084991097450256, "learning_rate": 7.095844520755235e-05, "loss": 0.9865, "step": 4269 }, { "epoch": 0.3815310384881721, "grad_norm": 0.3826462924480438, "learning_rate": 7.094530624635741e-05, "loss": 0.978, "step": 4270 }, { "epoch": 0.38162039001943393, "grad_norm": 0.4165399372577667, "learning_rate": 7.093216553076977e-05, "loss": 0.9959, "step": 4271 }, { "epoch": 0.3817097415506958, "grad_norm": 0.4049665331840515, "learning_rate": 7.091902306189018e-05, "loss": 1.0591, "step": 4272 }, { "epoch": 0.3817990930819577, "grad_norm": 0.3788042664527893, "learning_rate": 7.090587884081943e-05, "loss": 1.0281, "step": 4273 }, { "epoch": 0.3818884446132196, "grad_norm": 0.436847060918808, "learning_rate": 7.089273286865849e-05, "loss": 0.9862, "step": 4274 }, { "epoch": 0.3819777961444814, "grad_norm": 0.4377835988998413, "learning_rate": 7.087958514650846e-05, "loss": 1.0327, "step": 4275 }, { "epoch": 0.3820671476757433, "grad_norm": 0.43927526473999023, "learning_rate": 7.08664356754706e-05, "loss": 0.9563, "step": 4276 }, { "epoch": 0.3821564992070052, "grad_norm": 0.4470997750759125, "learning_rate": 7.085328445664636e-05, "loss": 0.9565, "step": 4277 }, { "epoch": 0.382245850738267, "grad_norm": 0.42849406599998474, "learning_rate": 7.084013149113724e-05, "loss": 0.9785, "step": 4278 }, { "epoch": 0.3823352022695289, "grad_norm": 0.37449339032173157, "learning_rate": 7.0826976780045e-05, "loss": 0.9791, "step": 4279 }, { "epoch": 0.38242455380079077, "grad_norm": 0.39009079337120056, "learning_rate": 7.081382032447142e-05, "loss": 0.9007, "step": 4280 }, { "epoch": 0.38251390533205265, "grad_norm": 0.37804755568504333, "learning_rate": 7.080066212551854e-05, "loss": 1.0259, "step": 4281 }, { "epoch": 0.3826032568633145, "grad_norm": 0.4352472722530365, "learning_rate": 7.07875021842885e-05, "loss": 0.9648, "step": 4282 }, { "epoch": 0.38269260839457636, "grad_norm": 0.38885149359703064, "learning_rate": 7.077434050188356e-05, "loss": 1.0278, "step": 4283 }, { "epoch": 0.38278195992583824, "grad_norm": 0.3463941514492035, "learning_rate": 7.076117707940617e-05, "loss": 1.0115, "step": 4284 }, { "epoch": 0.38287131145710007, "grad_norm": 0.4116084575653076, "learning_rate": 7.07480119179589e-05, "loss": 0.9362, "step": 4285 }, { "epoch": 0.38296066298836196, "grad_norm": 0.44609686732292175, "learning_rate": 7.073484501864447e-05, "loss": 0.9525, "step": 4286 }, { "epoch": 0.38305001451962384, "grad_norm": 0.40134397149086, "learning_rate": 7.072167638256576e-05, "loss": 1.0264, "step": 4287 }, { "epoch": 0.3831393660508857, "grad_norm": 0.38840845227241516, "learning_rate": 7.070850601082577e-05, "loss": 0.9817, "step": 4288 }, { "epoch": 0.38322871758214755, "grad_norm": 0.43877261877059937, "learning_rate": 7.069533390452766e-05, "loss": 0.9695, "step": 4289 }, { "epoch": 0.38331806911340943, "grad_norm": 0.42478203773498535, "learning_rate": 7.068216006477476e-05, "loss": 0.9468, "step": 4290 }, { "epoch": 0.3834074206446713, "grad_norm": 0.5718162059783936, "learning_rate": 7.066898449267047e-05, "loss": 0.851, "step": 4291 }, { "epoch": 0.38349677217593314, "grad_norm": 0.5737486481666565, "learning_rate": 7.065580718931843e-05, "loss": 0.852, "step": 4292 }, { "epoch": 0.383586123707195, "grad_norm": 0.4374921917915344, "learning_rate": 7.064262815582238e-05, "loss": 0.8747, "step": 4293 }, { "epoch": 0.3836754752384569, "grad_norm": 0.47579288482666016, "learning_rate": 7.062944739328616e-05, "loss": 1.02, "step": 4294 }, { "epoch": 0.3837648267697188, "grad_norm": 0.4117971658706665, "learning_rate": 7.061626490281384e-05, "loss": 0.9578, "step": 4295 }, { "epoch": 0.3838541783009806, "grad_norm": 0.38990846276283264, "learning_rate": 7.060308068550959e-05, "loss": 0.9143, "step": 4296 }, { "epoch": 0.3839435298322425, "grad_norm": 0.45375731587409973, "learning_rate": 7.05898947424777e-05, "loss": 0.9272, "step": 4297 }, { "epoch": 0.3840328813635044, "grad_norm": 0.496006041765213, "learning_rate": 7.057670707482264e-05, "loss": 0.8945, "step": 4298 }, { "epoch": 0.3841222328947662, "grad_norm": 0.4508313536643982, "learning_rate": 7.056351768364905e-05, "loss": 1.0054, "step": 4299 }, { "epoch": 0.3842115844260281, "grad_norm": 0.406969279050827, "learning_rate": 7.055032657006166e-05, "loss": 0.967, "step": 4300 }, { "epoch": 0.38430093595729, "grad_norm": 0.432956725358963, "learning_rate": 7.053713373516538e-05, "loss": 0.9331, "step": 4301 }, { "epoch": 0.38439028748855186, "grad_norm": 0.5145943760871887, "learning_rate": 7.052393918006522e-05, "loss": 0.9295, "step": 4302 }, { "epoch": 0.3844796390198137, "grad_norm": 0.40598639845848083, "learning_rate": 7.05107429058664e-05, "loss": 0.99, "step": 4303 }, { "epoch": 0.3845689905510756, "grad_norm": 0.42250970005989075, "learning_rate": 7.049754491367421e-05, "loss": 0.9821, "step": 4304 }, { "epoch": 0.38465834208233746, "grad_norm": 0.48077642917633057, "learning_rate": 7.048434520459418e-05, "loss": 0.9702, "step": 4305 }, { "epoch": 0.3847476936135993, "grad_norm": 0.41635996103286743, "learning_rate": 7.047114377973188e-05, "loss": 0.986, "step": 4306 }, { "epoch": 0.38483704514486117, "grad_norm": 0.5170190930366516, "learning_rate": 7.045794064019306e-05, "loss": 0.9175, "step": 4307 }, { "epoch": 0.38492639667612305, "grad_norm": 0.41324326395988464, "learning_rate": 7.044473578708366e-05, "loss": 1.0153, "step": 4308 }, { "epoch": 0.3850157482073849, "grad_norm": 0.46481555700302124, "learning_rate": 7.043152922150972e-05, "loss": 0.9365, "step": 4309 }, { "epoch": 0.38510509973864676, "grad_norm": 0.5050130486488342, "learning_rate": 7.041832094457742e-05, "loss": 1.0131, "step": 4310 }, { "epoch": 0.38519445126990864, "grad_norm": 0.3722282648086548, "learning_rate": 7.04051109573931e-05, "loss": 0.9874, "step": 4311 }, { "epoch": 0.3852838028011705, "grad_norm": 0.4793398082256317, "learning_rate": 7.039189926106324e-05, "loss": 1.0063, "step": 4312 }, { "epoch": 0.38537315433243235, "grad_norm": 0.48876821994781494, "learning_rate": 7.037868585669443e-05, "loss": 0.9032, "step": 4313 }, { "epoch": 0.38546250586369424, "grad_norm": 0.43197745084762573, "learning_rate": 7.036547074539347e-05, "loss": 1.0087, "step": 4314 }, { "epoch": 0.3855518573949561, "grad_norm": 0.3676876127719879, "learning_rate": 7.035225392826727e-05, "loss": 1.0239, "step": 4315 }, { "epoch": 0.38564120892621795, "grad_norm": 0.39950066804885864, "learning_rate": 7.033903540642283e-05, "loss": 0.9731, "step": 4316 }, { "epoch": 0.38573056045747983, "grad_norm": 0.44849759340286255, "learning_rate": 7.032581518096741e-05, "loss": 0.9701, "step": 4317 }, { "epoch": 0.3858199119887417, "grad_norm": 0.4369259476661682, "learning_rate": 7.03125932530083e-05, "loss": 1.032, "step": 4318 }, { "epoch": 0.3859092635200036, "grad_norm": 0.40139544010162354, "learning_rate": 7.029936962365297e-05, "loss": 1.0624, "step": 4319 }, { "epoch": 0.3859986150512654, "grad_norm": 0.4485412836074829, "learning_rate": 7.028614429400908e-05, "loss": 1.0387, "step": 4320 }, { "epoch": 0.3860879665825273, "grad_norm": 0.44548967480659485, "learning_rate": 7.027291726518436e-05, "loss": 1.0029, "step": 4321 }, { "epoch": 0.3861773181137892, "grad_norm": 0.40692946314811707, "learning_rate": 7.025968853828674e-05, "loss": 0.959, "step": 4322 }, { "epoch": 0.386266669645051, "grad_norm": 0.4305645525455475, "learning_rate": 7.024645811442423e-05, "loss": 0.9873, "step": 4323 }, { "epoch": 0.3863560211763129, "grad_norm": 0.5114787817001343, "learning_rate": 7.023322599470505e-05, "loss": 0.8899, "step": 4324 }, { "epoch": 0.3864453727075748, "grad_norm": 0.41181930899620056, "learning_rate": 7.021999218023753e-05, "loss": 0.9755, "step": 4325 }, { "epoch": 0.38653472423883667, "grad_norm": 0.3804915249347687, "learning_rate": 7.020675667213014e-05, "loss": 0.9616, "step": 4326 }, { "epoch": 0.3866240757700985, "grad_norm": 0.41884955763816833, "learning_rate": 7.019351947149148e-05, "loss": 0.9835, "step": 4327 }, { "epoch": 0.3867134273013604, "grad_norm": 0.44132474064826965, "learning_rate": 7.018028057943032e-05, "loss": 1.0083, "step": 4328 }, { "epoch": 0.38680277883262226, "grad_norm": 0.3745604157447815, "learning_rate": 7.016703999705556e-05, "loss": 0.986, "step": 4329 }, { "epoch": 0.3868921303638841, "grad_norm": 0.39664706587791443, "learning_rate": 7.015379772547623e-05, "loss": 0.9742, "step": 4330 }, { "epoch": 0.386981481895146, "grad_norm": 0.510838508605957, "learning_rate": 7.014055376580151e-05, "loss": 1.0018, "step": 4331 }, { "epoch": 0.38707083342640786, "grad_norm": 0.5416032075881958, "learning_rate": 7.012730811914074e-05, "loss": 0.9375, "step": 4332 }, { "epoch": 0.38716018495766974, "grad_norm": 0.45843154191970825, "learning_rate": 7.011406078660336e-05, "loss": 0.9032, "step": 4333 }, { "epoch": 0.38724953648893157, "grad_norm": 0.4215903878211975, "learning_rate": 7.0100811769299e-05, "loss": 0.9499, "step": 4334 }, { "epoch": 0.38733888802019345, "grad_norm": 0.4728352725505829, "learning_rate": 7.008756106833739e-05, "loss": 0.9126, "step": 4335 }, { "epoch": 0.38742823955145533, "grad_norm": 0.4127133786678314, "learning_rate": 7.007430868482842e-05, "loss": 1.0198, "step": 4336 }, { "epoch": 0.38751759108271716, "grad_norm": 0.4411311745643616, "learning_rate": 7.006105461988212e-05, "loss": 0.9796, "step": 4337 }, { "epoch": 0.38760694261397904, "grad_norm": 0.4794873297214508, "learning_rate": 7.004779887460867e-05, "loss": 0.9494, "step": 4338 }, { "epoch": 0.3876962941452409, "grad_norm": 0.5127260088920593, "learning_rate": 7.003454145011836e-05, "loss": 0.9547, "step": 4339 }, { "epoch": 0.38778564567650275, "grad_norm": 0.3873635530471802, "learning_rate": 7.002128234752166e-05, "loss": 1.028, "step": 4340 }, { "epoch": 0.38787499720776464, "grad_norm": 0.4938105344772339, "learning_rate": 7.000802156792915e-05, "loss": 0.9775, "step": 4341 }, { "epoch": 0.3879643487390265, "grad_norm": 0.43845218420028687, "learning_rate": 6.999475911245156e-05, "loss": 0.945, "step": 4342 }, { "epoch": 0.3880537002702884, "grad_norm": 0.4035814106464386, "learning_rate": 6.998149498219977e-05, "loss": 1.0355, "step": 4343 }, { "epoch": 0.38814305180155023, "grad_norm": 0.4540978670120239, "learning_rate": 6.996822917828477e-05, "loss": 0.9927, "step": 4344 }, { "epoch": 0.3882324033328121, "grad_norm": 0.41308870911598206, "learning_rate": 6.995496170181775e-05, "loss": 0.963, "step": 4345 }, { "epoch": 0.388321754864074, "grad_norm": 0.40299081802368164, "learning_rate": 6.994169255390999e-05, "loss": 0.9774, "step": 4346 }, { "epoch": 0.3884111063953358, "grad_norm": 0.5228738784790039, "learning_rate": 6.99284217356729e-05, "loss": 0.9757, "step": 4347 }, { "epoch": 0.3885004579265977, "grad_norm": 0.4321485757827759, "learning_rate": 6.991514924821807e-05, "loss": 0.9918, "step": 4348 }, { "epoch": 0.3885898094578596, "grad_norm": 0.4189693331718445, "learning_rate": 6.990187509265721e-05, "loss": 0.9971, "step": 4349 }, { "epoch": 0.3886791609891215, "grad_norm": 0.5382431745529175, "learning_rate": 6.988859927010219e-05, "loss": 1.0251, "step": 4350 }, { "epoch": 0.3887685125203833, "grad_norm": 0.47325825691223145, "learning_rate": 6.987532178166496e-05, "loss": 0.9422, "step": 4351 }, { "epoch": 0.3888578640516452, "grad_norm": 0.441567987203598, "learning_rate": 6.986204262845768e-05, "loss": 0.9758, "step": 4352 }, { "epoch": 0.38894721558290707, "grad_norm": 0.44492974877357483, "learning_rate": 6.984876181159261e-05, "loss": 0.9484, "step": 4353 }, { "epoch": 0.3890365671141689, "grad_norm": 0.4059096872806549, "learning_rate": 6.983547933218218e-05, "loss": 1.0436, "step": 4354 }, { "epoch": 0.3891259186454308, "grad_norm": 0.4348817765712738, "learning_rate": 6.982219519133892e-05, "loss": 0.9641, "step": 4355 }, { "epoch": 0.38921527017669266, "grad_norm": 0.4612578749656677, "learning_rate": 6.980890939017551e-05, "loss": 0.9583, "step": 4356 }, { "epoch": 0.38930462170795455, "grad_norm": 0.4391173720359802, "learning_rate": 6.979562192980481e-05, "loss": 0.9592, "step": 4357 }, { "epoch": 0.3893939732392164, "grad_norm": 0.45531633496284485, "learning_rate": 6.978233281133976e-05, "loss": 0.9537, "step": 4358 }, { "epoch": 0.38948332477047826, "grad_norm": 0.39362677931785583, "learning_rate": 6.976904203589346e-05, "loss": 1.0051, "step": 4359 }, { "epoch": 0.38957267630174014, "grad_norm": 0.5158856511116028, "learning_rate": 6.975574960457919e-05, "loss": 1.0024, "step": 4360 }, { "epoch": 0.38966202783300197, "grad_norm": 0.5970863699913025, "learning_rate": 6.974245551851029e-05, "loss": 0.9359, "step": 4361 }, { "epoch": 0.38975137936426385, "grad_norm": 0.40095895528793335, "learning_rate": 6.972915977880031e-05, "loss": 1.0245, "step": 4362 }, { "epoch": 0.38984073089552573, "grad_norm": 0.46378135681152344, "learning_rate": 6.971586238656291e-05, "loss": 0.9918, "step": 4363 }, { "epoch": 0.3899300824267876, "grad_norm": 0.5374437570571899, "learning_rate": 6.970256334291187e-05, "loss": 1.0324, "step": 4364 }, { "epoch": 0.39001943395804944, "grad_norm": 0.5034531950950623, "learning_rate": 6.968926264896114e-05, "loss": 0.8832, "step": 4365 }, { "epoch": 0.3901087854893113, "grad_norm": 0.3909834921360016, "learning_rate": 6.967596030582478e-05, "loss": 0.9664, "step": 4366 }, { "epoch": 0.3901981370205732, "grad_norm": 0.4428032338619232, "learning_rate": 6.966265631461703e-05, "loss": 0.9607, "step": 4367 }, { "epoch": 0.39028748855183504, "grad_norm": 0.6216595768928528, "learning_rate": 6.96493506764522e-05, "loss": 0.9773, "step": 4368 }, { "epoch": 0.3903768400830969, "grad_norm": 0.44318845868110657, "learning_rate": 6.963604339244481e-05, "loss": 0.9698, "step": 4369 }, { "epoch": 0.3904661916143588, "grad_norm": 0.4492654800415039, "learning_rate": 6.96227344637095e-05, "loss": 0.9126, "step": 4370 }, { "epoch": 0.39055554314562063, "grad_norm": 0.46567076444625854, "learning_rate": 6.960942389136101e-05, "loss": 0.9868, "step": 4371 }, { "epoch": 0.3906448946768825, "grad_norm": 0.4394150674343109, "learning_rate": 6.959611167651423e-05, "loss": 0.9878, "step": 4372 }, { "epoch": 0.3907342462081444, "grad_norm": 0.44331878423690796, "learning_rate": 6.958279782028423e-05, "loss": 0.9312, "step": 4373 }, { "epoch": 0.3908235977394063, "grad_norm": 0.4242664575576782, "learning_rate": 6.956948232378617e-05, "loss": 1.0962, "step": 4374 }, { "epoch": 0.3909129492706681, "grad_norm": 0.44841963052749634, "learning_rate": 6.955616518813538e-05, "loss": 1.0338, "step": 4375 }, { "epoch": 0.39100230080193, "grad_norm": 0.40618273615837097, "learning_rate": 6.95428464144473e-05, "loss": 0.9697, "step": 4376 }, { "epoch": 0.3910916523331919, "grad_norm": 0.41985857486724854, "learning_rate": 6.952952600383752e-05, "loss": 0.9683, "step": 4377 }, { "epoch": 0.3911810038644537, "grad_norm": 0.5133029222488403, "learning_rate": 6.951620395742176e-05, "loss": 0.953, "step": 4378 }, { "epoch": 0.3912703553957156, "grad_norm": 0.42223209142684937, "learning_rate": 6.950288027631588e-05, "loss": 1.063, "step": 4379 }, { "epoch": 0.39135970692697747, "grad_norm": 0.4248124659061432, "learning_rate": 6.948955496163593e-05, "loss": 1.0083, "step": 4380 }, { "epoch": 0.39144905845823935, "grad_norm": 0.389886736869812, "learning_rate": 6.947622801449799e-05, "loss": 1.0032, "step": 4381 }, { "epoch": 0.3915384099895012, "grad_norm": 0.4102005660533905, "learning_rate": 6.946289943601833e-05, "loss": 0.9879, "step": 4382 }, { "epoch": 0.39162776152076306, "grad_norm": 0.4157712757587433, "learning_rate": 6.94495692273134e-05, "loss": 0.9817, "step": 4383 }, { "epoch": 0.39171711305202495, "grad_norm": 0.42993542551994324, "learning_rate": 6.943623738949973e-05, "loss": 1.0497, "step": 4384 }, { "epoch": 0.3918064645832868, "grad_norm": 0.46929094195365906, "learning_rate": 6.942290392369401e-05, "loss": 0.945, "step": 4385 }, { "epoch": 0.39189581611454866, "grad_norm": 0.41299015283584595, "learning_rate": 6.940956883101304e-05, "loss": 0.9762, "step": 4386 }, { "epoch": 0.39198516764581054, "grad_norm": 0.4164472222328186, "learning_rate": 6.93962321125738e-05, "loss": 0.8936, "step": 4387 }, { "epoch": 0.3920745191770724, "grad_norm": 0.5273587107658386, "learning_rate": 6.938289376949336e-05, "loss": 0.9785, "step": 4388 }, { "epoch": 0.39216387070833425, "grad_norm": 0.5239537954330444, "learning_rate": 6.936955380288897e-05, "loss": 0.9617, "step": 4389 }, { "epoch": 0.39225322223959613, "grad_norm": 0.3496021628379822, "learning_rate": 6.935621221387797e-05, "loss": 1.0419, "step": 4390 }, { "epoch": 0.392342573770858, "grad_norm": 0.5209729075431824, "learning_rate": 6.934286900357789e-05, "loss": 0.9904, "step": 4391 }, { "epoch": 0.39243192530211984, "grad_norm": 0.4754767119884491, "learning_rate": 6.932952417310634e-05, "loss": 0.9543, "step": 4392 }, { "epoch": 0.3925212768333817, "grad_norm": 0.4421217739582062, "learning_rate": 6.931617772358112e-05, "loss": 0.9329, "step": 4393 }, { "epoch": 0.3926106283646436, "grad_norm": 0.40273427963256836, "learning_rate": 6.93028296561201e-05, "loss": 0.9904, "step": 4394 }, { "epoch": 0.3926999798959055, "grad_norm": 0.4427558183670044, "learning_rate": 6.928947997184134e-05, "loss": 0.9429, "step": 4395 }, { "epoch": 0.3927893314271673, "grad_norm": 0.46975377202033997, "learning_rate": 6.927612867186305e-05, "loss": 0.9122, "step": 4396 }, { "epoch": 0.3928786829584292, "grad_norm": 0.3829523026943207, "learning_rate": 6.926277575730349e-05, "loss": 0.9974, "step": 4397 }, { "epoch": 0.3929680344896911, "grad_norm": 0.5063918828964233, "learning_rate": 6.924942122928115e-05, "loss": 0.9954, "step": 4398 }, { "epoch": 0.3930573860209529, "grad_norm": 0.4212724566459656, "learning_rate": 6.923606508891459e-05, "loss": 1.0462, "step": 4399 }, { "epoch": 0.3931467375522148, "grad_norm": 0.44579607248306274, "learning_rate": 6.922270733732254e-05, "loss": 0.9423, "step": 4400 }, { "epoch": 0.3932360890834767, "grad_norm": 0.45305055379867554, "learning_rate": 6.920934797562385e-05, "loss": 0.9498, "step": 4401 }, { "epoch": 0.3933254406147385, "grad_norm": 0.39803916215896606, "learning_rate": 6.91959870049375e-05, "loss": 0.9772, "step": 4402 }, { "epoch": 0.3934147921460004, "grad_norm": 0.5116259455680847, "learning_rate": 6.918262442638263e-05, "loss": 0.9202, "step": 4403 }, { "epoch": 0.3935041436772623, "grad_norm": 0.39163100719451904, "learning_rate": 6.91692602410785e-05, "loss": 0.9933, "step": 4404 }, { "epoch": 0.39359349520852416, "grad_norm": 0.386976420879364, "learning_rate": 6.915589445014448e-05, "loss": 1.006, "step": 4405 }, { "epoch": 0.393682846739786, "grad_norm": 0.4204871356487274, "learning_rate": 6.914252705470013e-05, "loss": 0.9525, "step": 4406 }, { "epoch": 0.39377219827104787, "grad_norm": 0.5166035294532776, "learning_rate": 6.912915805586509e-05, "loss": 0.8921, "step": 4407 }, { "epoch": 0.39386154980230975, "grad_norm": 0.4778018891811371, "learning_rate": 6.911578745475915e-05, "loss": 0.9687, "step": 4408 }, { "epoch": 0.3939509013335716, "grad_norm": 0.4855932295322418, "learning_rate": 6.910241525250225e-05, "loss": 0.9592, "step": 4409 }, { "epoch": 0.39404025286483346, "grad_norm": 0.4324698746204376, "learning_rate": 6.908904145021447e-05, "loss": 0.9615, "step": 4410 }, { "epoch": 0.39412960439609535, "grad_norm": 0.41898903250694275, "learning_rate": 6.907566604901599e-05, "loss": 0.9127, "step": 4411 }, { "epoch": 0.39421895592735723, "grad_norm": 0.44606366753578186, "learning_rate": 6.906228905002714e-05, "loss": 0.9715, "step": 4412 }, { "epoch": 0.39430830745861906, "grad_norm": 0.4419662356376648, "learning_rate": 6.90489104543684e-05, "loss": 0.9972, "step": 4413 }, { "epoch": 0.39439765898988094, "grad_norm": 0.43655163049697876, "learning_rate": 6.903553026316036e-05, "loss": 0.9968, "step": 4414 }, { "epoch": 0.3944870105211428, "grad_norm": 0.41895565390586853, "learning_rate": 6.902214847752375e-05, "loss": 0.9499, "step": 4415 }, { "epoch": 0.39457636205240465, "grad_norm": 0.403507798910141, "learning_rate": 6.900876509857945e-05, "loss": 0.9934, "step": 4416 }, { "epoch": 0.39466571358366653, "grad_norm": 0.4834029972553253, "learning_rate": 6.899538012744847e-05, "loss": 0.9242, "step": 4417 }, { "epoch": 0.3947550651149284, "grad_norm": 0.46136564016342163, "learning_rate": 6.89819935652519e-05, "loss": 0.9524, "step": 4418 }, { "epoch": 0.3948444166461903, "grad_norm": 0.4441271424293518, "learning_rate": 6.896860541311105e-05, "loss": 0.9916, "step": 4419 }, { "epoch": 0.3949337681774521, "grad_norm": 0.5411950349807739, "learning_rate": 6.89552156721473e-05, "loss": 0.8352, "step": 4420 }, { "epoch": 0.395023119708714, "grad_norm": 0.43596935272216797, "learning_rate": 6.89418243434822e-05, "loss": 0.9776, "step": 4421 }, { "epoch": 0.3951124712399759, "grad_norm": 0.4356800317764282, "learning_rate": 6.892843142823739e-05, "loss": 0.9779, "step": 4422 }, { "epoch": 0.3952018227712377, "grad_norm": 0.4590302109718323, "learning_rate": 6.891503692753469e-05, "loss": 1.0105, "step": 4423 }, { "epoch": 0.3952911743024996, "grad_norm": 0.3544255793094635, "learning_rate": 6.890164084249602e-05, "loss": 1.0239, "step": 4424 }, { "epoch": 0.3953805258337615, "grad_norm": 0.4187402129173279, "learning_rate": 6.888824317424347e-05, "loss": 1.0746, "step": 4425 }, { "epoch": 0.39546987736502337, "grad_norm": 0.5059707760810852, "learning_rate": 6.887484392389922e-05, "loss": 0.7944, "step": 4426 }, { "epoch": 0.3955592288962852, "grad_norm": 0.4193587601184845, "learning_rate": 6.88614430925856e-05, "loss": 1.0126, "step": 4427 }, { "epoch": 0.3956485804275471, "grad_norm": 0.4689008891582489, "learning_rate": 6.884804068142505e-05, "loss": 0.9036, "step": 4428 }, { "epoch": 0.39573793195880896, "grad_norm": 0.4160483479499817, "learning_rate": 6.88346366915402e-05, "loss": 0.9379, "step": 4429 }, { "epoch": 0.3958272834900708, "grad_norm": 0.4379422068595886, "learning_rate": 6.882123112405376e-05, "loss": 0.9573, "step": 4430 }, { "epoch": 0.3959166350213327, "grad_norm": 0.4520152509212494, "learning_rate": 6.880782398008862e-05, "loss": 0.9565, "step": 4431 }, { "epoch": 0.39600598655259456, "grad_norm": 0.4579842686653137, "learning_rate": 6.879441526076771e-05, "loss": 0.9489, "step": 4432 }, { "epoch": 0.3960953380838564, "grad_norm": 0.49286845326423645, "learning_rate": 6.878100496721423e-05, "loss": 0.9866, "step": 4433 }, { "epoch": 0.39618468961511827, "grad_norm": 0.39000403881073, "learning_rate": 6.876759310055135e-05, "loss": 1.0161, "step": 4434 }, { "epoch": 0.39627404114638015, "grad_norm": 0.3937504291534424, "learning_rate": 6.875417966190251e-05, "loss": 0.975, "step": 4435 }, { "epoch": 0.39636339267764203, "grad_norm": 0.504807710647583, "learning_rate": 6.874076465239123e-05, "loss": 0.8956, "step": 4436 }, { "epoch": 0.39645274420890386, "grad_norm": 0.38329121470451355, "learning_rate": 6.872734807314115e-05, "loss": 1.0308, "step": 4437 }, { "epoch": 0.39654209574016575, "grad_norm": 0.4224548637866974, "learning_rate": 6.871392992527603e-05, "loss": 0.9944, "step": 4438 }, { "epoch": 0.39663144727142763, "grad_norm": 0.44873788952827454, "learning_rate": 6.870051020991981e-05, "loss": 1.0696, "step": 4439 }, { "epoch": 0.39672079880268946, "grad_norm": 0.49590742588043213, "learning_rate": 6.868708892819653e-05, "loss": 0.9732, "step": 4440 }, { "epoch": 0.39681015033395134, "grad_norm": 0.42665350437164307, "learning_rate": 6.867366608123038e-05, "loss": 0.9137, "step": 4441 }, { "epoch": 0.3968995018652132, "grad_norm": 0.3908413052558899, "learning_rate": 6.866024167014562e-05, "loss": 1.0205, "step": 4442 }, { "epoch": 0.3969888533964751, "grad_norm": 0.4211508631706238, "learning_rate": 6.864681569606673e-05, "loss": 1.0103, "step": 4443 }, { "epoch": 0.39707820492773693, "grad_norm": 0.4399520754814148, "learning_rate": 6.863338816011826e-05, "loss": 1.0127, "step": 4444 }, { "epoch": 0.3971675564589988, "grad_norm": 0.37878918647766113, "learning_rate": 6.861995906342491e-05, "loss": 0.9763, "step": 4445 }, { "epoch": 0.3972569079902607, "grad_norm": 0.47828400135040283, "learning_rate": 6.860652840711154e-05, "loss": 0.955, "step": 4446 }, { "epoch": 0.3973462595215225, "grad_norm": 0.45073986053466797, "learning_rate": 6.859309619230305e-05, "loss": 0.9025, "step": 4447 }, { "epoch": 0.3974356110527844, "grad_norm": 0.38527220487594604, "learning_rate": 6.85796624201246e-05, "loss": 0.9831, "step": 4448 }, { "epoch": 0.3975249625840463, "grad_norm": 0.4932858347892761, "learning_rate": 6.856622709170134e-05, "loss": 0.9358, "step": 4449 }, { "epoch": 0.3976143141153082, "grad_norm": 0.5208497047424316, "learning_rate": 6.855279020815868e-05, "loss": 0.9976, "step": 4450 }, { "epoch": 0.39770366564657, "grad_norm": 0.40091952681541443, "learning_rate": 6.853935177062209e-05, "loss": 0.9934, "step": 4451 }, { "epoch": 0.3977930171778319, "grad_norm": 0.4903776943683624, "learning_rate": 6.852591178021716e-05, "loss": 0.9286, "step": 4452 }, { "epoch": 0.39788236870909377, "grad_norm": 0.45064613223075867, "learning_rate": 6.851247023806964e-05, "loss": 0.9525, "step": 4453 }, { "epoch": 0.3979717202403556, "grad_norm": 0.49910131096839905, "learning_rate": 6.849902714530542e-05, "loss": 0.971, "step": 4454 }, { "epoch": 0.3980610717716175, "grad_norm": 0.4719197452068329, "learning_rate": 6.84855825030505e-05, "loss": 0.9095, "step": 4455 }, { "epoch": 0.39815042330287936, "grad_norm": 0.4351549744606018, "learning_rate": 6.847213631243099e-05, "loss": 0.8641, "step": 4456 }, { "epoch": 0.39823977483414125, "grad_norm": 0.37931522727012634, "learning_rate": 6.845868857457316e-05, "loss": 0.9773, "step": 4457 }, { "epoch": 0.3983291263654031, "grad_norm": 0.49004626274108887, "learning_rate": 6.844523929060343e-05, "loss": 1.012, "step": 4458 }, { "epoch": 0.39841847789666496, "grad_norm": 0.36821407079696655, "learning_rate": 6.843178846164826e-05, "loss": 1.0009, "step": 4459 }, { "epoch": 0.39850782942792684, "grad_norm": 0.42328307032585144, "learning_rate": 6.841833608883437e-05, "loss": 0.9568, "step": 4460 }, { "epoch": 0.39859718095918867, "grad_norm": 0.4346807897090912, "learning_rate": 6.840488217328848e-05, "loss": 1.0407, "step": 4461 }, { "epoch": 0.39868653249045055, "grad_norm": 0.35460466146469116, "learning_rate": 6.839142671613757e-05, "loss": 1.0353, "step": 4462 }, { "epoch": 0.39877588402171243, "grad_norm": 0.43105101585388184, "learning_rate": 6.837796971850859e-05, "loss": 1.0479, "step": 4463 }, { "epoch": 0.39886523555297426, "grad_norm": 0.43495675921440125, "learning_rate": 6.836451118152877e-05, "loss": 0.9813, "step": 4464 }, { "epoch": 0.39895458708423615, "grad_norm": 0.4951534867286682, "learning_rate": 6.835105110632539e-05, "loss": 0.9718, "step": 4465 }, { "epoch": 0.39904393861549803, "grad_norm": 0.48780739307403564, "learning_rate": 6.833758949402587e-05, "loss": 0.9653, "step": 4466 }, { "epoch": 0.3991332901467599, "grad_norm": 0.3795831799507141, "learning_rate": 6.832412634575774e-05, "loss": 1.0119, "step": 4467 }, { "epoch": 0.39922264167802174, "grad_norm": 0.36188191175460815, "learning_rate": 6.831066166264874e-05, "loss": 0.979, "step": 4468 }, { "epoch": 0.3993119932092836, "grad_norm": 0.4243720769882202, "learning_rate": 6.829719544582665e-05, "loss": 1.0764, "step": 4469 }, { "epoch": 0.3994013447405455, "grad_norm": 0.3986402750015259, "learning_rate": 6.828372769641938e-05, "loss": 1.0687, "step": 4470 }, { "epoch": 0.39949069627180733, "grad_norm": 0.48252764344215393, "learning_rate": 6.827025841555504e-05, "loss": 0.9346, "step": 4471 }, { "epoch": 0.3995800478030692, "grad_norm": 0.41776660084724426, "learning_rate": 6.825678760436182e-05, "loss": 0.9995, "step": 4472 }, { "epoch": 0.3996693993343311, "grad_norm": 0.48631688952445984, "learning_rate": 6.824331526396801e-05, "loss": 1.0179, "step": 4473 }, { "epoch": 0.399758750865593, "grad_norm": 0.39176318049430847, "learning_rate": 6.82298413955021e-05, "loss": 1.0147, "step": 4474 }, { "epoch": 0.3998481023968548, "grad_norm": 0.43048596382141113, "learning_rate": 6.821636600009266e-05, "loss": 0.9092, "step": 4475 }, { "epoch": 0.3999374539281167, "grad_norm": 0.445742130279541, "learning_rate": 6.82028890788684e-05, "loss": 0.9472, "step": 4476 }, { "epoch": 0.4000268054593786, "grad_norm": 0.41645416617393494, "learning_rate": 6.818941063295815e-05, "loss": 0.9294, "step": 4477 }, { "epoch": 0.4001161569906404, "grad_norm": 0.5943310260772705, "learning_rate": 6.817593066349086e-05, "loss": 0.9001, "step": 4478 }, { "epoch": 0.4002055085219023, "grad_norm": 0.39284154772758484, "learning_rate": 6.816244917159564e-05, "loss": 0.9755, "step": 4479 }, { "epoch": 0.40029486005316417, "grad_norm": 0.41957008838653564, "learning_rate": 6.814896615840171e-05, "loss": 0.9493, "step": 4480 }, { "epoch": 0.40038421158442605, "grad_norm": 0.4775201082229614, "learning_rate": 6.81354816250384e-05, "loss": 1.0124, "step": 4481 }, { "epoch": 0.4004735631156879, "grad_norm": 0.4292440116405487, "learning_rate": 6.812199557263522e-05, "loss": 0.9147, "step": 4482 }, { "epoch": 0.40056291464694976, "grad_norm": 0.4357702434062958, "learning_rate": 6.810850800232172e-05, "loss": 0.9812, "step": 4483 }, { "epoch": 0.40065226617821165, "grad_norm": 0.525195837020874, "learning_rate": 6.809501891522765e-05, "loss": 0.9529, "step": 4484 }, { "epoch": 0.4007416177094735, "grad_norm": 0.42463192343711853, "learning_rate": 6.808152831248287e-05, "loss": 0.9269, "step": 4485 }, { "epoch": 0.40083096924073536, "grad_norm": 0.4291436970233917, "learning_rate": 6.806803619521737e-05, "loss": 1.0478, "step": 4486 }, { "epoch": 0.40092032077199724, "grad_norm": 0.4439351260662079, "learning_rate": 6.805454256456125e-05, "loss": 0.9217, "step": 4487 }, { "epoch": 0.4010096723032591, "grad_norm": 0.5323197245597839, "learning_rate": 6.804104742164472e-05, "loss": 0.977, "step": 4488 }, { "epoch": 0.40109902383452095, "grad_norm": 0.40645474195480347, "learning_rate": 6.802755076759819e-05, "loss": 0.9972, "step": 4489 }, { "epoch": 0.40118837536578283, "grad_norm": 0.4209262728691101, "learning_rate": 6.80140526035521e-05, "loss": 0.9412, "step": 4490 }, { "epoch": 0.4012777268970447, "grad_norm": 0.504423201084137, "learning_rate": 6.80005529306371e-05, "loss": 0.9029, "step": 4491 }, { "epoch": 0.40136707842830655, "grad_norm": 0.4125538170337677, "learning_rate": 6.798705174998392e-05, "loss": 0.9586, "step": 4492 }, { "epoch": 0.40145642995956843, "grad_norm": 0.36721163988113403, "learning_rate": 6.797354906272342e-05, "loss": 1.0099, "step": 4493 }, { "epoch": 0.4015457814908303, "grad_norm": 0.4413255453109741, "learning_rate": 6.796004486998661e-05, "loss": 0.9163, "step": 4494 }, { "epoch": 0.40163513302209214, "grad_norm": 0.44812944531440735, "learning_rate": 6.79465391729046e-05, "loss": 0.967, "step": 4495 }, { "epoch": 0.401724484553354, "grad_norm": 0.38190940022468567, "learning_rate": 6.793303197260864e-05, "loss": 0.9877, "step": 4496 }, { "epoch": 0.4018138360846159, "grad_norm": 0.44277167320251465, "learning_rate": 6.79195232702301e-05, "loss": 0.956, "step": 4497 }, { "epoch": 0.4019031876158778, "grad_norm": 0.41372618079185486, "learning_rate": 6.790601306690048e-05, "loss": 1.003, "step": 4498 }, { "epoch": 0.4019925391471396, "grad_norm": 0.39554059505462646, "learning_rate": 6.78925013637514e-05, "loss": 1.0137, "step": 4499 }, { "epoch": 0.4020818906784015, "grad_norm": 0.3876018226146698, "learning_rate": 6.78789881619146e-05, "loss": 0.964, "step": 4500 }, { "epoch": 0.4021712422096634, "grad_norm": 0.5051389932632446, "learning_rate": 6.786547346252198e-05, "loss": 0.9447, "step": 4501 }, { "epoch": 0.4022605937409252, "grad_norm": 0.49276506900787354, "learning_rate": 6.785195726670552e-05, "loss": 0.949, "step": 4502 }, { "epoch": 0.4023499452721871, "grad_norm": 0.5032357573509216, "learning_rate": 6.783843957559734e-05, "loss": 0.9475, "step": 4503 }, { "epoch": 0.402439296803449, "grad_norm": 0.42340540885925293, "learning_rate": 6.782492039032971e-05, "loss": 0.9155, "step": 4504 }, { "epoch": 0.40252864833471086, "grad_norm": 0.4485189914703369, "learning_rate": 6.7811399712035e-05, "loss": 0.8825, "step": 4505 }, { "epoch": 0.4026179998659727, "grad_norm": 0.4616818130016327, "learning_rate": 6.779787754184571e-05, "loss": 0.9936, "step": 4506 }, { "epoch": 0.40270735139723457, "grad_norm": 0.44533130526542664, "learning_rate": 6.778435388089446e-05, "loss": 0.9267, "step": 4507 }, { "epoch": 0.40279670292849645, "grad_norm": 0.4395417869091034, "learning_rate": 6.777082873031401e-05, "loss": 0.9568, "step": 4508 }, { "epoch": 0.4028860544597583, "grad_norm": 0.44981324672698975, "learning_rate": 6.775730209123722e-05, "loss": 0.9014, "step": 4509 }, { "epoch": 0.40297540599102016, "grad_norm": 0.4732804596424103, "learning_rate": 6.77437739647971e-05, "loss": 0.9897, "step": 4510 }, { "epoch": 0.40306475752228205, "grad_norm": 0.4026157557964325, "learning_rate": 6.773024435212678e-05, "loss": 1.0569, "step": 4511 }, { "epoch": 0.40315410905354393, "grad_norm": 0.39510583877563477, "learning_rate": 6.771671325435952e-05, "loss": 0.9722, "step": 4512 }, { "epoch": 0.40324346058480576, "grad_norm": 0.4581720232963562, "learning_rate": 6.770318067262866e-05, "loss": 0.9051, "step": 4513 }, { "epoch": 0.40333281211606764, "grad_norm": 0.5355126261711121, "learning_rate": 6.768964660806772e-05, "loss": 0.9467, "step": 4514 }, { "epoch": 0.4034221636473295, "grad_norm": 0.38436293601989746, "learning_rate": 6.767611106181031e-05, "loss": 0.9994, "step": 4515 }, { "epoch": 0.40351151517859135, "grad_norm": 0.4458340108394623, "learning_rate": 6.766257403499019e-05, "loss": 0.9685, "step": 4516 }, { "epoch": 0.40360086670985323, "grad_norm": 0.5055366158485413, "learning_rate": 6.764903552874125e-05, "loss": 0.9154, "step": 4517 }, { "epoch": 0.4036902182411151, "grad_norm": 0.4424796998500824, "learning_rate": 6.763549554419743e-05, "loss": 0.9438, "step": 4518 }, { "epoch": 0.403779569772377, "grad_norm": 0.4608469605445862, "learning_rate": 6.762195408249288e-05, "loss": 0.9699, "step": 4519 }, { "epoch": 0.40386892130363883, "grad_norm": 0.4548738896846771, "learning_rate": 6.760841114476185e-05, "loss": 1.0052, "step": 4520 }, { "epoch": 0.4039582728349007, "grad_norm": 0.5669234395027161, "learning_rate": 6.759486673213869e-05, "loss": 1.0287, "step": 4521 }, { "epoch": 0.4040476243661626, "grad_norm": 0.4517204165458679, "learning_rate": 6.75813208457579e-05, "loss": 0.909, "step": 4522 }, { "epoch": 0.4041369758974244, "grad_norm": 0.5192089080810547, "learning_rate": 6.756777348675407e-05, "loss": 0.9845, "step": 4523 }, { "epoch": 0.4042263274286863, "grad_norm": 0.4186631739139557, "learning_rate": 6.755422465626196e-05, "loss": 0.9981, "step": 4524 }, { "epoch": 0.4043156789599482, "grad_norm": 0.4013117551803589, "learning_rate": 6.754067435541642e-05, "loss": 0.9479, "step": 4525 }, { "epoch": 0.40440503049121, "grad_norm": 0.4642598330974579, "learning_rate": 6.752712258535244e-05, "loss": 0.9661, "step": 4526 }, { "epoch": 0.4044943820224719, "grad_norm": 0.5033729672431946, "learning_rate": 6.751356934720511e-05, "loss": 0.8962, "step": 4527 }, { "epoch": 0.4045837335537338, "grad_norm": 0.4341753125190735, "learning_rate": 6.750001464210967e-05, "loss": 0.976, "step": 4528 }, { "epoch": 0.40467308508499567, "grad_norm": 0.5763057470321655, "learning_rate": 6.748645847120146e-05, "loss": 0.9349, "step": 4529 }, { "epoch": 0.4047624366162575, "grad_norm": 0.5385793447494507, "learning_rate": 6.747290083561596e-05, "loss": 0.9334, "step": 4530 }, { "epoch": 0.4048517881475194, "grad_norm": 0.42370885610580444, "learning_rate": 6.745934173648876e-05, "loss": 0.9816, "step": 4531 }, { "epoch": 0.40494113967878126, "grad_norm": 0.48537033796310425, "learning_rate": 6.744578117495562e-05, "loss": 0.9148, "step": 4532 }, { "epoch": 0.4050304912100431, "grad_norm": 0.46244072914123535, "learning_rate": 6.743221915215232e-05, "loss": 0.9388, "step": 4533 }, { "epoch": 0.40511984274130497, "grad_norm": 0.40350237488746643, "learning_rate": 6.741865566921484e-05, "loss": 0.9161, "step": 4534 }, { "epoch": 0.40520919427256685, "grad_norm": 0.45283472537994385, "learning_rate": 6.740509072727931e-05, "loss": 1.0257, "step": 4535 }, { "epoch": 0.40529854580382874, "grad_norm": 0.41299960017204285, "learning_rate": 6.73915243274819e-05, "loss": 1.0064, "step": 4536 }, { "epoch": 0.40538789733509056, "grad_norm": 0.4359651207923889, "learning_rate": 6.737795647095893e-05, "loss": 0.9863, "step": 4537 }, { "epoch": 0.40547724886635245, "grad_norm": 0.477792888879776, "learning_rate": 6.736438715884688e-05, "loss": 0.957, "step": 4538 }, { "epoch": 0.40556660039761433, "grad_norm": 0.4058784544467926, "learning_rate": 6.735081639228232e-05, "loss": 0.971, "step": 4539 }, { "epoch": 0.40565595192887616, "grad_norm": 0.45812395215034485, "learning_rate": 6.733724417240194e-05, "loss": 0.9408, "step": 4540 }, { "epoch": 0.40574530346013804, "grad_norm": 0.47528955340385437, "learning_rate": 6.732367050034254e-05, "loss": 0.9003, "step": 4541 }, { "epoch": 0.4058346549913999, "grad_norm": 0.3834351897239685, "learning_rate": 6.731009537724112e-05, "loss": 0.9874, "step": 4542 }, { "epoch": 0.4059240065226618, "grad_norm": 0.3695882260799408, "learning_rate": 6.729651880423465e-05, "loss": 1.0228, "step": 4543 }, { "epoch": 0.40601335805392363, "grad_norm": 0.4406605064868927, "learning_rate": 6.728294078246038e-05, "loss": 1.0354, "step": 4544 }, { "epoch": 0.4061027095851855, "grad_norm": 0.4965038001537323, "learning_rate": 6.72693613130556e-05, "loss": 0.912, "step": 4545 }, { "epoch": 0.4061920611164474, "grad_norm": 0.38543984293937683, "learning_rate": 6.725578039715774e-05, "loss": 1.0156, "step": 4546 }, { "epoch": 0.40628141264770923, "grad_norm": 0.49438318610191345, "learning_rate": 6.724219803590433e-05, "loss": 0.9627, "step": 4547 }, { "epoch": 0.4063707641789711, "grad_norm": 0.3538610637187958, "learning_rate": 6.722861423043305e-05, "loss": 1.0335, "step": 4548 }, { "epoch": 0.406460115710233, "grad_norm": 0.3865319490432739, "learning_rate": 6.721502898188167e-05, "loss": 1.0594, "step": 4549 }, { "epoch": 0.4065494672414949, "grad_norm": 0.395279198884964, "learning_rate": 6.720144229138813e-05, "loss": 0.9866, "step": 4550 }, { "epoch": 0.4066388187727567, "grad_norm": 0.5369691252708435, "learning_rate": 6.718785416009044e-05, "loss": 0.9146, "step": 4551 }, { "epoch": 0.4067281703040186, "grad_norm": 0.43310627341270447, "learning_rate": 6.717426458912675e-05, "loss": 0.9597, "step": 4552 }, { "epoch": 0.40681752183528047, "grad_norm": 0.4922178387641907, "learning_rate": 6.716067357963535e-05, "loss": 0.9746, "step": 4553 }, { "epoch": 0.4069068733665423, "grad_norm": 0.43535903096199036, "learning_rate": 6.714708113275461e-05, "loss": 0.967, "step": 4554 }, { "epoch": 0.4069962248978042, "grad_norm": 0.44184789061546326, "learning_rate": 6.713348724962305e-05, "loss": 0.8968, "step": 4555 }, { "epoch": 0.40708557642906606, "grad_norm": 0.4985772371292114, "learning_rate": 6.711989193137929e-05, "loss": 0.905, "step": 4556 }, { "epoch": 0.4071749279603279, "grad_norm": 0.40521472692489624, "learning_rate": 6.710629517916211e-05, "loss": 1.0015, "step": 4557 }, { "epoch": 0.4072642794915898, "grad_norm": 0.3867556154727936, "learning_rate": 6.709269699411038e-05, "loss": 0.9735, "step": 4558 }, { "epoch": 0.40735363102285166, "grad_norm": 0.46226397156715393, "learning_rate": 6.707909737736306e-05, "loss": 0.8992, "step": 4559 }, { "epoch": 0.40744298255411354, "grad_norm": 0.43801769614219666, "learning_rate": 6.70654963300593e-05, "loss": 0.9856, "step": 4560 }, { "epoch": 0.40753233408537537, "grad_norm": 0.45655280351638794, "learning_rate": 6.70518938533383e-05, "loss": 0.9502, "step": 4561 }, { "epoch": 0.40762168561663725, "grad_norm": 0.4761228561401367, "learning_rate": 6.703828994833944e-05, "loss": 0.9812, "step": 4562 }, { "epoch": 0.40771103714789914, "grad_norm": 0.5070672631263733, "learning_rate": 6.702468461620218e-05, "loss": 0.8713, "step": 4563 }, { "epoch": 0.40780038867916096, "grad_norm": 0.3671000599861145, "learning_rate": 6.701107785806612e-05, "loss": 1.0342, "step": 4564 }, { "epoch": 0.40788974021042285, "grad_norm": 0.4318379759788513, "learning_rate": 6.699746967507095e-05, "loss": 0.9301, "step": 4565 }, { "epoch": 0.40797909174168473, "grad_norm": 0.45091480016708374, "learning_rate": 6.698386006835653e-05, "loss": 1.0128, "step": 4566 }, { "epoch": 0.4080684432729466, "grad_norm": 0.45846810936927795, "learning_rate": 6.697024903906279e-05, "loss": 1.044, "step": 4567 }, { "epoch": 0.40815779480420844, "grad_norm": 0.42733606696128845, "learning_rate": 6.695663658832981e-05, "loss": 0.9401, "step": 4568 }, { "epoch": 0.4082471463354703, "grad_norm": 0.48478513956069946, "learning_rate": 6.694302271729774e-05, "loss": 0.9547, "step": 4569 }, { "epoch": 0.4083364978667322, "grad_norm": 0.42427879571914673, "learning_rate": 6.692940742710694e-05, "loss": 0.9735, "step": 4570 }, { "epoch": 0.40842584939799403, "grad_norm": 0.40262219309806824, "learning_rate": 6.691579071889782e-05, "loss": 0.9945, "step": 4571 }, { "epoch": 0.4085152009292559, "grad_norm": 0.445208877325058, "learning_rate": 6.690217259381091e-05, "loss": 0.9534, "step": 4572 }, { "epoch": 0.4086045524605178, "grad_norm": 0.40912604331970215, "learning_rate": 6.688855305298688e-05, "loss": 0.9062, "step": 4573 }, { "epoch": 0.4086939039917797, "grad_norm": 0.4119530916213989, "learning_rate": 6.687493209756653e-05, "loss": 0.9902, "step": 4574 }, { "epoch": 0.4087832555230415, "grad_norm": 0.43412020802497864, "learning_rate": 6.686130972869072e-05, "loss": 0.9655, "step": 4575 }, { "epoch": 0.4088726070543034, "grad_norm": 0.4143422842025757, "learning_rate": 6.68476859475005e-05, "loss": 0.9909, "step": 4576 }, { "epoch": 0.4089619585855653, "grad_norm": 0.3809065520763397, "learning_rate": 6.6834060755137e-05, "loss": 1.0173, "step": 4577 }, { "epoch": 0.4090513101168271, "grad_norm": 0.5703213810920715, "learning_rate": 6.682043415274147e-05, "loss": 0.9383, "step": 4578 }, { "epoch": 0.409140661648089, "grad_norm": 0.37264561653137207, "learning_rate": 6.680680614145529e-05, "loss": 0.9856, "step": 4579 }, { "epoch": 0.40923001317935087, "grad_norm": 0.4544839859008789, "learning_rate": 6.679317672241994e-05, "loss": 0.9899, "step": 4580 }, { "epoch": 0.40931936471061275, "grad_norm": 0.3862614929676056, "learning_rate": 6.677954589677705e-05, "loss": 1.0016, "step": 4581 }, { "epoch": 0.4094087162418746, "grad_norm": 0.4736924171447754, "learning_rate": 6.676591366566831e-05, "loss": 0.9096, "step": 4582 }, { "epoch": 0.40949806777313646, "grad_norm": 0.4209919273853302, "learning_rate": 6.675228003023561e-05, "loss": 0.9689, "step": 4583 }, { "epoch": 0.40958741930439835, "grad_norm": 0.416951060295105, "learning_rate": 6.673864499162089e-05, "loss": 0.9733, "step": 4584 }, { "epoch": 0.4096767708356602, "grad_norm": 0.44141101837158203, "learning_rate": 6.672500855096623e-05, "loss": 0.9765, "step": 4585 }, { "epoch": 0.40976612236692206, "grad_norm": 0.4138414263725281, "learning_rate": 6.671137070941383e-05, "loss": 0.9467, "step": 4586 }, { "epoch": 0.40985547389818394, "grad_norm": 0.42416608333587646, "learning_rate": 6.669773146810599e-05, "loss": 0.9659, "step": 4587 }, { "epoch": 0.40994482542944577, "grad_norm": 0.4376296401023865, "learning_rate": 6.668409082818517e-05, "loss": 1.0398, "step": 4588 }, { "epoch": 0.41003417696070765, "grad_norm": 0.4650912880897522, "learning_rate": 6.66704487907939e-05, "loss": 0.9026, "step": 4589 }, { "epoch": 0.41012352849196954, "grad_norm": 0.436990350484848, "learning_rate": 6.665680535707485e-05, "loss": 1.0163, "step": 4590 }, { "epoch": 0.4102128800232314, "grad_norm": 0.42195725440979004, "learning_rate": 6.664316052817079e-05, "loss": 0.9237, "step": 4591 }, { "epoch": 0.41030223155449325, "grad_norm": 0.4974232017993927, "learning_rate": 6.662951430522464e-05, "loss": 0.9441, "step": 4592 }, { "epoch": 0.41039158308575513, "grad_norm": 0.43734729290008545, "learning_rate": 6.661586668937943e-05, "loss": 0.9482, "step": 4593 }, { "epoch": 0.410480934617017, "grad_norm": 0.45577242970466614, "learning_rate": 6.660221768177824e-05, "loss": 0.937, "step": 4594 }, { "epoch": 0.41057028614827884, "grad_norm": 0.4594924747943878, "learning_rate": 6.658856728356437e-05, "loss": 0.9181, "step": 4595 }, { "epoch": 0.4106596376795407, "grad_norm": 0.441518098115921, "learning_rate": 6.657491549588115e-05, "loss": 0.9669, "step": 4596 }, { "epoch": 0.4107489892108026, "grad_norm": 0.4046085774898529, "learning_rate": 6.65612623198721e-05, "loss": 0.9289, "step": 4597 }, { "epoch": 0.4108383407420645, "grad_norm": 0.4187487065792084, "learning_rate": 6.65476077566808e-05, "loss": 1.0489, "step": 4598 }, { "epoch": 0.4109276922733263, "grad_norm": 0.4384293854236603, "learning_rate": 6.653395180745095e-05, "loss": 0.9635, "step": 4599 }, { "epoch": 0.4110170438045882, "grad_norm": 0.38338857889175415, "learning_rate": 6.652029447332641e-05, "loss": 1.0088, "step": 4600 }, { "epoch": 0.4111063953358501, "grad_norm": 0.5203673839569092, "learning_rate": 6.650663575545111e-05, "loss": 0.9381, "step": 4601 }, { "epoch": 0.4111957468671119, "grad_norm": 0.43095913529396057, "learning_rate": 6.649297565496911e-05, "loss": 1.0205, "step": 4602 }, { "epoch": 0.4112850983983738, "grad_norm": 0.43971747159957886, "learning_rate": 6.647931417302462e-05, "loss": 0.9816, "step": 4603 }, { "epoch": 0.4113744499296357, "grad_norm": 0.3989725708961487, "learning_rate": 6.64656513107619e-05, "loss": 0.9747, "step": 4604 }, { "epoch": 0.41146380146089756, "grad_norm": 0.47779417037963867, "learning_rate": 6.645198706932536e-05, "loss": 0.9923, "step": 4605 }, { "epoch": 0.4115531529921594, "grad_norm": 0.4292563199996948, "learning_rate": 6.643832144985955e-05, "loss": 0.944, "step": 4606 }, { "epoch": 0.41164250452342127, "grad_norm": 0.5014485120773315, "learning_rate": 6.64246544535091e-05, "loss": 0.9265, "step": 4607 }, { "epoch": 0.41173185605468315, "grad_norm": 0.43778491020202637, "learning_rate": 6.641098608141874e-05, "loss": 0.9643, "step": 4608 }, { "epoch": 0.411821207585945, "grad_norm": 0.6082847714424133, "learning_rate": 6.63973163347334e-05, "loss": 0.9869, "step": 4609 }, { "epoch": 0.41191055911720686, "grad_norm": 0.4783382713794708, "learning_rate": 6.638364521459802e-05, "loss": 1.0311, "step": 4610 }, { "epoch": 0.41199991064846875, "grad_norm": 0.5080249905586243, "learning_rate": 6.636997272215772e-05, "loss": 0.9857, "step": 4611 }, { "epoch": 0.41208926217973063, "grad_norm": 0.3644493520259857, "learning_rate": 6.63562988585577e-05, "loss": 1.006, "step": 4612 }, { "epoch": 0.41217861371099246, "grad_norm": 0.4645083248615265, "learning_rate": 6.634262362494332e-05, "loss": 0.9701, "step": 4613 }, { "epoch": 0.41226796524225434, "grad_norm": 0.48680824041366577, "learning_rate": 6.632894702246001e-05, "loss": 0.9591, "step": 4614 }, { "epoch": 0.4123573167735162, "grad_norm": 0.4588204622268677, "learning_rate": 6.631526905225333e-05, "loss": 0.926, "step": 4615 }, { "epoch": 0.41244666830477805, "grad_norm": 0.43005460500717163, "learning_rate": 6.630158971546896e-05, "loss": 1.0193, "step": 4616 }, { "epoch": 0.41253601983603994, "grad_norm": 0.4423251152038574, "learning_rate": 6.628790901325267e-05, "loss": 0.8798, "step": 4617 }, { "epoch": 0.4126253713673018, "grad_norm": 0.5219715237617493, "learning_rate": 6.627422694675042e-05, "loss": 1.0016, "step": 4618 }, { "epoch": 0.4127147228985637, "grad_norm": 0.4232001304626465, "learning_rate": 6.626054351710817e-05, "loss": 0.9672, "step": 4619 }, { "epoch": 0.41280407442982553, "grad_norm": 0.47682496905326843, "learning_rate": 6.624685872547207e-05, "loss": 0.9314, "step": 4620 }, { "epoch": 0.4128934259610874, "grad_norm": 0.46166396141052246, "learning_rate": 6.623317257298837e-05, "loss": 1.015, "step": 4621 }, { "epoch": 0.4129827774923493, "grad_norm": 0.39371028542518616, "learning_rate": 6.621948506080345e-05, "loss": 0.9835, "step": 4622 }, { "epoch": 0.4130721290236111, "grad_norm": 0.41618743538856506, "learning_rate": 6.620579619006377e-05, "loss": 0.9857, "step": 4623 }, { "epoch": 0.413161480554873, "grad_norm": 0.4997164011001587, "learning_rate": 6.619210596191592e-05, "loss": 0.8842, "step": 4624 }, { "epoch": 0.4132508320861349, "grad_norm": 0.45205265283584595, "learning_rate": 6.61784143775066e-05, "loss": 0.9845, "step": 4625 }, { "epoch": 0.4133401836173967, "grad_norm": 0.49516791105270386, "learning_rate": 6.616472143798261e-05, "loss": 0.8691, "step": 4626 }, { "epoch": 0.4134295351486586, "grad_norm": 0.4403949975967407, "learning_rate": 6.615102714449089e-05, "loss": 0.9634, "step": 4627 }, { "epoch": 0.4135188866799205, "grad_norm": 0.43604418635368347, "learning_rate": 6.613733149817852e-05, "loss": 1.0116, "step": 4628 }, { "epoch": 0.41360823821118237, "grad_norm": 0.4321603775024414, "learning_rate": 6.612363450019261e-05, "loss": 0.924, "step": 4629 }, { "epoch": 0.4136975897424442, "grad_norm": 0.40170714259147644, "learning_rate": 6.610993615168044e-05, "loss": 1.0031, "step": 4630 }, { "epoch": 0.4137869412737061, "grad_norm": 0.4581966996192932, "learning_rate": 6.60962364537894e-05, "loss": 0.9848, "step": 4631 }, { "epoch": 0.41387629280496796, "grad_norm": 0.42870965600013733, "learning_rate": 6.608253540766698e-05, "loss": 1.0401, "step": 4632 }, { "epoch": 0.4139656443362298, "grad_norm": 0.4049166738986969, "learning_rate": 6.60688330144608e-05, "loss": 0.9862, "step": 4633 }, { "epoch": 0.41405499586749167, "grad_norm": 0.44598478078842163, "learning_rate": 6.605512927531858e-05, "loss": 1.0188, "step": 4634 }, { "epoch": 0.41414434739875355, "grad_norm": 0.4548451602458954, "learning_rate": 6.604142419138812e-05, "loss": 0.9463, "step": 4635 }, { "epoch": 0.41423369893001544, "grad_norm": 0.41547542810440063, "learning_rate": 6.602771776381743e-05, "loss": 1.0352, "step": 4636 }, { "epoch": 0.41432305046127726, "grad_norm": 0.42148005962371826, "learning_rate": 6.60140099937545e-05, "loss": 0.944, "step": 4637 }, { "epoch": 0.41441240199253915, "grad_norm": 0.41155093908309937, "learning_rate": 6.600030088234755e-05, "loss": 0.9639, "step": 4638 }, { "epoch": 0.41450175352380103, "grad_norm": 0.4119633436203003, "learning_rate": 6.598659043074487e-05, "loss": 0.9254, "step": 4639 }, { "epoch": 0.41459110505506286, "grad_norm": 0.4401843547821045, "learning_rate": 6.59728786400948e-05, "loss": 0.9798, "step": 4640 }, { "epoch": 0.41468045658632474, "grad_norm": 0.4861004054546356, "learning_rate": 6.595916551154591e-05, "loss": 0.9584, "step": 4641 }, { "epoch": 0.4147698081175866, "grad_norm": 0.44102394580841064, "learning_rate": 6.59454510462468e-05, "loss": 0.9108, "step": 4642 }, { "epoch": 0.4148591596488485, "grad_norm": 0.4617249667644501, "learning_rate": 6.593173524534619e-05, "loss": 0.9601, "step": 4643 }, { "epoch": 0.41494851118011034, "grad_norm": 0.5489480495452881, "learning_rate": 6.591801810999294e-05, "loss": 0.9668, "step": 4644 }, { "epoch": 0.4150378627113722, "grad_norm": 0.5144347548484802, "learning_rate": 6.590429964133599e-05, "loss": 0.9139, "step": 4645 }, { "epoch": 0.4151272142426341, "grad_norm": 0.4274512827396393, "learning_rate": 6.589057984052441e-05, "loss": 1.001, "step": 4646 }, { "epoch": 0.41521656577389593, "grad_norm": 0.4437926709651947, "learning_rate": 6.58768587087074e-05, "loss": 0.9184, "step": 4647 }, { "epoch": 0.4153059173051578, "grad_norm": 0.46083325147628784, "learning_rate": 6.586313624703423e-05, "loss": 1.0409, "step": 4648 }, { "epoch": 0.4153952688364197, "grad_norm": 0.46094614267349243, "learning_rate": 6.584941245665432e-05, "loss": 0.9456, "step": 4649 }, { "epoch": 0.4154846203676816, "grad_norm": 0.3888949751853943, "learning_rate": 6.583568733871716e-05, "loss": 0.9999, "step": 4650 }, { "epoch": 0.4155739718989434, "grad_norm": 0.4531100392341614, "learning_rate": 6.58219608943724e-05, "loss": 1.0092, "step": 4651 }, { "epoch": 0.4156633234302053, "grad_norm": 0.44666728377342224, "learning_rate": 6.580823312476976e-05, "loss": 0.961, "step": 4652 }, { "epoch": 0.4157526749614672, "grad_norm": 0.39835554361343384, "learning_rate": 6.579450403105909e-05, "loss": 0.9973, "step": 4653 }, { "epoch": 0.415842026492729, "grad_norm": 0.4402940571308136, "learning_rate": 6.578077361439037e-05, "loss": 0.9221, "step": 4654 }, { "epoch": 0.4159313780239909, "grad_norm": 0.43528056144714355, "learning_rate": 6.576704187591362e-05, "loss": 0.9335, "step": 4655 }, { "epoch": 0.41602072955525277, "grad_norm": 0.48147714138031006, "learning_rate": 6.575330881677907e-05, "loss": 0.9158, "step": 4656 }, { "epoch": 0.4161100810865146, "grad_norm": 0.4357036352157593, "learning_rate": 6.573957443813698e-05, "loss": 0.9688, "step": 4657 }, { "epoch": 0.4161994326177765, "grad_norm": 0.41116756200790405, "learning_rate": 6.572583874113777e-05, "loss": 0.9678, "step": 4658 }, { "epoch": 0.41628878414903836, "grad_norm": 0.3910764753818512, "learning_rate": 6.571210172693192e-05, "loss": 1.0419, "step": 4659 }, { "epoch": 0.41637813568030024, "grad_norm": 0.5164288878440857, "learning_rate": 6.569836339667009e-05, "loss": 0.8935, "step": 4660 }, { "epoch": 0.41646748721156207, "grad_norm": 0.48171505331993103, "learning_rate": 6.568462375150298e-05, "loss": 1.0256, "step": 4661 }, { "epoch": 0.41655683874282395, "grad_norm": 0.43852394819259644, "learning_rate": 6.567088279258144e-05, "loss": 0.925, "step": 4662 }, { "epoch": 0.41664619027408584, "grad_norm": 0.42417702078819275, "learning_rate": 6.565714052105645e-05, "loss": 0.9589, "step": 4663 }, { "epoch": 0.41673554180534766, "grad_norm": 0.42217710614204407, "learning_rate": 6.564339693807904e-05, "loss": 0.9391, "step": 4664 }, { "epoch": 0.41682489333660955, "grad_norm": 0.48488372564315796, "learning_rate": 6.562965204480039e-05, "loss": 0.9816, "step": 4665 }, { "epoch": 0.41691424486787143, "grad_norm": 0.424485981464386, "learning_rate": 6.561590584237176e-05, "loss": 1.0454, "step": 4666 }, { "epoch": 0.4170035963991333, "grad_norm": 0.40469837188720703, "learning_rate": 6.560215833194457e-05, "loss": 1.0021, "step": 4667 }, { "epoch": 0.41709294793039514, "grad_norm": 0.4954060912132263, "learning_rate": 6.558840951467033e-05, "loss": 1.0377, "step": 4668 }, { "epoch": 0.417182299461657, "grad_norm": 0.47317901253700256, "learning_rate": 6.557465939170062e-05, "loss": 0.9776, "step": 4669 }, { "epoch": 0.4172716509929189, "grad_norm": 0.4012228846549988, "learning_rate": 6.556090796418717e-05, "loss": 0.9594, "step": 4670 }, { "epoch": 0.41736100252418074, "grad_norm": 0.4025217890739441, "learning_rate": 6.554715523328181e-05, "loss": 0.9784, "step": 4671 }, { "epoch": 0.4174503540554426, "grad_norm": 0.4811991751194, "learning_rate": 6.553340120013649e-05, "loss": 0.9726, "step": 4672 }, { "epoch": 0.4175397055867045, "grad_norm": 0.4033472239971161, "learning_rate": 6.551964586590323e-05, "loss": 0.9814, "step": 4673 }, { "epoch": 0.4176290571179664, "grad_norm": 0.45485714077949524, "learning_rate": 6.550588923173422e-05, "loss": 0.9099, "step": 4674 }, { "epoch": 0.4177184086492282, "grad_norm": 0.48728662729263306, "learning_rate": 6.549213129878169e-05, "loss": 0.9082, "step": 4675 }, { "epoch": 0.4178077601804901, "grad_norm": 0.46427708864212036, "learning_rate": 6.547837206819804e-05, "loss": 0.9797, "step": 4676 }, { "epoch": 0.417897111711752, "grad_norm": 0.5304161310195923, "learning_rate": 6.546461154113575e-05, "loss": 0.9526, "step": 4677 }, { "epoch": 0.4179864632430138, "grad_norm": 0.5012792348861694, "learning_rate": 6.545084971874738e-05, "loss": 0.9665, "step": 4678 }, { "epoch": 0.4180758147742757, "grad_norm": 0.41334888339042664, "learning_rate": 6.543708660218566e-05, "loss": 0.945, "step": 4679 }, { "epoch": 0.4181651663055376, "grad_norm": 0.38999998569488525, "learning_rate": 6.54233221926034e-05, "loss": 0.9889, "step": 4680 }, { "epoch": 0.41825451783679946, "grad_norm": 0.43242332339286804, "learning_rate": 6.540955649115349e-05, "loss": 0.9914, "step": 4681 }, { "epoch": 0.4183438693680613, "grad_norm": 0.46687716245651245, "learning_rate": 6.539578949898896e-05, "loss": 0.986, "step": 4682 }, { "epoch": 0.41843322089932317, "grad_norm": 0.5817546844482422, "learning_rate": 6.538202121726298e-05, "loss": 0.8811, "step": 4683 }, { "epoch": 0.41852257243058505, "grad_norm": 0.4378966987133026, "learning_rate": 6.536825164712876e-05, "loss": 0.9343, "step": 4684 }, { "epoch": 0.4186119239618469, "grad_norm": 0.45147213339805603, "learning_rate": 6.535448078973963e-05, "loss": 0.928, "step": 4685 }, { "epoch": 0.41870127549310876, "grad_norm": 0.5144475102424622, "learning_rate": 6.534070864624908e-05, "loss": 0.9623, "step": 4686 }, { "epoch": 0.41879062702437064, "grad_norm": 0.44257742166519165, "learning_rate": 6.532693521781066e-05, "loss": 0.9797, "step": 4687 }, { "epoch": 0.41887997855563247, "grad_norm": 0.4004027247428894, "learning_rate": 6.531316050557803e-05, "loss": 1.0237, "step": 4688 }, { "epoch": 0.41896933008689435, "grad_norm": 0.5068597793579102, "learning_rate": 6.529938451070501e-05, "loss": 0.9238, "step": 4689 }, { "epoch": 0.41905868161815624, "grad_norm": 0.4261419177055359, "learning_rate": 6.528560723434543e-05, "loss": 0.9867, "step": 4690 }, { "epoch": 0.4191480331494181, "grad_norm": 0.5222342014312744, "learning_rate": 6.527182867765332e-05, "loss": 0.9864, "step": 4691 }, { "epoch": 0.41923738468067995, "grad_norm": 0.5611239075660706, "learning_rate": 6.525804884178277e-05, "loss": 0.9771, "step": 4692 }, { "epoch": 0.41932673621194183, "grad_norm": 0.3744679391384125, "learning_rate": 6.524426772788801e-05, "loss": 0.9883, "step": 4693 }, { "epoch": 0.4194160877432037, "grad_norm": 0.4100448787212372, "learning_rate": 6.523048533712331e-05, "loss": 1.0148, "step": 4694 }, { "epoch": 0.41950543927446554, "grad_norm": 0.42226895689964294, "learning_rate": 6.521670167064313e-05, "loss": 1.027, "step": 4695 }, { "epoch": 0.4195947908057274, "grad_norm": 0.450785756111145, "learning_rate": 6.5202916729602e-05, "loss": 0.9611, "step": 4696 }, { "epoch": 0.4196841423369893, "grad_norm": 0.4674597382545471, "learning_rate": 6.518913051515451e-05, "loss": 0.9714, "step": 4697 }, { "epoch": 0.4197734938682512, "grad_norm": 0.5814161896705627, "learning_rate": 6.517534302845545e-05, "loss": 0.8825, "step": 4698 }, { "epoch": 0.419862845399513, "grad_norm": 0.4287194609642029, "learning_rate": 6.516155427065967e-05, "loss": 1.0032, "step": 4699 }, { "epoch": 0.4199521969307749, "grad_norm": 0.466623991727829, "learning_rate": 6.514776424292208e-05, "loss": 0.9162, "step": 4700 }, { "epoch": 0.4200415484620368, "grad_norm": 0.5070553421974182, "learning_rate": 6.513397294639778e-05, "loss": 1.0066, "step": 4701 }, { "epoch": 0.4201308999932986, "grad_norm": 0.42103180289268494, "learning_rate": 6.512018038224194e-05, "loss": 0.9602, "step": 4702 }, { "epoch": 0.4202202515245605, "grad_norm": 0.49392175674438477, "learning_rate": 6.510638655160981e-05, "loss": 0.9859, "step": 4703 }, { "epoch": 0.4203096030558224, "grad_norm": 0.46998798847198486, "learning_rate": 6.509259145565681e-05, "loss": 0.9445, "step": 4704 }, { "epoch": 0.42039895458708426, "grad_norm": 0.47235095500946045, "learning_rate": 6.507879509553837e-05, "loss": 0.942, "step": 4705 }, { "epoch": 0.4204883061183461, "grad_norm": 0.4976077973842621, "learning_rate": 6.506499747241013e-05, "loss": 0.9331, "step": 4706 }, { "epoch": 0.42057765764960797, "grad_norm": 0.45665648579597473, "learning_rate": 6.505119858742775e-05, "loss": 1.0242, "step": 4707 }, { "epoch": 0.42066700918086986, "grad_norm": 0.5178200602531433, "learning_rate": 6.503739844174708e-05, "loss": 0.9287, "step": 4708 }, { "epoch": 0.4207563607121317, "grad_norm": 0.41020286083221436, "learning_rate": 6.502359703652398e-05, "loss": 1.0093, "step": 4709 }, { "epoch": 0.42084571224339357, "grad_norm": 0.44288718700408936, "learning_rate": 6.500979437291451e-05, "loss": 1.0115, "step": 4710 }, { "epoch": 0.42093506377465545, "grad_norm": 0.4006830155849457, "learning_rate": 6.499599045207475e-05, "loss": 0.9736, "step": 4711 }, { "epoch": 0.42102441530591733, "grad_norm": 0.4733505845069885, "learning_rate": 6.498218527516097e-05, "loss": 0.9286, "step": 4712 }, { "epoch": 0.42111376683717916, "grad_norm": 0.48651424050331116, "learning_rate": 6.496837884332945e-05, "loss": 1.0209, "step": 4713 }, { "epoch": 0.42120311836844104, "grad_norm": 0.38982510566711426, "learning_rate": 6.495457115773667e-05, "loss": 1.0108, "step": 4714 }, { "epoch": 0.4212924698997029, "grad_norm": 0.47377124428749084, "learning_rate": 6.494076221953912e-05, "loss": 0.986, "step": 4715 }, { "epoch": 0.42138182143096475, "grad_norm": 0.3638954758644104, "learning_rate": 6.492695202989351e-05, "loss": 1.0072, "step": 4716 }, { "epoch": 0.42147117296222664, "grad_norm": 0.49875038862228394, "learning_rate": 6.491314058995654e-05, "loss": 0.9364, "step": 4717 }, { "epoch": 0.4215605244934885, "grad_norm": 0.41740208864212036, "learning_rate": 6.489932790088508e-05, "loss": 0.9213, "step": 4718 }, { "epoch": 0.42164987602475035, "grad_norm": 0.40949100255966187, "learning_rate": 6.48855139638361e-05, "loss": 0.9908, "step": 4719 }, { "epoch": 0.42173922755601223, "grad_norm": 0.4184902608394623, "learning_rate": 6.487169877996667e-05, "loss": 0.9252, "step": 4720 }, { "epoch": 0.4218285790872741, "grad_norm": 0.3919154405593872, "learning_rate": 6.485788235043392e-05, "loss": 1.0046, "step": 4721 }, { "epoch": 0.421917930618536, "grad_norm": 0.44090232253074646, "learning_rate": 6.484406467639516e-05, "loss": 1.0108, "step": 4722 }, { "epoch": 0.4220072821497978, "grad_norm": 0.407052606344223, "learning_rate": 6.483024575900776e-05, "loss": 1.01, "step": 4723 }, { "epoch": 0.4220966336810597, "grad_norm": 0.5126084089279175, "learning_rate": 6.481642559942919e-05, "loss": 0.9421, "step": 4724 }, { "epoch": 0.4221859852123216, "grad_norm": 0.3954399824142456, "learning_rate": 6.480260419881706e-05, "loss": 1.0176, "step": 4725 }, { "epoch": 0.4222753367435834, "grad_norm": 0.4803306758403778, "learning_rate": 6.478878155832903e-05, "loss": 0.8829, "step": 4726 }, { "epoch": 0.4223646882748453, "grad_norm": 0.4403332769870758, "learning_rate": 6.477495767912292e-05, "loss": 0.9965, "step": 4727 }, { "epoch": 0.4224540398061072, "grad_norm": 0.4345102906227112, "learning_rate": 6.476113256235661e-05, "loss": 0.9472, "step": 4728 }, { "epoch": 0.42254339133736907, "grad_norm": 0.4194163978099823, "learning_rate": 6.47473062091881e-05, "loss": 1.0142, "step": 4729 }, { "epoch": 0.4226327428686309, "grad_norm": 0.5548313856124878, "learning_rate": 6.473347862077552e-05, "loss": 0.9398, "step": 4730 }, { "epoch": 0.4227220943998928, "grad_norm": 0.49894118309020996, "learning_rate": 6.471964979827702e-05, "loss": 0.9525, "step": 4731 }, { "epoch": 0.42281144593115466, "grad_norm": 0.5956845879554749, "learning_rate": 6.470581974285098e-05, "loss": 0.9364, "step": 4732 }, { "epoch": 0.4229007974624165, "grad_norm": 0.5391216278076172, "learning_rate": 6.469198845565577e-05, "loss": 0.9362, "step": 4733 }, { "epoch": 0.42299014899367837, "grad_norm": 0.4035119414329529, "learning_rate": 6.467815593784993e-05, "loss": 0.9826, "step": 4734 }, { "epoch": 0.42307950052494026, "grad_norm": 0.43336036801338196, "learning_rate": 6.466432219059208e-05, "loss": 0.9782, "step": 4735 }, { "epoch": 0.42316885205620214, "grad_norm": 0.43682193756103516, "learning_rate": 6.465048721504091e-05, "loss": 0.9663, "step": 4736 }, { "epoch": 0.42325820358746397, "grad_norm": 0.49515190720558167, "learning_rate": 6.46366510123553e-05, "loss": 0.9055, "step": 4737 }, { "epoch": 0.42334755511872585, "grad_norm": 0.4037051200866699, "learning_rate": 6.462281358369413e-05, "loss": 0.9359, "step": 4738 }, { "epoch": 0.42343690664998773, "grad_norm": 0.4590768814086914, "learning_rate": 6.460897493021646e-05, "loss": 0.9446, "step": 4739 }, { "epoch": 0.42352625818124956, "grad_norm": 0.45074954628944397, "learning_rate": 6.459513505308142e-05, "loss": 0.993, "step": 4740 }, { "epoch": 0.42361560971251144, "grad_norm": 0.4910895824432373, "learning_rate": 6.458129395344825e-05, "loss": 0.9115, "step": 4741 }, { "epoch": 0.4237049612437733, "grad_norm": 0.4874398410320282, "learning_rate": 6.456745163247628e-05, "loss": 0.9896, "step": 4742 }, { "epoch": 0.4237943127750352, "grad_norm": 0.4627344012260437, "learning_rate": 6.455360809132496e-05, "loss": 0.9819, "step": 4743 }, { "epoch": 0.42388366430629704, "grad_norm": 0.3980332314968109, "learning_rate": 6.453976333115383e-05, "loss": 0.9962, "step": 4744 }, { "epoch": 0.4239730158375589, "grad_norm": 0.527260422706604, "learning_rate": 6.452591735312257e-05, "loss": 0.8636, "step": 4745 }, { "epoch": 0.4240623673688208, "grad_norm": 0.4000941514968872, "learning_rate": 6.451207015839086e-05, "loss": 0.9904, "step": 4746 }, { "epoch": 0.42415171890008263, "grad_norm": 0.4481007158756256, "learning_rate": 6.44982217481186e-05, "loss": 0.9877, "step": 4747 }, { "epoch": 0.4242410704313445, "grad_norm": 0.49271902441978455, "learning_rate": 6.448437212346572e-05, "loss": 0.8871, "step": 4748 }, { "epoch": 0.4243304219626064, "grad_norm": 0.4154813587665558, "learning_rate": 6.447052128559229e-05, "loss": 0.9812, "step": 4749 }, { "epoch": 0.4244197734938682, "grad_norm": 0.5907759666442871, "learning_rate": 6.445666923565846e-05, "loss": 0.9432, "step": 4750 }, { "epoch": 0.4245091250251301, "grad_norm": 0.4116978049278259, "learning_rate": 6.444281597482448e-05, "loss": 0.9952, "step": 4751 }, { "epoch": 0.424598476556392, "grad_norm": 0.4625799357891083, "learning_rate": 6.44289615042507e-05, "loss": 1.0082, "step": 4752 }, { "epoch": 0.4246878280876539, "grad_norm": 0.4559314548969269, "learning_rate": 6.441510582509761e-05, "loss": 1.0048, "step": 4753 }, { "epoch": 0.4247771796189157, "grad_norm": 0.4704115390777588, "learning_rate": 6.440124893852575e-05, "loss": 0.9848, "step": 4754 }, { "epoch": 0.4248665311501776, "grad_norm": 0.40942203998565674, "learning_rate": 6.438739084569579e-05, "loss": 0.9624, "step": 4755 }, { "epoch": 0.42495588268143947, "grad_norm": 0.3844430446624756, "learning_rate": 6.437353154776849e-05, "loss": 1.0214, "step": 4756 }, { "epoch": 0.4250452342127013, "grad_norm": 0.4514857232570648, "learning_rate": 6.435967104590469e-05, "loss": 0.9475, "step": 4757 }, { "epoch": 0.4251345857439632, "grad_norm": 0.44414305686950684, "learning_rate": 6.43458093412654e-05, "loss": 0.9882, "step": 4758 }, { "epoch": 0.42522393727522506, "grad_norm": 0.5354793667793274, "learning_rate": 6.433194643501164e-05, "loss": 0.9163, "step": 4759 }, { "epoch": 0.42531328880648694, "grad_norm": 0.4825092554092407, "learning_rate": 6.431808232830462e-05, "loss": 0.96, "step": 4760 }, { "epoch": 0.42540264033774877, "grad_norm": 0.44994306564331055, "learning_rate": 6.430421702230556e-05, "loss": 0.9369, "step": 4761 }, { "epoch": 0.42549199186901066, "grad_norm": 0.4433254599571228, "learning_rate": 6.429035051817588e-05, "loss": 0.9945, "step": 4762 }, { "epoch": 0.42558134340027254, "grad_norm": 0.49267831444740295, "learning_rate": 6.427648281707701e-05, "loss": 1.0097, "step": 4763 }, { "epoch": 0.42567069493153437, "grad_norm": 0.3941679000854492, "learning_rate": 6.426261392017052e-05, "loss": 1.0503, "step": 4764 }, { "epoch": 0.42576004646279625, "grad_norm": 0.3821266293525696, "learning_rate": 6.424874382861811e-05, "loss": 0.9521, "step": 4765 }, { "epoch": 0.42584939799405813, "grad_norm": 0.44064003229141235, "learning_rate": 6.42348725435815e-05, "loss": 0.9589, "step": 4766 }, { "epoch": 0.42593874952532, "grad_norm": 0.437092661857605, "learning_rate": 6.422100006622257e-05, "loss": 1.004, "step": 4767 }, { "epoch": 0.42602810105658184, "grad_norm": 0.5033923387527466, "learning_rate": 6.420712639770333e-05, "loss": 0.8967, "step": 4768 }, { "epoch": 0.4261174525878437, "grad_norm": 0.4463227093219757, "learning_rate": 6.41932515391858e-05, "loss": 0.9387, "step": 4769 }, { "epoch": 0.4262068041191056, "grad_norm": 0.4098617732524872, "learning_rate": 6.417937549183218e-05, "loss": 0.9521, "step": 4770 }, { "epoch": 0.42629615565036744, "grad_norm": 0.4326515197753906, "learning_rate": 6.41654982568047e-05, "loss": 1.0335, "step": 4771 }, { "epoch": 0.4263855071816293, "grad_norm": 0.42155736684799194, "learning_rate": 6.415161983526576e-05, "loss": 0.9386, "step": 4772 }, { "epoch": 0.4264748587128912, "grad_norm": 0.407709538936615, "learning_rate": 6.41377402283778e-05, "loss": 1.0556, "step": 4773 }, { "epoch": 0.4265642102441531, "grad_norm": 0.4312109649181366, "learning_rate": 6.412385943730341e-05, "loss": 1.004, "step": 4774 }, { "epoch": 0.4266535617754149, "grad_norm": 0.5315758585929871, "learning_rate": 6.410997746320524e-05, "loss": 0.8835, "step": 4775 }, { "epoch": 0.4267429133066768, "grad_norm": 0.40640825033187866, "learning_rate": 6.409609430724607e-05, "loss": 1.0284, "step": 4776 }, { "epoch": 0.4268322648379387, "grad_norm": 0.3886488676071167, "learning_rate": 6.408220997058873e-05, "loss": 0.9606, "step": 4777 }, { "epoch": 0.4269216163692005, "grad_norm": 0.47869306802749634, "learning_rate": 6.40683244543962e-05, "loss": 1.0092, "step": 4778 }, { "epoch": 0.4270109679004624, "grad_norm": 0.5204102396965027, "learning_rate": 6.405443775983154e-05, "loss": 0.8834, "step": 4779 }, { "epoch": 0.4271003194317243, "grad_norm": 0.5049982666969299, "learning_rate": 6.404054988805792e-05, "loss": 0.8793, "step": 4780 }, { "epoch": 0.4271896709629861, "grad_norm": 0.48872700333595276, "learning_rate": 6.402666084023858e-05, "loss": 0.9817, "step": 4781 }, { "epoch": 0.427279022494248, "grad_norm": 0.428631991147995, "learning_rate": 6.401277061753689e-05, "loss": 0.9423, "step": 4782 }, { "epoch": 0.42736837402550987, "grad_norm": 0.4668726623058319, "learning_rate": 6.399887922111627e-05, "loss": 0.9722, "step": 4783 }, { "epoch": 0.42745772555677175, "grad_norm": 0.5166489481925964, "learning_rate": 6.398498665214032e-05, "loss": 0.9832, "step": 4784 }, { "epoch": 0.4275470770880336, "grad_norm": 0.4075476825237274, "learning_rate": 6.397109291177266e-05, "loss": 1.0145, "step": 4785 }, { "epoch": 0.42763642861929546, "grad_norm": 0.40599527955055237, "learning_rate": 6.395719800117706e-05, "loss": 0.9469, "step": 4786 }, { "epoch": 0.42772578015055734, "grad_norm": 0.4204799234867096, "learning_rate": 6.394330192151732e-05, "loss": 0.9147, "step": 4787 }, { "epoch": 0.42781513168181917, "grad_norm": 0.3994778096675873, "learning_rate": 6.392940467395745e-05, "loss": 1.0195, "step": 4788 }, { "epoch": 0.42790448321308105, "grad_norm": 0.4340113401412964, "learning_rate": 6.391550625966144e-05, "loss": 1.0036, "step": 4789 }, { "epoch": 0.42799383474434294, "grad_norm": 0.45802706480026245, "learning_rate": 6.390160667979348e-05, "loss": 0.9356, "step": 4790 }, { "epoch": 0.4280831862756048, "grad_norm": 0.4832572937011719, "learning_rate": 6.388770593551777e-05, "loss": 1.0298, "step": 4791 }, { "epoch": 0.42817253780686665, "grad_norm": 0.45630741119384766, "learning_rate": 6.387380402799866e-05, "loss": 0.9754, "step": 4792 }, { "epoch": 0.42826188933812853, "grad_norm": 0.42788875102996826, "learning_rate": 6.385990095840055e-05, "loss": 1.0313, "step": 4793 }, { "epoch": 0.4283512408693904, "grad_norm": 0.4116227328777313, "learning_rate": 6.384599672788802e-05, "loss": 0.9863, "step": 4794 }, { "epoch": 0.42844059240065224, "grad_norm": 0.3941948115825653, "learning_rate": 6.383209133762569e-05, "loss": 1.0179, "step": 4795 }, { "epoch": 0.4285299439319141, "grad_norm": 0.41168758273124695, "learning_rate": 6.381818478877825e-05, "loss": 0.979, "step": 4796 }, { "epoch": 0.428619295463176, "grad_norm": 0.5500960946083069, "learning_rate": 6.380427708251054e-05, "loss": 0.9703, "step": 4797 }, { "epoch": 0.4287086469944379, "grad_norm": 0.42650794982910156, "learning_rate": 6.379036821998751e-05, "loss": 0.9895, "step": 4798 }, { "epoch": 0.4287979985256997, "grad_norm": 0.4761294424533844, "learning_rate": 6.377645820237412e-05, "loss": 0.9629, "step": 4799 }, { "epoch": 0.4288873500569616, "grad_norm": 0.45492222905158997, "learning_rate": 6.376254703083552e-05, "loss": 0.9479, "step": 4800 }, { "epoch": 0.4289767015882235, "grad_norm": 0.44997844099998474, "learning_rate": 6.374863470653691e-05, "loss": 0.9265, "step": 4801 }, { "epoch": 0.4290660531194853, "grad_norm": 0.5663099884986877, "learning_rate": 6.373472123064358e-05, "loss": 0.8915, "step": 4802 }, { "epoch": 0.4291554046507472, "grad_norm": 0.41858309507369995, "learning_rate": 6.372080660432095e-05, "loss": 0.9733, "step": 4803 }, { "epoch": 0.4292447561820091, "grad_norm": 0.42884135246276855, "learning_rate": 6.370689082873451e-05, "loss": 0.9741, "step": 4804 }, { "epoch": 0.42933410771327096, "grad_norm": 0.49144938588142395, "learning_rate": 6.369297390504987e-05, "loss": 0.9202, "step": 4805 }, { "epoch": 0.4294234592445328, "grad_norm": 0.43993905186653137, "learning_rate": 6.36790558344327e-05, "loss": 0.9182, "step": 4806 }, { "epoch": 0.4295128107757947, "grad_norm": 0.41477641463279724, "learning_rate": 6.36651366180488e-05, "loss": 1.0702, "step": 4807 }, { "epoch": 0.42960216230705656, "grad_norm": 0.48536887764930725, "learning_rate": 6.365121625706405e-05, "loss": 1.009, "step": 4808 }, { "epoch": 0.4296915138383184, "grad_norm": 0.4555813670158386, "learning_rate": 6.363729475264441e-05, "loss": 0.9185, "step": 4809 }, { "epoch": 0.42978086536958027, "grad_norm": 0.4251463711261749, "learning_rate": 6.362337210595599e-05, "loss": 0.9602, "step": 4810 }, { "epoch": 0.42987021690084215, "grad_norm": 0.5072605013847351, "learning_rate": 6.360944831816495e-05, "loss": 0.9825, "step": 4811 }, { "epoch": 0.429959568432104, "grad_norm": 0.39637628197669983, "learning_rate": 6.359552339043753e-05, "loss": 1.0062, "step": 4812 }, { "epoch": 0.43004891996336586, "grad_norm": 0.45429277420043945, "learning_rate": 6.35815973239401e-05, "loss": 0.9989, "step": 4813 }, { "epoch": 0.43013827149462774, "grad_norm": 0.4280487298965454, "learning_rate": 6.356767011983915e-05, "loss": 0.9106, "step": 4814 }, { "epoch": 0.4302276230258896, "grad_norm": 0.45695221424102783, "learning_rate": 6.355374177930118e-05, "loss": 0.977, "step": 4815 }, { "epoch": 0.43031697455715145, "grad_norm": 0.40981754660606384, "learning_rate": 6.353981230349289e-05, "loss": 1.0272, "step": 4816 }, { "epoch": 0.43040632608841334, "grad_norm": 0.4471674859523773, "learning_rate": 6.352588169358099e-05, "loss": 0.9212, "step": 4817 }, { "epoch": 0.4304956776196752, "grad_norm": 0.5394784808158875, "learning_rate": 6.35119499507323e-05, "loss": 0.9334, "step": 4818 }, { "epoch": 0.43058502915093705, "grad_norm": 0.5647401213645935, "learning_rate": 6.34980170761138e-05, "loss": 0.9176, "step": 4819 }, { "epoch": 0.43067438068219893, "grad_norm": 0.5009137392044067, "learning_rate": 6.348408307089248e-05, "loss": 0.9756, "step": 4820 }, { "epoch": 0.4307637322134608, "grad_norm": 0.38283008337020874, "learning_rate": 6.347014793623547e-05, "loss": 1.0153, "step": 4821 }, { "epoch": 0.4308530837447227, "grad_norm": 0.4172475337982178, "learning_rate": 6.345621167331e-05, "loss": 1.0625, "step": 4822 }, { "epoch": 0.4309424352759845, "grad_norm": 0.3502405285835266, "learning_rate": 6.344227428328335e-05, "loss": 0.9735, "step": 4823 }, { "epoch": 0.4310317868072464, "grad_norm": 0.43267232179641724, "learning_rate": 6.342833576732297e-05, "loss": 0.9561, "step": 4824 }, { "epoch": 0.4311211383385083, "grad_norm": 0.41811031103134155, "learning_rate": 6.341439612659631e-05, "loss": 0.9465, "step": 4825 }, { "epoch": 0.4312104898697701, "grad_norm": 0.4469202756881714, "learning_rate": 6.340045536227101e-05, "loss": 0.9639, "step": 4826 }, { "epoch": 0.431299841401032, "grad_norm": 0.43867889046669006, "learning_rate": 6.338651347551472e-05, "loss": 0.9826, "step": 4827 }, { "epoch": 0.4313891929322939, "grad_norm": 0.4113059341907501, "learning_rate": 6.337257046749523e-05, "loss": 1.0002, "step": 4828 }, { "epoch": 0.43147854446355577, "grad_norm": 0.47658199071884155, "learning_rate": 6.335862633938044e-05, "loss": 0.9545, "step": 4829 }, { "epoch": 0.4315678959948176, "grad_norm": 0.3787316083908081, "learning_rate": 6.334468109233827e-05, "loss": 1.033, "step": 4830 }, { "epoch": 0.4316572475260795, "grad_norm": 0.5350416898727417, "learning_rate": 6.333073472753686e-05, "loss": 0.9514, "step": 4831 }, { "epoch": 0.43174659905734136, "grad_norm": 0.40219366550445557, "learning_rate": 6.331678724614429e-05, "loss": 0.9928, "step": 4832 }, { "epoch": 0.4318359505886032, "grad_norm": 0.457727313041687, "learning_rate": 6.330283864932885e-05, "loss": 0.9433, "step": 4833 }, { "epoch": 0.4319253021198651, "grad_norm": 0.4098197817802429, "learning_rate": 6.328888893825888e-05, "loss": 1.0253, "step": 4834 }, { "epoch": 0.43201465365112696, "grad_norm": 0.4684712886810303, "learning_rate": 6.32749381141028e-05, "loss": 0.9667, "step": 4835 }, { "epoch": 0.43210400518238884, "grad_norm": 0.45592206716537476, "learning_rate": 6.326098617802917e-05, "loss": 0.9459, "step": 4836 }, { "epoch": 0.43219335671365067, "grad_norm": 0.4041098952293396, "learning_rate": 6.324703313120659e-05, "loss": 1.0305, "step": 4837 }, { "epoch": 0.43228270824491255, "grad_norm": 0.44690701365470886, "learning_rate": 6.323307897480376e-05, "loss": 0.9919, "step": 4838 }, { "epoch": 0.43237205977617443, "grad_norm": 0.4994511008262634, "learning_rate": 6.321912370998952e-05, "loss": 0.921, "step": 4839 }, { "epoch": 0.43246141130743626, "grad_norm": 0.4134766757488251, "learning_rate": 6.320516733793278e-05, "loss": 0.9395, "step": 4840 }, { "epoch": 0.43255076283869814, "grad_norm": 0.517279863357544, "learning_rate": 6.319120985980251e-05, "loss": 0.9349, "step": 4841 }, { "epoch": 0.43264011436996, "grad_norm": 0.5020187497138977, "learning_rate": 6.317725127676781e-05, "loss": 0.961, "step": 4842 }, { "epoch": 0.43272946590122185, "grad_norm": 0.4880422353744507, "learning_rate": 6.316329158999784e-05, "loss": 0.9669, "step": 4843 }, { "epoch": 0.43281881743248374, "grad_norm": 0.3964580297470093, "learning_rate": 6.31493308006619e-05, "loss": 0.9649, "step": 4844 }, { "epoch": 0.4329081689637456, "grad_norm": 0.37571898102760315, "learning_rate": 6.313536890992935e-05, "loss": 1.0031, "step": 4845 }, { "epoch": 0.4329975204950075, "grad_norm": 0.45982518792152405, "learning_rate": 6.312140591896964e-05, "loss": 0.9339, "step": 4846 }, { "epoch": 0.43308687202626933, "grad_norm": 0.4254016876220703, "learning_rate": 6.310744182895231e-05, "loss": 1.0641, "step": 4847 }, { "epoch": 0.4331762235575312, "grad_norm": 0.43494075536727905, "learning_rate": 6.309347664104701e-05, "loss": 0.9164, "step": 4848 }, { "epoch": 0.4332655750887931, "grad_norm": 0.451556921005249, "learning_rate": 6.307951035642349e-05, "loss": 1.0389, "step": 4849 }, { "epoch": 0.4333549266200549, "grad_norm": 0.34367331862449646, "learning_rate": 6.306554297625156e-05, "loss": 1.0079, "step": 4850 }, { "epoch": 0.4334442781513168, "grad_norm": 0.5219529271125793, "learning_rate": 6.305157450170111e-05, "loss": 0.8767, "step": 4851 }, { "epoch": 0.4335336296825787, "grad_norm": 0.36903488636016846, "learning_rate": 6.303760493394221e-05, "loss": 0.9979, "step": 4852 }, { "epoch": 0.4336229812138406, "grad_norm": 0.37834039330482483, "learning_rate": 6.302363427414491e-05, "loss": 0.9771, "step": 4853 }, { "epoch": 0.4337123327451024, "grad_norm": 0.4161636233329773, "learning_rate": 6.300966252347942e-05, "loss": 0.9805, "step": 4854 }, { "epoch": 0.4338016842763643, "grad_norm": 0.5334766507148743, "learning_rate": 6.299568968311601e-05, "loss": 0.9568, "step": 4855 }, { "epoch": 0.43389103580762617, "grad_norm": 0.4917714595794678, "learning_rate": 6.298171575422508e-05, "loss": 0.917, "step": 4856 }, { "epoch": 0.433980387338888, "grad_norm": 0.42037853598594666, "learning_rate": 6.296774073797708e-05, "loss": 0.9624, "step": 4857 }, { "epoch": 0.4340697388701499, "grad_norm": 0.3785925805568695, "learning_rate": 6.295376463554255e-05, "loss": 1.0437, "step": 4858 }, { "epoch": 0.43415909040141176, "grad_norm": 0.47149381041526794, "learning_rate": 6.293978744809217e-05, "loss": 0.9401, "step": 4859 }, { "epoch": 0.43424844193267365, "grad_norm": 0.4850710928440094, "learning_rate": 6.292580917679665e-05, "loss": 0.9874, "step": 4860 }, { "epoch": 0.4343377934639355, "grad_norm": 0.43360236287117004, "learning_rate": 6.291182982282685e-05, "loss": 0.9758, "step": 4861 }, { "epoch": 0.43442714499519736, "grad_norm": 0.41151541471481323, "learning_rate": 6.289784938735366e-05, "loss": 1.0028, "step": 4862 }, { "epoch": 0.43451649652645924, "grad_norm": 0.46998703479766846, "learning_rate": 6.28838678715481e-05, "loss": 0.9591, "step": 4863 }, { "epoch": 0.43460584805772107, "grad_norm": 0.5863698124885559, "learning_rate": 6.286988527658129e-05, "loss": 0.9661, "step": 4864 }, { "epoch": 0.43469519958898295, "grad_norm": 0.434731662273407, "learning_rate": 6.285590160362438e-05, "loss": 0.9454, "step": 4865 }, { "epoch": 0.43478455112024483, "grad_norm": 0.5044207572937012, "learning_rate": 6.28419168538487e-05, "loss": 0.9308, "step": 4866 }, { "epoch": 0.4348739026515067, "grad_norm": 0.4645664095878601, "learning_rate": 6.282793102842559e-05, "loss": 0.9077, "step": 4867 }, { "epoch": 0.43496325418276854, "grad_norm": 0.43170252442359924, "learning_rate": 6.281394412852652e-05, "loss": 1.0509, "step": 4868 }, { "epoch": 0.4350526057140304, "grad_norm": 0.41620826721191406, "learning_rate": 6.279995615532304e-05, "loss": 0.9653, "step": 4869 }, { "epoch": 0.4351419572452923, "grad_norm": 0.3965005874633789, "learning_rate": 6.27859671099868e-05, "loss": 0.9601, "step": 4870 }, { "epoch": 0.43523130877655414, "grad_norm": 0.3978107273578644, "learning_rate": 6.277197699368954e-05, "loss": 1.0369, "step": 4871 }, { "epoch": 0.435320660307816, "grad_norm": 0.42217400670051575, "learning_rate": 6.275798580760304e-05, "loss": 0.9626, "step": 4872 }, { "epoch": 0.4354100118390779, "grad_norm": 0.4375147223472595, "learning_rate": 6.274399355289923e-05, "loss": 0.9481, "step": 4873 }, { "epoch": 0.43549936337033973, "grad_norm": 0.4031033515930176, "learning_rate": 6.273000023075014e-05, "loss": 0.9599, "step": 4874 }, { "epoch": 0.4355887149016016, "grad_norm": 0.50283282995224, "learning_rate": 6.271600584232784e-05, "loss": 0.9854, "step": 4875 }, { "epoch": 0.4356780664328635, "grad_norm": 0.5138799548149109, "learning_rate": 6.27020103888045e-05, "loss": 1.0223, "step": 4876 }, { "epoch": 0.4357674179641254, "grad_norm": 0.37269532680511475, "learning_rate": 6.26880138713524e-05, "loss": 0.9626, "step": 4877 }, { "epoch": 0.4358567694953872, "grad_norm": 0.4185316264629364, "learning_rate": 6.267401629114389e-05, "loss": 0.9613, "step": 4878 }, { "epoch": 0.4359461210266491, "grad_norm": 0.41371724009513855, "learning_rate": 6.266001764935144e-05, "loss": 0.9528, "step": 4879 }, { "epoch": 0.436035472557911, "grad_norm": 0.392198383808136, "learning_rate": 6.264601794714753e-05, "loss": 0.9722, "step": 4880 }, { "epoch": 0.4361248240891728, "grad_norm": 0.39201459288597107, "learning_rate": 6.263201718570485e-05, "loss": 0.9175, "step": 4881 }, { "epoch": 0.4362141756204347, "grad_norm": 0.5743230581283569, "learning_rate": 6.261801536619607e-05, "loss": 0.9564, "step": 4882 }, { "epoch": 0.43630352715169657, "grad_norm": 0.5308464765548706, "learning_rate": 6.2604012489794e-05, "loss": 0.9499, "step": 4883 }, { "epoch": 0.43639287868295845, "grad_norm": 0.4258040487766266, "learning_rate": 6.259000855767155e-05, "loss": 0.9583, "step": 4884 }, { "epoch": 0.4364822302142203, "grad_norm": 0.45301637053489685, "learning_rate": 6.257600357100167e-05, "loss": 0.9621, "step": 4885 }, { "epoch": 0.43657158174548216, "grad_norm": 0.5080031752586365, "learning_rate": 6.256199753095745e-05, "loss": 0.9353, "step": 4886 }, { "epoch": 0.43666093327674405, "grad_norm": 0.41666001081466675, "learning_rate": 6.254799043871204e-05, "loss": 1.0159, "step": 4887 }, { "epoch": 0.4367502848080059, "grad_norm": 0.4304046928882599, "learning_rate": 6.253398229543867e-05, "loss": 0.9442, "step": 4888 }, { "epoch": 0.43683963633926776, "grad_norm": 0.41285210847854614, "learning_rate": 6.251997310231067e-05, "loss": 0.9862, "step": 4889 }, { "epoch": 0.43692898787052964, "grad_norm": 0.5112424492835999, "learning_rate": 6.250596286050148e-05, "loss": 0.9551, "step": 4890 }, { "epoch": 0.4370183394017915, "grad_norm": 0.46234673261642456, "learning_rate": 6.249195157118461e-05, "loss": 0.9087, "step": 4891 }, { "epoch": 0.43710769093305335, "grad_norm": 0.43776413798332214, "learning_rate": 6.247793923553362e-05, "loss": 0.9494, "step": 4892 }, { "epoch": 0.43719704246431523, "grad_norm": 0.42224618792533875, "learning_rate": 6.246392585472222e-05, "loss": 1.0397, "step": 4893 }, { "epoch": 0.4372863939955771, "grad_norm": 0.4742605984210968, "learning_rate": 6.244991142992417e-05, "loss": 0.9794, "step": 4894 }, { "epoch": 0.43737574552683894, "grad_norm": 0.5097817778587341, "learning_rate": 6.243589596231333e-05, "loss": 0.9319, "step": 4895 }, { "epoch": 0.4374650970581008, "grad_norm": 0.4433581829071045, "learning_rate": 6.242187945306364e-05, "loss": 0.9704, "step": 4896 }, { "epoch": 0.4375544485893627, "grad_norm": 0.3601015508174896, "learning_rate": 6.240786190334918e-05, "loss": 1.0209, "step": 4897 }, { "epoch": 0.4376438001206246, "grad_norm": 0.6255925893783569, "learning_rate": 6.239384331434399e-05, "loss": 0.986, "step": 4898 }, { "epoch": 0.4377331516518864, "grad_norm": 0.45729881525039673, "learning_rate": 6.237982368722232e-05, "loss": 1.0179, "step": 4899 }, { "epoch": 0.4378225031831483, "grad_norm": 0.41129839420318604, "learning_rate": 6.236580302315844e-05, "loss": 0.9502, "step": 4900 }, { "epoch": 0.4379118547144102, "grad_norm": 0.4457799792289734, "learning_rate": 6.235178132332677e-05, "loss": 0.9722, "step": 4901 }, { "epoch": 0.438001206245672, "grad_norm": 0.492878794670105, "learning_rate": 6.233775858890175e-05, "loss": 0.9167, "step": 4902 }, { "epoch": 0.4380905577769339, "grad_norm": 0.43004390597343445, "learning_rate": 6.232373482105794e-05, "loss": 0.9728, "step": 4903 }, { "epoch": 0.4381799093081958, "grad_norm": 0.4913460314273834, "learning_rate": 6.230971002096999e-05, "loss": 1.05, "step": 4904 }, { "epoch": 0.43826926083945766, "grad_norm": 0.47525662183761597, "learning_rate": 6.229568418981258e-05, "loss": 0.9822, "step": 4905 }, { "epoch": 0.4383586123707195, "grad_norm": 0.4017292857170105, "learning_rate": 6.228165732876056e-05, "loss": 0.9573, "step": 4906 }, { "epoch": 0.4384479639019814, "grad_norm": 0.41514596343040466, "learning_rate": 6.226762943898886e-05, "loss": 0.9435, "step": 4907 }, { "epoch": 0.43853731543324326, "grad_norm": 0.4426291286945343, "learning_rate": 6.22536005216724e-05, "loss": 1.0117, "step": 4908 }, { "epoch": 0.4386266669645051, "grad_norm": 0.42675042152404785, "learning_rate": 6.223957057798629e-05, "loss": 0.9363, "step": 4909 }, { "epoch": 0.43871601849576697, "grad_norm": 0.5054933428764343, "learning_rate": 6.222553960910567e-05, "loss": 0.961, "step": 4910 }, { "epoch": 0.43880537002702885, "grad_norm": 0.37240302562713623, "learning_rate": 6.221150761620581e-05, "loss": 1.098, "step": 4911 }, { "epoch": 0.4388947215582907, "grad_norm": 0.37764713168144226, "learning_rate": 6.219747460046203e-05, "loss": 0.9538, "step": 4912 }, { "epoch": 0.43898407308955256, "grad_norm": 0.49377158284187317, "learning_rate": 6.218344056304972e-05, "loss": 0.9257, "step": 4913 }, { "epoch": 0.43907342462081445, "grad_norm": 0.44281005859375, "learning_rate": 6.216940550514439e-05, "loss": 0.9663, "step": 4914 }, { "epoch": 0.43916277615207633, "grad_norm": 0.42006000876426697, "learning_rate": 6.215536942792163e-05, "loss": 0.9435, "step": 4915 }, { "epoch": 0.43925212768333816, "grad_norm": 0.468666136264801, "learning_rate": 6.214133233255713e-05, "loss": 0.9804, "step": 4916 }, { "epoch": 0.43934147921460004, "grad_norm": 0.3761788308620453, "learning_rate": 6.212729422022664e-05, "loss": 1.0003, "step": 4917 }, { "epoch": 0.4394308307458619, "grad_norm": 0.4250943064689636, "learning_rate": 6.211325509210597e-05, "loss": 0.9809, "step": 4918 }, { "epoch": 0.43952018227712375, "grad_norm": 0.40891769528388977, "learning_rate": 6.209921494937108e-05, "loss": 0.9841, "step": 4919 }, { "epoch": 0.43960953380838563, "grad_norm": 0.3647308945655823, "learning_rate": 6.208517379319796e-05, "loss": 0.9747, "step": 4920 }, { "epoch": 0.4396988853396475, "grad_norm": 0.4176364541053772, "learning_rate": 6.207113162476272e-05, "loss": 0.9336, "step": 4921 }, { "epoch": 0.4397882368709094, "grad_norm": 0.40200045704841614, "learning_rate": 6.205708844524153e-05, "loss": 1.0223, "step": 4922 }, { "epoch": 0.4398775884021712, "grad_norm": 0.4894162714481354, "learning_rate": 6.204304425581069e-05, "loss": 0.9348, "step": 4923 }, { "epoch": 0.4399669399334331, "grad_norm": 0.4525613784790039, "learning_rate": 6.20289990576465e-05, "loss": 0.941, "step": 4924 }, { "epoch": 0.440056291464695, "grad_norm": 0.4009491205215454, "learning_rate": 6.201495285192542e-05, "loss": 1.0821, "step": 4925 }, { "epoch": 0.4401456429959568, "grad_norm": 0.4592028558254242, "learning_rate": 6.200090563982397e-05, "loss": 0.9359, "step": 4926 }, { "epoch": 0.4402349945272187, "grad_norm": 0.45449405908584595, "learning_rate": 6.198685742251877e-05, "loss": 0.9356, "step": 4927 }, { "epoch": 0.4403243460584806, "grad_norm": 0.6728967428207397, "learning_rate": 6.197280820118646e-05, "loss": 1.0374, "step": 4928 }, { "epoch": 0.44041369758974247, "grad_norm": 0.5596892833709717, "learning_rate": 6.195875797700385e-05, "loss": 0.9524, "step": 4929 }, { "epoch": 0.4405030491210043, "grad_norm": 0.4790952801704407, "learning_rate": 6.19447067511478e-05, "loss": 1.0068, "step": 4930 }, { "epoch": 0.4405924006522662, "grad_norm": 0.4227498173713684, "learning_rate": 6.193065452479523e-05, "loss": 1.0209, "step": 4931 }, { "epoch": 0.44068175218352806, "grad_norm": 0.4277952015399933, "learning_rate": 6.191660129912317e-05, "loss": 0.8967, "step": 4932 }, { "epoch": 0.4407711037147899, "grad_norm": 0.4384140074253082, "learning_rate": 6.190254707530874e-05, "loss": 0.9636, "step": 4933 }, { "epoch": 0.4408604552460518, "grad_norm": 0.4057149589061737, "learning_rate": 6.188849185452911e-05, "loss": 1.0107, "step": 4934 }, { "epoch": 0.44094980677731366, "grad_norm": 0.43641364574432373, "learning_rate": 6.187443563796157e-05, "loss": 0.9296, "step": 4935 }, { "epoch": 0.44103915830857554, "grad_norm": 0.4788879156112671, "learning_rate": 6.186037842678349e-05, "loss": 0.9666, "step": 4936 }, { "epoch": 0.44112850983983737, "grad_norm": 0.3350755572319031, "learning_rate": 6.184632022217227e-05, "loss": 1.0117, "step": 4937 }, { "epoch": 0.44121786137109925, "grad_norm": 0.4167011082172394, "learning_rate": 6.183226102530547e-05, "loss": 1.023, "step": 4938 }, { "epoch": 0.44130721290236113, "grad_norm": 0.47220227122306824, "learning_rate": 6.181820083736067e-05, "loss": 1.0494, "step": 4939 }, { "epoch": 0.44139656443362296, "grad_norm": 0.4266274571418762, "learning_rate": 6.18041396595156e-05, "loss": 0.9815, "step": 4940 }, { "epoch": 0.44148591596488485, "grad_norm": 0.4164573848247528, "learning_rate": 6.1790077492948e-05, "loss": 1.0067, "step": 4941 }, { "epoch": 0.44157526749614673, "grad_norm": 0.4291037917137146, "learning_rate": 6.177601433883573e-05, "loss": 0.9725, "step": 4942 }, { "epoch": 0.44166461902740856, "grad_norm": 0.49879229068756104, "learning_rate": 6.176195019835674e-05, "loss": 0.9782, "step": 4943 }, { "epoch": 0.44175397055867044, "grad_norm": 0.5074213743209839, "learning_rate": 6.174788507268905e-05, "loss": 0.9614, "step": 4944 }, { "epoch": 0.4418433220899323, "grad_norm": 0.39095085859298706, "learning_rate": 6.173381896301076e-05, "loss": 0.9515, "step": 4945 }, { "epoch": 0.4419326736211942, "grad_norm": 0.40660542249679565, "learning_rate": 6.171975187050005e-05, "loss": 0.9768, "step": 4946 }, { "epoch": 0.44202202515245603, "grad_norm": 0.6411069631576538, "learning_rate": 6.170568379633522e-05, "loss": 1.0163, "step": 4947 }, { "epoch": 0.4421113766837179, "grad_norm": 0.4861661195755005, "learning_rate": 6.169161474169458e-05, "loss": 0.9521, "step": 4948 }, { "epoch": 0.4422007282149798, "grad_norm": 0.4027434289455414, "learning_rate": 6.167754470775659e-05, "loss": 0.9752, "step": 4949 }, { "epoch": 0.4422900797462416, "grad_norm": 0.46598201990127563, "learning_rate": 6.166347369569975e-05, "loss": 0.9576, "step": 4950 }, { "epoch": 0.4423794312775035, "grad_norm": 0.4167962372303009, "learning_rate": 6.164940170670266e-05, "loss": 0.9705, "step": 4951 }, { "epoch": 0.4424687828087654, "grad_norm": 0.4604121744632721, "learning_rate": 6.163532874194401e-05, "loss": 0.9491, "step": 4952 }, { "epoch": 0.4425581343400273, "grad_norm": 0.4519181549549103, "learning_rate": 6.162125480260257e-05, "loss": 0.9529, "step": 4953 }, { "epoch": 0.4426474858712891, "grad_norm": 0.3763716220855713, "learning_rate": 6.160717988985714e-05, "loss": 0.962, "step": 4954 }, { "epoch": 0.442736837402551, "grad_norm": 0.3946007788181305, "learning_rate": 6.15931040048867e-05, "loss": 1.0416, "step": 4955 }, { "epoch": 0.44282618893381287, "grad_norm": 0.4197639226913452, "learning_rate": 6.15790271488702e-05, "loss": 1.05, "step": 4956 }, { "epoch": 0.4429155404650747, "grad_norm": 0.40052515268325806, "learning_rate": 6.156494932298678e-05, "loss": 0.95, "step": 4957 }, { "epoch": 0.4430048919963366, "grad_norm": 0.4258505702018738, "learning_rate": 6.155087052841555e-05, "loss": 0.9829, "step": 4958 }, { "epoch": 0.44309424352759846, "grad_norm": 0.41184738278388977, "learning_rate": 6.153679076633581e-05, "loss": 0.9795, "step": 4959 }, { "epoch": 0.44318359505886035, "grad_norm": 0.5452486872673035, "learning_rate": 6.152271003792686e-05, "loss": 0.8949, "step": 4960 }, { "epoch": 0.4432729465901222, "grad_norm": 0.47022590041160583, "learning_rate": 6.150862834436811e-05, "loss": 0.9618, "step": 4961 }, { "epoch": 0.44336229812138406, "grad_norm": 0.4404926300048828, "learning_rate": 6.149454568683909e-05, "loss": 1.0751, "step": 4962 }, { "epoch": 0.44345164965264594, "grad_norm": 0.4911143481731415, "learning_rate": 6.148046206651932e-05, "loss": 0.9599, "step": 4963 }, { "epoch": 0.44354100118390777, "grad_norm": 0.4653252959251404, "learning_rate": 6.146637748458849e-05, "loss": 1.0376, "step": 4964 }, { "epoch": 0.44363035271516965, "grad_norm": 0.5367299914360046, "learning_rate": 6.145229194222633e-05, "loss": 0.9954, "step": 4965 }, { "epoch": 0.44371970424643153, "grad_norm": 0.39032191038131714, "learning_rate": 6.143820544061263e-05, "loss": 0.9678, "step": 4966 }, { "epoch": 0.4438090557776934, "grad_norm": 0.46807265281677246, "learning_rate": 6.142411798092731e-05, "loss": 0.9434, "step": 4967 }, { "epoch": 0.44389840730895525, "grad_norm": 0.3878948390483856, "learning_rate": 6.141002956435034e-05, "loss": 0.9584, "step": 4968 }, { "epoch": 0.44398775884021713, "grad_norm": 0.4154819846153259, "learning_rate": 6.139594019206178e-05, "loss": 0.9433, "step": 4969 }, { "epoch": 0.444077110371479, "grad_norm": 0.4845220744609833, "learning_rate": 6.138184986524175e-05, "loss": 1.0408, "step": 4970 }, { "epoch": 0.44416646190274084, "grad_norm": 0.4787101149559021, "learning_rate": 6.136775858507046e-05, "loss": 0.9348, "step": 4971 }, { "epoch": 0.4442558134340027, "grad_norm": 0.4312731921672821, "learning_rate": 6.135366635272824e-05, "loss": 0.9757, "step": 4972 }, { "epoch": 0.4443451649652646, "grad_norm": 0.4321444034576416, "learning_rate": 6.133957316939543e-05, "loss": 0.9491, "step": 4973 }, { "epoch": 0.44443451649652643, "grad_norm": 0.5200006365776062, "learning_rate": 6.132547903625249e-05, "loss": 0.897, "step": 4974 }, { "epoch": 0.4445238680277883, "grad_norm": 0.44951632618904114, "learning_rate": 6.131138395447997e-05, "loss": 0.991, "step": 4975 }, { "epoch": 0.4446132195590502, "grad_norm": 0.556250274181366, "learning_rate": 6.129728792525846e-05, "loss": 0.9118, "step": 4976 }, { "epoch": 0.4447025710903121, "grad_norm": 0.5766691565513611, "learning_rate": 6.128319094976868e-05, "loss": 0.9496, "step": 4977 }, { "epoch": 0.4447919226215739, "grad_norm": 0.472476601600647, "learning_rate": 6.126909302919138e-05, "loss": 0.9091, "step": 4978 }, { "epoch": 0.4448812741528358, "grad_norm": 0.41769498586654663, "learning_rate": 6.125499416470742e-05, "loss": 1.0127, "step": 4979 }, { "epoch": 0.4449706256840977, "grad_norm": 0.44769975543022156, "learning_rate": 6.124089435749772e-05, "loss": 0.9084, "step": 4980 }, { "epoch": 0.4450599772153595, "grad_norm": 0.42755383253097534, "learning_rate": 6.122679360874331e-05, "loss": 1.0314, "step": 4981 }, { "epoch": 0.4451493287466214, "grad_norm": 0.4499891698360443, "learning_rate": 6.121269191962527e-05, "loss": 0.9204, "step": 4982 }, { "epoch": 0.44523868027788327, "grad_norm": 0.5186624526977539, "learning_rate": 6.119858929132475e-05, "loss": 0.973, "step": 4983 }, { "epoch": 0.44532803180914515, "grad_norm": 0.43319734930992126, "learning_rate": 6.118448572502302e-05, "loss": 1.0126, "step": 4984 }, { "epoch": 0.445417383340407, "grad_norm": 0.41849610209465027, "learning_rate": 6.117038122190139e-05, "loss": 1.0017, "step": 4985 }, { "epoch": 0.44550673487166886, "grad_norm": 0.5016182065010071, "learning_rate": 6.115627578314125e-05, "loss": 0.9203, "step": 4986 }, { "epoch": 0.44559608640293075, "grad_norm": 0.47967931628227234, "learning_rate": 6.114216940992411e-05, "loss": 0.9842, "step": 4987 }, { "epoch": 0.4456854379341926, "grad_norm": 0.4858255088329315, "learning_rate": 6.112806210343152e-05, "loss": 0.9719, "step": 4988 }, { "epoch": 0.44577478946545446, "grad_norm": 0.5705578327178955, "learning_rate": 6.111395386484511e-05, "loss": 0.8807, "step": 4989 }, { "epoch": 0.44586414099671634, "grad_norm": 0.4477182626724243, "learning_rate": 6.109984469534659e-05, "loss": 0.9386, "step": 4990 }, { "epoch": 0.4459534925279782, "grad_norm": 0.4489213824272156, "learning_rate": 6.108573459611776e-05, "loss": 1.0481, "step": 4991 }, { "epoch": 0.44604284405924005, "grad_norm": 0.36626997590065, "learning_rate": 6.10716235683405e-05, "loss": 0.908, "step": 4992 }, { "epoch": 0.44613219559050193, "grad_norm": 0.409940630197525, "learning_rate": 6.105751161319675e-05, "loss": 0.9784, "step": 4993 }, { "epoch": 0.4462215471217638, "grad_norm": 0.42009782791137695, "learning_rate": 6.104339873186855e-05, "loss": 1.0009, "step": 4994 }, { "epoch": 0.44631089865302565, "grad_norm": 0.4435548186302185, "learning_rate": 6.102928492553796e-05, "loss": 1.0153, "step": 4995 }, { "epoch": 0.44640025018428753, "grad_norm": 0.574257493019104, "learning_rate": 6.101517019538721e-05, "loss": 0.8759, "step": 4996 }, { "epoch": 0.4464896017155494, "grad_norm": 0.43628671765327454, "learning_rate": 6.1001054542598534e-05, "loss": 0.9219, "step": 4997 }, { "epoch": 0.4465789532468113, "grad_norm": 0.3821120262145996, "learning_rate": 6.0986937968354295e-05, "loss": 1.0096, "step": 4998 }, { "epoch": 0.4466683047780731, "grad_norm": 0.41857802867889404, "learning_rate": 6.097282047383688e-05, "loss": 0.9917, "step": 4999 }, { "epoch": 0.446757656309335, "grad_norm": 0.6450624465942383, "learning_rate": 6.095870206022879e-05, "loss": 0.8897, "step": 5000 }, { "epoch": 0.4468470078405969, "grad_norm": 0.4506264925003052, "learning_rate": 6.0944582728712585e-05, "loss": 0.9337, "step": 5001 }, { "epoch": 0.4469363593718587, "grad_norm": 0.5254955887794495, "learning_rate": 6.093046248047092e-05, "loss": 0.9374, "step": 5002 }, { "epoch": 0.4470257109031206, "grad_norm": 0.4589659869670868, "learning_rate": 6.091634131668652e-05, "loss": 0.9522, "step": 5003 }, { "epoch": 0.4471150624343825, "grad_norm": 0.5653790831565857, "learning_rate": 6.090221923854217e-05, "loss": 0.9442, "step": 5004 }, { "epoch": 0.4472044139656443, "grad_norm": 0.4403340220451355, "learning_rate": 6.088809624722074e-05, "loss": 0.9674, "step": 5005 }, { "epoch": 0.4472937654969062, "grad_norm": 0.5026565790176392, "learning_rate": 6.0873972343905206e-05, "loss": 0.9168, "step": 5006 }, { "epoch": 0.4473831170281681, "grad_norm": 0.4866418242454529, "learning_rate": 6.085984752977857e-05, "loss": 0.9227, "step": 5007 }, { "epoch": 0.44747246855942996, "grad_norm": 0.43236756324768066, "learning_rate": 6.0845721806023945e-05, "loss": 0.9651, "step": 5008 }, { "epoch": 0.4475618200906918, "grad_norm": 0.49595317244529724, "learning_rate": 6.083159517382452e-05, "loss": 0.8973, "step": 5009 }, { "epoch": 0.44765117162195367, "grad_norm": 0.3765192925930023, "learning_rate": 6.0817467634363535e-05, "loss": 0.9991, "step": 5010 }, { "epoch": 0.44774052315321555, "grad_norm": 0.49171602725982666, "learning_rate": 6.0803339188824326e-05, "loss": 1.0389, "step": 5011 }, { "epoch": 0.4478298746844774, "grad_norm": 0.47733286023139954, "learning_rate": 6.078920983839031e-05, "loss": 0.974, "step": 5012 }, { "epoch": 0.44791922621573926, "grad_norm": 0.5364671945571899, "learning_rate": 6.0775079584244976e-05, "loss": 0.957, "step": 5013 }, { "epoch": 0.44800857774700115, "grad_norm": 0.5192524790763855, "learning_rate": 6.076094842757185e-05, "loss": 0.8715, "step": 5014 }, { "epoch": 0.44809792927826303, "grad_norm": 0.38916611671447754, "learning_rate": 6.07468163695546e-05, "loss": 1.0494, "step": 5015 }, { "epoch": 0.44818728080952486, "grad_norm": 0.4378674030303955, "learning_rate": 6.0732683411376935e-05, "loss": 1.0302, "step": 5016 }, { "epoch": 0.44827663234078674, "grad_norm": 0.4529155492782593, "learning_rate": 6.0718549554222614e-05, "loss": 0.9895, "step": 5017 }, { "epoch": 0.4483659838720486, "grad_norm": 0.4706246256828308, "learning_rate": 6.070441479927554e-05, "loss": 0.9995, "step": 5018 }, { "epoch": 0.44845533540331045, "grad_norm": 0.515957772731781, "learning_rate": 6.069027914771961e-05, "loss": 0.898, "step": 5019 }, { "epoch": 0.44854468693457233, "grad_norm": 0.43126967549324036, "learning_rate": 6.067614260073885e-05, "loss": 1.0187, "step": 5020 }, { "epoch": 0.4486340384658342, "grad_norm": 0.44768694043159485, "learning_rate": 6.0662005159517354e-05, "loss": 1.047, "step": 5021 }, { "epoch": 0.4487233899970961, "grad_norm": 0.4717482328414917, "learning_rate": 6.064786682523928e-05, "loss": 0.9254, "step": 5022 }, { "epoch": 0.44881274152835793, "grad_norm": 0.40896281599998474, "learning_rate": 6.063372759908885e-05, "loss": 0.9374, "step": 5023 }, { "epoch": 0.4489020930596198, "grad_norm": 0.43339207768440247, "learning_rate": 6.06195874822504e-05, "loss": 0.9764, "step": 5024 }, { "epoch": 0.4489914445908817, "grad_norm": 0.4062994718551636, "learning_rate": 6.06054464759083e-05, "loss": 0.9822, "step": 5025 }, { "epoch": 0.4490807961221435, "grad_norm": 0.4177139401435852, "learning_rate": 6.0591304581247e-05, "loss": 1.0045, "step": 5026 }, { "epoch": 0.4491701476534054, "grad_norm": 0.46026477217674255, "learning_rate": 6.0577161799451054e-05, "loss": 0.9932, "step": 5027 }, { "epoch": 0.4492594991846673, "grad_norm": 0.3944912552833557, "learning_rate": 6.0563018131705063e-05, "loss": 1.0112, "step": 5028 }, { "epoch": 0.44934885071592917, "grad_norm": 0.49842584133148193, "learning_rate": 6.054887357919371e-05, "loss": 1.0421, "step": 5029 }, { "epoch": 0.449438202247191, "grad_norm": 0.4486824572086334, "learning_rate": 6.053472814310175e-05, "loss": 0.9671, "step": 5030 }, { "epoch": 0.4495275537784529, "grad_norm": 0.41824930906295776, "learning_rate": 6.052058182461401e-05, "loss": 0.9581, "step": 5031 }, { "epoch": 0.44961690530971476, "grad_norm": 0.500913143157959, "learning_rate": 6.0506434624915396e-05, "loss": 0.9364, "step": 5032 }, { "epoch": 0.4497062568409766, "grad_norm": 0.4475401043891907, "learning_rate": 6.049228654519091e-05, "loss": 0.9434, "step": 5033 }, { "epoch": 0.4497956083722385, "grad_norm": 0.47360408306121826, "learning_rate": 6.0478137586625584e-05, "loss": 0.959, "step": 5034 }, { "epoch": 0.44988495990350036, "grad_norm": 0.4704830050468445, "learning_rate": 6.046398775040454e-05, "loss": 0.9246, "step": 5035 }, { "epoch": 0.4499743114347622, "grad_norm": 0.41631588339805603, "learning_rate": 6.0449837037712976e-05, "loss": 1.0194, "step": 5036 }, { "epoch": 0.45006366296602407, "grad_norm": 0.47200682759284973, "learning_rate": 6.043568544973618e-05, "loss": 0.9078, "step": 5037 }, { "epoch": 0.45015301449728595, "grad_norm": 0.4732605516910553, "learning_rate": 6.0421532987659504e-05, "loss": 1.0336, "step": 5038 }, { "epoch": 0.45024236602854784, "grad_norm": 0.4652157425880432, "learning_rate": 6.040737965266834e-05, "loss": 0.9257, "step": 5039 }, { "epoch": 0.45033171755980966, "grad_norm": 0.5224878191947937, "learning_rate": 6.039322544594819e-05, "loss": 0.9199, "step": 5040 }, { "epoch": 0.45042106909107155, "grad_norm": 0.4996034801006317, "learning_rate": 6.037907036868464e-05, "loss": 1.0375, "step": 5041 }, { "epoch": 0.45051042062233343, "grad_norm": 0.4124327600002289, "learning_rate": 6.0364914422063304e-05, "loss": 0.9245, "step": 5042 }, { "epoch": 0.45059977215359526, "grad_norm": 0.441354900598526, "learning_rate": 6.0350757607269904e-05, "loss": 0.9574, "step": 5043 }, { "epoch": 0.45068912368485714, "grad_norm": 0.4461125433444977, "learning_rate": 6.033659992549023e-05, "loss": 0.8827, "step": 5044 }, { "epoch": 0.450778475216119, "grad_norm": 0.3978963792324066, "learning_rate": 6.0322441377910135e-05, "loss": 0.9883, "step": 5045 }, { "epoch": 0.4508678267473809, "grad_norm": 0.4818345010280609, "learning_rate": 6.030828196571553e-05, "loss": 0.9646, "step": 5046 }, { "epoch": 0.45095717827864273, "grad_norm": 0.5546659231185913, "learning_rate": 6.029412169009243e-05, "loss": 0.9554, "step": 5047 }, { "epoch": 0.4510465298099046, "grad_norm": 0.4250444173812866, "learning_rate": 6.0279960552226934e-05, "loss": 0.9748, "step": 5048 }, { "epoch": 0.4511358813411665, "grad_norm": 0.44978564977645874, "learning_rate": 6.026579855330515e-05, "loss": 0.9755, "step": 5049 }, { "epoch": 0.45122523287242833, "grad_norm": 0.4644257426261902, "learning_rate": 6.025163569451331e-05, "loss": 1.003, "step": 5050 }, { "epoch": 0.4513145844036902, "grad_norm": 0.41228753328323364, "learning_rate": 6.0237471977037706e-05, "loss": 1.0315, "step": 5051 }, { "epoch": 0.4514039359349521, "grad_norm": 0.5227345824241638, "learning_rate": 6.02233074020647e-05, "loss": 0.8891, "step": 5052 }, { "epoch": 0.451493287466214, "grad_norm": 0.46746036410331726, "learning_rate": 6.020914197078074e-05, "loss": 0.9664, "step": 5053 }, { "epoch": 0.4515826389974758, "grad_norm": 0.3485402762889862, "learning_rate": 6.01949756843723e-05, "loss": 1.0188, "step": 5054 }, { "epoch": 0.4516719905287377, "grad_norm": 0.4057181179523468, "learning_rate": 6.018080854402599e-05, "loss": 0.9448, "step": 5055 }, { "epoch": 0.45176134205999957, "grad_norm": 0.39723464846611023, "learning_rate": 6.0166640550928434e-05, "loss": 0.97, "step": 5056 }, { "epoch": 0.4518506935912614, "grad_norm": 0.3836243152618408, "learning_rate": 6.015247170626637e-05, "loss": 0.9494, "step": 5057 }, { "epoch": 0.4519400451225233, "grad_norm": 0.4795580208301544, "learning_rate": 6.013830201122659e-05, "loss": 0.9054, "step": 5058 }, { "epoch": 0.45202939665378516, "grad_norm": 0.4811798930168152, "learning_rate": 6.0124131466995936e-05, "loss": 0.9832, "step": 5059 }, { "epoch": 0.45211874818504705, "grad_norm": 0.41265878081321716, "learning_rate": 6.0109960074761374e-05, "loss": 0.9763, "step": 5060 }, { "epoch": 0.4522080997163089, "grad_norm": 0.4164207875728607, "learning_rate": 6.009578783570987e-05, "loss": 0.9297, "step": 5061 }, { "epoch": 0.45229745124757076, "grad_norm": 0.392967164516449, "learning_rate": 6.008161475102853e-05, "loss": 0.953, "step": 5062 }, { "epoch": 0.45238680277883264, "grad_norm": 0.42946505546569824, "learning_rate": 6.00674408219045e-05, "loss": 0.979, "step": 5063 }, { "epoch": 0.45247615431009447, "grad_norm": 0.47703489661216736, "learning_rate": 6.005326604952498e-05, "loss": 0.9656, "step": 5064 }, { "epoch": 0.45256550584135635, "grad_norm": 0.4116286635398865, "learning_rate": 6.003909043507727e-05, "loss": 0.943, "step": 5065 }, { "epoch": 0.45265485737261824, "grad_norm": 0.4779094159603119, "learning_rate": 6.002491397974872e-05, "loss": 0.9066, "step": 5066 }, { "epoch": 0.45274420890388006, "grad_norm": 0.4313408136367798, "learning_rate": 6.0010736684726774e-05, "loss": 1.01, "step": 5067 }, { "epoch": 0.45283356043514195, "grad_norm": 0.4300564229488373, "learning_rate": 5.999655855119893e-05, "loss": 0.9609, "step": 5068 }, { "epoch": 0.45292291196640383, "grad_norm": 0.4585869610309601, "learning_rate": 5.998237958035274e-05, "loss": 0.9914, "step": 5069 }, { "epoch": 0.4530122634976657, "grad_norm": 0.5256367325782776, "learning_rate": 5.996819977337587e-05, "loss": 0.9232, "step": 5070 }, { "epoch": 0.45310161502892754, "grad_norm": 0.38605356216430664, "learning_rate": 5.9954019131456e-05, "loss": 1.0481, "step": 5071 }, { "epoch": 0.4531909665601894, "grad_norm": 0.41398924589157104, "learning_rate": 5.993983765578093e-05, "loss": 1.0571, "step": 5072 }, { "epoch": 0.4532803180914513, "grad_norm": 0.45383667945861816, "learning_rate": 5.9925655347538504e-05, "loss": 0.9514, "step": 5073 }, { "epoch": 0.45336966962271313, "grad_norm": 0.5158712863922119, "learning_rate": 5.991147220791665e-05, "loss": 0.8985, "step": 5074 }, { "epoch": 0.453459021153975, "grad_norm": 0.4722534716129303, "learning_rate": 5.989728823810335e-05, "loss": 0.9492, "step": 5075 }, { "epoch": 0.4535483726852369, "grad_norm": 0.517153799533844, "learning_rate": 5.9883103439286646e-05, "loss": 0.9614, "step": 5076 }, { "epoch": 0.4536377242164988, "grad_norm": 0.4288327097892761, "learning_rate": 5.986891781265471e-05, "loss": 0.9977, "step": 5077 }, { "epoch": 0.4537270757477606, "grad_norm": 0.3851867616176605, "learning_rate": 5.98547313593957e-05, "loss": 0.9741, "step": 5078 }, { "epoch": 0.4538164272790225, "grad_norm": 0.41952255368232727, "learning_rate": 5.9840544080697904e-05, "loss": 0.901, "step": 5079 }, { "epoch": 0.4539057788102844, "grad_norm": 0.4248369634151459, "learning_rate": 5.9826355977749624e-05, "loss": 1.004, "step": 5080 }, { "epoch": 0.4539951303415462, "grad_norm": 0.3857657015323639, "learning_rate": 5.98121670517393e-05, "loss": 1.0764, "step": 5081 }, { "epoch": 0.4540844818728081, "grad_norm": 0.4897959232330322, "learning_rate": 5.97979773038554e-05, "loss": 0.9433, "step": 5082 }, { "epoch": 0.45417383340406997, "grad_norm": 0.4853098392486572, "learning_rate": 5.978378673528645e-05, "loss": 1.0143, "step": 5083 }, { "epoch": 0.45426318493533185, "grad_norm": 0.3813380300998688, "learning_rate": 5.9769595347221096e-05, "loss": 0.991, "step": 5084 }, { "epoch": 0.4543525364665937, "grad_norm": 0.3379877507686615, "learning_rate": 5.9755403140847974e-05, "loss": 1.0139, "step": 5085 }, { "epoch": 0.45444188799785556, "grad_norm": 0.41478732228279114, "learning_rate": 5.974121011735586e-05, "loss": 0.9286, "step": 5086 }, { "epoch": 0.45453123952911745, "grad_norm": 0.4362654685974121, "learning_rate": 5.972701627793357e-05, "loss": 0.9713, "step": 5087 }, { "epoch": 0.4546205910603793, "grad_norm": 0.44205188751220703, "learning_rate": 5.9712821623769976e-05, "loss": 0.9442, "step": 5088 }, { "epoch": 0.45470994259164116, "grad_norm": 0.4520505666732788, "learning_rate": 5.969862615605405e-05, "loss": 0.9221, "step": 5089 }, { "epoch": 0.45479929412290304, "grad_norm": 0.3804233968257904, "learning_rate": 5.96844298759748e-05, "loss": 0.9983, "step": 5090 }, { "epoch": 0.4548886456541649, "grad_norm": 0.45884522795677185, "learning_rate": 5.967023278472131e-05, "loss": 0.9408, "step": 5091 }, { "epoch": 0.45497799718542675, "grad_norm": 0.5373643040657043, "learning_rate": 5.965603488348276e-05, "loss": 0.9206, "step": 5092 }, { "epoch": 0.45506734871668864, "grad_norm": 0.4332393705844879, "learning_rate": 5.964183617344836e-05, "loss": 0.9796, "step": 5093 }, { "epoch": 0.4551567002479505, "grad_norm": 0.5239064693450928, "learning_rate": 5.962763665580741e-05, "loss": 0.9123, "step": 5094 }, { "epoch": 0.45524605177921235, "grad_norm": 0.41141557693481445, "learning_rate": 5.961343633174926e-05, "loss": 0.9346, "step": 5095 }, { "epoch": 0.45533540331047423, "grad_norm": 0.43024352192878723, "learning_rate": 5.9599235202463344e-05, "loss": 0.9898, "step": 5096 }, { "epoch": 0.4554247548417361, "grad_norm": 0.4978499412536621, "learning_rate": 5.958503326913916e-05, "loss": 0.9262, "step": 5097 }, { "epoch": 0.45551410637299794, "grad_norm": 0.37708181142807007, "learning_rate": 5.957083053296626e-05, "loss": 1.0786, "step": 5098 }, { "epoch": 0.4556034579042598, "grad_norm": 0.42339766025543213, "learning_rate": 5.955662699513431e-05, "loss": 0.9934, "step": 5099 }, { "epoch": 0.4556928094355217, "grad_norm": 0.515396773815155, "learning_rate": 5.954242265683296e-05, "loss": 0.9361, "step": 5100 }, { "epoch": 0.4557821609667836, "grad_norm": 0.502129316329956, "learning_rate": 5.9528217519252003e-05, "loss": 0.9834, "step": 5101 }, { "epoch": 0.4558715124980454, "grad_norm": 0.44378775358200073, "learning_rate": 5.9514011583581275e-05, "loss": 0.9662, "step": 5102 }, { "epoch": 0.4559608640293073, "grad_norm": 0.4403262436389923, "learning_rate": 5.9499804851010655e-05, "loss": 0.986, "step": 5103 }, { "epoch": 0.4560502155605692, "grad_norm": 0.38599881529808044, "learning_rate": 5.948559732273013e-05, "loss": 1.0328, "step": 5104 }, { "epoch": 0.456139567091831, "grad_norm": 0.5018365979194641, "learning_rate": 5.9471388999929714e-05, "loss": 0.9439, "step": 5105 }, { "epoch": 0.4562289186230929, "grad_norm": 0.4095374047756195, "learning_rate": 5.9457179883799496e-05, "loss": 0.9718, "step": 5106 }, { "epoch": 0.4563182701543548, "grad_norm": 0.49552807211875916, "learning_rate": 5.944296997552967e-05, "loss": 0.9708, "step": 5107 }, { "epoch": 0.45640762168561666, "grad_norm": 0.45015573501586914, "learning_rate": 5.942875927631045e-05, "loss": 0.9136, "step": 5108 }, { "epoch": 0.4564969732168785, "grad_norm": 0.4981837868690491, "learning_rate": 5.9414547787332155e-05, "loss": 0.8726, "step": 5109 }, { "epoch": 0.45658632474814037, "grad_norm": 0.38596221804618835, "learning_rate": 5.940033550978511e-05, "loss": 1.0169, "step": 5110 }, { "epoch": 0.45667567627940225, "grad_norm": 0.4662768840789795, "learning_rate": 5.938612244485977e-05, "loss": 0.8954, "step": 5111 }, { "epoch": 0.4567650278106641, "grad_norm": 0.5224105715751648, "learning_rate": 5.937190859374664e-05, "loss": 0.9889, "step": 5112 }, { "epoch": 0.45685437934192596, "grad_norm": 0.5646452307701111, "learning_rate": 5.935769395763625e-05, "loss": 0.8719, "step": 5113 }, { "epoch": 0.45694373087318785, "grad_norm": 0.4161957800388336, "learning_rate": 5.934347853771927e-05, "loss": 0.9331, "step": 5114 }, { "epoch": 0.45703308240444973, "grad_norm": 0.394827276468277, "learning_rate": 5.9329262335186344e-05, "loss": 0.954, "step": 5115 }, { "epoch": 0.45712243393571156, "grad_norm": 0.5495721697807312, "learning_rate": 5.931504535122825e-05, "loss": 1.0314, "step": 5116 }, { "epoch": 0.45721178546697344, "grad_norm": 0.48053717613220215, "learning_rate": 5.9300827587035825e-05, "loss": 0.9705, "step": 5117 }, { "epoch": 0.4573011369982353, "grad_norm": 0.48041045665740967, "learning_rate": 5.9286609043799945e-05, "loss": 0.941, "step": 5118 }, { "epoch": 0.45739048852949715, "grad_norm": 0.46086958050727844, "learning_rate": 5.9272389722711586e-05, "loss": 0.9866, "step": 5119 }, { "epoch": 0.45747984006075904, "grad_norm": 0.4236505925655365, "learning_rate": 5.925816962496175e-05, "loss": 0.9627, "step": 5120 }, { "epoch": 0.4575691915920209, "grad_norm": 0.41899070143699646, "learning_rate": 5.9243948751741505e-05, "loss": 0.9541, "step": 5121 }, { "epoch": 0.4576585431232828, "grad_norm": 0.37787696719169617, "learning_rate": 5.922972710424203e-05, "loss": 0.9989, "step": 5122 }, { "epoch": 0.45774789465454463, "grad_norm": 0.40664857625961304, "learning_rate": 5.921550468365452e-05, "loss": 1.0442, "step": 5123 }, { "epoch": 0.4578372461858065, "grad_norm": 0.4115360975265503, "learning_rate": 5.920128149117028e-05, "loss": 0.9699, "step": 5124 }, { "epoch": 0.4579265977170684, "grad_norm": 0.44247210025787354, "learning_rate": 5.918705752798062e-05, "loss": 0.9151, "step": 5125 }, { "epoch": 0.4580159492483302, "grad_norm": 0.4786369800567627, "learning_rate": 5.917283279527697e-05, "loss": 0.9834, "step": 5126 }, { "epoch": 0.4581053007795921, "grad_norm": 0.5580417513847351, "learning_rate": 5.915860729425079e-05, "loss": 0.7882, "step": 5127 }, { "epoch": 0.458194652310854, "grad_norm": 0.4049915373325348, "learning_rate": 5.914438102609364e-05, "loss": 0.9563, "step": 5128 }, { "epoch": 0.4582840038421158, "grad_norm": 0.4272023141384125, "learning_rate": 5.91301539919971e-05, "loss": 0.9359, "step": 5129 }, { "epoch": 0.4583733553733777, "grad_norm": 0.4311048686504364, "learning_rate": 5.911592619315286e-05, "loss": 0.9599, "step": 5130 }, { "epoch": 0.4584627069046396, "grad_norm": 0.4287171959877014, "learning_rate": 5.910169763075261e-05, "loss": 0.9668, "step": 5131 }, { "epoch": 0.45855205843590147, "grad_norm": 0.4313339293003082, "learning_rate": 5.908746830598817e-05, "loss": 0.9181, "step": 5132 }, { "epoch": 0.4586414099671633, "grad_norm": 0.4437636435031891, "learning_rate": 5.9073238220051394e-05, "loss": 1.0319, "step": 5133 }, { "epoch": 0.4587307614984252, "grad_norm": 0.4116639196872711, "learning_rate": 5.905900737413421e-05, "loss": 0.9471, "step": 5134 }, { "epoch": 0.45882011302968706, "grad_norm": 0.5594172477722168, "learning_rate": 5.9044775769428594e-05, "loss": 0.8384, "step": 5135 }, { "epoch": 0.4589094645609489, "grad_norm": 0.4292183816432953, "learning_rate": 5.903054340712659e-05, "loss": 1.0041, "step": 5136 }, { "epoch": 0.45899881609221077, "grad_norm": 0.5219025015830994, "learning_rate": 5.9016310288420316e-05, "loss": 0.8519, "step": 5137 }, { "epoch": 0.45908816762347265, "grad_norm": 0.4652097821235657, "learning_rate": 5.900207641450195e-05, "loss": 0.9249, "step": 5138 }, { "epoch": 0.45917751915473454, "grad_norm": 0.41842982172966003, "learning_rate": 5.898784178656375e-05, "loss": 1.0074, "step": 5139 }, { "epoch": 0.45926687068599636, "grad_norm": 0.41296178102493286, "learning_rate": 5.897360640579798e-05, "loss": 0.9087, "step": 5140 }, { "epoch": 0.45935622221725825, "grad_norm": 0.49167054891586304, "learning_rate": 5.895937027339701e-05, "loss": 0.9117, "step": 5141 }, { "epoch": 0.45944557374852013, "grad_norm": 0.3634302020072937, "learning_rate": 5.894513339055329e-05, "loss": 1.0143, "step": 5142 }, { "epoch": 0.45953492527978196, "grad_norm": 0.522591233253479, "learning_rate": 5.89308957584593e-05, "loss": 0.9263, "step": 5143 }, { "epoch": 0.45962427681104384, "grad_norm": 0.49898669123649597, "learning_rate": 5.89166573783076e-05, "loss": 0.9532, "step": 5144 }, { "epoch": 0.4597136283423057, "grad_norm": 0.44929975271224976, "learning_rate": 5.890241825129079e-05, "loss": 0.9635, "step": 5145 }, { "epoch": 0.4598029798735676, "grad_norm": 0.4347250461578369, "learning_rate": 5.8888178378601565e-05, "loss": 0.9352, "step": 5146 }, { "epoch": 0.45989233140482944, "grad_norm": 0.44016188383102417, "learning_rate": 5.887393776143267e-05, "loss": 0.9607, "step": 5147 }, { "epoch": 0.4599816829360913, "grad_norm": 0.5058707594871521, "learning_rate": 5.8859696400976884e-05, "loss": 0.965, "step": 5148 }, { "epoch": 0.4600710344673532, "grad_norm": 0.40825730562210083, "learning_rate": 5.8845454298427094e-05, "loss": 1.0115, "step": 5149 }, { "epoch": 0.46016038599861503, "grad_norm": 0.4351537227630615, "learning_rate": 5.883121145497622e-05, "loss": 0.9622, "step": 5150 }, { "epoch": 0.4602497375298769, "grad_norm": 0.4099435806274414, "learning_rate": 5.881696787181724e-05, "loss": 0.9724, "step": 5151 }, { "epoch": 0.4603390890611388, "grad_norm": 0.44913604855537415, "learning_rate": 5.8802723550143225e-05, "loss": 0.9009, "step": 5152 }, { "epoch": 0.4604284405924007, "grad_norm": 0.417484313249588, "learning_rate": 5.878847849114728e-05, "loss": 0.9643, "step": 5153 }, { "epoch": 0.4605177921236625, "grad_norm": 0.43109041452407837, "learning_rate": 5.8774232696022604e-05, "loss": 0.9845, "step": 5154 }, { "epoch": 0.4606071436549244, "grad_norm": 0.432075172662735, "learning_rate": 5.8759986165962386e-05, "loss": 1.0115, "step": 5155 }, { "epoch": 0.4606964951861863, "grad_norm": 0.49210742115974426, "learning_rate": 5.8745738902159965e-05, "loss": 0.8991, "step": 5156 }, { "epoch": 0.4607858467174481, "grad_norm": 0.41765424609184265, "learning_rate": 5.873149090580868e-05, "loss": 0.9901, "step": 5157 }, { "epoch": 0.46087519824871, "grad_norm": 0.5431698560714722, "learning_rate": 5.8717242178101975e-05, "loss": 0.9571, "step": 5158 }, { "epoch": 0.46096454977997187, "grad_norm": 0.5281333923339844, "learning_rate": 5.8702992720233296e-05, "loss": 0.9096, "step": 5159 }, { "epoch": 0.4610539013112337, "grad_norm": 0.4179060757160187, "learning_rate": 5.868874253339624e-05, "loss": 0.937, "step": 5160 }, { "epoch": 0.4611432528424956, "grad_norm": 0.4784322679042816, "learning_rate": 5.867449161878435e-05, "loss": 0.989, "step": 5161 }, { "epoch": 0.46123260437375746, "grad_norm": 0.431211918592453, "learning_rate": 5.8660239977591344e-05, "loss": 1.0558, "step": 5162 }, { "epoch": 0.46132195590501934, "grad_norm": 0.4160225987434387, "learning_rate": 5.8645987611010935e-05, "loss": 0.9614, "step": 5163 }, { "epoch": 0.46141130743628117, "grad_norm": 0.41637277603149414, "learning_rate": 5.863173452023688e-05, "loss": 0.9515, "step": 5164 }, { "epoch": 0.46150065896754305, "grad_norm": 0.35038280487060547, "learning_rate": 5.861748070646307e-05, "loss": 0.9557, "step": 5165 }, { "epoch": 0.46159001049880494, "grad_norm": 0.49989181756973267, "learning_rate": 5.860322617088339e-05, "loss": 0.9531, "step": 5166 }, { "epoch": 0.46167936203006676, "grad_norm": 0.3543355166912079, "learning_rate": 5.858897091469181e-05, "loss": 0.971, "step": 5167 }, { "epoch": 0.46176871356132865, "grad_norm": 0.4254325032234192, "learning_rate": 5.857471493908237e-05, "loss": 1.009, "step": 5168 }, { "epoch": 0.46185806509259053, "grad_norm": 0.4708648920059204, "learning_rate": 5.856045824524914e-05, "loss": 0.9253, "step": 5169 }, { "epoch": 0.4619474166238524, "grad_norm": 0.48328697681427, "learning_rate": 5.85462008343863e-05, "loss": 0.8906, "step": 5170 }, { "epoch": 0.46203676815511424, "grad_norm": 0.42431074380874634, "learning_rate": 5.8531942707688034e-05, "loss": 0.9893, "step": 5171 }, { "epoch": 0.4621261196863761, "grad_norm": 0.37147825956344604, "learning_rate": 5.851768386634863e-05, "loss": 1.0255, "step": 5172 }, { "epoch": 0.462215471217638, "grad_norm": 0.47785985469818115, "learning_rate": 5.8503424311562406e-05, "loss": 0.9222, "step": 5173 }, { "epoch": 0.46230482274889984, "grad_norm": 0.4230659604072571, "learning_rate": 5.848916404452375e-05, "loss": 1.0169, "step": 5174 }, { "epoch": 0.4623941742801617, "grad_norm": 0.45284929871559143, "learning_rate": 5.8474903066427136e-05, "loss": 1.0053, "step": 5175 }, { "epoch": 0.4624835258114236, "grad_norm": 0.4324350357055664, "learning_rate": 5.846064137846704e-05, "loss": 0.9689, "step": 5176 }, { "epoch": 0.4625728773426855, "grad_norm": 0.4609506130218506, "learning_rate": 5.844637898183805e-05, "loss": 0.9361, "step": 5177 }, { "epoch": 0.4626622288739473, "grad_norm": 0.45454010367393494, "learning_rate": 5.8432115877734775e-05, "loss": 0.8882, "step": 5178 }, { "epoch": 0.4627515804052092, "grad_norm": 0.47145554423332214, "learning_rate": 5.841785206735192e-05, "loss": 0.9578, "step": 5179 }, { "epoch": 0.4628409319364711, "grad_norm": 0.5458246469497681, "learning_rate": 5.8403587551884244e-05, "loss": 0.9361, "step": 5180 }, { "epoch": 0.4629302834677329, "grad_norm": 0.46121159195899963, "learning_rate": 5.838932233252652e-05, "loss": 0.8609, "step": 5181 }, { "epoch": 0.4630196349989948, "grad_norm": 0.40931928157806396, "learning_rate": 5.837505641047364e-05, "loss": 1.0475, "step": 5182 }, { "epoch": 0.46310898653025667, "grad_norm": 0.3603355288505554, "learning_rate": 5.83607897869205e-05, "loss": 0.9752, "step": 5183 }, { "epoch": 0.46319833806151856, "grad_norm": 0.40766477584838867, "learning_rate": 5.834652246306209e-05, "loss": 0.9826, "step": 5184 }, { "epoch": 0.4632876895927804, "grad_norm": 0.43302759528160095, "learning_rate": 5.833225444009348e-05, "loss": 0.954, "step": 5185 }, { "epoch": 0.46337704112404227, "grad_norm": 0.518700122833252, "learning_rate": 5.831798571920973e-05, "loss": 1.0633, "step": 5186 }, { "epoch": 0.46346639265530415, "grad_norm": 0.4842970371246338, "learning_rate": 5.8303716301606e-05, "loss": 0.9383, "step": 5187 }, { "epoch": 0.463555744186566, "grad_norm": 0.42370370030403137, "learning_rate": 5.828944618847753e-05, "loss": 0.9952, "step": 5188 }, { "epoch": 0.46364509571782786, "grad_norm": 0.47464433312416077, "learning_rate": 5.827517538101959e-05, "loss": 0.9543, "step": 5189 }, { "epoch": 0.46373444724908974, "grad_norm": 0.48624545335769653, "learning_rate": 5.826090388042751e-05, "loss": 0.9396, "step": 5190 }, { "epoch": 0.4638237987803516, "grad_norm": 0.45458412170410156, "learning_rate": 5.8246631687896656e-05, "loss": 0.9381, "step": 5191 }, { "epoch": 0.46391315031161345, "grad_norm": 0.43564853072166443, "learning_rate": 5.823235880462251e-05, "loss": 0.9548, "step": 5192 }, { "epoch": 0.46400250184287534, "grad_norm": 0.46724554896354675, "learning_rate": 5.8218085231800544e-05, "loss": 0.9358, "step": 5193 }, { "epoch": 0.4640918533741372, "grad_norm": 0.4100496172904968, "learning_rate": 5.820381097062636e-05, "loss": 0.9394, "step": 5194 }, { "epoch": 0.46418120490539905, "grad_norm": 0.5092671513557434, "learning_rate": 5.818953602229557e-05, "loss": 0.9381, "step": 5195 }, { "epoch": 0.46427055643666093, "grad_norm": 0.410299688577652, "learning_rate": 5.8175260388003813e-05, "loss": 1.0375, "step": 5196 }, { "epoch": 0.4643599079679228, "grad_norm": 0.4275777041912079, "learning_rate": 5.816098406894688e-05, "loss": 0.9305, "step": 5197 }, { "epoch": 0.46444925949918464, "grad_norm": 0.4333481192588806, "learning_rate": 5.814670706632054e-05, "loss": 0.9786, "step": 5198 }, { "epoch": 0.4645386110304465, "grad_norm": 0.5642595291137695, "learning_rate": 5.813242938132064e-05, "loss": 0.9754, "step": 5199 }, { "epoch": 0.4646279625617084, "grad_norm": 0.5947056412696838, "learning_rate": 5.8118151015143094e-05, "loss": 0.9364, "step": 5200 }, { "epoch": 0.4647173140929703, "grad_norm": 0.4831881523132324, "learning_rate": 5.810387196898387e-05, "loss": 0.9705, "step": 5201 }, { "epoch": 0.4648066656242321, "grad_norm": 0.4384908676147461, "learning_rate": 5.808959224403898e-05, "loss": 0.9407, "step": 5202 }, { "epoch": 0.464896017155494, "grad_norm": 0.4423673748970032, "learning_rate": 5.807531184150452e-05, "loss": 1.0036, "step": 5203 }, { "epoch": 0.4649853686867559, "grad_norm": 0.4656173288822174, "learning_rate": 5.806103076257661e-05, "loss": 0.9284, "step": 5204 }, { "epoch": 0.4650747202180177, "grad_norm": 0.4020898938179016, "learning_rate": 5.804674900845145e-05, "loss": 0.9451, "step": 5205 }, { "epoch": 0.4651640717492796, "grad_norm": 0.3898525536060333, "learning_rate": 5.803246658032528e-05, "loss": 0.9848, "step": 5206 }, { "epoch": 0.4652534232805415, "grad_norm": 0.41617026925086975, "learning_rate": 5.801818347939443e-05, "loss": 0.9434, "step": 5207 }, { "epoch": 0.46534277481180336, "grad_norm": 0.3756409287452698, "learning_rate": 5.8003899706855235e-05, "loss": 1.0008, "step": 5208 }, { "epoch": 0.4654321263430652, "grad_norm": 0.48038768768310547, "learning_rate": 5.798961526390413e-05, "loss": 0.9255, "step": 5209 }, { "epoch": 0.46552147787432707, "grad_norm": 0.42943793535232544, "learning_rate": 5.797533015173759e-05, "loss": 1.0199, "step": 5210 }, { "epoch": 0.46561082940558896, "grad_norm": 0.5280621647834778, "learning_rate": 5.796104437155213e-05, "loss": 0.9118, "step": 5211 }, { "epoch": 0.4657001809368508, "grad_norm": 0.4121812582015991, "learning_rate": 5.794675792454434e-05, "loss": 0.9665, "step": 5212 }, { "epoch": 0.46578953246811267, "grad_norm": 0.43791842460632324, "learning_rate": 5.793247081191087e-05, "loss": 0.9876, "step": 5213 }, { "epoch": 0.46587888399937455, "grad_norm": 0.38042962551116943, "learning_rate": 5.791818303484842e-05, "loss": 0.916, "step": 5214 }, { "epoch": 0.46596823553063643, "grad_norm": 0.4212323725223541, "learning_rate": 5.790389459455374e-05, "loss": 0.9963, "step": 5215 }, { "epoch": 0.46605758706189826, "grad_norm": 0.4514927566051483, "learning_rate": 5.788960549222364e-05, "loss": 0.9769, "step": 5216 }, { "epoch": 0.46614693859316014, "grad_norm": 0.4762234687805176, "learning_rate": 5.787531572905498e-05, "loss": 1.0555, "step": 5217 }, { "epoch": 0.466236290124422, "grad_norm": 0.3893480598926544, "learning_rate": 5.786102530624469e-05, "loss": 0.9298, "step": 5218 }, { "epoch": 0.46632564165568385, "grad_norm": 0.4439602494239807, "learning_rate": 5.784673422498972e-05, "loss": 0.9701, "step": 5219 }, { "epoch": 0.46641499318694574, "grad_norm": 0.44786638021469116, "learning_rate": 5.783244248648714e-05, "loss": 0.9189, "step": 5220 }, { "epoch": 0.4665043447182076, "grad_norm": 0.4381410479545593, "learning_rate": 5.7818150091934e-05, "loss": 1.0335, "step": 5221 }, { "epoch": 0.4665936962494695, "grad_norm": 0.4737434685230255, "learning_rate": 5.780385704252746e-05, "loss": 0.9773, "step": 5222 }, { "epoch": 0.46668304778073133, "grad_norm": 0.41768383979797363, "learning_rate": 5.7789563339464695e-05, "loss": 0.9492, "step": 5223 }, { "epoch": 0.4667723993119932, "grad_norm": 0.5461154580116272, "learning_rate": 5.777526898394298e-05, "loss": 0.8902, "step": 5224 }, { "epoch": 0.4668617508432551, "grad_norm": 0.46472278237342834, "learning_rate": 5.7760973977159615e-05, "loss": 0.9915, "step": 5225 }, { "epoch": 0.4669511023745169, "grad_norm": 0.4949001669883728, "learning_rate": 5.7746678320311954e-05, "loss": 0.8738, "step": 5226 }, { "epoch": 0.4670404539057788, "grad_norm": 0.5098089575767517, "learning_rate": 5.77323820145974e-05, "loss": 0.9527, "step": 5227 }, { "epoch": 0.4671298054370407, "grad_norm": 0.4948689341545105, "learning_rate": 5.7718085061213436e-05, "loss": 0.9125, "step": 5228 }, { "epoch": 0.4672191569683025, "grad_norm": 0.4423523247241974, "learning_rate": 5.7703787461357575e-05, "loss": 1.0395, "step": 5229 }, { "epoch": 0.4673085084995644, "grad_norm": 0.4000280499458313, "learning_rate": 5.7689489216227413e-05, "loss": 0.9811, "step": 5230 }, { "epoch": 0.4673978600308263, "grad_norm": 0.4417012333869934, "learning_rate": 5.767519032702055e-05, "loss": 0.9685, "step": 5231 }, { "epoch": 0.46748721156208817, "grad_norm": 0.5120064616203308, "learning_rate": 5.7660890794934676e-05, "loss": 0.8708, "step": 5232 }, { "epoch": 0.46757656309335, "grad_norm": 0.4820534288883209, "learning_rate": 5.7646590621167564e-05, "loss": 0.9201, "step": 5233 }, { "epoch": 0.4676659146246119, "grad_norm": 0.46045050024986267, "learning_rate": 5.763228980691696e-05, "loss": 0.9647, "step": 5234 }, { "epoch": 0.46775526615587376, "grad_norm": 0.448191374540329, "learning_rate": 5.7617988353380746e-05, "loss": 0.9384, "step": 5235 }, { "epoch": 0.4678446176871356, "grad_norm": 0.5090450644493103, "learning_rate": 5.76036862617568e-05, "loss": 0.8526, "step": 5236 }, { "epoch": 0.46793396921839747, "grad_norm": 0.4289693236351013, "learning_rate": 5.758938353324308e-05, "loss": 0.9773, "step": 5237 }, { "epoch": 0.46802332074965936, "grad_norm": 0.522016704082489, "learning_rate": 5.757508016903759e-05, "loss": 0.867, "step": 5238 }, { "epoch": 0.46811267228092124, "grad_norm": 0.48837369680404663, "learning_rate": 5.756077617033838e-05, "loss": 0.9976, "step": 5239 }, { "epoch": 0.46820202381218307, "grad_norm": 0.41904178261756897, "learning_rate": 5.75464715383436e-05, "loss": 1.0333, "step": 5240 }, { "epoch": 0.46829137534344495, "grad_norm": 0.4596676230430603, "learning_rate": 5.7532166274251364e-05, "loss": 0.9308, "step": 5241 }, { "epoch": 0.46838072687470683, "grad_norm": 0.40436744689941406, "learning_rate": 5.751786037925993e-05, "loss": 1.0501, "step": 5242 }, { "epoch": 0.46847007840596866, "grad_norm": 0.4699536859989166, "learning_rate": 5.750355385456755e-05, "loss": 0.9808, "step": 5243 }, { "epoch": 0.46855942993723054, "grad_norm": 0.4330219030380249, "learning_rate": 5.7489246701372545e-05, "loss": 0.9724, "step": 5244 }, { "epoch": 0.4686487814684924, "grad_norm": 0.421366810798645, "learning_rate": 5.74749389208733e-05, "loss": 1.0166, "step": 5245 }, { "epoch": 0.4687381329997543, "grad_norm": 0.39331406354904175, "learning_rate": 5.746063051426827e-05, "loss": 0.9864, "step": 5246 }, { "epoch": 0.46882748453101614, "grad_norm": 0.5093923807144165, "learning_rate": 5.744632148275588e-05, "loss": 0.9477, "step": 5247 }, { "epoch": 0.468916836062278, "grad_norm": 0.480220228433609, "learning_rate": 5.74320118275347e-05, "loss": 0.934, "step": 5248 }, { "epoch": 0.4690061875935399, "grad_norm": 0.4359627664089203, "learning_rate": 5.741770154980331e-05, "loss": 0.9851, "step": 5249 }, { "epoch": 0.46909553912480173, "grad_norm": 0.4476899802684784, "learning_rate": 5.7403390650760356e-05, "loss": 0.8952, "step": 5250 }, { "epoch": 0.4691848906560636, "grad_norm": 0.42261365056037903, "learning_rate": 5.738907913160452e-05, "loss": 0.9202, "step": 5251 }, { "epoch": 0.4692742421873255, "grad_norm": 0.4985414743423462, "learning_rate": 5.7374766993534545e-05, "loss": 0.8766, "step": 5252 }, { "epoch": 0.4693635937185874, "grad_norm": 0.5040563941001892, "learning_rate": 5.736045423774923e-05, "loss": 0.9752, "step": 5253 }, { "epoch": 0.4694529452498492, "grad_norm": 0.4096861779689789, "learning_rate": 5.7346140865447395e-05, "loss": 1.0323, "step": 5254 }, { "epoch": 0.4695422967811111, "grad_norm": 0.4268275499343872, "learning_rate": 5.7331826877827965e-05, "loss": 1.0152, "step": 5255 }, { "epoch": 0.469631648312373, "grad_norm": 0.4520736336708069, "learning_rate": 5.73175122760899e-05, "loss": 0.9099, "step": 5256 }, { "epoch": 0.4697209998436348, "grad_norm": 0.4920952618122101, "learning_rate": 5.7303197061432165e-05, "loss": 0.9575, "step": 5257 }, { "epoch": 0.4698103513748967, "grad_norm": 0.44144368171691895, "learning_rate": 5.728888123505384e-05, "loss": 0.9656, "step": 5258 }, { "epoch": 0.46989970290615857, "grad_norm": 0.39283469319343567, "learning_rate": 5.727456479815401e-05, "loss": 1.0424, "step": 5259 }, { "epoch": 0.4699890544374204, "grad_norm": 0.4499633312225342, "learning_rate": 5.726024775193184e-05, "loss": 0.9702, "step": 5260 }, { "epoch": 0.4700784059686823, "grad_norm": 0.48507824540138245, "learning_rate": 5.724593009758653e-05, "loss": 0.9632, "step": 5261 }, { "epoch": 0.47016775749994416, "grad_norm": 0.4047746956348419, "learning_rate": 5.723161183631734e-05, "loss": 0.9593, "step": 5262 }, { "epoch": 0.47025710903120604, "grad_norm": 0.41394850611686707, "learning_rate": 5.7217292969323576e-05, "loss": 0.9266, "step": 5263 }, { "epoch": 0.47034646056246787, "grad_norm": 0.45513802766799927, "learning_rate": 5.720297349780459e-05, "loss": 1.0043, "step": 5264 }, { "epoch": 0.47043581209372975, "grad_norm": 0.4691244959831238, "learning_rate": 5.718865342295979e-05, "loss": 0.9583, "step": 5265 }, { "epoch": 0.47052516362499164, "grad_norm": 0.49911266565322876, "learning_rate": 5.7174332745988666e-05, "loss": 0.924, "step": 5266 }, { "epoch": 0.47061451515625347, "grad_norm": 0.441734254360199, "learning_rate": 5.716001146809068e-05, "loss": 0.9989, "step": 5267 }, { "epoch": 0.47070386668751535, "grad_norm": 0.40593159198760986, "learning_rate": 5.714568959046542e-05, "loss": 0.936, "step": 5268 }, { "epoch": 0.47079321821877723, "grad_norm": 0.3953614830970764, "learning_rate": 5.713136711431249e-05, "loss": 0.9676, "step": 5269 }, { "epoch": 0.4708825697500391, "grad_norm": 0.48885035514831543, "learning_rate": 5.7117044040831544e-05, "loss": 0.9002, "step": 5270 }, { "epoch": 0.47097192128130094, "grad_norm": 0.4319499433040619, "learning_rate": 5.7102720371222305e-05, "loss": 1.0525, "step": 5271 }, { "epoch": 0.4710612728125628, "grad_norm": 0.48925381898880005, "learning_rate": 5.708839610668453e-05, "loss": 1.023, "step": 5272 }, { "epoch": 0.4711506243438247, "grad_norm": 0.39478790760040283, "learning_rate": 5.7074071248418e-05, "loss": 0.9983, "step": 5273 }, { "epoch": 0.47123997587508654, "grad_norm": 0.5163027048110962, "learning_rate": 5.7059745797622624e-05, "loss": 0.9257, "step": 5274 }, { "epoch": 0.4713293274063484, "grad_norm": 0.5084983110427856, "learning_rate": 5.7045419755498264e-05, "loss": 0.9036, "step": 5275 }, { "epoch": 0.4714186789376103, "grad_norm": 0.5043264031410217, "learning_rate": 5.7031093123244925e-05, "loss": 1.005, "step": 5276 }, { "epoch": 0.4715080304688722, "grad_norm": 0.4621896743774414, "learning_rate": 5.7016765902062574e-05, "loss": 0.9614, "step": 5277 }, { "epoch": 0.471597382000134, "grad_norm": 0.45665669441223145, "learning_rate": 5.700243809315129e-05, "loss": 0.9324, "step": 5278 }, { "epoch": 0.4716867335313959, "grad_norm": 0.4607732892036438, "learning_rate": 5.6988109697711164e-05, "loss": 1.0348, "step": 5279 }, { "epoch": 0.4717760850626578, "grad_norm": 0.3779040277004242, "learning_rate": 5.697378071694238e-05, "loss": 1.0202, "step": 5280 }, { "epoch": 0.4718654365939196, "grad_norm": 0.4088393449783325, "learning_rate": 5.6959451152045126e-05, "loss": 0.9944, "step": 5281 }, { "epoch": 0.4719547881251815, "grad_norm": 0.5540783405303955, "learning_rate": 5.694512100421965e-05, "loss": 0.9225, "step": 5282 }, { "epoch": 0.4720441396564434, "grad_norm": 0.4990440011024475, "learning_rate": 5.6930790274666256e-05, "loss": 0.9773, "step": 5283 }, { "epoch": 0.47213349118770526, "grad_norm": 0.4320768713951111, "learning_rate": 5.691645896458529e-05, "loss": 1.0435, "step": 5284 }, { "epoch": 0.4722228427189671, "grad_norm": 0.47398531436920166, "learning_rate": 5.690212707517719e-05, "loss": 0.9129, "step": 5285 }, { "epoch": 0.47231219425022897, "grad_norm": 0.468569815158844, "learning_rate": 5.688779460764235e-05, "loss": 0.9181, "step": 5286 }, { "epoch": 0.47240154578149085, "grad_norm": 0.37630656361579895, "learning_rate": 5.687346156318132e-05, "loss": 1.0331, "step": 5287 }, { "epoch": 0.4724908973127527, "grad_norm": 0.49889349937438965, "learning_rate": 5.68591279429946e-05, "loss": 0.9137, "step": 5288 }, { "epoch": 0.47258024884401456, "grad_norm": 0.4064541161060333, "learning_rate": 5.68447937482828e-05, "loss": 1.0404, "step": 5289 }, { "epoch": 0.47266960037527644, "grad_norm": 0.416142076253891, "learning_rate": 5.6830458980246574e-05, "loss": 0.9972, "step": 5290 }, { "epoch": 0.47275895190653827, "grad_norm": 0.5175201892852783, "learning_rate": 5.681612364008659e-05, "loss": 0.9116, "step": 5291 }, { "epoch": 0.47284830343780015, "grad_norm": 0.5164797902107239, "learning_rate": 5.6801787729003595e-05, "loss": 0.941, "step": 5292 }, { "epoch": 0.47293765496906204, "grad_norm": 0.4773990213871002, "learning_rate": 5.6787451248198384e-05, "loss": 0.8495, "step": 5293 }, { "epoch": 0.4730270065003239, "grad_norm": 0.4284185469150543, "learning_rate": 5.677311419887178e-05, "loss": 1.0105, "step": 5294 }, { "epoch": 0.47311635803158575, "grad_norm": 0.39999738335609436, "learning_rate": 5.675877658222466e-05, "loss": 0.9562, "step": 5295 }, { "epoch": 0.47320570956284763, "grad_norm": 0.537138044834137, "learning_rate": 5.674443839945797e-05, "loss": 1.0542, "step": 5296 }, { "epoch": 0.4732950610941095, "grad_norm": 0.40088164806365967, "learning_rate": 5.673009965177266e-05, "loss": 0.9687, "step": 5297 }, { "epoch": 0.47338441262537134, "grad_norm": 0.4030899405479431, "learning_rate": 5.6715760340369775e-05, "loss": 1.0254, "step": 5298 }, { "epoch": 0.4734737641566332, "grad_norm": 0.41325297951698303, "learning_rate": 5.670142046645037e-05, "loss": 0.9592, "step": 5299 }, { "epoch": 0.4735631156878951, "grad_norm": 0.5127687454223633, "learning_rate": 5.668708003121557e-05, "loss": 0.9605, "step": 5300 }, { "epoch": 0.473652467219157, "grad_norm": 0.4479494094848633, "learning_rate": 5.667273903586655e-05, "loss": 0.9453, "step": 5301 }, { "epoch": 0.4737418187504188, "grad_norm": 0.41398364305496216, "learning_rate": 5.66583974816045e-05, "loss": 0.8632, "step": 5302 }, { "epoch": 0.4738311702816807, "grad_norm": 0.3753226101398468, "learning_rate": 5.66440553696307e-05, "loss": 1.002, "step": 5303 }, { "epoch": 0.4739205218129426, "grad_norm": 0.4559895694255829, "learning_rate": 5.662971270114642e-05, "loss": 0.9741, "step": 5304 }, { "epoch": 0.4740098733442044, "grad_norm": 0.4614042043685913, "learning_rate": 5.661536947735304e-05, "loss": 0.9813, "step": 5305 }, { "epoch": 0.4740992248754663, "grad_norm": 0.4653113782405853, "learning_rate": 5.660102569945197e-05, "loss": 0.9023, "step": 5306 }, { "epoch": 0.4741885764067282, "grad_norm": 0.4278392493724823, "learning_rate": 5.658668136864462e-05, "loss": 0.9543, "step": 5307 }, { "epoch": 0.47427792793799006, "grad_norm": 0.4447217583656311, "learning_rate": 5.6572336486132495e-05, "loss": 0.9646, "step": 5308 }, { "epoch": 0.4743672794692519, "grad_norm": 0.4308806359767914, "learning_rate": 5.655799105311713e-05, "loss": 0.8999, "step": 5309 }, { "epoch": 0.4744566310005138, "grad_norm": 0.401542603969574, "learning_rate": 5.654364507080011e-05, "loss": 0.9943, "step": 5310 }, { "epoch": 0.47454598253177566, "grad_norm": 0.4810407757759094, "learning_rate": 5.652929854038309e-05, "loss": 0.8805, "step": 5311 }, { "epoch": 0.4746353340630375, "grad_norm": 0.3643689453601837, "learning_rate": 5.65149514630677e-05, "loss": 0.9883, "step": 5312 }, { "epoch": 0.47472468559429937, "grad_norm": 0.34783321619033813, "learning_rate": 5.650060384005568e-05, "loss": 0.9739, "step": 5313 }, { "epoch": 0.47481403712556125, "grad_norm": 0.45785385370254517, "learning_rate": 5.6486255672548794e-05, "loss": 0.9802, "step": 5314 }, { "epoch": 0.47490338865682313, "grad_norm": 0.5383105278015137, "learning_rate": 5.6471906961748856e-05, "loss": 0.9742, "step": 5315 }, { "epoch": 0.47499274018808496, "grad_norm": 0.5157361626625061, "learning_rate": 5.6457557708857745e-05, "loss": 0.9478, "step": 5316 }, { "epoch": 0.47508209171934684, "grad_norm": 0.43339022994041443, "learning_rate": 5.644320791507732e-05, "loss": 0.965, "step": 5317 }, { "epoch": 0.4751714432506087, "grad_norm": 0.4871036112308502, "learning_rate": 5.642885758160956e-05, "loss": 0.9936, "step": 5318 }, { "epoch": 0.47526079478187055, "grad_norm": 0.4251110553741455, "learning_rate": 5.641450670965646e-05, "loss": 1.0055, "step": 5319 }, { "epoch": 0.47535014631313244, "grad_norm": 0.46318215131759644, "learning_rate": 5.640015530042004e-05, "loss": 0.9703, "step": 5320 }, { "epoch": 0.4754394978443943, "grad_norm": 0.4937099814414978, "learning_rate": 5.6385803355102395e-05, "loss": 0.9328, "step": 5321 }, { "epoch": 0.47552884937565615, "grad_norm": 0.4998278021812439, "learning_rate": 5.637145087490566e-05, "loss": 0.9198, "step": 5322 }, { "epoch": 0.47561820090691803, "grad_norm": 0.43673455715179443, "learning_rate": 5.6357097861031984e-05, "loss": 0.9822, "step": 5323 }, { "epoch": 0.4757075524381799, "grad_norm": 0.5582587122917175, "learning_rate": 5.63427443146836e-05, "loss": 0.9003, "step": 5324 }, { "epoch": 0.4757969039694418, "grad_norm": 0.47949889302253723, "learning_rate": 5.632839023706278e-05, "loss": 0.9357, "step": 5325 }, { "epoch": 0.4758862555007036, "grad_norm": 0.480672150850296, "learning_rate": 5.631403562937183e-05, "loss": 0.9649, "step": 5326 }, { "epoch": 0.4759756070319655, "grad_norm": 0.44741761684417725, "learning_rate": 5.629968049281308e-05, "loss": 1.0556, "step": 5327 }, { "epoch": 0.4760649585632274, "grad_norm": 0.5157933235168457, "learning_rate": 5.6285324828588934e-05, "loss": 1.0001, "step": 5328 }, { "epoch": 0.4761543100944892, "grad_norm": 0.45148682594299316, "learning_rate": 5.6270968637901844e-05, "loss": 0.9291, "step": 5329 }, { "epoch": 0.4762436616257511, "grad_norm": 0.43948331475257874, "learning_rate": 5.625661192195428e-05, "loss": 0.9562, "step": 5330 }, { "epoch": 0.476333013157013, "grad_norm": 0.49232932925224304, "learning_rate": 5.624225468194879e-05, "loss": 0.8804, "step": 5331 }, { "epoch": 0.47642236468827487, "grad_norm": 0.44230684638023376, "learning_rate": 5.622789691908794e-05, "loss": 0.9769, "step": 5332 }, { "epoch": 0.4765117162195367, "grad_norm": 0.46092236042022705, "learning_rate": 5.621353863457431e-05, "loss": 0.9966, "step": 5333 }, { "epoch": 0.4766010677507986, "grad_norm": 0.45058542490005493, "learning_rate": 5.61991798296106e-05, "loss": 0.921, "step": 5334 }, { "epoch": 0.47669041928206046, "grad_norm": 0.4571652114391327, "learning_rate": 5.6184820505399514e-05, "loss": 1.0159, "step": 5335 }, { "epoch": 0.4767797708133223, "grad_norm": 0.45455148816108704, "learning_rate": 5.617046066314377e-05, "loss": 0.9191, "step": 5336 }, { "epoch": 0.4768691223445842, "grad_norm": 0.5318629741668701, "learning_rate": 5.6156100304046186e-05, "loss": 0.8992, "step": 5337 }, { "epoch": 0.47695847387584606, "grad_norm": 0.42745161056518555, "learning_rate": 5.614173942930958e-05, "loss": 1.0188, "step": 5338 }, { "epoch": 0.47704782540710794, "grad_norm": 0.4684041738510132, "learning_rate": 5.6127378040136826e-05, "loss": 1.0166, "step": 5339 }, { "epoch": 0.47713717693836977, "grad_norm": 0.5044588446617126, "learning_rate": 5.6113016137730844e-05, "loss": 0.9859, "step": 5340 }, { "epoch": 0.47722652846963165, "grad_norm": 0.41825881600379944, "learning_rate": 5.6098653723294604e-05, "loss": 0.9519, "step": 5341 }, { "epoch": 0.47731588000089353, "grad_norm": 0.5564168095588684, "learning_rate": 5.6084290798031116e-05, "loss": 0.9854, "step": 5342 }, { "epoch": 0.47740523153215536, "grad_norm": 0.45696184039115906, "learning_rate": 5.6069927363143405e-05, "loss": 0.9567, "step": 5343 }, { "epoch": 0.47749458306341724, "grad_norm": 0.49420660734176636, "learning_rate": 5.6055563419834575e-05, "loss": 0.8842, "step": 5344 }, { "epoch": 0.4775839345946791, "grad_norm": 0.4271746873855591, "learning_rate": 5.604119896930777e-05, "loss": 0.9697, "step": 5345 }, { "epoch": 0.477673286125941, "grad_norm": 0.4472793936729431, "learning_rate": 5.602683401276615e-05, "loss": 0.9975, "step": 5346 }, { "epoch": 0.47776263765720284, "grad_norm": 0.5239841341972351, "learning_rate": 5.601246855141296e-05, "loss": 0.9098, "step": 5347 }, { "epoch": 0.4778519891884647, "grad_norm": 0.4218825697898865, "learning_rate": 5.599810258645142e-05, "loss": 0.9914, "step": 5348 }, { "epoch": 0.4779413407197266, "grad_norm": 0.4484994113445282, "learning_rate": 5.598373611908485e-05, "loss": 0.953, "step": 5349 }, { "epoch": 0.47803069225098843, "grad_norm": 0.49346399307250977, "learning_rate": 5.596936915051662e-05, "loss": 0.9513, "step": 5350 }, { "epoch": 0.4781200437822503, "grad_norm": 0.5029492974281311, "learning_rate": 5.595500168195007e-05, "loss": 0.9715, "step": 5351 }, { "epoch": 0.4782093953135122, "grad_norm": 0.4443117678165436, "learning_rate": 5.5940633714588675e-05, "loss": 0.9222, "step": 5352 }, { "epoch": 0.478298746844774, "grad_norm": 0.4409370720386505, "learning_rate": 5.592626524963587e-05, "loss": 0.9049, "step": 5353 }, { "epoch": 0.4783880983760359, "grad_norm": 0.44364142417907715, "learning_rate": 5.5911896288295183e-05, "loss": 0.9859, "step": 5354 }, { "epoch": 0.4784774499072978, "grad_norm": 0.52082759141922, "learning_rate": 5.5897526831770175e-05, "loss": 0.9021, "step": 5355 }, { "epoch": 0.4785668014385597, "grad_norm": 0.38146278262138367, "learning_rate": 5.588315688126442e-05, "loss": 1.0069, "step": 5356 }, { "epoch": 0.4786561529698215, "grad_norm": 0.44152483344078064, "learning_rate": 5.586878643798158e-05, "loss": 1.0141, "step": 5357 }, { "epoch": 0.4787455045010834, "grad_norm": 0.5023549795150757, "learning_rate": 5.58544155031253e-05, "loss": 0.964, "step": 5358 }, { "epoch": 0.47883485603234527, "grad_norm": 0.43117037415504456, "learning_rate": 5.584004407789933e-05, "loss": 1.0089, "step": 5359 }, { "epoch": 0.4789242075636071, "grad_norm": 0.44485437870025635, "learning_rate": 5.582567216350741e-05, "loss": 0.9615, "step": 5360 }, { "epoch": 0.479013559094869, "grad_norm": 0.4267984628677368, "learning_rate": 5.581129976115336e-05, "loss": 0.9292, "step": 5361 }, { "epoch": 0.47910291062613086, "grad_norm": 0.4499671459197998, "learning_rate": 5.579692687204101e-05, "loss": 0.9711, "step": 5362 }, { "epoch": 0.47919226215739275, "grad_norm": 0.5573053956031799, "learning_rate": 5.578255349737423e-05, "loss": 0.965, "step": 5363 }, { "epoch": 0.4792816136886546, "grad_norm": 0.41541776061058044, "learning_rate": 5.5768179638356963e-05, "loss": 0.9682, "step": 5364 }, { "epoch": 0.47937096521991646, "grad_norm": 0.4482407569885254, "learning_rate": 5.575380529619317e-05, "loss": 0.9534, "step": 5365 }, { "epoch": 0.47946031675117834, "grad_norm": 0.4427946209907532, "learning_rate": 5.573943047208685e-05, "loss": 1.0162, "step": 5366 }, { "epoch": 0.47954966828244017, "grad_norm": 0.5755056738853455, "learning_rate": 5.572505516724207e-05, "loss": 0.9427, "step": 5367 }, { "epoch": 0.47963901981370205, "grad_norm": 0.45547792315483093, "learning_rate": 5.5710679382862874e-05, "loss": 0.9308, "step": 5368 }, { "epoch": 0.47972837134496393, "grad_norm": 0.5226430892944336, "learning_rate": 5.5696303120153405e-05, "loss": 0.9416, "step": 5369 }, { "epoch": 0.4798177228762258, "grad_norm": 0.4200889468193054, "learning_rate": 5.568192638031784e-05, "loss": 1.042, "step": 5370 }, { "epoch": 0.47990707440748764, "grad_norm": 0.4149283468723297, "learning_rate": 5.566754916456037e-05, "loss": 0.9486, "step": 5371 }, { "epoch": 0.4799964259387495, "grad_norm": 0.5608735680580139, "learning_rate": 5.5653171474085254e-05, "loss": 0.8783, "step": 5372 }, { "epoch": 0.4800857774700114, "grad_norm": 0.47211307287216187, "learning_rate": 5.563879331009677e-05, "loss": 0.9475, "step": 5373 }, { "epoch": 0.48017512900127324, "grad_norm": 0.38663092255592346, "learning_rate": 5.562441467379923e-05, "loss": 0.9769, "step": 5374 }, { "epoch": 0.4802644805325351, "grad_norm": 0.36634334921836853, "learning_rate": 5.5610035566397014e-05, "loss": 0.9618, "step": 5375 }, { "epoch": 0.480353832063797, "grad_norm": 0.4378538131713867, "learning_rate": 5.559565598909452e-05, "loss": 0.9778, "step": 5376 }, { "epoch": 0.4804431835950589, "grad_norm": 0.40313366055488586, "learning_rate": 5.55812759430962e-05, "loss": 0.9487, "step": 5377 }, { "epoch": 0.4805325351263207, "grad_norm": 0.4929332733154297, "learning_rate": 5.5566895429606516e-05, "loss": 0.9624, "step": 5378 }, { "epoch": 0.4806218866575826, "grad_norm": 0.5455753803253174, "learning_rate": 5.5552514449830005e-05, "loss": 0.9789, "step": 5379 }, { "epoch": 0.4807112381888445, "grad_norm": 0.3865823745727539, "learning_rate": 5.5538133004971216e-05, "loss": 0.9508, "step": 5380 }, { "epoch": 0.4808005897201063, "grad_norm": 0.48918184638023376, "learning_rate": 5.5523751096234765e-05, "loss": 0.951, "step": 5381 }, { "epoch": 0.4808899412513682, "grad_norm": 0.49451667070388794, "learning_rate": 5.550936872482528e-05, "loss": 0.9803, "step": 5382 }, { "epoch": 0.4809792927826301, "grad_norm": 0.40212544798851013, "learning_rate": 5.549498589194744e-05, "loss": 0.9332, "step": 5383 }, { "epoch": 0.4810686443138919, "grad_norm": 0.44129249453544617, "learning_rate": 5.5480602598805955e-05, "loss": 0.9288, "step": 5384 }, { "epoch": 0.4811579958451538, "grad_norm": 0.4733429551124573, "learning_rate": 5.546621884660558e-05, "loss": 0.9687, "step": 5385 }, { "epoch": 0.48124734737641567, "grad_norm": 0.44276171922683716, "learning_rate": 5.545183463655112e-05, "loss": 0.9343, "step": 5386 }, { "epoch": 0.48133669890767755, "grad_norm": 0.505408525466919, "learning_rate": 5.54374499698474e-05, "loss": 0.8595, "step": 5387 }, { "epoch": 0.4814260504389394, "grad_norm": 0.4253001809120178, "learning_rate": 5.542306484769927e-05, "loss": 0.9035, "step": 5388 }, { "epoch": 0.48151540197020126, "grad_norm": 0.5153298377990723, "learning_rate": 5.540867927131166e-05, "loss": 0.9208, "step": 5389 }, { "epoch": 0.48160475350146315, "grad_norm": 0.443081796169281, "learning_rate": 5.5394293241889516e-05, "loss": 0.9694, "step": 5390 }, { "epoch": 0.481694105032725, "grad_norm": 0.4136885404586792, "learning_rate": 5.5379906760637814e-05, "loss": 0.9473, "step": 5391 }, { "epoch": 0.48178345656398686, "grad_norm": 0.3998454809188843, "learning_rate": 5.536551982876157e-05, "loss": 0.9494, "step": 5392 }, { "epoch": 0.48187280809524874, "grad_norm": 0.410869836807251, "learning_rate": 5.535113244746585e-05, "loss": 0.979, "step": 5393 }, { "epoch": 0.4819621596265106, "grad_norm": 0.4814499318599701, "learning_rate": 5.5336744617955735e-05, "loss": 0.9871, "step": 5394 }, { "epoch": 0.48205151115777245, "grad_norm": 0.44015729427337646, "learning_rate": 5.532235634143639e-05, "loss": 1.0168, "step": 5395 }, { "epoch": 0.48214086268903433, "grad_norm": 0.38507404923439026, "learning_rate": 5.530796761911295e-05, "loss": 0.9503, "step": 5396 }, { "epoch": 0.4822302142202962, "grad_norm": 0.5117488503456116, "learning_rate": 5.5293578452190675e-05, "loss": 0.9804, "step": 5397 }, { "epoch": 0.48231956575155804, "grad_norm": 0.4578893780708313, "learning_rate": 5.527918884187475e-05, "loss": 1.038, "step": 5398 }, { "epoch": 0.4824089172828199, "grad_norm": 0.5064082145690918, "learning_rate": 5.5264798789370496e-05, "loss": 0.8768, "step": 5399 }, { "epoch": 0.4824982688140818, "grad_norm": 0.42315277457237244, "learning_rate": 5.525040829588323e-05, "loss": 0.9454, "step": 5400 }, { "epoch": 0.4825876203453437, "grad_norm": 0.3969995379447937, "learning_rate": 5.52360173626183e-05, "loss": 0.9996, "step": 5401 }, { "epoch": 0.4826769718766055, "grad_norm": 0.5402255654335022, "learning_rate": 5.5221625990781115e-05, "loss": 0.9203, "step": 5402 }, { "epoch": 0.4827663234078674, "grad_norm": 0.4401646554470062, "learning_rate": 5.520723418157707e-05, "loss": 0.9595, "step": 5403 }, { "epoch": 0.4828556749391293, "grad_norm": 0.4190303683280945, "learning_rate": 5.519284193621167e-05, "loss": 1.0173, "step": 5404 }, { "epoch": 0.4829450264703911, "grad_norm": 0.38738173246383667, "learning_rate": 5.517844925589041e-05, "loss": 0.9696, "step": 5405 }, { "epoch": 0.483034378001653, "grad_norm": 0.4196470081806183, "learning_rate": 5.516405614181883e-05, "loss": 0.9762, "step": 5406 }, { "epoch": 0.4831237295329149, "grad_norm": 0.4423119127750397, "learning_rate": 5.514966259520249e-05, "loss": 0.9455, "step": 5407 }, { "epoch": 0.48321308106417676, "grad_norm": 0.45127880573272705, "learning_rate": 5.513526861724703e-05, "loss": 0.994, "step": 5408 }, { "epoch": 0.4833024325954386, "grad_norm": 0.3754526674747467, "learning_rate": 5.512087420915808e-05, "loss": 0.9263, "step": 5409 }, { "epoch": 0.4833917841267005, "grad_norm": 0.4244709014892578, "learning_rate": 5.510647937214133e-05, "loss": 1.0555, "step": 5410 }, { "epoch": 0.48348113565796236, "grad_norm": 0.4102063477039337, "learning_rate": 5.509208410740249e-05, "loss": 0.9678, "step": 5411 }, { "epoch": 0.4835704871892242, "grad_norm": 0.4029648005962372, "learning_rate": 5.5077688416147345e-05, "loss": 1.0338, "step": 5412 }, { "epoch": 0.48365983872048607, "grad_norm": 0.4508571922779083, "learning_rate": 5.506329229958166e-05, "loss": 1.035, "step": 5413 }, { "epoch": 0.48374919025174795, "grad_norm": 0.38453125953674316, "learning_rate": 5.504889575891128e-05, "loss": 1.0721, "step": 5414 }, { "epoch": 0.4838385417830098, "grad_norm": 0.4159301221370697, "learning_rate": 5.503449879534206e-05, "loss": 0.9116, "step": 5415 }, { "epoch": 0.48392789331427166, "grad_norm": 0.5297693610191345, "learning_rate": 5.5020101410079896e-05, "loss": 0.9292, "step": 5416 }, { "epoch": 0.48401724484553355, "grad_norm": 0.3714810907840729, "learning_rate": 5.5005703604330736e-05, "loss": 0.8904, "step": 5417 }, { "epoch": 0.48410659637679543, "grad_norm": 0.4217725694179535, "learning_rate": 5.499130537930055e-05, "loss": 0.9627, "step": 5418 }, { "epoch": 0.48419594790805726, "grad_norm": 0.5042391419410706, "learning_rate": 5.497690673619532e-05, "loss": 0.8922, "step": 5419 }, { "epoch": 0.48428529943931914, "grad_norm": 0.4610849916934967, "learning_rate": 5.496250767622111e-05, "loss": 0.9797, "step": 5420 }, { "epoch": 0.484374650970581, "grad_norm": 0.4403340518474579, "learning_rate": 5.494810820058398e-05, "loss": 0.9383, "step": 5421 }, { "epoch": 0.48446400250184285, "grad_norm": 0.38612040877342224, "learning_rate": 5.493370831049004e-05, "loss": 0.9801, "step": 5422 }, { "epoch": 0.48455335403310473, "grad_norm": 0.4851481318473816, "learning_rate": 5.4919308007145465e-05, "loss": 0.9842, "step": 5423 }, { "epoch": 0.4846427055643666, "grad_norm": 0.4784315824508667, "learning_rate": 5.490490729175638e-05, "loss": 0.9389, "step": 5424 }, { "epoch": 0.4847320570956285, "grad_norm": 0.39346399903297424, "learning_rate": 5.489050616552905e-05, "loss": 1.0562, "step": 5425 }, { "epoch": 0.4848214086268903, "grad_norm": 0.5939106941223145, "learning_rate": 5.487610462966969e-05, "loss": 0.9138, "step": 5426 }, { "epoch": 0.4849107601581522, "grad_norm": 0.4027441442012787, "learning_rate": 5.4861702685384586e-05, "loss": 1.0134, "step": 5427 }, { "epoch": 0.4850001116894141, "grad_norm": 0.5542457699775696, "learning_rate": 5.484730033388007e-05, "loss": 0.9148, "step": 5428 }, { "epoch": 0.4850894632206759, "grad_norm": 0.40890124440193176, "learning_rate": 5.483289757636247e-05, "loss": 1.011, "step": 5429 }, { "epoch": 0.4851788147519378, "grad_norm": 0.3923655152320862, "learning_rate": 5.481849441403819e-05, "loss": 0.9849, "step": 5430 }, { "epoch": 0.4852681662831997, "grad_norm": 0.444996178150177, "learning_rate": 5.480409084811363e-05, "loss": 0.9099, "step": 5431 }, { "epoch": 0.48535751781446157, "grad_norm": 0.5601452589035034, "learning_rate": 5.4789686879795263e-05, "loss": 0.8231, "step": 5432 }, { "epoch": 0.4854468693457234, "grad_norm": 0.4075312316417694, "learning_rate": 5.477528251028957e-05, "loss": 0.9326, "step": 5433 }, { "epoch": 0.4855362208769853, "grad_norm": 0.4870973825454712, "learning_rate": 5.4760877740803066e-05, "loss": 0.9654, "step": 5434 }, { "epoch": 0.48562557240824716, "grad_norm": 0.5276638269424438, "learning_rate": 5.47464725725423e-05, "loss": 0.9561, "step": 5435 }, { "epoch": 0.485714923939509, "grad_norm": 0.39273542165756226, "learning_rate": 5.473206700671386e-05, "loss": 0.9866, "step": 5436 }, { "epoch": 0.4858042754707709, "grad_norm": 0.48121076822280884, "learning_rate": 5.471766104452436e-05, "loss": 0.9523, "step": 5437 }, { "epoch": 0.48589362700203276, "grad_norm": 0.5641562938690186, "learning_rate": 5.4703254687180486e-05, "loss": 0.9552, "step": 5438 }, { "epoch": 0.48598297853329464, "grad_norm": 0.5305027961730957, "learning_rate": 5.468884793588888e-05, "loss": 0.953, "step": 5439 }, { "epoch": 0.48607233006455647, "grad_norm": 0.40917256474494934, "learning_rate": 5.467444079185629e-05, "loss": 0.9913, "step": 5440 }, { "epoch": 0.48616168159581835, "grad_norm": 0.5030852556228638, "learning_rate": 5.466003325628945e-05, "loss": 0.9434, "step": 5441 }, { "epoch": 0.48625103312708023, "grad_norm": 0.3704419434070587, "learning_rate": 5.464562533039515e-05, "loss": 1.023, "step": 5442 }, { "epoch": 0.48634038465834206, "grad_norm": 0.46701717376708984, "learning_rate": 5.463121701538021e-05, "loss": 0.8984, "step": 5443 }, { "epoch": 0.48642973618960395, "grad_norm": 0.43691638112068176, "learning_rate": 5.46168083124515e-05, "loss": 1.0049, "step": 5444 }, { "epoch": 0.48651908772086583, "grad_norm": 0.4441946744918823, "learning_rate": 5.460239922281586e-05, "loss": 0.9694, "step": 5445 }, { "epoch": 0.48660843925212766, "grad_norm": 0.42461204528808594, "learning_rate": 5.458798974768022e-05, "loss": 0.9686, "step": 5446 }, { "epoch": 0.48669779078338954, "grad_norm": 0.44621115922927856, "learning_rate": 5.457357988825155e-05, "loss": 1.0075, "step": 5447 }, { "epoch": 0.4867871423146514, "grad_norm": 0.5380200743675232, "learning_rate": 5.455916964573682e-05, "loss": 0.8571, "step": 5448 }, { "epoch": 0.4868764938459133, "grad_norm": 0.5656547546386719, "learning_rate": 5.4544759021343016e-05, "loss": 0.8964, "step": 5449 }, { "epoch": 0.48696584537717513, "grad_norm": 0.43907156586647034, "learning_rate": 5.453034801627721e-05, "loss": 1.0135, "step": 5450 }, { "epoch": 0.487055196908437, "grad_norm": 0.4475903809070587, "learning_rate": 5.451593663174647e-05, "loss": 0.8743, "step": 5451 }, { "epoch": 0.4871445484396989, "grad_norm": 0.42074811458587646, "learning_rate": 5.4501524868957886e-05, "loss": 0.9357, "step": 5452 }, { "epoch": 0.4872338999709607, "grad_norm": 0.47343945503234863, "learning_rate": 5.448711272911864e-05, "loss": 0.9863, "step": 5453 }, { "epoch": 0.4873232515022226, "grad_norm": 0.5782976746559143, "learning_rate": 5.4472700213435854e-05, "loss": 0.9197, "step": 5454 }, { "epoch": 0.4874126030334845, "grad_norm": 0.36520230770111084, "learning_rate": 5.4458287323116755e-05, "loss": 0.9455, "step": 5455 }, { "epoch": 0.4875019545647464, "grad_norm": 0.5519242882728577, "learning_rate": 5.444387405936856e-05, "loss": 0.8343, "step": 5456 }, { "epoch": 0.4875913060960082, "grad_norm": 0.4189887046813965, "learning_rate": 5.442946042339855e-05, "loss": 0.9955, "step": 5457 }, { "epoch": 0.4876806576272701, "grad_norm": 0.46009689569473267, "learning_rate": 5.441504641641403e-05, "loss": 0.9444, "step": 5458 }, { "epoch": 0.48777000915853197, "grad_norm": 0.5175769925117493, "learning_rate": 5.4400632039622293e-05, "loss": 0.9562, "step": 5459 }, { "epoch": 0.4878593606897938, "grad_norm": 0.41818684339523315, "learning_rate": 5.438621729423072e-05, "loss": 0.9337, "step": 5460 }, { "epoch": 0.4879487122210557, "grad_norm": 0.42660924792289734, "learning_rate": 5.4371802181446694e-05, "loss": 1.0022, "step": 5461 }, { "epoch": 0.48803806375231756, "grad_norm": 0.4058513939380646, "learning_rate": 5.4357386702477645e-05, "loss": 0.99, "step": 5462 }, { "epoch": 0.48812741528357945, "grad_norm": 0.43901243805885315, "learning_rate": 5.434297085853101e-05, "loss": 0.8644, "step": 5463 }, { "epoch": 0.4882167668148413, "grad_norm": 0.410495787858963, "learning_rate": 5.432855465081427e-05, "loss": 0.9906, "step": 5464 }, { "epoch": 0.48830611834610316, "grad_norm": 0.5046043395996094, "learning_rate": 5.431413808053492e-05, "loss": 1.0235, "step": 5465 }, { "epoch": 0.48839546987736504, "grad_norm": 0.4128507077693939, "learning_rate": 5.4299721148900554e-05, "loss": 1.0187, "step": 5466 }, { "epoch": 0.48848482140862687, "grad_norm": 0.4370626211166382, "learning_rate": 5.4285303857118685e-05, "loss": 0.935, "step": 5467 }, { "epoch": 0.48857417293988875, "grad_norm": 0.41134530305862427, "learning_rate": 5.4270886206396956e-05, "loss": 0.9916, "step": 5468 }, { "epoch": 0.48866352447115063, "grad_norm": 0.6058627367019653, "learning_rate": 5.4256468197942964e-05, "loss": 0.9549, "step": 5469 }, { "epoch": 0.4887528760024125, "grad_norm": 0.47504645586013794, "learning_rate": 5.42420498329644e-05, "loss": 1.0599, "step": 5470 }, { "epoch": 0.48884222753367434, "grad_norm": 0.42550885677337646, "learning_rate": 5.4227631112668955e-05, "loss": 0.9998, "step": 5471 }, { "epoch": 0.48893157906493623, "grad_norm": 0.4544508755207062, "learning_rate": 5.4213212038264325e-05, "loss": 0.9259, "step": 5472 }, { "epoch": 0.4890209305961981, "grad_norm": 0.41492947936058044, "learning_rate": 5.419879261095829e-05, "loss": 0.9139, "step": 5473 }, { "epoch": 0.48911028212745994, "grad_norm": 0.3988552391529083, "learning_rate": 5.418437283195862e-05, "loss": 1.0173, "step": 5474 }, { "epoch": 0.4891996336587218, "grad_norm": 0.48259106278419495, "learning_rate": 5.4169952702473114e-05, "loss": 0.9367, "step": 5475 }, { "epoch": 0.4892889851899837, "grad_norm": 0.38662248849868774, "learning_rate": 5.415553222370963e-05, "loss": 1.0017, "step": 5476 }, { "epoch": 0.48937833672124553, "grad_norm": 0.4289376139640808, "learning_rate": 5.414111139687601e-05, "loss": 0.9943, "step": 5477 }, { "epoch": 0.4894676882525074, "grad_norm": 0.42582669854164124, "learning_rate": 5.4126690223180175e-05, "loss": 1.0044, "step": 5478 }, { "epoch": 0.4895570397837693, "grad_norm": 0.3854537606239319, "learning_rate": 5.411226870383006e-05, "loss": 0.9932, "step": 5479 }, { "epoch": 0.4896463913150312, "grad_norm": 0.41988447308540344, "learning_rate": 5.4097846840033604e-05, "loss": 0.9692, "step": 5480 }, { "epoch": 0.489735742846293, "grad_norm": 0.424517959356308, "learning_rate": 5.408342463299878e-05, "loss": 0.9933, "step": 5481 }, { "epoch": 0.4898250943775549, "grad_norm": 0.524263858795166, "learning_rate": 5.4069002083933625e-05, "loss": 0.921, "step": 5482 }, { "epoch": 0.4899144459088168, "grad_norm": 0.5599950551986694, "learning_rate": 5.405457919404619e-05, "loss": 0.9442, "step": 5483 }, { "epoch": 0.4900037974400786, "grad_norm": 0.3951818645000458, "learning_rate": 5.4040155964544506e-05, "loss": 0.9275, "step": 5484 }, { "epoch": 0.4900931489713405, "grad_norm": 0.5091139674186707, "learning_rate": 5.40257323966367e-05, "loss": 1.0093, "step": 5485 }, { "epoch": 0.49018250050260237, "grad_norm": 0.46656838059425354, "learning_rate": 5.40113084915309e-05, "loss": 0.9953, "step": 5486 }, { "epoch": 0.49027185203386425, "grad_norm": 0.41893553733825684, "learning_rate": 5.399688425043524e-05, "loss": 0.9857, "step": 5487 }, { "epoch": 0.4903612035651261, "grad_norm": 0.5395665764808655, "learning_rate": 5.398245967455795e-05, "loss": 0.9104, "step": 5488 }, { "epoch": 0.49045055509638796, "grad_norm": 0.5200545191764832, "learning_rate": 5.396803476510719e-05, "loss": 0.8928, "step": 5489 }, { "epoch": 0.49053990662764985, "grad_norm": 0.5178142786026001, "learning_rate": 5.3953609523291225e-05, "loss": 0.9587, "step": 5490 }, { "epoch": 0.4906292581589117, "grad_norm": 0.46901631355285645, "learning_rate": 5.3939183950318316e-05, "loss": 0.9875, "step": 5491 }, { "epoch": 0.49071860969017356, "grad_norm": 0.3886154294013977, "learning_rate": 5.392475804739677e-05, "loss": 0.9903, "step": 5492 }, { "epoch": 0.49080796122143544, "grad_norm": 0.4371306598186493, "learning_rate": 5.391033181573491e-05, "loss": 0.9537, "step": 5493 }, { "epoch": 0.4908973127526973, "grad_norm": 0.5079873204231262, "learning_rate": 5.3895905256541066e-05, "loss": 0.934, "step": 5494 }, { "epoch": 0.49098666428395915, "grad_norm": 0.46429815888404846, "learning_rate": 5.3881478371023655e-05, "loss": 0.9762, "step": 5495 }, { "epoch": 0.49107601581522103, "grad_norm": 0.44685813784599304, "learning_rate": 5.3867051160391025e-05, "loss": 1.0016, "step": 5496 }, { "epoch": 0.4911653673464829, "grad_norm": 0.48742127418518066, "learning_rate": 5.385262362585165e-05, "loss": 0.8865, "step": 5497 }, { "epoch": 0.49125471887774474, "grad_norm": 0.410982221364975, "learning_rate": 5.3838195768613995e-05, "loss": 0.9393, "step": 5498 }, { "epoch": 0.49134407040900663, "grad_norm": 0.4197956919670105, "learning_rate": 5.382376758988652e-05, "loss": 0.9513, "step": 5499 }, { "epoch": 0.4914334219402685, "grad_norm": 0.48656147718429565, "learning_rate": 5.3809339090877745e-05, "loss": 0.8835, "step": 5500 }, { "epoch": 0.4915227734715304, "grad_norm": 0.43064260482788086, "learning_rate": 5.379491027279622e-05, "loss": 1.0597, "step": 5501 }, { "epoch": 0.4916121250027922, "grad_norm": 0.39117228984832764, "learning_rate": 5.3780481136850505e-05, "loss": 0.9828, "step": 5502 }, { "epoch": 0.4917014765340541, "grad_norm": 0.5010277032852173, "learning_rate": 5.37660516842492e-05, "loss": 1.0373, "step": 5503 }, { "epoch": 0.491790828065316, "grad_norm": 0.367341011762619, "learning_rate": 5.375162191620093e-05, "loss": 0.9854, "step": 5504 }, { "epoch": 0.4918801795965778, "grad_norm": 0.5271230340003967, "learning_rate": 5.373719183391434e-05, "loss": 0.8852, "step": 5505 }, { "epoch": 0.4919695311278397, "grad_norm": 0.4072398543357849, "learning_rate": 5.372276143859809e-05, "loss": 0.9513, "step": 5506 }, { "epoch": 0.4920588826591016, "grad_norm": 0.4533540606498718, "learning_rate": 5.3708330731460876e-05, "loss": 0.9483, "step": 5507 }, { "epoch": 0.49214823419036346, "grad_norm": 0.48236024379730225, "learning_rate": 5.369389971371145e-05, "loss": 1.027, "step": 5508 }, { "epoch": 0.4922375857216253, "grad_norm": 0.4861671030521393, "learning_rate": 5.367946838655855e-05, "loss": 0.905, "step": 5509 }, { "epoch": 0.4923269372528872, "grad_norm": 0.5689902305603027, "learning_rate": 5.366503675121095e-05, "loss": 0.9575, "step": 5510 }, { "epoch": 0.49241628878414906, "grad_norm": 0.5556491613388062, "learning_rate": 5.3650604808877456e-05, "loss": 0.9894, "step": 5511 }, { "epoch": 0.4925056403154109, "grad_norm": 0.3819589912891388, "learning_rate": 5.36361725607669e-05, "loss": 0.9288, "step": 5512 }, { "epoch": 0.49259499184667277, "grad_norm": 0.4342455267906189, "learning_rate": 5.3621740008088126e-05, "loss": 1.0173, "step": 5513 }, { "epoch": 0.49268434337793465, "grad_norm": 0.5042043924331665, "learning_rate": 5.360730715205002e-05, "loss": 1.0367, "step": 5514 }, { "epoch": 0.4927736949091965, "grad_norm": 0.4823912978172302, "learning_rate": 5.35928739938615e-05, "loss": 0.9668, "step": 5515 }, { "epoch": 0.49286304644045836, "grad_norm": 0.4222562313079834, "learning_rate": 5.357844053473148e-05, "loss": 0.9825, "step": 5516 }, { "epoch": 0.49295239797172025, "grad_norm": 0.48596012592315674, "learning_rate": 5.356400677586891e-05, "loss": 0.9142, "step": 5517 }, { "epoch": 0.49304174950298213, "grad_norm": 0.4577832818031311, "learning_rate": 5.3549572718482785e-05, "loss": 0.9555, "step": 5518 }, { "epoch": 0.49313110103424396, "grad_norm": 0.4585660696029663, "learning_rate": 5.353513836378213e-05, "loss": 0.9421, "step": 5519 }, { "epoch": 0.49322045256550584, "grad_norm": 0.44608667492866516, "learning_rate": 5.352070371297594e-05, "loss": 1.0034, "step": 5520 }, { "epoch": 0.4933098040967677, "grad_norm": 0.4560176432132721, "learning_rate": 5.350626876727328e-05, "loss": 0.9055, "step": 5521 }, { "epoch": 0.49339915562802955, "grad_norm": 0.4542362689971924, "learning_rate": 5.349183352788325e-05, "loss": 0.976, "step": 5522 }, { "epoch": 0.49348850715929143, "grad_norm": 0.4418618679046631, "learning_rate": 5.347739799601493e-05, "loss": 0.9314, "step": 5523 }, { "epoch": 0.4935778586905533, "grad_norm": 0.4947437345981598, "learning_rate": 5.3462962172877475e-05, "loss": 0.9936, "step": 5524 }, { "epoch": 0.4936672102218152, "grad_norm": 0.47864702343940735, "learning_rate": 5.3448526059680016e-05, "loss": 0.9143, "step": 5525 }, { "epoch": 0.49375656175307703, "grad_norm": 0.40519431233406067, "learning_rate": 5.343408965763174e-05, "loss": 0.9579, "step": 5526 }, { "epoch": 0.4938459132843389, "grad_norm": 0.4214836061000824, "learning_rate": 5.341965296794185e-05, "loss": 1.0252, "step": 5527 }, { "epoch": 0.4939352648156008, "grad_norm": 0.4515914022922516, "learning_rate": 5.340521599181959e-05, "loss": 0.9455, "step": 5528 }, { "epoch": 0.4940246163468626, "grad_norm": 0.39432477951049805, "learning_rate": 5.339077873047418e-05, "loss": 0.9309, "step": 5529 }, { "epoch": 0.4941139678781245, "grad_norm": 0.4477103054523468, "learning_rate": 5.337634118511491e-05, "loss": 0.9542, "step": 5530 }, { "epoch": 0.4942033194093864, "grad_norm": 0.48979735374450684, "learning_rate": 5.336190335695107e-05, "loss": 0.9266, "step": 5531 }, { "epoch": 0.49429267094064827, "grad_norm": 0.470674991607666, "learning_rate": 5.3347465247192e-05, "loss": 0.9846, "step": 5532 }, { "epoch": 0.4943820224719101, "grad_norm": 0.5746048092842102, "learning_rate": 5.333302685704702e-05, "loss": 0.9958, "step": 5533 }, { "epoch": 0.494471374003172, "grad_norm": 0.44571205973625183, "learning_rate": 5.331858818772553e-05, "loss": 0.9015, "step": 5534 }, { "epoch": 0.49456072553443386, "grad_norm": 0.39946070313453674, "learning_rate": 5.330414924043689e-05, "loss": 1.0127, "step": 5535 }, { "epoch": 0.4946500770656957, "grad_norm": 0.3653550148010254, "learning_rate": 5.3289710016390535e-05, "loss": 0.9527, "step": 5536 }, { "epoch": 0.4947394285969576, "grad_norm": 0.4395412802696228, "learning_rate": 5.327527051679591e-05, "loss": 0.9398, "step": 5537 }, { "epoch": 0.49482878012821946, "grad_norm": 0.5096445679664612, "learning_rate": 5.3260830742862456e-05, "loss": 0.9331, "step": 5538 }, { "epoch": 0.49491813165948134, "grad_norm": 0.5146742463111877, "learning_rate": 5.324639069579968e-05, "loss": 0.9474, "step": 5539 }, { "epoch": 0.49500748319074317, "grad_norm": 0.41445231437683105, "learning_rate": 5.3231950376817065e-05, "loss": 0.9408, "step": 5540 }, { "epoch": 0.49509683472200505, "grad_norm": 0.4815199077129364, "learning_rate": 5.3217509787124165e-05, "loss": 0.9785, "step": 5541 }, { "epoch": 0.49518618625326694, "grad_norm": 0.43417826294898987, "learning_rate": 5.320306892793052e-05, "loss": 0.9017, "step": 5542 }, { "epoch": 0.49527553778452876, "grad_norm": 0.4291759133338928, "learning_rate": 5.318862780044571e-05, "loss": 0.9912, "step": 5543 }, { "epoch": 0.49536488931579065, "grad_norm": 0.41811853647232056, "learning_rate": 5.317418640587934e-05, "loss": 0.9695, "step": 5544 }, { "epoch": 0.49545424084705253, "grad_norm": 0.4489332139492035, "learning_rate": 5.315974474544102e-05, "loss": 0.9603, "step": 5545 }, { "epoch": 0.49554359237831436, "grad_norm": 0.4664028584957123, "learning_rate": 5.3145302820340404e-05, "loss": 0.9628, "step": 5546 }, { "epoch": 0.49563294390957624, "grad_norm": 0.5637416243553162, "learning_rate": 5.313086063178715e-05, "loss": 1.0238, "step": 5547 }, { "epoch": 0.4957222954408381, "grad_norm": 0.4180266559123993, "learning_rate": 5.311641818099093e-05, "loss": 0.9883, "step": 5548 }, { "epoch": 0.4958116469721, "grad_norm": 0.3877975344657898, "learning_rate": 5.310197546916149e-05, "loss": 0.93, "step": 5549 }, { "epoch": 0.49590099850336183, "grad_norm": 0.3823062777519226, "learning_rate": 5.308753249750853e-05, "loss": 1.0135, "step": 5550 }, { "epoch": 0.4959903500346237, "grad_norm": 0.4974936246871948, "learning_rate": 5.3073089267241804e-05, "loss": 0.9422, "step": 5551 }, { "epoch": 0.4960797015658856, "grad_norm": 0.44719165563583374, "learning_rate": 5.305864577957111e-05, "loss": 0.9379, "step": 5552 }, { "epoch": 0.49616905309714743, "grad_norm": 0.4038889706134796, "learning_rate": 5.30442020357062e-05, "loss": 0.9951, "step": 5553 }, { "epoch": 0.4962584046284093, "grad_norm": 0.497601181268692, "learning_rate": 5.3029758036856955e-05, "loss": 0.9611, "step": 5554 }, { "epoch": 0.4963477561596712, "grad_norm": 0.4878460764884949, "learning_rate": 5.301531378423317e-05, "loss": 0.9078, "step": 5555 }, { "epoch": 0.4964371076909331, "grad_norm": 0.5067799687385559, "learning_rate": 5.300086927904471e-05, "loss": 0.9569, "step": 5556 }, { "epoch": 0.4965264592221949, "grad_norm": 0.3875921368598938, "learning_rate": 5.298642452250145e-05, "loss": 0.9997, "step": 5557 }, { "epoch": 0.4966158107534568, "grad_norm": 0.3980264365673065, "learning_rate": 5.297197951581332e-05, "loss": 1.0059, "step": 5558 }, { "epoch": 0.49670516228471867, "grad_norm": 0.42988121509552, "learning_rate": 5.295753426019022e-05, "loss": 0.9343, "step": 5559 }, { "epoch": 0.4967945138159805, "grad_norm": 0.4407704174518585, "learning_rate": 5.294308875684211e-05, "loss": 0.9433, "step": 5560 }, { "epoch": 0.4968838653472424, "grad_norm": 0.4002476930618286, "learning_rate": 5.2928643006978926e-05, "loss": 1.0739, "step": 5561 }, { "epoch": 0.49697321687850426, "grad_norm": 0.44479769468307495, "learning_rate": 5.291419701181068e-05, "loss": 0.9789, "step": 5562 }, { "epoch": 0.49706256840976615, "grad_norm": 0.4247623383998871, "learning_rate": 5.2899750772547385e-05, "loss": 0.9605, "step": 5563 }, { "epoch": 0.497151919941028, "grad_norm": 0.4519825279712677, "learning_rate": 5.288530429039904e-05, "loss": 0.9244, "step": 5564 }, { "epoch": 0.49724127147228986, "grad_norm": 0.42883196473121643, "learning_rate": 5.287085756657573e-05, "loss": 0.955, "step": 5565 }, { "epoch": 0.49733062300355174, "grad_norm": 0.3723878562450409, "learning_rate": 5.2856410602287465e-05, "loss": 0.9387, "step": 5566 }, { "epoch": 0.49741997453481357, "grad_norm": 0.4619128704071045, "learning_rate": 5.284196339874438e-05, "loss": 0.9301, "step": 5567 }, { "epoch": 0.49750932606607545, "grad_norm": 0.42548492550849915, "learning_rate": 5.282751595715656e-05, "loss": 1.0291, "step": 5568 }, { "epoch": 0.49759867759733734, "grad_norm": 0.43272992968559265, "learning_rate": 5.281306827873416e-05, "loss": 1.0186, "step": 5569 }, { "epoch": 0.4976880291285992, "grad_norm": 0.4682878851890564, "learning_rate": 5.279862036468728e-05, "loss": 0.9629, "step": 5570 }, { "epoch": 0.49777738065986105, "grad_norm": 0.5456759929656982, "learning_rate": 5.2784172216226124e-05, "loss": 0.8809, "step": 5571 }, { "epoch": 0.49786673219112293, "grad_norm": 0.38989564776420593, "learning_rate": 5.2769723834560855e-05, "loss": 0.9556, "step": 5572 }, { "epoch": 0.4979560837223848, "grad_norm": 0.4226441979408264, "learning_rate": 5.275527522090171e-05, "loss": 0.9417, "step": 5573 }, { "epoch": 0.49804543525364664, "grad_norm": 0.4417705833911896, "learning_rate": 5.2740826376458894e-05, "loss": 0.9943, "step": 5574 }, { "epoch": 0.4981347867849085, "grad_norm": 0.4658608138561249, "learning_rate": 5.2726377302442656e-05, "loss": 0.907, "step": 5575 }, { "epoch": 0.4982241383161704, "grad_norm": 0.516185998916626, "learning_rate": 5.2711928000063245e-05, "loss": 0.9793, "step": 5576 }, { "epoch": 0.49831348984743223, "grad_norm": 0.4596862494945526, "learning_rate": 5.269747847053096e-05, "loss": 0.9666, "step": 5577 }, { "epoch": 0.4984028413786941, "grad_norm": 0.43760859966278076, "learning_rate": 5.268302871505611e-05, "loss": 0.9403, "step": 5578 }, { "epoch": 0.498492192909956, "grad_norm": 0.4713875949382782, "learning_rate": 5.2668578734849e-05, "loss": 0.867, "step": 5579 }, { "epoch": 0.4985815444412179, "grad_norm": 0.4082961678504944, "learning_rate": 5.265412853111997e-05, "loss": 1.002, "step": 5580 }, { "epoch": 0.4986708959724797, "grad_norm": 0.4304913282394409, "learning_rate": 5.2639678105079394e-05, "loss": 0.9176, "step": 5581 }, { "epoch": 0.4987602475037416, "grad_norm": 0.5440099835395813, "learning_rate": 5.262522745793764e-05, "loss": 0.8735, "step": 5582 }, { "epoch": 0.4988495990350035, "grad_norm": 0.43295422196388245, "learning_rate": 5.26107765909051e-05, "loss": 0.9699, "step": 5583 }, { "epoch": 0.4989389505662653, "grad_norm": 0.5452165603637695, "learning_rate": 5.25963255051922e-05, "loss": 1.0142, "step": 5584 }, { "epoch": 0.4990283020975272, "grad_norm": 0.4139503240585327, "learning_rate": 5.258187420200935e-05, "loss": 0.9684, "step": 5585 }, { "epoch": 0.49911765362878907, "grad_norm": 0.4647497832775116, "learning_rate": 5.256742268256703e-05, "loss": 0.9009, "step": 5586 }, { "epoch": 0.49920700516005095, "grad_norm": 0.41619575023651123, "learning_rate": 5.255297094807568e-05, "loss": 0.9714, "step": 5587 }, { "epoch": 0.4992963566913128, "grad_norm": 0.46064916253089905, "learning_rate": 5.25385189997458e-05, "loss": 0.9339, "step": 5588 }, { "epoch": 0.49938570822257466, "grad_norm": 0.46546754240989685, "learning_rate": 5.252406683878791e-05, "loss": 0.9525, "step": 5589 }, { "epoch": 0.49947505975383655, "grad_norm": 0.5033959150314331, "learning_rate": 5.250961446641251e-05, "loss": 0.9203, "step": 5590 }, { "epoch": 0.4995644112850984, "grad_norm": 0.44631263613700867, "learning_rate": 5.249516188383014e-05, "loss": 0.9998, "step": 5591 }, { "epoch": 0.49965376281636026, "grad_norm": 0.4394984543323517, "learning_rate": 5.248070909225136e-05, "loss": 0.9694, "step": 5592 }, { "epoch": 0.49974311434762214, "grad_norm": 0.5696101784706116, "learning_rate": 5.2466256092886754e-05, "loss": 0.93, "step": 5593 }, { "epoch": 0.499832465878884, "grad_norm": 0.4666324555873871, "learning_rate": 5.245180288694692e-05, "loss": 0.8524, "step": 5594 }, { "epoch": 0.49992181741014585, "grad_norm": 0.5041062235832214, "learning_rate": 5.243734947564247e-05, "loss": 0.9306, "step": 5595 }, { "epoch": 0.5000111689414077, "grad_norm": 0.4425108730792999, "learning_rate": 5.242289586018401e-05, "loss": 0.9774, "step": 5596 }, { "epoch": 0.5001005204726696, "grad_norm": 0.38095951080322266, "learning_rate": 5.240844204178219e-05, "loss": 1.0063, "step": 5597 }, { "epoch": 0.5001898720039315, "grad_norm": 0.4874595105648041, "learning_rate": 5.23939880216477e-05, "loss": 0.8665, "step": 5598 }, { "epoch": 0.5002792235351934, "grad_norm": 0.4805736839771271, "learning_rate": 5.237953380099116e-05, "loss": 0.9152, "step": 5599 }, { "epoch": 0.5003685750664552, "grad_norm": 0.60560142993927, "learning_rate": 5.236507938102334e-05, "loss": 0.8751, "step": 5600 }, { "epoch": 0.500457926597717, "grad_norm": 0.37698978185653687, "learning_rate": 5.2350624762954884e-05, "loss": 0.9901, "step": 5601 }, { "epoch": 0.5005472781289789, "grad_norm": 0.5520381331443787, "learning_rate": 5.233616994799655e-05, "loss": 0.9581, "step": 5602 }, { "epoch": 0.5006366296602408, "grad_norm": 0.4423448443412781, "learning_rate": 5.232171493735909e-05, "loss": 0.936, "step": 5603 }, { "epoch": 0.5007259811915027, "grad_norm": 0.4027252495288849, "learning_rate": 5.230725973225324e-05, "loss": 1.0058, "step": 5604 }, { "epoch": 0.5008153327227646, "grad_norm": 0.39916396141052246, "learning_rate": 5.229280433388982e-05, "loss": 1.1366, "step": 5605 }, { "epoch": 0.5009046842540265, "grad_norm": 0.49851128458976746, "learning_rate": 5.227834874347958e-05, "loss": 0.8975, "step": 5606 }, { "epoch": 0.5009940357852882, "grad_norm": 0.37986597418785095, "learning_rate": 5.226389296223335e-05, "loss": 1.0004, "step": 5607 }, { "epoch": 0.5010833873165501, "grad_norm": 0.41239768266677856, "learning_rate": 5.224943699136195e-05, "loss": 0.9784, "step": 5608 }, { "epoch": 0.501172738847812, "grad_norm": 0.40920108556747437, "learning_rate": 5.223498083207622e-05, "loss": 0.9932, "step": 5609 }, { "epoch": 0.5012620903790739, "grad_norm": 0.4581597149372101, "learning_rate": 5.222052448558704e-05, "loss": 0.914, "step": 5610 }, { "epoch": 0.5013514419103358, "grad_norm": 0.39035099744796753, "learning_rate": 5.2206067953105255e-05, "loss": 0.992, "step": 5611 }, { "epoch": 0.5014407934415976, "grad_norm": 0.4067721962928772, "learning_rate": 5.219161123584176e-05, "loss": 0.9821, "step": 5612 }, { "epoch": 0.5015301449728595, "grad_norm": 0.5137872695922852, "learning_rate": 5.217715433500747e-05, "loss": 0.9398, "step": 5613 }, { "epoch": 0.5016194965041213, "grad_norm": 0.5111674070358276, "learning_rate": 5.21626972518133e-05, "loss": 0.9266, "step": 5614 }, { "epoch": 0.5017088480353832, "grad_norm": 0.41246017813682556, "learning_rate": 5.214823998747017e-05, "loss": 0.965, "step": 5615 }, { "epoch": 0.5017981995666451, "grad_norm": 0.48276659846305847, "learning_rate": 5.213378254318906e-05, "loss": 0.9673, "step": 5616 }, { "epoch": 0.501887551097907, "grad_norm": 0.48614341020584106, "learning_rate": 5.211932492018091e-05, "loss": 0.874, "step": 5617 }, { "epoch": 0.5019769026291688, "grad_norm": 0.42692437767982483, "learning_rate": 5.21048671196567e-05, "loss": 0.9371, "step": 5618 }, { "epoch": 0.5020662541604307, "grad_norm": 0.41222038865089417, "learning_rate": 5.209040914282744e-05, "loss": 0.9588, "step": 5619 }, { "epoch": 0.5021556056916925, "grad_norm": 0.4768015146255493, "learning_rate": 5.2075950990904133e-05, "loss": 0.943, "step": 5620 }, { "epoch": 0.5022449572229544, "grad_norm": 0.37785643339157104, "learning_rate": 5.20614926650978e-05, "loss": 0.9968, "step": 5621 }, { "epoch": 0.5023343087542163, "grad_norm": 0.4530482590198517, "learning_rate": 5.2047034166619476e-05, "loss": 0.9125, "step": 5622 }, { "epoch": 0.5024236602854781, "grad_norm": 0.4255998432636261, "learning_rate": 5.2032575496680224e-05, "loss": 0.9464, "step": 5623 }, { "epoch": 0.50251301181674, "grad_norm": 0.41948202252388, "learning_rate": 5.201811665649111e-05, "loss": 0.9442, "step": 5624 }, { "epoch": 0.5026023633480019, "grad_norm": 0.38996177911758423, "learning_rate": 5.200365764726323e-05, "loss": 0.9536, "step": 5625 }, { "epoch": 0.5026917148792638, "grad_norm": 0.4734954833984375, "learning_rate": 5.198919847020765e-05, "loss": 0.9781, "step": 5626 }, { "epoch": 0.5027810664105256, "grad_norm": 0.4030005633831024, "learning_rate": 5.197473912653549e-05, "loss": 0.9611, "step": 5627 }, { "epoch": 0.5028704179417874, "grad_norm": 0.4445195496082306, "learning_rate": 5.1960279617457874e-05, "loss": 0.9721, "step": 5628 }, { "epoch": 0.5029597694730493, "grad_norm": 0.3933766186237335, "learning_rate": 5.1945819944185944e-05, "loss": 0.9375, "step": 5629 }, { "epoch": 0.5030491210043112, "grad_norm": 0.40946388244628906, "learning_rate": 5.193136010793088e-05, "loss": 1.0194, "step": 5630 }, { "epoch": 0.5031384725355731, "grad_norm": 0.49432265758514404, "learning_rate": 5.1916900109903796e-05, "loss": 0.9872, "step": 5631 }, { "epoch": 0.503227824066835, "grad_norm": 0.5955692529678345, "learning_rate": 5.190243995131591e-05, "loss": 0.9548, "step": 5632 }, { "epoch": 0.5033171755980969, "grad_norm": 0.47443050146102905, "learning_rate": 5.188797963337839e-05, "loss": 0.9318, "step": 5633 }, { "epoch": 0.5034065271293586, "grad_norm": 0.5481427907943726, "learning_rate": 5.187351915730245e-05, "loss": 0.8878, "step": 5634 }, { "epoch": 0.5034958786606205, "grad_norm": 0.5294390320777893, "learning_rate": 5.185905852429933e-05, "loss": 0.8749, "step": 5635 }, { "epoch": 0.5035852301918824, "grad_norm": 0.4480739235877991, "learning_rate": 5.184459773558022e-05, "loss": 1.0006, "step": 5636 }, { "epoch": 0.5036745817231443, "grad_norm": 0.4594273865222931, "learning_rate": 5.183013679235639e-05, "loss": 1.0186, "step": 5637 }, { "epoch": 0.5037639332544062, "grad_norm": 0.4672705829143524, "learning_rate": 5.1815675695839095e-05, "loss": 0.8973, "step": 5638 }, { "epoch": 0.503853284785668, "grad_norm": 0.405490517616272, "learning_rate": 5.180121444723961e-05, "loss": 0.9966, "step": 5639 }, { "epoch": 0.5039426363169299, "grad_norm": 0.4211828410625458, "learning_rate": 5.178675304776923e-05, "loss": 0.9086, "step": 5640 }, { "epoch": 0.5040319878481917, "grad_norm": 0.4052455723285675, "learning_rate": 5.177229149863922e-05, "loss": 0.9668, "step": 5641 }, { "epoch": 0.5041213393794536, "grad_norm": 0.4231189787387848, "learning_rate": 5.1757829801060905e-05, "loss": 0.9387, "step": 5642 }, { "epoch": 0.5042106909107155, "grad_norm": 0.48749008774757385, "learning_rate": 5.1743367956245614e-05, "loss": 0.9537, "step": 5643 }, { "epoch": 0.5043000424419773, "grad_norm": 0.45534324645996094, "learning_rate": 5.172890596540466e-05, "loss": 0.9535, "step": 5644 }, { "epoch": 0.5043893939732392, "grad_norm": 0.39456576108932495, "learning_rate": 5.171444382974943e-05, "loss": 1.0038, "step": 5645 }, { "epoch": 0.5044787455045011, "grad_norm": 0.4487841725349426, "learning_rate": 5.169998155049123e-05, "loss": 0.9386, "step": 5646 }, { "epoch": 0.504568097035763, "grad_norm": 0.4985732138156891, "learning_rate": 5.168551912884147e-05, "loss": 0.9678, "step": 5647 }, { "epoch": 0.5046574485670248, "grad_norm": 0.42822030186653137, "learning_rate": 5.16710565660115e-05, "loss": 1.0048, "step": 5648 }, { "epoch": 0.5047468000982867, "grad_norm": 0.4911450445652008, "learning_rate": 5.165659386321273e-05, "loss": 0.9387, "step": 5649 }, { "epoch": 0.5048361516295485, "grad_norm": 0.43765896558761597, "learning_rate": 5.164213102165659e-05, "loss": 0.9642, "step": 5650 }, { "epoch": 0.5049255031608104, "grad_norm": 0.4351087212562561, "learning_rate": 5.1627668042554454e-05, "loss": 0.9759, "step": 5651 }, { "epoch": 0.5050148546920723, "grad_norm": 0.49687519669532776, "learning_rate": 5.1613204927117785e-05, "loss": 0.889, "step": 5652 }, { "epoch": 0.5051042062233342, "grad_norm": 0.4558013379573822, "learning_rate": 5.1598741676557995e-05, "loss": 1.0162, "step": 5653 }, { "epoch": 0.5051935577545961, "grad_norm": 0.5322263240814209, "learning_rate": 5.158427829208655e-05, "loss": 0.929, "step": 5654 }, { "epoch": 0.5052829092858578, "grad_norm": 0.4086034893989563, "learning_rate": 5.1569814774914916e-05, "loss": 0.9369, "step": 5655 }, { "epoch": 0.5053722608171197, "grad_norm": 0.44374528527259827, "learning_rate": 5.155535112625456e-05, "loss": 0.9484, "step": 5656 }, { "epoch": 0.5054616123483816, "grad_norm": 0.495838463306427, "learning_rate": 5.154088734731696e-05, "loss": 0.9707, "step": 5657 }, { "epoch": 0.5055509638796435, "grad_norm": 0.44730693101882935, "learning_rate": 5.152642343931362e-05, "loss": 0.956, "step": 5658 }, { "epoch": 0.5056403154109054, "grad_norm": 0.4501069188117981, "learning_rate": 5.1511959403456055e-05, "loss": 0.9622, "step": 5659 }, { "epoch": 0.5057296669421673, "grad_norm": 0.41660791635513306, "learning_rate": 5.149749524095577e-05, "loss": 0.9941, "step": 5660 }, { "epoch": 0.5058190184734291, "grad_norm": 0.4883604347705841, "learning_rate": 5.1483030953024305e-05, "loss": 1.0128, "step": 5661 }, { "epoch": 0.5059083700046909, "grad_norm": 0.4125032424926758, "learning_rate": 5.146856654087318e-05, "loss": 1.0034, "step": 5662 }, { "epoch": 0.5059977215359528, "grad_norm": 0.4337124824523926, "learning_rate": 5.145410200571395e-05, "loss": 0.9983, "step": 5663 }, { "epoch": 0.5060870730672147, "grad_norm": 0.42638298869132996, "learning_rate": 5.1439637348758175e-05, "loss": 0.9504, "step": 5664 }, { "epoch": 0.5061764245984766, "grad_norm": 0.5545623302459717, "learning_rate": 5.142517257121744e-05, "loss": 0.9618, "step": 5665 }, { "epoch": 0.5062657761297384, "grad_norm": 0.3784050941467285, "learning_rate": 5.14107076743033e-05, "loss": 1.0023, "step": 5666 }, { "epoch": 0.5063551276610003, "grad_norm": 0.5684899687767029, "learning_rate": 5.139624265922737e-05, "loss": 0.9455, "step": 5667 }, { "epoch": 0.5064444791922622, "grad_norm": 0.5042484402656555, "learning_rate": 5.138177752720122e-05, "loss": 0.9237, "step": 5668 }, { "epoch": 0.506533830723524, "grad_norm": 0.4414721727371216, "learning_rate": 5.1367312279436486e-05, "loss": 0.9426, "step": 5669 }, { "epoch": 0.5066231822547859, "grad_norm": 0.5522531867027283, "learning_rate": 5.135284691714477e-05, "loss": 0.9738, "step": 5670 }, { "epoch": 0.5067125337860477, "grad_norm": 0.45710471272468567, "learning_rate": 5.13383814415377e-05, "loss": 0.9844, "step": 5671 }, { "epoch": 0.5068018853173096, "grad_norm": 0.47510063648223877, "learning_rate": 5.132391585382692e-05, "loss": 1.033, "step": 5672 }, { "epoch": 0.5068912368485715, "grad_norm": 0.4167405366897583, "learning_rate": 5.130945015522407e-05, "loss": 0.9195, "step": 5673 }, { "epoch": 0.5069805883798334, "grad_norm": 0.4073796570301056, "learning_rate": 5.129498434694081e-05, "loss": 0.9487, "step": 5674 }, { "epoch": 0.5070699399110953, "grad_norm": 0.37733787298202515, "learning_rate": 5.128051843018882e-05, "loss": 0.9742, "step": 5675 }, { "epoch": 0.507159291442357, "grad_norm": 0.3933600187301636, "learning_rate": 5.126605240617975e-05, "loss": 0.9361, "step": 5676 }, { "epoch": 0.5072486429736189, "grad_norm": 0.41629257798194885, "learning_rate": 5.1251586276125305e-05, "loss": 0.9511, "step": 5677 }, { "epoch": 0.5073379945048808, "grad_norm": 0.47717419266700745, "learning_rate": 5.123712004123716e-05, "loss": 0.9537, "step": 5678 }, { "epoch": 0.5074273460361427, "grad_norm": 0.5077764391899109, "learning_rate": 5.122265370272703e-05, "loss": 0.9123, "step": 5679 }, { "epoch": 0.5075166975674046, "grad_norm": 0.4773484170436859, "learning_rate": 5.1208187261806615e-05, "loss": 0.9243, "step": 5680 }, { "epoch": 0.5076060490986665, "grad_norm": 0.46315327286720276, "learning_rate": 5.119372071968764e-05, "loss": 0.962, "step": 5681 }, { "epoch": 0.5076954006299284, "grad_norm": 0.4640251100063324, "learning_rate": 5.117925407758184e-05, "loss": 0.9902, "step": 5682 }, { "epoch": 0.5077847521611901, "grad_norm": 0.5840420722961426, "learning_rate": 5.116478733670092e-05, "loss": 0.9444, "step": 5683 }, { "epoch": 0.507874103692452, "grad_norm": 0.46668434143066406, "learning_rate": 5.115032049825667e-05, "loss": 0.9198, "step": 5684 }, { "epoch": 0.5079634552237139, "grad_norm": 0.4504847526550293, "learning_rate": 5.113585356346081e-05, "loss": 1.0119, "step": 5685 }, { "epoch": 0.5080528067549758, "grad_norm": 0.531743586063385, "learning_rate": 5.1121386533525105e-05, "loss": 0.8559, "step": 5686 }, { "epoch": 0.5081421582862377, "grad_norm": 0.47331151366233826, "learning_rate": 5.110691940966133e-05, "loss": 0.9896, "step": 5687 }, { "epoch": 0.5082315098174995, "grad_norm": 0.42783498764038086, "learning_rate": 5.109245219308124e-05, "loss": 0.9287, "step": 5688 }, { "epoch": 0.5083208613487613, "grad_norm": 0.47757160663604736, "learning_rate": 5.107798488499664e-05, "loss": 1.0162, "step": 5689 }, { "epoch": 0.5084102128800232, "grad_norm": 0.44240617752075195, "learning_rate": 5.106351748661932e-05, "loss": 0.9406, "step": 5690 }, { "epoch": 0.5084995644112851, "grad_norm": 0.4076976776123047, "learning_rate": 5.104904999916108e-05, "loss": 1.0159, "step": 5691 }, { "epoch": 0.508588915942547, "grad_norm": 0.45423173904418945, "learning_rate": 5.103458242383371e-05, "loss": 0.948, "step": 5692 }, { "epoch": 0.5086782674738088, "grad_norm": 0.42676976323127747, "learning_rate": 5.102011476184903e-05, "loss": 0.9556, "step": 5693 }, { "epoch": 0.5087676190050707, "grad_norm": 0.4629276990890503, "learning_rate": 5.1005647014418866e-05, "loss": 0.9159, "step": 5694 }, { "epoch": 0.5088569705363326, "grad_norm": 0.47380349040031433, "learning_rate": 5.0991179182755046e-05, "loss": 0.9767, "step": 5695 }, { "epoch": 0.5089463220675944, "grad_norm": 0.563545823097229, "learning_rate": 5.097671126806942e-05, "loss": 0.9585, "step": 5696 }, { "epoch": 0.5090356735988563, "grad_norm": 0.48258113861083984, "learning_rate": 5.0962243271573795e-05, "loss": 0.9296, "step": 5697 }, { "epoch": 0.5091250251301181, "grad_norm": 0.42884641885757446, "learning_rate": 5.094777519448005e-05, "loss": 0.9912, "step": 5698 }, { "epoch": 0.50921437666138, "grad_norm": 0.47126665711402893, "learning_rate": 5.093330703800002e-05, "loss": 1.0121, "step": 5699 }, { "epoch": 0.5093037281926419, "grad_norm": 0.4382041096687317, "learning_rate": 5.091883880334558e-05, "loss": 0.9886, "step": 5700 }, { "epoch": 0.5093930797239038, "grad_norm": 0.4659852087497711, "learning_rate": 5.090437049172862e-05, "loss": 0.9174, "step": 5701 }, { "epoch": 0.5094824312551657, "grad_norm": 0.5495339632034302, "learning_rate": 5.0889902104360965e-05, "loss": 0.9337, "step": 5702 }, { "epoch": 0.5095717827864275, "grad_norm": 0.42241787910461426, "learning_rate": 5.087543364245454e-05, "loss": 0.9787, "step": 5703 }, { "epoch": 0.5096611343176893, "grad_norm": 0.47585710883140564, "learning_rate": 5.08609651072212e-05, "loss": 0.9195, "step": 5704 }, { "epoch": 0.5097504858489512, "grad_norm": 0.49689507484436035, "learning_rate": 5.084649649987285e-05, "loss": 0.9603, "step": 5705 }, { "epoch": 0.5098398373802131, "grad_norm": 0.4226880669593811, "learning_rate": 5.083202782162142e-05, "loss": 1.0116, "step": 5706 }, { "epoch": 0.509929188911475, "grad_norm": 0.4046030640602112, "learning_rate": 5.0817559073678775e-05, "loss": 0.9555, "step": 5707 }, { "epoch": 0.5100185404427369, "grad_norm": 0.4440669119358063, "learning_rate": 5.080309025725685e-05, "loss": 0.9408, "step": 5708 }, { "epoch": 0.5101078919739988, "grad_norm": 0.5712856650352478, "learning_rate": 5.0788621373567545e-05, "loss": 0.922, "step": 5709 }, { "epoch": 0.5101972435052605, "grad_norm": 0.39347535371780396, "learning_rate": 5.0774152423822796e-05, "loss": 1.0552, "step": 5710 }, { "epoch": 0.5102865950365224, "grad_norm": 0.45367705821990967, "learning_rate": 5.075968340923454e-05, "loss": 0.9869, "step": 5711 }, { "epoch": 0.5103759465677843, "grad_norm": 0.48188331723213196, "learning_rate": 5.074521433101469e-05, "loss": 0.9637, "step": 5712 }, { "epoch": 0.5104652980990462, "grad_norm": 0.4748827815055847, "learning_rate": 5.07307451903752e-05, "loss": 0.9533, "step": 5713 }, { "epoch": 0.510554649630308, "grad_norm": 0.4055231809616089, "learning_rate": 5.0716275988528005e-05, "loss": 0.9954, "step": 5714 }, { "epoch": 0.5106440011615699, "grad_norm": 0.4364885091781616, "learning_rate": 5.0701806726685076e-05, "loss": 0.9841, "step": 5715 }, { "epoch": 0.5107333526928318, "grad_norm": 0.3934551775455475, "learning_rate": 5.0687337406058345e-05, "loss": 0.9707, "step": 5716 }, { "epoch": 0.5108227042240936, "grad_norm": 0.505912721157074, "learning_rate": 5.0672868027859774e-05, "loss": 0.9605, "step": 5717 }, { "epoch": 0.5109120557553555, "grad_norm": 0.4747815728187561, "learning_rate": 5.065839859330134e-05, "loss": 0.9746, "step": 5718 }, { "epoch": 0.5110014072866174, "grad_norm": 0.5368762016296387, "learning_rate": 5.0643929103595e-05, "loss": 0.9514, "step": 5719 }, { "epoch": 0.5110907588178792, "grad_norm": 0.45678213238716125, "learning_rate": 5.0629459559952754e-05, "loss": 0.9288, "step": 5720 }, { "epoch": 0.5111801103491411, "grad_norm": 0.407548725605011, "learning_rate": 5.0614989963586536e-05, "loss": 0.9745, "step": 5721 }, { "epoch": 0.511269461880403, "grad_norm": 0.4418816864490509, "learning_rate": 5.060052031570837e-05, "loss": 0.9858, "step": 5722 }, { "epoch": 0.5113588134116649, "grad_norm": 0.4085575342178345, "learning_rate": 5.0586050617530214e-05, "loss": 1.0397, "step": 5723 }, { "epoch": 0.5114481649429267, "grad_norm": 0.4091111719608307, "learning_rate": 5.057158087026408e-05, "loss": 1.0209, "step": 5724 }, { "epoch": 0.5115375164741885, "grad_norm": 0.42167025804519653, "learning_rate": 5.0557111075121944e-05, "loss": 0.9675, "step": 5725 }, { "epoch": 0.5116268680054504, "grad_norm": 0.5113321542739868, "learning_rate": 5.0542641233315834e-05, "loss": 0.8551, "step": 5726 }, { "epoch": 0.5117162195367123, "grad_norm": 0.4538382291793823, "learning_rate": 5.0528171346057725e-05, "loss": 0.9376, "step": 5727 }, { "epoch": 0.5118055710679742, "grad_norm": 0.4386774003505707, "learning_rate": 5.0513701414559635e-05, "loss": 0.9178, "step": 5728 }, { "epoch": 0.5118949225992361, "grad_norm": 0.45510563254356384, "learning_rate": 5.049923144003358e-05, "loss": 0.9116, "step": 5729 }, { "epoch": 0.511984274130498, "grad_norm": 0.39837151765823364, "learning_rate": 5.048476142369156e-05, "loss": 0.9675, "step": 5730 }, { "epoch": 0.5120736256617597, "grad_norm": 0.5683768391609192, "learning_rate": 5.047029136674563e-05, "loss": 0.9143, "step": 5731 }, { "epoch": 0.5121629771930216, "grad_norm": 0.44197937846183777, "learning_rate": 5.0455821270407763e-05, "loss": 0.9469, "step": 5732 }, { "epoch": 0.5122523287242835, "grad_norm": 0.4030378758907318, "learning_rate": 5.0441351135890004e-05, "loss": 0.9871, "step": 5733 }, { "epoch": 0.5123416802555454, "grad_norm": 0.40224945545196533, "learning_rate": 5.042688096440439e-05, "loss": 0.9674, "step": 5734 }, { "epoch": 0.5124310317868073, "grad_norm": 0.43630731105804443, "learning_rate": 5.041241075716294e-05, "loss": 0.9477, "step": 5735 }, { "epoch": 0.5125203833180692, "grad_norm": 0.4436815679073334, "learning_rate": 5.039794051537771e-05, "loss": 0.9675, "step": 5736 }, { "epoch": 0.512609734849331, "grad_norm": 0.45859208703041077, "learning_rate": 5.0383470240260713e-05, "loss": 0.9066, "step": 5737 }, { "epoch": 0.5126990863805928, "grad_norm": 0.4863572120666504, "learning_rate": 5.0368999933024e-05, "loss": 0.9758, "step": 5738 }, { "epoch": 0.5127884379118547, "grad_norm": 0.4681277871131897, "learning_rate": 5.035452959487959e-05, "loss": 0.9519, "step": 5739 }, { "epoch": 0.5128777894431166, "grad_norm": 0.5145890712738037, "learning_rate": 5.034005922703956e-05, "loss": 0.8922, "step": 5740 }, { "epoch": 0.5129671409743785, "grad_norm": 0.46464183926582336, "learning_rate": 5.032558883071594e-05, "loss": 0.9765, "step": 5741 }, { "epoch": 0.5130564925056403, "grad_norm": 0.4565260410308838, "learning_rate": 5.031111840712079e-05, "loss": 0.9139, "step": 5742 }, { "epoch": 0.5131458440369022, "grad_norm": 0.4922895133495331, "learning_rate": 5.029664795746616e-05, "loss": 0.8892, "step": 5743 }, { "epoch": 0.5132351955681641, "grad_norm": 0.4101276993751526, "learning_rate": 5.028217748296409e-05, "loss": 1.0292, "step": 5744 }, { "epoch": 0.5133245470994259, "grad_norm": 0.44919613003730774, "learning_rate": 5.0267706984826644e-05, "loss": 0.9039, "step": 5745 }, { "epoch": 0.5134138986306878, "grad_norm": 0.403489887714386, "learning_rate": 5.025323646426591e-05, "loss": 0.9693, "step": 5746 }, { "epoch": 0.5135032501619496, "grad_norm": 0.4634898602962494, "learning_rate": 5.02387659224939e-05, "loss": 0.9674, "step": 5747 }, { "epoch": 0.5135926016932115, "grad_norm": 0.5770649909973145, "learning_rate": 5.022429536072271e-05, "loss": 0.9314, "step": 5748 }, { "epoch": 0.5136819532244734, "grad_norm": 0.36833301186561584, "learning_rate": 5.0209824780164386e-05, "loss": 1.0579, "step": 5749 }, { "epoch": 0.5137713047557353, "grad_norm": 0.44532787799835205, "learning_rate": 5.0195354182031e-05, "loss": 1.0662, "step": 5750 }, { "epoch": 0.5138606562869971, "grad_norm": 0.40112441778182983, "learning_rate": 5.018088356753463e-05, "loss": 0.9897, "step": 5751 }, { "epoch": 0.513950007818259, "grad_norm": 0.3955027759075165, "learning_rate": 5.016641293788732e-05, "loss": 1.0451, "step": 5752 }, { "epoch": 0.5140393593495208, "grad_norm": 0.4612029194831848, "learning_rate": 5.0151942294301155e-05, "loss": 0.951, "step": 5753 }, { "epoch": 0.5141287108807827, "grad_norm": 0.5001698136329651, "learning_rate": 5.013747163798821e-05, "loss": 0.9077, "step": 5754 }, { "epoch": 0.5142180624120446, "grad_norm": 0.46220487356185913, "learning_rate": 5.012300097016055e-05, "loss": 0.9542, "step": 5755 }, { "epoch": 0.5143074139433065, "grad_norm": 0.5143389701843262, "learning_rate": 5.010853029203024e-05, "loss": 0.9437, "step": 5756 }, { "epoch": 0.5143967654745684, "grad_norm": 0.45577070116996765, "learning_rate": 5.0094059604809375e-05, "loss": 0.977, "step": 5757 }, { "epoch": 0.5144861170058301, "grad_norm": 0.4429572522640228, "learning_rate": 5.0079588909710005e-05, "loss": 0.9611, "step": 5758 }, { "epoch": 0.514575468537092, "grad_norm": 0.3987106382846832, "learning_rate": 5.0065118207944216e-05, "loss": 0.9967, "step": 5759 }, { "epoch": 0.5146648200683539, "grad_norm": 0.37299370765686035, "learning_rate": 5.005064750072408e-05, "loss": 0.9641, "step": 5760 }, { "epoch": 0.5147541715996158, "grad_norm": 0.5281698107719421, "learning_rate": 5.003617678926169e-05, "loss": 0.967, "step": 5761 }, { "epoch": 0.5148435231308777, "grad_norm": 0.5716055631637573, "learning_rate": 5.0021706074769095e-05, "loss": 0.8772, "step": 5762 }, { "epoch": 0.5149328746621396, "grad_norm": 0.5003583431243896, "learning_rate": 5.000723535845838e-05, "loss": 1.0233, "step": 5763 }, { "epoch": 0.5150222261934014, "grad_norm": 0.40355581045150757, "learning_rate": 4.999276464154164e-05, "loss": 0.9501, "step": 5764 }, { "epoch": 0.5151115777246632, "grad_norm": 0.4532722234725952, "learning_rate": 4.997829392523093e-05, "loss": 0.9233, "step": 5765 }, { "epoch": 0.5152009292559251, "grad_norm": 0.46640169620513916, "learning_rate": 4.9963823210738335e-05, "loss": 0.9473, "step": 5766 }, { "epoch": 0.515290280787187, "grad_norm": 0.4269860088825226, "learning_rate": 4.994935249927593e-05, "loss": 0.9754, "step": 5767 }, { "epoch": 0.5153796323184489, "grad_norm": 0.45868611335754395, "learning_rate": 4.9934881792055796e-05, "loss": 1.0073, "step": 5768 }, { "epoch": 0.5154689838497107, "grad_norm": 0.4609023332595825, "learning_rate": 4.9920411090290006e-05, "loss": 0.9759, "step": 5769 }, { "epoch": 0.5155583353809726, "grad_norm": 0.4061686098575592, "learning_rate": 4.990594039519063e-05, "loss": 0.9157, "step": 5770 }, { "epoch": 0.5156476869122345, "grad_norm": 0.4474005103111267, "learning_rate": 4.9891469707969765e-05, "loss": 0.9675, "step": 5771 }, { "epoch": 0.5157370384434963, "grad_norm": 0.41910743713378906, "learning_rate": 4.987699902983946e-05, "loss": 0.8943, "step": 5772 }, { "epoch": 0.5158263899747582, "grad_norm": 0.5283576250076294, "learning_rate": 4.9862528362011794e-05, "loss": 0.8617, "step": 5773 }, { "epoch": 0.51591574150602, "grad_norm": 0.6106252670288086, "learning_rate": 4.9848057705698856e-05, "loss": 0.9183, "step": 5774 }, { "epoch": 0.5160050930372819, "grad_norm": 0.40918898582458496, "learning_rate": 4.98335870621127e-05, "loss": 0.9387, "step": 5775 }, { "epoch": 0.5160944445685438, "grad_norm": 0.582862138748169, "learning_rate": 4.981911643246539e-05, "loss": 0.9047, "step": 5776 }, { "epoch": 0.5161837960998057, "grad_norm": 0.4943019449710846, "learning_rate": 4.9804645817969015e-05, "loss": 0.8898, "step": 5777 }, { "epoch": 0.5162731476310676, "grad_norm": 0.41431719064712524, "learning_rate": 4.9790175219835626e-05, "loss": 0.9774, "step": 5778 }, { "epoch": 0.5163624991623293, "grad_norm": 0.5077488422393799, "learning_rate": 4.97757046392773e-05, "loss": 0.985, "step": 5779 }, { "epoch": 0.5164518506935912, "grad_norm": 0.5016276240348816, "learning_rate": 4.976123407750611e-05, "loss": 0.9377, "step": 5780 }, { "epoch": 0.5165412022248531, "grad_norm": 0.3875533938407898, "learning_rate": 4.9746763535734104e-05, "loss": 1.008, "step": 5781 }, { "epoch": 0.516630553756115, "grad_norm": 0.5634098649024963, "learning_rate": 4.973229301517335e-05, "loss": 0.9303, "step": 5782 }, { "epoch": 0.5167199052873769, "grad_norm": 0.446728378534317, "learning_rate": 4.971782251703591e-05, "loss": 0.9349, "step": 5783 }, { "epoch": 0.5168092568186388, "grad_norm": 0.3732495903968811, "learning_rate": 4.9703352042533866e-05, "loss": 0.9877, "step": 5784 }, { "epoch": 0.5168986083499006, "grad_norm": 0.4483281075954437, "learning_rate": 4.9688881592879233e-05, "loss": 0.875, "step": 5785 }, { "epoch": 0.5169879598811624, "grad_norm": 0.4210112392902374, "learning_rate": 4.967441116928407e-05, "loss": 0.9735, "step": 5786 }, { "epoch": 0.5170773114124243, "grad_norm": 0.40223428606987, "learning_rate": 4.9659940772960456e-05, "loss": 0.946, "step": 5787 }, { "epoch": 0.5171666629436862, "grad_norm": 0.5056480169296265, "learning_rate": 4.964547040512042e-05, "loss": 0.8455, "step": 5788 }, { "epoch": 0.5172560144749481, "grad_norm": 0.4198668599128723, "learning_rate": 4.963100006697602e-05, "loss": 1.0444, "step": 5789 }, { "epoch": 0.51734536600621, "grad_norm": 0.4606395363807678, "learning_rate": 4.9616529759739305e-05, "loss": 0.9313, "step": 5790 }, { "epoch": 0.5174347175374718, "grad_norm": 0.47824013233184814, "learning_rate": 4.96020594846223e-05, "loss": 0.9325, "step": 5791 }, { "epoch": 0.5175240690687337, "grad_norm": 0.46849849820137024, "learning_rate": 4.958758924283706e-05, "loss": 0.8359, "step": 5792 }, { "epoch": 0.5176134205999955, "grad_norm": 0.39913827180862427, "learning_rate": 4.957311903559561e-05, "loss": 0.9767, "step": 5793 }, { "epoch": 0.5177027721312574, "grad_norm": 0.5645284652709961, "learning_rate": 4.9558648864110014e-05, "loss": 0.8838, "step": 5794 }, { "epoch": 0.5177921236625193, "grad_norm": 0.4400821924209595, "learning_rate": 4.954417872959226e-05, "loss": 0.9193, "step": 5795 }, { "epoch": 0.5178814751937811, "grad_norm": 0.4306231439113617, "learning_rate": 4.95297086332544e-05, "loss": 0.9235, "step": 5796 }, { "epoch": 0.517970826725043, "grad_norm": 0.5578770637512207, "learning_rate": 4.951523857630845e-05, "loss": 0.9793, "step": 5797 }, { "epoch": 0.5180601782563049, "grad_norm": 0.4247242510318756, "learning_rate": 4.950076855996643e-05, "loss": 1.0176, "step": 5798 }, { "epoch": 0.5181495297875668, "grad_norm": 0.5067040920257568, "learning_rate": 4.9486298585440376e-05, "loss": 0.9595, "step": 5799 }, { "epoch": 0.5182388813188286, "grad_norm": 0.5062249898910522, "learning_rate": 4.9471828653942286e-05, "loss": 0.8926, "step": 5800 }, { "epoch": 0.5183282328500904, "grad_norm": 0.43885526061058044, "learning_rate": 4.945735876668418e-05, "loss": 0.9922, "step": 5801 }, { "epoch": 0.5184175843813523, "grad_norm": 0.36411014199256897, "learning_rate": 4.9442888924878054e-05, "loss": 0.9627, "step": 5802 }, { "epoch": 0.5185069359126142, "grad_norm": 0.45766767859458923, "learning_rate": 4.9428419129735925e-05, "loss": 0.9285, "step": 5803 }, { "epoch": 0.5185962874438761, "grad_norm": 0.4283965826034546, "learning_rate": 4.9413949382469805e-05, "loss": 1.0057, "step": 5804 }, { "epoch": 0.518685638975138, "grad_norm": 0.4837194085121155, "learning_rate": 4.939947968429165e-05, "loss": 0.9082, "step": 5805 }, { "epoch": 0.5187749905063999, "grad_norm": 0.459768146276474, "learning_rate": 4.9385010036413475e-05, "loss": 0.8664, "step": 5806 }, { "epoch": 0.5188643420376616, "grad_norm": 0.39023587107658386, "learning_rate": 4.9370540440047264e-05, "loss": 1.0252, "step": 5807 }, { "epoch": 0.5189536935689235, "grad_norm": 0.44945186376571655, "learning_rate": 4.935607089640501e-05, "loss": 0.919, "step": 5808 }, { "epoch": 0.5190430451001854, "grad_norm": 0.4950411915779114, "learning_rate": 4.934160140669867e-05, "loss": 0.9833, "step": 5809 }, { "epoch": 0.5191323966314473, "grad_norm": 0.42366206645965576, "learning_rate": 4.932713197214023e-05, "loss": 0.9545, "step": 5810 }, { "epoch": 0.5192217481627092, "grad_norm": 0.5626574158668518, "learning_rate": 4.931266259394167e-05, "loss": 0.8383, "step": 5811 }, { "epoch": 0.519311099693971, "grad_norm": 0.49013814330101013, "learning_rate": 4.929819327331493e-05, "loss": 0.9492, "step": 5812 }, { "epoch": 0.5194004512252328, "grad_norm": 0.48132896423339844, "learning_rate": 4.928372401147199e-05, "loss": 0.8303, "step": 5813 }, { "epoch": 0.5194898027564947, "grad_norm": 0.5105422735214233, "learning_rate": 4.92692548096248e-05, "loss": 0.9641, "step": 5814 }, { "epoch": 0.5195791542877566, "grad_norm": 0.4313611388206482, "learning_rate": 4.9254785668985335e-05, "loss": 0.9816, "step": 5815 }, { "epoch": 0.5196685058190185, "grad_norm": 0.3787804841995239, "learning_rate": 4.924031659076548e-05, "loss": 0.9836, "step": 5816 }, { "epoch": 0.5197578573502804, "grad_norm": 0.519477903842926, "learning_rate": 4.9225847576177216e-05, "loss": 0.9174, "step": 5817 }, { "epoch": 0.5198472088815422, "grad_norm": 0.40833377838134766, "learning_rate": 4.921137862643247e-05, "loss": 0.9769, "step": 5818 }, { "epoch": 0.5199365604128041, "grad_norm": 0.5591320991516113, "learning_rate": 4.919690974274316e-05, "loss": 1.0388, "step": 5819 }, { "epoch": 0.5200259119440659, "grad_norm": 0.44129371643066406, "learning_rate": 4.918244092632123e-05, "loss": 0.9731, "step": 5820 }, { "epoch": 0.5201152634753278, "grad_norm": 0.3888798654079437, "learning_rate": 4.916797217837859e-05, "loss": 0.9446, "step": 5821 }, { "epoch": 0.5202046150065897, "grad_norm": 0.4500011205673218, "learning_rate": 4.915350350012714e-05, "loss": 1.0109, "step": 5822 }, { "epoch": 0.5202939665378515, "grad_norm": 0.5037985444068909, "learning_rate": 4.913903489277881e-05, "loss": 0.9591, "step": 5823 }, { "epoch": 0.5203833180691134, "grad_norm": 0.49185624718666077, "learning_rate": 4.9124566357545476e-05, "loss": 1.0417, "step": 5824 }, { "epoch": 0.5204726696003753, "grad_norm": 0.38457438349723816, "learning_rate": 4.9110097895639046e-05, "loss": 1.0718, "step": 5825 }, { "epoch": 0.5205620211316372, "grad_norm": 0.4443393051624298, "learning_rate": 4.9095629508271394e-05, "loss": 1.0176, "step": 5826 }, { "epoch": 0.520651372662899, "grad_norm": 0.39499226212501526, "learning_rate": 4.908116119665443e-05, "loss": 0.9926, "step": 5827 }, { "epoch": 0.5207407241941608, "grad_norm": 0.563543975353241, "learning_rate": 4.906669296199999e-05, "loss": 0.87, "step": 5828 }, { "epoch": 0.5208300757254227, "grad_norm": 0.5549851655960083, "learning_rate": 4.905222480551996e-05, "loss": 0.9655, "step": 5829 }, { "epoch": 0.5209194272566846, "grad_norm": 0.5066598653793335, "learning_rate": 4.903775672842621e-05, "loss": 0.968, "step": 5830 }, { "epoch": 0.5210087787879465, "grad_norm": 0.38776955008506775, "learning_rate": 4.902328873193059e-05, "loss": 1.0964, "step": 5831 }, { "epoch": 0.5210981303192084, "grad_norm": 0.45412904024124146, "learning_rate": 4.900882081724495e-05, "loss": 0.9324, "step": 5832 }, { "epoch": 0.5211874818504703, "grad_norm": 0.5597448945045471, "learning_rate": 4.899435298558113e-05, "loss": 0.9376, "step": 5833 }, { "epoch": 0.521276833381732, "grad_norm": 0.413144588470459, "learning_rate": 4.897988523815097e-05, "loss": 0.9871, "step": 5834 }, { "epoch": 0.5213661849129939, "grad_norm": 0.44255807995796204, "learning_rate": 4.8965417576166316e-05, "loss": 0.9833, "step": 5835 }, { "epoch": 0.5214555364442558, "grad_norm": 0.43993690609931946, "learning_rate": 4.8950950000838945e-05, "loss": 0.972, "step": 5836 }, { "epoch": 0.5215448879755177, "grad_norm": 0.4028208553791046, "learning_rate": 4.893648251338069e-05, "loss": 0.9361, "step": 5837 }, { "epoch": 0.5216342395067796, "grad_norm": 0.4599561095237732, "learning_rate": 4.8922015115003375e-05, "loss": 0.9747, "step": 5838 }, { "epoch": 0.5217235910380414, "grad_norm": 0.5245079398155212, "learning_rate": 4.890754780691877e-05, "loss": 0.8898, "step": 5839 }, { "epoch": 0.5218129425693033, "grad_norm": 0.5127478837966919, "learning_rate": 4.8893080590338684e-05, "loss": 0.9127, "step": 5840 }, { "epoch": 0.5219022941005651, "grad_norm": 0.37643924355506897, "learning_rate": 4.887861346647491e-05, "loss": 0.9822, "step": 5841 }, { "epoch": 0.521991645631827, "grad_norm": 0.4069046974182129, "learning_rate": 4.8864146436539196e-05, "loss": 0.9837, "step": 5842 }, { "epoch": 0.5220809971630889, "grad_norm": 0.4322601854801178, "learning_rate": 4.884967950174333e-05, "loss": 0.9865, "step": 5843 }, { "epoch": 0.5221703486943507, "grad_norm": 0.4148218333721161, "learning_rate": 4.883521266329907e-05, "loss": 0.9269, "step": 5844 }, { "epoch": 0.5222597002256126, "grad_norm": 0.3982529640197754, "learning_rate": 4.882074592241817e-05, "loss": 0.9791, "step": 5845 }, { "epoch": 0.5223490517568745, "grad_norm": 0.48719900846481323, "learning_rate": 4.880627928031237e-05, "loss": 0.9728, "step": 5846 }, { "epoch": 0.5224384032881364, "grad_norm": 0.5783781409263611, "learning_rate": 4.87918127381934e-05, "loss": 0.9212, "step": 5847 }, { "epoch": 0.5225277548193982, "grad_norm": 0.4238271713256836, "learning_rate": 4.8777346297272986e-05, "loss": 1.0067, "step": 5848 }, { "epoch": 0.52261710635066, "grad_norm": 0.433936208486557, "learning_rate": 4.876287995876285e-05, "loss": 0.9611, "step": 5849 }, { "epoch": 0.5227064578819219, "grad_norm": 0.4108850657939911, "learning_rate": 4.874841372387471e-05, "loss": 0.9153, "step": 5850 }, { "epoch": 0.5227958094131838, "grad_norm": 0.4847879111766815, "learning_rate": 4.8733947593820254e-05, "loss": 0.9725, "step": 5851 }, { "epoch": 0.5228851609444457, "grad_norm": 0.508726954460144, "learning_rate": 4.871948156981119e-05, "loss": 0.8959, "step": 5852 }, { "epoch": 0.5229745124757076, "grad_norm": 0.4954078495502472, "learning_rate": 4.870501565305919e-05, "loss": 0.9549, "step": 5853 }, { "epoch": 0.5230638640069695, "grad_norm": 0.4881262183189392, "learning_rate": 4.8690549844775935e-05, "loss": 1.066, "step": 5854 }, { "epoch": 0.5231532155382312, "grad_norm": 0.6973787546157837, "learning_rate": 4.867608414617311e-05, "loss": 0.9015, "step": 5855 }, { "epoch": 0.5232425670694931, "grad_norm": 0.4565022587776184, "learning_rate": 4.8661618558462325e-05, "loss": 0.9478, "step": 5856 }, { "epoch": 0.523331918600755, "grad_norm": 0.48407623171806335, "learning_rate": 4.8647153082855254e-05, "loss": 0.9787, "step": 5857 }, { "epoch": 0.5234212701320169, "grad_norm": 0.4646012783050537, "learning_rate": 4.863268772056353e-05, "loss": 0.9203, "step": 5858 }, { "epoch": 0.5235106216632788, "grad_norm": 0.4288366436958313, "learning_rate": 4.8618222472798783e-05, "loss": 0.9631, "step": 5859 }, { "epoch": 0.5235999731945407, "grad_norm": 0.5206550359725952, "learning_rate": 4.860375734077265e-05, "loss": 0.9404, "step": 5860 }, { "epoch": 0.5236893247258025, "grad_norm": 0.36840370297431946, "learning_rate": 4.858929232569671e-05, "loss": 1.0194, "step": 5861 }, { "epoch": 0.5237786762570643, "grad_norm": 0.4584810435771942, "learning_rate": 4.857482742878257e-05, "loss": 0.9383, "step": 5862 }, { "epoch": 0.5238680277883262, "grad_norm": 0.3916776478290558, "learning_rate": 4.856036265124182e-05, "loss": 1.0608, "step": 5863 }, { "epoch": 0.5239573793195881, "grad_norm": 0.4396377503871918, "learning_rate": 4.854589799428606e-05, "loss": 0.9877, "step": 5864 }, { "epoch": 0.52404673085085, "grad_norm": 0.5073574781417847, "learning_rate": 4.853143345912685e-05, "loss": 0.8528, "step": 5865 }, { "epoch": 0.5241360823821118, "grad_norm": 0.5877308249473572, "learning_rate": 4.851696904697573e-05, "loss": 0.9014, "step": 5866 }, { "epoch": 0.5242254339133737, "grad_norm": 0.470747172832489, "learning_rate": 4.8502504759044245e-05, "loss": 0.922, "step": 5867 }, { "epoch": 0.5243147854446356, "grad_norm": 0.4053327143192291, "learning_rate": 4.848804059654396e-05, "loss": 0.93, "step": 5868 }, { "epoch": 0.5244041369758974, "grad_norm": 0.44682514667510986, "learning_rate": 4.847357656068639e-05, "loss": 1.0523, "step": 5869 }, { "epoch": 0.5244934885071593, "grad_norm": 0.3955923914909363, "learning_rate": 4.845911265268305e-05, "loss": 1.0694, "step": 5870 }, { "epoch": 0.5245828400384211, "grad_norm": 0.46006014943122864, "learning_rate": 4.844464887374545e-05, "loss": 0.9943, "step": 5871 }, { "epoch": 0.524672191569683, "grad_norm": 0.4770232141017914, "learning_rate": 4.8430185225085096e-05, "loss": 0.9395, "step": 5872 }, { "epoch": 0.5247615431009449, "grad_norm": 0.5369167327880859, "learning_rate": 4.841572170791345e-05, "loss": 0.9318, "step": 5873 }, { "epoch": 0.5248508946322068, "grad_norm": 0.4109400808811188, "learning_rate": 4.8401258323442016e-05, "loss": 1.0272, "step": 5874 }, { "epoch": 0.5249402461634686, "grad_norm": 0.42871788144111633, "learning_rate": 4.8386795072882234e-05, "loss": 0.9692, "step": 5875 }, { "epoch": 0.5250295976947305, "grad_norm": 0.4132499098777771, "learning_rate": 4.837233195744556e-05, "loss": 0.9554, "step": 5876 }, { "epoch": 0.5251189492259923, "grad_norm": 0.40216875076293945, "learning_rate": 4.835786897834342e-05, "loss": 0.9684, "step": 5877 }, { "epoch": 0.5252083007572542, "grad_norm": 0.3839309513568878, "learning_rate": 4.8343406136787274e-05, "loss": 0.9613, "step": 5878 }, { "epoch": 0.5252976522885161, "grad_norm": 0.4431716799736023, "learning_rate": 4.832894343398851e-05, "loss": 0.9824, "step": 5879 }, { "epoch": 0.525387003819778, "grad_norm": 0.5026467442512512, "learning_rate": 4.831448087115855e-05, "loss": 1.0332, "step": 5880 }, { "epoch": 0.5254763553510399, "grad_norm": 0.431949257850647, "learning_rate": 4.8300018449508775e-05, "loss": 0.9471, "step": 5881 }, { "epoch": 0.5255657068823016, "grad_norm": 0.5039017796516418, "learning_rate": 4.8285556170250584e-05, "loss": 0.9319, "step": 5882 }, { "epoch": 0.5256550584135635, "grad_norm": 0.44245362281799316, "learning_rate": 4.8271094034595336e-05, "loss": 0.9568, "step": 5883 }, { "epoch": 0.5257444099448254, "grad_norm": 0.4410870671272278, "learning_rate": 4.825663204375439e-05, "loss": 0.9713, "step": 5884 }, { "epoch": 0.5258337614760873, "grad_norm": 0.5179281234741211, "learning_rate": 4.824217019893912e-05, "loss": 0.8289, "step": 5885 }, { "epoch": 0.5259231130073492, "grad_norm": 0.4670274555683136, "learning_rate": 4.82277085013608e-05, "loss": 0.9096, "step": 5886 }, { "epoch": 0.5260124645386111, "grad_norm": 0.5021420121192932, "learning_rate": 4.8213246952230794e-05, "loss": 0.9992, "step": 5887 }, { "epoch": 0.5261018160698729, "grad_norm": 0.4476109445095062, "learning_rate": 4.81987855527604e-05, "loss": 0.9169, "step": 5888 }, { "epoch": 0.5261911676011347, "grad_norm": 0.4486454725265503, "learning_rate": 4.818432430416091e-05, "loss": 1.0424, "step": 5889 }, { "epoch": 0.5262805191323966, "grad_norm": 0.5046273469924927, "learning_rate": 4.8169863207643615e-05, "loss": 0.9194, "step": 5890 }, { "epoch": 0.5263698706636585, "grad_norm": 0.4156638979911804, "learning_rate": 4.815540226441979e-05, "loss": 0.9668, "step": 5891 }, { "epoch": 0.5264592221949204, "grad_norm": 0.4498765468597412, "learning_rate": 4.814094147570069e-05, "loss": 0.9449, "step": 5892 }, { "epoch": 0.5265485737261822, "grad_norm": 0.4794931411743164, "learning_rate": 4.812648084269756e-05, "loss": 1.0374, "step": 5893 }, { "epoch": 0.5266379252574441, "grad_norm": 0.4733235836029053, "learning_rate": 4.811202036662162e-05, "loss": 0.9983, "step": 5894 }, { "epoch": 0.526727276788706, "grad_norm": 0.46899041533470154, "learning_rate": 4.8097560048684095e-05, "loss": 0.9764, "step": 5895 }, { "epoch": 0.5268166283199678, "grad_norm": 0.44023454189300537, "learning_rate": 4.808309989009621e-05, "loss": 0.935, "step": 5896 }, { "epoch": 0.5269059798512297, "grad_norm": 0.43503814935684204, "learning_rate": 4.806863989206914e-05, "loss": 0.8848, "step": 5897 }, { "epoch": 0.5269953313824915, "grad_norm": 0.4301520884037018, "learning_rate": 4.805418005581406e-05, "loss": 0.999, "step": 5898 }, { "epoch": 0.5270846829137534, "grad_norm": 0.3994184136390686, "learning_rate": 4.803972038254214e-05, "loss": 0.9203, "step": 5899 }, { "epoch": 0.5271740344450153, "grad_norm": 0.4339260756969452, "learning_rate": 4.802526087346453e-05, "loss": 1.0067, "step": 5900 }, { "epoch": 0.5272633859762772, "grad_norm": 0.4898630678653717, "learning_rate": 4.801080152979237e-05, "loss": 0.9212, "step": 5901 }, { "epoch": 0.5273527375075391, "grad_norm": 0.49684223532676697, "learning_rate": 4.799634235273679e-05, "loss": 0.9273, "step": 5902 }, { "epoch": 0.5274420890388009, "grad_norm": 0.420746386051178, "learning_rate": 4.798188334350889e-05, "loss": 0.9468, "step": 5903 }, { "epoch": 0.5275314405700627, "grad_norm": 0.4721129536628723, "learning_rate": 4.7967424503319774e-05, "loss": 0.9378, "step": 5904 }, { "epoch": 0.5276207921013246, "grad_norm": 0.4775635004043579, "learning_rate": 4.795296583338052e-05, "loss": 0.996, "step": 5905 }, { "epoch": 0.5277101436325865, "grad_norm": 0.4336440861225128, "learning_rate": 4.793850733490222e-05, "loss": 1.0088, "step": 5906 }, { "epoch": 0.5277994951638484, "grad_norm": 0.4374808669090271, "learning_rate": 4.792404900909589e-05, "loss": 0.9745, "step": 5907 }, { "epoch": 0.5278888466951103, "grad_norm": 0.4299390912055969, "learning_rate": 4.7909590857172574e-05, "loss": 0.9602, "step": 5908 }, { "epoch": 0.5279781982263722, "grad_norm": 0.4343792498111725, "learning_rate": 4.7895132880343306e-05, "loss": 0.9381, "step": 5909 }, { "epoch": 0.5280675497576339, "grad_norm": 0.536439836025238, "learning_rate": 4.78806750798191e-05, "loss": 0.8658, "step": 5910 }, { "epoch": 0.5281569012888958, "grad_norm": 0.46042898297309875, "learning_rate": 4.786621745681096e-05, "loss": 0.9794, "step": 5911 }, { "epoch": 0.5282462528201577, "grad_norm": 0.4773177206516266, "learning_rate": 4.785176001252983e-05, "loss": 0.9424, "step": 5912 }, { "epoch": 0.5283356043514196, "grad_norm": 0.47220951318740845, "learning_rate": 4.7837302748186705e-05, "loss": 0.9412, "step": 5913 }, { "epoch": 0.5284249558826815, "grad_norm": 0.46493634581565857, "learning_rate": 4.782284566499253e-05, "loss": 0.9811, "step": 5914 }, { "epoch": 0.5285143074139433, "grad_norm": 0.5822499990463257, "learning_rate": 4.780838876415824e-05, "loss": 0.8253, "step": 5915 }, { "epoch": 0.5286036589452052, "grad_norm": 0.46834489703178406, "learning_rate": 4.779393204689477e-05, "loss": 0.9326, "step": 5916 }, { "epoch": 0.528693010476467, "grad_norm": 0.5302138328552246, "learning_rate": 4.777947551441298e-05, "loss": 0.8613, "step": 5917 }, { "epoch": 0.5287823620077289, "grad_norm": 0.4419107735157013, "learning_rate": 4.776501916792379e-05, "loss": 0.9798, "step": 5918 }, { "epoch": 0.5288717135389908, "grad_norm": 0.4270457923412323, "learning_rate": 4.775056300863806e-05, "loss": 1.0351, "step": 5919 }, { "epoch": 0.5289610650702526, "grad_norm": 0.4577907621860504, "learning_rate": 4.773610703776666e-05, "loss": 0.9121, "step": 5920 }, { "epoch": 0.5290504166015145, "grad_norm": 0.4939514398574829, "learning_rate": 4.772165125652043e-05, "loss": 0.9367, "step": 5921 }, { "epoch": 0.5291397681327764, "grad_norm": 0.45236796140670776, "learning_rate": 4.7707195666110195e-05, "loss": 0.9506, "step": 5922 }, { "epoch": 0.5292291196640383, "grad_norm": 0.4021003842353821, "learning_rate": 4.7692740267746757e-05, "loss": 0.9241, "step": 5923 }, { "epoch": 0.5293184711953001, "grad_norm": 0.44145897030830383, "learning_rate": 4.767828506264091e-05, "loss": 0.937, "step": 5924 }, { "epoch": 0.529407822726562, "grad_norm": 0.4989675283432007, "learning_rate": 4.766383005200345e-05, "loss": 1.0619, "step": 5925 }, { "epoch": 0.5294971742578238, "grad_norm": 0.45392560958862305, "learning_rate": 4.7649375237045135e-05, "loss": 0.9564, "step": 5926 }, { "epoch": 0.5295865257890857, "grad_norm": 0.40658605098724365, "learning_rate": 4.763492061897669e-05, "loss": 0.9595, "step": 5927 }, { "epoch": 0.5296758773203476, "grad_norm": 0.47459685802459717, "learning_rate": 4.762046619900884e-05, "loss": 0.9083, "step": 5928 }, { "epoch": 0.5297652288516095, "grad_norm": 0.5040339827537537, "learning_rate": 4.7606011978352315e-05, "loss": 0.9873, "step": 5929 }, { "epoch": 0.5298545803828714, "grad_norm": 0.5010223388671875, "learning_rate": 4.759155795821782e-05, "loss": 0.8799, "step": 5930 }, { "epoch": 0.5299439319141331, "grad_norm": 0.4148063063621521, "learning_rate": 4.7577104139816e-05, "loss": 0.9431, "step": 5931 }, { "epoch": 0.530033283445395, "grad_norm": 0.469046413898468, "learning_rate": 4.7562650524357536e-05, "loss": 0.9503, "step": 5932 }, { "epoch": 0.5301226349766569, "grad_norm": 0.5078368186950684, "learning_rate": 4.754819711305308e-05, "loss": 0.9369, "step": 5933 }, { "epoch": 0.5302119865079188, "grad_norm": 0.5710771083831787, "learning_rate": 4.753374390711324e-05, "loss": 0.8195, "step": 5934 }, { "epoch": 0.5303013380391807, "grad_norm": 0.4148007035255432, "learning_rate": 4.751929090774864e-05, "loss": 0.9669, "step": 5935 }, { "epoch": 0.5303906895704426, "grad_norm": 0.4209470748901367, "learning_rate": 4.7504838116169895e-05, "loss": 0.9673, "step": 5936 }, { "epoch": 0.5304800411017044, "grad_norm": 0.45647767186164856, "learning_rate": 4.7490385533587525e-05, "loss": 0.9706, "step": 5937 }, { "epoch": 0.5305693926329662, "grad_norm": 0.4158954620361328, "learning_rate": 4.7475933161212116e-05, "loss": 1.0093, "step": 5938 }, { "epoch": 0.5306587441642281, "grad_norm": 0.4575401842594147, "learning_rate": 4.746148100025421e-05, "loss": 1.0107, "step": 5939 }, { "epoch": 0.53074809569549, "grad_norm": 0.4388822019100189, "learning_rate": 4.7447029051924334e-05, "loss": 0.9565, "step": 5940 }, { "epoch": 0.5308374472267519, "grad_norm": 0.5215067267417908, "learning_rate": 4.7432577317432984e-05, "loss": 0.8885, "step": 5941 }, { "epoch": 0.5309267987580137, "grad_norm": 0.5837841033935547, "learning_rate": 4.7418125797990655e-05, "loss": 0.9078, "step": 5942 }, { "epoch": 0.5310161502892756, "grad_norm": 0.4431391656398773, "learning_rate": 4.740367449480781e-05, "loss": 0.9465, "step": 5943 }, { "epoch": 0.5311055018205374, "grad_norm": 0.4966404438018799, "learning_rate": 4.7389223409094904e-05, "loss": 0.9855, "step": 5944 }, { "epoch": 0.5311948533517993, "grad_norm": 0.4595504403114319, "learning_rate": 4.737477254206236e-05, "loss": 0.9012, "step": 5945 }, { "epoch": 0.5312842048830612, "grad_norm": 0.4891195595264435, "learning_rate": 4.736032189492062e-05, "loss": 0.865, "step": 5946 }, { "epoch": 0.531373556414323, "grad_norm": 0.38100045919418335, "learning_rate": 4.734587146888003e-05, "loss": 0.9545, "step": 5947 }, { "epoch": 0.5314629079455849, "grad_norm": 0.4812198877334595, "learning_rate": 4.733142126515101e-05, "loss": 1.0111, "step": 5948 }, { "epoch": 0.5315522594768468, "grad_norm": 0.5166088938713074, "learning_rate": 4.7316971284943905e-05, "loss": 0.9222, "step": 5949 }, { "epoch": 0.5316416110081087, "grad_norm": 0.42743030190467834, "learning_rate": 4.730252152946905e-05, "loss": 0.9176, "step": 5950 }, { "epoch": 0.5317309625393705, "grad_norm": 0.4856008291244507, "learning_rate": 4.7288071999936766e-05, "loss": 0.9253, "step": 5951 }, { "epoch": 0.5318203140706323, "grad_norm": 0.40241360664367676, "learning_rate": 4.7273622697557356e-05, "loss": 1.0319, "step": 5952 }, { "epoch": 0.5319096656018942, "grad_norm": 0.46790629625320435, "learning_rate": 4.725917362354111e-05, "loss": 0.9318, "step": 5953 }, { "epoch": 0.5319990171331561, "grad_norm": 0.4300851821899414, "learning_rate": 4.7244724779098293e-05, "loss": 0.9938, "step": 5954 }, { "epoch": 0.532088368664418, "grad_norm": 0.5142315626144409, "learning_rate": 4.7230276165439136e-05, "loss": 0.9487, "step": 5955 }, { "epoch": 0.5321777201956799, "grad_norm": 0.38115739822387695, "learning_rate": 4.72158277837739e-05, "loss": 0.9286, "step": 5956 }, { "epoch": 0.5322670717269418, "grad_norm": 0.7321306467056274, "learning_rate": 4.720137963531274e-05, "loss": 0.9289, "step": 5957 }, { "epoch": 0.5323564232582035, "grad_norm": 0.4938461184501648, "learning_rate": 4.718693172126587e-05, "loss": 0.973, "step": 5958 }, { "epoch": 0.5324457747894654, "grad_norm": 0.5693181157112122, "learning_rate": 4.7172484042843454e-05, "loss": 0.9181, "step": 5959 }, { "epoch": 0.5325351263207273, "grad_norm": 0.5643350481987, "learning_rate": 4.7158036601255634e-05, "loss": 1.017, "step": 5960 }, { "epoch": 0.5326244778519892, "grad_norm": 0.5340102910995483, "learning_rate": 4.714358939771255e-05, "loss": 0.8804, "step": 5961 }, { "epoch": 0.5327138293832511, "grad_norm": 0.5467627048492432, "learning_rate": 4.712914243342429e-05, "loss": 0.8569, "step": 5962 }, { "epoch": 0.532803180914513, "grad_norm": 0.5655173659324646, "learning_rate": 4.711469570960096e-05, "loss": 0.9432, "step": 5963 }, { "epoch": 0.5328925324457748, "grad_norm": 0.41005784273147583, "learning_rate": 4.7100249227452627e-05, "loss": 1.0204, "step": 5964 }, { "epoch": 0.5329818839770366, "grad_norm": 0.4703254997730255, "learning_rate": 4.7085802988189315e-05, "loss": 0.9528, "step": 5965 }, { "epoch": 0.5330712355082985, "grad_norm": 0.4015927314758301, "learning_rate": 4.707135699302108e-05, "loss": 0.955, "step": 5966 }, { "epoch": 0.5331605870395604, "grad_norm": 0.4751605689525604, "learning_rate": 4.705691124315792e-05, "loss": 0.9394, "step": 5967 }, { "epoch": 0.5332499385708223, "grad_norm": 0.4073632061481476, "learning_rate": 4.70424657398098e-05, "loss": 0.943, "step": 5968 }, { "epoch": 0.5333392901020841, "grad_norm": 0.43543925881385803, "learning_rate": 4.70280204841867e-05, "loss": 0.9864, "step": 5969 }, { "epoch": 0.533428641633346, "grad_norm": 0.4593610465526581, "learning_rate": 4.701357547749856e-05, "loss": 0.9586, "step": 5970 }, { "epoch": 0.5335179931646079, "grad_norm": 0.4604717493057251, "learning_rate": 4.699913072095531e-05, "loss": 0.9719, "step": 5971 }, { "epoch": 0.5336073446958697, "grad_norm": 0.522756814956665, "learning_rate": 4.698468621576685e-05, "loss": 0.9178, "step": 5972 }, { "epoch": 0.5336966962271316, "grad_norm": 0.43565860390663147, "learning_rate": 4.697024196314305e-05, "loss": 0.9208, "step": 5973 }, { "epoch": 0.5337860477583934, "grad_norm": 0.45407941937446594, "learning_rate": 4.695579796429379e-05, "loss": 0.9655, "step": 5974 }, { "epoch": 0.5338753992896553, "grad_norm": 0.524958074092865, "learning_rate": 4.69413542204289e-05, "loss": 0.9391, "step": 5975 }, { "epoch": 0.5339647508209172, "grad_norm": 0.4323229193687439, "learning_rate": 4.6926910732758215e-05, "loss": 0.937, "step": 5976 }, { "epoch": 0.5340541023521791, "grad_norm": 0.48769766092300415, "learning_rate": 4.69124675024915e-05, "loss": 0.9422, "step": 5977 }, { "epoch": 0.534143453883441, "grad_norm": 0.47193393111228943, "learning_rate": 4.689802453083853e-05, "loss": 0.9762, "step": 5978 }, { "epoch": 0.5342328054147027, "grad_norm": 0.574012041091919, "learning_rate": 4.688358181900907e-05, "loss": 0.867, "step": 5979 }, { "epoch": 0.5343221569459646, "grad_norm": 0.43543365597724915, "learning_rate": 4.686913936821287e-05, "loss": 0.9602, "step": 5980 }, { "epoch": 0.5344115084772265, "grad_norm": 0.45189157128334045, "learning_rate": 4.6854697179659614e-05, "loss": 0.9971, "step": 5981 }, { "epoch": 0.5345008600084884, "grad_norm": 0.4232499599456787, "learning_rate": 4.684025525455899e-05, "loss": 1.0099, "step": 5982 }, { "epoch": 0.5345902115397503, "grad_norm": 0.42236101627349854, "learning_rate": 4.682581359412066e-05, "loss": 0.9674, "step": 5983 }, { "epoch": 0.5346795630710122, "grad_norm": 0.4021371603012085, "learning_rate": 4.681137219955429e-05, "loss": 0.932, "step": 5984 }, { "epoch": 0.534768914602274, "grad_norm": 0.39467480778694153, "learning_rate": 4.6796931072069484e-05, "loss": 0.9422, "step": 5985 }, { "epoch": 0.5348582661335358, "grad_norm": 0.40237951278686523, "learning_rate": 4.678249021287583e-05, "loss": 0.9538, "step": 5986 }, { "epoch": 0.5349476176647977, "grad_norm": 0.5124891996383667, "learning_rate": 4.6768049623182953e-05, "loss": 0.9768, "step": 5987 }, { "epoch": 0.5350369691960596, "grad_norm": 0.4601878225803375, "learning_rate": 4.675360930420035e-05, "loss": 0.9826, "step": 5988 }, { "epoch": 0.5351263207273215, "grad_norm": 0.4513419568538666, "learning_rate": 4.673916925713756e-05, "loss": 0.9567, "step": 5989 }, { "epoch": 0.5352156722585834, "grad_norm": 0.38111451268196106, "learning_rate": 4.672472948320411e-05, "loss": 1.0469, "step": 5990 }, { "epoch": 0.5353050237898452, "grad_norm": 0.5757367610931396, "learning_rate": 4.671028998360947e-05, "loss": 0.9822, "step": 5991 }, { "epoch": 0.5353943753211071, "grad_norm": 0.5025779008865356, "learning_rate": 4.669585075956312e-05, "loss": 0.9309, "step": 5992 }, { "epoch": 0.5354837268523689, "grad_norm": 0.4102911651134491, "learning_rate": 4.668141181227448e-05, "loss": 0.9255, "step": 5993 }, { "epoch": 0.5355730783836308, "grad_norm": 0.4956420958042145, "learning_rate": 4.666697314295298e-05, "loss": 0.8852, "step": 5994 }, { "epoch": 0.5356624299148927, "grad_norm": 0.4618532061576843, "learning_rate": 4.665253475280801e-05, "loss": 0.9496, "step": 5995 }, { "epoch": 0.5357517814461545, "grad_norm": 0.5225943922996521, "learning_rate": 4.663809664304894e-05, "loss": 0.9285, "step": 5996 }, { "epoch": 0.5358411329774164, "grad_norm": 0.5119801759719849, "learning_rate": 4.662365881488511e-05, "loss": 0.9436, "step": 5997 }, { "epoch": 0.5359304845086783, "grad_norm": 0.5475390553474426, "learning_rate": 4.6609221269525835e-05, "loss": 0.9381, "step": 5998 }, { "epoch": 0.5360198360399402, "grad_norm": 0.48015183210372925, "learning_rate": 4.659478400818043e-05, "loss": 0.9396, "step": 5999 }, { "epoch": 0.536109187571202, "grad_norm": 0.5057094693183899, "learning_rate": 4.658034703205816e-05, "loss": 0.9935, "step": 6000 }, { "epoch": 0.5361985391024638, "grad_norm": 0.48920387029647827, "learning_rate": 4.6565910342368266e-05, "loss": 0.9402, "step": 6001 }, { "epoch": 0.5362878906337257, "grad_norm": 0.4365025758743286, "learning_rate": 4.6551473940319995e-05, "loss": 0.9472, "step": 6002 }, { "epoch": 0.5363772421649876, "grad_norm": 0.408589243888855, "learning_rate": 4.6537037827122536e-05, "loss": 0.9435, "step": 6003 }, { "epoch": 0.5364665936962495, "grad_norm": 0.5231300592422485, "learning_rate": 4.652260200398507e-05, "loss": 0.9997, "step": 6004 }, { "epoch": 0.5365559452275114, "grad_norm": 0.435147225856781, "learning_rate": 4.6508166472116754e-05, "loss": 0.9694, "step": 6005 }, { "epoch": 0.5366452967587731, "grad_norm": 0.42764097452163696, "learning_rate": 4.649373123272672e-05, "loss": 0.9844, "step": 6006 }, { "epoch": 0.536734648290035, "grad_norm": 0.5183307528495789, "learning_rate": 4.647929628702408e-05, "loss": 0.9385, "step": 6007 }, { "epoch": 0.5368239998212969, "grad_norm": 0.4516974091529846, "learning_rate": 4.6464861636217895e-05, "loss": 0.9214, "step": 6008 }, { "epoch": 0.5369133513525588, "grad_norm": 0.39056047797203064, "learning_rate": 4.645042728151722e-05, "loss": 0.9394, "step": 6009 }, { "epoch": 0.5370027028838207, "grad_norm": 0.3742974102497101, "learning_rate": 4.64359932241311e-05, "loss": 0.9829, "step": 6010 }, { "epoch": 0.5370920544150826, "grad_norm": 0.491948664188385, "learning_rate": 4.642155946526854e-05, "loss": 0.8914, "step": 6011 }, { "epoch": 0.5371814059463444, "grad_norm": 0.4805702567100525, "learning_rate": 4.640712600613851e-05, "loss": 0.9744, "step": 6012 }, { "epoch": 0.5372707574776062, "grad_norm": 0.48853474855422974, "learning_rate": 4.6392692847949984e-05, "loss": 0.985, "step": 6013 }, { "epoch": 0.5373601090088681, "grad_norm": 0.414133220911026, "learning_rate": 4.6378259991911886e-05, "loss": 0.9825, "step": 6014 }, { "epoch": 0.53744946054013, "grad_norm": 0.3783036470413208, "learning_rate": 4.6363827439233114e-05, "loss": 0.9994, "step": 6015 }, { "epoch": 0.5375388120713919, "grad_norm": 0.5566924214363098, "learning_rate": 4.634939519112255e-05, "loss": 0.9119, "step": 6016 }, { "epoch": 0.5376281636026538, "grad_norm": 0.49489137530326843, "learning_rate": 4.633496324878906e-05, "loss": 0.952, "step": 6017 }, { "epoch": 0.5377175151339156, "grad_norm": 0.45840591192245483, "learning_rate": 4.632053161344146e-05, "loss": 0.8933, "step": 6018 }, { "epoch": 0.5378068666651775, "grad_norm": 0.5176952481269836, "learning_rate": 4.6306100286288565e-05, "loss": 0.9606, "step": 6019 }, { "epoch": 0.5378962181964393, "grad_norm": 0.484392374753952, "learning_rate": 4.629166926853913e-05, "loss": 0.9356, "step": 6020 }, { "epoch": 0.5379855697277012, "grad_norm": 0.5076186656951904, "learning_rate": 4.6277238561401927e-05, "loss": 0.9744, "step": 6021 }, { "epoch": 0.5380749212589631, "grad_norm": 0.4623713791370392, "learning_rate": 4.6262808166085674e-05, "loss": 0.9705, "step": 6022 }, { "epoch": 0.5381642727902249, "grad_norm": 0.419096440076828, "learning_rate": 4.624837808379907e-05, "loss": 1.0033, "step": 6023 }, { "epoch": 0.5382536243214868, "grad_norm": 0.3825317323207855, "learning_rate": 4.62339483157508e-05, "loss": 1.0228, "step": 6024 }, { "epoch": 0.5383429758527487, "grad_norm": 0.5461272597312927, "learning_rate": 4.6219518863149493e-05, "loss": 0.9623, "step": 6025 }, { "epoch": 0.5384323273840106, "grad_norm": 0.4370659589767456, "learning_rate": 4.6205089727203785e-05, "loss": 0.9881, "step": 6026 }, { "epoch": 0.5385216789152724, "grad_norm": 0.48183608055114746, "learning_rate": 4.619066090912228e-05, "loss": 0.867, "step": 6027 }, { "epoch": 0.5386110304465342, "grad_norm": 0.4971342980861664, "learning_rate": 4.6176232410113506e-05, "loss": 0.9406, "step": 6028 }, { "epoch": 0.5387003819777961, "grad_norm": 0.44625434279441833, "learning_rate": 4.616180423138603e-05, "loss": 0.9701, "step": 6029 }, { "epoch": 0.538789733509058, "grad_norm": 0.4783417880535126, "learning_rate": 4.614737637414836e-05, "loss": 0.8333, "step": 6030 }, { "epoch": 0.5388790850403199, "grad_norm": 0.39774414896965027, "learning_rate": 4.613294883960898e-05, "loss": 0.9892, "step": 6031 }, { "epoch": 0.5389684365715818, "grad_norm": 0.449542373418808, "learning_rate": 4.611852162897636e-05, "loss": 0.9164, "step": 6032 }, { "epoch": 0.5390577881028437, "grad_norm": 0.38516610860824585, "learning_rate": 4.610409474345894e-05, "loss": 0.9848, "step": 6033 }, { "epoch": 0.5391471396341054, "grad_norm": 0.43511533737182617, "learning_rate": 4.60896681842651e-05, "loss": 0.9101, "step": 6034 }, { "epoch": 0.5392364911653673, "grad_norm": 0.4192296862602234, "learning_rate": 4.6075241952603225e-05, "loss": 1.0098, "step": 6035 }, { "epoch": 0.5393258426966292, "grad_norm": 0.38819652795791626, "learning_rate": 4.6060816049681676e-05, "loss": 0.9525, "step": 6036 }, { "epoch": 0.5394151942278911, "grad_norm": 0.4016992449760437, "learning_rate": 4.6046390476708794e-05, "loss": 0.993, "step": 6037 }, { "epoch": 0.539504545759153, "grad_norm": 0.4347737431526184, "learning_rate": 4.6031965234892834e-05, "loss": 0.9847, "step": 6038 }, { "epoch": 0.5395938972904148, "grad_norm": 0.4962137043476105, "learning_rate": 4.601754032544208e-05, "loss": 0.908, "step": 6039 }, { "epoch": 0.5396832488216767, "grad_norm": 0.4483336806297302, "learning_rate": 4.6003115749564765e-05, "loss": 0.9726, "step": 6040 }, { "epoch": 0.5397726003529385, "grad_norm": 0.5861220359802246, "learning_rate": 4.598869150846912e-05, "loss": 0.8876, "step": 6041 }, { "epoch": 0.5398619518842004, "grad_norm": 0.4730405807495117, "learning_rate": 4.597426760336331e-05, "loss": 0.9645, "step": 6042 }, { "epoch": 0.5399513034154623, "grad_norm": 0.48867109417915344, "learning_rate": 4.59598440354555e-05, "loss": 0.9377, "step": 6043 }, { "epoch": 0.5400406549467242, "grad_norm": 0.4301815927028656, "learning_rate": 4.5945420805953825e-05, "loss": 0.9474, "step": 6044 }, { "epoch": 0.540130006477986, "grad_norm": 0.4912492334842682, "learning_rate": 4.593099791606637e-05, "loss": 0.955, "step": 6045 }, { "epoch": 0.5402193580092479, "grad_norm": 0.5850039124488831, "learning_rate": 4.5916575367001214e-05, "loss": 0.904, "step": 6046 }, { "epoch": 0.5403087095405098, "grad_norm": 0.4344959557056427, "learning_rate": 4.590215315996642e-05, "loss": 0.9921, "step": 6047 }, { "epoch": 0.5403980610717716, "grad_norm": 0.5166114568710327, "learning_rate": 4.588773129616996e-05, "loss": 0.941, "step": 6048 }, { "epoch": 0.5404874126030335, "grad_norm": 0.5347388386726379, "learning_rate": 4.587330977681983e-05, "loss": 0.893, "step": 6049 }, { "epoch": 0.5405767641342953, "grad_norm": 0.398970365524292, "learning_rate": 4.585888860312399e-05, "loss": 0.9573, "step": 6050 }, { "epoch": 0.5406661156655572, "grad_norm": 0.4769752621650696, "learning_rate": 4.584446777629038e-05, "loss": 0.9554, "step": 6051 }, { "epoch": 0.5407554671968191, "grad_norm": 0.5892907381057739, "learning_rate": 4.5830047297526904e-05, "loss": 1.025, "step": 6052 }, { "epoch": 0.540844818728081, "grad_norm": 0.4411056637763977, "learning_rate": 4.58156271680414e-05, "loss": 0.9451, "step": 6053 }, { "epoch": 0.5409341702593429, "grad_norm": 0.4067056477069855, "learning_rate": 4.5801207389041715e-05, "loss": 0.993, "step": 6054 }, { "epoch": 0.5410235217906046, "grad_norm": 0.44847869873046875, "learning_rate": 4.5786787961735673e-05, "loss": 0.9894, "step": 6055 }, { "epoch": 0.5411128733218665, "grad_norm": 0.43127429485321045, "learning_rate": 4.577236888733105e-05, "loss": 0.9412, "step": 6056 }, { "epoch": 0.5412022248531284, "grad_norm": 0.4345093369483948, "learning_rate": 4.575795016703561e-05, "loss": 1.0033, "step": 6057 }, { "epoch": 0.5412915763843903, "grad_norm": 0.44015347957611084, "learning_rate": 4.574353180205705e-05, "loss": 0.9976, "step": 6058 }, { "epoch": 0.5413809279156522, "grad_norm": 0.45786052942276, "learning_rate": 4.572911379360307e-05, "loss": 0.9713, "step": 6059 }, { "epoch": 0.5414702794469141, "grad_norm": 0.47783318161964417, "learning_rate": 4.571469614288133e-05, "loss": 0.9236, "step": 6060 }, { "epoch": 0.5415596309781759, "grad_norm": 0.5037944316864014, "learning_rate": 4.5700278851099464e-05, "loss": 0.9076, "step": 6061 }, { "epoch": 0.5416489825094377, "grad_norm": 0.402452290058136, "learning_rate": 4.568586191946508e-05, "loss": 0.9454, "step": 6062 }, { "epoch": 0.5417383340406996, "grad_norm": 0.45303410291671753, "learning_rate": 4.567144534918574e-05, "loss": 0.9148, "step": 6063 }, { "epoch": 0.5418276855719615, "grad_norm": 0.4717887341976166, "learning_rate": 4.5657029141468996e-05, "loss": 0.9045, "step": 6064 }, { "epoch": 0.5419170371032234, "grad_norm": 0.4819827079772949, "learning_rate": 4.564261329752236e-05, "loss": 0.8857, "step": 6065 }, { "epoch": 0.5420063886344852, "grad_norm": 0.4282495975494385, "learning_rate": 4.562819781855331e-05, "loss": 0.9649, "step": 6066 }, { "epoch": 0.5420957401657471, "grad_norm": 0.43561655282974243, "learning_rate": 4.561378270576929e-05, "loss": 0.9829, "step": 6067 }, { "epoch": 0.5421850916970089, "grad_norm": 0.47019922733306885, "learning_rate": 4.559936796037772e-05, "loss": 0.9926, "step": 6068 }, { "epoch": 0.5422744432282708, "grad_norm": 0.5216493606567383, "learning_rate": 4.5584953583585985e-05, "loss": 0.8186, "step": 6069 }, { "epoch": 0.5423637947595327, "grad_norm": 0.5065385699272156, "learning_rate": 4.5570539576601463e-05, "loss": 0.9491, "step": 6070 }, { "epoch": 0.5424531462907946, "grad_norm": 0.473476380109787, "learning_rate": 4.5556125940631454e-05, "loss": 0.9613, "step": 6071 }, { "epoch": 0.5425424978220564, "grad_norm": 0.4875657558441162, "learning_rate": 4.5541712676883263e-05, "loss": 0.9939, "step": 6072 }, { "epoch": 0.5426318493533183, "grad_norm": 0.5284254550933838, "learning_rate": 4.552729978656416e-05, "loss": 0.8902, "step": 6073 }, { "epoch": 0.5427212008845802, "grad_norm": 0.5050989985466003, "learning_rate": 4.5512887270881374e-05, "loss": 0.8749, "step": 6074 }, { "epoch": 0.542810552415842, "grad_norm": 0.5607906579971313, "learning_rate": 4.5498475131042106e-05, "loss": 0.9388, "step": 6075 }, { "epoch": 0.5428999039471039, "grad_norm": 0.5271878838539124, "learning_rate": 4.548406336825353e-05, "loss": 1.0645, "step": 6076 }, { "epoch": 0.5429892554783657, "grad_norm": 0.41676685214042664, "learning_rate": 4.546965198372279e-05, "loss": 0.9279, "step": 6077 }, { "epoch": 0.5430786070096276, "grad_norm": 0.47751128673553467, "learning_rate": 4.5455240978656996e-05, "loss": 0.9398, "step": 6078 }, { "epoch": 0.5431679585408895, "grad_norm": 0.4766964912414551, "learning_rate": 4.5440830354263205e-05, "loss": 0.9324, "step": 6079 }, { "epoch": 0.5432573100721514, "grad_norm": 0.4669797718524933, "learning_rate": 4.542642011174846e-05, "loss": 0.9492, "step": 6080 }, { "epoch": 0.5433466616034133, "grad_norm": 0.4376409947872162, "learning_rate": 4.5412010252319784e-05, "loss": 1.0007, "step": 6081 }, { "epoch": 0.543436013134675, "grad_norm": 0.42658764123916626, "learning_rate": 4.539760077718416e-05, "loss": 0.9542, "step": 6082 }, { "epoch": 0.5435253646659369, "grad_norm": 0.4167364835739136, "learning_rate": 4.5383191687548513e-05, "loss": 1.0401, "step": 6083 }, { "epoch": 0.5436147161971988, "grad_norm": 0.4156143367290497, "learning_rate": 4.53687829846198e-05, "loss": 1.0165, "step": 6084 }, { "epoch": 0.5437040677284607, "grad_norm": 0.5895379781723022, "learning_rate": 4.535437466960486e-05, "loss": 0.9548, "step": 6085 }, { "epoch": 0.5437934192597226, "grad_norm": 0.44905561208724976, "learning_rate": 4.533996674371056e-05, "loss": 0.9401, "step": 6086 }, { "epoch": 0.5438827707909845, "grad_norm": 0.43985962867736816, "learning_rate": 4.5325559208143717e-05, "loss": 0.9516, "step": 6087 }, { "epoch": 0.5439721223222463, "grad_norm": 0.5113573670387268, "learning_rate": 4.5311152064111134e-05, "loss": 0.9881, "step": 6088 }, { "epoch": 0.5440614738535081, "grad_norm": 0.37436649203300476, "learning_rate": 4.529674531281954e-05, "loss": 0.9438, "step": 6089 }, { "epoch": 0.54415082538477, "grad_norm": 0.5028565526008606, "learning_rate": 4.5282338955475644e-05, "loss": 1.0298, "step": 6090 }, { "epoch": 0.5442401769160319, "grad_norm": 0.4592767059803009, "learning_rate": 4.526793299328616e-05, "loss": 0.9478, "step": 6091 }, { "epoch": 0.5443295284472938, "grad_norm": 0.4168972671031952, "learning_rate": 4.5253527427457715e-05, "loss": 0.9624, "step": 6092 }, { "epoch": 0.5444188799785556, "grad_norm": 0.4067929685115814, "learning_rate": 4.523912225919694e-05, "loss": 0.8968, "step": 6093 }, { "epoch": 0.5445082315098175, "grad_norm": 0.5613819360733032, "learning_rate": 4.522471748971043e-05, "loss": 0.8793, "step": 6094 }, { "epoch": 0.5445975830410794, "grad_norm": 0.4839697778224945, "learning_rate": 4.5210313120204735e-05, "loss": 0.9269, "step": 6095 }, { "epoch": 0.5446869345723412, "grad_norm": 0.42698168754577637, "learning_rate": 4.519590915188637e-05, "loss": 0.9873, "step": 6096 }, { "epoch": 0.5447762861036031, "grad_norm": 0.5424548983573914, "learning_rate": 4.5181505585961816e-05, "loss": 0.9847, "step": 6097 }, { "epoch": 0.544865637634865, "grad_norm": 0.4275175929069519, "learning_rate": 4.5167102423637554e-05, "loss": 1.0299, "step": 6098 }, { "epoch": 0.5449549891661268, "grad_norm": 0.4584430158138275, "learning_rate": 4.515269966611996e-05, "loss": 0.9439, "step": 6099 }, { "epoch": 0.5450443406973887, "grad_norm": 0.4883447289466858, "learning_rate": 4.513829731461543e-05, "loss": 0.9921, "step": 6100 }, { "epoch": 0.5451336922286506, "grad_norm": 0.44181644916534424, "learning_rate": 4.512389537033033e-05, "loss": 0.9704, "step": 6101 }, { "epoch": 0.5452230437599125, "grad_norm": 0.47616517543792725, "learning_rate": 4.510949383447096e-05, "loss": 0.9071, "step": 6102 }, { "epoch": 0.5453123952911743, "grad_norm": 0.6123970746994019, "learning_rate": 4.5095092708243623e-05, "loss": 0.8817, "step": 6103 }, { "epoch": 0.5454017468224361, "grad_norm": 0.4854317009449005, "learning_rate": 4.5080691992854554e-05, "loss": 0.8724, "step": 6104 }, { "epoch": 0.545491098353698, "grad_norm": 0.47287046909332275, "learning_rate": 4.5066291689509953e-05, "loss": 0.9679, "step": 6105 }, { "epoch": 0.5455804498849599, "grad_norm": 0.4474678933620453, "learning_rate": 4.5051891799416025e-05, "loss": 0.943, "step": 6106 }, { "epoch": 0.5456698014162218, "grad_norm": 0.46921437978744507, "learning_rate": 4.503749232377889e-05, "loss": 0.9584, "step": 6107 }, { "epoch": 0.5457591529474837, "grad_norm": 0.5251668691635132, "learning_rate": 4.50230932638047e-05, "loss": 0.9205, "step": 6108 }, { "epoch": 0.5458485044787456, "grad_norm": 0.41507506370544434, "learning_rate": 4.5008694620699474e-05, "loss": 0.9704, "step": 6109 }, { "epoch": 0.5459378560100073, "grad_norm": 0.43505141139030457, "learning_rate": 4.4994296395669276e-05, "loss": 0.926, "step": 6110 }, { "epoch": 0.5460272075412692, "grad_norm": 0.5086851716041565, "learning_rate": 4.497989858992011e-05, "loss": 0.905, "step": 6111 }, { "epoch": 0.5461165590725311, "grad_norm": 0.4543953537940979, "learning_rate": 4.496550120465795e-05, "loss": 0.9689, "step": 6112 }, { "epoch": 0.546205910603793, "grad_norm": 0.4054141938686371, "learning_rate": 4.495110424108873e-05, "loss": 1.0004, "step": 6113 }, { "epoch": 0.5462952621350549, "grad_norm": 0.39775604009628296, "learning_rate": 4.4936707700418346e-05, "loss": 1.021, "step": 6114 }, { "epoch": 0.5463846136663167, "grad_norm": 0.5066169500350952, "learning_rate": 4.492231158385266e-05, "loss": 0.9796, "step": 6115 }, { "epoch": 0.5464739651975786, "grad_norm": 0.43312814831733704, "learning_rate": 4.4907915892597504e-05, "loss": 0.9808, "step": 6116 }, { "epoch": 0.5465633167288404, "grad_norm": 0.4440118372440338, "learning_rate": 4.489352062785869e-05, "loss": 0.9781, "step": 6117 }, { "epoch": 0.5466526682601023, "grad_norm": 0.45043429732322693, "learning_rate": 4.4879125790841944e-05, "loss": 1.0155, "step": 6118 }, { "epoch": 0.5467420197913642, "grad_norm": 0.44489559531211853, "learning_rate": 4.486473138275299e-05, "loss": 0.9426, "step": 6119 }, { "epoch": 0.546831371322626, "grad_norm": 0.45286840200424194, "learning_rate": 4.485033740479752e-05, "loss": 0.982, "step": 6120 }, { "epoch": 0.5469207228538879, "grad_norm": 0.4958401322364807, "learning_rate": 4.483594385818118e-05, "loss": 0.9419, "step": 6121 }, { "epoch": 0.5470100743851498, "grad_norm": 0.4510517120361328, "learning_rate": 4.482155074410961e-05, "loss": 0.9684, "step": 6122 }, { "epoch": 0.5470994259164117, "grad_norm": 0.3880564272403717, "learning_rate": 4.480715806378834e-05, "loss": 1.0172, "step": 6123 }, { "epoch": 0.5471887774476735, "grad_norm": 0.5004435181617737, "learning_rate": 4.479276581842294e-05, "loss": 1.0378, "step": 6124 }, { "epoch": 0.5472781289789354, "grad_norm": 0.43363669514656067, "learning_rate": 4.4778374009218904e-05, "loss": 1.0019, "step": 6125 }, { "epoch": 0.5473674805101972, "grad_norm": 0.4868524670600891, "learning_rate": 4.4763982637381706e-05, "loss": 0.9947, "step": 6126 }, { "epoch": 0.5474568320414591, "grad_norm": 0.43527916073799133, "learning_rate": 4.474959170411677e-05, "loss": 1.0288, "step": 6127 }, { "epoch": 0.547546183572721, "grad_norm": 0.5454886555671692, "learning_rate": 4.473520121062952e-05, "loss": 0.9882, "step": 6128 }, { "epoch": 0.5476355351039829, "grad_norm": 0.4397745132446289, "learning_rate": 4.4720811158125267e-05, "loss": 1.0003, "step": 6129 }, { "epoch": 0.5477248866352447, "grad_norm": 0.5059059262275696, "learning_rate": 4.470642154780935e-05, "loss": 0.9265, "step": 6130 }, { "epoch": 0.5478142381665065, "grad_norm": 0.5163365006446838, "learning_rate": 4.469203238088705e-05, "loss": 0.9582, "step": 6131 }, { "epoch": 0.5479035896977684, "grad_norm": 0.48326799273490906, "learning_rate": 4.467764365856362e-05, "loss": 0.9396, "step": 6132 }, { "epoch": 0.5479929412290303, "grad_norm": 0.46925976872444153, "learning_rate": 4.466325538204427e-05, "loss": 1.0036, "step": 6133 }, { "epoch": 0.5480822927602922, "grad_norm": 0.4270949363708496, "learning_rate": 4.464886755253416e-05, "loss": 1.0472, "step": 6134 }, { "epoch": 0.5481716442915541, "grad_norm": 0.49136438965797424, "learning_rate": 4.463448017123844e-05, "loss": 0.8925, "step": 6135 }, { "epoch": 0.548260995822816, "grad_norm": 0.41291165351867676, "learning_rate": 4.4620093239362204e-05, "loss": 0.9923, "step": 6136 }, { "epoch": 0.5483503473540777, "grad_norm": 0.40838754177093506, "learning_rate": 4.460570675811049e-05, "loss": 0.9903, "step": 6137 }, { "epoch": 0.5484396988853396, "grad_norm": 0.5193032622337341, "learning_rate": 4.459132072868835e-05, "loss": 0.9049, "step": 6138 }, { "epoch": 0.5485290504166015, "grad_norm": 0.4100017547607422, "learning_rate": 4.457693515230074e-05, "loss": 1.0034, "step": 6139 }, { "epoch": 0.5486184019478634, "grad_norm": 0.5860205888748169, "learning_rate": 4.456255003015263e-05, "loss": 0.9951, "step": 6140 }, { "epoch": 0.5487077534791253, "grad_norm": 0.49297282099723816, "learning_rate": 4.4548165363448894e-05, "loss": 0.9113, "step": 6141 }, { "epoch": 0.5487971050103871, "grad_norm": 0.41039666533470154, "learning_rate": 4.4533781153394426e-05, "loss": 0.9713, "step": 6142 }, { "epoch": 0.548886456541649, "grad_norm": 0.40572866797447205, "learning_rate": 4.4519397401194056e-05, "loss": 0.9805, "step": 6143 }, { "epoch": 0.5489758080729108, "grad_norm": 0.43796059489250183, "learning_rate": 4.4505014108052564e-05, "loss": 1.0003, "step": 6144 }, { "epoch": 0.5490651596041727, "grad_norm": 0.4492766857147217, "learning_rate": 4.449063127517472e-05, "loss": 0.9586, "step": 6145 }, { "epoch": 0.5491545111354346, "grad_norm": 0.3965035676956177, "learning_rate": 4.447624890376523e-05, "loss": 0.9954, "step": 6146 }, { "epoch": 0.5492438626666964, "grad_norm": 0.45780253410339355, "learning_rate": 4.4461866995028776e-05, "loss": 0.9275, "step": 6147 }, { "epoch": 0.5493332141979583, "grad_norm": 0.4715741276741028, "learning_rate": 4.4447485550170013e-05, "loss": 0.9706, "step": 6148 }, { "epoch": 0.5494225657292202, "grad_norm": 0.43292608857154846, "learning_rate": 4.44331045703935e-05, "loss": 0.9596, "step": 6149 }, { "epoch": 0.5495119172604821, "grad_norm": 0.4346010982990265, "learning_rate": 4.4418724056903824e-05, "loss": 1.0092, "step": 6150 }, { "epoch": 0.5496012687917439, "grad_norm": 0.5378443002700806, "learning_rate": 4.440434401090549e-05, "loss": 0.9605, "step": 6151 }, { "epoch": 0.5496906203230058, "grad_norm": 0.5118531584739685, "learning_rate": 4.438996443360299e-05, "loss": 0.8896, "step": 6152 }, { "epoch": 0.5497799718542676, "grad_norm": 0.4279499351978302, "learning_rate": 4.437558532620077e-05, "loss": 0.8621, "step": 6153 }, { "epoch": 0.5498693233855295, "grad_norm": 0.4221118092536926, "learning_rate": 4.436120668990324e-05, "loss": 0.9983, "step": 6154 }, { "epoch": 0.5499586749167914, "grad_norm": 0.42579370737075806, "learning_rate": 4.434682852591476e-05, "loss": 0.9634, "step": 6155 }, { "epoch": 0.5500480264480533, "grad_norm": 0.47894287109375, "learning_rate": 4.4332450835439634e-05, "loss": 0.9609, "step": 6156 }, { "epoch": 0.5501373779793152, "grad_norm": 0.5039592385292053, "learning_rate": 4.431807361968217e-05, "loss": 0.9188, "step": 6157 }, { "epoch": 0.5502267295105769, "grad_norm": 0.4221189618110657, "learning_rate": 4.4303696879846593e-05, "loss": 0.9687, "step": 6158 }, { "epoch": 0.5503160810418388, "grad_norm": 0.454010009765625, "learning_rate": 4.428932061713715e-05, "loss": 0.9647, "step": 6159 }, { "epoch": 0.5504054325731007, "grad_norm": 0.409974604845047, "learning_rate": 4.427494483275796e-05, "loss": 0.9301, "step": 6160 }, { "epoch": 0.5504947841043626, "grad_norm": 0.4791439473628998, "learning_rate": 4.426056952791316e-05, "loss": 0.9908, "step": 6161 }, { "epoch": 0.5505841356356245, "grad_norm": 0.4021007716655731, "learning_rate": 4.424619470380684e-05, "loss": 1.0427, "step": 6162 }, { "epoch": 0.5506734871668864, "grad_norm": 0.48248547315597534, "learning_rate": 4.423182036164304e-05, "loss": 0.9028, "step": 6163 }, { "epoch": 0.5507628386981482, "grad_norm": 0.4851128160953522, "learning_rate": 4.4217446502625773e-05, "loss": 0.9881, "step": 6164 }, { "epoch": 0.55085219022941, "grad_norm": 0.42037060856819153, "learning_rate": 4.420307312795901e-05, "loss": 0.9585, "step": 6165 }, { "epoch": 0.5509415417606719, "grad_norm": 0.47032657265663147, "learning_rate": 4.418870023884665e-05, "loss": 0.9322, "step": 6166 }, { "epoch": 0.5510308932919338, "grad_norm": 0.6283010244369507, "learning_rate": 4.4174327836492587e-05, "loss": 0.9798, "step": 6167 }, { "epoch": 0.5511202448231957, "grad_norm": 0.45526832342147827, "learning_rate": 4.4159955922100674e-05, "loss": 0.9473, "step": 6168 }, { "epoch": 0.5512095963544575, "grad_norm": 0.4486526548862457, "learning_rate": 4.414558449687471e-05, "loss": 0.9134, "step": 6169 }, { "epoch": 0.5512989478857194, "grad_norm": 0.38991108536720276, "learning_rate": 4.413121356201844e-05, "loss": 0.9947, "step": 6170 }, { "epoch": 0.5513882994169813, "grad_norm": 0.4334878623485565, "learning_rate": 4.411684311873559e-05, "loss": 0.9613, "step": 6171 }, { "epoch": 0.5514776509482431, "grad_norm": 0.49451541900634766, "learning_rate": 4.4102473168229837e-05, "loss": 0.9073, "step": 6172 }, { "epoch": 0.551567002479505, "grad_norm": 0.41775211691856384, "learning_rate": 4.408810371170483e-05, "loss": 0.9729, "step": 6173 }, { "epoch": 0.5516563540107668, "grad_norm": 0.4632229506969452, "learning_rate": 4.4073734750364144e-05, "loss": 0.9096, "step": 6174 }, { "epoch": 0.5517457055420287, "grad_norm": 0.5613375306129456, "learning_rate": 4.4059366285411344e-05, "loss": 0.8467, "step": 6175 }, { "epoch": 0.5518350570732906, "grad_norm": 0.4471941590309143, "learning_rate": 4.404499831804993e-05, "loss": 0.9103, "step": 6176 }, { "epoch": 0.5519244086045525, "grad_norm": 0.4910937547683716, "learning_rate": 4.403063084948339e-05, "loss": 1.0049, "step": 6177 }, { "epoch": 0.5520137601358144, "grad_norm": 0.551129162311554, "learning_rate": 4.4016263880915146e-05, "loss": 0.9435, "step": 6178 }, { "epoch": 0.5521031116670762, "grad_norm": 0.44475892186164856, "learning_rate": 4.4001897413548605e-05, "loss": 0.9798, "step": 6179 }, { "epoch": 0.552192463198338, "grad_norm": 0.549535870552063, "learning_rate": 4.398753144858707e-05, "loss": 0.878, "step": 6180 }, { "epoch": 0.5522818147295999, "grad_norm": 0.4066050052642822, "learning_rate": 4.397316598723385e-05, "loss": 1.008, "step": 6181 }, { "epoch": 0.5523711662608618, "grad_norm": 0.4381326138973236, "learning_rate": 4.3958801030692245e-05, "loss": 0.9418, "step": 6182 }, { "epoch": 0.5524605177921237, "grad_norm": 0.4462524354457855, "learning_rate": 4.394443658016543e-05, "loss": 0.932, "step": 6183 }, { "epoch": 0.5525498693233856, "grad_norm": 0.4263027608394623, "learning_rate": 4.393007263685661e-05, "loss": 0.968, "step": 6184 }, { "epoch": 0.5526392208546475, "grad_norm": 0.3910346031188965, "learning_rate": 4.3915709201968896e-05, "loss": 1.014, "step": 6185 }, { "epoch": 0.5527285723859092, "grad_norm": 0.45934584736824036, "learning_rate": 4.39013462767054e-05, "loss": 0.9602, "step": 6186 }, { "epoch": 0.5528179239171711, "grad_norm": 0.48381033539772034, "learning_rate": 4.388698386226917e-05, "loss": 0.9204, "step": 6187 }, { "epoch": 0.552907275448433, "grad_norm": 0.4025076627731323, "learning_rate": 4.3872621959863185e-05, "loss": 0.9068, "step": 6188 }, { "epoch": 0.5529966269796949, "grad_norm": 0.5276347398757935, "learning_rate": 4.385826057069044e-05, "loss": 0.9264, "step": 6189 }, { "epoch": 0.5530859785109568, "grad_norm": 0.42190733551979065, "learning_rate": 4.3843899695953826e-05, "loss": 0.941, "step": 6190 }, { "epoch": 0.5531753300422186, "grad_norm": 0.45186877250671387, "learning_rate": 4.382953933685623e-05, "loss": 0.9799, "step": 6191 }, { "epoch": 0.5532646815734804, "grad_norm": 0.4708520472049713, "learning_rate": 4.38151794946005e-05, "loss": 0.9633, "step": 6192 }, { "epoch": 0.5533540331047423, "grad_norm": 0.44095146656036377, "learning_rate": 4.38008201703894e-05, "loss": 0.9377, "step": 6193 }, { "epoch": 0.5534433846360042, "grad_norm": 0.4277689754962921, "learning_rate": 4.378646136542569e-05, "loss": 0.9737, "step": 6194 }, { "epoch": 0.5535327361672661, "grad_norm": 0.4015730321407318, "learning_rate": 4.377210308091207e-05, "loss": 0.9792, "step": 6195 }, { "epoch": 0.5536220876985279, "grad_norm": 0.4145139753818512, "learning_rate": 4.375774531805121e-05, "loss": 0.9336, "step": 6196 }, { "epoch": 0.5537114392297898, "grad_norm": 0.42907387018203735, "learning_rate": 4.374338807804571e-05, "loss": 0.9789, "step": 6197 }, { "epoch": 0.5538007907610517, "grad_norm": 0.4848146140575409, "learning_rate": 4.372903136209815e-05, "loss": 0.9244, "step": 6198 }, { "epoch": 0.5538901422923135, "grad_norm": 0.4261986017227173, "learning_rate": 4.371467517141108e-05, "loss": 0.9835, "step": 6199 }, { "epoch": 0.5539794938235754, "grad_norm": 0.430756539106369, "learning_rate": 4.3700319507186935e-05, "loss": 1.0051, "step": 6200 }, { "epoch": 0.5540688453548372, "grad_norm": 0.3806210160255432, "learning_rate": 4.3685964370628193e-05, "loss": 0.9518, "step": 6201 }, { "epoch": 0.5541581968860991, "grad_norm": 0.44627901911735535, "learning_rate": 4.367160976293723e-05, "loss": 0.9854, "step": 6202 }, { "epoch": 0.554247548417361, "grad_norm": 0.492567241191864, "learning_rate": 4.3657255685316404e-05, "loss": 0.9096, "step": 6203 }, { "epoch": 0.5543368999486229, "grad_norm": 0.3693414330482483, "learning_rate": 4.364290213896802e-05, "loss": 0.9909, "step": 6204 }, { "epoch": 0.5544262514798848, "grad_norm": 0.4952232539653778, "learning_rate": 4.362854912509435e-05, "loss": 0.9532, "step": 6205 }, { "epoch": 0.5545156030111466, "grad_norm": 0.44463467597961426, "learning_rate": 4.361419664489762e-05, "loss": 0.9138, "step": 6206 }, { "epoch": 0.5546049545424084, "grad_norm": 0.4538500905036926, "learning_rate": 4.3599844699579964e-05, "loss": 0.9869, "step": 6207 }, { "epoch": 0.5546943060736703, "grad_norm": 0.4345034658908844, "learning_rate": 4.358549329034355e-05, "loss": 0.8805, "step": 6208 }, { "epoch": 0.5547836576049322, "grad_norm": 0.48444312810897827, "learning_rate": 4.357114241839045e-05, "loss": 0.9715, "step": 6209 }, { "epoch": 0.5548730091361941, "grad_norm": 0.46041545271873474, "learning_rate": 4.35567920849227e-05, "loss": 0.9387, "step": 6210 }, { "epoch": 0.554962360667456, "grad_norm": 0.49429547786712646, "learning_rate": 4.354244229114228e-05, "loss": 0.9641, "step": 6211 }, { "epoch": 0.5550517121987179, "grad_norm": 0.41394224762916565, "learning_rate": 4.352809303825115e-05, "loss": 0.9933, "step": 6212 }, { "epoch": 0.5551410637299796, "grad_norm": 0.4146006405353546, "learning_rate": 4.351374432745122e-05, "loss": 0.8814, "step": 6213 }, { "epoch": 0.5552304152612415, "grad_norm": 0.5136590600013733, "learning_rate": 4.349939615994433e-05, "loss": 0.9119, "step": 6214 }, { "epoch": 0.5553197667925034, "grad_norm": 0.5504260659217834, "learning_rate": 4.3485048536932314e-05, "loss": 0.9742, "step": 6215 }, { "epoch": 0.5554091183237653, "grad_norm": 0.5039506554603577, "learning_rate": 4.347070145961692e-05, "loss": 0.9296, "step": 6216 }, { "epoch": 0.5554984698550272, "grad_norm": 0.46146827936172485, "learning_rate": 4.345635492919988e-05, "loss": 0.9909, "step": 6217 }, { "epoch": 0.555587821386289, "grad_norm": 0.4409083127975464, "learning_rate": 4.344200894688287e-05, "loss": 0.9877, "step": 6218 }, { "epoch": 0.5556771729175509, "grad_norm": 0.49056732654571533, "learning_rate": 4.342766351386753e-05, "loss": 0.9094, "step": 6219 }, { "epoch": 0.5557665244488127, "grad_norm": 0.4934116303920746, "learning_rate": 4.3413318631355403e-05, "loss": 0.9019, "step": 6220 }, { "epoch": 0.5558558759800746, "grad_norm": 0.4715670943260193, "learning_rate": 4.339897430054806e-05, "loss": 0.9102, "step": 6221 }, { "epoch": 0.5559452275113365, "grad_norm": 0.41046231985092163, "learning_rate": 4.338463052264697e-05, "loss": 0.9674, "step": 6222 }, { "epoch": 0.5560345790425983, "grad_norm": 0.39487752318382263, "learning_rate": 4.3370287298853585e-05, "loss": 1.0071, "step": 6223 }, { "epoch": 0.5561239305738602, "grad_norm": 0.510352611541748, "learning_rate": 4.3355944630369315e-05, "loss": 0.9072, "step": 6224 }, { "epoch": 0.5562132821051221, "grad_norm": 0.409078985452652, "learning_rate": 4.334160251839551e-05, "loss": 0.9709, "step": 6225 }, { "epoch": 0.556302633636384, "grad_norm": 0.4232335388660431, "learning_rate": 4.332726096413346e-05, "loss": 0.9738, "step": 6226 }, { "epoch": 0.5563919851676458, "grad_norm": 0.44725194573402405, "learning_rate": 4.331291996878443e-05, "loss": 0.8612, "step": 6227 }, { "epoch": 0.5564813366989076, "grad_norm": 0.44963935017585754, "learning_rate": 4.329857953354963e-05, "loss": 0.9369, "step": 6228 }, { "epoch": 0.5565706882301695, "grad_norm": 0.48645251989364624, "learning_rate": 4.328423965963025e-05, "loss": 0.9099, "step": 6229 }, { "epoch": 0.5566600397614314, "grad_norm": 0.5652446746826172, "learning_rate": 4.326990034822736e-05, "loss": 0.8716, "step": 6230 }, { "epoch": 0.5567493912926933, "grad_norm": 0.3996671438217163, "learning_rate": 4.325556160054205e-05, "loss": 0.9745, "step": 6231 }, { "epoch": 0.5568387428239552, "grad_norm": 0.5085249543190002, "learning_rate": 4.324122341777535e-05, "loss": 1.0347, "step": 6232 }, { "epoch": 0.5569280943552171, "grad_norm": 0.3927776515483856, "learning_rate": 4.322688580112824e-05, "loss": 1.0099, "step": 6233 }, { "epoch": 0.5570174458864788, "grad_norm": 0.5127935409545898, "learning_rate": 4.321254875180163e-05, "loss": 1.0354, "step": 6234 }, { "epoch": 0.5571067974177407, "grad_norm": 0.4555366039276123, "learning_rate": 4.319821227099641e-05, "loss": 0.9839, "step": 6235 }, { "epoch": 0.5571961489490026, "grad_norm": 0.4541626274585724, "learning_rate": 4.318387635991342e-05, "loss": 0.9915, "step": 6236 }, { "epoch": 0.5572855004802645, "grad_norm": 0.4412585198879242, "learning_rate": 4.316954101975343e-05, "loss": 1.0239, "step": 6237 }, { "epoch": 0.5573748520115264, "grad_norm": 0.42731690406799316, "learning_rate": 4.31552062517172e-05, "loss": 0.9534, "step": 6238 }, { "epoch": 0.5574642035427883, "grad_norm": 0.4002136290073395, "learning_rate": 4.314087205700542e-05, "loss": 0.9607, "step": 6239 }, { "epoch": 0.5575535550740501, "grad_norm": 0.4313288629055023, "learning_rate": 4.3126538436818704e-05, "loss": 0.926, "step": 6240 }, { "epoch": 0.5576429066053119, "grad_norm": 0.4170110821723938, "learning_rate": 4.311220539235765e-05, "loss": 0.9685, "step": 6241 }, { "epoch": 0.5577322581365738, "grad_norm": 0.4707728624343872, "learning_rate": 4.3097872924822816e-05, "loss": 0.9372, "step": 6242 }, { "epoch": 0.5578216096678357, "grad_norm": 0.5320422649383545, "learning_rate": 4.308354103541471e-05, "loss": 0.99, "step": 6243 }, { "epoch": 0.5579109611990976, "grad_norm": 0.44399821758270264, "learning_rate": 4.3069209725333756e-05, "loss": 0.9685, "step": 6244 }, { "epoch": 0.5580003127303594, "grad_norm": 0.5755261182785034, "learning_rate": 4.305487899578036e-05, "loss": 0.9169, "step": 6245 }, { "epoch": 0.5580896642616213, "grad_norm": 0.4340269863605499, "learning_rate": 4.3040548847954885e-05, "loss": 0.9954, "step": 6246 }, { "epoch": 0.5581790157928832, "grad_norm": 0.4525034725666046, "learning_rate": 4.3026219283057625e-05, "loss": 0.9127, "step": 6247 }, { "epoch": 0.558268367324145, "grad_norm": 0.45277920365333557, "learning_rate": 4.301189030228883e-05, "loss": 0.9261, "step": 6248 }, { "epoch": 0.5583577188554069, "grad_norm": 0.5205567479133606, "learning_rate": 4.299756190684871e-05, "loss": 0.9047, "step": 6249 }, { "epoch": 0.5584470703866687, "grad_norm": 0.5122022032737732, "learning_rate": 4.2983234097937444e-05, "loss": 0.9429, "step": 6250 }, { "epoch": 0.5585364219179306, "grad_norm": 0.5380150675773621, "learning_rate": 4.29689068767551e-05, "loss": 0.974, "step": 6251 }, { "epoch": 0.5586257734491925, "grad_norm": 0.45227405428886414, "learning_rate": 4.295458024450174e-05, "loss": 0.9372, "step": 6252 }, { "epoch": 0.5587151249804544, "grad_norm": 0.47980716824531555, "learning_rate": 4.2940254202377395e-05, "loss": 0.9399, "step": 6253 }, { "epoch": 0.5588044765117163, "grad_norm": 0.5136576890945435, "learning_rate": 4.2925928751582e-05, "loss": 0.8619, "step": 6254 }, { "epoch": 0.558893828042978, "grad_norm": 0.4292888045310974, "learning_rate": 4.291160389331549e-05, "loss": 0.9736, "step": 6255 }, { "epoch": 0.5589831795742399, "grad_norm": 0.4159403443336487, "learning_rate": 4.28972796287777e-05, "loss": 0.9077, "step": 6256 }, { "epoch": 0.5590725311055018, "grad_norm": 0.43436843156814575, "learning_rate": 4.2882955959168454e-05, "loss": 0.965, "step": 6257 }, { "epoch": 0.5591618826367637, "grad_norm": 0.4158000349998474, "learning_rate": 4.286863288568752e-05, "loss": 1.0099, "step": 6258 }, { "epoch": 0.5592512341680256, "grad_norm": 0.4041937589645386, "learning_rate": 4.2854310409534583e-05, "loss": 0.9982, "step": 6259 }, { "epoch": 0.5593405856992875, "grad_norm": 0.5290785431861877, "learning_rate": 4.283998853190933e-05, "loss": 0.9658, "step": 6260 }, { "epoch": 0.5594299372305492, "grad_norm": 0.4550890326499939, "learning_rate": 4.2825667254011346e-05, "loss": 0.9857, "step": 6261 }, { "epoch": 0.5595192887618111, "grad_norm": 0.5154798030853271, "learning_rate": 4.281134657704022e-05, "loss": 0.8585, "step": 6262 }, { "epoch": 0.559608640293073, "grad_norm": 0.46254241466522217, "learning_rate": 4.279702650219543e-05, "loss": 0.9572, "step": 6263 }, { "epoch": 0.5596979918243349, "grad_norm": 0.41614094376564026, "learning_rate": 4.278270703067644e-05, "loss": 0.9475, "step": 6264 }, { "epoch": 0.5597873433555968, "grad_norm": 0.39605072140693665, "learning_rate": 4.276838816368267e-05, "loss": 0.984, "step": 6265 }, { "epoch": 0.5598766948868587, "grad_norm": 0.4425526261329651, "learning_rate": 4.275406990241348e-05, "loss": 1.0881, "step": 6266 }, { "epoch": 0.5599660464181205, "grad_norm": 0.43981900811195374, "learning_rate": 4.273975224806816e-05, "loss": 0.9565, "step": 6267 }, { "epoch": 0.5600553979493823, "grad_norm": 0.48525822162628174, "learning_rate": 4.272543520184599e-05, "loss": 0.8835, "step": 6268 }, { "epoch": 0.5601447494806442, "grad_norm": 0.43978825211524963, "learning_rate": 4.271111876494616e-05, "loss": 0.8794, "step": 6269 }, { "epoch": 0.5602341010119061, "grad_norm": 0.4795457124710083, "learning_rate": 4.2696802938567854e-05, "loss": 0.9744, "step": 6270 }, { "epoch": 0.560323452543168, "grad_norm": 0.46985960006713867, "learning_rate": 4.2682487723910116e-05, "loss": 0.9602, "step": 6271 }, { "epoch": 0.5604128040744298, "grad_norm": 0.4838345944881439, "learning_rate": 4.266817312217204e-05, "loss": 1.0128, "step": 6272 }, { "epoch": 0.5605021556056917, "grad_norm": 0.4208124279975891, "learning_rate": 4.2653859134552616e-05, "loss": 0.9531, "step": 6273 }, { "epoch": 0.5605915071369536, "grad_norm": 0.36559927463531494, "learning_rate": 4.263954576225079e-05, "loss": 0.9299, "step": 6274 }, { "epoch": 0.5606808586682154, "grad_norm": 0.6015887260437012, "learning_rate": 4.262523300646546e-05, "loss": 0.8584, "step": 6275 }, { "epoch": 0.5607702101994773, "grad_norm": 0.6308769583702087, "learning_rate": 4.261092086839549e-05, "loss": 0.8135, "step": 6276 }, { "epoch": 0.5608595617307391, "grad_norm": 0.46543291211128235, "learning_rate": 4.259660934923965e-05, "loss": 0.9407, "step": 6277 }, { "epoch": 0.560948913262001, "grad_norm": 0.4775448739528656, "learning_rate": 4.258229845019669e-05, "loss": 0.9693, "step": 6278 }, { "epoch": 0.5610382647932629, "grad_norm": 0.5649442672729492, "learning_rate": 4.2567988172465304e-05, "loss": 1.0061, "step": 6279 }, { "epoch": 0.5611276163245248, "grad_norm": 0.5074711441993713, "learning_rate": 4.2553678517244144e-05, "loss": 0.9523, "step": 6280 }, { "epoch": 0.5612169678557867, "grad_norm": 0.49485981464385986, "learning_rate": 4.253936948573176e-05, "loss": 0.9723, "step": 6281 }, { "epoch": 0.5613063193870484, "grad_norm": 0.41250553727149963, "learning_rate": 4.2525061079126705e-05, "loss": 0.9542, "step": 6282 }, { "epoch": 0.5613956709183103, "grad_norm": 0.42612266540527344, "learning_rate": 4.251075329862747e-05, "loss": 1.0109, "step": 6283 }, { "epoch": 0.5614850224495722, "grad_norm": 0.41712063550949097, "learning_rate": 4.249644614543247e-05, "loss": 0.9561, "step": 6284 }, { "epoch": 0.5615743739808341, "grad_norm": 0.42624518275260925, "learning_rate": 4.2482139620740084e-05, "loss": 0.9082, "step": 6285 }, { "epoch": 0.561663725512096, "grad_norm": 0.4930400848388672, "learning_rate": 4.246783372574864e-05, "loss": 0.9108, "step": 6286 }, { "epoch": 0.5617530770433579, "grad_norm": 0.46004951000213623, "learning_rate": 4.245352846165641e-05, "loss": 1.0204, "step": 6287 }, { "epoch": 0.5618424285746197, "grad_norm": 0.46289774775505066, "learning_rate": 4.243922382966162e-05, "loss": 0.9439, "step": 6288 }, { "epoch": 0.5619317801058815, "grad_norm": 0.39655157923698425, "learning_rate": 4.2424919830962414e-05, "loss": 0.9299, "step": 6289 }, { "epoch": 0.5620211316371434, "grad_norm": 0.53416508436203, "learning_rate": 4.241061646675695e-05, "loss": 1.0126, "step": 6290 }, { "epoch": 0.5621104831684053, "grad_norm": 0.5086248517036438, "learning_rate": 4.239631373824322e-05, "loss": 0.9606, "step": 6291 }, { "epoch": 0.5621998346996672, "grad_norm": 0.4238837659358978, "learning_rate": 4.2382011646619265e-05, "loss": 1.0183, "step": 6292 }, { "epoch": 0.562289186230929, "grad_norm": 0.4114820063114166, "learning_rate": 4.236771019308304e-05, "loss": 0.9271, "step": 6293 }, { "epoch": 0.5623785377621909, "grad_norm": 0.5694311261177063, "learning_rate": 4.235340937883245e-05, "loss": 0.9144, "step": 6294 }, { "epoch": 0.5624678892934528, "grad_norm": 0.41458985209465027, "learning_rate": 4.233910920506533e-05, "loss": 0.9604, "step": 6295 }, { "epoch": 0.5625572408247146, "grad_norm": 0.5260997414588928, "learning_rate": 4.232480967297947e-05, "loss": 0.9441, "step": 6296 }, { "epoch": 0.5626465923559765, "grad_norm": 0.4400598406791687, "learning_rate": 4.2310510783772605e-05, "loss": 0.9635, "step": 6297 }, { "epoch": 0.5627359438872384, "grad_norm": 0.6165459156036377, "learning_rate": 4.229621253864243e-05, "loss": 0.8765, "step": 6298 }, { "epoch": 0.5628252954185002, "grad_norm": 0.535810649394989, "learning_rate": 4.228191493878657e-05, "loss": 0.8623, "step": 6299 }, { "epoch": 0.5629146469497621, "grad_norm": 0.556769073009491, "learning_rate": 4.2267617985402625e-05, "loss": 0.9318, "step": 6300 }, { "epoch": 0.563003998481024, "grad_norm": 0.44776806235313416, "learning_rate": 4.225332167968807e-05, "loss": 0.9505, "step": 6301 }, { "epoch": 0.5630933500122859, "grad_norm": 0.5459929704666138, "learning_rate": 4.2239026022840404e-05, "loss": 0.9728, "step": 6302 }, { "epoch": 0.5631827015435477, "grad_norm": 0.4663999378681183, "learning_rate": 4.222473101605703e-05, "loss": 0.9274, "step": 6303 }, { "epoch": 0.5632720530748095, "grad_norm": 0.4985668957233429, "learning_rate": 4.221043666053531e-05, "loss": 0.9777, "step": 6304 }, { "epoch": 0.5633614046060714, "grad_norm": 0.4315873086452484, "learning_rate": 4.2196142957472554e-05, "loss": 0.9759, "step": 6305 }, { "epoch": 0.5634507561373333, "grad_norm": 0.45682254433631897, "learning_rate": 4.218184990806601e-05, "loss": 0.9806, "step": 6306 }, { "epoch": 0.5635401076685952, "grad_norm": 0.5467448234558105, "learning_rate": 4.216755751351287e-05, "loss": 0.8838, "step": 6307 }, { "epoch": 0.5636294591998571, "grad_norm": 0.4684566557407379, "learning_rate": 4.215326577501028e-05, "loss": 0.8951, "step": 6308 }, { "epoch": 0.563718810731119, "grad_norm": 0.4865800142288208, "learning_rate": 4.213897469375533e-05, "loss": 0.9647, "step": 6309 }, { "epoch": 0.5638081622623807, "grad_norm": 0.5342085361480713, "learning_rate": 4.212468427094503e-05, "loss": 0.9468, "step": 6310 }, { "epoch": 0.5638975137936426, "grad_norm": 0.38983017206192017, "learning_rate": 4.2110394507776377e-05, "loss": 1.0299, "step": 6311 }, { "epoch": 0.5639868653249045, "grad_norm": 0.42452338337898254, "learning_rate": 4.2096105405446264e-05, "loss": 0.9127, "step": 6312 }, { "epoch": 0.5640762168561664, "grad_norm": 0.5090858936309814, "learning_rate": 4.2081816965151595e-05, "loss": 0.858, "step": 6313 }, { "epoch": 0.5641655683874283, "grad_norm": 0.4234965443611145, "learning_rate": 4.206752918808914e-05, "loss": 0.9866, "step": 6314 }, { "epoch": 0.5642549199186901, "grad_norm": 0.5019885897636414, "learning_rate": 4.205324207545567e-05, "loss": 0.9501, "step": 6315 }, { "epoch": 0.564344271449952, "grad_norm": 0.5135182738304138, "learning_rate": 4.203895562844789e-05, "loss": 1.013, "step": 6316 }, { "epoch": 0.5644336229812138, "grad_norm": 0.42812684178352356, "learning_rate": 4.202466984826242e-05, "loss": 1.0301, "step": 6317 }, { "epoch": 0.5645229745124757, "grad_norm": 0.5810215473175049, "learning_rate": 4.201038473609587e-05, "loss": 0.938, "step": 6318 }, { "epoch": 0.5646123260437376, "grad_norm": 0.4419316351413727, "learning_rate": 4.199610029314476e-05, "loss": 0.9318, "step": 6319 }, { "epoch": 0.5647016775749994, "grad_norm": 0.48427048325538635, "learning_rate": 4.198181652060559e-05, "loss": 0.9813, "step": 6320 }, { "epoch": 0.5647910291062613, "grad_norm": 0.4527822732925415, "learning_rate": 4.196753341967473e-05, "loss": 0.9851, "step": 6321 }, { "epoch": 0.5648803806375232, "grad_norm": 0.45797282457351685, "learning_rate": 4.195325099154857e-05, "loss": 0.9875, "step": 6322 }, { "epoch": 0.564969732168785, "grad_norm": 0.5200628042221069, "learning_rate": 4.193896923742341e-05, "loss": 0.9328, "step": 6323 }, { "epoch": 0.5650590837000469, "grad_norm": 0.4386134147644043, "learning_rate": 4.192468815849549e-05, "loss": 0.9706, "step": 6324 }, { "epoch": 0.5651484352313088, "grad_norm": 0.4257854223251343, "learning_rate": 4.1910407755961025e-05, "loss": 0.9642, "step": 6325 }, { "epoch": 0.5652377867625706, "grad_norm": 0.42326584458351135, "learning_rate": 4.189612803101614e-05, "loss": 0.9783, "step": 6326 }, { "epoch": 0.5653271382938325, "grad_norm": 0.38882023096084595, "learning_rate": 4.188184898485691e-05, "loss": 0.9474, "step": 6327 }, { "epoch": 0.5654164898250944, "grad_norm": 0.4262528717517853, "learning_rate": 4.186757061867937e-05, "loss": 1.0289, "step": 6328 }, { "epoch": 0.5655058413563563, "grad_norm": 0.5065363049507141, "learning_rate": 4.185329293367947e-05, "loss": 0.8959, "step": 6329 }, { "epoch": 0.5655951928876181, "grad_norm": 0.4666295051574707, "learning_rate": 4.1839015931053125e-05, "loss": 0.9883, "step": 6330 }, { "epoch": 0.5656845444188799, "grad_norm": 0.4304862916469574, "learning_rate": 4.182473961199619e-05, "loss": 0.9752, "step": 6331 }, { "epoch": 0.5657738959501418, "grad_norm": 0.4546220600605011, "learning_rate": 4.1810463977704464e-05, "loss": 0.9584, "step": 6332 }, { "epoch": 0.5658632474814037, "grad_norm": 0.4094926714897156, "learning_rate": 4.179618902937365e-05, "loss": 0.957, "step": 6333 }, { "epoch": 0.5659525990126656, "grad_norm": 0.41204431653022766, "learning_rate": 4.178191476819946e-05, "loss": 1.0065, "step": 6334 }, { "epoch": 0.5660419505439275, "grad_norm": 0.5892501473426819, "learning_rate": 4.17676411953775e-05, "loss": 1.0111, "step": 6335 }, { "epoch": 0.5661313020751894, "grad_norm": 0.5121837854385376, "learning_rate": 4.175336831210335e-05, "loss": 0.8916, "step": 6336 }, { "epoch": 0.5662206536064511, "grad_norm": 0.5115182995796204, "learning_rate": 4.17390961195725e-05, "loss": 0.9181, "step": 6337 }, { "epoch": 0.566310005137713, "grad_norm": 0.4587537348270416, "learning_rate": 4.172482461898041e-05, "loss": 0.9743, "step": 6338 }, { "epoch": 0.5663993566689749, "grad_norm": 0.533208966255188, "learning_rate": 4.171055381152246e-05, "loss": 0.9295, "step": 6339 }, { "epoch": 0.5664887082002368, "grad_norm": 0.4383775591850281, "learning_rate": 4.169628369839399e-05, "loss": 0.9257, "step": 6340 }, { "epoch": 0.5665780597314987, "grad_norm": 0.572481632232666, "learning_rate": 4.1682014280790294e-05, "loss": 0.9358, "step": 6341 }, { "epoch": 0.5666674112627605, "grad_norm": 0.4642484486103058, "learning_rate": 4.166774555990654e-05, "loss": 0.9556, "step": 6342 }, { "epoch": 0.5667567627940224, "grad_norm": 0.4034402668476105, "learning_rate": 4.165347753693791e-05, "loss": 1.0468, "step": 6343 }, { "epoch": 0.5668461143252842, "grad_norm": 0.5274239778518677, "learning_rate": 4.1639210213079513e-05, "loss": 0.9687, "step": 6344 }, { "epoch": 0.5669354658565461, "grad_norm": 0.4413708746433258, "learning_rate": 4.162494358952637e-05, "loss": 0.9749, "step": 6345 }, { "epoch": 0.567024817387808, "grad_norm": 0.4011813700199127, "learning_rate": 4.161067766747349e-05, "loss": 1.012, "step": 6346 }, { "epoch": 0.5671141689190698, "grad_norm": 0.4661290943622589, "learning_rate": 4.159641244811577e-05, "loss": 0.9554, "step": 6347 }, { "epoch": 0.5672035204503317, "grad_norm": 0.40306004881858826, "learning_rate": 4.1582147932648074e-05, "loss": 0.9564, "step": 6348 }, { "epoch": 0.5672928719815936, "grad_norm": 0.4345554709434509, "learning_rate": 4.156788412226522e-05, "loss": 0.9711, "step": 6349 }, { "epoch": 0.5673822235128555, "grad_norm": 0.4540269076824188, "learning_rate": 4.155362101816196e-05, "loss": 0.91, "step": 6350 }, { "epoch": 0.5674715750441173, "grad_norm": 0.44446346163749695, "learning_rate": 4.153935862153298e-05, "loss": 0.9575, "step": 6351 }, { "epoch": 0.5675609265753792, "grad_norm": 0.5166485905647278, "learning_rate": 4.152509693357289e-05, "loss": 0.9601, "step": 6352 }, { "epoch": 0.567650278106641, "grad_norm": 0.39662766456604004, "learning_rate": 4.1510835955476256e-05, "loss": 1.0281, "step": 6353 }, { "epoch": 0.5677396296379029, "grad_norm": 0.4909988045692444, "learning_rate": 4.1496575688437605e-05, "loss": 0.9623, "step": 6354 }, { "epoch": 0.5678289811691648, "grad_norm": 0.5233615040779114, "learning_rate": 4.1482316133651375e-05, "loss": 0.9088, "step": 6355 }, { "epoch": 0.5679183327004267, "grad_norm": 0.48598557710647583, "learning_rate": 4.146805729231197e-05, "loss": 0.9457, "step": 6356 }, { "epoch": 0.5680076842316886, "grad_norm": 0.4184345304965973, "learning_rate": 4.145379916561371e-05, "loss": 1.0264, "step": 6357 }, { "epoch": 0.5680970357629503, "grad_norm": 0.48690488934516907, "learning_rate": 4.143954175475086e-05, "loss": 0.9859, "step": 6358 }, { "epoch": 0.5681863872942122, "grad_norm": 0.5679425001144409, "learning_rate": 4.142528506091764e-05, "loss": 1.0039, "step": 6359 }, { "epoch": 0.5682757388254741, "grad_norm": 0.44501417875289917, "learning_rate": 4.141102908530819e-05, "loss": 0.974, "step": 6360 }, { "epoch": 0.568365090356736, "grad_norm": 0.4163120687007904, "learning_rate": 4.139677382911663e-05, "loss": 0.8965, "step": 6361 }, { "epoch": 0.5684544418879979, "grad_norm": 0.46128639578819275, "learning_rate": 4.138251929353695e-05, "loss": 0.9975, "step": 6362 }, { "epoch": 0.5685437934192598, "grad_norm": 0.42836207151412964, "learning_rate": 4.1368265479763127e-05, "loss": 0.9683, "step": 6363 }, { "epoch": 0.5686331449505216, "grad_norm": 0.4289848208427429, "learning_rate": 4.135401238898908e-05, "loss": 0.9996, "step": 6364 }, { "epoch": 0.5687224964817834, "grad_norm": 0.5396338105201721, "learning_rate": 4.133976002240867e-05, "loss": 0.929, "step": 6365 }, { "epoch": 0.5688118480130453, "grad_norm": 0.5357837080955505, "learning_rate": 4.132550838121565e-05, "loss": 0.9172, "step": 6366 }, { "epoch": 0.5689011995443072, "grad_norm": 0.48109525442123413, "learning_rate": 4.1311257466603774e-05, "loss": 0.9564, "step": 6367 }, { "epoch": 0.5689905510755691, "grad_norm": 0.5908039212226868, "learning_rate": 4.12970072797667e-05, "loss": 0.9478, "step": 6368 }, { "epoch": 0.569079902606831, "grad_norm": 0.4044991731643677, "learning_rate": 4.128275782189803e-05, "loss": 0.9789, "step": 6369 }, { "epoch": 0.5691692541380928, "grad_norm": 0.44325581192970276, "learning_rate": 4.1268509094191315e-05, "loss": 0.9043, "step": 6370 }, { "epoch": 0.5692586056693547, "grad_norm": 0.37263184785842896, "learning_rate": 4.125426109784006e-05, "loss": 1.0072, "step": 6371 }, { "epoch": 0.5693479572006165, "grad_norm": 0.4318254888057709, "learning_rate": 4.1240013834037626e-05, "loss": 0.8824, "step": 6372 }, { "epoch": 0.5694373087318784, "grad_norm": 0.49909618496894836, "learning_rate": 4.122576730397742e-05, "loss": 0.9341, "step": 6373 }, { "epoch": 0.5695266602631402, "grad_norm": 0.6334441304206848, "learning_rate": 4.1211521508852726e-05, "loss": 0.8383, "step": 6374 }, { "epoch": 0.5696160117944021, "grad_norm": 0.5850768685340881, "learning_rate": 4.119727644985678e-05, "loss": 0.88, "step": 6375 }, { "epoch": 0.569705363325664, "grad_norm": 0.48001495003700256, "learning_rate": 4.1183032128182766e-05, "loss": 0.9366, "step": 6376 }, { "epoch": 0.5697947148569259, "grad_norm": 0.504117488861084, "learning_rate": 4.1168788545023796e-05, "loss": 0.9801, "step": 6377 }, { "epoch": 0.5698840663881878, "grad_norm": 0.4841177761554718, "learning_rate": 4.115454570157291e-05, "loss": 0.9103, "step": 6378 }, { "epoch": 0.5699734179194496, "grad_norm": 0.475758820772171, "learning_rate": 4.114030359902313e-05, "loss": 0.9221, "step": 6379 }, { "epoch": 0.5700627694507114, "grad_norm": 0.4464789927005768, "learning_rate": 4.112606223856734e-05, "loss": 1.0044, "step": 6380 }, { "epoch": 0.5701521209819733, "grad_norm": 0.4405708312988281, "learning_rate": 4.1111821621398446e-05, "loss": 0.9311, "step": 6381 }, { "epoch": 0.5702414725132352, "grad_norm": 0.49968060851097107, "learning_rate": 4.109758174870921e-05, "loss": 0.898, "step": 6382 }, { "epoch": 0.5703308240444971, "grad_norm": 0.4581187963485718, "learning_rate": 4.10833426216924e-05, "loss": 0.95, "step": 6383 }, { "epoch": 0.570420175575759, "grad_norm": 0.46304047107696533, "learning_rate": 4.1069104241540715e-05, "loss": 0.9737, "step": 6384 }, { "epoch": 0.5705095271070207, "grad_norm": 0.5021114349365234, "learning_rate": 4.105486660944672e-05, "loss": 0.9034, "step": 6385 }, { "epoch": 0.5705988786382826, "grad_norm": 0.5141803622245789, "learning_rate": 4.1040629726602996e-05, "loss": 0.9224, "step": 6386 }, { "epoch": 0.5706882301695445, "grad_norm": 0.4306526482105255, "learning_rate": 4.102639359420204e-05, "loss": 0.9798, "step": 6387 }, { "epoch": 0.5707775817008064, "grad_norm": 0.5190479755401611, "learning_rate": 4.101215821343626e-05, "loss": 0.9899, "step": 6388 }, { "epoch": 0.5708669332320683, "grad_norm": 0.5050314664840698, "learning_rate": 4.0997923585498046e-05, "loss": 0.935, "step": 6389 }, { "epoch": 0.5709562847633302, "grad_norm": 0.47217750549316406, "learning_rate": 4.098368971157968e-05, "loss": 0.9683, "step": 6390 }, { "epoch": 0.571045636294592, "grad_norm": 0.43386927247047424, "learning_rate": 4.0969456592873435e-05, "loss": 0.925, "step": 6391 }, { "epoch": 0.5711349878258538, "grad_norm": 0.577601969242096, "learning_rate": 4.095522423057143e-05, "loss": 0.9045, "step": 6392 }, { "epoch": 0.5712243393571157, "grad_norm": 0.38935279846191406, "learning_rate": 4.094099262586581e-05, "loss": 0.9945, "step": 6393 }, { "epoch": 0.5713136908883776, "grad_norm": 0.4525635838508606, "learning_rate": 4.092676177994862e-05, "loss": 1.0153, "step": 6394 }, { "epoch": 0.5714030424196395, "grad_norm": 0.4267630875110626, "learning_rate": 4.091253169401184e-05, "loss": 1.0024, "step": 6395 }, { "epoch": 0.5714923939509013, "grad_norm": 0.47713613510131836, "learning_rate": 4.0898302369247405e-05, "loss": 0.9021, "step": 6396 }, { "epoch": 0.5715817454821632, "grad_norm": 0.4627811908721924, "learning_rate": 4.088407380684715e-05, "loss": 0.9642, "step": 6397 }, { "epoch": 0.5716710970134251, "grad_norm": 0.5233446955680847, "learning_rate": 4.086984600800291e-05, "loss": 0.9229, "step": 6398 }, { "epoch": 0.5717604485446869, "grad_norm": 0.4403021037578583, "learning_rate": 4.0855618973906365e-05, "loss": 0.9546, "step": 6399 }, { "epoch": 0.5718498000759488, "grad_norm": 0.42531147599220276, "learning_rate": 4.08413927057492e-05, "loss": 1.0129, "step": 6400 }, { "epoch": 0.5719391516072106, "grad_norm": 0.46862322092056274, "learning_rate": 4.082716720472304e-05, "loss": 0.9922, "step": 6401 }, { "epoch": 0.5720285031384725, "grad_norm": 0.43281012773513794, "learning_rate": 4.08129424720194e-05, "loss": 0.9253, "step": 6402 }, { "epoch": 0.5721178546697344, "grad_norm": 0.44874048233032227, "learning_rate": 4.079871850882975e-05, "loss": 0.9576, "step": 6403 }, { "epoch": 0.5722072062009963, "grad_norm": 0.3677184283733368, "learning_rate": 4.0784495316345496e-05, "loss": 1.0069, "step": 6404 }, { "epoch": 0.5722965577322582, "grad_norm": 0.5108903050422668, "learning_rate": 4.077027289575799e-05, "loss": 0.9036, "step": 6405 }, { "epoch": 0.57238590926352, "grad_norm": 0.5099184513092041, "learning_rate": 4.0756051248258506e-05, "loss": 0.9907, "step": 6406 }, { "epoch": 0.5724752607947818, "grad_norm": 0.506807804107666, "learning_rate": 4.074183037503827e-05, "loss": 0.8992, "step": 6407 }, { "epoch": 0.5725646123260437, "grad_norm": 0.4727827310562134, "learning_rate": 4.072761027728842e-05, "loss": 0.9714, "step": 6408 }, { "epoch": 0.5726539638573056, "grad_norm": 0.41330844163894653, "learning_rate": 4.0713390956200046e-05, "loss": 0.9732, "step": 6409 }, { "epoch": 0.5727433153885675, "grad_norm": 0.5296694040298462, "learning_rate": 4.069917241296417e-05, "loss": 1.0646, "step": 6410 }, { "epoch": 0.5728326669198294, "grad_norm": 0.5255220532417297, "learning_rate": 4.068495464877177e-05, "loss": 0.967, "step": 6411 }, { "epoch": 0.5729220184510913, "grad_norm": 0.5186451077461243, "learning_rate": 4.067073766481369e-05, "loss": 0.9345, "step": 6412 }, { "epoch": 0.573011369982353, "grad_norm": 0.5088133215904236, "learning_rate": 4.0656521462280764e-05, "loss": 0.8783, "step": 6413 }, { "epoch": 0.5731007215136149, "grad_norm": 0.4002193510532379, "learning_rate": 4.064230604236376e-05, "loss": 0.975, "step": 6414 }, { "epoch": 0.5731900730448768, "grad_norm": 0.4866524040699005, "learning_rate": 4.062809140625338e-05, "loss": 0.8875, "step": 6415 }, { "epoch": 0.5732794245761387, "grad_norm": 0.45396822690963745, "learning_rate": 4.061387755514024e-05, "loss": 0.9482, "step": 6416 }, { "epoch": 0.5733687761074006, "grad_norm": 0.5568546056747437, "learning_rate": 4.05996644902149e-05, "loss": 0.9384, "step": 6417 }, { "epoch": 0.5734581276386624, "grad_norm": 0.4871857464313507, "learning_rate": 4.0585452212667864e-05, "loss": 0.9945, "step": 6418 }, { "epoch": 0.5735474791699243, "grad_norm": 0.4038003385066986, "learning_rate": 4.0571240723689546e-05, "loss": 0.9829, "step": 6419 }, { "epoch": 0.5736368307011861, "grad_norm": 0.39206719398498535, "learning_rate": 4.055703002447033e-05, "loss": 1.0198, "step": 6420 }, { "epoch": 0.573726182232448, "grad_norm": 0.37577176094055176, "learning_rate": 4.0542820116200495e-05, "loss": 0.9572, "step": 6421 }, { "epoch": 0.5738155337637099, "grad_norm": 0.3899545669555664, "learning_rate": 4.052861100007032e-05, "loss": 1.035, "step": 6422 }, { "epoch": 0.5739048852949717, "grad_norm": 0.6146190762519836, "learning_rate": 4.051440267726989e-05, "loss": 0.91, "step": 6423 }, { "epoch": 0.5739942368262336, "grad_norm": 0.4568648338317871, "learning_rate": 4.050019514898936e-05, "loss": 0.9665, "step": 6424 }, { "epoch": 0.5740835883574955, "grad_norm": 0.4437832534313202, "learning_rate": 4.048598841641874e-05, "loss": 0.9733, "step": 6425 }, { "epoch": 0.5741729398887574, "grad_norm": 0.4725315272808075, "learning_rate": 4.0471782480748e-05, "loss": 0.947, "step": 6426 }, { "epoch": 0.5742622914200192, "grad_norm": 0.5983218550682068, "learning_rate": 4.0457577343167044e-05, "loss": 0.9638, "step": 6427 }, { "epoch": 0.574351642951281, "grad_norm": 0.42528533935546875, "learning_rate": 4.04433730048657e-05, "loss": 0.9599, "step": 6428 }, { "epoch": 0.5744409944825429, "grad_norm": 0.4161036014556885, "learning_rate": 4.042916946703373e-05, "loss": 0.9122, "step": 6429 }, { "epoch": 0.5745303460138048, "grad_norm": 0.418058305978775, "learning_rate": 4.0414966730860846e-05, "loss": 0.9734, "step": 6430 }, { "epoch": 0.5746196975450667, "grad_norm": 0.5074496865272522, "learning_rate": 4.0400764797536675e-05, "loss": 0.8858, "step": 6431 }, { "epoch": 0.5747090490763286, "grad_norm": 0.5379347801208496, "learning_rate": 4.038656366825076e-05, "loss": 0.9721, "step": 6432 }, { "epoch": 0.5747984006075905, "grad_norm": 0.5281144380569458, "learning_rate": 4.037236334419261e-05, "loss": 1.0213, "step": 6433 }, { "epoch": 0.5748877521388522, "grad_norm": 0.4760182201862335, "learning_rate": 4.035816382655165e-05, "loss": 0.8549, "step": 6434 }, { "epoch": 0.5749771036701141, "grad_norm": 0.4302056133747101, "learning_rate": 4.034396511651726e-05, "loss": 0.9301, "step": 6435 }, { "epoch": 0.575066455201376, "grad_norm": 0.4959314167499542, "learning_rate": 4.032976721527869e-05, "loss": 0.889, "step": 6436 }, { "epoch": 0.5751558067326379, "grad_norm": 0.5147891044616699, "learning_rate": 4.0315570124025216e-05, "loss": 0.9727, "step": 6437 }, { "epoch": 0.5752451582638998, "grad_norm": 0.42699047923088074, "learning_rate": 4.030137384394595e-05, "loss": 1.0242, "step": 6438 }, { "epoch": 0.5753345097951617, "grad_norm": 0.5570961833000183, "learning_rate": 4.028717837623002e-05, "loss": 0.8377, "step": 6439 }, { "epoch": 0.5754238613264235, "grad_norm": 0.5209365487098694, "learning_rate": 4.0272983722066435e-05, "loss": 0.9127, "step": 6440 }, { "epoch": 0.5755132128576853, "grad_norm": 0.4663841128349304, "learning_rate": 4.0258789882644135e-05, "loss": 0.9139, "step": 6441 }, { "epoch": 0.5756025643889472, "grad_norm": 0.5318526029586792, "learning_rate": 4.024459685915204e-05, "loss": 0.9117, "step": 6442 }, { "epoch": 0.5756919159202091, "grad_norm": 0.455567866563797, "learning_rate": 4.023040465277892e-05, "loss": 0.8577, "step": 6443 }, { "epoch": 0.575781267451471, "grad_norm": 0.4229848086833954, "learning_rate": 4.0216213264713556e-05, "loss": 0.9288, "step": 6444 }, { "epoch": 0.5758706189827328, "grad_norm": 0.38377058506011963, "learning_rate": 4.020202269614461e-05, "loss": 0.9909, "step": 6445 }, { "epoch": 0.5759599705139947, "grad_norm": 0.48710566759109497, "learning_rate": 4.0187832948260705e-05, "loss": 0.9682, "step": 6446 }, { "epoch": 0.5760493220452565, "grad_norm": 0.3837445378303528, "learning_rate": 4.017364402225038e-05, "loss": 0.9965, "step": 6447 }, { "epoch": 0.5761386735765184, "grad_norm": 0.4128133952617645, "learning_rate": 4.0159455919302114e-05, "loss": 0.9991, "step": 6448 }, { "epoch": 0.5762280251077803, "grad_norm": 0.44364961981773376, "learning_rate": 4.014526864060432e-05, "loss": 1.0147, "step": 6449 }, { "epoch": 0.5763173766390421, "grad_norm": 0.47115516662597656, "learning_rate": 4.01310821873453e-05, "loss": 0.9597, "step": 6450 }, { "epoch": 0.576406728170304, "grad_norm": 0.4687146842479706, "learning_rate": 4.0116896560713346e-05, "loss": 0.9166, "step": 6451 }, { "epoch": 0.5764960797015659, "grad_norm": 0.46051499247550964, "learning_rate": 4.010271176189666e-05, "loss": 0.9735, "step": 6452 }, { "epoch": 0.5765854312328278, "grad_norm": 0.45504701137542725, "learning_rate": 4.008852779208336e-05, "loss": 0.9643, "step": 6453 }, { "epoch": 0.5766747827640896, "grad_norm": 0.5997002124786377, "learning_rate": 4.007434465246151e-05, "loss": 0.9214, "step": 6454 }, { "epoch": 0.5767641342953514, "grad_norm": 0.5700611472129822, "learning_rate": 4.006016234421908e-05, "loss": 1.0074, "step": 6455 }, { "epoch": 0.5768534858266133, "grad_norm": 0.5041797757148743, "learning_rate": 4.0045980868544014e-05, "loss": 0.9945, "step": 6456 }, { "epoch": 0.5769428373578752, "grad_norm": 0.39958614110946655, "learning_rate": 4.003180022662415e-05, "loss": 1.0317, "step": 6457 }, { "epoch": 0.5770321888891371, "grad_norm": 0.5589715242385864, "learning_rate": 4.001762041964727e-05, "loss": 0.8559, "step": 6458 }, { "epoch": 0.577121540420399, "grad_norm": 0.4533728361129761, "learning_rate": 4.000344144880108e-05, "loss": 0.9256, "step": 6459 }, { "epoch": 0.5772108919516609, "grad_norm": 0.5269187688827515, "learning_rate": 3.998926331527323e-05, "loss": 0.9339, "step": 6460 }, { "epoch": 0.5773002434829226, "grad_norm": 0.4693765938282013, "learning_rate": 3.997508602025128e-05, "loss": 0.9535, "step": 6461 }, { "epoch": 0.5773895950141845, "grad_norm": 0.42026007175445557, "learning_rate": 3.996090956492275e-05, "loss": 0.9109, "step": 6462 }, { "epoch": 0.5774789465454464, "grad_norm": 0.41388946771621704, "learning_rate": 3.994673395047505e-05, "loss": 0.9696, "step": 6463 }, { "epoch": 0.5775682980767083, "grad_norm": 0.3982580304145813, "learning_rate": 3.993255917809553e-05, "loss": 1.0228, "step": 6464 }, { "epoch": 0.5776576496079702, "grad_norm": 0.4337417483329773, "learning_rate": 3.9918385248971484e-05, "loss": 0.996, "step": 6465 }, { "epoch": 0.577747001139232, "grad_norm": 0.5209522843360901, "learning_rate": 3.990421216429014e-05, "loss": 0.8832, "step": 6466 }, { "epoch": 0.5778363526704939, "grad_norm": 0.4163426160812378, "learning_rate": 3.9890039925238645e-05, "loss": 0.9445, "step": 6467 }, { "epoch": 0.5779257042017557, "grad_norm": 0.4440252482891083, "learning_rate": 3.987586853300408e-05, "loss": 0.9476, "step": 6468 }, { "epoch": 0.5780150557330176, "grad_norm": 0.41356053948402405, "learning_rate": 3.9861697988773425e-05, "loss": 0.9586, "step": 6469 }, { "epoch": 0.5781044072642795, "grad_norm": 0.46509677171707153, "learning_rate": 3.9847528293733636e-05, "loss": 0.956, "step": 6470 }, { "epoch": 0.5781937587955414, "grad_norm": 0.4374508261680603, "learning_rate": 3.9833359449071564e-05, "loss": 0.9895, "step": 6471 }, { "epoch": 0.5782831103268032, "grad_norm": 0.4301753044128418, "learning_rate": 3.981919145597404e-05, "loss": 1.0006, "step": 6472 }, { "epoch": 0.5783724618580651, "grad_norm": 0.4043073356151581, "learning_rate": 3.9805024315627714e-05, "loss": 0.9939, "step": 6473 }, { "epoch": 0.578461813389327, "grad_norm": 0.4929652512073517, "learning_rate": 3.979085802921928e-05, "loss": 0.808, "step": 6474 }, { "epoch": 0.5785511649205888, "grad_norm": 0.3793524205684662, "learning_rate": 3.977669259793531e-05, "loss": 0.9753, "step": 6475 }, { "epoch": 0.5786405164518507, "grad_norm": 0.4306744635105133, "learning_rate": 3.9762528022962305e-05, "loss": 0.9424, "step": 6476 }, { "epoch": 0.5787298679831125, "grad_norm": 0.5743783712387085, "learning_rate": 3.9748364305486703e-05, "loss": 0.8385, "step": 6477 }, { "epoch": 0.5788192195143744, "grad_norm": 0.46976256370544434, "learning_rate": 3.9734201446694865e-05, "loss": 0.9779, "step": 6478 }, { "epoch": 0.5789085710456363, "grad_norm": 0.3878982365131378, "learning_rate": 3.972003944777308e-05, "loss": 1.0064, "step": 6479 }, { "epoch": 0.5789979225768982, "grad_norm": 0.3860369324684143, "learning_rate": 3.9705878309907565e-05, "loss": 0.9581, "step": 6480 }, { "epoch": 0.5790872741081601, "grad_norm": 0.582404375076294, "learning_rate": 3.969171803428447e-05, "loss": 0.8623, "step": 6481 }, { "epoch": 0.5791766256394218, "grad_norm": 0.5933336019515991, "learning_rate": 3.96775586220899e-05, "loss": 0.8301, "step": 6482 }, { "epoch": 0.5792659771706837, "grad_norm": 0.4246273934841156, "learning_rate": 3.9663400074509786e-05, "loss": 0.9505, "step": 6483 }, { "epoch": 0.5793553287019456, "grad_norm": 0.4642765522003174, "learning_rate": 3.964924239273011e-05, "loss": 0.9685, "step": 6484 }, { "epoch": 0.5794446802332075, "grad_norm": 0.4278883635997772, "learning_rate": 3.96350855779367e-05, "loss": 1.0149, "step": 6485 }, { "epoch": 0.5795340317644694, "grad_norm": 0.427097350358963, "learning_rate": 3.962092963131537e-05, "loss": 0.9337, "step": 6486 }, { "epoch": 0.5796233832957313, "grad_norm": 0.5278881788253784, "learning_rate": 3.9606774554051824e-05, "loss": 0.9693, "step": 6487 }, { "epoch": 0.5797127348269931, "grad_norm": 0.42778128385543823, "learning_rate": 3.959262034733168e-05, "loss": 0.984, "step": 6488 }, { "epoch": 0.5798020863582549, "grad_norm": 0.6493330597877502, "learning_rate": 3.9578467012340515e-05, "loss": 0.8847, "step": 6489 }, { "epoch": 0.5798914378895168, "grad_norm": 0.4391600787639618, "learning_rate": 3.956431455026382e-05, "loss": 1.0071, "step": 6490 }, { "epoch": 0.5799807894207787, "grad_norm": 0.4158203899860382, "learning_rate": 3.955016296228702e-05, "loss": 0.967, "step": 6491 }, { "epoch": 0.5800701409520406, "grad_norm": 0.41016045212745667, "learning_rate": 3.953601224959549e-05, "loss": 0.9616, "step": 6492 }, { "epoch": 0.5801594924833025, "grad_norm": 0.420639306306839, "learning_rate": 3.952186241337444e-05, "loss": 0.9555, "step": 6493 }, { "epoch": 0.5802488440145643, "grad_norm": 0.4078323543071747, "learning_rate": 3.9507713454809106e-05, "loss": 0.9645, "step": 6494 }, { "epoch": 0.5803381955458262, "grad_norm": 0.4655108153820038, "learning_rate": 3.949356537508461e-05, "loss": 0.8071, "step": 6495 }, { "epoch": 0.580427547077088, "grad_norm": 0.4758930504322052, "learning_rate": 3.947941817538601e-05, "loss": 0.8597, "step": 6496 }, { "epoch": 0.5805168986083499, "grad_norm": 0.41579219698905945, "learning_rate": 3.946527185689827e-05, "loss": 0.9475, "step": 6497 }, { "epoch": 0.5806062501396118, "grad_norm": 0.4709773659706116, "learning_rate": 3.9451126420806304e-05, "loss": 0.9533, "step": 6498 }, { "epoch": 0.5806956016708736, "grad_norm": 0.572155773639679, "learning_rate": 3.943698186829495e-05, "loss": 0.932, "step": 6499 }, { "epoch": 0.5807849532021355, "grad_norm": 0.4728442132472992, "learning_rate": 3.942283820054895e-05, "loss": 1.0003, "step": 6500 }, { "epoch": 0.5808743047333974, "grad_norm": 0.504787266254425, "learning_rate": 3.940869541875301e-05, "loss": 0.9066, "step": 6501 }, { "epoch": 0.5809636562646593, "grad_norm": 0.4003005921840668, "learning_rate": 3.939455352409172e-05, "loss": 0.9711, "step": 6502 }, { "epoch": 0.5810530077959211, "grad_norm": 0.4955715537071228, "learning_rate": 3.9380412517749613e-05, "loss": 0.9386, "step": 6503 }, { "epoch": 0.581142359327183, "grad_norm": 0.4974922835826874, "learning_rate": 3.9366272400911156e-05, "loss": 0.9126, "step": 6504 }, { "epoch": 0.5812317108584448, "grad_norm": 0.444976806640625, "learning_rate": 3.935213317476074e-05, "loss": 1.0139, "step": 6505 }, { "epoch": 0.5813210623897067, "grad_norm": 0.47083789110183716, "learning_rate": 3.9337994840482664e-05, "loss": 0.9698, "step": 6506 }, { "epoch": 0.5814104139209686, "grad_norm": 0.3796704411506653, "learning_rate": 3.932385739926116e-05, "loss": 0.9525, "step": 6507 }, { "epoch": 0.5814997654522305, "grad_norm": 0.4334143400192261, "learning_rate": 3.93097208522804e-05, "loss": 0.9909, "step": 6508 }, { "epoch": 0.5815891169834922, "grad_norm": 0.47649437189102173, "learning_rate": 3.929558520072447e-05, "loss": 0.9077, "step": 6509 }, { "epoch": 0.5816784685147541, "grad_norm": 0.6175134181976318, "learning_rate": 3.928145044577738e-05, "loss": 0.9106, "step": 6510 }, { "epoch": 0.581767820046016, "grad_norm": 0.4393305480480194, "learning_rate": 3.926731658862307e-05, "loss": 0.9112, "step": 6511 }, { "epoch": 0.5818571715772779, "grad_norm": 0.46963876485824585, "learning_rate": 3.9253183630445395e-05, "loss": 0.9079, "step": 6512 }, { "epoch": 0.5819465231085398, "grad_norm": 0.4847874343395233, "learning_rate": 3.923905157242817e-05, "loss": 0.9349, "step": 6513 }, { "epoch": 0.5820358746398017, "grad_norm": 0.40541452169418335, "learning_rate": 3.922492041575505e-05, "loss": 0.9665, "step": 6514 }, { "epoch": 0.5821252261710635, "grad_norm": 0.4827271103858948, "learning_rate": 3.92107901616097e-05, "loss": 0.9835, "step": 6515 }, { "epoch": 0.5822145777023253, "grad_norm": 0.5301873087882996, "learning_rate": 3.9196660811175685e-05, "loss": 0.8416, "step": 6516 }, { "epoch": 0.5823039292335872, "grad_norm": 0.557282030582428, "learning_rate": 3.918253236563648e-05, "loss": 0.8892, "step": 6517 }, { "epoch": 0.5823932807648491, "grad_norm": 0.584327220916748, "learning_rate": 3.9168404826175486e-05, "loss": 0.9241, "step": 6518 }, { "epoch": 0.582482632296111, "grad_norm": 0.42827340960502625, "learning_rate": 3.9154278193976066e-05, "loss": 0.92, "step": 6519 }, { "epoch": 0.5825719838273729, "grad_norm": 0.43465957045555115, "learning_rate": 3.914015247022144e-05, "loss": 0.9636, "step": 6520 }, { "epoch": 0.5826613353586347, "grad_norm": 0.4111988842487335, "learning_rate": 3.9126027656094806e-05, "loss": 0.9871, "step": 6521 }, { "epoch": 0.5827506868898966, "grad_norm": 0.4078396260738373, "learning_rate": 3.9111903752779263e-05, "loss": 0.9392, "step": 6522 }, { "epoch": 0.5828400384211584, "grad_norm": 0.49271008372306824, "learning_rate": 3.909778076145785e-05, "loss": 0.9823, "step": 6523 }, { "epoch": 0.5829293899524203, "grad_norm": 0.4981904625892639, "learning_rate": 3.90836586833135e-05, "loss": 0.9286, "step": 6524 }, { "epoch": 0.5830187414836822, "grad_norm": 0.5224156379699707, "learning_rate": 3.906953751952909e-05, "loss": 0.9063, "step": 6525 }, { "epoch": 0.583108093014944, "grad_norm": 0.5273329615592957, "learning_rate": 3.9055417271287426e-05, "loss": 0.999, "step": 6526 }, { "epoch": 0.5831974445462059, "grad_norm": 0.5131478905677795, "learning_rate": 3.9041297939771224e-05, "loss": 0.9669, "step": 6527 }, { "epoch": 0.5832867960774678, "grad_norm": 0.5106601119041443, "learning_rate": 3.9027179526163125e-05, "loss": 0.8704, "step": 6528 }, { "epoch": 0.5833761476087297, "grad_norm": 0.5162898302078247, "learning_rate": 3.901306203164571e-05, "loss": 0.9613, "step": 6529 }, { "epoch": 0.5834654991399915, "grad_norm": 0.42621272802352905, "learning_rate": 3.899894545740146e-05, "loss": 0.9468, "step": 6530 }, { "epoch": 0.5835548506712533, "grad_norm": 0.47468671202659607, "learning_rate": 3.898482980461279e-05, "loss": 0.9432, "step": 6531 }, { "epoch": 0.5836442022025152, "grad_norm": 0.4372991919517517, "learning_rate": 3.897071507446204e-05, "loss": 0.9773, "step": 6532 }, { "epoch": 0.5837335537337771, "grad_norm": 0.4610428810119629, "learning_rate": 3.8956601268131486e-05, "loss": 0.9766, "step": 6533 }, { "epoch": 0.583822905265039, "grad_norm": 0.4634798765182495, "learning_rate": 3.894248838680327e-05, "loss": 1.0252, "step": 6534 }, { "epoch": 0.5839122567963009, "grad_norm": 0.46993133425712585, "learning_rate": 3.8928376431659516e-05, "loss": 0.8756, "step": 6535 }, { "epoch": 0.5840016083275628, "grad_norm": 0.4452194273471832, "learning_rate": 3.891426540388224e-05, "loss": 0.9596, "step": 6536 }, { "epoch": 0.5840909598588245, "grad_norm": 0.47519785165786743, "learning_rate": 3.890015530465342e-05, "loss": 0.9268, "step": 6537 }, { "epoch": 0.5841803113900864, "grad_norm": 0.42138373851776123, "learning_rate": 3.888604613515491e-05, "loss": 0.9785, "step": 6538 }, { "epoch": 0.5842696629213483, "grad_norm": 0.429235577583313, "learning_rate": 3.887193789656849e-05, "loss": 0.9077, "step": 6539 }, { "epoch": 0.5843590144526102, "grad_norm": 0.4701876640319824, "learning_rate": 3.8857830590075895e-05, "loss": 0.9449, "step": 6540 }, { "epoch": 0.5844483659838721, "grad_norm": 0.45711401104927063, "learning_rate": 3.8843724216858745e-05, "loss": 1.0787, "step": 6541 }, { "epoch": 0.584537717515134, "grad_norm": 0.4365895390510559, "learning_rate": 3.882961877809862e-05, "loss": 0.9814, "step": 6542 }, { "epoch": 0.5846270690463958, "grad_norm": 0.5129074454307556, "learning_rate": 3.881551427497701e-05, "loss": 0.9376, "step": 6543 }, { "epoch": 0.5847164205776576, "grad_norm": 0.45288604497909546, "learning_rate": 3.880141070867527e-05, "loss": 0.9004, "step": 6544 }, { "epoch": 0.5848057721089195, "grad_norm": 0.42734140157699585, "learning_rate": 3.878730808037475e-05, "loss": 1.0315, "step": 6545 }, { "epoch": 0.5848951236401814, "grad_norm": 0.47614336013793945, "learning_rate": 3.87732063912567e-05, "loss": 1.0126, "step": 6546 }, { "epoch": 0.5849844751714433, "grad_norm": 0.475925475358963, "learning_rate": 3.875910564250229e-05, "loss": 0.9627, "step": 6547 }, { "epoch": 0.5850738267027051, "grad_norm": 0.4257443845272064, "learning_rate": 3.874500583529259e-05, "loss": 0.9535, "step": 6548 }, { "epoch": 0.585163178233967, "grad_norm": 0.4719519317150116, "learning_rate": 3.873090697080863e-05, "loss": 0.9882, "step": 6549 }, { "epoch": 0.5852525297652289, "grad_norm": 0.5359987020492554, "learning_rate": 3.871680905023133e-05, "loss": 0.9496, "step": 6550 }, { "epoch": 0.5853418812964907, "grad_norm": 0.5468908548355103, "learning_rate": 3.8702712074741534e-05, "loss": 0.9608, "step": 6551 }, { "epoch": 0.5854312328277526, "grad_norm": 0.5251081585884094, "learning_rate": 3.868861604552004e-05, "loss": 0.8878, "step": 6552 }, { "epoch": 0.5855205843590144, "grad_norm": 0.4280673861503601, "learning_rate": 3.8674520963747526e-05, "loss": 0.916, "step": 6553 }, { "epoch": 0.5856099358902763, "grad_norm": 0.4354718029499054, "learning_rate": 3.866042683060459e-05, "loss": 0.952, "step": 6554 }, { "epoch": 0.5856992874215382, "grad_norm": 0.43345123529434204, "learning_rate": 3.864633364727177e-05, "loss": 0.9935, "step": 6555 }, { "epoch": 0.5857886389528001, "grad_norm": 0.3917457163333893, "learning_rate": 3.8632241414929536e-05, "loss": 0.9337, "step": 6556 }, { "epoch": 0.585877990484062, "grad_norm": 0.45168983936309814, "learning_rate": 3.861815013475827e-05, "loss": 0.9561, "step": 6557 }, { "epoch": 0.5859673420153237, "grad_norm": 0.47507405281066895, "learning_rate": 3.860405980793823e-05, "loss": 0.9946, "step": 6558 }, { "epoch": 0.5860566935465856, "grad_norm": 0.5725569725036621, "learning_rate": 3.858997043564966e-05, "loss": 0.9315, "step": 6559 }, { "epoch": 0.5861460450778475, "grad_norm": 0.4958851635456085, "learning_rate": 3.8575882019072683e-05, "loss": 0.8849, "step": 6560 }, { "epoch": 0.5862353966091094, "grad_norm": 0.4789509177207947, "learning_rate": 3.8561794559387366e-05, "loss": 0.9942, "step": 6561 }, { "epoch": 0.5863247481403713, "grad_norm": 0.4200628697872162, "learning_rate": 3.854770805777368e-05, "loss": 0.9415, "step": 6562 }, { "epoch": 0.5864140996716332, "grad_norm": 0.4809480607509613, "learning_rate": 3.8533622515411525e-05, "loss": 0.9314, "step": 6563 }, { "epoch": 0.586503451202895, "grad_norm": 0.5276997685432434, "learning_rate": 3.85195379334807e-05, "loss": 0.9348, "step": 6564 }, { "epoch": 0.5865928027341568, "grad_norm": 0.5005677938461304, "learning_rate": 3.8505454313160935e-05, "loss": 0.8954, "step": 6565 }, { "epoch": 0.5866821542654187, "grad_norm": 0.6186678409576416, "learning_rate": 3.8491371655631897e-05, "loss": 0.9265, "step": 6566 }, { "epoch": 0.5867715057966806, "grad_norm": 0.45182543992996216, "learning_rate": 3.847728996207316e-05, "loss": 1.0398, "step": 6567 }, { "epoch": 0.5868608573279425, "grad_norm": 0.5082495212554932, "learning_rate": 3.846320923366421e-05, "loss": 0.9212, "step": 6568 }, { "epoch": 0.5869502088592043, "grad_norm": 0.4350298345088959, "learning_rate": 3.844912947158446e-05, "loss": 0.9523, "step": 6569 }, { "epoch": 0.5870395603904662, "grad_norm": 0.5526973605155945, "learning_rate": 3.843505067701324e-05, "loss": 0.8776, "step": 6570 }, { "epoch": 0.5871289119217281, "grad_norm": 0.4889020323753357, "learning_rate": 3.84209728511298e-05, "loss": 1.0305, "step": 6571 }, { "epoch": 0.5872182634529899, "grad_norm": 0.5451194643974304, "learning_rate": 3.840689599511331e-05, "loss": 0.9137, "step": 6572 }, { "epoch": 0.5873076149842518, "grad_norm": 0.44288796186447144, "learning_rate": 3.839282011014286e-05, "loss": 0.8818, "step": 6573 }, { "epoch": 0.5873969665155137, "grad_norm": 0.42640823125839233, "learning_rate": 3.837874519739744e-05, "loss": 0.9603, "step": 6574 }, { "epoch": 0.5874863180467755, "grad_norm": 0.43930867314338684, "learning_rate": 3.8364671258056e-05, "loss": 1.0335, "step": 6575 }, { "epoch": 0.5875756695780374, "grad_norm": 0.5153297185897827, "learning_rate": 3.835059829329735e-05, "loss": 1.0377, "step": 6576 }, { "epoch": 0.5876650211092993, "grad_norm": 0.47636160254478455, "learning_rate": 3.8336526304300265e-05, "loss": 0.9774, "step": 6577 }, { "epoch": 0.5877543726405611, "grad_norm": 0.44680356979370117, "learning_rate": 3.832245529224342e-05, "loss": 1.0178, "step": 6578 }, { "epoch": 0.587843724171823, "grad_norm": 0.44048798084259033, "learning_rate": 3.830838525830542e-05, "loss": 1.0009, "step": 6579 }, { "epoch": 0.5879330757030848, "grad_norm": 0.4684334993362427, "learning_rate": 3.829431620366479e-05, "loss": 0.8922, "step": 6580 }, { "epoch": 0.5880224272343467, "grad_norm": 0.5170097351074219, "learning_rate": 3.828024812949994e-05, "loss": 0.9479, "step": 6581 }, { "epoch": 0.5881117787656086, "grad_norm": 0.4309813976287842, "learning_rate": 3.826618103698924e-05, "loss": 0.9732, "step": 6582 }, { "epoch": 0.5882011302968705, "grad_norm": 0.4656025469303131, "learning_rate": 3.825211492731097e-05, "loss": 0.9387, "step": 6583 }, { "epoch": 0.5882904818281324, "grad_norm": 0.4610069990158081, "learning_rate": 3.823804980164328e-05, "loss": 1.0072, "step": 6584 }, { "epoch": 0.5883798333593941, "grad_norm": 0.49006903171539307, "learning_rate": 3.8223985661164284e-05, "loss": 0.9517, "step": 6585 }, { "epoch": 0.588469184890656, "grad_norm": 0.5057728290557861, "learning_rate": 3.820992250705202e-05, "loss": 0.9777, "step": 6586 }, { "epoch": 0.5885585364219179, "grad_norm": 0.45757678151130676, "learning_rate": 3.819586034048441e-05, "loss": 0.9111, "step": 6587 }, { "epoch": 0.5886478879531798, "grad_norm": 0.4634867012500763, "learning_rate": 3.818179916263933e-05, "loss": 0.9293, "step": 6588 }, { "epoch": 0.5887372394844417, "grad_norm": 0.42738625407218933, "learning_rate": 3.816773897469454e-05, "loss": 1.005, "step": 6589 }, { "epoch": 0.5888265910157036, "grad_norm": 0.496150404214859, "learning_rate": 3.815367977782774e-05, "loss": 0.878, "step": 6590 }, { "epoch": 0.5889159425469654, "grad_norm": 0.4651472866535187, "learning_rate": 3.813962157321653e-05, "loss": 0.991, "step": 6591 }, { "epoch": 0.5890052940782272, "grad_norm": 0.49362361431121826, "learning_rate": 3.812556436203843e-05, "loss": 0.9222, "step": 6592 }, { "epoch": 0.5890946456094891, "grad_norm": 0.4723232686519623, "learning_rate": 3.8111508145470886e-05, "loss": 0.9071, "step": 6593 }, { "epoch": 0.589183997140751, "grad_norm": 0.4556770622730255, "learning_rate": 3.809745292469128e-05, "loss": 0.9447, "step": 6594 }, { "epoch": 0.5892733486720129, "grad_norm": 0.49390918016433716, "learning_rate": 3.808339870087684e-05, "loss": 0.8855, "step": 6595 }, { "epoch": 0.5893627002032747, "grad_norm": 0.4614836275577545, "learning_rate": 3.8069345475204784e-05, "loss": 0.9299, "step": 6596 }, { "epoch": 0.5894520517345366, "grad_norm": 0.42079588770866394, "learning_rate": 3.805529324885222e-05, "loss": 0.9729, "step": 6597 }, { "epoch": 0.5895414032657985, "grad_norm": 0.4124428629875183, "learning_rate": 3.804124202299615e-05, "loss": 0.9736, "step": 6598 }, { "epoch": 0.5896307547970603, "grad_norm": 0.45447784662246704, "learning_rate": 3.8027191798813546e-05, "loss": 0.937, "step": 6599 }, { "epoch": 0.5897201063283222, "grad_norm": 0.5072098970413208, "learning_rate": 3.801314257748125e-05, "loss": 0.9483, "step": 6600 }, { "epoch": 0.589809457859584, "grad_norm": 0.42272502183914185, "learning_rate": 3.799909436017604e-05, "loss": 1.0475, "step": 6601 }, { "epoch": 0.5898988093908459, "grad_norm": 0.44008708000183105, "learning_rate": 3.7985047148074585e-05, "loss": 0.9706, "step": 6602 }, { "epoch": 0.5899881609221078, "grad_norm": 0.4524151086807251, "learning_rate": 3.797100094235351e-05, "loss": 0.9213, "step": 6603 }, { "epoch": 0.5900775124533697, "grad_norm": 0.4076539874076843, "learning_rate": 3.795695574418934e-05, "loss": 1.0978, "step": 6604 }, { "epoch": 0.5901668639846316, "grad_norm": 0.46805551648139954, "learning_rate": 3.794291155475848e-05, "loss": 0.8995, "step": 6605 }, { "epoch": 0.5902562155158934, "grad_norm": 0.4466945230960846, "learning_rate": 3.792886837523729e-05, "loss": 0.9412, "step": 6606 }, { "epoch": 0.5903455670471552, "grad_norm": 0.4632209837436676, "learning_rate": 3.7914826206802047e-05, "loss": 0.9332, "step": 6607 }, { "epoch": 0.5904349185784171, "grad_norm": 0.49044013023376465, "learning_rate": 3.790078505062894e-05, "loss": 1.0105, "step": 6608 }, { "epoch": 0.590524270109679, "grad_norm": 0.45329976081848145, "learning_rate": 3.788674490789404e-05, "loss": 1.0149, "step": 6609 }, { "epoch": 0.5906136216409409, "grad_norm": 0.45417705178260803, "learning_rate": 3.7872705779773376e-05, "loss": 0.9508, "step": 6610 }, { "epoch": 0.5907029731722028, "grad_norm": 0.4554515480995178, "learning_rate": 3.785866766744287e-05, "loss": 0.9459, "step": 6611 }, { "epoch": 0.5907923247034647, "grad_norm": 0.4931671619415283, "learning_rate": 3.784463057207836e-05, "loss": 0.9422, "step": 6612 }, { "epoch": 0.5908816762347264, "grad_norm": 0.5159240961074829, "learning_rate": 3.783059449485561e-05, "loss": 0.9961, "step": 6613 }, { "epoch": 0.5909710277659883, "grad_norm": 0.4163471758365631, "learning_rate": 3.78165594369503e-05, "loss": 1.0271, "step": 6614 }, { "epoch": 0.5910603792972502, "grad_norm": 0.4183323383331299, "learning_rate": 3.7802525399538e-05, "loss": 1.0124, "step": 6615 }, { "epoch": 0.5911497308285121, "grad_norm": 0.46458837389945984, "learning_rate": 3.77884923837942e-05, "loss": 0.952, "step": 6616 }, { "epoch": 0.591239082359774, "grad_norm": 0.3890257775783539, "learning_rate": 3.777446039089433e-05, "loss": 0.9898, "step": 6617 }, { "epoch": 0.5913284338910358, "grad_norm": 0.4664503335952759, "learning_rate": 3.776042942201372e-05, "loss": 0.9048, "step": 6618 }, { "epoch": 0.5914177854222977, "grad_norm": 0.5847722887992859, "learning_rate": 3.774639947832761e-05, "loss": 0.8766, "step": 6619 }, { "epoch": 0.5915071369535595, "grad_norm": 0.4752833843231201, "learning_rate": 3.7732370561011154e-05, "loss": 0.9276, "step": 6620 }, { "epoch": 0.5915964884848214, "grad_norm": 0.4764302670955658, "learning_rate": 3.771834267123943e-05, "loss": 0.9594, "step": 6621 }, { "epoch": 0.5916858400160833, "grad_norm": 0.40147051215171814, "learning_rate": 3.770431581018743e-05, "loss": 0.9262, "step": 6622 }, { "epoch": 0.5917751915473451, "grad_norm": 0.4563705623149872, "learning_rate": 3.769028997903003e-05, "loss": 0.9904, "step": 6623 }, { "epoch": 0.591864543078607, "grad_norm": 0.4160234034061432, "learning_rate": 3.7676265178942074e-05, "loss": 1.0111, "step": 6624 }, { "epoch": 0.5919538946098689, "grad_norm": 0.426800400018692, "learning_rate": 3.766224141109825e-05, "loss": 0.9869, "step": 6625 }, { "epoch": 0.5920432461411308, "grad_norm": 0.43789252638816833, "learning_rate": 3.764821867667323e-05, "loss": 0.9818, "step": 6626 }, { "epoch": 0.5921325976723926, "grad_norm": 0.5048993229866028, "learning_rate": 3.763419697684156e-05, "loss": 0.8763, "step": 6627 }, { "epoch": 0.5922219492036545, "grad_norm": 0.5642315745353699, "learning_rate": 3.76201763127777e-05, "loss": 0.9, "step": 6628 }, { "epoch": 0.5923113007349163, "grad_norm": 0.4938600957393646, "learning_rate": 3.7606156685656026e-05, "loss": 0.924, "step": 6629 }, { "epoch": 0.5924006522661782, "grad_norm": 0.6127682328224182, "learning_rate": 3.759213809665084e-05, "loss": 0.9511, "step": 6630 }, { "epoch": 0.5924900037974401, "grad_norm": 0.5626466870307922, "learning_rate": 3.757812054693634e-05, "loss": 0.9138, "step": 6631 }, { "epoch": 0.592579355328702, "grad_norm": 0.4270266592502594, "learning_rate": 3.756410403768667e-05, "loss": 0.9399, "step": 6632 }, { "epoch": 0.5926687068599639, "grad_norm": 0.4422089159488678, "learning_rate": 3.755008857007583e-05, "loss": 0.9363, "step": 6633 }, { "epoch": 0.5927580583912256, "grad_norm": 0.4473762512207031, "learning_rate": 3.75360741452778e-05, "loss": 0.9541, "step": 6634 }, { "epoch": 0.5928474099224875, "grad_norm": 0.44996803998947144, "learning_rate": 3.752206076446641e-05, "loss": 0.9283, "step": 6635 }, { "epoch": 0.5929367614537494, "grad_norm": 0.544922411441803, "learning_rate": 3.7508048428815416e-05, "loss": 0.893, "step": 6636 }, { "epoch": 0.5930261129850113, "grad_norm": 0.38203221559524536, "learning_rate": 3.7494037139498525e-05, "loss": 0.9555, "step": 6637 }, { "epoch": 0.5931154645162732, "grad_norm": 0.47850242257118225, "learning_rate": 3.748002689768934e-05, "loss": 0.939, "step": 6638 }, { "epoch": 0.593204816047535, "grad_norm": 0.43377867341041565, "learning_rate": 3.7466017704561345e-05, "loss": 0.9497, "step": 6639 }, { "epoch": 0.5932941675787968, "grad_norm": 0.4623333215713501, "learning_rate": 3.745200956128797e-05, "loss": 0.8625, "step": 6640 }, { "epoch": 0.5933835191100587, "grad_norm": 0.4614277482032776, "learning_rate": 3.7438002469042565e-05, "loss": 0.9202, "step": 6641 }, { "epoch": 0.5934728706413206, "grad_norm": 0.4852299988269806, "learning_rate": 3.742399642899833e-05, "loss": 0.9428, "step": 6642 }, { "epoch": 0.5935622221725825, "grad_norm": 0.46920087933540344, "learning_rate": 3.740999144232846e-05, "loss": 0.9912, "step": 6643 }, { "epoch": 0.5936515737038444, "grad_norm": 0.4522823393344879, "learning_rate": 3.739598751020601e-05, "loss": 0.9731, "step": 6644 }, { "epoch": 0.5937409252351062, "grad_norm": 0.4409060478210449, "learning_rate": 3.7381984633803955e-05, "loss": 0.9657, "step": 6645 }, { "epoch": 0.5938302767663681, "grad_norm": 0.45765984058380127, "learning_rate": 3.7367982814295174e-05, "loss": 0.9724, "step": 6646 }, { "epoch": 0.5939196282976299, "grad_norm": 0.5184260606765747, "learning_rate": 3.735398205285248e-05, "loss": 0.9308, "step": 6647 }, { "epoch": 0.5940089798288918, "grad_norm": 0.44960641860961914, "learning_rate": 3.733998235064858e-05, "loss": 0.9715, "step": 6648 }, { "epoch": 0.5940983313601537, "grad_norm": 0.416469931602478, "learning_rate": 3.732598370885612e-05, "loss": 0.9493, "step": 6649 }, { "epoch": 0.5941876828914155, "grad_norm": 0.4398280382156372, "learning_rate": 3.73119861286476e-05, "loss": 0.996, "step": 6650 }, { "epoch": 0.5942770344226774, "grad_norm": 0.4765319526195526, "learning_rate": 3.7297989611195506e-05, "loss": 0.9283, "step": 6651 }, { "epoch": 0.5943663859539393, "grad_norm": 0.4116184115409851, "learning_rate": 3.728399415767216e-05, "loss": 0.9913, "step": 6652 }, { "epoch": 0.5944557374852012, "grad_norm": 0.43389296531677246, "learning_rate": 3.7269999769249855e-05, "loss": 0.9189, "step": 6653 }, { "epoch": 0.594545089016463, "grad_norm": 0.46604517102241516, "learning_rate": 3.725600644710078e-05, "loss": 0.8957, "step": 6654 }, { "epoch": 0.5946344405477249, "grad_norm": 0.509748101234436, "learning_rate": 3.724201419239699e-05, "loss": 0.895, "step": 6655 }, { "epoch": 0.5947237920789867, "grad_norm": 0.4493167996406555, "learning_rate": 3.722802300631049e-05, "loss": 0.9533, "step": 6656 }, { "epoch": 0.5948131436102486, "grad_norm": 0.5172920823097229, "learning_rate": 3.721403289001321e-05, "loss": 0.9407, "step": 6657 }, { "epoch": 0.5949024951415105, "grad_norm": 0.45403075218200684, "learning_rate": 3.720004384467697e-05, "loss": 0.98, "step": 6658 }, { "epoch": 0.5949918466727724, "grad_norm": 0.5167436003684998, "learning_rate": 3.718605587147348e-05, "loss": 0.972, "step": 6659 }, { "epoch": 0.5950811982040343, "grad_norm": 0.4543740153312683, "learning_rate": 3.7172068971574426e-05, "loss": 0.9288, "step": 6660 }, { "epoch": 0.595170549735296, "grad_norm": 0.401083767414093, "learning_rate": 3.715808314615131e-05, "loss": 0.9727, "step": 6661 }, { "epoch": 0.5952599012665579, "grad_norm": 0.4421873986721039, "learning_rate": 3.714409839637562e-05, "loss": 0.96, "step": 6662 }, { "epoch": 0.5953492527978198, "grad_norm": 0.436882883310318, "learning_rate": 3.713011472341872e-05, "loss": 0.947, "step": 6663 }, { "epoch": 0.5954386043290817, "grad_norm": 0.5699417591094971, "learning_rate": 3.711613212845192e-05, "loss": 0.9443, "step": 6664 }, { "epoch": 0.5955279558603436, "grad_norm": 0.48340317606925964, "learning_rate": 3.7102150612646356e-05, "loss": 0.9616, "step": 6665 }, { "epoch": 0.5956173073916055, "grad_norm": 0.6746427416801453, "learning_rate": 3.708817017717317e-05, "loss": 0.8092, "step": 6666 }, { "epoch": 0.5957066589228673, "grad_norm": 0.5548288822174072, "learning_rate": 3.707419082320336e-05, "loss": 0.9948, "step": 6667 }, { "epoch": 0.5957960104541291, "grad_norm": 0.4507887363433838, "learning_rate": 3.7060212551907845e-05, "loss": 0.9182, "step": 6668 }, { "epoch": 0.595885361985391, "grad_norm": 0.43728700280189514, "learning_rate": 3.704623536445746e-05, "loss": 0.9271, "step": 6669 }, { "epoch": 0.5959747135166529, "grad_norm": 0.43481287360191345, "learning_rate": 3.7032259262022936e-05, "loss": 1.0612, "step": 6670 }, { "epoch": 0.5960640650479148, "grad_norm": 0.4465053081512451, "learning_rate": 3.7018284245774925e-05, "loss": 0.9987, "step": 6671 }, { "epoch": 0.5961534165791766, "grad_norm": 0.50724196434021, "learning_rate": 3.700431031688399e-05, "loss": 0.9271, "step": 6672 }, { "epoch": 0.5962427681104385, "grad_norm": 0.3783160150051117, "learning_rate": 3.699033747652059e-05, "loss": 0.9373, "step": 6673 }, { "epoch": 0.5963321196417004, "grad_norm": 0.4265550971031189, "learning_rate": 3.697636572585511e-05, "loss": 0.923, "step": 6674 }, { "epoch": 0.5964214711729622, "grad_norm": 0.4511755406856537, "learning_rate": 3.6962395066057806e-05, "loss": 0.9411, "step": 6675 }, { "epoch": 0.5965108227042241, "grad_norm": 0.398965984582901, "learning_rate": 3.694842549829889e-05, "loss": 1.0101, "step": 6676 }, { "epoch": 0.596600174235486, "grad_norm": 0.48364296555519104, "learning_rate": 3.693445702374846e-05, "loss": 0.9362, "step": 6677 }, { "epoch": 0.5966895257667478, "grad_norm": 0.4517565071582794, "learning_rate": 3.692048964357653e-05, "loss": 0.9031, "step": 6678 }, { "epoch": 0.5967788772980097, "grad_norm": 0.42774462699890137, "learning_rate": 3.690652335895299e-05, "loss": 0.9305, "step": 6679 }, { "epoch": 0.5968682288292716, "grad_norm": 0.5115528702735901, "learning_rate": 3.68925581710477e-05, "loss": 0.8811, "step": 6680 }, { "epoch": 0.5969575803605335, "grad_norm": 0.4454442262649536, "learning_rate": 3.687859408103037e-05, "loss": 0.9036, "step": 6681 }, { "epoch": 0.5970469318917953, "grad_norm": 0.5473392009735107, "learning_rate": 3.6864631090070655e-05, "loss": 0.8625, "step": 6682 }, { "epoch": 0.5971362834230571, "grad_norm": 0.4324687719345093, "learning_rate": 3.6850669199338096e-05, "loss": 0.9759, "step": 6683 }, { "epoch": 0.597225634954319, "grad_norm": 0.4886661767959595, "learning_rate": 3.683670841000215e-05, "loss": 0.9272, "step": 6684 }, { "epoch": 0.5973149864855809, "grad_norm": 0.4877077341079712, "learning_rate": 3.682274872323221e-05, "loss": 0.8533, "step": 6685 }, { "epoch": 0.5974043380168428, "grad_norm": 0.42951878905296326, "learning_rate": 3.680879014019751e-05, "loss": 0.9555, "step": 6686 }, { "epoch": 0.5974936895481047, "grad_norm": 0.5074982047080994, "learning_rate": 3.679483266206723e-05, "loss": 0.9338, "step": 6687 }, { "epoch": 0.5975830410793666, "grad_norm": 0.4952092170715332, "learning_rate": 3.678087629001048e-05, "loss": 1.0184, "step": 6688 }, { "epoch": 0.5976723926106283, "grad_norm": 0.45204105973243713, "learning_rate": 3.676692102519625e-05, "loss": 0.9575, "step": 6689 }, { "epoch": 0.5977617441418902, "grad_norm": 0.5234025120735168, "learning_rate": 3.675296686879343e-05, "loss": 0.9418, "step": 6690 }, { "epoch": 0.5978510956731521, "grad_norm": 0.4052625298500061, "learning_rate": 3.6739013821970846e-05, "loss": 0.949, "step": 6691 }, { "epoch": 0.597940447204414, "grad_norm": 0.5295931696891785, "learning_rate": 3.67250618858972e-05, "loss": 0.9215, "step": 6692 }, { "epoch": 0.5980297987356759, "grad_norm": 0.4193294942378998, "learning_rate": 3.671111106174113e-05, "loss": 0.9675, "step": 6693 }, { "epoch": 0.5981191502669377, "grad_norm": 0.4514252841472626, "learning_rate": 3.669716135067116e-05, "loss": 0.9491, "step": 6694 }, { "epoch": 0.5982085017981996, "grad_norm": 0.38525697588920593, "learning_rate": 3.6683212753855726e-05, "loss": 0.962, "step": 6695 }, { "epoch": 0.5982978533294614, "grad_norm": 0.49608129262924194, "learning_rate": 3.666926527246316e-05, "loss": 0.9742, "step": 6696 }, { "epoch": 0.5983872048607233, "grad_norm": 0.5682451128959656, "learning_rate": 3.6655318907661726e-05, "loss": 0.879, "step": 6697 }, { "epoch": 0.5984765563919852, "grad_norm": 0.3935823142528534, "learning_rate": 3.664137366061958e-05, "loss": 0.9591, "step": 6698 }, { "epoch": 0.598565907923247, "grad_norm": 0.4220222234725952, "learning_rate": 3.662742953250478e-05, "loss": 1.0057, "step": 6699 }, { "epoch": 0.5986552594545089, "grad_norm": 0.4496472179889679, "learning_rate": 3.6613486524485294e-05, "loss": 0.9168, "step": 6700 }, { "epoch": 0.5987446109857708, "grad_norm": 0.40350624918937683, "learning_rate": 3.6599544637729007e-05, "loss": 1.0385, "step": 6701 }, { "epoch": 0.5988339625170326, "grad_norm": 0.42951273918151855, "learning_rate": 3.65856038734037e-05, "loss": 0.9669, "step": 6702 }, { "epoch": 0.5989233140482945, "grad_norm": 0.4338366687297821, "learning_rate": 3.657166423267704e-05, "loss": 0.9676, "step": 6703 }, { "epoch": 0.5990126655795563, "grad_norm": 0.5033963918685913, "learning_rate": 3.655772571671664e-05, "loss": 0.9289, "step": 6704 }, { "epoch": 0.5991020171108182, "grad_norm": 0.48832231760025024, "learning_rate": 3.654378832669002e-05, "loss": 0.8811, "step": 6705 }, { "epoch": 0.5991913686420801, "grad_norm": 0.5550376176834106, "learning_rate": 3.6529852063764545e-05, "loss": 0.8869, "step": 6706 }, { "epoch": 0.599280720173342, "grad_norm": 0.46830353140830994, "learning_rate": 3.651591692910754e-05, "loss": 0.9608, "step": 6707 }, { "epoch": 0.5993700717046039, "grad_norm": 0.4385646879673004, "learning_rate": 3.650198292388621e-05, "loss": 1.0007, "step": 6708 }, { "epoch": 0.5994594232358657, "grad_norm": 0.5016947984695435, "learning_rate": 3.64880500492677e-05, "loss": 1.0079, "step": 6709 }, { "epoch": 0.5995487747671275, "grad_norm": 0.38246116042137146, "learning_rate": 3.647411830641903e-05, "loss": 0.9701, "step": 6710 }, { "epoch": 0.5996381262983894, "grad_norm": 0.6153517365455627, "learning_rate": 3.646018769650713e-05, "loss": 0.8733, "step": 6711 }, { "epoch": 0.5997274778296513, "grad_norm": 0.5294399857521057, "learning_rate": 3.6446258220698814e-05, "loss": 0.9023, "step": 6712 }, { "epoch": 0.5998168293609132, "grad_norm": 0.4735589623451233, "learning_rate": 3.643232988016086e-05, "loss": 0.9632, "step": 6713 }, { "epoch": 0.5999061808921751, "grad_norm": 0.4517965018749237, "learning_rate": 3.641840267605989e-05, "loss": 0.95, "step": 6714 }, { "epoch": 0.599995532423437, "grad_norm": 0.5445354580879211, "learning_rate": 3.640447660956249e-05, "loss": 0.8805, "step": 6715 }, { "epoch": 0.6000848839546987, "grad_norm": 0.4683813452720642, "learning_rate": 3.639055168183507e-05, "loss": 0.9601, "step": 6716 }, { "epoch": 0.6001742354859606, "grad_norm": 0.5692836046218872, "learning_rate": 3.637662789404402e-05, "loss": 0.9754, "step": 6717 }, { "epoch": 0.6002635870172225, "grad_norm": 0.47742897272109985, "learning_rate": 3.636270524735559e-05, "loss": 0.956, "step": 6718 }, { "epoch": 0.6003529385484844, "grad_norm": 0.5341329574584961, "learning_rate": 3.6348783742935966e-05, "loss": 0.9581, "step": 6719 }, { "epoch": 0.6004422900797463, "grad_norm": 0.45011308789253235, "learning_rate": 3.6334863381951214e-05, "loss": 0.9189, "step": 6720 }, { "epoch": 0.6005316416110081, "grad_norm": 0.4406468868255615, "learning_rate": 3.632094416556731e-05, "loss": 0.9414, "step": 6721 }, { "epoch": 0.60062099314227, "grad_norm": 0.46647706627845764, "learning_rate": 3.630702609495014e-05, "loss": 0.893, "step": 6722 }, { "epoch": 0.6007103446735318, "grad_norm": 0.3957505226135254, "learning_rate": 3.6293109171265486e-05, "loss": 0.9984, "step": 6723 }, { "epoch": 0.6007996962047937, "grad_norm": 0.5789626836776733, "learning_rate": 3.627919339567906e-05, "loss": 0.9168, "step": 6724 }, { "epoch": 0.6008890477360556, "grad_norm": 0.4705089032649994, "learning_rate": 3.626527876935645e-05, "loss": 0.9538, "step": 6725 }, { "epoch": 0.6009783992673174, "grad_norm": 0.4237965941429138, "learning_rate": 3.625136529346312e-05, "loss": 1.0059, "step": 6726 }, { "epoch": 0.6010677507985793, "grad_norm": 0.423532634973526, "learning_rate": 3.6237452969164495e-05, "loss": 0.9481, "step": 6727 }, { "epoch": 0.6011571023298412, "grad_norm": 0.5018135905265808, "learning_rate": 3.622354179762589e-05, "loss": 0.9048, "step": 6728 }, { "epoch": 0.6012464538611031, "grad_norm": 0.4970499873161316, "learning_rate": 3.620963178001251e-05, "loss": 0.8817, "step": 6729 }, { "epoch": 0.6013358053923649, "grad_norm": 0.5816099047660828, "learning_rate": 3.619572291748947e-05, "loss": 0.9101, "step": 6730 }, { "epoch": 0.6014251569236267, "grad_norm": 0.43934929370880127, "learning_rate": 3.618181521122176e-05, "loss": 0.9431, "step": 6731 }, { "epoch": 0.6015145084548886, "grad_norm": 0.503680408000946, "learning_rate": 3.616790866237433e-05, "loss": 0.9065, "step": 6732 }, { "epoch": 0.6016038599861505, "grad_norm": 0.6071944832801819, "learning_rate": 3.615400327211198e-05, "loss": 0.9336, "step": 6733 }, { "epoch": 0.6016932115174124, "grad_norm": 0.4700120687484741, "learning_rate": 3.614009904159945e-05, "loss": 0.9705, "step": 6734 }, { "epoch": 0.6017825630486743, "grad_norm": 0.4359018802642822, "learning_rate": 3.6126195972001376e-05, "loss": 0.968, "step": 6735 }, { "epoch": 0.6018719145799362, "grad_norm": 0.3945593237876892, "learning_rate": 3.6112294064482253e-05, "loss": 1.0053, "step": 6736 }, { "epoch": 0.6019612661111979, "grad_norm": 0.3968038558959961, "learning_rate": 3.6098393320206536e-05, "loss": 0.9179, "step": 6737 }, { "epoch": 0.6020506176424598, "grad_norm": 0.4622810184955597, "learning_rate": 3.608449374033856e-05, "loss": 0.9345, "step": 6738 }, { "epoch": 0.6021399691737217, "grad_norm": 0.45781803131103516, "learning_rate": 3.607059532604256e-05, "loss": 0.9804, "step": 6739 }, { "epoch": 0.6022293207049836, "grad_norm": 0.43766266107559204, "learning_rate": 3.6056698078482676e-05, "loss": 0.9481, "step": 6740 }, { "epoch": 0.6023186722362455, "grad_norm": 0.3953046202659607, "learning_rate": 3.604280199882296e-05, "loss": 0.9496, "step": 6741 }, { "epoch": 0.6024080237675074, "grad_norm": 0.5054026246070862, "learning_rate": 3.602890708822735e-05, "loss": 0.9, "step": 6742 }, { "epoch": 0.6024973752987692, "grad_norm": 0.43824490904808044, "learning_rate": 3.601501334785968e-05, "loss": 0.9556, "step": 6743 }, { "epoch": 0.602586726830031, "grad_norm": 0.4914816915988922, "learning_rate": 3.600112077888374e-05, "loss": 1.0054, "step": 6744 }, { "epoch": 0.6026760783612929, "grad_norm": 0.4403778910636902, "learning_rate": 3.598722938246314e-05, "loss": 0.9259, "step": 6745 }, { "epoch": 0.6027654298925548, "grad_norm": 0.4326126277446747, "learning_rate": 3.5973339159761435e-05, "loss": 0.9524, "step": 6746 }, { "epoch": 0.6028547814238167, "grad_norm": 0.4220033586025238, "learning_rate": 3.595945011194208e-05, "loss": 1.0042, "step": 6747 }, { "epoch": 0.6029441329550785, "grad_norm": 0.5944996476173401, "learning_rate": 3.594556224016847e-05, "loss": 0.9395, "step": 6748 }, { "epoch": 0.6030334844863404, "grad_norm": 0.4896973967552185, "learning_rate": 3.593167554560381e-05, "loss": 0.9024, "step": 6749 }, { "epoch": 0.6031228360176023, "grad_norm": 0.5966628789901733, "learning_rate": 3.591779002941128e-05, "loss": 0.9577, "step": 6750 }, { "epoch": 0.6032121875488641, "grad_norm": 0.4684157073497772, "learning_rate": 3.590390569275395e-05, "loss": 0.9349, "step": 6751 }, { "epoch": 0.603301539080126, "grad_norm": 0.5347898006439209, "learning_rate": 3.589002253679476e-05, "loss": 0.9619, "step": 6752 }, { "epoch": 0.6033908906113878, "grad_norm": 0.4841513931751251, "learning_rate": 3.5876140562696594e-05, "loss": 0.8933, "step": 6753 }, { "epoch": 0.6034802421426497, "grad_norm": 0.48916760087013245, "learning_rate": 3.58622597716222e-05, "loss": 0.9148, "step": 6754 }, { "epoch": 0.6035695936739116, "grad_norm": 0.440901517868042, "learning_rate": 3.584838016473426e-05, "loss": 0.9739, "step": 6755 }, { "epoch": 0.6036589452051735, "grad_norm": 0.39379745721817017, "learning_rate": 3.5834501743195314e-05, "loss": 1.0049, "step": 6756 }, { "epoch": 0.6037482967364354, "grad_norm": 0.6075735688209534, "learning_rate": 3.582062450816784e-05, "loss": 0.8999, "step": 6757 }, { "epoch": 0.6038376482676971, "grad_norm": 0.4395672082901001, "learning_rate": 3.580674846081421e-05, "loss": 0.894, "step": 6758 }, { "epoch": 0.603926999798959, "grad_norm": 0.5487232208251953, "learning_rate": 3.579287360229668e-05, "loss": 0.9693, "step": 6759 }, { "epoch": 0.6040163513302209, "grad_norm": 0.44997963309288025, "learning_rate": 3.5778999933777423e-05, "loss": 0.9385, "step": 6760 }, { "epoch": 0.6041057028614828, "grad_norm": 0.49523553252220154, "learning_rate": 3.5765127456418514e-05, "loss": 0.8897, "step": 6761 }, { "epoch": 0.6041950543927447, "grad_norm": 0.4439547657966614, "learning_rate": 3.57512561713819e-05, "loss": 0.9414, "step": 6762 }, { "epoch": 0.6042844059240066, "grad_norm": 0.5505682826042175, "learning_rate": 3.5737386079829484e-05, "loss": 0.8705, "step": 6763 }, { "epoch": 0.6043737574552683, "grad_norm": 0.42850804328918457, "learning_rate": 3.5723517182923e-05, "loss": 0.9114, "step": 6764 }, { "epoch": 0.6044631089865302, "grad_norm": 0.40083956718444824, "learning_rate": 3.570964948182412e-05, "loss": 0.9795, "step": 6765 }, { "epoch": 0.6045524605177921, "grad_norm": 0.4469483196735382, "learning_rate": 3.5695782977694436e-05, "loss": 1.0354, "step": 6766 }, { "epoch": 0.604641812049054, "grad_norm": 0.5799636840820312, "learning_rate": 3.56819176716954e-05, "loss": 0.8358, "step": 6767 }, { "epoch": 0.6047311635803159, "grad_norm": 0.4821333587169647, "learning_rate": 3.566805356498837e-05, "loss": 0.8875, "step": 6768 }, { "epoch": 0.6048205151115778, "grad_norm": 0.4493705928325653, "learning_rate": 3.5654190658734624e-05, "loss": 0.971, "step": 6769 }, { "epoch": 0.6049098666428396, "grad_norm": 0.4657124876976013, "learning_rate": 3.564032895409532e-05, "loss": 0.9372, "step": 6770 }, { "epoch": 0.6049992181741014, "grad_norm": 0.4851822555065155, "learning_rate": 3.562646845223153e-05, "loss": 0.9399, "step": 6771 }, { "epoch": 0.6050885697053633, "grad_norm": 0.4866061210632324, "learning_rate": 3.561260915430422e-05, "loss": 0.9057, "step": 6772 }, { "epoch": 0.6051779212366252, "grad_norm": 0.4425218403339386, "learning_rate": 3.559875106147425e-05, "loss": 0.9446, "step": 6773 }, { "epoch": 0.605267272767887, "grad_norm": 0.49788007140159607, "learning_rate": 3.5584894174902386e-05, "loss": 0.934, "step": 6774 }, { "epoch": 0.6053566242991489, "grad_norm": 0.4979552626609802, "learning_rate": 3.557103849574929e-05, "loss": 0.8783, "step": 6775 }, { "epoch": 0.6054459758304108, "grad_norm": 0.4978952407836914, "learning_rate": 3.5557184025175536e-05, "loss": 0.9696, "step": 6776 }, { "epoch": 0.6055353273616727, "grad_norm": 0.5217999219894409, "learning_rate": 3.554333076434156e-05, "loss": 0.8745, "step": 6777 }, { "epoch": 0.6056246788929345, "grad_norm": 0.4879658818244934, "learning_rate": 3.552947871440772e-05, "loss": 0.9278, "step": 6778 }, { "epoch": 0.6057140304241964, "grad_norm": 0.3954866826534271, "learning_rate": 3.551562787653429e-05, "loss": 0.943, "step": 6779 }, { "epoch": 0.6058033819554582, "grad_norm": 0.41415131092071533, "learning_rate": 3.550177825188141e-05, "loss": 0.9986, "step": 6780 }, { "epoch": 0.6058927334867201, "grad_norm": 0.4643884003162384, "learning_rate": 3.5487929841609154e-05, "loss": 0.8917, "step": 6781 }, { "epoch": 0.605982085017982, "grad_norm": 0.5743858218193054, "learning_rate": 3.5474082646877446e-05, "loss": 0.9231, "step": 6782 }, { "epoch": 0.6060714365492439, "grad_norm": 0.46073269844055176, "learning_rate": 3.546023666884616e-05, "loss": 0.9074, "step": 6783 }, { "epoch": 0.6061607880805058, "grad_norm": 0.3921644389629364, "learning_rate": 3.5446391908675033e-05, "loss": 0.9552, "step": 6784 }, { "epoch": 0.6062501396117675, "grad_norm": 0.4658121168613434, "learning_rate": 3.543254836752371e-05, "loss": 0.9736, "step": 6785 }, { "epoch": 0.6063394911430294, "grad_norm": 0.44751229882240295, "learning_rate": 3.541870604655176e-05, "loss": 1.0575, "step": 6786 }, { "epoch": 0.6064288426742913, "grad_norm": 0.534199595451355, "learning_rate": 3.5404864946918595e-05, "loss": 0.8935, "step": 6787 }, { "epoch": 0.6065181942055532, "grad_norm": 0.41165226697921753, "learning_rate": 3.539102506978356e-05, "loss": 0.9994, "step": 6788 }, { "epoch": 0.6066075457368151, "grad_norm": 0.5048467516899109, "learning_rate": 3.5377186416305884e-05, "loss": 0.9057, "step": 6789 }, { "epoch": 0.606696897268077, "grad_norm": 0.44976770877838135, "learning_rate": 3.5363348987644725e-05, "loss": 0.9591, "step": 6790 }, { "epoch": 0.6067862487993388, "grad_norm": 0.4342833161354065, "learning_rate": 3.534951278495909e-05, "loss": 0.945, "step": 6791 }, { "epoch": 0.6068756003306006, "grad_norm": 0.4819537401199341, "learning_rate": 3.533567780940794e-05, "loss": 0.9135, "step": 6792 }, { "epoch": 0.6069649518618625, "grad_norm": 0.4745539426803589, "learning_rate": 3.532184406215008e-05, "loss": 0.9931, "step": 6793 }, { "epoch": 0.6070543033931244, "grad_norm": 0.4145744740962982, "learning_rate": 3.5308011544344224e-05, "loss": 1.0892, "step": 6794 }, { "epoch": 0.6071436549243863, "grad_norm": 0.5059308409690857, "learning_rate": 3.529418025714902e-05, "loss": 0.9136, "step": 6795 }, { "epoch": 0.6072330064556481, "grad_norm": 0.4676026403903961, "learning_rate": 3.528035020172299e-05, "loss": 0.9068, "step": 6796 }, { "epoch": 0.60732235798691, "grad_norm": 0.4627551734447479, "learning_rate": 3.526652137922451e-05, "loss": 0.9546, "step": 6797 }, { "epoch": 0.6074117095181719, "grad_norm": 0.5230789184570312, "learning_rate": 3.525269379081191e-05, "loss": 0.959, "step": 6798 }, { "epoch": 0.6075010610494337, "grad_norm": 0.46442854404449463, "learning_rate": 3.52388674376434e-05, "loss": 0.9959, "step": 6799 }, { "epoch": 0.6075904125806956, "grad_norm": 0.4610006511211395, "learning_rate": 3.52250423208771e-05, "loss": 0.8957, "step": 6800 }, { "epoch": 0.6076797641119575, "grad_norm": 0.48097458481788635, "learning_rate": 3.521121844167098e-05, "loss": 0.9721, "step": 6801 }, { "epoch": 0.6077691156432193, "grad_norm": 0.539371132850647, "learning_rate": 3.5197395801182955e-05, "loss": 0.8974, "step": 6802 }, { "epoch": 0.6078584671744812, "grad_norm": 0.4365449845790863, "learning_rate": 3.518357440057081e-05, "loss": 0.9525, "step": 6803 }, { "epoch": 0.6079478187057431, "grad_norm": 0.4736482501029968, "learning_rate": 3.516975424099225e-05, "loss": 0.8908, "step": 6804 }, { "epoch": 0.608037170237005, "grad_norm": 0.4505004286766052, "learning_rate": 3.515593532360484e-05, "loss": 0.9166, "step": 6805 }, { "epoch": 0.6081265217682668, "grad_norm": 0.5180654525756836, "learning_rate": 3.5142117649566104e-05, "loss": 0.9138, "step": 6806 }, { "epoch": 0.6082158732995286, "grad_norm": 0.3982846140861511, "learning_rate": 3.5128301220033366e-05, "loss": 0.9941, "step": 6807 }, { "epoch": 0.6083052248307905, "grad_norm": 0.4852418005466461, "learning_rate": 3.511448603616392e-05, "loss": 0.9798, "step": 6808 }, { "epoch": 0.6083945763620524, "grad_norm": 0.4647953510284424, "learning_rate": 3.510067209911493e-05, "loss": 0.8897, "step": 6809 }, { "epoch": 0.6084839278933143, "grad_norm": 0.4955390691757202, "learning_rate": 3.508685941004348e-05, "loss": 0.9398, "step": 6810 }, { "epoch": 0.6085732794245762, "grad_norm": 0.45322465896606445, "learning_rate": 3.507304797010651e-05, "loss": 0.9526, "step": 6811 }, { "epoch": 0.6086626309558381, "grad_norm": 0.47306427359580994, "learning_rate": 3.505923778046088e-05, "loss": 0.9454, "step": 6812 }, { "epoch": 0.6087519824870998, "grad_norm": 0.5509787201881409, "learning_rate": 3.5045428842263344e-05, "loss": 0.87, "step": 6813 }, { "epoch": 0.6088413340183617, "grad_norm": 0.4736871123313904, "learning_rate": 3.503162115667056e-05, "loss": 0.9705, "step": 6814 }, { "epoch": 0.6089306855496236, "grad_norm": 0.6895001530647278, "learning_rate": 3.5017814724839046e-05, "loss": 0.9558, "step": 6815 }, { "epoch": 0.6090200370808855, "grad_norm": 0.47932013869285583, "learning_rate": 3.5004009547925255e-05, "loss": 0.9453, "step": 6816 }, { "epoch": 0.6091093886121474, "grad_norm": 0.49553731083869934, "learning_rate": 3.4990205627085504e-05, "loss": 0.9632, "step": 6817 }, { "epoch": 0.6091987401434092, "grad_norm": 0.49811407923698425, "learning_rate": 3.497640296347603e-05, "loss": 0.898, "step": 6818 }, { "epoch": 0.6092880916746711, "grad_norm": 0.42561087012290955, "learning_rate": 3.496260155825294e-05, "loss": 0.9824, "step": 6819 }, { "epoch": 0.6093774432059329, "grad_norm": 0.4683232009410858, "learning_rate": 3.4948801412572255e-05, "loss": 0.9188, "step": 6820 }, { "epoch": 0.6094667947371948, "grad_norm": 0.4638214409351349, "learning_rate": 3.493500252758989e-05, "loss": 0.9847, "step": 6821 }, { "epoch": 0.6095561462684567, "grad_norm": 0.47168654203414917, "learning_rate": 3.492120490446164e-05, "loss": 0.9237, "step": 6822 }, { "epoch": 0.6096454977997185, "grad_norm": 0.5087332725524902, "learning_rate": 3.490740854434321e-05, "loss": 0.9413, "step": 6823 }, { "epoch": 0.6097348493309804, "grad_norm": 0.5125972628593445, "learning_rate": 3.489361344839018e-05, "loss": 1.0025, "step": 6824 }, { "epoch": 0.6098242008622423, "grad_norm": 0.4349963068962097, "learning_rate": 3.487981961775806e-05, "loss": 0.9474, "step": 6825 }, { "epoch": 0.6099135523935042, "grad_norm": 0.5097174644470215, "learning_rate": 3.486602705360223e-05, "loss": 0.9926, "step": 6826 }, { "epoch": 0.610002903924766, "grad_norm": 0.43980687856674194, "learning_rate": 3.4852235757077934e-05, "loss": 0.9322, "step": 6827 }, { "epoch": 0.6100922554560279, "grad_norm": 0.48493608832359314, "learning_rate": 3.483844572934036e-05, "loss": 0.992, "step": 6828 }, { "epoch": 0.6101816069872897, "grad_norm": 0.43996307253837585, "learning_rate": 3.482465697154456e-05, "loss": 0.9379, "step": 6829 }, { "epoch": 0.6102709585185516, "grad_norm": 0.5646868944168091, "learning_rate": 3.48108694848455e-05, "loss": 0.9061, "step": 6830 }, { "epoch": 0.6103603100498135, "grad_norm": 0.4832056164741516, "learning_rate": 3.479708327039802e-05, "loss": 0.9026, "step": 6831 }, { "epoch": 0.6104496615810754, "grad_norm": 0.5810287594795227, "learning_rate": 3.478329832935687e-05, "loss": 0.825, "step": 6832 }, { "epoch": 0.6105390131123372, "grad_norm": 0.5271215438842773, "learning_rate": 3.47695146628767e-05, "loss": 0.9671, "step": 6833 }, { "epoch": 0.610628364643599, "grad_norm": 0.461401104927063, "learning_rate": 3.475573227211201e-05, "loss": 0.9199, "step": 6834 }, { "epoch": 0.6107177161748609, "grad_norm": 0.46049854159355164, "learning_rate": 3.474195115821723e-05, "loss": 0.9179, "step": 6835 }, { "epoch": 0.6108070677061228, "grad_norm": 0.521156370639801, "learning_rate": 3.4728171322346694e-05, "loss": 0.8643, "step": 6836 }, { "epoch": 0.6108964192373847, "grad_norm": 0.4350111782550812, "learning_rate": 3.471439276565459e-05, "loss": 0.9443, "step": 6837 }, { "epoch": 0.6109857707686466, "grad_norm": 0.4543260931968689, "learning_rate": 3.470061548929502e-05, "loss": 0.9858, "step": 6838 }, { "epoch": 0.6110751222999085, "grad_norm": 0.43811121582984924, "learning_rate": 3.4686839494421976e-05, "loss": 0.9243, "step": 6839 }, { "epoch": 0.6111644738311702, "grad_norm": 0.45619046688079834, "learning_rate": 3.4673064782189356e-05, "loss": 0.9676, "step": 6840 }, { "epoch": 0.6112538253624321, "grad_norm": 0.39327651262283325, "learning_rate": 3.4659291353750934e-05, "loss": 0.9336, "step": 6841 }, { "epoch": 0.611343176893694, "grad_norm": 0.5329780578613281, "learning_rate": 3.464551921026038e-05, "loss": 0.9395, "step": 6842 }, { "epoch": 0.6114325284249559, "grad_norm": 0.5322310924530029, "learning_rate": 3.463174835287125e-05, "loss": 0.9203, "step": 6843 }, { "epoch": 0.6115218799562178, "grad_norm": 0.5095454454421997, "learning_rate": 3.461797878273703e-05, "loss": 0.9522, "step": 6844 }, { "epoch": 0.6116112314874796, "grad_norm": 0.5680137276649475, "learning_rate": 3.460421050101103e-05, "loss": 0.9741, "step": 6845 }, { "epoch": 0.6117005830187415, "grad_norm": 0.4388297498226166, "learning_rate": 3.4590443508846536e-05, "loss": 0.9311, "step": 6846 }, { "epoch": 0.6117899345500033, "grad_norm": 0.3977706730365753, "learning_rate": 3.457667780739663e-05, "loss": 0.9636, "step": 6847 }, { "epoch": 0.6118792860812652, "grad_norm": 0.42693883180618286, "learning_rate": 3.456291339781435e-05, "loss": 0.8826, "step": 6848 }, { "epoch": 0.6119686376125271, "grad_norm": 0.4118557870388031, "learning_rate": 3.4549150281252636e-05, "loss": 0.9483, "step": 6849 }, { "epoch": 0.612057989143789, "grad_norm": 0.5658688545227051, "learning_rate": 3.453538845886427e-05, "loss": 0.909, "step": 6850 }, { "epoch": 0.6121473406750508, "grad_norm": 0.48689889907836914, "learning_rate": 3.452162793180198e-05, "loss": 1.0, "step": 6851 }, { "epoch": 0.6122366922063127, "grad_norm": 0.5138941407203674, "learning_rate": 3.4507868701218314e-05, "loss": 1.0012, "step": 6852 }, { "epoch": 0.6123260437375746, "grad_norm": 0.5919985771179199, "learning_rate": 3.4494110768265795e-05, "loss": 0.9056, "step": 6853 }, { "epoch": 0.6124153952688364, "grad_norm": 0.5183218121528625, "learning_rate": 3.448035413409677e-05, "loss": 0.9247, "step": 6854 }, { "epoch": 0.6125047468000983, "grad_norm": 0.455565869808197, "learning_rate": 3.446659879986351e-05, "loss": 1.024, "step": 6855 }, { "epoch": 0.6125940983313601, "grad_norm": 0.5125479698181152, "learning_rate": 3.445284476671818e-05, "loss": 1.0608, "step": 6856 }, { "epoch": 0.612683449862622, "grad_norm": 0.39517349004745483, "learning_rate": 3.443909203581285e-05, "loss": 0.9636, "step": 6857 }, { "epoch": 0.6127728013938839, "grad_norm": 0.5921311974525452, "learning_rate": 3.44253406082994e-05, "loss": 0.9022, "step": 6858 }, { "epoch": 0.6128621529251458, "grad_norm": 0.5381450057029724, "learning_rate": 3.441159048532969e-05, "loss": 0.9532, "step": 6859 }, { "epoch": 0.6129515044564077, "grad_norm": 0.4705429971218109, "learning_rate": 3.439784166805544e-05, "loss": 0.9147, "step": 6860 }, { "epoch": 0.6130408559876694, "grad_norm": 0.4696353077888489, "learning_rate": 3.438409415762825e-05, "loss": 0.9437, "step": 6861 }, { "epoch": 0.6131302075189313, "grad_norm": 0.5168521404266357, "learning_rate": 3.437034795519963e-05, "loss": 0.9752, "step": 6862 }, { "epoch": 0.6132195590501932, "grad_norm": 0.5304763317108154, "learning_rate": 3.435660306192098e-05, "loss": 0.9091, "step": 6863 }, { "epoch": 0.6133089105814551, "grad_norm": 0.460254043340683, "learning_rate": 3.434285947894356e-05, "loss": 0.9877, "step": 6864 }, { "epoch": 0.613398262112717, "grad_norm": 0.5005228519439697, "learning_rate": 3.432911720741855e-05, "loss": 0.9853, "step": 6865 }, { "epoch": 0.6134876136439789, "grad_norm": 0.4847668409347534, "learning_rate": 3.4315376248497025e-05, "loss": 0.9965, "step": 6866 }, { "epoch": 0.6135769651752407, "grad_norm": 0.4120919704437256, "learning_rate": 3.430163660332993e-05, "loss": 0.9123, "step": 6867 }, { "epoch": 0.6136663167065025, "grad_norm": 0.5527864694595337, "learning_rate": 3.428789827306809e-05, "loss": 0.9233, "step": 6868 }, { "epoch": 0.6137556682377644, "grad_norm": 0.5144837498664856, "learning_rate": 3.4274161258862245e-05, "loss": 0.9465, "step": 6869 }, { "epoch": 0.6138450197690263, "grad_norm": 0.4069208800792694, "learning_rate": 3.426042556186303e-05, "loss": 0.9784, "step": 6870 }, { "epoch": 0.6139343713002882, "grad_norm": 0.47290289402008057, "learning_rate": 3.424669118322094e-05, "loss": 0.9245, "step": 6871 }, { "epoch": 0.61402372283155, "grad_norm": 0.5234418511390686, "learning_rate": 3.423295812408638e-05, "loss": 0.9324, "step": 6872 }, { "epoch": 0.6141130743628119, "grad_norm": 0.42255857586860657, "learning_rate": 3.421922638560964e-05, "loss": 0.9565, "step": 6873 }, { "epoch": 0.6142024258940738, "grad_norm": 0.4056835472583771, "learning_rate": 3.4205495968940907e-05, "loss": 0.983, "step": 6874 }, { "epoch": 0.6142917774253356, "grad_norm": 0.42643529176712036, "learning_rate": 3.4191766875230234e-05, "loss": 0.906, "step": 6875 }, { "epoch": 0.6143811289565975, "grad_norm": 0.42716148495674133, "learning_rate": 3.4178039105627594e-05, "loss": 1.0189, "step": 6876 }, { "epoch": 0.6144704804878593, "grad_norm": 0.4740234315395355, "learning_rate": 3.416431266128286e-05, "loss": 0.9249, "step": 6877 }, { "epoch": 0.6145598320191212, "grad_norm": 0.41565650701522827, "learning_rate": 3.4150587543345705e-05, "loss": 0.9317, "step": 6878 }, { "epoch": 0.6146491835503831, "grad_norm": 0.46336111426353455, "learning_rate": 3.413686375296578e-05, "loss": 0.9369, "step": 6879 }, { "epoch": 0.614738535081645, "grad_norm": 0.4771583676338196, "learning_rate": 3.4123141291292615e-05, "loss": 0.9388, "step": 6880 }, { "epoch": 0.6148278866129069, "grad_norm": 0.4211154878139496, "learning_rate": 3.41094201594756e-05, "loss": 0.8495, "step": 6881 }, { "epoch": 0.6149172381441687, "grad_norm": 0.4368746280670166, "learning_rate": 3.4095700358664026e-05, "loss": 0.9002, "step": 6882 }, { "epoch": 0.6150065896754305, "grad_norm": 0.4566292464733124, "learning_rate": 3.4081981890007074e-05, "loss": 0.9938, "step": 6883 }, { "epoch": 0.6150959412066924, "grad_norm": 0.43355435132980347, "learning_rate": 3.406826475465382e-05, "loss": 0.9271, "step": 6884 }, { "epoch": 0.6151852927379543, "grad_norm": 0.502873420715332, "learning_rate": 3.4054548953753205e-05, "loss": 0.9655, "step": 6885 }, { "epoch": 0.6152746442692162, "grad_norm": 0.5166613459587097, "learning_rate": 3.4040834488454086e-05, "loss": 0.8589, "step": 6886 }, { "epoch": 0.6153639958004781, "grad_norm": 0.47696536779403687, "learning_rate": 3.40271213599052e-05, "loss": 0.8985, "step": 6887 }, { "epoch": 0.61545334733174, "grad_norm": 0.47705844044685364, "learning_rate": 3.401340956925515e-05, "loss": 0.9217, "step": 6888 }, { "epoch": 0.6155426988630017, "grad_norm": 0.5489514470100403, "learning_rate": 3.399969911765246e-05, "loss": 0.9599, "step": 6889 }, { "epoch": 0.6156320503942636, "grad_norm": 0.47389769554138184, "learning_rate": 3.398599000624551e-05, "loss": 0.9344, "step": 6890 }, { "epoch": 0.6157214019255255, "grad_norm": 0.41707125306129456, "learning_rate": 3.39722822361826e-05, "loss": 1.0065, "step": 6891 }, { "epoch": 0.6158107534567874, "grad_norm": 0.4215580224990845, "learning_rate": 3.395857580861188e-05, "loss": 0.9582, "step": 6892 }, { "epoch": 0.6159001049880493, "grad_norm": 0.5935801863670349, "learning_rate": 3.394487072468144e-05, "loss": 0.8802, "step": 6893 }, { "epoch": 0.6159894565193111, "grad_norm": 0.40050363540649414, "learning_rate": 3.393116698553921e-05, "loss": 0.9529, "step": 6894 }, { "epoch": 0.6160788080505729, "grad_norm": 0.41118553280830383, "learning_rate": 3.391746459233302e-05, "loss": 0.9639, "step": 6895 }, { "epoch": 0.6161681595818348, "grad_norm": 0.47675037384033203, "learning_rate": 3.39037635462106e-05, "loss": 0.9067, "step": 6896 }, { "epoch": 0.6162575111130967, "grad_norm": 0.5176188945770264, "learning_rate": 3.3890063848319585e-05, "loss": 0.8955, "step": 6897 }, { "epoch": 0.6163468626443586, "grad_norm": 0.4446241557598114, "learning_rate": 3.3876365499807414e-05, "loss": 0.9455, "step": 6898 }, { "epoch": 0.6164362141756204, "grad_norm": 0.42455655336380005, "learning_rate": 3.38626685018215e-05, "loss": 0.8833, "step": 6899 }, { "epoch": 0.6165255657068823, "grad_norm": 0.4535345733165741, "learning_rate": 3.384897285550911e-05, "loss": 0.9354, "step": 6900 }, { "epoch": 0.6166149172381442, "grad_norm": 0.3765280544757843, "learning_rate": 3.38352785620174e-05, "loss": 0.9248, "step": 6901 }, { "epoch": 0.616704268769406, "grad_norm": 0.42387163639068604, "learning_rate": 3.382158562249342e-05, "loss": 0.945, "step": 6902 }, { "epoch": 0.6167936203006679, "grad_norm": 0.4898373484611511, "learning_rate": 3.38078940380841e-05, "loss": 0.9428, "step": 6903 }, { "epoch": 0.6168829718319297, "grad_norm": 0.5467028021812439, "learning_rate": 3.3794203809936235e-05, "loss": 0.891, "step": 6904 }, { "epoch": 0.6169723233631916, "grad_norm": 0.4893369972705841, "learning_rate": 3.3780514939196554e-05, "loss": 0.9611, "step": 6905 }, { "epoch": 0.6170616748944535, "grad_norm": 0.6234750747680664, "learning_rate": 3.376682742701161e-05, "loss": 0.946, "step": 6906 }, { "epoch": 0.6171510264257154, "grad_norm": 0.49757829308509827, "learning_rate": 3.375314127452795e-05, "loss": 0.8848, "step": 6907 }, { "epoch": 0.6172403779569773, "grad_norm": 0.49189379811286926, "learning_rate": 3.3739456482891854e-05, "loss": 0.901, "step": 6908 }, { "epoch": 0.617329729488239, "grad_norm": 0.49801400303840637, "learning_rate": 3.37257730532496e-05, "loss": 1.0374, "step": 6909 }, { "epoch": 0.6174190810195009, "grad_norm": 0.5089427828788757, "learning_rate": 3.371209098674734e-05, "loss": 0.9399, "step": 6910 }, { "epoch": 0.6175084325507628, "grad_norm": 0.43401315808296204, "learning_rate": 3.3698410284531055e-05, "loss": 0.9137, "step": 6911 }, { "epoch": 0.6175977840820247, "grad_norm": 0.43453726172447205, "learning_rate": 3.368473094774668e-05, "loss": 0.9569, "step": 6912 }, { "epoch": 0.6176871356132866, "grad_norm": 0.5029697418212891, "learning_rate": 3.3671052977539995e-05, "loss": 0.9513, "step": 6913 }, { "epoch": 0.6177764871445485, "grad_norm": 0.3645218312740326, "learning_rate": 3.3657376375056684e-05, "loss": 1.0016, "step": 6914 }, { "epoch": 0.6178658386758104, "grad_norm": 0.5805116891860962, "learning_rate": 3.36437011414423e-05, "loss": 1.0221, "step": 6915 }, { "epoch": 0.6179551902070721, "grad_norm": 0.4427826702594757, "learning_rate": 3.363002727784228e-05, "loss": 0.9114, "step": 6916 }, { "epoch": 0.618044541738334, "grad_norm": 0.41726526618003845, "learning_rate": 3.3616354785401996e-05, "loss": 1.0393, "step": 6917 }, { "epoch": 0.6181338932695959, "grad_norm": 0.6137450337409973, "learning_rate": 3.360268366526662e-05, "loss": 0.9014, "step": 6918 }, { "epoch": 0.6182232448008578, "grad_norm": 0.4480533301830292, "learning_rate": 3.358901391858126e-05, "loss": 0.9186, "step": 6919 }, { "epoch": 0.6183125963321197, "grad_norm": 0.4977112114429474, "learning_rate": 3.357534554649092e-05, "loss": 0.8681, "step": 6920 }, { "epoch": 0.6184019478633815, "grad_norm": 0.44784173369407654, "learning_rate": 3.3561678550140466e-05, "loss": 0.9717, "step": 6921 }, { "epoch": 0.6184912993946434, "grad_norm": 0.5142949223518372, "learning_rate": 3.3548012930674656e-05, "loss": 0.924, "step": 6922 }, { "epoch": 0.6185806509259052, "grad_norm": 0.4716286361217499, "learning_rate": 3.3534348689238115e-05, "loss": 1.0101, "step": 6923 }, { "epoch": 0.6186700024571671, "grad_norm": 0.4497227668762207, "learning_rate": 3.352068582697539e-05, "loss": 1.0712, "step": 6924 }, { "epoch": 0.618759353988429, "grad_norm": 0.4125528633594513, "learning_rate": 3.3507024345030884e-05, "loss": 0.9306, "step": 6925 }, { "epoch": 0.6188487055196908, "grad_norm": 0.47352614998817444, "learning_rate": 3.349336424454889e-05, "loss": 0.9388, "step": 6926 }, { "epoch": 0.6189380570509527, "grad_norm": 0.4631035029888153, "learning_rate": 3.347970552667361e-05, "loss": 0.9669, "step": 6927 }, { "epoch": 0.6190274085822146, "grad_norm": 0.42280247807502747, "learning_rate": 3.346604819254907e-05, "loss": 0.9841, "step": 6928 }, { "epoch": 0.6191167601134765, "grad_norm": 0.4958139955997467, "learning_rate": 3.3452392243319216e-05, "loss": 1.0159, "step": 6929 }, { "epoch": 0.6192061116447383, "grad_norm": 0.4598698019981384, "learning_rate": 3.343873768012792e-05, "loss": 0.9439, "step": 6930 }, { "epoch": 0.6192954631760001, "grad_norm": 0.4202096462249756, "learning_rate": 3.342508450411886e-05, "loss": 1.0049, "step": 6931 }, { "epoch": 0.619384814707262, "grad_norm": 0.43892961740493774, "learning_rate": 3.341143271643565e-05, "loss": 0.8916, "step": 6932 }, { "epoch": 0.6194741662385239, "grad_norm": 0.4676835834980011, "learning_rate": 3.339778231822177e-05, "loss": 0.9904, "step": 6933 }, { "epoch": 0.6195635177697858, "grad_norm": 0.4813263416290283, "learning_rate": 3.338413331062059e-05, "loss": 0.91, "step": 6934 }, { "epoch": 0.6196528693010477, "grad_norm": 0.442055344581604, "learning_rate": 3.3370485694775354e-05, "loss": 0.9984, "step": 6935 }, { "epoch": 0.6197422208323096, "grad_norm": 0.5169895887374878, "learning_rate": 3.335683947182921e-05, "loss": 0.9339, "step": 6936 }, { "epoch": 0.6198315723635713, "grad_norm": 0.4888537526130676, "learning_rate": 3.3343194642925166e-05, "loss": 0.9264, "step": 6937 }, { "epoch": 0.6199209238948332, "grad_norm": 0.5011920928955078, "learning_rate": 3.332955120920612e-05, "loss": 0.9406, "step": 6938 }, { "epoch": 0.6200102754260951, "grad_norm": 0.4616535007953644, "learning_rate": 3.331590917181484e-05, "loss": 0.945, "step": 6939 }, { "epoch": 0.620099626957357, "grad_norm": 0.5329020619392395, "learning_rate": 3.330226853189402e-05, "loss": 0.8971, "step": 6940 }, { "epoch": 0.6201889784886189, "grad_norm": 0.4462106227874756, "learning_rate": 3.328862929058619e-05, "loss": 0.9299, "step": 6941 }, { "epoch": 0.6202783300198808, "grad_norm": 0.6361902952194214, "learning_rate": 3.327499144903378e-05, "loss": 0.9665, "step": 6942 }, { "epoch": 0.6203676815511426, "grad_norm": 0.4172038733959198, "learning_rate": 3.326135500837911e-05, "loss": 0.9784, "step": 6943 }, { "epoch": 0.6204570330824044, "grad_norm": 0.4322963058948517, "learning_rate": 3.324771996976439e-05, "loss": 0.9376, "step": 6944 }, { "epoch": 0.6205463846136663, "grad_norm": 0.4417525827884674, "learning_rate": 3.323408633433168e-05, "loss": 0.923, "step": 6945 }, { "epoch": 0.6206357361449282, "grad_norm": 0.5419497489929199, "learning_rate": 3.322045410322296e-05, "loss": 0.7987, "step": 6946 }, { "epoch": 0.6207250876761901, "grad_norm": 0.5265266299247742, "learning_rate": 3.3206823277580054e-05, "loss": 0.9383, "step": 6947 }, { "epoch": 0.6208144392074519, "grad_norm": 0.3826887905597687, "learning_rate": 3.3193193858544735e-05, "loss": 1.0365, "step": 6948 }, { "epoch": 0.6209037907387138, "grad_norm": 0.44361376762390137, "learning_rate": 3.317956584725855e-05, "loss": 0.9833, "step": 6949 }, { "epoch": 0.6209931422699757, "grad_norm": 0.4837126135826111, "learning_rate": 3.316593924486302e-05, "loss": 0.8984, "step": 6950 }, { "epoch": 0.6210824938012375, "grad_norm": 0.4288838505744934, "learning_rate": 3.315231405249951e-05, "loss": 0.9326, "step": 6951 }, { "epoch": 0.6211718453324994, "grad_norm": 0.5369590520858765, "learning_rate": 3.313869027130929e-05, "loss": 0.9524, "step": 6952 }, { "epoch": 0.6212611968637612, "grad_norm": 0.5428600907325745, "learning_rate": 3.3125067902433485e-05, "loss": 0.9537, "step": 6953 }, { "epoch": 0.6213505483950231, "grad_norm": 0.43962809443473816, "learning_rate": 3.311144694701313e-05, "loss": 0.9584, "step": 6954 }, { "epoch": 0.621439899926285, "grad_norm": 0.40536513924598694, "learning_rate": 3.3097827406189094e-05, "loss": 0.96, "step": 6955 }, { "epoch": 0.6215292514575469, "grad_norm": 0.4061243236064911, "learning_rate": 3.3084209281102184e-05, "loss": 0.9844, "step": 6956 }, { "epoch": 0.6216186029888087, "grad_norm": 0.44544127583503723, "learning_rate": 3.307059257289306e-05, "loss": 0.9206, "step": 6957 }, { "epoch": 0.6217079545200705, "grad_norm": 0.4827932119369507, "learning_rate": 3.305697728270226e-05, "loss": 0.9254, "step": 6958 }, { "epoch": 0.6217973060513324, "grad_norm": 0.5052177906036377, "learning_rate": 3.3043363411670225e-05, "loss": 0.9132, "step": 6959 }, { "epoch": 0.6218866575825943, "grad_norm": 0.5290608406066895, "learning_rate": 3.302975096093723e-05, "loss": 0.9137, "step": 6960 }, { "epoch": 0.6219760091138562, "grad_norm": 0.6665788292884827, "learning_rate": 3.3016139931643486e-05, "loss": 0.8932, "step": 6961 }, { "epoch": 0.6220653606451181, "grad_norm": 0.44664615392684937, "learning_rate": 3.300253032492906e-05, "loss": 0.9808, "step": 6962 }, { "epoch": 0.62215471217638, "grad_norm": 0.44915464520454407, "learning_rate": 3.29889221419339e-05, "loss": 0.9504, "step": 6963 }, { "epoch": 0.6222440637076417, "grad_norm": 0.5096574425697327, "learning_rate": 3.297531538379782e-05, "loss": 0.9388, "step": 6964 }, { "epoch": 0.6223334152389036, "grad_norm": 0.5032405853271484, "learning_rate": 3.296171005166057e-05, "loss": 0.8991, "step": 6965 }, { "epoch": 0.6224227667701655, "grad_norm": 0.43075719475746155, "learning_rate": 3.29481061466617e-05, "loss": 0.9858, "step": 6966 }, { "epoch": 0.6225121183014274, "grad_norm": 0.5037357211112976, "learning_rate": 3.293450366994071e-05, "loss": 0.8802, "step": 6967 }, { "epoch": 0.6226014698326893, "grad_norm": 0.3918449878692627, "learning_rate": 3.292090262263696e-05, "loss": 1.063, "step": 6968 }, { "epoch": 0.6226908213639512, "grad_norm": 0.5222644209861755, "learning_rate": 3.290730300588965e-05, "loss": 0.8485, "step": 6969 }, { "epoch": 0.622780172895213, "grad_norm": 0.46805500984191895, "learning_rate": 3.28937048208379e-05, "loss": 0.9382, "step": 6970 }, { "epoch": 0.6228695244264748, "grad_norm": 0.5328028798103333, "learning_rate": 3.288010806862071e-05, "loss": 0.8772, "step": 6971 }, { "epoch": 0.6229588759577367, "grad_norm": 0.45489615201950073, "learning_rate": 3.286651275037697e-05, "loss": 0.9407, "step": 6972 }, { "epoch": 0.6230482274889986, "grad_norm": 0.4729410409927368, "learning_rate": 3.285291886724541e-05, "loss": 0.9488, "step": 6973 }, { "epoch": 0.6231375790202605, "grad_norm": 0.5226532816886902, "learning_rate": 3.2839326420364664e-05, "loss": 0.9016, "step": 6974 }, { "epoch": 0.6232269305515223, "grad_norm": 0.5036323070526123, "learning_rate": 3.282573541087325e-05, "loss": 0.89, "step": 6975 }, { "epoch": 0.6233162820827842, "grad_norm": 0.42593827843666077, "learning_rate": 3.281214583990956e-05, "loss": 1.067, "step": 6976 }, { "epoch": 0.6234056336140461, "grad_norm": 0.46937912702560425, "learning_rate": 3.2798557708611864e-05, "loss": 0.9958, "step": 6977 }, { "epoch": 0.6234949851453079, "grad_norm": 0.4938536286354065, "learning_rate": 3.2784971018118346e-05, "loss": 0.9676, "step": 6978 }, { "epoch": 0.6235843366765698, "grad_norm": 0.5184669494628906, "learning_rate": 3.2771385769566975e-05, "loss": 0.9144, "step": 6979 }, { "epoch": 0.6236736882078316, "grad_norm": 0.46924924850463867, "learning_rate": 3.275780196409569e-05, "loss": 0.9918, "step": 6980 }, { "epoch": 0.6237630397390935, "grad_norm": 0.4476311504840851, "learning_rate": 3.2744219602842276e-05, "loss": 0.9499, "step": 6981 }, { "epoch": 0.6238523912703554, "grad_norm": 0.4355534315109253, "learning_rate": 3.27306386869444e-05, "loss": 0.912, "step": 6982 }, { "epoch": 0.6239417428016173, "grad_norm": 0.532875120639801, "learning_rate": 3.271705921753962e-05, "loss": 0.9276, "step": 6983 }, { "epoch": 0.6240310943328792, "grad_norm": 0.4715759754180908, "learning_rate": 3.270348119576536e-05, "loss": 0.9415, "step": 6984 }, { "epoch": 0.624120445864141, "grad_norm": 0.5215761065483093, "learning_rate": 3.26899046227589e-05, "loss": 0.9365, "step": 6985 }, { "epoch": 0.6242097973954028, "grad_norm": 0.5269849896430969, "learning_rate": 3.2676329499657455e-05, "loss": 0.8021, "step": 6986 }, { "epoch": 0.6242991489266647, "grad_norm": 0.43522122502326965, "learning_rate": 3.266275582759808e-05, "loss": 0.9598, "step": 6987 }, { "epoch": 0.6243885004579266, "grad_norm": 0.45015889406204224, "learning_rate": 3.2649183607717706e-05, "loss": 0.9508, "step": 6988 }, { "epoch": 0.6244778519891885, "grad_norm": 0.4154300093650818, "learning_rate": 3.263561284115313e-05, "loss": 0.9569, "step": 6989 }, { "epoch": 0.6245672035204504, "grad_norm": 0.39608290791511536, "learning_rate": 3.262204352904108e-05, "loss": 0.9678, "step": 6990 }, { "epoch": 0.6246565550517122, "grad_norm": 0.4975006878376007, "learning_rate": 3.2608475672518115e-05, "loss": 0.9861, "step": 6991 }, { "epoch": 0.624745906582974, "grad_norm": 0.507334291934967, "learning_rate": 3.259490927272071e-05, "loss": 0.8913, "step": 6992 }, { "epoch": 0.6248352581142359, "grad_norm": 0.46734029054641724, "learning_rate": 3.2581344330785156e-05, "loss": 0.9106, "step": 6993 }, { "epoch": 0.6249246096454978, "grad_norm": 0.45459601283073425, "learning_rate": 3.2567780847847693e-05, "loss": 0.9296, "step": 6994 }, { "epoch": 0.6250139611767597, "grad_norm": 0.45823296904563904, "learning_rate": 3.25542188250444e-05, "loss": 0.9551, "step": 6995 }, { "epoch": 0.6251033127080216, "grad_norm": 0.4765163064002991, "learning_rate": 3.2540658263511235e-05, "loss": 0.9167, "step": 6996 }, { "epoch": 0.6251926642392834, "grad_norm": 0.4322914183139801, "learning_rate": 3.252709916438404e-05, "loss": 0.9762, "step": 6997 }, { "epoch": 0.6252820157705453, "grad_norm": 0.4407638907432556, "learning_rate": 3.251354152879856e-05, "loss": 0.9413, "step": 6998 }, { "epoch": 0.6253713673018071, "grad_norm": 0.44821932911872864, "learning_rate": 3.2499985357890356e-05, "loss": 0.9412, "step": 6999 }, { "epoch": 0.625460718833069, "grad_norm": 0.5897961854934692, "learning_rate": 3.2486430652794906e-05, "loss": 1.0047, "step": 7000 }, { "epoch": 0.6255500703643309, "grad_norm": 0.37673619389533997, "learning_rate": 3.247287741464758e-05, "loss": 1.0576, "step": 7001 }, { "epoch": 0.6256394218955927, "grad_norm": 0.43584850430488586, "learning_rate": 3.245932564458359e-05, "loss": 0.9994, "step": 7002 }, { "epoch": 0.6257287734268546, "grad_norm": 0.4906632602214813, "learning_rate": 3.244577534373805e-05, "loss": 0.9025, "step": 7003 }, { "epoch": 0.6258181249581165, "grad_norm": 0.4197581112384796, "learning_rate": 3.2432226513245935e-05, "loss": 0.9456, "step": 7004 }, { "epoch": 0.6259074764893784, "grad_norm": 0.4052625000476837, "learning_rate": 3.241867915424211e-05, "loss": 0.9911, "step": 7005 }, { "epoch": 0.6259968280206402, "grad_norm": 0.46664437651634216, "learning_rate": 3.240513326786132e-05, "loss": 0.9425, "step": 7006 }, { "epoch": 0.626086179551902, "grad_norm": 0.5681549310684204, "learning_rate": 3.239158885523815e-05, "loss": 0.9522, "step": 7007 }, { "epoch": 0.6261755310831639, "grad_norm": 0.4695439040660858, "learning_rate": 3.237804591750713e-05, "loss": 1.0061, "step": 7008 }, { "epoch": 0.6262648826144258, "grad_norm": 0.4879624545574188, "learning_rate": 3.236450445580258e-05, "loss": 1.0038, "step": 7009 }, { "epoch": 0.6263542341456877, "grad_norm": 0.4483213424682617, "learning_rate": 3.2350964471258785e-05, "loss": 0.9624, "step": 7010 }, { "epoch": 0.6264435856769496, "grad_norm": 0.5487602949142456, "learning_rate": 3.233742596500982e-05, "loss": 0.8609, "step": 7011 }, { "epoch": 0.6265329372082115, "grad_norm": 0.6317974925041199, "learning_rate": 3.2323888938189696e-05, "loss": 0.8496, "step": 7012 }, { "epoch": 0.6266222887394732, "grad_norm": 0.5183166861534119, "learning_rate": 3.231035339193229e-05, "loss": 0.9314, "step": 7013 }, { "epoch": 0.6267116402707351, "grad_norm": 0.4140419363975525, "learning_rate": 3.2296819327371354e-05, "loss": 1.0071, "step": 7014 }, { "epoch": 0.626800991801997, "grad_norm": 0.4753478467464447, "learning_rate": 3.228328674564049e-05, "loss": 0.8971, "step": 7015 }, { "epoch": 0.6268903433332589, "grad_norm": 0.4986949861049652, "learning_rate": 3.226975564787322e-05, "loss": 0.9599, "step": 7016 }, { "epoch": 0.6269796948645208, "grad_norm": 0.4630643427371979, "learning_rate": 3.2256226035202895e-05, "loss": 0.9859, "step": 7017 }, { "epoch": 0.6270690463957826, "grad_norm": 0.45809265971183777, "learning_rate": 3.22426979087628e-05, "loss": 0.9974, "step": 7018 }, { "epoch": 0.6271583979270444, "grad_norm": 0.5661429762840271, "learning_rate": 3.222917126968601e-05, "loss": 0.8188, "step": 7019 }, { "epoch": 0.6272477494583063, "grad_norm": 0.4159087538719177, "learning_rate": 3.221564611910556e-05, "loss": 0.941, "step": 7020 }, { "epoch": 0.6273371009895682, "grad_norm": 0.4369848966598511, "learning_rate": 3.22021224581543e-05, "loss": 0.9944, "step": 7021 }, { "epoch": 0.6274264525208301, "grad_norm": 0.43189677596092224, "learning_rate": 3.218860028796501e-05, "loss": 1.0337, "step": 7022 }, { "epoch": 0.627515804052092, "grad_norm": 0.4517824947834015, "learning_rate": 3.2175079609670286e-05, "loss": 0.9424, "step": 7023 }, { "epoch": 0.6276051555833538, "grad_norm": 0.42732521891593933, "learning_rate": 3.216156042440267e-05, "loss": 0.9269, "step": 7024 }, { "epoch": 0.6276945071146157, "grad_norm": 0.5549626350402832, "learning_rate": 3.2148042733294494e-05, "loss": 0.898, "step": 7025 }, { "epoch": 0.6277838586458775, "grad_norm": 0.4612114429473877, "learning_rate": 3.2134526537478034e-05, "loss": 0.9135, "step": 7026 }, { "epoch": 0.6278732101771394, "grad_norm": 0.42315712571144104, "learning_rate": 3.21210118380854e-05, "loss": 0.9782, "step": 7027 }, { "epoch": 0.6279625617084013, "grad_norm": 0.5238030552864075, "learning_rate": 3.210749863624861e-05, "loss": 0.923, "step": 7028 }, { "epoch": 0.6280519132396631, "grad_norm": 0.39960044622421265, "learning_rate": 3.209398693309954e-05, "loss": 0.944, "step": 7029 }, { "epoch": 0.628141264770925, "grad_norm": 0.5192845463752747, "learning_rate": 3.2080476729769916e-05, "loss": 0.9699, "step": 7030 }, { "epoch": 0.6282306163021869, "grad_norm": 0.5337046980857849, "learning_rate": 3.2066968027391374e-05, "loss": 0.8788, "step": 7031 }, { "epoch": 0.6283199678334488, "grad_norm": 0.4468163549900055, "learning_rate": 3.20534608270954e-05, "loss": 0.9746, "step": 7032 }, { "epoch": 0.6284093193647106, "grad_norm": 0.4410332441329956, "learning_rate": 3.20399551300134e-05, "loss": 0.9883, "step": 7033 }, { "epoch": 0.6284986708959724, "grad_norm": 0.4546487629413605, "learning_rate": 3.202645093727659e-05, "loss": 0.9566, "step": 7034 }, { "epoch": 0.6285880224272343, "grad_norm": 0.5063059329986572, "learning_rate": 3.2012948250016084e-05, "loss": 0.865, "step": 7035 }, { "epoch": 0.6286773739584962, "grad_norm": 0.432156503200531, "learning_rate": 3.1999447069362904e-05, "loss": 0.9478, "step": 7036 }, { "epoch": 0.6287667254897581, "grad_norm": 0.5517258644104004, "learning_rate": 3.19859473964479e-05, "loss": 0.9291, "step": 7037 }, { "epoch": 0.62885607702102, "grad_norm": 0.44080060720443726, "learning_rate": 3.197244923240182e-05, "loss": 1.0247, "step": 7038 }, { "epoch": 0.6289454285522819, "grad_norm": 0.7278925180435181, "learning_rate": 3.1958952578355295e-05, "loss": 0.8903, "step": 7039 }, { "epoch": 0.6290347800835436, "grad_norm": 0.44589099287986755, "learning_rate": 3.194545743543878e-05, "loss": 0.9401, "step": 7040 }, { "epoch": 0.6291241316148055, "grad_norm": 0.5441766381263733, "learning_rate": 3.193196380478264e-05, "loss": 0.9553, "step": 7041 }, { "epoch": 0.6292134831460674, "grad_norm": 0.4079403579235077, "learning_rate": 3.191847168751714e-05, "loss": 0.9159, "step": 7042 }, { "epoch": 0.6293028346773293, "grad_norm": 0.5653771758079529, "learning_rate": 3.190498108477237e-05, "loss": 0.9289, "step": 7043 }, { "epoch": 0.6293921862085912, "grad_norm": 0.48930737376213074, "learning_rate": 3.18914919976783e-05, "loss": 0.9681, "step": 7044 }, { "epoch": 0.629481537739853, "grad_norm": 0.5332466959953308, "learning_rate": 3.187800442736481e-05, "loss": 0.9396, "step": 7045 }, { "epoch": 0.6295708892711149, "grad_norm": 0.4455600678920746, "learning_rate": 3.1864518374961606e-05, "loss": 0.9359, "step": 7046 }, { "epoch": 0.6296602408023767, "grad_norm": 0.47383755445480347, "learning_rate": 3.1851033841598297e-05, "loss": 0.9179, "step": 7047 }, { "epoch": 0.6297495923336386, "grad_norm": 0.4770072102546692, "learning_rate": 3.183755082840436e-05, "loss": 0.9212, "step": 7048 }, { "epoch": 0.6298389438649005, "grad_norm": 0.48469939827919006, "learning_rate": 3.182406933650917e-05, "loss": 0.9359, "step": 7049 }, { "epoch": 0.6299282953961624, "grad_norm": 0.6045317053794861, "learning_rate": 3.181058936704187e-05, "loss": 0.9362, "step": 7050 }, { "epoch": 0.6300176469274242, "grad_norm": 0.41952088475227356, "learning_rate": 3.179711092113162e-05, "loss": 0.9626, "step": 7051 }, { "epoch": 0.6301069984586861, "grad_norm": 0.5453062653541565, "learning_rate": 3.178363399990735e-05, "loss": 0.8763, "step": 7052 }, { "epoch": 0.630196349989948, "grad_norm": 0.47446975111961365, "learning_rate": 3.1770158604497905e-05, "loss": 0.9664, "step": 7053 }, { "epoch": 0.6302857015212098, "grad_norm": 0.521207869052887, "learning_rate": 3.175668473603199e-05, "loss": 0.9685, "step": 7054 }, { "epoch": 0.6303750530524717, "grad_norm": 0.47734490036964417, "learning_rate": 3.17432123956382e-05, "loss": 0.9631, "step": 7055 }, { "epoch": 0.6304644045837335, "grad_norm": 0.5704666376113892, "learning_rate": 3.172974158444496e-05, "loss": 0.9473, "step": 7056 }, { "epoch": 0.6305537561149954, "grad_norm": 0.5392587184906006, "learning_rate": 3.171627230358063e-05, "loss": 0.8843, "step": 7057 }, { "epoch": 0.6306431076462573, "grad_norm": 0.5207484364509583, "learning_rate": 3.1702804554173374e-05, "loss": 0.9672, "step": 7058 }, { "epoch": 0.6307324591775192, "grad_norm": 0.3899401128292084, "learning_rate": 3.1689338337351273e-05, "loss": 0.9844, "step": 7059 }, { "epoch": 0.6308218107087811, "grad_norm": 0.4320841431617737, "learning_rate": 3.1675873654242264e-05, "loss": 0.9221, "step": 7060 }, { "epoch": 0.6309111622400428, "grad_norm": 0.4138674736022949, "learning_rate": 3.1662410505974146e-05, "loss": 1.0483, "step": 7061 }, { "epoch": 0.6310005137713047, "grad_norm": 0.5958953499794006, "learning_rate": 3.164894889367463e-05, "loss": 0.9235, "step": 7062 }, { "epoch": 0.6310898653025666, "grad_norm": 0.4245629608631134, "learning_rate": 3.1635488818471246e-05, "loss": 0.9503, "step": 7063 }, { "epoch": 0.6311792168338285, "grad_norm": 0.47923198342323303, "learning_rate": 3.162203028149142e-05, "loss": 0.9579, "step": 7064 }, { "epoch": 0.6312685683650904, "grad_norm": 0.4923160672187805, "learning_rate": 3.160857328386245e-05, "loss": 0.9471, "step": 7065 }, { "epoch": 0.6313579198963523, "grad_norm": 0.3958245515823364, "learning_rate": 3.1595117826711514e-05, "loss": 0.9887, "step": 7066 }, { "epoch": 0.6314472714276141, "grad_norm": 0.4653833508491516, "learning_rate": 3.1581663911165635e-05, "loss": 0.9628, "step": 7067 }, { "epoch": 0.6315366229588759, "grad_norm": 0.43516650795936584, "learning_rate": 3.1568211538351736e-05, "loss": 0.876, "step": 7068 }, { "epoch": 0.6316259744901378, "grad_norm": 0.5455069541931152, "learning_rate": 3.15547607093966e-05, "loss": 0.8408, "step": 7069 }, { "epoch": 0.6317153260213997, "grad_norm": 0.6274839639663696, "learning_rate": 3.154131142542686e-05, "loss": 0.9539, "step": 7070 }, { "epoch": 0.6318046775526616, "grad_norm": 0.5620449781417847, "learning_rate": 3.1527863687569026e-05, "loss": 0.9296, "step": 7071 }, { "epoch": 0.6318940290839234, "grad_norm": 0.5150429010391235, "learning_rate": 3.1514417496949525e-05, "loss": 0.8938, "step": 7072 }, { "epoch": 0.6319833806151853, "grad_norm": 0.3932952880859375, "learning_rate": 3.150097285469459e-05, "loss": 0.9809, "step": 7073 }, { "epoch": 0.6320727321464472, "grad_norm": 0.4048050045967102, "learning_rate": 3.148752976193036e-05, "loss": 0.9128, "step": 7074 }, { "epoch": 0.632162083677709, "grad_norm": 0.45377394556999207, "learning_rate": 3.147408821978285e-05, "loss": 0.9428, "step": 7075 }, { "epoch": 0.6322514352089709, "grad_norm": 0.47857969999313354, "learning_rate": 3.146064822937793e-05, "loss": 0.9388, "step": 7076 }, { "epoch": 0.6323407867402328, "grad_norm": 0.42985495924949646, "learning_rate": 3.144720979184133e-05, "loss": 0.9978, "step": 7077 }, { "epoch": 0.6324301382714946, "grad_norm": 0.4862077236175537, "learning_rate": 3.1433772908298665e-05, "loss": 0.9369, "step": 7078 }, { "epoch": 0.6325194898027565, "grad_norm": 0.5292137861251831, "learning_rate": 3.1420337579875424e-05, "loss": 0.8707, "step": 7079 }, { "epoch": 0.6326088413340184, "grad_norm": 0.5022360682487488, "learning_rate": 3.140690380769696e-05, "loss": 0.968, "step": 7080 }, { "epoch": 0.6326981928652802, "grad_norm": 0.45004457235336304, "learning_rate": 3.139347159288849e-05, "loss": 0.9509, "step": 7081 }, { "epoch": 0.6327875443965421, "grad_norm": 0.3994084894657135, "learning_rate": 3.1380040936575094e-05, "loss": 1.0183, "step": 7082 }, { "epoch": 0.6328768959278039, "grad_norm": 0.4302981495857239, "learning_rate": 3.136661183988175e-05, "loss": 1.0077, "step": 7083 }, { "epoch": 0.6329662474590658, "grad_norm": 0.5247629284858704, "learning_rate": 3.135318430393328e-05, "loss": 0.9228, "step": 7084 }, { "epoch": 0.6330555989903277, "grad_norm": 0.44027116894721985, "learning_rate": 3.133975832985438e-05, "loss": 0.9233, "step": 7085 }, { "epoch": 0.6331449505215896, "grad_norm": 0.4487362205982208, "learning_rate": 3.1326333918769633e-05, "loss": 0.9578, "step": 7086 }, { "epoch": 0.6332343020528515, "grad_norm": 0.4788406789302826, "learning_rate": 3.1312911071803464e-05, "loss": 0.9458, "step": 7087 }, { "epoch": 0.6333236535841132, "grad_norm": 0.5990272164344788, "learning_rate": 3.1299489790080184e-05, "loss": 0.8684, "step": 7088 }, { "epoch": 0.6334130051153751, "grad_norm": 0.4570746123790741, "learning_rate": 3.128607007472398e-05, "loss": 1.0058, "step": 7089 }, { "epoch": 0.633502356646637, "grad_norm": 0.45381224155426025, "learning_rate": 3.127265192685887e-05, "loss": 0.9837, "step": 7090 }, { "epoch": 0.6335917081778989, "grad_norm": 0.46698102355003357, "learning_rate": 3.1259235347608786e-05, "loss": 0.9462, "step": 7091 }, { "epoch": 0.6336810597091608, "grad_norm": 0.39173898100852966, "learning_rate": 3.12458203380975e-05, "loss": 0.9532, "step": 7092 }, { "epoch": 0.6337704112404227, "grad_norm": 0.5905160307884216, "learning_rate": 3.123240689944866e-05, "loss": 0.8619, "step": 7093 }, { "epoch": 0.6338597627716845, "grad_norm": 0.4507081210613251, "learning_rate": 3.121899503278579e-05, "loss": 0.9821, "step": 7094 }, { "epoch": 0.6339491143029463, "grad_norm": 0.4796884059906006, "learning_rate": 3.120558473923229e-05, "loss": 0.9545, "step": 7095 }, { "epoch": 0.6340384658342082, "grad_norm": 0.4841066896915436, "learning_rate": 3.119217601991139e-05, "loss": 0.8731, "step": 7096 }, { "epoch": 0.6341278173654701, "grad_norm": 0.4773595631122589, "learning_rate": 3.117876887594623e-05, "loss": 0.9447, "step": 7097 }, { "epoch": 0.634217168896732, "grad_norm": 0.5614939332008362, "learning_rate": 3.116536330845979e-05, "loss": 0.9185, "step": 7098 }, { "epoch": 0.6343065204279938, "grad_norm": 0.440338671207428, "learning_rate": 3.1151959318574964e-05, "loss": 0.9498, "step": 7099 }, { "epoch": 0.6343958719592557, "grad_norm": 0.43341484665870667, "learning_rate": 3.113855690741443e-05, "loss": 1.0054, "step": 7100 }, { "epoch": 0.6344852234905176, "grad_norm": 0.5022625923156738, "learning_rate": 3.1125156076100805e-05, "loss": 0.8804, "step": 7101 }, { "epoch": 0.6345745750217794, "grad_norm": 0.4659278392791748, "learning_rate": 3.1111756825756546e-05, "loss": 0.93, "step": 7102 }, { "epoch": 0.6346639265530413, "grad_norm": 0.39844343066215515, "learning_rate": 3.109835915750398e-05, "loss": 1.0028, "step": 7103 }, { "epoch": 0.6347532780843032, "grad_norm": 0.4734951853752136, "learning_rate": 3.108496307246532e-05, "loss": 0.9552, "step": 7104 }, { "epoch": 0.634842629615565, "grad_norm": 0.4761861264705658, "learning_rate": 3.107156857176262e-05, "loss": 0.976, "step": 7105 }, { "epoch": 0.6349319811468269, "grad_norm": 0.5420219302177429, "learning_rate": 3.105817565651782e-05, "loss": 0.8865, "step": 7106 }, { "epoch": 0.6350213326780888, "grad_norm": 0.4390241205692291, "learning_rate": 3.10447843278527e-05, "loss": 0.9958, "step": 7107 }, { "epoch": 0.6351106842093507, "grad_norm": 0.5415240526199341, "learning_rate": 3.103139458688895e-05, "loss": 0.8849, "step": 7108 }, { "epoch": 0.6352000357406125, "grad_norm": 0.6151022911071777, "learning_rate": 3.1018006434748113e-05, "loss": 0.9167, "step": 7109 }, { "epoch": 0.6352893872718743, "grad_norm": 0.4660266935825348, "learning_rate": 3.100461987255155e-05, "loss": 0.9687, "step": 7110 }, { "epoch": 0.6353787388031362, "grad_norm": 0.4258476495742798, "learning_rate": 3.0991234901420555e-05, "loss": 0.9189, "step": 7111 }, { "epoch": 0.6354680903343981, "grad_norm": 0.47770798206329346, "learning_rate": 3.0977851522476254e-05, "loss": 0.9047, "step": 7112 }, { "epoch": 0.63555744186566, "grad_norm": 0.473283976316452, "learning_rate": 3.096446973683966e-05, "loss": 0.9983, "step": 7113 }, { "epoch": 0.6356467933969219, "grad_norm": 0.4351825714111328, "learning_rate": 3.0951089545631614e-05, "loss": 0.9757, "step": 7114 }, { "epoch": 0.6357361449281838, "grad_norm": 0.49671927094459534, "learning_rate": 3.093771094997286e-05, "loss": 0.8834, "step": 7115 }, { "epoch": 0.6358254964594455, "grad_norm": 0.45592954754829407, "learning_rate": 3.092433395098402e-05, "loss": 0.9736, "step": 7116 }, { "epoch": 0.6359148479907074, "grad_norm": 0.46876510977745056, "learning_rate": 3.091095854978553e-05, "loss": 0.9785, "step": 7117 }, { "epoch": 0.6360041995219693, "grad_norm": 0.5314716696739197, "learning_rate": 3.089758474749774e-05, "loss": 0.9481, "step": 7118 }, { "epoch": 0.6360935510532312, "grad_norm": 0.4798244833946228, "learning_rate": 3.088421254524085e-05, "loss": 0.8803, "step": 7119 }, { "epoch": 0.6361829025844931, "grad_norm": 0.5197677612304688, "learning_rate": 3.087084194413493e-05, "loss": 0.8943, "step": 7120 }, { "epoch": 0.6362722541157549, "grad_norm": 0.4947774112224579, "learning_rate": 3.085747294529989e-05, "loss": 0.9543, "step": 7121 }, { "epoch": 0.6363616056470168, "grad_norm": 0.4913742244243622, "learning_rate": 3.084410554985553e-05, "loss": 0.9508, "step": 7122 }, { "epoch": 0.6364509571782786, "grad_norm": 0.5018487572669983, "learning_rate": 3.083073975892151e-05, "loss": 0.8714, "step": 7123 }, { "epoch": 0.6365403087095405, "grad_norm": 0.4363705813884735, "learning_rate": 3.081737557361737e-05, "loss": 0.946, "step": 7124 }, { "epoch": 0.6366296602408024, "grad_norm": 0.3925338387489319, "learning_rate": 3.0804012995062503e-05, "loss": 0.988, "step": 7125 }, { "epoch": 0.6367190117720642, "grad_norm": 0.4123278260231018, "learning_rate": 3.0790652024376157e-05, "loss": 0.9768, "step": 7126 }, { "epoch": 0.6368083633033261, "grad_norm": 0.5473465323448181, "learning_rate": 3.077729266267748e-05, "loss": 0.9486, "step": 7127 }, { "epoch": 0.636897714834588, "grad_norm": 0.5201464295387268, "learning_rate": 3.076393491108542e-05, "loss": 0.9381, "step": 7128 }, { "epoch": 0.6369870663658499, "grad_norm": 0.4789254069328308, "learning_rate": 3.075057877071886e-05, "loss": 0.9006, "step": 7129 }, { "epoch": 0.6370764178971117, "grad_norm": 0.4798673391342163, "learning_rate": 3.0737224242696515e-05, "loss": 0.9128, "step": 7130 }, { "epoch": 0.6371657694283736, "grad_norm": 0.42166826128959656, "learning_rate": 3.072387132813696e-05, "loss": 1.0026, "step": 7131 }, { "epoch": 0.6372551209596354, "grad_norm": 0.45716649293899536, "learning_rate": 3.071052002815866e-05, "loss": 0.9762, "step": 7132 }, { "epoch": 0.6373444724908973, "grad_norm": 0.4412686824798584, "learning_rate": 3.069717034387991e-05, "loss": 0.9536, "step": 7133 }, { "epoch": 0.6374338240221592, "grad_norm": 0.5291409492492676, "learning_rate": 3.0683822276418895e-05, "loss": 0.9105, "step": 7134 }, { "epoch": 0.6375231755534211, "grad_norm": 0.41035082936286926, "learning_rate": 3.0670475826893664e-05, "loss": 0.9441, "step": 7135 }, { "epoch": 0.637612527084683, "grad_norm": 0.44016411900520325, "learning_rate": 3.065713099642211e-05, "loss": 0.9229, "step": 7136 }, { "epoch": 0.6377018786159447, "grad_norm": 0.5165585875511169, "learning_rate": 3.0643787786122026e-05, "loss": 0.9092, "step": 7137 }, { "epoch": 0.6377912301472066, "grad_norm": 0.46023446321487427, "learning_rate": 3.063044619711104e-05, "loss": 0.9572, "step": 7138 }, { "epoch": 0.6378805816784685, "grad_norm": 0.4530174136161804, "learning_rate": 3.0617106230506645e-05, "loss": 0.9166, "step": 7139 }, { "epoch": 0.6379699332097304, "grad_norm": 0.45306262373924255, "learning_rate": 3.0603767887426224e-05, "loss": 1.0331, "step": 7140 }, { "epoch": 0.6380592847409923, "grad_norm": 0.440663605928421, "learning_rate": 3.059043116898698e-05, "loss": 1.0083, "step": 7141 }, { "epoch": 0.6381486362722542, "grad_norm": 0.48396119475364685, "learning_rate": 3.057709607630601e-05, "loss": 0.9481, "step": 7142 }, { "epoch": 0.638237987803516, "grad_norm": 0.4982928931713104, "learning_rate": 3.056376261050028e-05, "loss": 1.0025, "step": 7143 }, { "epoch": 0.6383273393347778, "grad_norm": 0.4071272313594818, "learning_rate": 3.05504307726866e-05, "loss": 1.0177, "step": 7144 }, { "epoch": 0.6384166908660397, "grad_norm": 0.48170456290245056, "learning_rate": 3.053710056398167e-05, "loss": 0.8774, "step": 7145 }, { "epoch": 0.6385060423973016, "grad_norm": 0.5535345077514648, "learning_rate": 3.052377198550204e-05, "loss": 0.8821, "step": 7146 }, { "epoch": 0.6385953939285635, "grad_norm": 0.5003737807273865, "learning_rate": 3.051044503836409e-05, "loss": 0.9331, "step": 7147 }, { "epoch": 0.6386847454598253, "grad_norm": 0.4324195086956024, "learning_rate": 3.0497119723684108e-05, "loss": 1.0236, "step": 7148 }, { "epoch": 0.6387740969910872, "grad_norm": 0.5235957503318787, "learning_rate": 3.0483796042578246e-05, "loss": 0.9891, "step": 7149 }, { "epoch": 0.638863448522349, "grad_norm": 0.5359724760055542, "learning_rate": 3.047047399616251e-05, "loss": 0.896, "step": 7150 }, { "epoch": 0.6389528000536109, "grad_norm": 0.44273999333381653, "learning_rate": 3.0457153585552723e-05, "loss": 0.9874, "step": 7151 }, { "epoch": 0.6390421515848728, "grad_norm": 0.41611048579216003, "learning_rate": 3.0443834811864635e-05, "loss": 0.9371, "step": 7152 }, { "epoch": 0.6391315031161346, "grad_norm": 0.5038164854049683, "learning_rate": 3.043051767621383e-05, "loss": 0.9263, "step": 7153 }, { "epoch": 0.6392208546473965, "grad_norm": 0.49609890580177307, "learning_rate": 3.0417202179715776e-05, "loss": 0.9053, "step": 7154 }, { "epoch": 0.6393102061786584, "grad_norm": 0.4573724567890167, "learning_rate": 3.0403888323485775e-05, "loss": 0.9369, "step": 7155 }, { "epoch": 0.6393995577099203, "grad_norm": 0.4710655212402344, "learning_rate": 3.0390576108639e-05, "loss": 0.9565, "step": 7156 }, { "epoch": 0.6394889092411821, "grad_norm": 0.5273383259773254, "learning_rate": 3.03772655362905e-05, "loss": 1.0381, "step": 7157 }, { "epoch": 0.639578260772444, "grad_norm": 0.5512682795524597, "learning_rate": 3.0363956607555177e-05, "loss": 0.9397, "step": 7158 }, { "epoch": 0.6396676123037058, "grad_norm": 0.45379549264907837, "learning_rate": 3.0350649323547796e-05, "loss": 0.9626, "step": 7159 }, { "epoch": 0.6397569638349677, "grad_norm": 0.4535743296146393, "learning_rate": 3.0337343685383e-05, "loss": 0.948, "step": 7160 }, { "epoch": 0.6398463153662296, "grad_norm": 0.45462849736213684, "learning_rate": 3.0324039694175233e-05, "loss": 1.0285, "step": 7161 }, { "epoch": 0.6399356668974915, "grad_norm": 0.5407901406288147, "learning_rate": 3.0310737351038875e-05, "loss": 0.9403, "step": 7162 }, { "epoch": 0.6400250184287534, "grad_norm": 0.410166472196579, "learning_rate": 3.029743665708814e-05, "loss": 1.0002, "step": 7163 }, { "epoch": 0.6401143699600151, "grad_norm": 0.501658022403717, "learning_rate": 3.0284137613437098e-05, "loss": 0.9004, "step": 7164 }, { "epoch": 0.640203721491277, "grad_norm": 0.4671003818511963, "learning_rate": 3.027084022119969e-05, "loss": 0.9121, "step": 7165 }, { "epoch": 0.6402930730225389, "grad_norm": 0.4434639811515808, "learning_rate": 3.0257544481489712e-05, "loss": 0.9303, "step": 7166 }, { "epoch": 0.6403824245538008, "grad_norm": 0.46848881244659424, "learning_rate": 3.024425039542082e-05, "loss": 0.8991, "step": 7167 }, { "epoch": 0.6404717760850627, "grad_norm": 0.4049372971057892, "learning_rate": 3.0230957964106532e-05, "loss": 0.9814, "step": 7168 }, { "epoch": 0.6405611276163246, "grad_norm": 0.5770232677459717, "learning_rate": 3.0217667188660248e-05, "loss": 0.9142, "step": 7169 }, { "epoch": 0.6406504791475864, "grad_norm": 0.5074619650840759, "learning_rate": 3.0204378070195218e-05, "loss": 0.9127, "step": 7170 }, { "epoch": 0.6407398306788482, "grad_norm": 0.539682924747467, "learning_rate": 3.01910906098245e-05, "loss": 0.9482, "step": 7171 }, { "epoch": 0.6408291822101101, "grad_norm": 0.5305405259132385, "learning_rate": 3.0177804808661103e-05, "loss": 0.9165, "step": 7172 }, { "epoch": 0.640918533741372, "grad_norm": 0.39787405729293823, "learning_rate": 3.0164520667817842e-05, "loss": 0.9578, "step": 7173 }, { "epoch": 0.6410078852726339, "grad_norm": 0.40553003549575806, "learning_rate": 3.01512381884074e-05, "loss": 1.0191, "step": 7174 }, { "epoch": 0.6410972368038957, "grad_norm": 0.5072142481803894, "learning_rate": 3.0137957371542336e-05, "loss": 0.9339, "step": 7175 }, { "epoch": 0.6411865883351576, "grad_norm": 0.402472585439682, "learning_rate": 3.0124678218335058e-05, "loss": 0.919, "step": 7176 }, { "epoch": 0.6412759398664195, "grad_norm": 0.5144462585449219, "learning_rate": 3.0111400729897833e-05, "loss": 0.9673, "step": 7177 }, { "epoch": 0.6413652913976813, "grad_norm": 0.5433812141418457, "learning_rate": 3.009812490734279e-05, "loss": 0.9127, "step": 7178 }, { "epoch": 0.6414546429289432, "grad_norm": 0.522948145866394, "learning_rate": 3.008485075178194e-05, "loss": 0.8578, "step": 7179 }, { "epoch": 0.641543994460205, "grad_norm": 0.4444359242916107, "learning_rate": 3.0071578264327116e-05, "loss": 0.9434, "step": 7180 }, { "epoch": 0.6416333459914669, "grad_norm": 0.5288926959037781, "learning_rate": 3.005830744609003e-05, "loss": 0.8699, "step": 7181 }, { "epoch": 0.6417226975227288, "grad_norm": 0.5076068639755249, "learning_rate": 3.004503829818225e-05, "loss": 0.905, "step": 7182 }, { "epoch": 0.6418120490539907, "grad_norm": 0.49402233958244324, "learning_rate": 3.003177082171523e-05, "loss": 0.9201, "step": 7183 }, { "epoch": 0.6419014005852526, "grad_norm": 0.5896358489990234, "learning_rate": 3.0018505017800246e-05, "loss": 0.8503, "step": 7184 }, { "epoch": 0.6419907521165144, "grad_norm": 0.5615974068641663, "learning_rate": 3.0005240887548445e-05, "loss": 1.0653, "step": 7185 }, { "epoch": 0.6420801036477762, "grad_norm": 0.42065030336380005, "learning_rate": 2.9991978432070856e-05, "loss": 0.9577, "step": 7186 }, { "epoch": 0.6421694551790381, "grad_norm": 0.44661208987236023, "learning_rate": 2.9978717652478344e-05, "loss": 0.999, "step": 7187 }, { "epoch": 0.6422588067103, "grad_norm": 0.489004909992218, "learning_rate": 2.9965458549881638e-05, "loss": 0.872, "step": 7188 }, { "epoch": 0.6423481582415619, "grad_norm": 0.43301481008529663, "learning_rate": 2.9952201125391332e-05, "loss": 0.9497, "step": 7189 }, { "epoch": 0.6424375097728238, "grad_norm": 0.46835726499557495, "learning_rate": 2.993894538011789e-05, "loss": 1.0229, "step": 7190 }, { "epoch": 0.6425268613040857, "grad_norm": 0.4566435217857361, "learning_rate": 2.9925691315171594e-05, "loss": 0.9432, "step": 7191 }, { "epoch": 0.6426162128353474, "grad_norm": 0.490376353263855, "learning_rate": 2.9912438931662624e-05, "loss": 0.9158, "step": 7192 }, { "epoch": 0.6427055643666093, "grad_norm": 0.45299986004829407, "learning_rate": 2.9899188230701014e-05, "loss": 0.942, "step": 7193 }, { "epoch": 0.6427949158978712, "grad_norm": 0.40514639019966125, "learning_rate": 2.9885939213396647e-05, "loss": 0.987, "step": 7194 }, { "epoch": 0.6428842674291331, "grad_norm": 0.46090513467788696, "learning_rate": 2.987269188085927e-05, "loss": 1.0108, "step": 7195 }, { "epoch": 0.642973618960395, "grad_norm": 0.5533647537231445, "learning_rate": 2.9859446234198494e-05, "loss": 0.8314, "step": 7196 }, { "epoch": 0.6430629704916568, "grad_norm": 0.5012393593788147, "learning_rate": 2.9846202274523776e-05, "loss": 0.8779, "step": 7197 }, { "epoch": 0.6431523220229187, "grad_norm": 0.4513954818248749, "learning_rate": 2.9832960002944454e-05, "loss": 0.855, "step": 7198 }, { "epoch": 0.6432416735541805, "grad_norm": 0.49379584193229675, "learning_rate": 2.981971942056968e-05, "loss": 0.9278, "step": 7199 }, { "epoch": 0.6433310250854424, "grad_norm": 0.43649551272392273, "learning_rate": 2.9806480528508517e-05, "loss": 0.9354, "step": 7200 }, { "epoch": 0.6434203766167043, "grad_norm": 0.35923606157302856, "learning_rate": 2.9793243327869868e-05, "loss": 0.9558, "step": 7201 }, { "epoch": 0.6435097281479661, "grad_norm": 0.4612623453140259, "learning_rate": 2.978000781976248e-05, "loss": 0.9682, "step": 7202 }, { "epoch": 0.643599079679228, "grad_norm": 0.4402258098125458, "learning_rate": 2.9766774005294952e-05, "loss": 1.0167, "step": 7203 }, { "epoch": 0.6436884312104899, "grad_norm": 0.6181815266609192, "learning_rate": 2.9753541885575777e-05, "loss": 0.877, "step": 7204 }, { "epoch": 0.6437777827417518, "grad_norm": 0.5466234683990479, "learning_rate": 2.9740311461713273e-05, "loss": 0.936, "step": 7205 }, { "epoch": 0.6438671342730136, "grad_norm": 0.4415399432182312, "learning_rate": 2.9727082734815637e-05, "loss": 0.9895, "step": 7206 }, { "epoch": 0.6439564858042754, "grad_norm": 0.47382205724716187, "learning_rate": 2.9713855705990923e-05, "loss": 0.9684, "step": 7207 }, { "epoch": 0.6440458373355373, "grad_norm": 0.5399396419525146, "learning_rate": 2.970063037634703e-05, "loss": 0.8178, "step": 7208 }, { "epoch": 0.6441351888667992, "grad_norm": 0.5536864995956421, "learning_rate": 2.9687406746991708e-05, "loss": 0.9333, "step": 7209 }, { "epoch": 0.6442245403980611, "grad_norm": 0.4546963572502136, "learning_rate": 2.967418481903259e-05, "loss": 0.993, "step": 7210 }, { "epoch": 0.644313891929323, "grad_norm": 0.42409881949424744, "learning_rate": 2.966096459357718e-05, "loss": 0.9383, "step": 7211 }, { "epoch": 0.6444032434605848, "grad_norm": 0.4200609624385834, "learning_rate": 2.9647746071732757e-05, "loss": 0.8847, "step": 7212 }, { "epoch": 0.6444925949918466, "grad_norm": 0.5027011632919312, "learning_rate": 2.9634529254606542e-05, "loss": 0.8904, "step": 7213 }, { "epoch": 0.6445819465231085, "grad_norm": 0.4371449053287506, "learning_rate": 2.962131414330558e-05, "loss": 0.9849, "step": 7214 }, { "epoch": 0.6446712980543704, "grad_norm": 0.42081284523010254, "learning_rate": 2.9608100738936783e-05, "loss": 0.9994, "step": 7215 }, { "epoch": 0.6447606495856323, "grad_norm": 0.454153448343277, "learning_rate": 2.9594889042606923e-05, "loss": 0.9647, "step": 7216 }, { "epoch": 0.6448500011168942, "grad_norm": 0.4745718836784363, "learning_rate": 2.958167905542259e-05, "loss": 0.959, "step": 7217 }, { "epoch": 0.644939352648156, "grad_norm": 0.5167459845542908, "learning_rate": 2.9568470778490287e-05, "loss": 0.9293, "step": 7218 }, { "epoch": 0.6450287041794178, "grad_norm": 0.5096081495285034, "learning_rate": 2.9555264212916334e-05, "loss": 0.9004, "step": 7219 }, { "epoch": 0.6451180557106797, "grad_norm": 0.5786259770393372, "learning_rate": 2.9542059359806935e-05, "loss": 1.0463, "step": 7220 }, { "epoch": 0.6452074072419416, "grad_norm": 0.3960155248641968, "learning_rate": 2.9528856220268147e-05, "loss": 0.9449, "step": 7221 }, { "epoch": 0.6452967587732035, "grad_norm": 0.48387083411216736, "learning_rate": 2.951565479540584e-05, "loss": 0.9286, "step": 7222 }, { "epoch": 0.6453861103044654, "grad_norm": 0.46584317088127136, "learning_rate": 2.9502455086325787e-05, "loss": 0.922, "step": 7223 }, { "epoch": 0.6454754618357272, "grad_norm": 0.42123234272003174, "learning_rate": 2.9489257094133616e-05, "loss": 0.9701, "step": 7224 }, { "epoch": 0.6455648133669891, "grad_norm": 0.5903416275978088, "learning_rate": 2.9476060819934786e-05, "loss": 0.896, "step": 7225 }, { "epoch": 0.6456541648982509, "grad_norm": 0.4230227768421173, "learning_rate": 2.946286626483463e-05, "loss": 0.9556, "step": 7226 }, { "epoch": 0.6457435164295128, "grad_norm": 0.5032885670661926, "learning_rate": 2.9449673429938342e-05, "loss": 0.934, "step": 7227 }, { "epoch": 0.6458328679607747, "grad_norm": 0.5191749334335327, "learning_rate": 2.943648231635095e-05, "loss": 0.8587, "step": 7228 }, { "epoch": 0.6459222194920365, "grad_norm": 0.49752548336982727, "learning_rate": 2.942329292517736e-05, "loss": 0.9805, "step": 7229 }, { "epoch": 0.6460115710232984, "grad_norm": 0.5034040212631226, "learning_rate": 2.9410105257522314e-05, "loss": 0.9645, "step": 7230 }, { "epoch": 0.6461009225545603, "grad_norm": 0.5222768783569336, "learning_rate": 2.9396919314490447e-05, "loss": 0.9295, "step": 7231 }, { "epoch": 0.6461902740858222, "grad_norm": 0.4623466730117798, "learning_rate": 2.9383735097186175e-05, "loss": 0.9802, "step": 7232 }, { "epoch": 0.646279625617084, "grad_norm": 0.49886736273765564, "learning_rate": 2.9370552606713852e-05, "loss": 0.9273, "step": 7233 }, { "epoch": 0.6463689771483458, "grad_norm": 0.547577440738678, "learning_rate": 2.935737184417764e-05, "loss": 0.9833, "step": 7234 }, { "epoch": 0.6464583286796077, "grad_norm": 0.4106947183609009, "learning_rate": 2.9344192810681577e-05, "loss": 1.0004, "step": 7235 }, { "epoch": 0.6465476802108696, "grad_norm": 0.41953131556510925, "learning_rate": 2.933101550732953e-05, "loss": 0.9349, "step": 7236 }, { "epoch": 0.6466370317421315, "grad_norm": 0.4484841823577881, "learning_rate": 2.9317839935225254e-05, "loss": 0.9114, "step": 7237 }, { "epoch": 0.6467263832733934, "grad_norm": 0.4939296245574951, "learning_rate": 2.9304666095472334e-05, "loss": 0.9474, "step": 7238 }, { "epoch": 0.6468157348046553, "grad_norm": 0.409227579832077, "learning_rate": 2.9291493989174234e-05, "loss": 1.0057, "step": 7239 }, { "epoch": 0.646905086335917, "grad_norm": 0.44199416041374207, "learning_rate": 2.9278323617434245e-05, "loss": 0.899, "step": 7240 }, { "epoch": 0.6469944378671789, "grad_norm": 0.4873824417591095, "learning_rate": 2.9265154981355547e-05, "loss": 0.9753, "step": 7241 }, { "epoch": 0.6470837893984408, "grad_norm": 0.4727482497692108, "learning_rate": 2.9251988082041115e-05, "loss": 0.9691, "step": 7242 }, { "epoch": 0.6471731409297027, "grad_norm": 0.4748595952987671, "learning_rate": 2.9238822920593844e-05, "loss": 0.993, "step": 7243 }, { "epoch": 0.6472624924609646, "grad_norm": 0.490640252828598, "learning_rate": 2.9225659498116452e-05, "loss": 0.9925, "step": 7244 }, { "epoch": 0.6473518439922265, "grad_norm": 0.5214620232582092, "learning_rate": 2.9212497815711516e-05, "loss": 0.9996, "step": 7245 }, { "epoch": 0.6474411955234883, "grad_norm": 0.5297476649284363, "learning_rate": 2.9199337874481465e-05, "loss": 0.9351, "step": 7246 }, { "epoch": 0.6475305470547501, "grad_norm": 0.4094926416873932, "learning_rate": 2.9186179675528597e-05, "loss": 0.9445, "step": 7247 }, { "epoch": 0.647619898586012, "grad_norm": 0.5132883787155151, "learning_rate": 2.9173023219955032e-05, "loss": 0.8968, "step": 7248 }, { "epoch": 0.6477092501172739, "grad_norm": 0.5564563870429993, "learning_rate": 2.9159868508862766e-05, "loss": 0.8778, "step": 7249 }, { "epoch": 0.6477986016485358, "grad_norm": 0.5214046835899353, "learning_rate": 2.9146715543353652e-05, "loss": 0.9337, "step": 7250 }, { "epoch": 0.6478879531797976, "grad_norm": 0.38234221935272217, "learning_rate": 2.9133564324529415e-05, "loss": 0.9849, "step": 7251 }, { "epoch": 0.6479773047110595, "grad_norm": 0.4253350794315338, "learning_rate": 2.912041485349157e-05, "loss": 0.9478, "step": 7252 }, { "epoch": 0.6480666562423214, "grad_norm": 0.436994731426239, "learning_rate": 2.9107267131341537e-05, "loss": 0.9162, "step": 7253 }, { "epoch": 0.6481560077735832, "grad_norm": 0.4581502676010132, "learning_rate": 2.9094121159180588e-05, "loss": 0.9918, "step": 7254 }, { "epoch": 0.6482453593048451, "grad_norm": 0.45387864112854004, "learning_rate": 2.908097693810983e-05, "loss": 0.8974, "step": 7255 }, { "epoch": 0.6483347108361069, "grad_norm": 0.4383721947669983, "learning_rate": 2.9067834469230225e-05, "loss": 0.9876, "step": 7256 }, { "epoch": 0.6484240623673688, "grad_norm": 0.4127906858921051, "learning_rate": 2.9054693753642614e-05, "loss": 0.9452, "step": 7257 }, { "epoch": 0.6485134138986307, "grad_norm": 0.530967116355896, "learning_rate": 2.9041554792447655e-05, "loss": 0.918, "step": 7258 }, { "epoch": 0.6486027654298926, "grad_norm": 0.48077839612960815, "learning_rate": 2.9028417586745887e-05, "loss": 0.9524, "step": 7259 }, { "epoch": 0.6486921169611545, "grad_norm": 0.47966933250427246, "learning_rate": 2.9015282137637688e-05, "loss": 0.9938, "step": 7260 }, { "epoch": 0.6487814684924162, "grad_norm": 0.5792236328125, "learning_rate": 2.900214844622331e-05, "loss": 0.9617, "step": 7261 }, { "epoch": 0.6488708200236781, "grad_norm": 0.5350083708763123, "learning_rate": 2.8989016513602802e-05, "loss": 0.9807, "step": 7262 }, { "epoch": 0.64896017155494, "grad_norm": 0.4303712844848633, "learning_rate": 2.8975886340876117e-05, "loss": 0.9293, "step": 7263 }, { "epoch": 0.6490495230862019, "grad_norm": 0.451147198677063, "learning_rate": 2.896275792914306e-05, "loss": 1.0082, "step": 7264 }, { "epoch": 0.6491388746174638, "grad_norm": 0.37288275361061096, "learning_rate": 2.8949631279503264e-05, "loss": 1.0025, "step": 7265 }, { "epoch": 0.6492282261487257, "grad_norm": 0.480672687292099, "learning_rate": 2.8936506393056223e-05, "loss": 0.9353, "step": 7266 }, { "epoch": 0.6493175776799875, "grad_norm": 0.547631561756134, "learning_rate": 2.89233832709013e-05, "loss": 0.925, "step": 7267 }, { "epoch": 0.6494069292112493, "grad_norm": 0.43532001972198486, "learning_rate": 2.8910261914137682e-05, "loss": 0.9405, "step": 7268 }, { "epoch": 0.6494962807425112, "grad_norm": 0.45452019572257996, "learning_rate": 2.8897142323864433e-05, "loss": 0.9886, "step": 7269 }, { "epoch": 0.6495856322737731, "grad_norm": 0.4508405923843384, "learning_rate": 2.8884024501180456e-05, "loss": 0.9328, "step": 7270 }, { "epoch": 0.649674983805035, "grad_norm": 0.47930291295051575, "learning_rate": 2.887090844718453e-05, "loss": 0.9157, "step": 7271 }, { "epoch": 0.6497643353362968, "grad_norm": 0.47216659784317017, "learning_rate": 2.8857794162975214e-05, "loss": 0.9874, "step": 7272 }, { "epoch": 0.6498536868675587, "grad_norm": 0.4564785063266754, "learning_rate": 2.8844681649651e-05, "loss": 0.9879, "step": 7273 }, { "epoch": 0.6499430383988205, "grad_norm": 0.5167818069458008, "learning_rate": 2.8831570908310202e-05, "loss": 0.9293, "step": 7274 }, { "epoch": 0.6500323899300824, "grad_norm": 0.4396851658821106, "learning_rate": 2.8818461940050967e-05, "loss": 0.8964, "step": 7275 }, { "epoch": 0.6501217414613443, "grad_norm": 0.5146901607513428, "learning_rate": 2.8805354745971337e-05, "loss": 0.9672, "step": 7276 }, { "epoch": 0.6502110929926062, "grad_norm": 0.5108723044395447, "learning_rate": 2.879224932716918e-05, "loss": 0.9948, "step": 7277 }, { "epoch": 0.650300444523868, "grad_norm": 0.469088613986969, "learning_rate": 2.877914568474218e-05, "loss": 0.9109, "step": 7278 }, { "epoch": 0.6503897960551299, "grad_norm": 0.4965616464614868, "learning_rate": 2.8766043819787925e-05, "loss": 0.8499, "step": 7279 }, { "epoch": 0.6504791475863918, "grad_norm": 0.47035840153694153, "learning_rate": 2.875294373340384e-05, "loss": 0.9244, "step": 7280 }, { "epoch": 0.6505684991176536, "grad_norm": 0.5100567936897278, "learning_rate": 2.8739845426687218e-05, "loss": 0.9717, "step": 7281 }, { "epoch": 0.6506578506489155, "grad_norm": 0.40733376145362854, "learning_rate": 2.8726748900735133e-05, "loss": 0.9693, "step": 7282 }, { "epoch": 0.6507472021801773, "grad_norm": 0.4507538676261902, "learning_rate": 2.8713654156644588e-05, "loss": 0.9329, "step": 7283 }, { "epoch": 0.6508365537114392, "grad_norm": 0.44749560952186584, "learning_rate": 2.87005611955124e-05, "loss": 0.9209, "step": 7284 }, { "epoch": 0.6509259052427011, "grad_norm": 0.3972071409225464, "learning_rate": 2.8687470018435246e-05, "loss": 1.0053, "step": 7285 }, { "epoch": 0.651015256773963, "grad_norm": 0.4977121353149414, "learning_rate": 2.867438062650966e-05, "loss": 0.9951, "step": 7286 }, { "epoch": 0.6511046083052249, "grad_norm": 0.42729929089546204, "learning_rate": 2.866129302083201e-05, "loss": 0.9505, "step": 7287 }, { "epoch": 0.6511939598364866, "grad_norm": 0.3862477242946625, "learning_rate": 2.8648207202498524e-05, "loss": 0.9529, "step": 7288 }, { "epoch": 0.6512833113677485, "grad_norm": 0.5908159017562866, "learning_rate": 2.8635123172605273e-05, "loss": 0.9252, "step": 7289 }, { "epoch": 0.6513726628990104, "grad_norm": 0.5458805561065674, "learning_rate": 2.8622040932248196e-05, "loss": 0.9316, "step": 7290 }, { "epoch": 0.6514620144302723, "grad_norm": 0.5677804350852966, "learning_rate": 2.8608960482523056e-05, "loss": 0.9166, "step": 7291 }, { "epoch": 0.6515513659615342, "grad_norm": 0.57185298204422, "learning_rate": 2.859588182452551e-05, "loss": 0.9909, "step": 7292 }, { "epoch": 0.6516407174927961, "grad_norm": 0.4821870028972626, "learning_rate": 2.8582804959350994e-05, "loss": 0.9059, "step": 7293 }, { "epoch": 0.651730069024058, "grad_norm": 0.39487797021865845, "learning_rate": 2.8569729888094853e-05, "loss": 0.9615, "step": 7294 }, { "epoch": 0.6518194205553197, "grad_norm": 0.5088570713996887, "learning_rate": 2.8556656611852274e-05, "loss": 0.9741, "step": 7295 }, { "epoch": 0.6519087720865816, "grad_norm": 0.4211069941520691, "learning_rate": 2.8543585131718263e-05, "loss": 1.0125, "step": 7296 }, { "epoch": 0.6519981236178435, "grad_norm": 0.46207818388938904, "learning_rate": 2.853051544878771e-05, "loss": 0.9296, "step": 7297 }, { "epoch": 0.6520874751491054, "grad_norm": 0.454182893037796, "learning_rate": 2.851744756415533e-05, "loss": 0.8923, "step": 7298 }, { "epoch": 0.6521768266803672, "grad_norm": 0.4857379198074341, "learning_rate": 2.850438147891571e-05, "loss": 0.9274, "step": 7299 }, { "epoch": 0.6522661782116291, "grad_norm": 0.42418479919433594, "learning_rate": 2.8491317194163265e-05, "loss": 1.0274, "step": 7300 }, { "epoch": 0.652355529742891, "grad_norm": 0.49535322189331055, "learning_rate": 2.847825471099227e-05, "loss": 0.9725, "step": 7301 }, { "epoch": 0.6524448812741528, "grad_norm": 0.46349287033081055, "learning_rate": 2.8465194030496872e-05, "loss": 0.9706, "step": 7302 }, { "epoch": 0.6525342328054147, "grad_norm": 0.5424239635467529, "learning_rate": 2.8452135153771e-05, "loss": 0.9002, "step": 7303 }, { "epoch": 0.6526235843366766, "grad_norm": 0.5158531069755554, "learning_rate": 2.843907808190849e-05, "loss": 0.8912, "step": 7304 }, { "epoch": 0.6527129358679384, "grad_norm": 0.44023188948631287, "learning_rate": 2.8426022816003012e-05, "loss": 1.0164, "step": 7305 }, { "epoch": 0.6528022873992003, "grad_norm": 0.6754381060600281, "learning_rate": 2.841296935714809e-05, "loss": 0.9589, "step": 7306 }, { "epoch": 0.6528916389304622, "grad_norm": 0.65952068567276, "learning_rate": 2.8399917706437074e-05, "loss": 0.8979, "step": 7307 }, { "epoch": 0.6529809904617241, "grad_norm": 0.431598037481308, "learning_rate": 2.8386867864963202e-05, "loss": 0.8975, "step": 7308 }, { "epoch": 0.6530703419929859, "grad_norm": 0.45453155040740967, "learning_rate": 2.8373819833819526e-05, "loss": 0.9543, "step": 7309 }, { "epoch": 0.6531596935242477, "grad_norm": 0.4382008910179138, "learning_rate": 2.836077361409897e-05, "loss": 1.0249, "step": 7310 }, { "epoch": 0.6532490450555096, "grad_norm": 0.4890137314796448, "learning_rate": 2.8347729206894268e-05, "loss": 0.9056, "step": 7311 }, { "epoch": 0.6533383965867715, "grad_norm": 0.4039430022239685, "learning_rate": 2.8334686613298034e-05, "loss": 0.995, "step": 7312 }, { "epoch": 0.6534277481180334, "grad_norm": 0.5166674256324768, "learning_rate": 2.8321645834402737e-05, "loss": 0.9533, "step": 7313 }, { "epoch": 0.6535170996492953, "grad_norm": 0.5235624313354492, "learning_rate": 2.8308606871300697e-05, "loss": 0.9465, "step": 7314 }, { "epoch": 0.6536064511805572, "grad_norm": 0.46239298582077026, "learning_rate": 2.8295569725084027e-05, "loss": 0.9373, "step": 7315 }, { "epoch": 0.6536958027118189, "grad_norm": 0.46263396739959717, "learning_rate": 2.828253439684474e-05, "loss": 0.9634, "step": 7316 }, { "epoch": 0.6537851542430808, "grad_norm": 0.6211736798286438, "learning_rate": 2.8269500887674687e-05, "loss": 0.9254, "step": 7317 }, { "epoch": 0.6538745057743427, "grad_norm": 0.5086511969566345, "learning_rate": 2.825646919866557e-05, "loss": 0.8853, "step": 7318 }, { "epoch": 0.6539638573056046, "grad_norm": 0.4071234464645386, "learning_rate": 2.8243439330908926e-05, "loss": 0.9643, "step": 7319 }, { "epoch": 0.6540532088368665, "grad_norm": 0.4825928807258606, "learning_rate": 2.8230411285496145e-05, "loss": 0.9184, "step": 7320 }, { "epoch": 0.6541425603681283, "grad_norm": 0.5398180484771729, "learning_rate": 2.8217385063518463e-05, "loss": 0.921, "step": 7321 }, { "epoch": 0.6542319118993902, "grad_norm": 0.5476343631744385, "learning_rate": 2.8204360666067e-05, "loss": 0.9528, "step": 7322 }, { "epoch": 0.654321263430652, "grad_norm": 0.4591327905654907, "learning_rate": 2.819133809423262e-05, "loss": 0.9868, "step": 7323 }, { "epoch": 0.6544106149619139, "grad_norm": 0.5206472277641296, "learning_rate": 2.8178317349106155e-05, "loss": 0.9039, "step": 7324 }, { "epoch": 0.6544999664931758, "grad_norm": 0.4743161201477051, "learning_rate": 2.8165298431778197e-05, "loss": 0.9459, "step": 7325 }, { "epoch": 0.6545893180244376, "grad_norm": 0.48798489570617676, "learning_rate": 2.8152281343339248e-05, "loss": 0.9241, "step": 7326 }, { "epoch": 0.6546786695556995, "grad_norm": 0.4876413643360138, "learning_rate": 2.8139266084879614e-05, "loss": 0.9567, "step": 7327 }, { "epoch": 0.6547680210869614, "grad_norm": 0.5455504059791565, "learning_rate": 2.812625265748946e-05, "loss": 1.0707, "step": 7328 }, { "epoch": 0.6548573726182233, "grad_norm": 0.4286678731441498, "learning_rate": 2.811324106225881e-05, "loss": 0.9682, "step": 7329 }, { "epoch": 0.6549467241494851, "grad_norm": 0.4500292241573334, "learning_rate": 2.8100231300277514e-05, "loss": 0.8929, "step": 7330 }, { "epoch": 0.655036075680747, "grad_norm": 0.4849509596824646, "learning_rate": 2.8087223372635286e-05, "loss": 1.0102, "step": 7331 }, { "epoch": 0.6551254272120088, "grad_norm": 0.4313583970069885, "learning_rate": 2.8074217280421688e-05, "loss": 0.9825, "step": 7332 }, { "epoch": 0.6552147787432707, "grad_norm": 0.485607773065567, "learning_rate": 2.8061213024726085e-05, "loss": 0.8919, "step": 7333 }, { "epoch": 0.6553041302745326, "grad_norm": 0.5385865569114685, "learning_rate": 2.8048210606637744e-05, "loss": 0.9059, "step": 7334 }, { "epoch": 0.6553934818057945, "grad_norm": 0.4551725387573242, "learning_rate": 2.803521002724575e-05, "loss": 0.9603, "step": 7335 }, { "epoch": 0.6554828333370563, "grad_norm": 0.5102852582931519, "learning_rate": 2.8022211287639044e-05, "loss": 0.8227, "step": 7336 }, { "epoch": 0.6555721848683181, "grad_norm": 0.5619865655899048, "learning_rate": 2.8009214388906414e-05, "loss": 0.9732, "step": 7337 }, { "epoch": 0.65566153639958, "grad_norm": 0.4476434886455536, "learning_rate": 2.7996219332136486e-05, "loss": 1.0222, "step": 7338 }, { "epoch": 0.6557508879308419, "grad_norm": 0.4480132758617401, "learning_rate": 2.7983226118417728e-05, "loss": 0.8978, "step": 7339 }, { "epoch": 0.6558402394621038, "grad_norm": 0.5554249286651611, "learning_rate": 2.7970234748838466e-05, "loss": 0.8385, "step": 7340 }, { "epoch": 0.6559295909933657, "grad_norm": 0.4294489622116089, "learning_rate": 2.7957245224486862e-05, "loss": 0.9374, "step": 7341 }, { "epoch": 0.6560189425246276, "grad_norm": 0.5135436058044434, "learning_rate": 2.7944257546450948e-05, "loss": 0.9616, "step": 7342 }, { "epoch": 0.6561082940558893, "grad_norm": 0.47493478655815125, "learning_rate": 2.793127171581854e-05, "loss": 0.867, "step": 7343 }, { "epoch": 0.6561976455871512, "grad_norm": 0.4486094117164612, "learning_rate": 2.7918287733677372e-05, "loss": 0.9386, "step": 7344 }, { "epoch": 0.6562869971184131, "grad_norm": 0.42966026067733765, "learning_rate": 2.7905305601114972e-05, "loss": 1.0198, "step": 7345 }, { "epoch": 0.656376348649675, "grad_norm": 0.4275778830051422, "learning_rate": 2.7892325319218744e-05, "loss": 1.0089, "step": 7346 }, { "epoch": 0.6564657001809369, "grad_norm": 0.4956458508968353, "learning_rate": 2.787934688907594e-05, "loss": 0.9616, "step": 7347 }, { "epoch": 0.6565550517121987, "grad_norm": 0.5669902563095093, "learning_rate": 2.7866370311773603e-05, "loss": 0.8564, "step": 7348 }, { "epoch": 0.6566444032434606, "grad_norm": 0.5185860991477966, "learning_rate": 2.7853395588398677e-05, "loss": 0.9535, "step": 7349 }, { "epoch": 0.6567337547747224, "grad_norm": 0.4602813124656677, "learning_rate": 2.784042272003794e-05, "loss": 0.9282, "step": 7350 }, { "epoch": 0.6568231063059843, "grad_norm": 0.42367443442344666, "learning_rate": 2.7827451707778007e-05, "loss": 0.9314, "step": 7351 }, { "epoch": 0.6569124578372462, "grad_norm": 0.40423354506492615, "learning_rate": 2.7814482552705346e-05, "loss": 1.036, "step": 7352 }, { "epoch": 0.657001809368508, "grad_norm": 0.4704086184501648, "learning_rate": 2.780151525590624e-05, "loss": 0.9161, "step": 7353 }, { "epoch": 0.6570911608997699, "grad_norm": 0.4368595778942108, "learning_rate": 2.7788549818466847e-05, "loss": 1.052, "step": 7354 }, { "epoch": 0.6571805124310318, "grad_norm": 0.5148953199386597, "learning_rate": 2.7775586241473173e-05, "loss": 0.9886, "step": 7355 }, { "epoch": 0.6572698639622937, "grad_norm": 0.43235522508621216, "learning_rate": 2.7762624526011038e-05, "loss": 0.8993, "step": 7356 }, { "epoch": 0.6573592154935555, "grad_norm": 0.5775102376937866, "learning_rate": 2.774966467316613e-05, "loss": 0.9632, "step": 7357 }, { "epoch": 0.6574485670248174, "grad_norm": 0.4877246022224426, "learning_rate": 2.7736706684023982e-05, "loss": 0.8856, "step": 7358 }, { "epoch": 0.6575379185560792, "grad_norm": 0.4451915919780731, "learning_rate": 2.772375055966996e-05, "loss": 0.9679, "step": 7359 }, { "epoch": 0.6576272700873411, "grad_norm": 0.4021294414997101, "learning_rate": 2.7710796301189268e-05, "loss": 0.9819, "step": 7360 }, { "epoch": 0.657716621618603, "grad_norm": 0.4669017791748047, "learning_rate": 2.7697843909666977e-05, "loss": 0.9477, "step": 7361 }, { "epoch": 0.6578059731498649, "grad_norm": 0.48270532488822937, "learning_rate": 2.7684893386188003e-05, "loss": 0.993, "step": 7362 }, { "epoch": 0.6578953246811268, "grad_norm": 0.4589235186576843, "learning_rate": 2.767194473183705e-05, "loss": 0.9158, "step": 7363 }, { "epoch": 0.6579846762123885, "grad_norm": 0.4602046012878418, "learning_rate": 2.765899794769873e-05, "loss": 0.9123, "step": 7364 }, { "epoch": 0.6580740277436504, "grad_norm": 0.4203275144100189, "learning_rate": 2.7646053034857457e-05, "loss": 0.9179, "step": 7365 }, { "epoch": 0.6581633792749123, "grad_norm": 0.464804083108902, "learning_rate": 2.7633109994397533e-05, "loss": 0.9164, "step": 7366 }, { "epoch": 0.6582527308061742, "grad_norm": 0.4711710810661316, "learning_rate": 2.762016882740305e-05, "loss": 0.9645, "step": 7367 }, { "epoch": 0.6583420823374361, "grad_norm": 0.5707644820213318, "learning_rate": 2.7607229534957984e-05, "loss": 0.9153, "step": 7368 }, { "epoch": 0.658431433868698, "grad_norm": 0.39793646335601807, "learning_rate": 2.7594292118146137e-05, "loss": 0.9331, "step": 7369 }, { "epoch": 0.6585207853999598, "grad_norm": 0.4051116108894348, "learning_rate": 2.7581356578051143e-05, "loss": 0.9709, "step": 7370 }, { "epoch": 0.6586101369312216, "grad_norm": 0.5276316404342651, "learning_rate": 2.756842291575651e-05, "loss": 0.9835, "step": 7371 }, { "epoch": 0.6586994884624835, "grad_norm": 0.4137079417705536, "learning_rate": 2.7555491132345557e-05, "loss": 0.9525, "step": 7372 }, { "epoch": 0.6587888399937454, "grad_norm": 0.5179648995399475, "learning_rate": 2.7542561228901485e-05, "loss": 1.0175, "step": 7373 }, { "epoch": 0.6588781915250073, "grad_norm": 0.5228958129882812, "learning_rate": 2.752963320650727e-05, "loss": 0.8682, "step": 7374 }, { "epoch": 0.6589675430562691, "grad_norm": 0.49394968152046204, "learning_rate": 2.7516707066245796e-05, "loss": 0.9222, "step": 7375 }, { "epoch": 0.659056894587531, "grad_norm": 0.5419045686721802, "learning_rate": 2.7503782809199753e-05, "loss": 0.8827, "step": 7376 }, { "epoch": 0.6591462461187929, "grad_norm": 0.4782697260379791, "learning_rate": 2.7490860436451692e-05, "loss": 0.9742, "step": 7377 }, { "epoch": 0.6592355976500547, "grad_norm": 0.521144688129425, "learning_rate": 2.7477939949084e-05, "loss": 0.8683, "step": 7378 }, { "epoch": 0.6593249491813166, "grad_norm": 0.4898735284805298, "learning_rate": 2.7465021348178903e-05, "loss": 0.9738, "step": 7379 }, { "epoch": 0.6594143007125784, "grad_norm": 0.5147672891616821, "learning_rate": 2.7452104634818497e-05, "loss": 0.8741, "step": 7380 }, { "epoch": 0.6595036522438403, "grad_norm": 0.5325078368186951, "learning_rate": 2.7439189810084655e-05, "loss": 0.9312, "step": 7381 }, { "epoch": 0.6595930037751022, "grad_norm": 0.45045092701911926, "learning_rate": 2.7426276875059143e-05, "loss": 0.9429, "step": 7382 }, { "epoch": 0.6596823553063641, "grad_norm": 0.45888474583625793, "learning_rate": 2.7413365830823557e-05, "loss": 0.9197, "step": 7383 }, { "epoch": 0.659771706837626, "grad_norm": 0.4505671262741089, "learning_rate": 2.7400456678459363e-05, "loss": 0.8995, "step": 7384 }, { "epoch": 0.6598610583688878, "grad_norm": 0.6128329634666443, "learning_rate": 2.7387549419047798e-05, "loss": 0.895, "step": 7385 }, { "epoch": 0.6599504099001496, "grad_norm": 0.44439369440078735, "learning_rate": 2.7374644053669997e-05, "loss": 0.9927, "step": 7386 }, { "epoch": 0.6600397614314115, "grad_norm": 0.520553469657898, "learning_rate": 2.7361740583406924e-05, "loss": 0.93, "step": 7387 }, { "epoch": 0.6601291129626734, "grad_norm": 0.5277306437492371, "learning_rate": 2.734883900933939e-05, "loss": 0.9059, "step": 7388 }, { "epoch": 0.6602184644939353, "grad_norm": 0.5034501552581787, "learning_rate": 2.7335939332548032e-05, "loss": 0.9507, "step": 7389 }, { "epoch": 0.6603078160251972, "grad_norm": 0.5344181656837463, "learning_rate": 2.7323041554113333e-05, "loss": 0.834, "step": 7390 }, { "epoch": 0.660397167556459, "grad_norm": 0.5650686621665955, "learning_rate": 2.731014567511562e-05, "loss": 0.9379, "step": 7391 }, { "epoch": 0.6604865190877208, "grad_norm": 0.5536824464797974, "learning_rate": 2.7297251696635074e-05, "loss": 0.9655, "step": 7392 }, { "epoch": 0.6605758706189827, "grad_norm": 0.6149418950080872, "learning_rate": 2.7284359619751704e-05, "loss": 0.9708, "step": 7393 }, { "epoch": 0.6606652221502446, "grad_norm": 0.5840936303138733, "learning_rate": 2.7271469445545327e-05, "loss": 0.9482, "step": 7394 }, { "epoch": 0.6607545736815065, "grad_norm": 0.42912179231643677, "learning_rate": 2.7258581175095654e-05, "loss": 0.9896, "step": 7395 }, { "epoch": 0.6608439252127684, "grad_norm": 0.44300276041030884, "learning_rate": 2.7245694809482214e-05, "loss": 0.9567, "step": 7396 }, { "epoch": 0.6609332767440302, "grad_norm": 0.482937753200531, "learning_rate": 2.7232810349784375e-05, "loss": 0.945, "step": 7397 }, { "epoch": 0.661022628275292, "grad_norm": 0.40860891342163086, "learning_rate": 2.721992779708136e-05, "loss": 0.914, "step": 7398 }, { "epoch": 0.6611119798065539, "grad_norm": 0.4908876121044159, "learning_rate": 2.7207047152452196e-05, "loss": 0.9459, "step": 7399 }, { "epoch": 0.6612013313378158, "grad_norm": 0.475569486618042, "learning_rate": 2.7194168416975797e-05, "loss": 0.9524, "step": 7400 }, { "epoch": 0.6612906828690777, "grad_norm": 0.44693323969841003, "learning_rate": 2.7181291591730883e-05, "loss": 0.886, "step": 7401 }, { "epoch": 0.6613800344003395, "grad_norm": 0.613734781742096, "learning_rate": 2.7168416677796028e-05, "loss": 0.9082, "step": 7402 }, { "epoch": 0.6614693859316014, "grad_norm": 0.43988481163978577, "learning_rate": 2.715554367624966e-05, "loss": 0.9724, "step": 7403 }, { "epoch": 0.6615587374628633, "grad_norm": 0.37471234798431396, "learning_rate": 2.7142672588170002e-05, "loss": 0.9339, "step": 7404 }, { "epoch": 0.6616480889941251, "grad_norm": 0.45363959670066833, "learning_rate": 2.712980341463515e-05, "loss": 0.9341, "step": 7405 }, { "epoch": 0.661737440525387, "grad_norm": 0.5071055293083191, "learning_rate": 2.711693615672305e-05, "loss": 0.9066, "step": 7406 }, { "epoch": 0.6618267920566488, "grad_norm": 0.4539679288864136, "learning_rate": 2.710407081551145e-05, "loss": 0.975, "step": 7407 }, { "epoch": 0.6619161435879107, "grad_norm": 0.5037643909454346, "learning_rate": 2.7091207392077977e-05, "loss": 0.918, "step": 7408 }, { "epoch": 0.6620054951191726, "grad_norm": 0.47487950325012207, "learning_rate": 2.707834588750008e-05, "loss": 0.8837, "step": 7409 }, { "epoch": 0.6620948466504345, "grad_norm": 0.40642839670181274, "learning_rate": 2.7065486302855037e-05, "loss": 0.9693, "step": 7410 }, { "epoch": 0.6621841981816964, "grad_norm": 0.4429019093513489, "learning_rate": 2.705262863921998e-05, "loss": 1.0066, "step": 7411 }, { "epoch": 0.6622735497129582, "grad_norm": 0.5285657644271851, "learning_rate": 2.703977289767188e-05, "loss": 0.925, "step": 7412 }, { "epoch": 0.66236290124422, "grad_norm": 0.533004105091095, "learning_rate": 2.7026919079287555e-05, "loss": 0.951, "step": 7413 }, { "epoch": 0.6624522527754819, "grad_norm": 0.5454312562942505, "learning_rate": 2.701406718514361e-05, "loss": 0.96, "step": 7414 }, { "epoch": 0.6625416043067438, "grad_norm": 0.4193102717399597, "learning_rate": 2.7001217216316553e-05, "loss": 0.9904, "step": 7415 }, { "epoch": 0.6626309558380057, "grad_norm": 0.43523266911506653, "learning_rate": 2.698836917388271e-05, "loss": 0.9572, "step": 7416 }, { "epoch": 0.6627203073692676, "grad_norm": 0.4869917035102844, "learning_rate": 2.6975523058918252e-05, "loss": 1.0183, "step": 7417 }, { "epoch": 0.6628096589005295, "grad_norm": 0.4953290820121765, "learning_rate": 2.6962678872499137e-05, "loss": 0.8993, "step": 7418 }, { "epoch": 0.6628990104317912, "grad_norm": 0.5153881907463074, "learning_rate": 2.6949836615701225e-05, "loss": 0.9066, "step": 7419 }, { "epoch": 0.6629883619630531, "grad_norm": 0.42058584094047546, "learning_rate": 2.6936996289600198e-05, "loss": 0.989, "step": 7420 }, { "epoch": 0.663077713494315, "grad_norm": 0.4563521146774292, "learning_rate": 2.6924157895271563e-05, "loss": 0.9595, "step": 7421 }, { "epoch": 0.6631670650255769, "grad_norm": 0.5149219036102295, "learning_rate": 2.6911321433790677e-05, "loss": 0.9232, "step": 7422 }, { "epoch": 0.6632564165568388, "grad_norm": 0.478681743144989, "learning_rate": 2.6898486906232746e-05, "loss": 0.9574, "step": 7423 }, { "epoch": 0.6633457680881006, "grad_norm": 0.4329462945461273, "learning_rate": 2.6885654313672763e-05, "loss": 0.9712, "step": 7424 }, { "epoch": 0.6634351196193625, "grad_norm": 0.5026130676269531, "learning_rate": 2.6872823657185614e-05, "loss": 0.9287, "step": 7425 }, { "epoch": 0.6635244711506243, "grad_norm": 0.49470841884613037, "learning_rate": 2.6859994937846e-05, "loss": 0.876, "step": 7426 }, { "epoch": 0.6636138226818862, "grad_norm": 0.49729084968566895, "learning_rate": 2.6847168156728463e-05, "loss": 0.8896, "step": 7427 }, { "epoch": 0.6637031742131481, "grad_norm": 0.4407115876674652, "learning_rate": 2.6834343314907384e-05, "loss": 0.9779, "step": 7428 }, { "epoch": 0.66379252574441, "grad_norm": 0.3972383737564087, "learning_rate": 2.682152041345699e-05, "loss": 0.9496, "step": 7429 }, { "epoch": 0.6638818772756718, "grad_norm": 0.42927953600883484, "learning_rate": 2.6808699453451313e-05, "loss": 0.9761, "step": 7430 }, { "epoch": 0.6639712288069337, "grad_norm": 0.37218815088272095, "learning_rate": 2.679588043596427e-05, "loss": 0.9326, "step": 7431 }, { "epoch": 0.6640605803381956, "grad_norm": 0.513047456741333, "learning_rate": 2.6783063362069573e-05, "loss": 0.9675, "step": 7432 }, { "epoch": 0.6641499318694574, "grad_norm": 0.4514136016368866, "learning_rate": 2.677024823284081e-05, "loss": 0.9793, "step": 7433 }, { "epoch": 0.6642392834007192, "grad_norm": 0.5317848324775696, "learning_rate": 2.6757435049351353e-05, "loss": 0.8597, "step": 7434 }, { "epoch": 0.6643286349319811, "grad_norm": 0.6172546148300171, "learning_rate": 2.6744623812674463e-05, "loss": 0.8547, "step": 7435 }, { "epoch": 0.664417986463243, "grad_norm": 0.45025524497032166, "learning_rate": 2.6731814523883202e-05, "loss": 0.9209, "step": 7436 }, { "epoch": 0.6645073379945049, "grad_norm": 0.4900471866130829, "learning_rate": 2.6719007184050504e-05, "loss": 0.8974, "step": 7437 }, { "epoch": 0.6645966895257668, "grad_norm": 0.4560643434524536, "learning_rate": 2.6706201794249108e-05, "loss": 0.9878, "step": 7438 }, { "epoch": 0.6646860410570287, "grad_norm": 0.41704943776130676, "learning_rate": 2.6693398355551613e-05, "loss": 1.011, "step": 7439 }, { "epoch": 0.6647753925882904, "grad_norm": 0.6909659504890442, "learning_rate": 2.668059686903043e-05, "loss": 0.905, "step": 7440 }, { "epoch": 0.6648647441195523, "grad_norm": 0.4692802429199219, "learning_rate": 2.6667797335757827e-05, "loss": 0.957, "step": 7441 }, { "epoch": 0.6649540956508142, "grad_norm": 0.47277992963790894, "learning_rate": 2.66549997568059e-05, "loss": 0.9593, "step": 7442 }, { "epoch": 0.6650434471820761, "grad_norm": 0.4334234595298767, "learning_rate": 2.6642204133246605e-05, "loss": 0.9594, "step": 7443 }, { "epoch": 0.665132798713338, "grad_norm": 0.43913981318473816, "learning_rate": 2.662941046615167e-05, "loss": 0.9695, "step": 7444 }, { "epoch": 0.6652221502445999, "grad_norm": 0.43409544229507446, "learning_rate": 2.661661875659272e-05, "loss": 0.9474, "step": 7445 }, { "epoch": 0.6653115017758617, "grad_norm": 0.5566879510879517, "learning_rate": 2.6603829005641202e-05, "loss": 0.9446, "step": 7446 }, { "epoch": 0.6654008533071235, "grad_norm": 0.503091037273407, "learning_rate": 2.6591041214368385e-05, "loss": 0.8959, "step": 7447 }, { "epoch": 0.6654902048383854, "grad_norm": 0.4617300033569336, "learning_rate": 2.6578255383845384e-05, "loss": 1.0043, "step": 7448 }, { "epoch": 0.6655795563696473, "grad_norm": 0.5025232434272766, "learning_rate": 2.6565471515143157e-05, "loss": 0.9259, "step": 7449 }, { "epoch": 0.6656689079009092, "grad_norm": 0.5556004047393799, "learning_rate": 2.6552689609332504e-05, "loss": 0.9494, "step": 7450 }, { "epoch": 0.665758259432171, "grad_norm": 0.5287407040596008, "learning_rate": 2.653990966748401e-05, "loss": 0.9567, "step": 7451 }, { "epoch": 0.6658476109634329, "grad_norm": 0.5063120126724243, "learning_rate": 2.652713169066815e-05, "loss": 0.8983, "step": 7452 }, { "epoch": 0.6659369624946948, "grad_norm": 0.4847102463245392, "learning_rate": 2.6514355679955205e-05, "loss": 0.9043, "step": 7453 }, { "epoch": 0.6660263140259566, "grad_norm": 0.4578264355659485, "learning_rate": 2.650158163641534e-05, "loss": 0.974, "step": 7454 }, { "epoch": 0.6661156655572185, "grad_norm": 0.481656014919281, "learning_rate": 2.648880956111846e-05, "loss": 0.8898, "step": 7455 }, { "epoch": 0.6662050170884803, "grad_norm": 0.47429630160331726, "learning_rate": 2.6476039455134393e-05, "loss": 0.972, "step": 7456 }, { "epoch": 0.6662943686197422, "grad_norm": 0.48434558510780334, "learning_rate": 2.6463271319532766e-05, "loss": 1.0256, "step": 7457 }, { "epoch": 0.6663837201510041, "grad_norm": 0.5073295831680298, "learning_rate": 2.645050515538306e-05, "loss": 0.8717, "step": 7458 }, { "epoch": 0.666473071682266, "grad_norm": 0.5684570670127869, "learning_rate": 2.643774096375456e-05, "loss": 0.8968, "step": 7459 }, { "epoch": 0.6665624232135279, "grad_norm": 0.44450321793556213, "learning_rate": 2.642497874571641e-05, "loss": 0.9466, "step": 7460 }, { "epoch": 0.6666517747447896, "grad_norm": 0.40622055530548096, "learning_rate": 2.6412218502337582e-05, "loss": 0.9863, "step": 7461 }, { "epoch": 0.6667411262760515, "grad_norm": 0.42449328303337097, "learning_rate": 2.6399460234686877e-05, "loss": 0.935, "step": 7462 }, { "epoch": 0.6668304778073134, "grad_norm": 0.5839447975158691, "learning_rate": 2.6386703943832947e-05, "loss": 0.9843, "step": 7463 }, { "epoch": 0.6669198293385753, "grad_norm": 0.5379756093025208, "learning_rate": 2.6373949630844287e-05, "loss": 0.88, "step": 7464 }, { "epoch": 0.6670091808698372, "grad_norm": 0.4397304058074951, "learning_rate": 2.6361197296789153e-05, "loss": 0.934, "step": 7465 }, { "epoch": 0.6670985324010991, "grad_norm": 0.5913897156715393, "learning_rate": 2.6348446942735716e-05, "loss": 0.9605, "step": 7466 }, { "epoch": 0.6671878839323608, "grad_norm": 0.4340857267379761, "learning_rate": 2.6335698569751956e-05, "loss": 0.9753, "step": 7467 }, { "epoch": 0.6672772354636227, "grad_norm": 0.41628775000572205, "learning_rate": 2.6322952178905692e-05, "loss": 0.9943, "step": 7468 }, { "epoch": 0.6673665869948846, "grad_norm": 0.5939970016479492, "learning_rate": 2.631020777126455e-05, "loss": 0.8947, "step": 7469 }, { "epoch": 0.6674559385261465, "grad_norm": 0.413511723279953, "learning_rate": 2.6297465347896026e-05, "loss": 0.9991, "step": 7470 }, { "epoch": 0.6675452900574084, "grad_norm": 0.48279207944869995, "learning_rate": 2.6284724909867432e-05, "loss": 0.9453, "step": 7471 }, { "epoch": 0.6676346415886703, "grad_norm": 0.4602436125278473, "learning_rate": 2.6271986458245912e-05, "loss": 0.9822, "step": 7472 }, { "epoch": 0.6677239931199321, "grad_norm": 0.4432784616947174, "learning_rate": 2.6259249994098455e-05, "loss": 0.9506, "step": 7473 }, { "epoch": 0.6678133446511939, "grad_norm": 0.5207743644714355, "learning_rate": 2.624651551849188e-05, "loss": 0.8715, "step": 7474 }, { "epoch": 0.6679026961824558, "grad_norm": 0.49934712052345276, "learning_rate": 2.623378303249281e-05, "loss": 0.9471, "step": 7475 }, { "epoch": 0.6679920477137177, "grad_norm": 0.5130594968795776, "learning_rate": 2.622105253716774e-05, "loss": 0.9037, "step": 7476 }, { "epoch": 0.6680813992449796, "grad_norm": 0.538914680480957, "learning_rate": 2.6208324033582986e-05, "loss": 0.9393, "step": 7477 }, { "epoch": 0.6681707507762414, "grad_norm": 0.5530039668083191, "learning_rate": 2.6195597522804692e-05, "loss": 0.8773, "step": 7478 }, { "epoch": 0.6682601023075033, "grad_norm": 0.4622054696083069, "learning_rate": 2.6182873005898845e-05, "loss": 0.9016, "step": 7479 }, { "epoch": 0.6683494538387652, "grad_norm": 0.43117639422416687, "learning_rate": 2.6170150483931257e-05, "loss": 0.9483, "step": 7480 }, { "epoch": 0.668438805370027, "grad_norm": 0.45902514457702637, "learning_rate": 2.6157429957967566e-05, "loss": 0.9158, "step": 7481 }, { "epoch": 0.6685281569012889, "grad_norm": 0.6419569849967957, "learning_rate": 2.6144711429073265e-05, "loss": 0.913, "step": 7482 }, { "epoch": 0.6686175084325507, "grad_norm": 0.38565734028816223, "learning_rate": 2.6131994898313684e-05, "loss": 0.9589, "step": 7483 }, { "epoch": 0.6687068599638126, "grad_norm": 0.43412715196609497, "learning_rate": 2.6119280366753917e-05, "loss": 0.9686, "step": 7484 }, { "epoch": 0.6687962114950745, "grad_norm": 0.4611121714115143, "learning_rate": 2.610656783545898e-05, "loss": 1.0003, "step": 7485 }, { "epoch": 0.6688855630263364, "grad_norm": 0.4799754321575165, "learning_rate": 2.6093857305493664e-05, "loss": 0.969, "step": 7486 }, { "epoch": 0.6689749145575983, "grad_norm": 0.556797444820404, "learning_rate": 2.6081148777922643e-05, "loss": 0.9443, "step": 7487 }, { "epoch": 0.66906426608886, "grad_norm": 0.4632977247238159, "learning_rate": 2.606844225381035e-05, "loss": 0.9519, "step": 7488 }, { "epoch": 0.6691536176201219, "grad_norm": 0.43375352025032043, "learning_rate": 2.6055737734221108e-05, "loss": 0.9369, "step": 7489 }, { "epoch": 0.6692429691513838, "grad_norm": 0.48179739713668823, "learning_rate": 2.604303522021906e-05, "loss": 0.8513, "step": 7490 }, { "epoch": 0.6693323206826457, "grad_norm": 0.45359307527542114, "learning_rate": 2.6030334712868177e-05, "loss": 1.0067, "step": 7491 }, { "epoch": 0.6694216722139076, "grad_norm": 0.4388836622238159, "learning_rate": 2.6017636213232255e-05, "loss": 0.9204, "step": 7492 }, { "epoch": 0.6695110237451695, "grad_norm": 0.44517624378204346, "learning_rate": 2.600493972237493e-05, "loss": 0.955, "step": 7493 }, { "epoch": 0.6696003752764313, "grad_norm": 0.5145146250724792, "learning_rate": 2.5992245241359702e-05, "loss": 0.8947, "step": 7494 }, { "epoch": 0.6696897268076931, "grad_norm": 0.6348103880882263, "learning_rate": 2.5979552771249814e-05, "loss": 0.975, "step": 7495 }, { "epoch": 0.669779078338955, "grad_norm": 0.4274086654186249, "learning_rate": 2.596686231310842e-05, "loss": 0.9465, "step": 7496 }, { "epoch": 0.6698684298702169, "grad_norm": 0.6270675659179688, "learning_rate": 2.595417386799849e-05, "loss": 0.8768, "step": 7497 }, { "epoch": 0.6699577814014788, "grad_norm": 0.5795266032218933, "learning_rate": 2.5941487436982803e-05, "loss": 0.8254, "step": 7498 }, { "epoch": 0.6700471329327407, "grad_norm": 0.4406552016735077, "learning_rate": 2.592880302112399e-05, "loss": 0.9277, "step": 7499 }, { "epoch": 0.6701364844640025, "grad_norm": 0.4358079135417938, "learning_rate": 2.5916120621484498e-05, "loss": 0.9509, "step": 7500 }, { "epoch": 0.6702258359952644, "grad_norm": 0.5085236430168152, "learning_rate": 2.590344023912663e-05, "loss": 0.8566, "step": 7501 }, { "epoch": 0.6703151875265262, "grad_norm": 0.5449510812759399, "learning_rate": 2.5890761875112485e-05, "loss": 0.9698, "step": 7502 }, { "epoch": 0.6704045390577881, "grad_norm": 0.49688923358917236, "learning_rate": 2.587808553050402e-05, "loss": 0.9613, "step": 7503 }, { "epoch": 0.67049389058905, "grad_norm": 0.43229207396507263, "learning_rate": 2.586541120636303e-05, "loss": 0.9708, "step": 7504 }, { "epoch": 0.6705832421203118, "grad_norm": 0.4369189739227295, "learning_rate": 2.5852738903751095e-05, "loss": 0.9745, "step": 7505 }, { "epoch": 0.6706725936515737, "grad_norm": 0.42944756150245667, "learning_rate": 2.5840068623729668e-05, "loss": 0.9261, "step": 7506 }, { "epoch": 0.6707619451828356, "grad_norm": 0.4990726113319397, "learning_rate": 2.5827400367360015e-05, "loss": 0.975, "step": 7507 }, { "epoch": 0.6708512967140975, "grad_norm": 0.4963349401950836, "learning_rate": 2.5814734135703245e-05, "loss": 0.9149, "step": 7508 }, { "epoch": 0.6709406482453593, "grad_norm": 0.4562934339046478, "learning_rate": 2.5802069929820294e-05, "loss": 1.0564, "step": 7509 }, { "epoch": 0.6710299997766211, "grad_norm": 0.4936973750591278, "learning_rate": 2.578940775077191e-05, "loss": 0.921, "step": 7510 }, { "epoch": 0.671119351307883, "grad_norm": 0.4688092768192291, "learning_rate": 2.5776747599618688e-05, "loss": 0.8868, "step": 7511 }, { "epoch": 0.6712087028391449, "grad_norm": 0.525898277759552, "learning_rate": 2.5764089477421067e-05, "loss": 0.8827, "step": 7512 }, { "epoch": 0.6712980543704068, "grad_norm": 0.5553557872772217, "learning_rate": 2.5751433385239288e-05, "loss": 0.9034, "step": 7513 }, { "epoch": 0.6713874059016687, "grad_norm": 0.5192905068397522, "learning_rate": 2.5738779324133445e-05, "loss": 0.9762, "step": 7514 }, { "epoch": 0.6714767574329306, "grad_norm": 0.45627352595329285, "learning_rate": 2.5726127295163428e-05, "loss": 0.9173, "step": 7515 }, { "epoch": 0.6715661089641923, "grad_norm": 0.5443771481513977, "learning_rate": 2.5713477299388987e-05, "loss": 0.9714, "step": 7516 }, { "epoch": 0.6716554604954542, "grad_norm": 0.4559047520160675, "learning_rate": 2.57008293378697e-05, "loss": 0.9483, "step": 7517 }, { "epoch": 0.6717448120267161, "grad_norm": 0.47910940647125244, "learning_rate": 2.568818341166496e-05, "loss": 0.9451, "step": 7518 }, { "epoch": 0.671834163557978, "grad_norm": 0.4128173589706421, "learning_rate": 2.5675539521834012e-05, "loss": 0.9631, "step": 7519 }, { "epoch": 0.6719235150892399, "grad_norm": 0.424180805683136, "learning_rate": 2.5662897669435925e-05, "loss": 0.9432, "step": 7520 }, { "epoch": 0.6720128666205017, "grad_norm": 0.4650351405143738, "learning_rate": 2.5650257855529558e-05, "loss": 0.9263, "step": 7521 }, { "epoch": 0.6721022181517636, "grad_norm": 0.5273159742355347, "learning_rate": 2.5637620081173642e-05, "loss": 0.8891, "step": 7522 }, { "epoch": 0.6721915696830254, "grad_norm": 0.43151742219924927, "learning_rate": 2.5624984347426727e-05, "loss": 0.9896, "step": 7523 }, { "epoch": 0.6722809212142873, "grad_norm": 0.4380139112472534, "learning_rate": 2.5612350655347195e-05, "loss": 1.0411, "step": 7524 }, { "epoch": 0.6723702727455492, "grad_norm": 0.49319833517074585, "learning_rate": 2.559971900599326e-05, "loss": 0.8419, "step": 7525 }, { "epoch": 0.672459624276811, "grad_norm": 0.44189539551734924, "learning_rate": 2.5587089400422938e-05, "loss": 0.9321, "step": 7526 }, { "epoch": 0.6725489758080729, "grad_norm": 0.4883861243724823, "learning_rate": 2.55744618396941e-05, "loss": 0.9207, "step": 7527 }, { "epoch": 0.6726383273393348, "grad_norm": 0.46611472964286804, "learning_rate": 2.5561836324864442e-05, "loss": 0.8813, "step": 7528 }, { "epoch": 0.6727276788705966, "grad_norm": 0.49315086007118225, "learning_rate": 2.554921285699148e-05, "loss": 0.8707, "step": 7529 }, { "epoch": 0.6728170304018585, "grad_norm": 0.47379156947135925, "learning_rate": 2.5536591437132563e-05, "loss": 0.9026, "step": 7530 }, { "epoch": 0.6729063819331204, "grad_norm": 0.4609242081642151, "learning_rate": 2.552397206634488e-05, "loss": 0.9346, "step": 7531 }, { "epoch": 0.6729957334643822, "grad_norm": 0.4267633855342865, "learning_rate": 2.5511354745685433e-05, "loss": 0.9378, "step": 7532 }, { "epoch": 0.6730850849956441, "grad_norm": 0.4892594516277313, "learning_rate": 2.5498739476211054e-05, "loss": 0.9893, "step": 7533 }, { "epoch": 0.673174436526906, "grad_norm": 0.6081541180610657, "learning_rate": 2.5486126258978427e-05, "loss": 0.9632, "step": 7534 }, { "epoch": 0.6732637880581679, "grad_norm": 0.49728408455848694, "learning_rate": 2.547351509504401e-05, "loss": 0.9476, "step": 7535 }, { "epoch": 0.6733531395894297, "grad_norm": 0.44689908623695374, "learning_rate": 2.5460905985464134e-05, "loss": 1.0134, "step": 7536 }, { "epoch": 0.6734424911206915, "grad_norm": 0.5165348052978516, "learning_rate": 2.544829893129495e-05, "loss": 0.9183, "step": 7537 }, { "epoch": 0.6735318426519534, "grad_norm": 0.45430701971054077, "learning_rate": 2.5435693933592432e-05, "loss": 0.9704, "step": 7538 }, { "epoch": 0.6736211941832153, "grad_norm": 0.4814325273036957, "learning_rate": 2.5423090993412383e-05, "loss": 0.9752, "step": 7539 }, { "epoch": 0.6737105457144772, "grad_norm": 0.5146889090538025, "learning_rate": 2.5410490111810435e-05, "loss": 0.9822, "step": 7540 }, { "epoch": 0.6737998972457391, "grad_norm": 0.47414878010749817, "learning_rate": 2.5397891289842052e-05, "loss": 1.0163, "step": 7541 }, { "epoch": 0.673889248777001, "grad_norm": 0.4566519558429718, "learning_rate": 2.5385294528562507e-05, "loss": 0.9218, "step": 7542 }, { "epoch": 0.6739786003082627, "grad_norm": 0.7057138681411743, "learning_rate": 2.537269982902692e-05, "loss": 0.8424, "step": 7543 }, { "epoch": 0.6740679518395246, "grad_norm": 0.5519537329673767, "learning_rate": 2.536010719229023e-05, "loss": 0.8996, "step": 7544 }, { "epoch": 0.6741573033707865, "grad_norm": 0.46758851408958435, "learning_rate": 2.5347516619407223e-05, "loss": 0.8998, "step": 7545 }, { "epoch": 0.6742466549020484, "grad_norm": 0.452288955450058, "learning_rate": 2.533492811143246e-05, "loss": 0.8949, "step": 7546 }, { "epoch": 0.6743360064333103, "grad_norm": 0.46302369236946106, "learning_rate": 2.532234166942038e-05, "loss": 0.9162, "step": 7547 }, { "epoch": 0.6744253579645721, "grad_norm": 0.46767961978912354, "learning_rate": 2.5309757294425222e-05, "loss": 0.9122, "step": 7548 }, { "epoch": 0.674514709495834, "grad_norm": 0.42991432547569275, "learning_rate": 2.5297174987501077e-05, "loss": 1.0339, "step": 7549 }, { "epoch": 0.6746040610270958, "grad_norm": 0.40119388699531555, "learning_rate": 2.528459474970184e-05, "loss": 1.0173, "step": 7550 }, { "epoch": 0.6746934125583577, "grad_norm": 0.4925364553928375, "learning_rate": 2.5272016582081236e-05, "loss": 0.9063, "step": 7551 }, { "epoch": 0.6747827640896196, "grad_norm": 0.4693504273891449, "learning_rate": 2.525944048569282e-05, "loss": 0.9513, "step": 7552 }, { "epoch": 0.6748721156208815, "grad_norm": 0.4908875524997711, "learning_rate": 2.524686646159001e-05, "loss": 0.9932, "step": 7553 }, { "epoch": 0.6749614671521433, "grad_norm": 0.5330108404159546, "learning_rate": 2.5234294510825957e-05, "loss": 0.925, "step": 7554 }, { "epoch": 0.6750508186834052, "grad_norm": 0.41894859075546265, "learning_rate": 2.5221724634453724e-05, "loss": 0.9464, "step": 7555 }, { "epoch": 0.6751401702146671, "grad_norm": 0.4790763556957245, "learning_rate": 2.5209156833526172e-05, "loss": 0.9183, "step": 7556 }, { "epoch": 0.6752295217459289, "grad_norm": 0.6181892156600952, "learning_rate": 2.5196591109096e-05, "loss": 0.8195, "step": 7557 }, { "epoch": 0.6753188732771908, "grad_norm": 0.4377688765525818, "learning_rate": 2.5184027462215686e-05, "loss": 0.9403, "step": 7558 }, { "epoch": 0.6754082248084526, "grad_norm": 0.4455429017543793, "learning_rate": 2.5171465893937602e-05, "loss": 0.9458, "step": 7559 }, { "epoch": 0.6754975763397145, "grad_norm": 0.451734334230423, "learning_rate": 2.51589064053139e-05, "loss": 0.9318, "step": 7560 }, { "epoch": 0.6755869278709764, "grad_norm": 0.39565378427505493, "learning_rate": 2.5146348997396567e-05, "loss": 1.0195, "step": 7561 }, { "epoch": 0.6756762794022383, "grad_norm": 0.5276391506195068, "learning_rate": 2.5133793671237433e-05, "loss": 0.9303, "step": 7562 }, { "epoch": 0.6757656309335002, "grad_norm": 0.5773701667785645, "learning_rate": 2.512124042788813e-05, "loss": 0.9137, "step": 7563 }, { "epoch": 0.6758549824647619, "grad_norm": 0.45947515964508057, "learning_rate": 2.5108689268400132e-05, "loss": 0.933, "step": 7564 }, { "epoch": 0.6759443339960238, "grad_norm": 0.4164651930332184, "learning_rate": 2.5096140193824748e-05, "loss": 0.9255, "step": 7565 }, { "epoch": 0.6760336855272857, "grad_norm": 0.5537623763084412, "learning_rate": 2.5083593205213063e-05, "loss": 0.8421, "step": 7566 }, { "epoch": 0.6761230370585476, "grad_norm": 0.412296861410141, "learning_rate": 2.5071048303616028e-05, "loss": 0.9733, "step": 7567 }, { "epoch": 0.6762123885898095, "grad_norm": 0.6003457307815552, "learning_rate": 2.5058505490084428e-05, "loss": 0.8458, "step": 7568 }, { "epoch": 0.6763017401210714, "grad_norm": 0.4704027771949768, "learning_rate": 2.504596476566885e-05, "loss": 0.9236, "step": 7569 }, { "epoch": 0.6763910916523332, "grad_norm": 0.40603572130203247, "learning_rate": 2.5033426131419714e-05, "loss": 0.9337, "step": 7570 }, { "epoch": 0.676480443183595, "grad_norm": 0.433475136756897, "learning_rate": 2.5020889588387266e-05, "loss": 0.9907, "step": 7571 }, { "epoch": 0.6765697947148569, "grad_norm": 0.44331127405166626, "learning_rate": 2.500835513762157e-05, "loss": 0.9091, "step": 7572 }, { "epoch": 0.6766591462461188, "grad_norm": 0.43762436509132385, "learning_rate": 2.4995822780172522e-05, "loss": 0.9437, "step": 7573 }, { "epoch": 0.6767484977773807, "grad_norm": 0.4718449115753174, "learning_rate": 2.4983292517089846e-05, "loss": 0.9747, "step": 7574 }, { "epoch": 0.6768378493086425, "grad_norm": 0.48513391613960266, "learning_rate": 2.4970764349423093e-05, "loss": 0.9207, "step": 7575 }, { "epoch": 0.6769272008399044, "grad_norm": 0.5163725018501282, "learning_rate": 2.49582382782216e-05, "loss": 0.915, "step": 7576 }, { "epoch": 0.6770165523711663, "grad_norm": 0.592780351638794, "learning_rate": 2.4945714304534585e-05, "loss": 0.7994, "step": 7577 }, { "epoch": 0.6771059039024281, "grad_norm": 0.44290387630462646, "learning_rate": 2.4933192429411052e-05, "loss": 0.9619, "step": 7578 }, { "epoch": 0.67719525543369, "grad_norm": 0.5108716487884521, "learning_rate": 2.4920672653899847e-05, "loss": 0.9351, "step": 7579 }, { "epoch": 0.6772846069649519, "grad_norm": 0.6224034428596497, "learning_rate": 2.490815497904963e-05, "loss": 0.9648, "step": 7580 }, { "epoch": 0.6773739584962137, "grad_norm": 0.4487568736076355, "learning_rate": 2.4895639405908894e-05, "loss": 0.9194, "step": 7581 }, { "epoch": 0.6774633100274756, "grad_norm": 0.5798953771591187, "learning_rate": 2.4883125935525953e-05, "loss": 0.8891, "step": 7582 }, { "epoch": 0.6775526615587375, "grad_norm": 0.5566365122795105, "learning_rate": 2.487061456894894e-05, "loss": 0.9089, "step": 7583 }, { "epoch": 0.6776420130899994, "grad_norm": 0.5041350722312927, "learning_rate": 2.485810530722582e-05, "loss": 1.0092, "step": 7584 }, { "epoch": 0.6777313646212612, "grad_norm": 0.47851383686065674, "learning_rate": 2.484559815140439e-05, "loss": 0.8815, "step": 7585 }, { "epoch": 0.677820716152523, "grad_norm": 0.5732426047325134, "learning_rate": 2.4833093102532222e-05, "loss": 0.9326, "step": 7586 }, { "epoch": 0.6779100676837849, "grad_norm": 0.5327209234237671, "learning_rate": 2.482059016165677e-05, "loss": 0.8568, "step": 7587 }, { "epoch": 0.6779994192150468, "grad_norm": 0.5125129222869873, "learning_rate": 2.4808089329825286e-05, "loss": 0.897, "step": 7588 }, { "epoch": 0.6780887707463087, "grad_norm": 0.4826566278934479, "learning_rate": 2.479559060808484e-05, "loss": 0.9773, "step": 7589 }, { "epoch": 0.6781781222775706, "grad_norm": 0.5305161476135254, "learning_rate": 2.4783093997482364e-05, "loss": 0.8638, "step": 7590 }, { "epoch": 0.6782674738088323, "grad_norm": 0.4348098635673523, "learning_rate": 2.477059949906454e-05, "loss": 0.9555, "step": 7591 }, { "epoch": 0.6783568253400942, "grad_norm": 0.4758349061012268, "learning_rate": 2.4758107113877934e-05, "loss": 1.0283, "step": 7592 }, { "epoch": 0.6784461768713561, "grad_norm": 0.5508620142936707, "learning_rate": 2.474561684296891e-05, "loss": 0.8579, "step": 7593 }, { "epoch": 0.678535528402618, "grad_norm": 0.5436617732048035, "learning_rate": 2.4733128687383678e-05, "loss": 0.9722, "step": 7594 }, { "epoch": 0.6786248799338799, "grad_norm": 0.42772892117500305, "learning_rate": 2.4720642648168256e-05, "loss": 0.9366, "step": 7595 }, { "epoch": 0.6787142314651418, "grad_norm": 0.6332234740257263, "learning_rate": 2.4708158726368452e-05, "loss": 0.8871, "step": 7596 }, { "epoch": 0.6788035829964036, "grad_norm": 0.43669572472572327, "learning_rate": 2.4695676923029952e-05, "loss": 0.9658, "step": 7597 }, { "epoch": 0.6788929345276654, "grad_norm": 0.492891401052475, "learning_rate": 2.468319723919823e-05, "loss": 0.9739, "step": 7598 }, { "epoch": 0.6789822860589273, "grad_norm": 0.4240998923778534, "learning_rate": 2.4670719675918597e-05, "loss": 0.9061, "step": 7599 }, { "epoch": 0.6790716375901892, "grad_norm": 0.4478083848953247, "learning_rate": 2.465824423423618e-05, "loss": 1.009, "step": 7600 }, { "epoch": 0.6791609891214511, "grad_norm": 0.4915962815284729, "learning_rate": 2.4645770915195937e-05, "loss": 0.9018, "step": 7601 }, { "epoch": 0.679250340652713, "grad_norm": 0.42449894547462463, "learning_rate": 2.4633299719842633e-05, "loss": 0.985, "step": 7602 }, { "epoch": 0.6793396921839748, "grad_norm": 0.44549936056137085, "learning_rate": 2.4620830649220873e-05, "loss": 0.9358, "step": 7603 }, { "epoch": 0.6794290437152367, "grad_norm": 0.47829920053482056, "learning_rate": 2.460836370437506e-05, "loss": 0.9446, "step": 7604 }, { "epoch": 0.6795183952464985, "grad_norm": 0.5621015429496765, "learning_rate": 2.4595898886349466e-05, "loss": 0.9252, "step": 7605 }, { "epoch": 0.6796077467777604, "grad_norm": 0.49867716431617737, "learning_rate": 2.458343619618811e-05, "loss": 0.9293, "step": 7606 }, { "epoch": 0.6796970983090223, "grad_norm": 0.4213707447052002, "learning_rate": 2.4570975634934888e-05, "loss": 0.9489, "step": 7607 }, { "epoch": 0.6797864498402841, "grad_norm": 0.48737624287605286, "learning_rate": 2.455851720363352e-05, "loss": 0.9583, "step": 7608 }, { "epoch": 0.679875801371546, "grad_norm": 0.45338955521583557, "learning_rate": 2.4546060903327512e-05, "loss": 0.9993, "step": 7609 }, { "epoch": 0.6799651529028079, "grad_norm": 0.509468138217926, "learning_rate": 2.453360673506023e-05, "loss": 0.8476, "step": 7610 }, { "epoch": 0.6800545044340698, "grad_norm": 0.4940629303455353, "learning_rate": 2.4521154699874833e-05, "loss": 0.9198, "step": 7611 }, { "epoch": 0.6801438559653316, "grad_norm": 0.41363492608070374, "learning_rate": 2.450870479881432e-05, "loss": 0.9574, "step": 7612 }, { "epoch": 0.6802332074965934, "grad_norm": 0.4342886805534363, "learning_rate": 2.4496257032921494e-05, "loss": 0.9672, "step": 7613 }, { "epoch": 0.6803225590278553, "grad_norm": 0.5025731921195984, "learning_rate": 2.4483811403238987e-05, "loss": 0.9102, "step": 7614 }, { "epoch": 0.6804119105591172, "grad_norm": 0.5143190622329712, "learning_rate": 2.4471367910809284e-05, "loss": 0.9767, "step": 7615 }, { "epoch": 0.6805012620903791, "grad_norm": 0.442281574010849, "learning_rate": 2.4458926556674615e-05, "loss": 0.9579, "step": 7616 }, { "epoch": 0.680590613621641, "grad_norm": 0.4672459065914154, "learning_rate": 2.4446487341877095e-05, "loss": 0.9006, "step": 7617 }, { "epoch": 0.6806799651529029, "grad_norm": 0.4161359667778015, "learning_rate": 2.4434050267458636e-05, "loss": 0.9494, "step": 7618 }, { "epoch": 0.6807693166841646, "grad_norm": 0.4646640717983246, "learning_rate": 2.4421615334460986e-05, "loss": 0.9457, "step": 7619 }, { "epoch": 0.6808586682154265, "grad_norm": 0.45113250613212585, "learning_rate": 2.4409182543925698e-05, "loss": 0.9247, "step": 7620 }, { "epoch": 0.6809480197466884, "grad_norm": 0.4302740693092346, "learning_rate": 2.4396751896894144e-05, "loss": 0.9624, "step": 7621 }, { "epoch": 0.6810373712779503, "grad_norm": 0.42872077226638794, "learning_rate": 2.438432339440753e-05, "loss": 0.9573, "step": 7622 }, { "epoch": 0.6811267228092122, "grad_norm": 0.3714028596878052, "learning_rate": 2.43718970375069e-05, "loss": 0.9537, "step": 7623 }, { "epoch": 0.681216074340474, "grad_norm": 0.42748263478279114, "learning_rate": 2.435947282723305e-05, "loss": 0.8772, "step": 7624 }, { "epoch": 0.6813054258717359, "grad_norm": 0.42798498272895813, "learning_rate": 2.4347050764626656e-05, "loss": 0.9627, "step": 7625 }, { "epoch": 0.6813947774029977, "grad_norm": 0.5037773251533508, "learning_rate": 2.43346308507282e-05, "loss": 0.9368, "step": 7626 }, { "epoch": 0.6814841289342596, "grad_norm": 0.49071839451789856, "learning_rate": 2.432221308657799e-05, "loss": 0.9451, "step": 7627 }, { "epoch": 0.6815734804655215, "grad_norm": 0.519668459892273, "learning_rate": 2.430979747321615e-05, "loss": 0.8967, "step": 7628 }, { "epoch": 0.6816628319967833, "grad_norm": 0.389871746301651, "learning_rate": 2.4297384011682595e-05, "loss": 0.9651, "step": 7629 }, { "epoch": 0.6817521835280452, "grad_norm": 0.5641822814941406, "learning_rate": 2.42849727030171e-05, "loss": 0.8365, "step": 7630 }, { "epoch": 0.6818415350593071, "grad_norm": 0.4539962410926819, "learning_rate": 2.427256354825924e-05, "loss": 0.9822, "step": 7631 }, { "epoch": 0.681930886590569, "grad_norm": 0.4748673439025879, "learning_rate": 2.4260156548448427e-05, "loss": 0.9252, "step": 7632 }, { "epoch": 0.6820202381218308, "grad_norm": 0.4344809353351593, "learning_rate": 2.424775170462386e-05, "loss": 0.9967, "step": 7633 }, { "epoch": 0.6821095896530927, "grad_norm": 0.4680819511413574, "learning_rate": 2.4235349017824588e-05, "loss": 0.9189, "step": 7634 }, { "epoch": 0.6821989411843545, "grad_norm": 0.47244375944137573, "learning_rate": 2.422294848908947e-05, "loss": 0.9564, "step": 7635 }, { "epoch": 0.6822882927156164, "grad_norm": 0.47211724519729614, "learning_rate": 2.4210550119457197e-05, "loss": 1.0198, "step": 7636 }, { "epoch": 0.6823776442468783, "grad_norm": 0.4619308114051819, "learning_rate": 2.419815390996623e-05, "loss": 0.9311, "step": 7637 }, { "epoch": 0.6824669957781402, "grad_norm": 0.5194936394691467, "learning_rate": 2.41857598616549e-05, "loss": 0.9223, "step": 7638 }, { "epoch": 0.6825563473094021, "grad_norm": 0.4341084063053131, "learning_rate": 2.4173367975561345e-05, "loss": 0.9385, "step": 7639 }, { "epoch": 0.6826456988406638, "grad_norm": 0.5083869695663452, "learning_rate": 2.416097825272351e-05, "loss": 0.9852, "step": 7640 }, { "epoch": 0.6827350503719257, "grad_norm": 0.6154478192329407, "learning_rate": 2.4148590694179168e-05, "loss": 0.8429, "step": 7641 }, { "epoch": 0.6828244019031876, "grad_norm": 0.49730563163757324, "learning_rate": 2.413620530096592e-05, "loss": 0.9575, "step": 7642 }, { "epoch": 0.6829137534344495, "grad_norm": 0.5614471435546875, "learning_rate": 2.412382207412116e-05, "loss": 0.8966, "step": 7643 }, { "epoch": 0.6830031049657114, "grad_norm": 0.4349358081817627, "learning_rate": 2.4111441014682123e-05, "loss": 0.9805, "step": 7644 }, { "epoch": 0.6830924564969733, "grad_norm": 0.40343138575553894, "learning_rate": 2.4099062123685852e-05, "loss": 0.9149, "step": 7645 }, { "epoch": 0.6831818080282351, "grad_norm": 0.4427891969680786, "learning_rate": 2.4086685402169234e-05, "loss": 0.9948, "step": 7646 }, { "epoch": 0.6832711595594969, "grad_norm": 0.5709788203239441, "learning_rate": 2.407431085116891e-05, "loss": 0.946, "step": 7647 }, { "epoch": 0.6833605110907588, "grad_norm": 0.4347441494464874, "learning_rate": 2.4061938471721395e-05, "loss": 1.0256, "step": 7648 }, { "epoch": 0.6834498626220207, "grad_norm": 0.509070098400116, "learning_rate": 2.4049568264863022e-05, "loss": 0.885, "step": 7649 }, { "epoch": 0.6835392141532826, "grad_norm": 0.5047274231910706, "learning_rate": 2.403720023162991e-05, "loss": 0.9298, "step": 7650 }, { "epoch": 0.6836285656845444, "grad_norm": 0.4579404890537262, "learning_rate": 2.4024834373058023e-05, "loss": 0.9002, "step": 7651 }, { "epoch": 0.6837179172158063, "grad_norm": 0.4215395748615265, "learning_rate": 2.4012470690183136e-05, "loss": 1.0175, "step": 7652 }, { "epoch": 0.6838072687470681, "grad_norm": 0.4573095738887787, "learning_rate": 2.4000109184040837e-05, "loss": 0.9326, "step": 7653 }, { "epoch": 0.68389662027833, "grad_norm": 0.4098723530769348, "learning_rate": 2.3987749855666532e-05, "loss": 1.0006, "step": 7654 }, { "epoch": 0.6839859718095919, "grad_norm": 0.4912080764770508, "learning_rate": 2.3975392706095446e-05, "loss": 0.8999, "step": 7655 }, { "epoch": 0.6840753233408537, "grad_norm": 0.4591172933578491, "learning_rate": 2.3963037736362643e-05, "loss": 0.9086, "step": 7656 }, { "epoch": 0.6841646748721156, "grad_norm": 0.4539848864078522, "learning_rate": 2.3950684947502944e-05, "loss": 0.9633, "step": 7657 }, { "epoch": 0.6842540264033775, "grad_norm": 0.462184876203537, "learning_rate": 2.3938334340551044e-05, "loss": 0.9912, "step": 7658 }, { "epoch": 0.6843433779346394, "grad_norm": 0.46925488114356995, "learning_rate": 2.3925985916541443e-05, "loss": 0.9211, "step": 7659 }, { "epoch": 0.6844327294659012, "grad_norm": 0.5577223300933838, "learning_rate": 2.3913639676508472e-05, "loss": 0.8373, "step": 7660 }, { "epoch": 0.684522080997163, "grad_norm": 0.42695876955986023, "learning_rate": 2.390129562148622e-05, "loss": 0.8683, "step": 7661 }, { "epoch": 0.6846114325284249, "grad_norm": 0.5081654191017151, "learning_rate": 2.3888953752508647e-05, "loss": 0.8314, "step": 7662 }, { "epoch": 0.6847007840596868, "grad_norm": 0.6528906226158142, "learning_rate": 2.387661407060952e-05, "loss": 0.9019, "step": 7663 }, { "epoch": 0.6847901355909487, "grad_norm": 0.3996724784374237, "learning_rate": 2.3864276576822426e-05, "loss": 0.9953, "step": 7664 }, { "epoch": 0.6848794871222106, "grad_norm": 0.4197225868701935, "learning_rate": 2.385194127218075e-05, "loss": 1.0005, "step": 7665 }, { "epoch": 0.6849688386534725, "grad_norm": 0.51161789894104, "learning_rate": 2.3839608157717734e-05, "loss": 0.9327, "step": 7666 }, { "epoch": 0.6850581901847342, "grad_norm": 0.4613009989261627, "learning_rate": 2.3827277234466362e-05, "loss": 1.0265, "step": 7667 }, { "epoch": 0.6851475417159961, "grad_norm": 0.4957435429096222, "learning_rate": 2.3814948503459507e-05, "loss": 0.9579, "step": 7668 }, { "epoch": 0.685236893247258, "grad_norm": 0.5043631792068481, "learning_rate": 2.380262196572982e-05, "loss": 0.9527, "step": 7669 }, { "epoch": 0.6853262447785199, "grad_norm": 0.5093596577644348, "learning_rate": 2.3790297622309794e-05, "loss": 0.9432, "step": 7670 }, { "epoch": 0.6854155963097818, "grad_norm": 0.433877557516098, "learning_rate": 2.3777975474231718e-05, "loss": 0.8894, "step": 7671 }, { "epoch": 0.6855049478410437, "grad_norm": 0.558192253112793, "learning_rate": 2.3765655522527695e-05, "loss": 0.9565, "step": 7672 }, { "epoch": 0.6855942993723055, "grad_norm": 0.4382516145706177, "learning_rate": 2.3753337768229667e-05, "loss": 0.9967, "step": 7673 }, { "epoch": 0.6856836509035673, "grad_norm": 0.5013807415962219, "learning_rate": 2.374102221236937e-05, "loss": 0.9572, "step": 7674 }, { "epoch": 0.6857730024348292, "grad_norm": 0.43265098333358765, "learning_rate": 2.372870885597836e-05, "loss": 0.961, "step": 7675 }, { "epoch": 0.6858623539660911, "grad_norm": 0.4570571780204773, "learning_rate": 2.371639770008804e-05, "loss": 0.9556, "step": 7676 }, { "epoch": 0.685951705497353, "grad_norm": 0.4653104841709137, "learning_rate": 2.370408874572955e-05, "loss": 0.9336, "step": 7677 }, { "epoch": 0.6860410570286148, "grad_norm": 0.5548616051673889, "learning_rate": 2.3691781993933926e-05, "loss": 1.0193, "step": 7678 }, { "epoch": 0.6861304085598767, "grad_norm": 0.41967645287513733, "learning_rate": 2.3679477445731986e-05, "loss": 0.9393, "step": 7679 }, { "epoch": 0.6862197600911386, "grad_norm": 0.4959544241428375, "learning_rate": 2.366717510215436e-05, "loss": 0.8876, "step": 7680 }, { "epoch": 0.6863091116224004, "grad_norm": 0.45173823833465576, "learning_rate": 2.3654874964231518e-05, "loss": 0.9034, "step": 7681 }, { "epoch": 0.6863984631536623, "grad_norm": 0.4388938248157501, "learning_rate": 2.3642577032993705e-05, "loss": 0.8964, "step": 7682 }, { "epoch": 0.6864878146849241, "grad_norm": 0.42833590507507324, "learning_rate": 2.363028130947102e-05, "loss": 0.9542, "step": 7683 }, { "epoch": 0.686577166216186, "grad_norm": 0.556098997592926, "learning_rate": 2.361798779469336e-05, "loss": 0.9357, "step": 7684 }, { "epoch": 0.6866665177474479, "grad_norm": 0.5818011164665222, "learning_rate": 2.3605696489690427e-05, "loss": 0.8631, "step": 7685 }, { "epoch": 0.6867558692787098, "grad_norm": 0.40382498502731323, "learning_rate": 2.3593407395491778e-05, "loss": 0.9778, "step": 7686 }, { "epoch": 0.6868452208099717, "grad_norm": 0.44755667448043823, "learning_rate": 2.358112051312672e-05, "loss": 0.9621, "step": 7687 }, { "epoch": 0.6869345723412335, "grad_norm": 0.5690574049949646, "learning_rate": 2.3568835843624422e-05, "loss": 0.897, "step": 7688 }, { "epoch": 0.6870239238724953, "grad_norm": 0.5574625134468079, "learning_rate": 2.3556553388013852e-05, "loss": 0.9749, "step": 7689 }, { "epoch": 0.6871132754037572, "grad_norm": 0.4903115928173065, "learning_rate": 2.3544273147323807e-05, "loss": 0.9872, "step": 7690 }, { "epoch": 0.6872026269350191, "grad_norm": 0.4215288460254669, "learning_rate": 2.3531995122582883e-05, "loss": 0.9412, "step": 7691 }, { "epoch": 0.687291978466281, "grad_norm": 0.5319769978523254, "learning_rate": 2.3519719314819493e-05, "loss": 0.9143, "step": 7692 }, { "epoch": 0.6873813299975429, "grad_norm": 0.5102341175079346, "learning_rate": 2.3507445725061895e-05, "loss": 0.8844, "step": 7693 }, { "epoch": 0.6874706815288048, "grad_norm": 0.6138080954551697, "learning_rate": 2.3495174354338084e-05, "loss": 0.9267, "step": 7694 }, { "epoch": 0.6875600330600665, "grad_norm": 0.4891884922981262, "learning_rate": 2.348290520367595e-05, "loss": 0.9271, "step": 7695 }, { "epoch": 0.6876493845913284, "grad_norm": 0.44753071665763855, "learning_rate": 2.3470638274103147e-05, "loss": 0.9276, "step": 7696 }, { "epoch": 0.6877387361225903, "grad_norm": 0.44476214051246643, "learning_rate": 2.3458373566647174e-05, "loss": 0.9433, "step": 7697 }, { "epoch": 0.6878280876538522, "grad_norm": 0.39918237924575806, "learning_rate": 2.344611108233535e-05, "loss": 0.9188, "step": 7698 }, { "epoch": 0.687917439185114, "grad_norm": 0.4474645256996155, "learning_rate": 2.343385082219475e-05, "loss": 0.95, "step": 7699 }, { "epoch": 0.6880067907163759, "grad_norm": 0.455310195684433, "learning_rate": 2.342159278725231e-05, "loss": 0.9567, "step": 7700 }, { "epoch": 0.6880961422476378, "grad_norm": 0.5603063702583313, "learning_rate": 2.3409336978534783e-05, "loss": 0.8424, "step": 7701 }, { "epoch": 0.6881854937788996, "grad_norm": 0.4089902937412262, "learning_rate": 2.3397083397068724e-05, "loss": 0.9721, "step": 7702 }, { "epoch": 0.6882748453101615, "grad_norm": 0.563758373260498, "learning_rate": 2.3384832043880495e-05, "loss": 0.9647, "step": 7703 }, { "epoch": 0.6883641968414234, "grad_norm": 0.4580053389072418, "learning_rate": 2.337258291999628e-05, "loss": 1.037, "step": 7704 }, { "epoch": 0.6884535483726852, "grad_norm": 0.5256210565567017, "learning_rate": 2.336033602644207e-05, "loss": 0.9317, "step": 7705 }, { "epoch": 0.6885428999039471, "grad_norm": 0.5540886521339417, "learning_rate": 2.3348091364243703e-05, "loss": 0.9263, "step": 7706 }, { "epoch": 0.688632251435209, "grad_norm": 0.4322797954082489, "learning_rate": 2.3335848934426746e-05, "loss": 0.9876, "step": 7707 }, { "epoch": 0.6887216029664709, "grad_norm": 0.49090680480003357, "learning_rate": 2.3323608738016663e-05, "loss": 0.9329, "step": 7708 }, { "epoch": 0.6888109544977327, "grad_norm": 0.5516027808189392, "learning_rate": 2.3311370776038698e-05, "loss": 1.0222, "step": 7709 }, { "epoch": 0.6889003060289945, "grad_norm": 0.4046795666217804, "learning_rate": 2.3299135049517913e-05, "loss": 0.9255, "step": 7710 }, { "epoch": 0.6889896575602564, "grad_norm": 0.5095672607421875, "learning_rate": 2.3286901559479175e-05, "loss": 0.9252, "step": 7711 }, { "epoch": 0.6890790090915183, "grad_norm": 0.5826500058174133, "learning_rate": 2.3274670306947173e-05, "loss": 0.7873, "step": 7712 }, { "epoch": 0.6891683606227802, "grad_norm": 0.5110095143318176, "learning_rate": 2.3262441292946407e-05, "loss": 0.9253, "step": 7713 }, { "epoch": 0.6892577121540421, "grad_norm": 0.4405474364757538, "learning_rate": 2.3250214518501184e-05, "loss": 0.9763, "step": 7714 }, { "epoch": 0.689347063685304, "grad_norm": 0.42986834049224854, "learning_rate": 2.3237989984635628e-05, "loss": 0.9512, "step": 7715 }, { "epoch": 0.6894364152165657, "grad_norm": 0.42974916100502014, "learning_rate": 2.3225767692373686e-05, "loss": 0.9491, "step": 7716 }, { "epoch": 0.6895257667478276, "grad_norm": 0.41702863574028015, "learning_rate": 2.3213547642739082e-05, "loss": 0.9853, "step": 7717 }, { "epoch": 0.6896151182790895, "grad_norm": 0.45753413438796997, "learning_rate": 2.3201329836755382e-05, "loss": 0.9201, "step": 7718 }, { "epoch": 0.6897044698103514, "grad_norm": 0.6328713297843933, "learning_rate": 2.3189114275445963e-05, "loss": 0.8626, "step": 7719 }, { "epoch": 0.6897938213416133, "grad_norm": 0.4760781228542328, "learning_rate": 2.3176900959834004e-05, "loss": 0.9065, "step": 7720 }, { "epoch": 0.6898831728728752, "grad_norm": 0.5676954388618469, "learning_rate": 2.3164689890942504e-05, "loss": 0.9782, "step": 7721 }, { "epoch": 0.6899725244041369, "grad_norm": 0.4092563986778259, "learning_rate": 2.315248106979427e-05, "loss": 1.0402, "step": 7722 }, { "epoch": 0.6900618759353988, "grad_norm": 0.4689728915691376, "learning_rate": 2.3140274497411918e-05, "loss": 0.9736, "step": 7723 }, { "epoch": 0.6901512274666607, "grad_norm": 0.5073100924491882, "learning_rate": 2.3128070174817884e-05, "loss": 0.9342, "step": 7724 }, { "epoch": 0.6902405789979226, "grad_norm": 0.48991286754608154, "learning_rate": 2.311586810303441e-05, "loss": 0.9554, "step": 7725 }, { "epoch": 0.6903299305291845, "grad_norm": 0.45478180050849915, "learning_rate": 2.3103668283083564e-05, "loss": 0.9772, "step": 7726 }, { "epoch": 0.6904192820604463, "grad_norm": 0.5075895190238953, "learning_rate": 2.3091470715987167e-05, "loss": 0.8529, "step": 7727 }, { "epoch": 0.6905086335917082, "grad_norm": 0.5047464966773987, "learning_rate": 2.307927540276693e-05, "loss": 0.9655, "step": 7728 }, { "epoch": 0.69059798512297, "grad_norm": 0.47214803099632263, "learning_rate": 2.306708234444433e-05, "loss": 0.9132, "step": 7729 }, { "epoch": 0.6906873366542319, "grad_norm": 0.4735872745513916, "learning_rate": 2.305489154204067e-05, "loss": 0.8661, "step": 7730 }, { "epoch": 0.6907766881854938, "grad_norm": 0.4646959900856018, "learning_rate": 2.304270299657707e-05, "loss": 0.9096, "step": 7731 }, { "epoch": 0.6908660397167556, "grad_norm": 0.45085829496383667, "learning_rate": 2.3030516709074424e-05, "loss": 0.9554, "step": 7732 }, { "epoch": 0.6909553912480175, "grad_norm": 0.47358912229537964, "learning_rate": 2.3018332680553477e-05, "loss": 0.9515, "step": 7733 }, { "epoch": 0.6910447427792794, "grad_norm": 0.4590679407119751, "learning_rate": 2.3006150912034774e-05, "loss": 0.9225, "step": 7734 }, { "epoch": 0.6911340943105413, "grad_norm": 0.5170496702194214, "learning_rate": 2.2993971404538668e-05, "loss": 0.8962, "step": 7735 }, { "epoch": 0.6912234458418031, "grad_norm": 0.4720090627670288, "learning_rate": 2.298179415908531e-05, "loss": 0.9482, "step": 7736 }, { "epoch": 0.691312797373065, "grad_norm": 0.572025716304779, "learning_rate": 2.296961917669471e-05, "loss": 0.8859, "step": 7737 }, { "epoch": 0.6914021489043268, "grad_norm": 0.4607776403427124, "learning_rate": 2.295744645838661e-05, "loss": 0.9991, "step": 7738 }, { "epoch": 0.6914915004355887, "grad_norm": 0.5466684699058533, "learning_rate": 2.2945276005180623e-05, "loss": 0.896, "step": 7739 }, { "epoch": 0.6915808519668506, "grad_norm": 0.48179617524147034, "learning_rate": 2.293310781809615e-05, "loss": 0.9066, "step": 7740 }, { "epoch": 0.6916702034981125, "grad_norm": 0.4328363239765167, "learning_rate": 2.292094189815241e-05, "loss": 0.9941, "step": 7741 }, { "epoch": 0.6917595550293744, "grad_norm": 0.4751478135585785, "learning_rate": 2.290877824636843e-05, "loss": 0.9366, "step": 7742 }, { "epoch": 0.6918489065606361, "grad_norm": 0.47883662581443787, "learning_rate": 2.2896616863763038e-05, "loss": 1.0028, "step": 7743 }, { "epoch": 0.691938258091898, "grad_norm": 0.5021872520446777, "learning_rate": 2.2884457751354887e-05, "loss": 0.9387, "step": 7744 }, { "epoch": 0.6920276096231599, "grad_norm": 0.45752233266830444, "learning_rate": 2.2872300910162436e-05, "loss": 0.9381, "step": 7745 }, { "epoch": 0.6921169611544218, "grad_norm": 0.465744286775589, "learning_rate": 2.2860146341203937e-05, "loss": 0.921, "step": 7746 }, { "epoch": 0.6922063126856837, "grad_norm": 0.47951745986938477, "learning_rate": 2.2847994045497496e-05, "loss": 0.9759, "step": 7747 }, { "epoch": 0.6922956642169455, "grad_norm": 0.4690631628036499, "learning_rate": 2.2835844024060953e-05, "loss": 0.9177, "step": 7748 }, { "epoch": 0.6923850157482074, "grad_norm": 0.5409393310546875, "learning_rate": 2.282369627791202e-05, "loss": 0.905, "step": 7749 }, { "epoch": 0.6924743672794692, "grad_norm": 0.5218047499656677, "learning_rate": 2.2811550808068205e-05, "loss": 0.9922, "step": 7750 }, { "epoch": 0.6925637188107311, "grad_norm": 0.5113648176193237, "learning_rate": 2.2799407615546815e-05, "loss": 0.9426, "step": 7751 }, { "epoch": 0.692653070341993, "grad_norm": 0.4514075517654419, "learning_rate": 2.278726670136498e-05, "loss": 0.9237, "step": 7752 }, { "epoch": 0.6927424218732549, "grad_norm": 0.4475104510784149, "learning_rate": 2.277512806653962e-05, "loss": 0.9978, "step": 7753 }, { "epoch": 0.6928317734045167, "grad_norm": 0.4956379532814026, "learning_rate": 2.2762991712087484e-05, "loss": 0.8725, "step": 7754 }, { "epoch": 0.6929211249357786, "grad_norm": 0.52278733253479, "learning_rate": 2.2750857639025113e-05, "loss": 0.8942, "step": 7755 }, { "epoch": 0.6930104764670405, "grad_norm": 0.5214927792549133, "learning_rate": 2.2738725848368875e-05, "loss": 0.9485, "step": 7756 }, { "epoch": 0.6930998279983023, "grad_norm": 0.44215962290763855, "learning_rate": 2.272659634113495e-05, "loss": 0.9248, "step": 7757 }, { "epoch": 0.6931891795295642, "grad_norm": 0.4372926652431488, "learning_rate": 2.271446911833927e-05, "loss": 0.9661, "step": 7758 }, { "epoch": 0.693278531060826, "grad_norm": 0.4903610050678253, "learning_rate": 2.2702344180997647e-05, "loss": 0.945, "step": 7759 }, { "epoch": 0.6933678825920879, "grad_norm": 0.5382173657417297, "learning_rate": 2.2690221530125676e-05, "loss": 0.9443, "step": 7760 }, { "epoch": 0.6934572341233498, "grad_norm": 0.5113039016723633, "learning_rate": 2.2678101166738746e-05, "loss": 0.9368, "step": 7761 }, { "epoch": 0.6935465856546117, "grad_norm": 0.6735799312591553, "learning_rate": 2.2665983091852083e-05, "loss": 0.7989, "step": 7762 }, { "epoch": 0.6936359371858736, "grad_norm": 0.4851955771446228, "learning_rate": 2.2653867306480708e-05, "loss": 0.9514, "step": 7763 }, { "epoch": 0.6937252887171353, "grad_norm": 0.4901287853717804, "learning_rate": 2.2641753811639417e-05, "loss": 0.9322, "step": 7764 }, { "epoch": 0.6938146402483972, "grad_norm": 0.5687100291252136, "learning_rate": 2.262964260834286e-05, "loss": 0.8931, "step": 7765 }, { "epoch": 0.6939039917796591, "grad_norm": 0.45068779587745667, "learning_rate": 2.2617533697605485e-05, "loss": 0.9799, "step": 7766 }, { "epoch": 0.693993343310921, "grad_norm": 0.4677877724170685, "learning_rate": 2.260542708044154e-05, "loss": 0.9831, "step": 7767 }, { "epoch": 0.6940826948421829, "grad_norm": 0.4960726201534271, "learning_rate": 2.2593322757865097e-05, "loss": 0.8693, "step": 7768 }, { "epoch": 0.6941720463734448, "grad_norm": 0.5460396409034729, "learning_rate": 2.258122073088999e-05, "loss": 0.8695, "step": 7769 }, { "epoch": 0.6942613979047066, "grad_norm": 0.44287580251693726, "learning_rate": 2.2569121000529915e-05, "loss": 0.9818, "step": 7770 }, { "epoch": 0.6943507494359684, "grad_norm": 0.5541728734970093, "learning_rate": 2.2557023567798342e-05, "loss": 0.974, "step": 7771 }, { "epoch": 0.6944401009672303, "grad_norm": 0.5429545044898987, "learning_rate": 2.254492843370857e-05, "loss": 0.9813, "step": 7772 }, { "epoch": 0.6945294524984922, "grad_norm": 0.42696717381477356, "learning_rate": 2.2532835599273687e-05, "loss": 0.8715, "step": 7773 }, { "epoch": 0.6946188040297541, "grad_norm": 0.49874842166900635, "learning_rate": 2.2520745065506603e-05, "loss": 0.8765, "step": 7774 }, { "epoch": 0.694708155561016, "grad_norm": 0.49501675367355347, "learning_rate": 2.2508656833420026e-05, "loss": 0.9732, "step": 7775 }, { "epoch": 0.6947975070922778, "grad_norm": 0.48265352845191956, "learning_rate": 2.2496570904026483e-05, "loss": 0.8776, "step": 7776 }, { "epoch": 0.6948868586235397, "grad_norm": 0.46912387013435364, "learning_rate": 2.2484487278338305e-05, "loss": 0.9019, "step": 7777 }, { "epoch": 0.6949762101548015, "grad_norm": 0.4739210605621338, "learning_rate": 2.2472405957367593e-05, "loss": 0.899, "step": 7778 }, { "epoch": 0.6950655616860634, "grad_norm": 0.4149891436100006, "learning_rate": 2.2460326942126307e-05, "loss": 0.9424, "step": 7779 }, { "epoch": 0.6951549132173253, "grad_norm": 0.43324029445648193, "learning_rate": 2.2448250233626195e-05, "loss": 0.927, "step": 7780 }, { "epoch": 0.6952442647485871, "grad_norm": 0.7626486420631409, "learning_rate": 2.2436175832878802e-05, "loss": 0.9243, "step": 7781 }, { "epoch": 0.695333616279849, "grad_norm": 0.4425656795501709, "learning_rate": 2.242410374089549e-05, "loss": 0.9539, "step": 7782 }, { "epoch": 0.6954229678111109, "grad_norm": 0.4871158003807068, "learning_rate": 2.2412033958687433e-05, "loss": 0.8842, "step": 7783 }, { "epoch": 0.6955123193423727, "grad_norm": 0.5576094388961792, "learning_rate": 2.2399966487265596e-05, "loss": 0.9452, "step": 7784 }, { "epoch": 0.6956016708736346, "grad_norm": 0.41526561975479126, "learning_rate": 2.238790132764076e-05, "loss": 0.988, "step": 7785 }, { "epoch": 0.6956910224048964, "grad_norm": 0.4997550845146179, "learning_rate": 2.237583848082351e-05, "loss": 0.942, "step": 7786 }, { "epoch": 0.6957803739361583, "grad_norm": 0.44697871804237366, "learning_rate": 2.2363777947824265e-05, "loss": 0.9652, "step": 7787 }, { "epoch": 0.6958697254674202, "grad_norm": 0.4410659372806549, "learning_rate": 2.2351719729653175e-05, "loss": 0.9358, "step": 7788 }, { "epoch": 0.6959590769986821, "grad_norm": 0.39497098326683044, "learning_rate": 2.233966382732027e-05, "loss": 0.9459, "step": 7789 }, { "epoch": 0.696048428529944, "grad_norm": 0.45145925879478455, "learning_rate": 2.232761024183535e-05, "loss": 0.9192, "step": 7790 }, { "epoch": 0.6961377800612057, "grad_norm": 0.46766191720962524, "learning_rate": 2.2315558974208045e-05, "loss": 0.9029, "step": 7791 }, { "epoch": 0.6962271315924676, "grad_norm": 0.4594910740852356, "learning_rate": 2.2303510025447765e-05, "loss": 0.8901, "step": 7792 }, { "epoch": 0.6963164831237295, "grad_norm": 0.48282474279403687, "learning_rate": 2.229146339656375e-05, "loss": 0.9668, "step": 7793 }, { "epoch": 0.6964058346549914, "grad_norm": 0.5255979895591736, "learning_rate": 2.227941908856503e-05, "loss": 0.932, "step": 7794 }, { "epoch": 0.6964951861862533, "grad_norm": 0.4911576509475708, "learning_rate": 2.2267377102460436e-05, "loss": 0.9966, "step": 7795 }, { "epoch": 0.6965845377175152, "grad_norm": 0.5194364190101624, "learning_rate": 2.2255337439258633e-05, "loss": 0.9707, "step": 7796 }, { "epoch": 0.696673889248777, "grad_norm": 0.5468015074729919, "learning_rate": 2.2243300099968046e-05, "loss": 0.8358, "step": 7797 }, { "epoch": 0.6967632407800388, "grad_norm": 0.47449544072151184, "learning_rate": 2.2231265085596938e-05, "loss": 0.9037, "step": 7798 }, { "epoch": 0.6968525923113007, "grad_norm": 0.48344165086746216, "learning_rate": 2.2219232397153366e-05, "loss": 0.8762, "step": 7799 }, { "epoch": 0.6969419438425626, "grad_norm": 0.433371365070343, "learning_rate": 2.220720203564521e-05, "loss": 0.9288, "step": 7800 }, { "epoch": 0.6970312953738245, "grad_norm": 0.49428948760032654, "learning_rate": 2.219517400208015e-05, "loss": 0.9456, "step": 7801 }, { "epoch": 0.6971206469050863, "grad_norm": 0.4290115535259247, "learning_rate": 2.2183148297465627e-05, "loss": 0.9207, "step": 7802 }, { "epoch": 0.6972099984363482, "grad_norm": 0.4194071590900421, "learning_rate": 2.217112492280894e-05, "loss": 0.9429, "step": 7803 }, { "epoch": 0.6972993499676101, "grad_norm": 0.4213373363018036, "learning_rate": 2.2159103879117177e-05, "loss": 0.9443, "step": 7804 }, { "epoch": 0.6973887014988719, "grad_norm": 0.4857761263847351, "learning_rate": 2.2147085167397223e-05, "loss": 0.9155, "step": 7805 }, { "epoch": 0.6974780530301338, "grad_norm": 0.4734998941421509, "learning_rate": 2.2135068788655782e-05, "loss": 0.9608, "step": 7806 }, { "epoch": 0.6975674045613957, "grad_norm": 0.47898635268211365, "learning_rate": 2.2123054743899346e-05, "loss": 0.9165, "step": 7807 }, { "epoch": 0.6976567560926575, "grad_norm": 0.556919515132904, "learning_rate": 2.211104303413424e-05, "loss": 0.9658, "step": 7808 }, { "epoch": 0.6977461076239194, "grad_norm": 0.3948970139026642, "learning_rate": 2.2099033660366537e-05, "loss": 1.032, "step": 7809 }, { "epoch": 0.6978354591551813, "grad_norm": 0.4545915126800537, "learning_rate": 2.2087026623602164e-05, "loss": 0.9332, "step": 7810 }, { "epoch": 0.6979248106864432, "grad_norm": 0.4447174668312073, "learning_rate": 2.207502192484685e-05, "loss": 0.9648, "step": 7811 }, { "epoch": 0.698014162217705, "grad_norm": 0.5227632522583008, "learning_rate": 2.2063019565106102e-05, "loss": 0.9064, "step": 7812 }, { "epoch": 0.6981035137489668, "grad_norm": 0.41880759596824646, "learning_rate": 2.2051019545385255e-05, "loss": 0.9345, "step": 7813 }, { "epoch": 0.6981928652802287, "grad_norm": 0.5738639831542969, "learning_rate": 2.2039021866689435e-05, "loss": 0.8841, "step": 7814 }, { "epoch": 0.6982822168114906, "grad_norm": 0.38003218173980713, "learning_rate": 2.202702653002358e-05, "loss": 0.9731, "step": 7815 }, { "epoch": 0.6983715683427525, "grad_norm": 0.46742263436317444, "learning_rate": 2.2015033536392422e-05, "loss": 0.9019, "step": 7816 }, { "epoch": 0.6984609198740144, "grad_norm": 0.57480788230896, "learning_rate": 2.20030428868005e-05, "loss": 0.9423, "step": 7817 }, { "epoch": 0.6985502714052763, "grad_norm": 0.4974348545074463, "learning_rate": 2.199105458225218e-05, "loss": 0.9786, "step": 7818 }, { "epoch": 0.698639622936538, "grad_norm": 0.5195631980895996, "learning_rate": 2.197906862375158e-05, "loss": 0.9327, "step": 7819 }, { "epoch": 0.6987289744677999, "grad_norm": 0.4291267991065979, "learning_rate": 2.1967085012302663e-05, "loss": 0.9707, "step": 7820 }, { "epoch": 0.6988183259990618, "grad_norm": 0.5776417255401611, "learning_rate": 2.1955103748909185e-05, "loss": 0.836, "step": 7821 }, { "epoch": 0.6989076775303237, "grad_norm": 0.45474281907081604, "learning_rate": 2.19431248345747e-05, "loss": 0.9669, "step": 7822 }, { "epoch": 0.6989970290615856, "grad_norm": 0.41181471943855286, "learning_rate": 2.193114827030258e-05, "loss": 0.9725, "step": 7823 }, { "epoch": 0.6990863805928474, "grad_norm": 0.5399705767631531, "learning_rate": 2.191917405709598e-05, "loss": 0.8856, "step": 7824 }, { "epoch": 0.6991757321241093, "grad_norm": 0.37962204217910767, "learning_rate": 2.1907202195957882e-05, "loss": 0.9573, "step": 7825 }, { "epoch": 0.6992650836553711, "grad_norm": 0.5898367762565613, "learning_rate": 2.1895232687891043e-05, "loss": 0.9001, "step": 7826 }, { "epoch": 0.699354435186633, "grad_norm": 0.4448642432689667, "learning_rate": 2.1883265533898038e-05, "loss": 0.9668, "step": 7827 }, { "epoch": 0.6994437867178949, "grad_norm": 0.42721062898635864, "learning_rate": 2.187130073498127e-05, "loss": 0.96, "step": 7828 }, { "epoch": 0.6995331382491567, "grad_norm": 0.6047458648681641, "learning_rate": 2.1859338292142876e-05, "loss": 0.9264, "step": 7829 }, { "epoch": 0.6996224897804186, "grad_norm": 0.4223198890686035, "learning_rate": 2.184737820638486e-05, "loss": 0.9497, "step": 7830 }, { "epoch": 0.6997118413116805, "grad_norm": 0.5067864060401917, "learning_rate": 2.1835420478709e-05, "loss": 0.8954, "step": 7831 }, { "epoch": 0.6998011928429424, "grad_norm": 0.4639846682548523, "learning_rate": 2.182346511011689e-05, "loss": 0.9765, "step": 7832 }, { "epoch": 0.6998905443742042, "grad_norm": 0.44720324873924255, "learning_rate": 2.1811512101609922e-05, "loss": 0.9591, "step": 7833 }, { "epoch": 0.699979895905466, "grad_norm": 0.4672408699989319, "learning_rate": 2.17995614541893e-05, "loss": 0.9872, "step": 7834 }, { "epoch": 0.7000692474367279, "grad_norm": 0.5736651420593262, "learning_rate": 2.1787613168855974e-05, "loss": 0.9987, "step": 7835 }, { "epoch": 0.7001585989679898, "grad_norm": 0.49598804116249084, "learning_rate": 2.1775667246610775e-05, "loss": 0.905, "step": 7836 }, { "epoch": 0.7002479504992517, "grad_norm": 0.41947072744369507, "learning_rate": 2.1763723688454298e-05, "loss": 0.9417, "step": 7837 }, { "epoch": 0.7003373020305136, "grad_norm": 0.4067612588405609, "learning_rate": 2.1751782495386946e-05, "loss": 0.9841, "step": 7838 }, { "epoch": 0.7004266535617755, "grad_norm": 0.4044066071510315, "learning_rate": 2.1739843668408904e-05, "loss": 0.9668, "step": 7839 }, { "epoch": 0.7005160050930372, "grad_norm": 0.712028980255127, "learning_rate": 2.1727907208520187e-05, "loss": 0.8248, "step": 7840 }, { "epoch": 0.7006053566242991, "grad_norm": 0.4395425617694855, "learning_rate": 2.1715973116720594e-05, "loss": 0.9757, "step": 7841 }, { "epoch": 0.700694708155561, "grad_norm": 0.4635796546936035, "learning_rate": 2.1704041394009745e-05, "loss": 0.9057, "step": 7842 }, { "epoch": 0.7007840596868229, "grad_norm": 0.46956396102905273, "learning_rate": 2.169211204138704e-05, "loss": 0.9642, "step": 7843 }, { "epoch": 0.7008734112180848, "grad_norm": 0.4673489034175873, "learning_rate": 2.1680185059851692e-05, "loss": 0.9328, "step": 7844 }, { "epoch": 0.7009627627493467, "grad_norm": 0.4535006880760193, "learning_rate": 2.166826045040271e-05, "loss": 0.9427, "step": 7845 }, { "epoch": 0.7010521142806084, "grad_norm": 0.5039295554161072, "learning_rate": 2.165633821403892e-05, "loss": 0.894, "step": 7846 }, { "epoch": 0.7011414658118703, "grad_norm": 0.6144269704818726, "learning_rate": 2.1644418351758917e-05, "loss": 0.9041, "step": 7847 }, { "epoch": 0.7012308173431322, "grad_norm": 0.5136603116989136, "learning_rate": 2.1632500864561146e-05, "loss": 0.8892, "step": 7848 }, { "epoch": 0.7013201688743941, "grad_norm": 0.46805840730667114, "learning_rate": 2.1620585753443786e-05, "loss": 1.002, "step": 7849 }, { "epoch": 0.701409520405656, "grad_norm": 0.41075465083122253, "learning_rate": 2.160867301940487e-05, "loss": 0.9253, "step": 7850 }, { "epoch": 0.7014988719369178, "grad_norm": 0.46872520446777344, "learning_rate": 2.1596762663442218e-05, "loss": 0.909, "step": 7851 }, { "epoch": 0.7015882234681797, "grad_norm": 0.5055715441703796, "learning_rate": 2.1584854686553453e-05, "loss": 0.9786, "step": 7852 }, { "epoch": 0.7016775749994415, "grad_norm": 0.48132845759391785, "learning_rate": 2.1572949089735987e-05, "loss": 0.9081, "step": 7853 }, { "epoch": 0.7017669265307034, "grad_norm": 0.5568394660949707, "learning_rate": 2.1561045873987046e-05, "loss": 0.8675, "step": 7854 }, { "epoch": 0.7018562780619653, "grad_norm": 0.5073636770248413, "learning_rate": 2.1549145040303654e-05, "loss": 0.9566, "step": 7855 }, { "epoch": 0.7019456295932271, "grad_norm": 0.42798787355422974, "learning_rate": 2.153724658968263e-05, "loss": 0.9682, "step": 7856 }, { "epoch": 0.702034981124489, "grad_norm": 0.5035516023635864, "learning_rate": 2.152535052312059e-05, "loss": 0.9313, "step": 7857 }, { "epoch": 0.7021243326557509, "grad_norm": 0.43961191177368164, "learning_rate": 2.1513456841613982e-05, "loss": 0.9328, "step": 7858 }, { "epoch": 0.7022136841870128, "grad_norm": 0.4986516833305359, "learning_rate": 2.1501565546158993e-05, "loss": 0.9385, "step": 7859 }, { "epoch": 0.7023030357182746, "grad_norm": 0.5121946930885315, "learning_rate": 2.148967663775166e-05, "loss": 0.8721, "step": 7860 }, { "epoch": 0.7023923872495365, "grad_norm": 0.6112991571426392, "learning_rate": 2.1477790117387808e-05, "loss": 0.9234, "step": 7861 }, { "epoch": 0.7024817387807983, "grad_norm": 0.4206686317920685, "learning_rate": 2.1465905986063056e-05, "loss": 0.9169, "step": 7862 }, { "epoch": 0.7025710903120602, "grad_norm": 0.5358665585517883, "learning_rate": 2.145402424477283e-05, "loss": 0.9301, "step": 7863 }, { "epoch": 0.7026604418433221, "grad_norm": 0.5316134095191956, "learning_rate": 2.1442144894512352e-05, "loss": 0.9495, "step": 7864 }, { "epoch": 0.702749793374584, "grad_norm": 0.5568259954452515, "learning_rate": 2.1430267936276637e-05, "loss": 0.9321, "step": 7865 }, { "epoch": 0.7028391449058459, "grad_norm": 0.39786767959594727, "learning_rate": 2.1418393371060542e-05, "loss": 0.9312, "step": 7866 }, { "epoch": 0.7029284964371076, "grad_norm": 0.6305972933769226, "learning_rate": 2.1406521199858637e-05, "loss": 0.8965, "step": 7867 }, { "epoch": 0.7030178479683695, "grad_norm": 0.5135489106178284, "learning_rate": 2.1394651423665368e-05, "loss": 1.0525, "step": 7868 }, { "epoch": 0.7031071994996314, "grad_norm": 0.5106921792030334, "learning_rate": 2.1382784043474953e-05, "loss": 0.9549, "step": 7869 }, { "epoch": 0.7031965510308933, "grad_norm": 0.462363064289093, "learning_rate": 2.1370919060281415e-05, "loss": 0.9733, "step": 7870 }, { "epoch": 0.7032859025621552, "grad_norm": 0.4888690710067749, "learning_rate": 2.135905647507858e-05, "loss": 0.8726, "step": 7871 }, { "epoch": 0.7033752540934171, "grad_norm": 0.5045296549797058, "learning_rate": 2.1347196288860045e-05, "loss": 0.9865, "step": 7872 }, { "epoch": 0.7034646056246789, "grad_norm": 0.47808846831321716, "learning_rate": 2.1335338502619233e-05, "loss": 0.9196, "step": 7873 }, { "epoch": 0.7035539571559407, "grad_norm": 0.42234256863594055, "learning_rate": 2.1323483117349368e-05, "loss": 0.9596, "step": 7874 }, { "epoch": 0.7036433086872026, "grad_norm": 0.432258278131485, "learning_rate": 2.131163013404346e-05, "loss": 1.0006, "step": 7875 }, { "epoch": 0.7037326602184645, "grad_norm": 0.45173177123069763, "learning_rate": 2.1299779553694323e-05, "loss": 0.9875, "step": 7876 }, { "epoch": 0.7038220117497264, "grad_norm": 0.5817151069641113, "learning_rate": 2.128793137729457e-05, "loss": 0.8725, "step": 7877 }, { "epoch": 0.7039113632809882, "grad_norm": 0.4585109055042267, "learning_rate": 2.1276085605836636e-05, "loss": 0.891, "step": 7878 }, { "epoch": 0.7040007148122501, "grad_norm": 0.547097384929657, "learning_rate": 2.1264242240312687e-05, "loss": 0.9042, "step": 7879 }, { "epoch": 0.704090066343512, "grad_norm": 0.6232752799987793, "learning_rate": 2.1252401281714752e-05, "loss": 0.8304, "step": 7880 }, { "epoch": 0.7041794178747738, "grad_norm": 0.4578670859336853, "learning_rate": 2.1240562731034635e-05, "loss": 0.8873, "step": 7881 }, { "epoch": 0.7042687694060357, "grad_norm": 0.47056087851524353, "learning_rate": 2.1228726589263942e-05, "loss": 0.9029, "step": 7882 }, { "epoch": 0.7043581209372975, "grad_norm": 0.48560357093811035, "learning_rate": 2.1216892857394083e-05, "loss": 0.8779, "step": 7883 }, { "epoch": 0.7044474724685594, "grad_norm": 0.536952555179596, "learning_rate": 2.120506153641625e-05, "loss": 0.8473, "step": 7884 }, { "epoch": 0.7045368239998213, "grad_norm": 0.41716116666793823, "learning_rate": 2.119323262732144e-05, "loss": 0.9998, "step": 7885 }, { "epoch": 0.7046261755310832, "grad_norm": 0.5672418475151062, "learning_rate": 2.1181406131100463e-05, "loss": 0.9055, "step": 7886 }, { "epoch": 0.7047155270623451, "grad_norm": 0.5225016474723816, "learning_rate": 2.1169582048743902e-05, "loss": 0.9609, "step": 7887 }, { "epoch": 0.7048048785936069, "grad_norm": 0.4605422019958496, "learning_rate": 2.1157760381242174e-05, "loss": 0.9329, "step": 7888 }, { "epoch": 0.7048942301248687, "grad_norm": 0.4933791756629944, "learning_rate": 2.1145941129585435e-05, "loss": 0.9709, "step": 7889 }, { "epoch": 0.7049835816561306, "grad_norm": 0.42105743288993835, "learning_rate": 2.1134124294763678e-05, "loss": 0.9333, "step": 7890 }, { "epoch": 0.7050729331873925, "grad_norm": 0.5470524430274963, "learning_rate": 2.112230987776671e-05, "loss": 1.0609, "step": 7891 }, { "epoch": 0.7051622847186544, "grad_norm": 0.48633435368537903, "learning_rate": 2.11104978795841e-05, "loss": 0.9119, "step": 7892 }, { "epoch": 0.7052516362499163, "grad_norm": 0.6098655462265015, "learning_rate": 2.1098688301205237e-05, "loss": 0.8961, "step": 7893 }, { "epoch": 0.7053409877811782, "grad_norm": 0.5170839428901672, "learning_rate": 2.1086881143619292e-05, "loss": 0.923, "step": 7894 }, { "epoch": 0.7054303393124399, "grad_norm": 0.4735257923603058, "learning_rate": 2.1075076407815243e-05, "loss": 0.9574, "step": 7895 }, { "epoch": 0.7055196908437018, "grad_norm": 0.6144537329673767, "learning_rate": 2.106327409478186e-05, "loss": 0.8456, "step": 7896 }, { "epoch": 0.7056090423749637, "grad_norm": 0.5229153037071228, "learning_rate": 2.1051474205507715e-05, "loss": 0.9355, "step": 7897 }, { "epoch": 0.7056983939062256, "grad_norm": 0.429524689912796, "learning_rate": 2.1039676740981172e-05, "loss": 0.954, "step": 7898 }, { "epoch": 0.7057877454374875, "grad_norm": 0.5060709714889526, "learning_rate": 2.1027881702190422e-05, "loss": 0.8428, "step": 7899 }, { "epoch": 0.7058770969687493, "grad_norm": 0.5516795516014099, "learning_rate": 2.101608909012338e-05, "loss": 0.9072, "step": 7900 }, { "epoch": 0.7059664485000112, "grad_norm": 0.5318334102630615, "learning_rate": 2.100429890576782e-05, "loss": 0.9315, "step": 7901 }, { "epoch": 0.706055800031273, "grad_norm": 0.54982990026474, "learning_rate": 2.09925111501113e-05, "loss": 0.969, "step": 7902 }, { "epoch": 0.7061451515625349, "grad_norm": 0.4394816756248474, "learning_rate": 2.0980725824141166e-05, "loss": 1.042, "step": 7903 }, { "epoch": 0.7062345030937968, "grad_norm": 0.579893171787262, "learning_rate": 2.0968942928844593e-05, "loss": 0.8299, "step": 7904 }, { "epoch": 0.7063238546250586, "grad_norm": 0.4310462176799774, "learning_rate": 2.0957162465208475e-05, "loss": 0.9358, "step": 7905 }, { "epoch": 0.7064132061563205, "grad_norm": 0.46636763215065, "learning_rate": 2.094538443421958e-05, "loss": 0.9262, "step": 7906 }, { "epoch": 0.7065025576875824, "grad_norm": 0.5085285902023315, "learning_rate": 2.0933608836864433e-05, "loss": 0.9616, "step": 7907 }, { "epoch": 0.7065919092188442, "grad_norm": 0.43514180183410645, "learning_rate": 2.092183567412937e-05, "loss": 1.0016, "step": 7908 }, { "epoch": 0.7066812607501061, "grad_norm": 0.45315706729888916, "learning_rate": 2.091006494700054e-05, "loss": 0.9255, "step": 7909 }, { "epoch": 0.706770612281368, "grad_norm": 0.46915316581726074, "learning_rate": 2.0898296656463834e-05, "loss": 0.9525, "step": 7910 }, { "epoch": 0.7068599638126298, "grad_norm": 0.572519838809967, "learning_rate": 2.0886530803504977e-05, "loss": 0.9686, "step": 7911 }, { "epoch": 0.7069493153438917, "grad_norm": 0.5691891312599182, "learning_rate": 2.08747673891095e-05, "loss": 0.9626, "step": 7912 }, { "epoch": 0.7070386668751536, "grad_norm": 0.4699474275112152, "learning_rate": 2.0863006414262703e-05, "loss": 0.9125, "step": 7913 }, { "epoch": 0.7071280184064155, "grad_norm": 0.5651034116744995, "learning_rate": 2.0851247879949698e-05, "loss": 0.8713, "step": 7914 }, { "epoch": 0.7072173699376773, "grad_norm": 0.4276520609855652, "learning_rate": 2.0839491787155387e-05, "loss": 0.8954, "step": 7915 }, { "epoch": 0.7073067214689391, "grad_norm": 0.5207197070121765, "learning_rate": 2.0827738136864462e-05, "loss": 0.9387, "step": 7916 }, { "epoch": 0.707396073000201, "grad_norm": 0.43144193291664124, "learning_rate": 2.0815986930061428e-05, "loss": 0.8953, "step": 7917 }, { "epoch": 0.7074854245314629, "grad_norm": 0.48623722791671753, "learning_rate": 2.0804238167730566e-05, "loss": 0.8455, "step": 7918 }, { "epoch": 0.7075747760627248, "grad_norm": 0.4490886926651001, "learning_rate": 2.0792491850855976e-05, "loss": 0.9528, "step": 7919 }, { "epoch": 0.7076641275939867, "grad_norm": 0.44778475165367126, "learning_rate": 2.078074798042151e-05, "loss": 0.9674, "step": 7920 }, { "epoch": 0.7077534791252486, "grad_norm": 0.4196859300136566, "learning_rate": 2.0769006557410858e-05, "loss": 0.9325, "step": 7921 }, { "epoch": 0.7078428306565103, "grad_norm": 0.4247509837150574, "learning_rate": 2.0757267582807482e-05, "loss": 0.9765, "step": 7922 }, { "epoch": 0.7079321821877722, "grad_norm": 0.44996559619903564, "learning_rate": 2.0745531057594654e-05, "loss": 0.9291, "step": 7923 }, { "epoch": 0.7080215337190341, "grad_norm": 0.4982374906539917, "learning_rate": 2.0733796982755425e-05, "loss": 0.8631, "step": 7924 }, { "epoch": 0.708110885250296, "grad_norm": 0.515921413898468, "learning_rate": 2.0722065359272657e-05, "loss": 0.8803, "step": 7925 }, { "epoch": 0.7082002367815579, "grad_norm": 0.4899452030658722, "learning_rate": 2.0710336188128998e-05, "loss": 0.8997, "step": 7926 }, { "epoch": 0.7082895883128197, "grad_norm": 0.5061736106872559, "learning_rate": 2.0698609470306885e-05, "loss": 0.8947, "step": 7927 }, { "epoch": 0.7083789398440816, "grad_norm": 0.45645084977149963, "learning_rate": 2.0686885206788565e-05, "loss": 0.9493, "step": 7928 }, { "epoch": 0.7084682913753434, "grad_norm": 0.4615998864173889, "learning_rate": 2.0675163398556073e-05, "loss": 0.9419, "step": 7929 }, { "epoch": 0.7085576429066053, "grad_norm": 0.5820156335830688, "learning_rate": 2.066344404659122e-05, "loss": 0.8803, "step": 7930 }, { "epoch": 0.7086469944378672, "grad_norm": 0.48079875111579895, "learning_rate": 2.065172715187562e-05, "loss": 0.9185, "step": 7931 }, { "epoch": 0.708736345969129, "grad_norm": 0.48415642976760864, "learning_rate": 2.064001271539071e-05, "loss": 0.9558, "step": 7932 }, { "epoch": 0.7088256975003909, "grad_norm": 0.5177109241485596, "learning_rate": 2.062830073811769e-05, "loss": 0.9591, "step": 7933 }, { "epoch": 0.7089150490316528, "grad_norm": 0.4747374653816223, "learning_rate": 2.061659122103756e-05, "loss": 1.048, "step": 7934 }, { "epoch": 0.7090044005629147, "grad_norm": 0.5308428406715393, "learning_rate": 2.0604884165131122e-05, "loss": 0.8741, "step": 7935 }, { "epoch": 0.7090937520941765, "grad_norm": 0.45492875576019287, "learning_rate": 2.0593179571378964e-05, "loss": 0.9447, "step": 7936 }, { "epoch": 0.7091831036254383, "grad_norm": 0.534234881401062, "learning_rate": 2.0581477440761488e-05, "loss": 0.8724, "step": 7937 }, { "epoch": 0.7092724551567002, "grad_norm": 0.4276201128959656, "learning_rate": 2.0569777774258842e-05, "loss": 0.9492, "step": 7938 }, { "epoch": 0.7093618066879621, "grad_norm": 0.4283556044101715, "learning_rate": 2.0558080572851002e-05, "loss": 1.0475, "step": 7939 }, { "epoch": 0.709451158219224, "grad_norm": 0.458793580532074, "learning_rate": 2.054638583751775e-05, "loss": 0.9828, "step": 7940 }, { "epoch": 0.7095405097504859, "grad_norm": 0.41058874130249023, "learning_rate": 2.053469356923865e-05, "loss": 0.9937, "step": 7941 }, { "epoch": 0.7096298612817478, "grad_norm": 0.4860825538635254, "learning_rate": 2.0523003768993025e-05, "loss": 0.9317, "step": 7942 }, { "epoch": 0.7097192128130095, "grad_norm": 0.46478596329689026, "learning_rate": 2.0511316437760042e-05, "loss": 0.9276, "step": 7943 }, { "epoch": 0.7098085643442714, "grad_norm": 0.504979133605957, "learning_rate": 2.049963157651863e-05, "loss": 0.9422, "step": 7944 }, { "epoch": 0.7098979158755333, "grad_norm": 0.5935678482055664, "learning_rate": 2.0487949186247524e-05, "loss": 0.8924, "step": 7945 }, { "epoch": 0.7099872674067952, "grad_norm": 0.4765070080757141, "learning_rate": 2.0476269267925247e-05, "loss": 0.9904, "step": 7946 }, { "epoch": 0.7100766189380571, "grad_norm": 0.4645882844924927, "learning_rate": 2.0464591822530123e-05, "loss": 0.9594, "step": 7947 }, { "epoch": 0.710165970469319, "grad_norm": 0.4871085584163666, "learning_rate": 2.0452916851040256e-05, "loss": 0.8937, "step": 7948 }, { "epoch": 0.7102553220005808, "grad_norm": 0.44038885831832886, "learning_rate": 2.0441244354433568e-05, "loss": 0.951, "step": 7949 }, { "epoch": 0.7103446735318426, "grad_norm": 0.5087218880653381, "learning_rate": 2.042957433368773e-05, "loss": 0.9512, "step": 7950 }, { "epoch": 0.7104340250631045, "grad_norm": 0.5866579413414001, "learning_rate": 2.0417906789780235e-05, "loss": 0.8355, "step": 7951 }, { "epoch": 0.7105233765943664, "grad_norm": 0.4508510231971741, "learning_rate": 2.0406241723688362e-05, "loss": 0.9801, "step": 7952 }, { "epoch": 0.7106127281256283, "grad_norm": 0.4516523778438568, "learning_rate": 2.0394579136389203e-05, "loss": 0.9136, "step": 7953 }, { "epoch": 0.7107020796568901, "grad_norm": 0.45436009764671326, "learning_rate": 2.0382919028859605e-05, "loss": 0.8881, "step": 7954 }, { "epoch": 0.710791431188152, "grad_norm": 0.6993239521980286, "learning_rate": 2.0371261402076236e-05, "loss": 0.9434, "step": 7955 }, { "epoch": 0.7108807827194139, "grad_norm": 0.47904080152511597, "learning_rate": 2.0359606257015546e-05, "loss": 1.0162, "step": 7956 }, { "epoch": 0.7109701342506757, "grad_norm": 0.4850071370601654, "learning_rate": 2.034795359465377e-05, "loss": 0.9378, "step": 7957 }, { "epoch": 0.7110594857819376, "grad_norm": 0.5238434672355652, "learning_rate": 2.0336303415966952e-05, "loss": 0.9051, "step": 7958 }, { "epoch": 0.7111488373131994, "grad_norm": 0.5065889954566956, "learning_rate": 2.0324655721930937e-05, "loss": 0.9456, "step": 7959 }, { "epoch": 0.7112381888444613, "grad_norm": 0.5274893045425415, "learning_rate": 2.0313010513521298e-05, "loss": 0.9309, "step": 7960 }, { "epoch": 0.7113275403757232, "grad_norm": 0.47935160994529724, "learning_rate": 2.030136779171347e-05, "loss": 0.9542, "step": 7961 }, { "epoch": 0.7114168919069851, "grad_norm": 0.4876161515712738, "learning_rate": 2.0289727557482656e-05, "loss": 0.881, "step": 7962 }, { "epoch": 0.711506243438247, "grad_norm": 0.49741584062576294, "learning_rate": 2.0278089811803846e-05, "loss": 0.9275, "step": 7963 }, { "epoch": 0.7115955949695087, "grad_norm": 0.5018129944801331, "learning_rate": 2.0266454555651825e-05, "loss": 0.9503, "step": 7964 }, { "epoch": 0.7116849465007706, "grad_norm": 0.46502235531806946, "learning_rate": 2.0254821790001177e-05, "loss": 0.96, "step": 7965 }, { "epoch": 0.7117742980320325, "grad_norm": 0.5179951786994934, "learning_rate": 2.0243191515826265e-05, "loss": 1.0107, "step": 7966 }, { "epoch": 0.7118636495632944, "grad_norm": 0.49856871366500854, "learning_rate": 2.0231563734101243e-05, "loss": 0.9671, "step": 7967 }, { "epoch": 0.7119530010945563, "grad_norm": 0.4230417013168335, "learning_rate": 2.0219938445800074e-05, "loss": 0.9217, "step": 7968 }, { "epoch": 0.7120423526258182, "grad_norm": 0.5896261930465698, "learning_rate": 2.0208315651896504e-05, "loss": 0.9198, "step": 7969 }, { "epoch": 0.7121317041570799, "grad_norm": 0.46407637000083923, "learning_rate": 2.0196695353364042e-05, "loss": 1.0145, "step": 7970 }, { "epoch": 0.7122210556883418, "grad_norm": 0.39339298009872437, "learning_rate": 2.018507755117602e-05, "loss": 0.9654, "step": 7971 }, { "epoch": 0.7123104072196037, "grad_norm": 0.5174713730812073, "learning_rate": 2.017346224630556e-05, "loss": 0.9395, "step": 7972 }, { "epoch": 0.7123997587508656, "grad_norm": 0.5631799697875977, "learning_rate": 2.0161849439725565e-05, "loss": 0.8938, "step": 7973 }, { "epoch": 0.7124891102821275, "grad_norm": 0.4893137812614441, "learning_rate": 2.015023913240875e-05, "loss": 0.9324, "step": 7974 }, { "epoch": 0.7125784618133894, "grad_norm": 0.495174765586853, "learning_rate": 2.0138631325327563e-05, "loss": 0.9131, "step": 7975 }, { "epoch": 0.7126678133446512, "grad_norm": 0.4327716827392578, "learning_rate": 2.0127026019454304e-05, "loss": 0.9635, "step": 7976 }, { "epoch": 0.712757164875913, "grad_norm": 0.3611029386520386, "learning_rate": 2.0115423215761037e-05, "loss": 0.9878, "step": 7977 }, { "epoch": 0.7128465164071749, "grad_norm": 0.4893472492694855, "learning_rate": 2.0103822915219624e-05, "loss": 0.9732, "step": 7978 }, { "epoch": 0.7129358679384368, "grad_norm": 0.4027042090892792, "learning_rate": 2.0092225118801706e-05, "loss": 0.9217, "step": 7979 }, { "epoch": 0.7130252194696987, "grad_norm": 0.4319385290145874, "learning_rate": 2.0080629827478753e-05, "loss": 0.92, "step": 7980 }, { "epoch": 0.7131145710009605, "grad_norm": 0.48171430826187134, "learning_rate": 2.006903704222195e-05, "loss": 0.8967, "step": 7981 }, { "epoch": 0.7132039225322224, "grad_norm": 0.5436350703239441, "learning_rate": 2.0057446764002337e-05, "loss": 0.9484, "step": 7982 }, { "epoch": 0.7132932740634843, "grad_norm": 0.42786580324172974, "learning_rate": 2.004585899379072e-05, "loss": 1.0898, "step": 7983 }, { "epoch": 0.7133826255947461, "grad_norm": 0.4779389798641205, "learning_rate": 2.00342737325577e-05, "loss": 0.9715, "step": 7984 }, { "epoch": 0.713471977126008, "grad_norm": 0.47203120589256287, "learning_rate": 2.0022690981273666e-05, "loss": 0.9221, "step": 7985 }, { "epoch": 0.7135613286572698, "grad_norm": 0.43961775302886963, "learning_rate": 2.0011110740908802e-05, "loss": 0.9608, "step": 7986 }, { "epoch": 0.7136506801885317, "grad_norm": 0.5077892541885376, "learning_rate": 1.999953301243307e-05, "loss": 0.9414, "step": 7987 }, { "epoch": 0.7137400317197936, "grad_norm": 0.5579219460487366, "learning_rate": 1.998795779681623e-05, "loss": 0.9396, "step": 7988 }, { "epoch": 0.7138293832510555, "grad_norm": 0.5244463682174683, "learning_rate": 1.9976385095027826e-05, "loss": 0.938, "step": 7989 }, { "epoch": 0.7139187347823174, "grad_norm": 0.5091515779495239, "learning_rate": 1.9964814908037223e-05, "loss": 0.9086, "step": 7990 }, { "epoch": 0.7140080863135791, "grad_norm": 0.5169358253479004, "learning_rate": 1.9953247236813504e-05, "loss": 0.9151, "step": 7991 }, { "epoch": 0.714097437844841, "grad_norm": 0.509032666683197, "learning_rate": 1.9941682082325602e-05, "loss": 0.8878, "step": 7992 }, { "epoch": 0.7141867893761029, "grad_norm": 0.3874991238117218, "learning_rate": 1.9930119445542227e-05, "loss": 0.9988, "step": 7993 }, { "epoch": 0.7142761409073648, "grad_norm": 0.48893627524375916, "learning_rate": 1.991855932743187e-05, "loss": 0.9737, "step": 7994 }, { "epoch": 0.7143654924386267, "grad_norm": 0.5337826013565063, "learning_rate": 1.990700172896281e-05, "loss": 0.9399, "step": 7995 }, { "epoch": 0.7144548439698886, "grad_norm": 0.5026716589927673, "learning_rate": 1.989544665110313e-05, "loss": 0.9656, "step": 7996 }, { "epoch": 0.7145441955011504, "grad_norm": 0.5005045533180237, "learning_rate": 1.988389409482068e-05, "loss": 1.0192, "step": 7997 }, { "epoch": 0.7146335470324122, "grad_norm": 0.45082736015319824, "learning_rate": 1.9872344061083113e-05, "loss": 0.9364, "step": 7998 }, { "epoch": 0.7147228985636741, "grad_norm": 0.4575754702091217, "learning_rate": 1.9860796550857872e-05, "loss": 0.9433, "step": 7999 }, { "epoch": 0.714812250094936, "grad_norm": 0.5190432071685791, "learning_rate": 1.98492515651122e-05, "loss": 0.9311, "step": 8000 }, { "epoch": 0.7149016016261979, "grad_norm": 0.4815937578678131, "learning_rate": 1.9837709104813073e-05, "loss": 0.9925, "step": 8001 }, { "epoch": 0.7149909531574598, "grad_norm": 0.4584693908691406, "learning_rate": 1.9826169170927317e-05, "loss": 0.9623, "step": 8002 }, { "epoch": 0.7150803046887216, "grad_norm": 0.4569786489009857, "learning_rate": 1.9814631764421522e-05, "loss": 0.9571, "step": 8003 }, { "epoch": 0.7151696562199835, "grad_norm": 0.47242796421051025, "learning_rate": 1.9803096886262068e-05, "loss": 0.864, "step": 8004 }, { "epoch": 0.7152590077512453, "grad_norm": 0.4647274613380432, "learning_rate": 1.9791564537415124e-05, "loss": 0.9579, "step": 8005 }, { "epoch": 0.7153483592825072, "grad_norm": 0.4390351474285126, "learning_rate": 1.978003471884665e-05, "loss": 0.9682, "step": 8006 }, { "epoch": 0.7154377108137691, "grad_norm": 0.4374440908432007, "learning_rate": 1.976850743152241e-05, "loss": 0.9661, "step": 8007 }, { "epoch": 0.7155270623450309, "grad_norm": 0.48356321454048157, "learning_rate": 1.97569826764079e-05, "loss": 0.9314, "step": 8008 }, { "epoch": 0.7156164138762928, "grad_norm": 0.37716495990753174, "learning_rate": 1.9745460454468457e-05, "loss": 0.9704, "step": 8009 }, { "epoch": 0.7157057654075547, "grad_norm": 0.3916030526161194, "learning_rate": 1.973394076666919e-05, "loss": 0.9635, "step": 8010 }, { "epoch": 0.7157951169388166, "grad_norm": 0.5709711909294128, "learning_rate": 1.9722423613975016e-05, "loss": 0.9523, "step": 8011 }, { "epoch": 0.7158844684700784, "grad_norm": 0.4928154945373535, "learning_rate": 1.971090899735058e-05, "loss": 0.8978, "step": 8012 }, { "epoch": 0.7159738200013402, "grad_norm": 0.4418443739414215, "learning_rate": 1.9699396917760377e-05, "loss": 0.8998, "step": 8013 }, { "epoch": 0.7160631715326021, "grad_norm": 0.5247768759727478, "learning_rate": 1.9687887376168663e-05, "loss": 1.0516, "step": 8014 }, { "epoch": 0.716152523063864, "grad_norm": 0.5156433582305908, "learning_rate": 1.9676380373539482e-05, "loss": 0.9109, "step": 8015 }, { "epoch": 0.7162418745951259, "grad_norm": 0.4411882758140564, "learning_rate": 1.9664875910836677e-05, "loss": 0.9728, "step": 8016 }, { "epoch": 0.7163312261263878, "grad_norm": 0.5197750329971313, "learning_rate": 1.965337398902386e-05, "loss": 0.9223, "step": 8017 }, { "epoch": 0.7164205776576497, "grad_norm": 0.45580387115478516, "learning_rate": 1.9641874609064443e-05, "loss": 0.9332, "step": 8018 }, { "epoch": 0.7165099291889114, "grad_norm": 0.4132586419582367, "learning_rate": 1.963037777192162e-05, "loss": 0.9757, "step": 8019 }, { "epoch": 0.7165992807201733, "grad_norm": 0.5496894121170044, "learning_rate": 1.96188834785584e-05, "loss": 1.0128, "step": 8020 }, { "epoch": 0.7166886322514352, "grad_norm": 0.4949209988117218, "learning_rate": 1.9607391729937503e-05, "loss": 0.9594, "step": 8021 }, { "epoch": 0.7167779837826971, "grad_norm": 0.4753008782863617, "learning_rate": 1.9595902527021513e-05, "loss": 0.9873, "step": 8022 }, { "epoch": 0.716867335313959, "grad_norm": 0.4486302435398102, "learning_rate": 1.958441587077277e-05, "loss": 0.8871, "step": 8023 }, { "epoch": 0.7169566868452208, "grad_norm": 0.42516544461250305, "learning_rate": 1.9572931762153407e-05, "loss": 1.0009, "step": 8024 }, { "epoch": 0.7170460383764827, "grad_norm": 0.4175739288330078, "learning_rate": 1.9561450202125337e-05, "loss": 0.9372, "step": 8025 }, { "epoch": 0.7171353899077445, "grad_norm": 0.5125389695167542, "learning_rate": 1.9549971191650262e-05, "loss": 0.9469, "step": 8026 }, { "epoch": 0.7172247414390064, "grad_norm": 0.5798017382621765, "learning_rate": 1.953849473168968e-05, "loss": 0.8865, "step": 8027 }, { "epoch": 0.7173140929702683, "grad_norm": 0.4292055666446686, "learning_rate": 1.9527020823204856e-05, "loss": 0.991, "step": 8028 }, { "epoch": 0.7174034445015302, "grad_norm": 0.41361159086227417, "learning_rate": 1.951554946715686e-05, "loss": 0.9773, "step": 8029 }, { "epoch": 0.717492796032792, "grad_norm": 0.5009101629257202, "learning_rate": 1.9504080664506546e-05, "loss": 0.9358, "step": 8030 }, { "epoch": 0.7175821475640539, "grad_norm": 0.6405025124549866, "learning_rate": 1.9492614416214526e-05, "loss": 0.9336, "step": 8031 }, { "epoch": 0.7176714990953158, "grad_norm": 0.440360426902771, "learning_rate": 1.9481150723241236e-05, "loss": 0.923, "step": 8032 }, { "epoch": 0.7177608506265776, "grad_norm": 0.4327067732810974, "learning_rate": 1.946968958654688e-05, "loss": 0.9564, "step": 8033 }, { "epoch": 0.7178502021578395, "grad_norm": 0.6656114459037781, "learning_rate": 1.9458231007091456e-05, "loss": 0.9387, "step": 8034 }, { "epoch": 0.7179395536891013, "grad_norm": 0.5067597031593323, "learning_rate": 1.9446774985834726e-05, "loss": 0.905, "step": 8035 }, { "epoch": 0.7180289052203632, "grad_norm": 0.48535487055778503, "learning_rate": 1.943532152373627e-05, "loss": 0.8893, "step": 8036 }, { "epoch": 0.7181182567516251, "grad_norm": 0.6070266962051392, "learning_rate": 1.9423870621755434e-05, "loss": 0.8946, "step": 8037 }, { "epoch": 0.718207608282887, "grad_norm": 0.5179934501647949, "learning_rate": 1.941242228085135e-05, "loss": 0.8965, "step": 8038 }, { "epoch": 0.7182969598141488, "grad_norm": 0.49246200919151306, "learning_rate": 1.9400976501982943e-05, "loss": 0.8797, "step": 8039 }, { "epoch": 0.7183863113454106, "grad_norm": 0.47545865178108215, "learning_rate": 1.938953328610893e-05, "loss": 0.9602, "step": 8040 }, { "epoch": 0.7184756628766725, "grad_norm": 0.5661696791648865, "learning_rate": 1.9378092634187772e-05, "loss": 0.8881, "step": 8041 }, { "epoch": 0.7185650144079344, "grad_norm": 0.5181058645248413, "learning_rate": 1.9366654547177764e-05, "loss": 0.9065, "step": 8042 }, { "epoch": 0.7186543659391963, "grad_norm": 0.42876681685447693, "learning_rate": 1.9355219026036965e-05, "loss": 1.0135, "step": 8043 }, { "epoch": 0.7187437174704582, "grad_norm": 0.5706227421760559, "learning_rate": 1.934378607172324e-05, "loss": 0.9067, "step": 8044 }, { "epoch": 0.7188330690017201, "grad_norm": 0.4501083791255951, "learning_rate": 1.9332355685194182e-05, "loss": 0.9817, "step": 8045 }, { "epoch": 0.7189224205329818, "grad_norm": 0.5079956650733948, "learning_rate": 1.9320927867407234e-05, "loss": 0.9354, "step": 8046 }, { "epoch": 0.7190117720642437, "grad_norm": 0.43281540274620056, "learning_rate": 1.9309502619319592e-05, "loss": 0.9358, "step": 8047 }, { "epoch": 0.7191011235955056, "grad_norm": 0.45937132835388184, "learning_rate": 1.9298079941888237e-05, "loss": 0.9387, "step": 8048 }, { "epoch": 0.7191904751267675, "grad_norm": 0.572298526763916, "learning_rate": 1.9286659836069953e-05, "loss": 0.8596, "step": 8049 }, { "epoch": 0.7192798266580294, "grad_norm": 0.4632246494293213, "learning_rate": 1.9275242302821302e-05, "loss": 0.9057, "step": 8050 }, { "epoch": 0.7193691781892912, "grad_norm": 0.46226373314857483, "learning_rate": 1.9263827343098594e-05, "loss": 0.9242, "step": 8051 }, { "epoch": 0.7194585297205531, "grad_norm": 0.4486196041107178, "learning_rate": 1.9252414957857966e-05, "loss": 0.9282, "step": 8052 }, { "epoch": 0.7195478812518149, "grad_norm": 0.5384345054626465, "learning_rate": 1.9241005148055336e-05, "loss": 0.904, "step": 8053 }, { "epoch": 0.7196372327830768, "grad_norm": 0.5714906454086304, "learning_rate": 1.922959791464639e-05, "loss": 0.8597, "step": 8054 }, { "epoch": 0.7197265843143387, "grad_norm": 0.5070673227310181, "learning_rate": 1.921819325858661e-05, "loss": 0.9542, "step": 8055 }, { "epoch": 0.7198159358456006, "grad_norm": 0.5547393560409546, "learning_rate": 1.920679118083125e-05, "loss": 0.892, "step": 8056 }, { "epoch": 0.7199052873768624, "grad_norm": 0.4647422134876251, "learning_rate": 1.9195391682335368e-05, "loss": 0.8927, "step": 8057 }, { "epoch": 0.7199946389081243, "grad_norm": 0.4780432879924774, "learning_rate": 1.918399476405378e-05, "loss": 0.964, "step": 8058 }, { "epoch": 0.7200839904393862, "grad_norm": 0.5166242122650146, "learning_rate": 1.9172600426941113e-05, "loss": 0.9151, "step": 8059 }, { "epoch": 0.720173341970648, "grad_norm": 0.4829009771347046, "learning_rate": 1.9161208671951763e-05, "loss": 0.9347, "step": 8060 }, { "epoch": 0.7202626935019099, "grad_norm": 0.5364348292350769, "learning_rate": 1.9149819500039896e-05, "loss": 0.9733, "step": 8061 }, { "epoch": 0.7203520450331717, "grad_norm": 0.43714621663093567, "learning_rate": 1.913843291215948e-05, "loss": 0.9185, "step": 8062 }, { "epoch": 0.7204413965644336, "grad_norm": 0.5605136752128601, "learning_rate": 1.912704890926427e-05, "loss": 0.8812, "step": 8063 }, { "epoch": 0.7205307480956955, "grad_norm": 0.5223738551139832, "learning_rate": 1.9115667492307797e-05, "loss": 0.8438, "step": 8064 }, { "epoch": 0.7206200996269574, "grad_norm": 0.46035128831863403, "learning_rate": 1.9104288662243375e-05, "loss": 0.9511, "step": 8065 }, { "epoch": 0.7207094511582193, "grad_norm": 0.3924838602542877, "learning_rate": 1.90929124200241e-05, "loss": 0.9798, "step": 8066 }, { "epoch": 0.720798802689481, "grad_norm": 0.47366228699684143, "learning_rate": 1.9081538766602857e-05, "loss": 1.0173, "step": 8067 }, { "epoch": 0.7208881542207429, "grad_norm": 0.4460136294364929, "learning_rate": 1.9070167702932313e-05, "loss": 0.9475, "step": 8068 }, { "epoch": 0.7209775057520048, "grad_norm": 0.4173930883407593, "learning_rate": 1.9058799229964907e-05, "loss": 0.9413, "step": 8069 }, { "epoch": 0.7210668572832667, "grad_norm": 0.5706358551979065, "learning_rate": 1.9047433348652876e-05, "loss": 0.8534, "step": 8070 }, { "epoch": 0.7211562088145286, "grad_norm": 0.47395849227905273, "learning_rate": 1.9036070059948252e-05, "loss": 0.9308, "step": 8071 }, { "epoch": 0.7212455603457905, "grad_norm": 0.461373507976532, "learning_rate": 1.9024709364802795e-05, "loss": 0.8572, "step": 8072 }, { "epoch": 0.7213349118770523, "grad_norm": 0.40039876103401184, "learning_rate": 1.90133512641681e-05, "loss": 0.9251, "step": 8073 }, { "epoch": 0.7214242634083141, "grad_norm": 0.4980182945728302, "learning_rate": 1.9001995758995533e-05, "loss": 0.9338, "step": 8074 }, { "epoch": 0.721513614939576, "grad_norm": 0.4447602927684784, "learning_rate": 1.899064285023623e-05, "loss": 0.9201, "step": 8075 }, { "epoch": 0.7216029664708379, "grad_norm": 0.404520183801651, "learning_rate": 1.8979292538841132e-05, "loss": 0.9922, "step": 8076 }, { "epoch": 0.7216923180020998, "grad_norm": 0.4557284116744995, "learning_rate": 1.8967944825760954e-05, "loss": 0.9549, "step": 8077 }, { "epoch": 0.7217816695333616, "grad_norm": 0.45047318935394287, "learning_rate": 1.8956599711946156e-05, "loss": 0.9628, "step": 8078 }, { "epoch": 0.7218710210646235, "grad_norm": 0.4499797224998474, "learning_rate": 1.8945257198347034e-05, "loss": 0.9302, "step": 8079 }, { "epoch": 0.7219603725958854, "grad_norm": 0.4355414807796478, "learning_rate": 1.893391728591364e-05, "loss": 0.942, "step": 8080 }, { "epoch": 0.7220497241271472, "grad_norm": 0.5795384049415588, "learning_rate": 1.8922579975595835e-05, "loss": 0.8788, "step": 8081 }, { "epoch": 0.7221390756584091, "grad_norm": 0.5405740737915039, "learning_rate": 1.891124526834319e-05, "loss": 0.9337, "step": 8082 }, { "epoch": 0.722228427189671, "grad_norm": 0.4552251696586609, "learning_rate": 1.889991316510515e-05, "loss": 1.0248, "step": 8083 }, { "epoch": 0.7223177787209328, "grad_norm": 0.3918149471282959, "learning_rate": 1.8888583666830876e-05, "loss": 1.0259, "step": 8084 }, { "epoch": 0.7224071302521947, "grad_norm": 0.5067173838615417, "learning_rate": 1.887725677446935e-05, "loss": 0.9839, "step": 8085 }, { "epoch": 0.7224964817834566, "grad_norm": 0.5742607116699219, "learning_rate": 1.8865932488969307e-05, "loss": 0.8916, "step": 8086 }, { "epoch": 0.7225858333147185, "grad_norm": 0.5914955735206604, "learning_rate": 1.8854610811279288e-05, "loss": 0.9132, "step": 8087 }, { "epoch": 0.7226751848459803, "grad_norm": 0.479939341545105, "learning_rate": 1.88432917423476e-05, "loss": 1.0004, "step": 8088 }, { "epoch": 0.7227645363772421, "grad_norm": 0.5861053466796875, "learning_rate": 1.883197528312233e-05, "loss": 0.9021, "step": 8089 }, { "epoch": 0.722853887908504, "grad_norm": 0.607997715473175, "learning_rate": 1.8820661434551362e-05, "loss": 0.8781, "step": 8090 }, { "epoch": 0.7229432394397659, "grad_norm": 0.4828149378299713, "learning_rate": 1.8809350197582364e-05, "loss": 0.865, "step": 8091 }, { "epoch": 0.7230325909710278, "grad_norm": 0.6091709136962891, "learning_rate": 1.879804157316274e-05, "loss": 0.9282, "step": 8092 }, { "epoch": 0.7231219425022897, "grad_norm": 0.5312778353691101, "learning_rate": 1.878673556223972e-05, "loss": 0.9184, "step": 8093 }, { "epoch": 0.7232112940335516, "grad_norm": 0.41062840819358826, "learning_rate": 1.8775432165760303e-05, "loss": 0.9453, "step": 8094 }, { "epoch": 0.7233006455648133, "grad_norm": 0.6178465485572815, "learning_rate": 1.876413138467128e-05, "loss": 0.9003, "step": 8095 }, { "epoch": 0.7233899970960752, "grad_norm": 0.4309876561164856, "learning_rate": 1.8752833219919197e-05, "loss": 0.9821, "step": 8096 }, { "epoch": 0.7234793486273371, "grad_norm": 0.5962274670600891, "learning_rate": 1.8741537672450405e-05, "loss": 0.9284, "step": 8097 }, { "epoch": 0.723568700158599, "grad_norm": 0.4582142233848572, "learning_rate": 1.8730244743211027e-05, "loss": 0.938, "step": 8098 }, { "epoch": 0.7236580516898609, "grad_norm": 0.4365943670272827, "learning_rate": 1.8718954433146963e-05, "loss": 0.9746, "step": 8099 }, { "epoch": 0.7237474032211227, "grad_norm": 0.4775666892528534, "learning_rate": 1.8707666743203893e-05, "loss": 0.9665, "step": 8100 }, { "epoch": 0.7238367547523845, "grad_norm": 0.43144118785858154, "learning_rate": 1.8696381674327308e-05, "loss": 0.9532, "step": 8101 }, { "epoch": 0.7239261062836464, "grad_norm": 0.5456085205078125, "learning_rate": 1.8685099227462406e-05, "loss": 0.9138, "step": 8102 }, { "epoch": 0.7240154578149083, "grad_norm": 0.44839584827423096, "learning_rate": 1.8673819403554244e-05, "loss": 0.9408, "step": 8103 }, { "epoch": 0.7241048093461702, "grad_norm": 0.4968213737010956, "learning_rate": 1.8662542203547616e-05, "loss": 0.9101, "step": 8104 }, { "epoch": 0.724194160877432, "grad_norm": 0.5285754203796387, "learning_rate": 1.8651267628387108e-05, "loss": 0.8168, "step": 8105 }, { "epoch": 0.7242835124086939, "grad_norm": 0.4092321991920471, "learning_rate": 1.86399956790171e-05, "loss": 0.9397, "step": 8106 }, { "epoch": 0.7243728639399558, "grad_norm": 0.6239436864852905, "learning_rate": 1.862872635638172e-05, "loss": 0.8964, "step": 8107 }, { "epoch": 0.7244622154712176, "grad_norm": 0.5783222913742065, "learning_rate": 1.86174596614249e-05, "loss": 0.8807, "step": 8108 }, { "epoch": 0.7245515670024795, "grad_norm": 0.43302804231643677, "learning_rate": 1.8606195595090354e-05, "loss": 0.9818, "step": 8109 }, { "epoch": 0.7246409185337414, "grad_norm": 0.4686475694179535, "learning_rate": 1.859493415832157e-05, "loss": 0.9568, "step": 8110 }, { "epoch": 0.7247302700650032, "grad_norm": 0.6813352704048157, "learning_rate": 1.8583675352061792e-05, "loss": 0.878, "step": 8111 }, { "epoch": 0.7248196215962651, "grad_norm": 0.5830407738685608, "learning_rate": 1.857241917725408e-05, "loss": 0.8521, "step": 8112 }, { "epoch": 0.724908973127527, "grad_norm": 0.5068725347518921, "learning_rate": 1.856116563484125e-05, "loss": 0.8299, "step": 8113 }, { "epoch": 0.7249983246587889, "grad_norm": 0.4167884886264801, "learning_rate": 1.8549914725765932e-05, "loss": 1.0106, "step": 8114 }, { "epoch": 0.7250876761900507, "grad_norm": 0.5126107335090637, "learning_rate": 1.853866645097047e-05, "loss": 0.893, "step": 8115 }, { "epoch": 0.7251770277213125, "grad_norm": 0.46296408772468567, "learning_rate": 1.852742081139705e-05, "loss": 0.8905, "step": 8116 }, { "epoch": 0.7252663792525744, "grad_norm": 0.5143711566925049, "learning_rate": 1.8516177807987606e-05, "loss": 0.9687, "step": 8117 }, { "epoch": 0.7253557307838363, "grad_norm": 0.5294638276100159, "learning_rate": 1.8504937441683868e-05, "loss": 0.931, "step": 8118 }, { "epoch": 0.7254450823150982, "grad_norm": 0.4719218909740448, "learning_rate": 1.8493699713427333e-05, "loss": 0.9807, "step": 8119 }, { "epoch": 0.7255344338463601, "grad_norm": 0.4698675870895386, "learning_rate": 1.848246462415928e-05, "loss": 1.0442, "step": 8120 }, { "epoch": 0.725623785377622, "grad_norm": 0.6009805798530579, "learning_rate": 1.847123217482078e-05, "loss": 0.9352, "step": 8121 }, { "epoch": 0.7257131369088837, "grad_norm": 0.4387056827545166, "learning_rate": 1.846000236635264e-05, "loss": 0.9247, "step": 8122 }, { "epoch": 0.7258024884401456, "grad_norm": 0.5181190967559814, "learning_rate": 1.8448775199695502e-05, "loss": 0.8485, "step": 8123 }, { "epoch": 0.7258918399714075, "grad_norm": 0.4301597774028778, "learning_rate": 1.843755067578975e-05, "loss": 0.9481, "step": 8124 }, { "epoch": 0.7259811915026694, "grad_norm": 0.49605125188827515, "learning_rate": 1.842632879557556e-05, "loss": 0.9434, "step": 8125 }, { "epoch": 0.7260705430339313, "grad_norm": 0.43525439500808716, "learning_rate": 1.8415109559992882e-05, "loss": 0.9441, "step": 8126 }, { "epoch": 0.7261598945651931, "grad_norm": 0.5193489789962769, "learning_rate": 1.840389296998145e-05, "loss": 0.9361, "step": 8127 }, { "epoch": 0.726249246096455, "grad_norm": 0.45760974287986755, "learning_rate": 1.8392679026480774e-05, "loss": 0.9345, "step": 8128 }, { "epoch": 0.7263385976277168, "grad_norm": 0.6482862234115601, "learning_rate": 1.8381467730430134e-05, "loss": 0.8445, "step": 8129 }, { "epoch": 0.7264279491589787, "grad_norm": 0.45973724126815796, "learning_rate": 1.837025908276861e-05, "loss": 0.896, "step": 8130 }, { "epoch": 0.7265173006902406, "grad_norm": 0.4498787522315979, "learning_rate": 1.8359053084435046e-05, "loss": 0.9654, "step": 8131 }, { "epoch": 0.7266066522215024, "grad_norm": 0.4632214903831482, "learning_rate": 1.834784973636804e-05, "loss": 0.9408, "step": 8132 }, { "epoch": 0.7266960037527643, "grad_norm": 0.4368852972984314, "learning_rate": 1.8336649039506004e-05, "loss": 0.9919, "step": 8133 }, { "epoch": 0.7267853552840262, "grad_norm": 0.4225790798664093, "learning_rate": 1.8325450994787124e-05, "loss": 0.9568, "step": 8134 }, { "epoch": 0.7268747068152881, "grad_norm": 0.40643781423568726, "learning_rate": 1.8314255603149345e-05, "loss": 0.9723, "step": 8135 }, { "epoch": 0.7269640583465499, "grad_norm": 0.47153598070144653, "learning_rate": 1.8303062865530406e-05, "loss": 0.8922, "step": 8136 }, { "epoch": 0.7270534098778118, "grad_norm": 0.45243874192237854, "learning_rate": 1.8291872782867813e-05, "loss": 1.0081, "step": 8137 }, { "epoch": 0.7271427614090736, "grad_norm": 0.4166507124900818, "learning_rate": 1.8280685356098863e-05, "loss": 0.8909, "step": 8138 }, { "epoch": 0.7272321129403355, "grad_norm": 0.5738488435745239, "learning_rate": 1.826950058616062e-05, "loss": 0.9549, "step": 8139 }, { "epoch": 0.7273214644715974, "grad_norm": 0.5272175073623657, "learning_rate": 1.825831847398992e-05, "loss": 0.9165, "step": 8140 }, { "epoch": 0.7274108160028593, "grad_norm": 0.38618120551109314, "learning_rate": 1.8247139020523412e-05, "loss": 0.9462, "step": 8141 }, { "epoch": 0.7275001675341212, "grad_norm": 0.459420382976532, "learning_rate": 1.8235962226697457e-05, "loss": 0.9248, "step": 8142 }, { "epoch": 0.7275895190653829, "grad_norm": 0.5098559260368347, "learning_rate": 1.822478809344824e-05, "loss": 0.9542, "step": 8143 }, { "epoch": 0.7276788705966448, "grad_norm": 0.5070344805717468, "learning_rate": 1.8213616621711722e-05, "loss": 1.0229, "step": 8144 }, { "epoch": 0.7277682221279067, "grad_norm": 0.434377521276474, "learning_rate": 1.8202447812423634e-05, "loss": 0.9772, "step": 8145 }, { "epoch": 0.7278575736591686, "grad_norm": 0.5484136939048767, "learning_rate": 1.8191281666519473e-05, "loss": 0.9173, "step": 8146 }, { "epoch": 0.7279469251904305, "grad_norm": 0.4595695734024048, "learning_rate": 1.8180118184934548e-05, "loss": 0.9066, "step": 8147 }, { "epoch": 0.7280362767216924, "grad_norm": 0.5228673219680786, "learning_rate": 1.8168957368603884e-05, "loss": 0.8998, "step": 8148 }, { "epoch": 0.7281256282529542, "grad_norm": 0.5042482018470764, "learning_rate": 1.8157799218462335e-05, "loss": 0.9067, "step": 8149 }, { "epoch": 0.728214979784216, "grad_norm": 0.5893993973731995, "learning_rate": 1.814664373544452e-05, "loss": 0.8643, "step": 8150 }, { "epoch": 0.7283043313154779, "grad_norm": 0.4281430244445801, "learning_rate": 1.813549092048483e-05, "loss": 0.8613, "step": 8151 }, { "epoch": 0.7283936828467398, "grad_norm": 0.5029839873313904, "learning_rate": 1.8124340774517418e-05, "loss": 0.9724, "step": 8152 }, { "epoch": 0.7284830343780017, "grad_norm": 0.49639925360679626, "learning_rate": 1.8113193298476232e-05, "loss": 0.9772, "step": 8153 }, { "epoch": 0.7285723859092635, "grad_norm": 0.44179990887641907, "learning_rate": 1.8102048493295006e-05, "loss": 0.9583, "step": 8154 }, { "epoch": 0.7286617374405254, "grad_norm": 0.45038869976997375, "learning_rate": 1.8090906359907216e-05, "loss": 0.993, "step": 8155 }, { "epoch": 0.7287510889717873, "grad_norm": 0.5203961133956909, "learning_rate": 1.8079766899246148e-05, "loss": 0.8979, "step": 8156 }, { "epoch": 0.7288404405030491, "grad_norm": 0.5539426803588867, "learning_rate": 1.8068630112244843e-05, "loss": 0.999, "step": 8157 }, { "epoch": 0.728929792034311, "grad_norm": 0.48159733414649963, "learning_rate": 1.8057495999836137e-05, "loss": 1.0195, "step": 8158 }, { "epoch": 0.7290191435655728, "grad_norm": 0.45767495036125183, "learning_rate": 1.8046364562952624e-05, "loss": 0.9883, "step": 8159 }, { "epoch": 0.7291084950968347, "grad_norm": 0.5231384038925171, "learning_rate": 1.8035235802526674e-05, "loss": 0.8853, "step": 8160 }, { "epoch": 0.7291978466280966, "grad_norm": 0.46192824840545654, "learning_rate": 1.802410971949045e-05, "loss": 0.9542, "step": 8161 }, { "epoch": 0.7292871981593585, "grad_norm": 0.40954524278640747, "learning_rate": 1.801298631477589e-05, "loss": 0.9083, "step": 8162 }, { "epoch": 0.7293765496906203, "grad_norm": 0.5644052624702454, "learning_rate": 1.8001865589314664e-05, "loss": 0.8587, "step": 8163 }, { "epoch": 0.7294659012218822, "grad_norm": 0.4511362612247467, "learning_rate": 1.7990747544038277e-05, "loss": 0.9583, "step": 8164 }, { "epoch": 0.729555252753144, "grad_norm": 0.4605756103992462, "learning_rate": 1.7979632179877974e-05, "loss": 0.942, "step": 8165 }, { "epoch": 0.7296446042844059, "grad_norm": 0.49011021852493286, "learning_rate": 1.7968519497764784e-05, "loss": 0.9492, "step": 8166 }, { "epoch": 0.7297339558156678, "grad_norm": 0.48554643988609314, "learning_rate": 1.7957409498629522e-05, "loss": 0.9447, "step": 8167 }, { "epoch": 0.7298233073469297, "grad_norm": 0.43161845207214355, "learning_rate": 1.794630218340277e-05, "loss": 0.9717, "step": 8168 }, { "epoch": 0.7299126588781916, "grad_norm": 0.47446951270103455, "learning_rate": 1.793519755301487e-05, "loss": 0.8648, "step": 8169 }, { "epoch": 0.7300020104094533, "grad_norm": 0.41278836131095886, "learning_rate": 1.7924095608395963e-05, "loss": 0.9728, "step": 8170 }, { "epoch": 0.7300913619407152, "grad_norm": 0.4663233160972595, "learning_rate": 1.7912996350475954e-05, "loss": 0.9122, "step": 8171 }, { "epoch": 0.7301807134719771, "grad_norm": 0.6070008873939514, "learning_rate": 1.7901899780184537e-05, "loss": 0.8931, "step": 8172 }, { "epoch": 0.730270065003239, "grad_norm": 0.49870383739471436, "learning_rate": 1.789080589845114e-05, "loss": 0.8633, "step": 8173 }, { "epoch": 0.7303594165345009, "grad_norm": 0.4976552426815033, "learning_rate": 1.7879714706205008e-05, "loss": 0.9654, "step": 8174 }, { "epoch": 0.7304487680657628, "grad_norm": 0.43188008666038513, "learning_rate": 1.786862620437515e-05, "loss": 0.9809, "step": 8175 }, { "epoch": 0.7305381195970246, "grad_norm": 0.4651055335998535, "learning_rate": 1.7857540393890337e-05, "loss": 0.9153, "step": 8176 }, { "epoch": 0.7306274711282864, "grad_norm": 0.42811328172683716, "learning_rate": 1.7846457275679136e-05, "loss": 0.8451, "step": 8177 }, { "epoch": 0.7307168226595483, "grad_norm": 0.4533650875091553, "learning_rate": 1.7835376850669866e-05, "loss": 0.9642, "step": 8178 }, { "epoch": 0.7308061741908102, "grad_norm": 0.4616091549396515, "learning_rate": 1.7824299119790637e-05, "loss": 0.9361, "step": 8179 }, { "epoch": 0.7308955257220721, "grad_norm": 0.4673561155796051, "learning_rate": 1.7813224083969344e-05, "loss": 0.8883, "step": 8180 }, { "epoch": 0.7309848772533339, "grad_norm": 0.4838724136352539, "learning_rate": 1.78021517441336e-05, "loss": 0.8978, "step": 8181 }, { "epoch": 0.7310742287845958, "grad_norm": 0.5045418739318848, "learning_rate": 1.7791082101210853e-05, "loss": 0.9195, "step": 8182 }, { "epoch": 0.7311635803158577, "grad_norm": 0.40996941924095154, "learning_rate": 1.7780015156128305e-05, "loss": 0.9633, "step": 8183 }, { "epoch": 0.7312529318471195, "grad_norm": 0.577971875667572, "learning_rate": 1.776895090981294e-05, "loss": 1.0122, "step": 8184 }, { "epoch": 0.7313422833783814, "grad_norm": 0.4948059916496277, "learning_rate": 1.7757889363191483e-05, "loss": 0.8988, "step": 8185 }, { "epoch": 0.7314316349096432, "grad_norm": 0.4757169187068939, "learning_rate": 1.7746830517190467e-05, "loss": 0.9198, "step": 8186 }, { "epoch": 0.7315209864409051, "grad_norm": 0.5007458925247192, "learning_rate": 1.7735774372736187e-05, "loss": 0.9536, "step": 8187 }, { "epoch": 0.731610337972167, "grad_norm": 0.5878704190254211, "learning_rate": 1.772472093075471e-05, "loss": 0.9794, "step": 8188 }, { "epoch": 0.7316996895034289, "grad_norm": 0.4866276979446411, "learning_rate": 1.7713670192171895e-05, "loss": 0.9257, "step": 8189 }, { "epoch": 0.7317890410346908, "grad_norm": 0.4084816575050354, "learning_rate": 1.7702622157913344e-05, "loss": 0.9616, "step": 8190 }, { "epoch": 0.7318783925659526, "grad_norm": 0.5077846050262451, "learning_rate": 1.7691576828904456e-05, "loss": 0.9011, "step": 8191 }, { "epoch": 0.7319677440972144, "grad_norm": 0.46192851662635803, "learning_rate": 1.7680534206070405e-05, "loss": 0.9729, "step": 8192 }, { "epoch": 0.7320570956284763, "grad_norm": 0.42796364426612854, "learning_rate": 1.76694942903361e-05, "loss": 0.9862, "step": 8193 }, { "epoch": 0.7321464471597382, "grad_norm": 0.555330753326416, "learning_rate": 1.765845708262626e-05, "loss": 0.9163, "step": 8194 }, { "epoch": 0.7322357986910001, "grad_norm": 0.456575870513916, "learning_rate": 1.764742258386538e-05, "loss": 0.9215, "step": 8195 }, { "epoch": 0.732325150222262, "grad_norm": 0.4735051989555359, "learning_rate": 1.7636390794977713e-05, "loss": 0.9277, "step": 8196 }, { "epoch": 0.7324145017535238, "grad_norm": 0.4335395097732544, "learning_rate": 1.762536171688729e-05, "loss": 0.9389, "step": 8197 }, { "epoch": 0.7325038532847856, "grad_norm": 0.5177332758903503, "learning_rate": 1.7614335350517915e-05, "loss": 0.9101, "step": 8198 }, { "epoch": 0.7325932048160475, "grad_norm": 0.5252842903137207, "learning_rate": 1.760331169679315e-05, "loss": 0.885, "step": 8199 }, { "epoch": 0.7326825563473094, "grad_norm": 0.48446813225746155, "learning_rate": 1.7592290756636365e-05, "loss": 0.9211, "step": 8200 }, { "epoch": 0.7327719078785713, "grad_norm": 0.5630969405174255, "learning_rate": 1.7581272530970667e-05, "loss": 0.8899, "step": 8201 }, { "epoch": 0.7328612594098332, "grad_norm": 0.5070595741271973, "learning_rate": 1.7570257020718967e-05, "loss": 0.9564, "step": 8202 }, { "epoch": 0.732950610941095, "grad_norm": 0.4257377088069916, "learning_rate": 1.75592442268039e-05, "loss": 1.001, "step": 8203 }, { "epoch": 0.7330399624723569, "grad_norm": 0.40094104409217834, "learning_rate": 1.7548234150147925e-05, "loss": 0.9957, "step": 8204 }, { "epoch": 0.7331293140036187, "grad_norm": 0.44128090143203735, "learning_rate": 1.7537226791673256e-05, "loss": 0.9949, "step": 8205 }, { "epoch": 0.7332186655348806, "grad_norm": 0.46732866764068604, "learning_rate": 1.7526222152301862e-05, "loss": 0.9562, "step": 8206 }, { "epoch": 0.7333080170661425, "grad_norm": 0.41671401262283325, "learning_rate": 1.7515220232955513e-05, "loss": 0.9315, "step": 8207 }, { "epoch": 0.7333973685974043, "grad_norm": 0.5439326167106628, "learning_rate": 1.7504221034555734e-05, "loss": 0.9628, "step": 8208 }, { "epoch": 0.7334867201286662, "grad_norm": 0.4819839894771576, "learning_rate": 1.7493224558023825e-05, "loss": 0.9094, "step": 8209 }, { "epoch": 0.7335760716599281, "grad_norm": 0.4984486699104309, "learning_rate": 1.7482230804280852e-05, "loss": 0.9014, "step": 8210 }, { "epoch": 0.73366542319119, "grad_norm": 0.5585265159606934, "learning_rate": 1.7471239774247667e-05, "loss": 0.9843, "step": 8211 }, { "epoch": 0.7337547747224518, "grad_norm": 0.4539826214313507, "learning_rate": 1.7460251468844895e-05, "loss": 0.9753, "step": 8212 }, { "epoch": 0.7338441262537136, "grad_norm": 0.44110792875289917, "learning_rate": 1.74492658889929e-05, "loss": 0.9038, "step": 8213 }, { "epoch": 0.7339334777849755, "grad_norm": 0.44374075531959534, "learning_rate": 1.7438283035611846e-05, "loss": 0.9506, "step": 8214 }, { "epoch": 0.7340228293162374, "grad_norm": 0.43943607807159424, "learning_rate": 1.7427302909621672e-05, "loss": 0.9645, "step": 8215 }, { "epoch": 0.7341121808474993, "grad_norm": 0.5023413300514221, "learning_rate": 1.7416325511942085e-05, "loss": 0.8913, "step": 8216 }, { "epoch": 0.7342015323787612, "grad_norm": 0.43534350395202637, "learning_rate": 1.7405350843492567e-05, "loss": 0.907, "step": 8217 }, { "epoch": 0.7342908839100231, "grad_norm": 0.46004897356033325, "learning_rate": 1.7394378905192334e-05, "loss": 0.9859, "step": 8218 }, { "epoch": 0.7343802354412848, "grad_norm": 0.49676865339279175, "learning_rate": 1.738340969796042e-05, "loss": 0.9193, "step": 8219 }, { "epoch": 0.7344695869725467, "grad_norm": 0.47538769245147705, "learning_rate": 1.7372443222715605e-05, "loss": 0.9194, "step": 8220 }, { "epoch": 0.7345589385038086, "grad_norm": 0.477346271276474, "learning_rate": 1.736147948037646e-05, "loss": 0.9213, "step": 8221 }, { "epoch": 0.7346482900350705, "grad_norm": 0.3966813087463379, "learning_rate": 1.7350518471861328e-05, "loss": 0.9935, "step": 8222 }, { "epoch": 0.7347376415663324, "grad_norm": 0.46678826212882996, "learning_rate": 1.7339560198088273e-05, "loss": 0.923, "step": 8223 }, { "epoch": 0.7348269930975942, "grad_norm": 0.42243048548698425, "learning_rate": 1.7328604659975184e-05, "loss": 1.0018, "step": 8224 }, { "epoch": 0.734916344628856, "grad_norm": 0.4616301655769348, "learning_rate": 1.7317651858439714e-05, "loss": 0.9319, "step": 8225 }, { "epoch": 0.7350056961601179, "grad_norm": 0.644029438495636, "learning_rate": 1.7306701794399266e-05, "loss": 0.9191, "step": 8226 }, { "epoch": 0.7350950476913798, "grad_norm": 0.4394361078739166, "learning_rate": 1.7295754468771024e-05, "loss": 0.9644, "step": 8227 }, { "epoch": 0.7351843992226417, "grad_norm": 0.4633764922618866, "learning_rate": 1.7284809882471954e-05, "loss": 0.9198, "step": 8228 }, { "epoch": 0.7352737507539036, "grad_norm": 0.49035075306892395, "learning_rate": 1.727386803641877e-05, "loss": 0.9324, "step": 8229 }, { "epoch": 0.7353631022851654, "grad_norm": 0.45196330547332764, "learning_rate": 1.7262928931527977e-05, "loss": 0.9823, "step": 8230 }, { "epoch": 0.7354524538164273, "grad_norm": 0.459506630897522, "learning_rate": 1.7251992568715842e-05, "loss": 0.9723, "step": 8231 }, { "epoch": 0.7355418053476891, "grad_norm": 0.5117012858390808, "learning_rate": 1.724105894889841e-05, "loss": 0.9468, "step": 8232 }, { "epoch": 0.735631156878951, "grad_norm": 0.43561848998069763, "learning_rate": 1.7230128072991458e-05, "loss": 0.933, "step": 8233 }, { "epoch": 0.7357205084102129, "grad_norm": 0.5399268865585327, "learning_rate": 1.721919994191058e-05, "loss": 0.9901, "step": 8234 }, { "epoch": 0.7358098599414747, "grad_norm": 0.4303232729434967, "learning_rate": 1.720827455657113e-05, "loss": 1.0334, "step": 8235 }, { "epoch": 0.7358992114727366, "grad_norm": 0.5467885732650757, "learning_rate": 1.719735191788822e-05, "loss": 0.9286, "step": 8236 }, { "epoch": 0.7359885630039985, "grad_norm": 0.43079063296318054, "learning_rate": 1.7186432026776734e-05, "loss": 0.8944, "step": 8237 }, { "epoch": 0.7360779145352604, "grad_norm": 0.5494388341903687, "learning_rate": 1.717551488415134e-05, "loss": 0.8537, "step": 8238 }, { "epoch": 0.7361672660665222, "grad_norm": 0.5178488492965698, "learning_rate": 1.7164600490926454e-05, "loss": 0.8705, "step": 8239 }, { "epoch": 0.736256617597784, "grad_norm": 0.4486089050769806, "learning_rate": 1.7153688848016277e-05, "loss": 1.0209, "step": 8240 }, { "epoch": 0.7363459691290459, "grad_norm": 0.4773981273174286, "learning_rate": 1.7142779956334777e-05, "loss": 0.9607, "step": 8241 }, { "epoch": 0.7364353206603078, "grad_norm": 0.5104852318763733, "learning_rate": 1.7131873816795684e-05, "loss": 0.958, "step": 8242 }, { "epoch": 0.7365246721915697, "grad_norm": 0.461008220911026, "learning_rate": 1.7120970430312526e-05, "loss": 0.9717, "step": 8243 }, { "epoch": 0.7366140237228316, "grad_norm": 0.44321146607398987, "learning_rate": 1.711006979779855e-05, "loss": 0.9139, "step": 8244 }, { "epoch": 0.7367033752540935, "grad_norm": 0.49801915884017944, "learning_rate": 1.70991719201668e-05, "loss": 0.8756, "step": 8245 }, { "epoch": 0.7367927267853552, "grad_norm": 0.5032793283462524, "learning_rate": 1.7088276798330106e-05, "loss": 1.0369, "step": 8246 }, { "epoch": 0.7368820783166171, "grad_norm": 0.5026294589042664, "learning_rate": 1.7077384433201045e-05, "loss": 0.9135, "step": 8247 }, { "epoch": 0.736971429847879, "grad_norm": 0.4539667069911957, "learning_rate": 1.7066494825691966e-05, "loss": 0.9861, "step": 8248 }, { "epoch": 0.7370607813791409, "grad_norm": 0.4462776184082031, "learning_rate": 1.7055607976714988e-05, "loss": 0.9653, "step": 8249 }, { "epoch": 0.7371501329104028, "grad_norm": 0.4230305552482605, "learning_rate": 1.7044723887182017e-05, "loss": 0.9694, "step": 8250 }, { "epoch": 0.7372394844416646, "grad_norm": 0.4417788088321686, "learning_rate": 1.7033842558004693e-05, "loss": 0.9562, "step": 8251 }, { "epoch": 0.7373288359729265, "grad_norm": 0.4916762113571167, "learning_rate": 1.7022963990094442e-05, "loss": 0.8921, "step": 8252 }, { "epoch": 0.7374181875041883, "grad_norm": 0.44411367177963257, "learning_rate": 1.7012088184362467e-05, "loss": 0.961, "step": 8253 }, { "epoch": 0.7375075390354502, "grad_norm": 0.4935406446456909, "learning_rate": 1.700121514171975e-05, "loss": 0.9968, "step": 8254 }, { "epoch": 0.7375968905667121, "grad_norm": 0.5243939757347107, "learning_rate": 1.699034486307699e-05, "loss": 0.9191, "step": 8255 }, { "epoch": 0.737686242097974, "grad_norm": 0.5002910494804382, "learning_rate": 1.69794773493447e-05, "loss": 0.9762, "step": 8256 }, { "epoch": 0.7377755936292358, "grad_norm": 0.42303359508514404, "learning_rate": 1.6968612601433164e-05, "loss": 0.9327, "step": 8257 }, { "epoch": 0.7378649451604977, "grad_norm": 0.47965705394744873, "learning_rate": 1.695775062025241e-05, "loss": 0.906, "step": 8258 }, { "epoch": 0.7379542966917596, "grad_norm": 0.44880691170692444, "learning_rate": 1.6946891406712245e-05, "loss": 0.9013, "step": 8259 }, { "epoch": 0.7380436482230214, "grad_norm": 0.48894819617271423, "learning_rate": 1.6936034961722247e-05, "loss": 0.9125, "step": 8260 }, { "epoch": 0.7381329997542833, "grad_norm": 0.4461333751678467, "learning_rate": 1.6925181286191755e-05, "loss": 0.9559, "step": 8261 }, { "epoch": 0.7382223512855451, "grad_norm": 0.4503779411315918, "learning_rate": 1.6914330381029888e-05, "loss": 0.9575, "step": 8262 }, { "epoch": 0.738311702816807, "grad_norm": 0.4670964181423187, "learning_rate": 1.690348224714553e-05, "loss": 0.9779, "step": 8263 }, { "epoch": 0.7384010543480689, "grad_norm": 0.6177948713302612, "learning_rate": 1.68926368854473e-05, "loss": 0.994, "step": 8264 }, { "epoch": 0.7384904058793308, "grad_norm": 0.4183749854564667, "learning_rate": 1.6881794296843633e-05, "loss": 0.9373, "step": 8265 }, { "epoch": 0.7385797574105927, "grad_norm": 0.42233920097351074, "learning_rate": 1.6870954482242707e-05, "loss": 0.979, "step": 8266 }, { "epoch": 0.7386691089418544, "grad_norm": 0.45563188195228577, "learning_rate": 1.6860117442552477e-05, "loss": 0.9276, "step": 8267 }, { "epoch": 0.7387584604731163, "grad_norm": 0.43523088097572327, "learning_rate": 1.6849283178680653e-05, "loss": 1.0035, "step": 8268 }, { "epoch": 0.7388478120043782, "grad_norm": 0.6393901109695435, "learning_rate": 1.6838451691534724e-05, "loss": 0.9238, "step": 8269 }, { "epoch": 0.7389371635356401, "grad_norm": 0.5803185105323792, "learning_rate": 1.6827622982021947e-05, "loss": 0.8818, "step": 8270 }, { "epoch": 0.739026515066902, "grad_norm": 0.3935569226741791, "learning_rate": 1.6816797051049334e-05, "loss": 0.914, "step": 8271 }, { "epoch": 0.7391158665981639, "grad_norm": 0.43459293246269226, "learning_rate": 1.6805973899523675e-05, "loss": 0.9829, "step": 8272 }, { "epoch": 0.7392052181294257, "grad_norm": 0.46992024779319763, "learning_rate": 1.679515352835154e-05, "loss": 0.9613, "step": 8273 }, { "epoch": 0.7392945696606875, "grad_norm": 0.4815920889377594, "learning_rate": 1.678433593843922e-05, "loss": 0.9408, "step": 8274 }, { "epoch": 0.7393839211919494, "grad_norm": 0.5394062399864197, "learning_rate": 1.6773521130692822e-05, "loss": 0.8696, "step": 8275 }, { "epoch": 0.7394732727232113, "grad_norm": 0.4539199769496918, "learning_rate": 1.6762709106018194e-05, "loss": 0.9607, "step": 8276 }, { "epoch": 0.7395626242544732, "grad_norm": 0.3883989751338959, "learning_rate": 1.6751899865320963e-05, "loss": 0.957, "step": 8277 }, { "epoch": 0.739651975785735, "grad_norm": 0.4848140776157379, "learning_rate": 1.674109340950652e-05, "loss": 0.9505, "step": 8278 }, { "epoch": 0.7397413273169969, "grad_norm": 0.42425286769866943, "learning_rate": 1.6730289739480015e-05, "loss": 0.9207, "step": 8279 }, { "epoch": 0.7398306788482588, "grad_norm": 0.5808948278427124, "learning_rate": 1.6719488856146377e-05, "loss": 0.9163, "step": 8280 }, { "epoch": 0.7399200303795206, "grad_norm": 0.481012761592865, "learning_rate": 1.670869076041029e-05, "loss": 0.9699, "step": 8281 }, { "epoch": 0.7400093819107825, "grad_norm": 0.5220696926116943, "learning_rate": 1.669789545317621e-05, "loss": 0.912, "step": 8282 }, { "epoch": 0.7400987334420444, "grad_norm": 0.46939072012901306, "learning_rate": 1.668710293534838e-05, "loss": 1.0163, "step": 8283 }, { "epoch": 0.7401880849733062, "grad_norm": 0.4594574570655823, "learning_rate": 1.6676313207830752e-05, "loss": 0.9666, "step": 8284 }, { "epoch": 0.7402774365045681, "grad_norm": 0.36939460039138794, "learning_rate": 1.66655262715271e-05, "loss": 0.9669, "step": 8285 }, { "epoch": 0.74036678803583, "grad_norm": 0.45765185356140137, "learning_rate": 1.6654742127340938e-05, "loss": 0.8751, "step": 8286 }, { "epoch": 0.7404561395670918, "grad_norm": 0.5784175992012024, "learning_rate": 1.6643960776175577e-05, "loss": 0.8804, "step": 8287 }, { "epoch": 0.7405454910983537, "grad_norm": 0.4556187689304352, "learning_rate": 1.6633182218934035e-05, "loss": 0.9594, "step": 8288 }, { "epoch": 0.7406348426296155, "grad_norm": 0.6318187117576599, "learning_rate": 1.6622406456519146e-05, "loss": 0.9063, "step": 8289 }, { "epoch": 0.7407241941608774, "grad_norm": 0.5511077046394348, "learning_rate": 1.66116334898335e-05, "loss": 0.8661, "step": 8290 }, { "epoch": 0.7408135456921393, "grad_norm": 0.5508573651313782, "learning_rate": 1.6600863319779435e-05, "loss": 0.8953, "step": 8291 }, { "epoch": 0.7409028972234012, "grad_norm": 0.5418042540550232, "learning_rate": 1.659009594725908e-05, "loss": 0.8662, "step": 8292 }, { "epoch": 0.7409922487546631, "grad_norm": 0.45847129821777344, "learning_rate": 1.6579331373174335e-05, "loss": 0.9173, "step": 8293 }, { "epoch": 0.7410816002859248, "grad_norm": 0.45850539207458496, "learning_rate": 1.6568569598426803e-05, "loss": 0.8913, "step": 8294 }, { "epoch": 0.7411709518171867, "grad_norm": 0.46910685300827026, "learning_rate": 1.6557810623917923e-05, "loss": 0.9585, "step": 8295 }, { "epoch": 0.7412603033484486, "grad_norm": 0.5492746829986572, "learning_rate": 1.654705445054887e-05, "loss": 0.9454, "step": 8296 }, { "epoch": 0.7413496548797105, "grad_norm": 0.42353761196136475, "learning_rate": 1.6536301079220595e-05, "loss": 0.9887, "step": 8297 }, { "epoch": 0.7414390064109724, "grad_norm": 0.38787001371383667, "learning_rate": 1.65255505108338e-05, "loss": 0.9821, "step": 8298 }, { "epoch": 0.7415283579422343, "grad_norm": 0.45528730750083923, "learning_rate": 1.6514802746288955e-05, "loss": 0.9411, "step": 8299 }, { "epoch": 0.7416177094734961, "grad_norm": 0.4377180337905884, "learning_rate": 1.6504057786486316e-05, "loss": 0.9134, "step": 8300 }, { "epoch": 0.7417070610047579, "grad_norm": 0.4238099753856659, "learning_rate": 1.649331563232587e-05, "loss": 0.9492, "step": 8301 }, { "epoch": 0.7417964125360198, "grad_norm": 0.6428069472312927, "learning_rate": 1.6482576284707402e-05, "loss": 0.8924, "step": 8302 }, { "epoch": 0.7418857640672817, "grad_norm": 0.5138502717018127, "learning_rate": 1.6471839744530455e-05, "loss": 1.0355, "step": 8303 }, { "epoch": 0.7419751155985436, "grad_norm": 0.4536551833152771, "learning_rate": 1.6461106012694293e-05, "loss": 0.9953, "step": 8304 }, { "epoch": 0.7420644671298054, "grad_norm": 0.45389628410339355, "learning_rate": 1.6450375090098003e-05, "loss": 0.9809, "step": 8305 }, { "epoch": 0.7421538186610673, "grad_norm": 0.42163825035095215, "learning_rate": 1.6439646977640417e-05, "loss": 0.9368, "step": 8306 }, { "epoch": 0.7422431701923292, "grad_norm": 0.48852425813674927, "learning_rate": 1.6428921676220122e-05, "loss": 0.9138, "step": 8307 }, { "epoch": 0.742332521723591, "grad_norm": 0.4827854633331299, "learning_rate": 1.6418199186735476e-05, "loss": 0.9123, "step": 8308 }, { "epoch": 0.7424218732548529, "grad_norm": 0.4420393705368042, "learning_rate": 1.6407479510084607e-05, "loss": 0.9481, "step": 8309 }, { "epoch": 0.7425112247861148, "grad_norm": 0.47318053245544434, "learning_rate": 1.6396762647165398e-05, "loss": 0.9103, "step": 8310 }, { "epoch": 0.7426005763173766, "grad_norm": 0.5290870070457458, "learning_rate": 1.6386048598875502e-05, "loss": 0.9708, "step": 8311 }, { "epoch": 0.7426899278486385, "grad_norm": 0.52885901927948, "learning_rate": 1.6375337366112336e-05, "loss": 0.9811, "step": 8312 }, { "epoch": 0.7427792793799004, "grad_norm": 0.49882233142852783, "learning_rate": 1.6364628949773096e-05, "loss": 0.9632, "step": 8313 }, { "epoch": 0.7428686309111623, "grad_norm": 0.46031755208969116, "learning_rate": 1.6353923350754692e-05, "loss": 1.0104, "step": 8314 }, { "epoch": 0.7429579824424241, "grad_norm": 0.49714195728302, "learning_rate": 1.6343220569953848e-05, "loss": 0.9287, "step": 8315 }, { "epoch": 0.7430473339736859, "grad_norm": 0.5016975998878479, "learning_rate": 1.6332520608267032e-05, "loss": 0.9117, "step": 8316 }, { "epoch": 0.7431366855049478, "grad_norm": 0.5067397952079773, "learning_rate": 1.632182346659049e-05, "loss": 0.9105, "step": 8317 }, { "epoch": 0.7432260370362097, "grad_norm": 0.4985998272895813, "learning_rate": 1.6311129145820218e-05, "loss": 0.8446, "step": 8318 }, { "epoch": 0.7433153885674716, "grad_norm": 0.4276972711086273, "learning_rate": 1.6300437646851972e-05, "loss": 0.9542, "step": 8319 }, { "epoch": 0.7434047400987335, "grad_norm": 0.4463093876838684, "learning_rate": 1.6289748970581307e-05, "loss": 0.9895, "step": 8320 }, { "epoch": 0.7434940916299954, "grad_norm": 0.4791722297668457, "learning_rate": 1.627906311790347e-05, "loss": 0.9321, "step": 8321 }, { "epoch": 0.7435834431612571, "grad_norm": 0.4845844805240631, "learning_rate": 1.6268380089713542e-05, "loss": 0.9468, "step": 8322 }, { "epoch": 0.743672794692519, "grad_norm": 0.49456390738487244, "learning_rate": 1.625769988690633e-05, "loss": 0.9305, "step": 8323 }, { "epoch": 0.7437621462237809, "grad_norm": 0.5137072205543518, "learning_rate": 1.6247022510376435e-05, "loss": 0.9164, "step": 8324 }, { "epoch": 0.7438514977550428, "grad_norm": 0.5762333869934082, "learning_rate": 1.6236347961018172e-05, "loss": 0.9437, "step": 8325 }, { "epoch": 0.7439408492863047, "grad_norm": 0.43072283267974854, "learning_rate": 1.6225676239725663e-05, "loss": 0.927, "step": 8326 }, { "epoch": 0.7440302008175665, "grad_norm": 0.42828169465065, "learning_rate": 1.6215007347392775e-05, "loss": 0.9505, "step": 8327 }, { "epoch": 0.7441195523488284, "grad_norm": 0.44435444474220276, "learning_rate": 1.6204341284913144e-05, "loss": 0.9176, "step": 8328 }, { "epoch": 0.7442089038800902, "grad_norm": 0.38234448432922363, "learning_rate": 1.6193678053180168e-05, "loss": 1.0191, "step": 8329 }, { "epoch": 0.7442982554113521, "grad_norm": 0.5488905310630798, "learning_rate": 1.6183017653087e-05, "loss": 0.9291, "step": 8330 }, { "epoch": 0.744387606942614, "grad_norm": 0.4858629107475281, "learning_rate": 1.6172360085526565e-05, "loss": 0.9887, "step": 8331 }, { "epoch": 0.7444769584738758, "grad_norm": 0.517011821269989, "learning_rate": 1.616170535139156e-05, "loss": 0.9611, "step": 8332 }, { "epoch": 0.7445663100051377, "grad_norm": 0.5602929592132568, "learning_rate": 1.6151053451574416e-05, "loss": 0.9426, "step": 8333 }, { "epoch": 0.7446556615363996, "grad_norm": 0.4301270842552185, "learning_rate": 1.614040438696736e-05, "loss": 1.0005, "step": 8334 }, { "epoch": 0.7447450130676615, "grad_norm": 0.42185020446777344, "learning_rate": 1.612975815846235e-05, "loss": 0.9301, "step": 8335 }, { "epoch": 0.7448343645989233, "grad_norm": 0.5149445533752441, "learning_rate": 1.6119114766951116e-05, "loss": 0.9543, "step": 8336 }, { "epoch": 0.7449237161301852, "grad_norm": 0.48916956782341003, "learning_rate": 1.6108474213325165e-05, "loss": 0.8676, "step": 8337 }, { "epoch": 0.745013067661447, "grad_norm": 0.4371699392795563, "learning_rate": 1.609783649847576e-05, "loss": 0.9421, "step": 8338 }, { "epoch": 0.7451024191927089, "grad_norm": 0.6675881147384644, "learning_rate": 1.6087201623293917e-05, "loss": 0.8675, "step": 8339 }, { "epoch": 0.7451917707239708, "grad_norm": 0.5161626935005188, "learning_rate": 1.6076569588670425e-05, "loss": 0.9024, "step": 8340 }, { "epoch": 0.7452811222552327, "grad_norm": 0.4626888334751129, "learning_rate": 1.6065940395495825e-05, "loss": 0.9111, "step": 8341 }, { "epoch": 0.7453704737864946, "grad_norm": 0.45520034432411194, "learning_rate": 1.605531404466043e-05, "loss": 0.9532, "step": 8342 }, { "epoch": 0.7454598253177563, "grad_norm": 0.5057812929153442, "learning_rate": 1.6044690537054306e-05, "loss": 0.8732, "step": 8343 }, { "epoch": 0.7455491768490182, "grad_norm": 0.48415088653564453, "learning_rate": 1.6034069873567303e-05, "loss": 0.8654, "step": 8344 }, { "epoch": 0.7456385283802801, "grad_norm": 0.520176351070404, "learning_rate": 1.6023452055088982e-05, "loss": 0.9233, "step": 8345 }, { "epoch": 0.745727879911542, "grad_norm": 0.4946928024291992, "learning_rate": 1.601283708250872e-05, "loss": 0.9015, "step": 8346 }, { "epoch": 0.7458172314428039, "grad_norm": 0.4103570580482483, "learning_rate": 1.600222495671563e-05, "loss": 0.9471, "step": 8347 }, { "epoch": 0.7459065829740658, "grad_norm": 0.41291913390159607, "learning_rate": 1.599161567859858e-05, "loss": 0.9129, "step": 8348 }, { "epoch": 0.7459959345053276, "grad_norm": 0.4548855721950531, "learning_rate": 1.598100924904623e-05, "loss": 0.932, "step": 8349 }, { "epoch": 0.7460852860365894, "grad_norm": 0.43719008564949036, "learning_rate": 1.597040566894697e-05, "loss": 0.9768, "step": 8350 }, { "epoch": 0.7461746375678513, "grad_norm": 0.4432275593280792, "learning_rate": 1.595980493918896e-05, "loss": 0.9684, "step": 8351 }, { "epoch": 0.7462639890991132, "grad_norm": 0.5939651727676392, "learning_rate": 1.5949207060660138e-05, "loss": 0.9425, "step": 8352 }, { "epoch": 0.7463533406303751, "grad_norm": 0.45562806725502014, "learning_rate": 1.5938612034248184e-05, "loss": 0.9775, "step": 8353 }, { "epoch": 0.746442692161637, "grad_norm": 0.4708113670349121, "learning_rate": 1.5928019860840532e-05, "loss": 0.975, "step": 8354 }, { "epoch": 0.7465320436928988, "grad_norm": 0.5317729115486145, "learning_rate": 1.5917430541324398e-05, "loss": 0.9226, "step": 8355 }, { "epoch": 0.7466213952241606, "grad_norm": 0.5121520757675171, "learning_rate": 1.5906844076586746e-05, "loss": 0.9363, "step": 8356 }, { "epoch": 0.7467107467554225, "grad_norm": 0.629950225353241, "learning_rate": 1.5896260467514336e-05, "loss": 0.8352, "step": 8357 }, { "epoch": 0.7468000982866844, "grad_norm": 0.3894347846508026, "learning_rate": 1.58856797149936e-05, "loss": 0.9527, "step": 8358 }, { "epoch": 0.7468894498179462, "grad_norm": 0.5037431716918945, "learning_rate": 1.5875101819910833e-05, "loss": 0.9559, "step": 8359 }, { "epoch": 0.7469788013492081, "grad_norm": 0.46914321184158325, "learning_rate": 1.5864526783152028e-05, "loss": 0.9593, "step": 8360 }, { "epoch": 0.74706815288047, "grad_norm": 0.42743295431137085, "learning_rate": 1.5853954605602965e-05, "loss": 0.9583, "step": 8361 }, { "epoch": 0.7471575044117319, "grad_norm": 0.5981160402297974, "learning_rate": 1.584338528814917e-05, "loss": 0.831, "step": 8362 }, { "epoch": 0.7472468559429937, "grad_norm": 0.45480284094810486, "learning_rate": 1.5832818831675943e-05, "loss": 0.9134, "step": 8363 }, { "epoch": 0.7473362074742556, "grad_norm": 0.4186408817768097, "learning_rate": 1.5822255237068357e-05, "loss": 0.9619, "step": 8364 }, { "epoch": 0.7474255590055174, "grad_norm": 0.5457665920257568, "learning_rate": 1.5811694505211182e-05, "loss": 0.9438, "step": 8365 }, { "epoch": 0.7475149105367793, "grad_norm": 0.485774427652359, "learning_rate": 1.5801136636989012e-05, "loss": 0.9763, "step": 8366 }, { "epoch": 0.7476042620680412, "grad_norm": 0.42799893021583557, "learning_rate": 1.5790581633286184e-05, "loss": 0.9938, "step": 8367 }, { "epoch": 0.7476936135993031, "grad_norm": 0.45776692032814026, "learning_rate": 1.5780029494986794e-05, "loss": 0.9387, "step": 8368 }, { "epoch": 0.747782965130565, "grad_norm": 0.5079134702682495, "learning_rate": 1.5769480222974685e-05, "loss": 0.9021, "step": 8369 }, { "epoch": 0.7478723166618267, "grad_norm": 0.4531920850276947, "learning_rate": 1.575893381813348e-05, "loss": 0.9242, "step": 8370 }, { "epoch": 0.7479616681930886, "grad_norm": 0.5917310118675232, "learning_rate": 1.5748390281346553e-05, "loss": 0.8605, "step": 8371 }, { "epoch": 0.7480510197243505, "grad_norm": 0.4818684756755829, "learning_rate": 1.573784961349704e-05, "loss": 0.9327, "step": 8372 }, { "epoch": 0.7481403712556124, "grad_norm": 0.4459517300128937, "learning_rate": 1.5727311815467825e-05, "loss": 0.9717, "step": 8373 }, { "epoch": 0.7482297227868743, "grad_norm": 0.4536815583705902, "learning_rate": 1.5716776888141583e-05, "loss": 0.9551, "step": 8374 }, { "epoch": 0.7483190743181362, "grad_norm": 0.7309669256210327, "learning_rate": 1.5706244832400696e-05, "loss": 0.8589, "step": 8375 }, { "epoch": 0.748408425849398, "grad_norm": 0.45956024527549744, "learning_rate": 1.5695715649127345e-05, "loss": 0.9848, "step": 8376 }, { "epoch": 0.7484977773806598, "grad_norm": 0.450041264295578, "learning_rate": 1.568518933920347e-05, "loss": 0.938, "step": 8377 }, { "epoch": 0.7485871289119217, "grad_norm": 0.45489928126335144, "learning_rate": 1.5674665903510755e-05, "loss": 1.0276, "step": 8378 }, { "epoch": 0.7486764804431836, "grad_norm": 0.42867419123649597, "learning_rate": 1.566414534293065e-05, "loss": 0.9341, "step": 8379 }, { "epoch": 0.7487658319744455, "grad_norm": 0.44365933537483215, "learning_rate": 1.5653627658344374e-05, "loss": 0.943, "step": 8380 }, { "epoch": 0.7488551835057073, "grad_norm": 0.4565197229385376, "learning_rate": 1.5643112850632884e-05, "loss": 0.8627, "step": 8381 }, { "epoch": 0.7489445350369692, "grad_norm": 0.4930249750614166, "learning_rate": 1.563260092067691e-05, "loss": 0.9596, "step": 8382 }, { "epoch": 0.7490338865682311, "grad_norm": 0.5552300214767456, "learning_rate": 1.5622091869356937e-05, "loss": 0.9906, "step": 8383 }, { "epoch": 0.7491232380994929, "grad_norm": 0.44053712487220764, "learning_rate": 1.5611585697553232e-05, "loss": 0.9295, "step": 8384 }, { "epoch": 0.7492125896307548, "grad_norm": 0.4973897635936737, "learning_rate": 1.5601082406145762e-05, "loss": 0.9121, "step": 8385 }, { "epoch": 0.7493019411620166, "grad_norm": 0.4836944043636322, "learning_rate": 1.5590581996014304e-05, "loss": 0.8984, "step": 8386 }, { "epoch": 0.7493912926932785, "grad_norm": 0.4505099654197693, "learning_rate": 1.5580084468038382e-05, "loss": 0.9337, "step": 8387 }, { "epoch": 0.7494806442245404, "grad_norm": 0.42860540747642517, "learning_rate": 1.556958982309728e-05, "loss": 0.9484, "step": 8388 }, { "epoch": 0.7495699957558023, "grad_norm": 0.5451123714447021, "learning_rate": 1.5559098062070028e-05, "loss": 0.9509, "step": 8389 }, { "epoch": 0.7496593472870642, "grad_norm": 0.5574720501899719, "learning_rate": 1.5548609185835444e-05, "loss": 0.8854, "step": 8390 }, { "epoch": 0.749748698818326, "grad_norm": 0.42226511240005493, "learning_rate": 1.5538123195272054e-05, "loss": 0.9356, "step": 8391 }, { "epoch": 0.7498380503495878, "grad_norm": 0.4949922561645508, "learning_rate": 1.5527640091258177e-05, "loss": 0.9313, "step": 8392 }, { "epoch": 0.7499274018808497, "grad_norm": 0.49576741456985474, "learning_rate": 1.5517159874671892e-05, "loss": 0.8921, "step": 8393 }, { "epoch": 0.7500167534121116, "grad_norm": 0.47223761677742004, "learning_rate": 1.550668254639105e-05, "loss": 0.9675, "step": 8394 }, { "epoch": 0.7501061049433735, "grad_norm": 0.4839581847190857, "learning_rate": 1.5496208107293197e-05, "loss": 0.982, "step": 8395 }, { "epoch": 0.7501954564746354, "grad_norm": 0.539761483669281, "learning_rate": 1.5485736558255697e-05, "loss": 0.9059, "step": 8396 }, { "epoch": 0.7502848080058973, "grad_norm": 0.4613410532474518, "learning_rate": 1.547526790015566e-05, "loss": 0.9384, "step": 8397 }, { "epoch": 0.750374159537159, "grad_norm": 0.47453439235687256, "learning_rate": 1.5464802133869942e-05, "loss": 0.9226, "step": 8398 }, { "epoch": 0.7504635110684209, "grad_norm": 0.5073251128196716, "learning_rate": 1.5454339260275165e-05, "loss": 0.9383, "step": 8399 }, { "epoch": 0.7505528625996828, "grad_norm": 0.6021450757980347, "learning_rate": 1.5443879280247704e-05, "loss": 0.9338, "step": 8400 }, { "epoch": 0.7506422141309447, "grad_norm": 0.4408600330352783, "learning_rate": 1.5433422194663693e-05, "loss": 0.965, "step": 8401 }, { "epoch": 0.7507315656622066, "grad_norm": 0.5570371747016907, "learning_rate": 1.542296800439903e-05, "loss": 0.9716, "step": 8402 }, { "epoch": 0.7508209171934684, "grad_norm": 0.48043861985206604, "learning_rate": 1.541251671032936e-05, "loss": 0.9822, "step": 8403 }, { "epoch": 0.7509102687247303, "grad_norm": 0.5600388646125793, "learning_rate": 1.540206831333011e-05, "loss": 0.9544, "step": 8404 }, { "epoch": 0.7509996202559921, "grad_norm": 0.48118168115615845, "learning_rate": 1.5391622814276408e-05, "loss": 0.9213, "step": 8405 }, { "epoch": 0.751088971787254, "grad_norm": 0.5037118196487427, "learning_rate": 1.538118021404319e-05, "loss": 0.8725, "step": 8406 }, { "epoch": 0.7511783233185159, "grad_norm": 0.5096782445907593, "learning_rate": 1.5370740513505143e-05, "loss": 0.9062, "step": 8407 }, { "epoch": 0.7512676748497777, "grad_norm": 0.450232595205307, "learning_rate": 1.53603037135367e-05, "loss": 0.9839, "step": 8408 }, { "epoch": 0.7513570263810396, "grad_norm": 0.4577409625053406, "learning_rate": 1.534986981501205e-05, "loss": 0.946, "step": 8409 }, { "epoch": 0.7514463779123015, "grad_norm": 0.4218578338623047, "learning_rate": 1.5339438818805152e-05, "loss": 0.9404, "step": 8410 }, { "epoch": 0.7515357294435634, "grad_norm": 0.5897673964500427, "learning_rate": 1.5329010725789704e-05, "loss": 0.9004, "step": 8411 }, { "epoch": 0.7516250809748252, "grad_norm": 0.4546933174133301, "learning_rate": 1.531858553683918e-05, "loss": 0.9354, "step": 8412 }, { "epoch": 0.751714432506087, "grad_norm": 0.48495376110076904, "learning_rate": 1.530816325282679e-05, "loss": 1.0102, "step": 8413 }, { "epoch": 0.7518037840373489, "grad_norm": 0.5461007952690125, "learning_rate": 1.5297743874625515e-05, "loss": 0.9229, "step": 8414 }, { "epoch": 0.7518931355686108, "grad_norm": 0.42798981070518494, "learning_rate": 1.5287327403108108e-05, "loss": 0.9756, "step": 8415 }, { "epoch": 0.7519824870998727, "grad_norm": 0.4769188463687897, "learning_rate": 1.527691383914702e-05, "loss": 0.9252, "step": 8416 }, { "epoch": 0.7520718386311346, "grad_norm": 0.5491362810134888, "learning_rate": 1.526650318361453e-05, "loss": 0.912, "step": 8417 }, { "epoch": 0.7521611901623964, "grad_norm": 0.4550434648990631, "learning_rate": 1.5256095437382622e-05, "loss": 0.9414, "step": 8418 }, { "epoch": 0.7522505416936582, "grad_norm": 0.5997094511985779, "learning_rate": 1.524569060132307e-05, "loss": 0.9087, "step": 8419 }, { "epoch": 0.7523398932249201, "grad_norm": 0.4672020971775055, "learning_rate": 1.523528867630738e-05, "loss": 0.8856, "step": 8420 }, { "epoch": 0.752429244756182, "grad_norm": 0.5284818410873413, "learning_rate": 1.5224889663206832e-05, "loss": 0.7954, "step": 8421 }, { "epoch": 0.7525185962874439, "grad_norm": 0.6233134269714355, "learning_rate": 1.521449356289245e-05, "loss": 0.9528, "step": 8422 }, { "epoch": 0.7526079478187058, "grad_norm": 0.5407315492630005, "learning_rate": 1.5204100376235036e-05, "loss": 0.9614, "step": 8423 }, { "epoch": 0.7526972993499677, "grad_norm": 0.5749227404594421, "learning_rate": 1.5193710104105092e-05, "loss": 0.8443, "step": 8424 }, { "epoch": 0.7527866508812294, "grad_norm": 0.4009944796562195, "learning_rate": 1.518332274737294e-05, "loss": 0.9648, "step": 8425 }, { "epoch": 0.7528760024124913, "grad_norm": 0.44395560026168823, "learning_rate": 1.5172938306908623e-05, "loss": 0.972, "step": 8426 }, { "epoch": 0.7529653539437532, "grad_norm": 0.5940225720405579, "learning_rate": 1.5162556783581971e-05, "loss": 0.9004, "step": 8427 }, { "epoch": 0.7530547054750151, "grad_norm": 0.4562346339225769, "learning_rate": 1.5152178178262516e-05, "loss": 0.9244, "step": 8428 }, { "epoch": 0.753144057006277, "grad_norm": 0.483516663312912, "learning_rate": 1.5141802491819584e-05, "loss": 0.9087, "step": 8429 }, { "epoch": 0.7532334085375388, "grad_norm": 0.668719470500946, "learning_rate": 1.513142972512226e-05, "loss": 0.7838, "step": 8430 }, { "epoch": 0.7533227600688007, "grad_norm": 0.5353193879127502, "learning_rate": 1.5121059879039367e-05, "loss": 0.8938, "step": 8431 }, { "epoch": 0.7534121116000625, "grad_norm": 0.5128493309020996, "learning_rate": 1.5110692954439492e-05, "loss": 0.9597, "step": 8432 }, { "epoch": 0.7535014631313244, "grad_norm": 0.4552728831768036, "learning_rate": 1.5100328952190973e-05, "loss": 0.9204, "step": 8433 }, { "epoch": 0.7535908146625863, "grad_norm": 0.5088723301887512, "learning_rate": 1.5089967873161909e-05, "loss": 0.9603, "step": 8434 }, { "epoch": 0.7536801661938481, "grad_norm": 0.6546921133995056, "learning_rate": 1.5079609718220166e-05, "loss": 0.9088, "step": 8435 }, { "epoch": 0.75376951772511, "grad_norm": 0.41693705320358276, "learning_rate": 1.506925448823332e-05, "loss": 0.9853, "step": 8436 }, { "epoch": 0.7538588692563719, "grad_norm": 0.5113435387611389, "learning_rate": 1.5058902184068741e-05, "loss": 0.8789, "step": 8437 }, { "epoch": 0.7539482207876338, "grad_norm": 0.4677006006240845, "learning_rate": 1.5048552806593552e-05, "loss": 0.9137, "step": 8438 }, { "epoch": 0.7540375723188956, "grad_norm": 0.45884403586387634, "learning_rate": 1.5038206356674623e-05, "loss": 0.9903, "step": 8439 }, { "epoch": 0.7541269238501574, "grad_norm": 0.49759143590927124, "learning_rate": 1.5027862835178574e-05, "loss": 0.905, "step": 8440 }, { "epoch": 0.7542162753814193, "grad_norm": 0.4578782916069031, "learning_rate": 1.5017522242971794e-05, "loss": 0.9329, "step": 8441 }, { "epoch": 0.7543056269126812, "grad_norm": 0.40164539217948914, "learning_rate": 1.5007184580920408e-05, "loss": 1.0047, "step": 8442 }, { "epoch": 0.7543949784439431, "grad_norm": 0.4076503813266754, "learning_rate": 1.499684984989031e-05, "loss": 0.9041, "step": 8443 }, { "epoch": 0.754484329975205, "grad_norm": 0.5915509462356567, "learning_rate": 1.4986518050747145e-05, "loss": 0.9639, "step": 8444 }, { "epoch": 0.7545736815064669, "grad_norm": 0.3953361511230469, "learning_rate": 1.4976189184356327e-05, "loss": 0.9684, "step": 8445 }, { "epoch": 0.7546630330377286, "grad_norm": 0.5043690800666809, "learning_rate": 1.4965863251582974e-05, "loss": 0.9368, "step": 8446 }, { "epoch": 0.7547523845689905, "grad_norm": 0.4337480962276459, "learning_rate": 1.495554025329201e-05, "loss": 0.907, "step": 8447 }, { "epoch": 0.7548417361002524, "grad_norm": 0.5515368580818176, "learning_rate": 1.4945220190348102e-05, "loss": 0.9356, "step": 8448 }, { "epoch": 0.7549310876315143, "grad_norm": 0.5478143692016602, "learning_rate": 1.4934903063615657e-05, "loss": 0.967, "step": 8449 }, { "epoch": 0.7550204391627762, "grad_norm": 0.5199536085128784, "learning_rate": 1.4924588873958844e-05, "loss": 0.8784, "step": 8450 }, { "epoch": 0.755109790694038, "grad_norm": 0.667113184928894, "learning_rate": 1.4914277622241597e-05, "loss": 0.9662, "step": 8451 }, { "epoch": 0.7551991422252999, "grad_norm": 0.4980166256427765, "learning_rate": 1.4903969309327581e-05, "loss": 0.9127, "step": 8452 }, { "epoch": 0.7552884937565617, "grad_norm": 0.48468539118766785, "learning_rate": 1.4893663936080232e-05, "loss": 0.9079, "step": 8453 }, { "epoch": 0.7553778452878236, "grad_norm": 0.43085187673568726, "learning_rate": 1.4883361503362736e-05, "loss": 1.0368, "step": 8454 }, { "epoch": 0.7554671968190855, "grad_norm": 0.5685251355171204, "learning_rate": 1.4873062012038047e-05, "loss": 0.8219, "step": 8455 }, { "epoch": 0.7555565483503474, "grad_norm": 0.5110030174255371, "learning_rate": 1.4862765462968826e-05, "loss": 0.9895, "step": 8456 }, { "epoch": 0.7556458998816092, "grad_norm": 0.5514484643936157, "learning_rate": 1.485247185701753e-05, "loss": 0.9602, "step": 8457 }, { "epoch": 0.7557352514128711, "grad_norm": 0.47170817852020264, "learning_rate": 1.4842181195046361e-05, "loss": 0.9424, "step": 8458 }, { "epoch": 0.755824602944133, "grad_norm": 0.42950955033302307, "learning_rate": 1.483189347791728e-05, "loss": 0.9495, "step": 8459 }, { "epoch": 0.7559139544753948, "grad_norm": 0.4732508659362793, "learning_rate": 1.4821608706491996e-05, "loss": 0.9515, "step": 8460 }, { "epoch": 0.7560033060066567, "grad_norm": 0.47231292724609375, "learning_rate": 1.4811326881631937e-05, "loss": 0.9472, "step": 8461 }, { "epoch": 0.7560926575379185, "grad_norm": 0.4015584886074066, "learning_rate": 1.4801048004198342e-05, "loss": 0.947, "step": 8462 }, { "epoch": 0.7561820090691804, "grad_norm": 0.530529797077179, "learning_rate": 1.4790772075052173e-05, "loss": 0.9262, "step": 8463 }, { "epoch": 0.7562713606004423, "grad_norm": 0.5107260942459106, "learning_rate": 1.478049909505414e-05, "loss": 0.9101, "step": 8464 }, { "epoch": 0.7563607121317042, "grad_norm": 0.4164203703403473, "learning_rate": 1.4770229065064738e-05, "loss": 1.0175, "step": 8465 }, { "epoch": 0.7564500636629661, "grad_norm": 0.4908786714076996, "learning_rate": 1.475996198594416e-05, "loss": 0.9977, "step": 8466 }, { "epoch": 0.7565394151942278, "grad_norm": 0.5216073393821716, "learning_rate": 1.4749697858552398e-05, "loss": 0.9263, "step": 8467 }, { "epoch": 0.7566287667254897, "grad_norm": 0.48185113072395325, "learning_rate": 1.4739436683749181e-05, "loss": 0.8621, "step": 8468 }, { "epoch": 0.7567181182567516, "grad_norm": 0.49778178334236145, "learning_rate": 1.472917846239399e-05, "loss": 0.8806, "step": 8469 }, { "epoch": 0.7568074697880135, "grad_norm": 0.5073883533477783, "learning_rate": 1.4718923195346062e-05, "loss": 0.9601, "step": 8470 }, { "epoch": 0.7568968213192754, "grad_norm": 0.4837455153465271, "learning_rate": 1.4708670883464393e-05, "loss": 0.9467, "step": 8471 }, { "epoch": 0.7569861728505373, "grad_norm": 0.38693663477897644, "learning_rate": 1.469842152760771e-05, "loss": 0.9904, "step": 8472 }, { "epoch": 0.7570755243817991, "grad_norm": 0.46666041016578674, "learning_rate": 1.4688175128634512e-05, "loss": 0.9895, "step": 8473 }, { "epoch": 0.7571648759130609, "grad_norm": 0.42904338240623474, "learning_rate": 1.4677931687403046e-05, "loss": 0.999, "step": 8474 }, { "epoch": 0.7572542274443228, "grad_norm": 0.4167872369289398, "learning_rate": 1.4667691204771322e-05, "loss": 1.0134, "step": 8475 }, { "epoch": 0.7573435789755847, "grad_norm": 0.43804195523262024, "learning_rate": 1.4657453681597055e-05, "loss": 0.9359, "step": 8476 }, { "epoch": 0.7574329305068466, "grad_norm": 0.5330228805541992, "learning_rate": 1.4647219118737771e-05, "loss": 0.842, "step": 8477 }, { "epoch": 0.7575222820381085, "grad_norm": 0.4981614649295807, "learning_rate": 1.463698751705072e-05, "loss": 0.9647, "step": 8478 }, { "epoch": 0.7576116335693703, "grad_norm": 0.4153805077075958, "learning_rate": 1.4626758877392904e-05, "loss": 0.9969, "step": 8479 }, { "epoch": 0.7577009851006321, "grad_norm": 0.4333755671977997, "learning_rate": 1.461653320062109e-05, "loss": 0.9647, "step": 8480 }, { "epoch": 0.757790336631894, "grad_norm": 0.5431153178215027, "learning_rate": 1.4606310487591773e-05, "loss": 0.9157, "step": 8481 }, { "epoch": 0.7578796881631559, "grad_norm": 0.4706348180770874, "learning_rate": 1.4596090739161228e-05, "loss": 0.8865, "step": 8482 }, { "epoch": 0.7579690396944178, "grad_norm": 0.5356218814849854, "learning_rate": 1.4585873956185458e-05, "loss": 0.9127, "step": 8483 }, { "epoch": 0.7580583912256796, "grad_norm": 0.45213526487350464, "learning_rate": 1.4575660139520237e-05, "loss": 0.9477, "step": 8484 }, { "epoch": 0.7581477427569415, "grad_norm": 0.5066707134246826, "learning_rate": 1.4565449290021088e-05, "loss": 0.9715, "step": 8485 }, { "epoch": 0.7582370942882034, "grad_norm": 0.4998951554298401, "learning_rate": 1.4555241408543252e-05, "loss": 0.9402, "step": 8486 }, { "epoch": 0.7583264458194652, "grad_norm": 0.46853405237197876, "learning_rate": 1.454503649594176e-05, "loss": 0.9371, "step": 8487 }, { "epoch": 0.7584157973507271, "grad_norm": 0.5197616219520569, "learning_rate": 1.4534834553071386e-05, "loss": 0.9598, "step": 8488 }, { "epoch": 0.7585051488819889, "grad_norm": 0.4408440589904785, "learning_rate": 1.452463558078665e-05, "loss": 0.9645, "step": 8489 }, { "epoch": 0.7585945004132508, "grad_norm": 0.6004998087882996, "learning_rate": 1.4514439579941818e-05, "loss": 0.9181, "step": 8490 }, { "epoch": 0.7586838519445127, "grad_norm": 0.4586280286312103, "learning_rate": 1.4504246551390927e-05, "loss": 0.9704, "step": 8491 }, { "epoch": 0.7587732034757746, "grad_norm": 0.4923453629016876, "learning_rate": 1.4494056495987746e-05, "loss": 0.9155, "step": 8492 }, { "epoch": 0.7588625550070365, "grad_norm": 0.5735955238342285, "learning_rate": 1.448386941458581e-05, "loss": 0.9033, "step": 8493 }, { "epoch": 0.7589519065382982, "grad_norm": 0.40873488783836365, "learning_rate": 1.447368530803837e-05, "loss": 0.9818, "step": 8494 }, { "epoch": 0.7590412580695601, "grad_norm": 0.5386642217636108, "learning_rate": 1.4463504177198473e-05, "loss": 0.8663, "step": 8495 }, { "epoch": 0.759130609600822, "grad_norm": 0.43717023730278015, "learning_rate": 1.445332602291889e-05, "loss": 0.9881, "step": 8496 }, { "epoch": 0.7592199611320839, "grad_norm": 0.4573476314544678, "learning_rate": 1.4443150846052167e-05, "loss": 0.9784, "step": 8497 }, { "epoch": 0.7593093126633458, "grad_norm": 0.46079909801483154, "learning_rate": 1.4432978647450557e-05, "loss": 0.9627, "step": 8498 }, { "epoch": 0.7593986641946077, "grad_norm": 0.4422478973865509, "learning_rate": 1.44228094279661e-05, "loss": 0.9779, "step": 8499 }, { "epoch": 0.7594880157258695, "grad_norm": 0.42531049251556396, "learning_rate": 1.4412643188450581e-05, "loss": 0.9694, "step": 8500 }, { "epoch": 0.7595773672571313, "grad_norm": 0.5261896848678589, "learning_rate": 1.440247992975553e-05, "loss": 0.8767, "step": 8501 }, { "epoch": 0.7596667187883932, "grad_norm": 0.4981927275657654, "learning_rate": 1.4392319652732222e-05, "loss": 0.9391, "step": 8502 }, { "epoch": 0.7597560703196551, "grad_norm": 0.42882490158081055, "learning_rate": 1.43821623582317e-05, "loss": 0.9519, "step": 8503 }, { "epoch": 0.759845421850917, "grad_norm": 0.558502197265625, "learning_rate": 1.4372008047104735e-05, "loss": 0.9239, "step": 8504 }, { "epoch": 0.7599347733821789, "grad_norm": 0.4893379509449005, "learning_rate": 1.4361856720201866e-05, "loss": 0.952, "step": 8505 }, { "epoch": 0.7600241249134407, "grad_norm": 0.5038098096847534, "learning_rate": 1.4351708378373386e-05, "loss": 0.9585, "step": 8506 }, { "epoch": 0.7601134764447026, "grad_norm": 0.46212783455848694, "learning_rate": 1.4341563022469296e-05, "loss": 0.9344, "step": 8507 }, { "epoch": 0.7602028279759644, "grad_norm": 0.43376246094703674, "learning_rate": 1.4331420653339395e-05, "loss": 0.9815, "step": 8508 }, { "epoch": 0.7602921795072263, "grad_norm": 0.5722975730895996, "learning_rate": 1.4321281271833214e-05, "loss": 0.9366, "step": 8509 }, { "epoch": 0.7603815310384882, "grad_norm": 0.5890293717384338, "learning_rate": 1.4311144878800037e-05, "loss": 0.842, "step": 8510 }, { "epoch": 0.76047088256975, "grad_norm": 0.5008702278137207, "learning_rate": 1.4301011475088889e-05, "loss": 0.9603, "step": 8511 }, { "epoch": 0.7605602341010119, "grad_norm": 0.4864904284477234, "learning_rate": 1.4290881061548555e-05, "loss": 0.9232, "step": 8512 }, { "epoch": 0.7606495856322738, "grad_norm": 0.4470934271812439, "learning_rate": 1.4280753639027566e-05, "loss": 0.949, "step": 8513 }, { "epoch": 0.7607389371635357, "grad_norm": 0.5684083700180054, "learning_rate": 1.4270629208374197e-05, "loss": 0.8836, "step": 8514 }, { "epoch": 0.7608282886947975, "grad_norm": 0.4778299033641815, "learning_rate": 1.4260507770436482e-05, "loss": 0.9575, "step": 8515 }, { "epoch": 0.7609176402260593, "grad_norm": 0.4669052064418793, "learning_rate": 1.4250389326062207e-05, "loss": 0.9437, "step": 8516 }, { "epoch": 0.7610069917573212, "grad_norm": 0.4639929533004761, "learning_rate": 1.424027387609888e-05, "loss": 0.9677, "step": 8517 }, { "epoch": 0.7610963432885831, "grad_norm": 0.40526002645492554, "learning_rate": 1.4230161421393783e-05, "loss": 0.9132, "step": 8518 }, { "epoch": 0.761185694819845, "grad_norm": 0.45086291432380676, "learning_rate": 1.422005196279395e-05, "loss": 0.9638, "step": 8519 }, { "epoch": 0.7612750463511069, "grad_norm": 0.48102667927742004, "learning_rate": 1.420994550114615e-05, "loss": 0.94, "step": 8520 }, { "epoch": 0.7613643978823688, "grad_norm": 0.48567044734954834, "learning_rate": 1.4199842037296906e-05, "loss": 0.958, "step": 8521 }, { "epoch": 0.7614537494136305, "grad_norm": 0.398173063993454, "learning_rate": 1.4189741572092496e-05, "loss": 1.0515, "step": 8522 }, { "epoch": 0.7615431009448924, "grad_norm": 0.4758659601211548, "learning_rate": 1.4179644106378942e-05, "loss": 0.9731, "step": 8523 }, { "epoch": 0.7616324524761543, "grad_norm": 0.5288426280021667, "learning_rate": 1.4169549641002006e-05, "loss": 0.9146, "step": 8524 }, { "epoch": 0.7617218040074162, "grad_norm": 0.46394291520118713, "learning_rate": 1.4159458176807217e-05, "loss": 0.9605, "step": 8525 }, { "epoch": 0.7618111555386781, "grad_norm": 0.4080126881599426, "learning_rate": 1.4149369714639853e-05, "loss": 0.9506, "step": 8526 }, { "epoch": 0.76190050706994, "grad_norm": 0.5326384902000427, "learning_rate": 1.4139284255344897e-05, "loss": 0.8812, "step": 8527 }, { "epoch": 0.7619898586012018, "grad_norm": 0.3904532194137573, "learning_rate": 1.412920179976714e-05, "loss": 0.9381, "step": 8528 }, { "epoch": 0.7620792101324636, "grad_norm": 0.4524264931678772, "learning_rate": 1.411912234875108e-05, "loss": 1.029, "step": 8529 }, { "epoch": 0.7621685616637255, "grad_norm": 0.4898856580257416, "learning_rate": 1.4109045903141006e-05, "loss": 0.9016, "step": 8530 }, { "epoch": 0.7622579131949874, "grad_norm": 0.5062960982322693, "learning_rate": 1.4098972463780885e-05, "loss": 0.9609, "step": 8531 }, { "epoch": 0.7623472647262493, "grad_norm": 0.5530799627304077, "learning_rate": 1.4088902031514507e-05, "loss": 0.9659, "step": 8532 }, { "epoch": 0.7624366162575111, "grad_norm": 0.4216810464859009, "learning_rate": 1.4078834607185364e-05, "loss": 0.9497, "step": 8533 }, { "epoch": 0.762525967788773, "grad_norm": 0.6185227036476135, "learning_rate": 1.4068770191636716e-05, "loss": 0.8765, "step": 8534 }, { "epoch": 0.7626153193200349, "grad_norm": 0.5121302008628845, "learning_rate": 1.405870878571156e-05, "loss": 0.914, "step": 8535 }, { "epoch": 0.7627046708512967, "grad_norm": 0.42362403869628906, "learning_rate": 1.4048650390252671e-05, "loss": 0.9526, "step": 8536 }, { "epoch": 0.7627940223825586, "grad_norm": 0.4262167513370514, "learning_rate": 1.4038595006102506e-05, "loss": 0.9338, "step": 8537 }, { "epoch": 0.7628833739138204, "grad_norm": 0.655556857585907, "learning_rate": 1.4028542634103331e-05, "loss": 0.9118, "step": 8538 }, { "epoch": 0.7629727254450823, "grad_norm": 0.5181341767311096, "learning_rate": 1.4018493275097139e-05, "loss": 0.8456, "step": 8539 }, { "epoch": 0.7630620769763442, "grad_norm": 0.46238502860069275, "learning_rate": 1.4008446929925672e-05, "loss": 0.9437, "step": 8540 }, { "epoch": 0.7631514285076061, "grad_norm": 0.5332851409912109, "learning_rate": 1.3998403599430415e-05, "loss": 0.9058, "step": 8541 }, { "epoch": 0.7632407800388679, "grad_norm": 0.47178998589515686, "learning_rate": 1.398836328445261e-05, "loss": 0.9194, "step": 8542 }, { "epoch": 0.7633301315701297, "grad_norm": 0.5274223685264587, "learning_rate": 1.3978325985833229e-05, "loss": 0.9471, "step": 8543 }, { "epoch": 0.7634194831013916, "grad_norm": 0.5024911165237427, "learning_rate": 1.3968291704413018e-05, "loss": 1.0011, "step": 8544 }, { "epoch": 0.7635088346326535, "grad_norm": 0.5870459675788879, "learning_rate": 1.3958260441032445e-05, "loss": 0.8727, "step": 8545 }, { "epoch": 0.7635981861639154, "grad_norm": 0.592512309551239, "learning_rate": 1.3948232196531746e-05, "loss": 0.8981, "step": 8546 }, { "epoch": 0.7636875376951773, "grad_norm": 0.511677086353302, "learning_rate": 1.3938206971750878e-05, "loss": 0.9192, "step": 8547 }, { "epoch": 0.7637768892264392, "grad_norm": 0.4113241732120514, "learning_rate": 1.3928184767529562e-05, "loss": 0.912, "step": 8548 }, { "epoch": 0.7638662407577009, "grad_norm": 0.4762282967567444, "learning_rate": 1.3918165584707271e-05, "loss": 0.9661, "step": 8549 }, { "epoch": 0.7639555922889628, "grad_norm": 0.5495911836624146, "learning_rate": 1.3908149424123217e-05, "loss": 1.0346, "step": 8550 }, { "epoch": 0.7640449438202247, "grad_norm": 0.4995447099208832, "learning_rate": 1.3898136286616364e-05, "loss": 0.9112, "step": 8551 }, { "epoch": 0.7641342953514866, "grad_norm": 0.48613351583480835, "learning_rate": 1.3888126173025412e-05, "loss": 0.9372, "step": 8552 }, { "epoch": 0.7642236468827485, "grad_norm": 0.502858579158783, "learning_rate": 1.3878119084188818e-05, "loss": 0.9525, "step": 8553 }, { "epoch": 0.7643129984140103, "grad_norm": 0.4795863628387451, "learning_rate": 1.3868115020944783e-05, "loss": 0.9097, "step": 8554 }, { "epoch": 0.7644023499452722, "grad_norm": 0.49957504868507385, "learning_rate": 1.385811398413125e-05, "loss": 0.8775, "step": 8555 }, { "epoch": 0.764491701476534, "grad_norm": 0.5270972847938538, "learning_rate": 1.3848115974585934e-05, "loss": 0.9626, "step": 8556 }, { "epoch": 0.7645810530077959, "grad_norm": 0.4184163510799408, "learning_rate": 1.3838120993146243e-05, "loss": 0.8799, "step": 8557 }, { "epoch": 0.7646704045390578, "grad_norm": 0.5301498770713806, "learning_rate": 1.3828129040649374e-05, "loss": 0.8824, "step": 8558 }, { "epoch": 0.7647597560703197, "grad_norm": 0.4511061906814575, "learning_rate": 1.381814011793226e-05, "loss": 0.916, "step": 8559 }, { "epoch": 0.7648491076015815, "grad_norm": 0.4901033043861389, "learning_rate": 1.3808154225831583e-05, "loss": 0.8872, "step": 8560 }, { "epoch": 0.7649384591328434, "grad_norm": 0.45118212699890137, "learning_rate": 1.3798171365183771e-05, "loss": 0.9832, "step": 8561 }, { "epoch": 0.7650278106641053, "grad_norm": 0.42388466000556946, "learning_rate": 1.3788191536824984e-05, "loss": 0.9311, "step": 8562 }, { "epoch": 0.7651171621953671, "grad_norm": 0.4067128598690033, "learning_rate": 1.3778214741591167e-05, "loss": 0.9677, "step": 8563 }, { "epoch": 0.765206513726629, "grad_norm": 0.5313117504119873, "learning_rate": 1.3768240980317948e-05, "loss": 0.8713, "step": 8564 }, { "epoch": 0.7652958652578908, "grad_norm": 0.40129441022872925, "learning_rate": 1.3758270253840744e-05, "loss": 0.9213, "step": 8565 }, { "epoch": 0.7653852167891527, "grad_norm": 0.5199376940727234, "learning_rate": 1.374830256299472e-05, "loss": 0.9241, "step": 8566 }, { "epoch": 0.7654745683204146, "grad_norm": 0.4660845696926117, "learning_rate": 1.3738337908614768e-05, "loss": 0.9697, "step": 8567 }, { "epoch": 0.7655639198516765, "grad_norm": 0.5757974982261658, "learning_rate": 1.3728376291535555e-05, "loss": 0.8874, "step": 8568 }, { "epoch": 0.7656532713829384, "grad_norm": 0.45287203788757324, "learning_rate": 1.3718417712591441e-05, "loss": 0.9749, "step": 8569 }, { "epoch": 0.7657426229142001, "grad_norm": 0.4740961492061615, "learning_rate": 1.3708462172616577e-05, "loss": 0.9843, "step": 8570 }, { "epoch": 0.765831974445462, "grad_norm": 0.48708444833755493, "learning_rate": 1.3698509672444843e-05, "loss": 0.9231, "step": 8571 }, { "epoch": 0.7659213259767239, "grad_norm": 0.4899809956550598, "learning_rate": 1.3688560212909873e-05, "loss": 0.9621, "step": 8572 }, { "epoch": 0.7660106775079858, "grad_norm": 0.4719412922859192, "learning_rate": 1.3678613794845035e-05, "loss": 0.9742, "step": 8573 }, { "epoch": 0.7661000290392477, "grad_norm": 0.44776198267936707, "learning_rate": 1.3668670419083457e-05, "loss": 0.8969, "step": 8574 }, { "epoch": 0.7661893805705096, "grad_norm": 0.46503016352653503, "learning_rate": 1.3658730086457988e-05, "loss": 0.9827, "step": 8575 }, { "epoch": 0.7662787321017714, "grad_norm": 0.5194110870361328, "learning_rate": 1.3648792797801263e-05, "loss": 0.959, "step": 8576 }, { "epoch": 0.7663680836330332, "grad_norm": 0.46184584498405457, "learning_rate": 1.36388585539456e-05, "loss": 0.9375, "step": 8577 }, { "epoch": 0.7664574351642951, "grad_norm": 0.416530579328537, "learning_rate": 1.3628927355723114e-05, "loss": 0.988, "step": 8578 }, { "epoch": 0.766546786695557, "grad_norm": 0.4688471257686615, "learning_rate": 1.3618999203965654e-05, "loss": 0.9492, "step": 8579 }, { "epoch": 0.7666361382268189, "grad_norm": 0.635256826877594, "learning_rate": 1.3609074099504798e-05, "loss": 0.8742, "step": 8580 }, { "epoch": 0.7667254897580807, "grad_norm": 0.446486234664917, "learning_rate": 1.3599152043171893e-05, "loss": 0.9117, "step": 8581 }, { "epoch": 0.7668148412893426, "grad_norm": 0.5363548994064331, "learning_rate": 1.3589233035798005e-05, "loss": 0.9194, "step": 8582 }, { "epoch": 0.7669041928206045, "grad_norm": 0.5018768906593323, "learning_rate": 1.357931707821396e-05, "loss": 0.9498, "step": 8583 }, { "epoch": 0.7669935443518663, "grad_norm": 0.43810418248176575, "learning_rate": 1.3569404171250328e-05, "loss": 0.9512, "step": 8584 }, { "epoch": 0.7670828958831282, "grad_norm": 0.45606479048728943, "learning_rate": 1.355949431573742e-05, "loss": 0.9449, "step": 8585 }, { "epoch": 0.76717224741439, "grad_norm": 0.5644077062606812, "learning_rate": 1.354958751250529e-05, "loss": 0.9936, "step": 8586 }, { "epoch": 0.7672615989456519, "grad_norm": 0.483189195394516, "learning_rate": 1.3539683762383753e-05, "loss": 0.8959, "step": 8587 }, { "epoch": 0.7673509504769138, "grad_norm": 0.497502863407135, "learning_rate": 1.3529783066202329e-05, "loss": 0.9242, "step": 8588 }, { "epoch": 0.7674403020081757, "grad_norm": 0.5695610046386719, "learning_rate": 1.3519885424790313e-05, "loss": 0.8977, "step": 8589 }, { "epoch": 0.7675296535394376, "grad_norm": 0.5756702423095703, "learning_rate": 1.3509990838976744e-05, "loss": 0.8939, "step": 8590 }, { "epoch": 0.7676190050706994, "grad_norm": 0.5338248014450073, "learning_rate": 1.3500099309590397e-05, "loss": 0.9609, "step": 8591 }, { "epoch": 0.7677083566019612, "grad_norm": 0.46391648054122925, "learning_rate": 1.3490210837459799e-05, "loss": 0.9912, "step": 8592 }, { "epoch": 0.7677977081332231, "grad_norm": 0.5242441892623901, "learning_rate": 1.3480325423413204e-05, "loss": 0.8846, "step": 8593 }, { "epoch": 0.767887059664485, "grad_norm": 0.45362526178359985, "learning_rate": 1.3470443068278626e-05, "loss": 0.9602, "step": 8594 }, { "epoch": 0.7679764111957469, "grad_norm": 0.40796563029289246, "learning_rate": 1.3460563772883822e-05, "loss": 0.9368, "step": 8595 }, { "epoch": 0.7680657627270088, "grad_norm": 0.481330931186676, "learning_rate": 1.34506875380563e-05, "loss": 0.9077, "step": 8596 }, { "epoch": 0.7681551142582707, "grad_norm": 0.40942972898483276, "learning_rate": 1.3440814364623267e-05, "loss": 1.0025, "step": 8597 }, { "epoch": 0.7682444657895324, "grad_norm": 0.533697247505188, "learning_rate": 1.3430944253411727e-05, "loss": 0.8807, "step": 8598 }, { "epoch": 0.7683338173207943, "grad_norm": 0.4914191961288452, "learning_rate": 1.34210772052484e-05, "loss": 0.909, "step": 8599 }, { "epoch": 0.7684231688520562, "grad_norm": 0.4288891851902008, "learning_rate": 1.3411213220959773e-05, "loss": 0.9436, "step": 8600 }, { "epoch": 0.7685125203833181, "grad_norm": 0.4495335519313812, "learning_rate": 1.340135230137204e-05, "loss": 0.9656, "step": 8601 }, { "epoch": 0.76860187191458, "grad_norm": 0.46838313341140747, "learning_rate": 1.339149444731116e-05, "loss": 0.9274, "step": 8602 }, { "epoch": 0.7686912234458418, "grad_norm": 0.5495761632919312, "learning_rate": 1.3381639659602841e-05, "loss": 0.9106, "step": 8603 }, { "epoch": 0.7687805749771037, "grad_norm": 0.4932359755039215, "learning_rate": 1.3371787939072522e-05, "loss": 0.9227, "step": 8604 }, { "epoch": 0.7688699265083655, "grad_norm": 0.5239490270614624, "learning_rate": 1.3361939286545388e-05, "loss": 0.9407, "step": 8605 }, { "epoch": 0.7689592780396274, "grad_norm": 0.46050575375556946, "learning_rate": 1.335209370284638e-05, "loss": 0.966, "step": 8606 }, { "epoch": 0.7690486295708893, "grad_norm": 0.49059468507766724, "learning_rate": 1.3342251188800175e-05, "loss": 0.9366, "step": 8607 }, { "epoch": 0.7691379811021511, "grad_norm": 0.46661099791526794, "learning_rate": 1.333241174523116e-05, "loss": 0.9926, "step": 8608 }, { "epoch": 0.769227332633413, "grad_norm": 0.4436168372631073, "learning_rate": 1.3322575372963515e-05, "loss": 0.969, "step": 8609 }, { "epoch": 0.7693166841646749, "grad_norm": 0.4760691225528717, "learning_rate": 1.3312742072821127e-05, "loss": 0.943, "step": 8610 }, { "epoch": 0.7694060356959367, "grad_norm": 0.4735669195652008, "learning_rate": 1.3302911845627658e-05, "loss": 0.9113, "step": 8611 }, { "epoch": 0.7694953872271986, "grad_norm": 0.515501081943512, "learning_rate": 1.3293084692206476e-05, "loss": 0.8752, "step": 8612 }, { "epoch": 0.7695847387584605, "grad_norm": 0.46182894706726074, "learning_rate": 1.3283260613380727e-05, "loss": 0.9165, "step": 8613 }, { "epoch": 0.7696740902897223, "grad_norm": 0.5648226737976074, "learning_rate": 1.3273439609973271e-05, "loss": 0.9357, "step": 8614 }, { "epoch": 0.7697634418209842, "grad_norm": 0.5595985054969788, "learning_rate": 1.326362168280672e-05, "loss": 0.8938, "step": 8615 }, { "epoch": 0.7698527933522461, "grad_norm": 0.5155600309371948, "learning_rate": 1.3253806832703437e-05, "loss": 0.9146, "step": 8616 }, { "epoch": 0.769942144883508, "grad_norm": 0.4387071430683136, "learning_rate": 1.3243995060485537e-05, "loss": 0.9607, "step": 8617 }, { "epoch": 0.7700314964147698, "grad_norm": 0.45591017603874207, "learning_rate": 1.3234186366974822e-05, "loss": 0.9637, "step": 8618 }, { "epoch": 0.7701208479460316, "grad_norm": 0.47090432047843933, "learning_rate": 1.3224380752992898e-05, "loss": 0.9757, "step": 8619 }, { "epoch": 0.7702101994772935, "grad_norm": 0.45392322540283203, "learning_rate": 1.3214578219361085e-05, "loss": 0.961, "step": 8620 }, { "epoch": 0.7702995510085554, "grad_norm": 0.4720221161842346, "learning_rate": 1.3204778766900445e-05, "loss": 0.9789, "step": 8621 }, { "epoch": 0.7703889025398173, "grad_norm": 0.674929141998291, "learning_rate": 1.3194982396431798e-05, "loss": 0.8242, "step": 8622 }, { "epoch": 0.7704782540710792, "grad_norm": 0.5851225256919861, "learning_rate": 1.3185189108775687e-05, "loss": 0.87, "step": 8623 }, { "epoch": 0.770567605602341, "grad_norm": 0.5186543464660645, "learning_rate": 1.3175398904752407e-05, "loss": 0.8479, "step": 8624 }, { "epoch": 0.7706569571336028, "grad_norm": 0.5100897550582886, "learning_rate": 1.3165611785181986e-05, "loss": 0.9498, "step": 8625 }, { "epoch": 0.7707463086648647, "grad_norm": 0.43524983525276184, "learning_rate": 1.3155827750884209e-05, "loss": 1.0058, "step": 8626 }, { "epoch": 0.7708356601961266, "grad_norm": 0.6012685298919678, "learning_rate": 1.3146046802678602e-05, "loss": 0.8363, "step": 8627 }, { "epoch": 0.7709250117273885, "grad_norm": 0.5322076082229614, "learning_rate": 1.31362689413844e-05, "loss": 0.9146, "step": 8628 }, { "epoch": 0.7710143632586504, "grad_norm": 0.5446600317955017, "learning_rate": 1.3126494167820607e-05, "loss": 0.8362, "step": 8629 }, { "epoch": 0.7711037147899122, "grad_norm": 0.4060341417789459, "learning_rate": 1.311672248280597e-05, "loss": 1.0057, "step": 8630 }, { "epoch": 0.7711930663211741, "grad_norm": 0.45972713828086853, "learning_rate": 1.310695388715898e-05, "loss": 0.9607, "step": 8631 }, { "epoch": 0.7712824178524359, "grad_norm": 0.4327675402164459, "learning_rate": 1.3097188381697845e-05, "loss": 0.9479, "step": 8632 }, { "epoch": 0.7713717693836978, "grad_norm": 0.514522135257721, "learning_rate": 1.3087425967240557e-05, "loss": 0.9165, "step": 8633 }, { "epoch": 0.7714611209149597, "grad_norm": 0.531057596206665, "learning_rate": 1.307766664460479e-05, "loss": 0.8906, "step": 8634 }, { "epoch": 0.7715504724462215, "grad_norm": 0.5103472471237183, "learning_rate": 1.3067910414608003e-05, "loss": 1.0145, "step": 8635 }, { "epoch": 0.7716398239774834, "grad_norm": 0.519164502620697, "learning_rate": 1.305815727806739e-05, "loss": 0.9374, "step": 8636 }, { "epoch": 0.7717291755087453, "grad_norm": 0.5133655071258545, "learning_rate": 1.3048407235799876e-05, "loss": 0.9235, "step": 8637 }, { "epoch": 0.7718185270400072, "grad_norm": 0.5075699687004089, "learning_rate": 1.3038660288622145e-05, "loss": 0.9772, "step": 8638 }, { "epoch": 0.771907878571269, "grad_norm": 0.545677900314331, "learning_rate": 1.302891643735058e-05, "loss": 0.8661, "step": 8639 }, { "epoch": 0.7719972301025309, "grad_norm": 0.5071326494216919, "learning_rate": 1.3019175682801349e-05, "loss": 0.8474, "step": 8640 }, { "epoch": 0.7720865816337927, "grad_norm": 0.6671762466430664, "learning_rate": 1.3009438025790337e-05, "loss": 0.8427, "step": 8641 }, { "epoch": 0.7721759331650546, "grad_norm": 0.4839807450771332, "learning_rate": 1.2999703467133183e-05, "loss": 0.9872, "step": 8642 }, { "epoch": 0.7722652846963165, "grad_norm": 0.5609176754951477, "learning_rate": 1.2989972007645263e-05, "loss": 0.9293, "step": 8643 }, { "epoch": 0.7723546362275784, "grad_norm": 0.5578341484069824, "learning_rate": 1.2980243648141682e-05, "loss": 0.9176, "step": 8644 }, { "epoch": 0.7724439877588403, "grad_norm": 0.5111382007598877, "learning_rate": 1.2970518389437297e-05, "loss": 0.8884, "step": 8645 }, { "epoch": 0.772533339290102, "grad_norm": 0.6080954670906067, "learning_rate": 1.2960796232346706e-05, "loss": 0.9019, "step": 8646 }, { "epoch": 0.7726226908213639, "grad_norm": 0.4767414927482605, "learning_rate": 1.295107717768425e-05, "loss": 0.9232, "step": 8647 }, { "epoch": 0.7727120423526258, "grad_norm": 0.4408529996871948, "learning_rate": 1.2941361226263982e-05, "loss": 0.9483, "step": 8648 }, { "epoch": 0.7728013938838877, "grad_norm": 0.493099182844162, "learning_rate": 1.293164837889973e-05, "loss": 1.0169, "step": 8649 }, { "epoch": 0.7728907454151496, "grad_norm": 0.554325520992279, "learning_rate": 1.2921938636405045e-05, "loss": 0.8812, "step": 8650 }, { "epoch": 0.7729800969464115, "grad_norm": 0.513780951499939, "learning_rate": 1.2912231999593222e-05, "loss": 0.8577, "step": 8651 }, { "epoch": 0.7730694484776733, "grad_norm": 0.4900115132331848, "learning_rate": 1.2902528469277297e-05, "loss": 0.9635, "step": 8652 }, { "epoch": 0.7731588000089351, "grad_norm": 0.47862446308135986, "learning_rate": 1.2892828046270038e-05, "loss": 0.9151, "step": 8653 }, { "epoch": 0.773248151540197, "grad_norm": 0.4748653173446655, "learning_rate": 1.2883130731383969e-05, "loss": 0.9588, "step": 8654 }, { "epoch": 0.7733375030714589, "grad_norm": 0.4709342122077942, "learning_rate": 1.2873436525431342e-05, "loss": 0.9934, "step": 8655 }, { "epoch": 0.7734268546027208, "grad_norm": 0.4540732800960541, "learning_rate": 1.2863745429224144e-05, "loss": 0.9338, "step": 8656 }, { "epoch": 0.7735162061339826, "grad_norm": 0.5187547206878662, "learning_rate": 1.2854057443574124e-05, "loss": 0.8713, "step": 8657 }, { "epoch": 0.7736055576652445, "grad_norm": 0.522544801235199, "learning_rate": 1.2844372569292723e-05, "loss": 0.8775, "step": 8658 }, { "epoch": 0.7736949091965064, "grad_norm": 0.6091160774230957, "learning_rate": 1.2834690807191174e-05, "loss": 0.9253, "step": 8659 }, { "epoch": 0.7737842607277682, "grad_norm": 0.5314405560493469, "learning_rate": 1.2825012158080424e-05, "loss": 0.9408, "step": 8660 }, { "epoch": 0.7738736122590301, "grad_norm": 0.507613480091095, "learning_rate": 1.2815336622771157e-05, "loss": 0.9501, "step": 8661 }, { "epoch": 0.773962963790292, "grad_norm": 0.4508688151836395, "learning_rate": 1.2805664202073814e-05, "loss": 1.0068, "step": 8662 }, { "epoch": 0.7740523153215538, "grad_norm": 0.4795742332935333, "learning_rate": 1.2795994896798551e-05, "loss": 0.958, "step": 8663 }, { "epoch": 0.7741416668528157, "grad_norm": 0.6187158226966858, "learning_rate": 1.278632870775529e-05, "loss": 0.8805, "step": 8664 }, { "epoch": 0.7742310183840776, "grad_norm": 0.6057202816009521, "learning_rate": 1.2776665635753665e-05, "loss": 0.8507, "step": 8665 }, { "epoch": 0.7743203699153395, "grad_norm": 0.43509769439697266, "learning_rate": 1.2767005681603078e-05, "loss": 0.9527, "step": 8666 }, { "epoch": 0.7744097214466013, "grad_norm": 0.5171564817428589, "learning_rate": 1.2757348846112626e-05, "loss": 1.0171, "step": 8667 }, { "epoch": 0.7744990729778631, "grad_norm": 0.5768764615058899, "learning_rate": 1.2747695130091185e-05, "loss": 0.912, "step": 8668 }, { "epoch": 0.774588424509125, "grad_norm": 0.6042607426643372, "learning_rate": 1.2738044534347365e-05, "loss": 0.943, "step": 8669 }, { "epoch": 0.7746777760403869, "grad_norm": 0.4685840606689453, "learning_rate": 1.2728397059689495e-05, "loss": 0.942, "step": 8670 }, { "epoch": 0.7747671275716488, "grad_norm": 0.626491367816925, "learning_rate": 1.271875270692567e-05, "loss": 0.8608, "step": 8671 }, { "epoch": 0.7748564791029107, "grad_norm": 0.5503062605857849, "learning_rate": 1.2709111476863683e-05, "loss": 0.8855, "step": 8672 }, { "epoch": 0.7749458306341724, "grad_norm": 0.41604703664779663, "learning_rate": 1.2699473370311099e-05, "loss": 0.9734, "step": 8673 }, { "epoch": 0.7750351821654343, "grad_norm": 0.48552244901657104, "learning_rate": 1.268983838807522e-05, "loss": 0.9216, "step": 8674 }, { "epoch": 0.7751245336966962, "grad_norm": 0.45172011852264404, "learning_rate": 1.2680206530963073e-05, "loss": 0.9134, "step": 8675 }, { "epoch": 0.7752138852279581, "grad_norm": 0.4474288523197174, "learning_rate": 1.267057779978143e-05, "loss": 1.0127, "step": 8676 }, { "epoch": 0.77530323675922, "grad_norm": 0.5544458031654358, "learning_rate": 1.2660952195336795e-05, "loss": 0.8255, "step": 8677 }, { "epoch": 0.7753925882904819, "grad_norm": 0.4368109703063965, "learning_rate": 1.265132971843544e-05, "loss": 0.9037, "step": 8678 }, { "epoch": 0.7754819398217437, "grad_norm": 0.5210118889808655, "learning_rate": 1.2641710369883308e-05, "loss": 0.915, "step": 8679 }, { "epoch": 0.7755712913530055, "grad_norm": 0.5763956308364868, "learning_rate": 1.2632094150486146e-05, "loss": 1.0095, "step": 8680 }, { "epoch": 0.7756606428842674, "grad_norm": 0.5526300072669983, "learning_rate": 1.2622481061049413e-05, "loss": 0.9442, "step": 8681 }, { "epoch": 0.7757499944155293, "grad_norm": 0.44834303855895996, "learning_rate": 1.2612871102378304e-05, "loss": 0.898, "step": 8682 }, { "epoch": 0.7758393459467912, "grad_norm": 0.5319566130638123, "learning_rate": 1.2603264275277766e-05, "loss": 0.8959, "step": 8683 }, { "epoch": 0.775928697478053, "grad_norm": 0.45926499366760254, "learning_rate": 1.2593660580552457e-05, "loss": 0.9541, "step": 8684 }, { "epoch": 0.7760180490093149, "grad_norm": 0.4919742941856384, "learning_rate": 1.2584060019006799e-05, "loss": 0.8658, "step": 8685 }, { "epoch": 0.7761074005405768, "grad_norm": 0.42770183086395264, "learning_rate": 1.257446259144494e-05, "loss": 0.9669, "step": 8686 }, { "epoch": 0.7761967520718386, "grad_norm": 0.626804530620575, "learning_rate": 1.2564868298670773e-05, "loss": 0.8515, "step": 8687 }, { "epoch": 0.7762861036031005, "grad_norm": 0.5289289355278015, "learning_rate": 1.2555277141487925e-05, "loss": 0.8717, "step": 8688 }, { "epoch": 0.7763754551343623, "grad_norm": 0.3943426311016083, "learning_rate": 1.2545689120699733e-05, "loss": 0.9667, "step": 8689 }, { "epoch": 0.7764648066656242, "grad_norm": 0.44126003980636597, "learning_rate": 1.2536104237109314e-05, "loss": 0.9189, "step": 8690 }, { "epoch": 0.7765541581968861, "grad_norm": 0.5304616689682007, "learning_rate": 1.2526522491519499e-05, "loss": 0.891, "step": 8691 }, { "epoch": 0.776643509728148, "grad_norm": 0.5561332106590271, "learning_rate": 1.2516943884732862e-05, "loss": 0.9357, "step": 8692 }, { "epoch": 0.7767328612594099, "grad_norm": 0.48594895005226135, "learning_rate": 1.2507368417551717e-05, "loss": 0.8873, "step": 8693 }, { "epoch": 0.7768222127906717, "grad_norm": 0.48896369338035583, "learning_rate": 1.2497796090778113e-05, "loss": 0.9618, "step": 8694 }, { "epoch": 0.7769115643219335, "grad_norm": 0.5779229998588562, "learning_rate": 1.2488226905213829e-05, "loss": 0.9589, "step": 8695 }, { "epoch": 0.7770009158531954, "grad_norm": 0.4492420256137848, "learning_rate": 1.247866086166039e-05, "loss": 0.9654, "step": 8696 }, { "epoch": 0.7770902673844573, "grad_norm": 0.45466622710227966, "learning_rate": 1.2469097960919052e-05, "loss": 0.9783, "step": 8697 }, { "epoch": 0.7771796189157192, "grad_norm": 0.4977153241634369, "learning_rate": 1.2459538203790822e-05, "loss": 0.9013, "step": 8698 }, { "epoch": 0.7772689704469811, "grad_norm": 0.5779181122779846, "learning_rate": 1.244998159107641e-05, "loss": 0.8386, "step": 8699 }, { "epoch": 0.777358321978243, "grad_norm": 0.4691259264945984, "learning_rate": 1.2440428123576286e-05, "loss": 1.0294, "step": 8700 }, { "epoch": 0.7774476735095047, "grad_norm": 0.4812089502811432, "learning_rate": 1.2430877802090674e-05, "loss": 0.9459, "step": 8701 }, { "epoch": 0.7775370250407666, "grad_norm": 0.5185266137123108, "learning_rate": 1.2421330627419498e-05, "loss": 0.8558, "step": 8702 }, { "epoch": 0.7776263765720285, "grad_norm": 0.4775238037109375, "learning_rate": 1.2411786600362457e-05, "loss": 0.8715, "step": 8703 }, { "epoch": 0.7777157281032904, "grad_norm": 0.5433192253112793, "learning_rate": 1.2402245721718935e-05, "loss": 0.9181, "step": 8704 }, { "epoch": 0.7778050796345523, "grad_norm": 0.47637078166007996, "learning_rate": 1.2392707992288095e-05, "loss": 0.9268, "step": 8705 }, { "epoch": 0.7778944311658141, "grad_norm": 0.45217010378837585, "learning_rate": 1.2383173412868832e-05, "loss": 0.9737, "step": 8706 }, { "epoch": 0.777983782697076, "grad_norm": 0.40835416316986084, "learning_rate": 1.2373641984259754e-05, "loss": 1.0122, "step": 8707 }, { "epoch": 0.7780731342283378, "grad_norm": 0.43482106924057007, "learning_rate": 1.2364113707259251e-05, "loss": 1.0017, "step": 8708 }, { "epoch": 0.7781624857595997, "grad_norm": 0.4076036214828491, "learning_rate": 1.2354588582665371e-05, "loss": 0.9542, "step": 8709 }, { "epoch": 0.7782518372908616, "grad_norm": 0.5018858313560486, "learning_rate": 1.2345066611275973e-05, "loss": 0.9303, "step": 8710 }, { "epoch": 0.7783411888221234, "grad_norm": 0.45527756214141846, "learning_rate": 1.2335547793888619e-05, "loss": 1.0528, "step": 8711 }, { "epoch": 0.7784305403533853, "grad_norm": 0.46973609924316406, "learning_rate": 1.2326032131300613e-05, "loss": 0.9795, "step": 8712 }, { "epoch": 0.7785198918846472, "grad_norm": 0.46930214762687683, "learning_rate": 1.2316519624308991e-05, "loss": 0.8755, "step": 8713 }, { "epoch": 0.7786092434159091, "grad_norm": 0.4111507534980774, "learning_rate": 1.2307010273710528e-05, "loss": 0.9699, "step": 8714 }, { "epoch": 0.7786985949471709, "grad_norm": 0.5797132849693298, "learning_rate": 1.229750408030173e-05, "loss": 0.9161, "step": 8715 }, { "epoch": 0.7787879464784327, "grad_norm": 0.5236135721206665, "learning_rate": 1.2288001044878849e-05, "loss": 0.9461, "step": 8716 }, { "epoch": 0.7788772980096946, "grad_norm": 0.42989420890808105, "learning_rate": 1.227850116823786e-05, "loss": 0.973, "step": 8717 }, { "epoch": 0.7789666495409565, "grad_norm": 0.5021857619285583, "learning_rate": 1.2269004451174493e-05, "loss": 0.9027, "step": 8718 }, { "epoch": 0.7790560010722184, "grad_norm": 0.5190892219543457, "learning_rate": 1.2259510894484173e-05, "loss": 0.8612, "step": 8719 }, { "epoch": 0.7791453526034803, "grad_norm": 0.4393077492713928, "learning_rate": 1.22500204989621e-05, "loss": 0.8991, "step": 8720 }, { "epoch": 0.7792347041347422, "grad_norm": 0.5465923547744751, "learning_rate": 1.2240533265403198e-05, "loss": 0.8785, "step": 8721 }, { "epoch": 0.7793240556660039, "grad_norm": 0.5483127236366272, "learning_rate": 1.2231049194602122e-05, "loss": 0.8618, "step": 8722 }, { "epoch": 0.7794134071972658, "grad_norm": 0.4444792866706848, "learning_rate": 1.2221568287353263e-05, "loss": 0.9536, "step": 8723 }, { "epoch": 0.7795027587285277, "grad_norm": 0.5139243006706238, "learning_rate": 1.221209054445075e-05, "loss": 0.8762, "step": 8724 }, { "epoch": 0.7795921102597896, "grad_norm": 0.49245816469192505, "learning_rate": 1.2202615966688442e-05, "loss": 0.9376, "step": 8725 }, { "epoch": 0.7796814617910515, "grad_norm": 0.5652595162391663, "learning_rate": 1.2193144554859937e-05, "loss": 0.8839, "step": 8726 }, { "epoch": 0.7797708133223133, "grad_norm": 0.679634153842926, "learning_rate": 1.2183676309758574e-05, "loss": 0.8982, "step": 8727 }, { "epoch": 0.7798601648535752, "grad_norm": 0.4609330892562866, "learning_rate": 1.2174211232177419e-05, "loss": 0.9649, "step": 8728 }, { "epoch": 0.779949516384837, "grad_norm": 0.5099778771400452, "learning_rate": 1.2164749322909257e-05, "loss": 0.8654, "step": 8729 }, { "epoch": 0.7800388679160989, "grad_norm": 0.5093337297439575, "learning_rate": 1.2155290582746636e-05, "loss": 0.9262, "step": 8730 }, { "epoch": 0.7801282194473608, "grad_norm": 0.5493488907814026, "learning_rate": 1.214583501248182e-05, "loss": 0.9284, "step": 8731 }, { "epoch": 0.7802175709786227, "grad_norm": 0.4473147392272949, "learning_rate": 1.2136382612906822e-05, "loss": 1.0044, "step": 8732 }, { "epoch": 0.7803069225098845, "grad_norm": 0.4741600751876831, "learning_rate": 1.2126933384813378e-05, "loss": 0.913, "step": 8733 }, { "epoch": 0.7803962740411464, "grad_norm": 0.42708808183670044, "learning_rate": 1.2117487328992955e-05, "loss": 0.9394, "step": 8734 }, { "epoch": 0.7804856255724082, "grad_norm": 0.5554439425468445, "learning_rate": 1.210804444623677e-05, "loss": 0.9425, "step": 8735 }, { "epoch": 0.7805749771036701, "grad_norm": 0.5091964602470398, "learning_rate": 1.2098604737335778e-05, "loss": 0.9622, "step": 8736 }, { "epoch": 0.780664328634932, "grad_norm": 0.5323701500892639, "learning_rate": 1.2089168203080625e-05, "loss": 0.9593, "step": 8737 }, { "epoch": 0.7807536801661938, "grad_norm": 0.5752742290496826, "learning_rate": 1.207973484426173e-05, "loss": 0.8755, "step": 8738 }, { "epoch": 0.7808430316974557, "grad_norm": 0.4725722074508667, "learning_rate": 1.2070304661669251e-05, "loss": 1.0083, "step": 8739 }, { "epoch": 0.7809323832287176, "grad_norm": 0.5769376754760742, "learning_rate": 1.2060877656093051e-05, "loss": 0.9272, "step": 8740 }, { "epoch": 0.7810217347599795, "grad_norm": 0.5163832902908325, "learning_rate": 1.2051453828322768e-05, "loss": 0.9612, "step": 8741 }, { "epoch": 0.7811110862912413, "grad_norm": 0.5350446105003357, "learning_rate": 1.204203317914771e-05, "loss": 0.9202, "step": 8742 }, { "epoch": 0.7812004378225031, "grad_norm": 0.5735968351364136, "learning_rate": 1.2032615709356981e-05, "loss": 0.9389, "step": 8743 }, { "epoch": 0.781289789353765, "grad_norm": 0.5735962390899658, "learning_rate": 1.2023201419739389e-05, "loss": 0.9112, "step": 8744 }, { "epoch": 0.7813791408850269, "grad_norm": 0.5810607671737671, "learning_rate": 1.2013790311083478e-05, "loss": 0.9489, "step": 8745 }, { "epoch": 0.7814684924162888, "grad_norm": 0.554716944694519, "learning_rate": 1.2004382384177537e-05, "loss": 0.9159, "step": 8746 }, { "epoch": 0.7815578439475507, "grad_norm": 0.5520256757736206, "learning_rate": 1.1994977639809574e-05, "loss": 0.8593, "step": 8747 }, { "epoch": 0.7816471954788126, "grad_norm": 0.4506448805332184, "learning_rate": 1.1985576078767352e-05, "loss": 0.9468, "step": 8748 }, { "epoch": 0.7817365470100743, "grad_norm": 0.40401577949523926, "learning_rate": 1.197617770183832e-05, "loss": 0.9659, "step": 8749 }, { "epoch": 0.7818258985413362, "grad_norm": 0.4281572699546814, "learning_rate": 1.1966782509809715e-05, "loss": 0.9421, "step": 8750 }, { "epoch": 0.7819152500725981, "grad_norm": 0.4709114134311676, "learning_rate": 1.195739050346848e-05, "loss": 0.975, "step": 8751 }, { "epoch": 0.78200460160386, "grad_norm": 0.4489923119544983, "learning_rate": 1.1948001683601295e-05, "loss": 1.0054, "step": 8752 }, { "epoch": 0.7820939531351219, "grad_norm": 0.4930829107761383, "learning_rate": 1.1938616050994572e-05, "loss": 0.9004, "step": 8753 }, { "epoch": 0.7821833046663837, "grad_norm": 0.5570724606513977, "learning_rate": 1.192923360643446e-05, "loss": 0.8782, "step": 8754 }, { "epoch": 0.7822726561976456, "grad_norm": 0.44626376032829285, "learning_rate": 1.1919854350706838e-05, "loss": 0.9346, "step": 8755 }, { "epoch": 0.7823620077289074, "grad_norm": 0.39036697149276733, "learning_rate": 1.1910478284597321e-05, "loss": 0.9688, "step": 8756 }, { "epoch": 0.7824513592601693, "grad_norm": 0.5225417017936707, "learning_rate": 1.1901105408891256e-05, "loss": 0.8374, "step": 8757 }, { "epoch": 0.7825407107914312, "grad_norm": 0.43829452991485596, "learning_rate": 1.1891735724373726e-05, "loss": 0.9661, "step": 8758 }, { "epoch": 0.782630062322693, "grad_norm": 0.5867989659309387, "learning_rate": 1.1882369231829526e-05, "loss": 0.9218, "step": 8759 }, { "epoch": 0.7827194138539549, "grad_norm": 0.46479299664497375, "learning_rate": 1.1873005932043202e-05, "loss": 0.94, "step": 8760 }, { "epoch": 0.7828087653852168, "grad_norm": 0.543161153793335, "learning_rate": 1.1863645825799042e-05, "loss": 0.9706, "step": 8761 }, { "epoch": 0.7828981169164787, "grad_norm": 0.4378313720226288, "learning_rate": 1.185428891388104e-05, "loss": 0.9379, "step": 8762 }, { "epoch": 0.7829874684477405, "grad_norm": 0.44382184743881226, "learning_rate": 1.1844935197072954e-05, "loss": 0.9562, "step": 8763 }, { "epoch": 0.7830768199790024, "grad_norm": 0.5598951578140259, "learning_rate": 1.183558467615824e-05, "loss": 0.8841, "step": 8764 }, { "epoch": 0.7831661715102642, "grad_norm": 0.4546869695186615, "learning_rate": 1.1826237351920123e-05, "loss": 0.8926, "step": 8765 }, { "epoch": 0.7832555230415261, "grad_norm": 0.3868406414985657, "learning_rate": 1.1816893225141523e-05, "loss": 1.0242, "step": 8766 }, { "epoch": 0.783344874572788, "grad_norm": 0.4522659480571747, "learning_rate": 1.1807552296605118e-05, "loss": 0.9198, "step": 8767 }, { "epoch": 0.7834342261040499, "grad_norm": 0.47773608565330505, "learning_rate": 1.1798214567093313e-05, "loss": 0.96, "step": 8768 }, { "epoch": 0.7835235776353118, "grad_norm": 0.42124301195144653, "learning_rate": 1.1788880037388256e-05, "loss": 1.0142, "step": 8769 }, { "epoch": 0.7836129291665735, "grad_norm": 0.4496397376060486, "learning_rate": 1.1779548708271782e-05, "loss": 0.964, "step": 8770 }, { "epoch": 0.7837022806978354, "grad_norm": 0.4569949209690094, "learning_rate": 1.1770220580525504e-05, "loss": 0.9092, "step": 8771 }, { "epoch": 0.7837916322290973, "grad_norm": 0.6700246334075928, "learning_rate": 1.1760895654930748e-05, "loss": 0.8237, "step": 8772 }, { "epoch": 0.7838809837603592, "grad_norm": 0.4912089407444, "learning_rate": 1.175157393226859e-05, "loss": 0.8981, "step": 8773 }, { "epoch": 0.7839703352916211, "grad_norm": 0.5014065504074097, "learning_rate": 1.174225541331982e-05, "loss": 0.9595, "step": 8774 }, { "epoch": 0.784059686822883, "grad_norm": 0.5120930671691895, "learning_rate": 1.1732940098864947e-05, "loss": 0.9345, "step": 8775 }, { "epoch": 0.7841490383541448, "grad_norm": 0.45890259742736816, "learning_rate": 1.1723627989684239e-05, "loss": 1.0086, "step": 8776 }, { "epoch": 0.7842383898854066, "grad_norm": 0.43002086877822876, "learning_rate": 1.171431908655768e-05, "loss": 1.0457, "step": 8777 }, { "epoch": 0.7843277414166685, "grad_norm": 0.4677255153656006, "learning_rate": 1.1705013390264995e-05, "loss": 0.8648, "step": 8778 }, { "epoch": 0.7844170929479304, "grad_norm": 0.5233368277549744, "learning_rate": 1.169571090158565e-05, "loss": 0.8994, "step": 8779 }, { "epoch": 0.7845064444791923, "grad_norm": 0.4638941287994385, "learning_rate": 1.1686411621298793e-05, "loss": 0.9106, "step": 8780 }, { "epoch": 0.7845957960104541, "grad_norm": 0.4682579040527344, "learning_rate": 1.167711555018336e-05, "loss": 0.9533, "step": 8781 }, { "epoch": 0.784685147541716, "grad_norm": 0.47554928064346313, "learning_rate": 1.1667822689017988e-05, "loss": 0.9157, "step": 8782 }, { "epoch": 0.7847744990729779, "grad_norm": 0.5877280235290527, "learning_rate": 1.165853303858106e-05, "loss": 0.926, "step": 8783 }, { "epoch": 0.7848638506042397, "grad_norm": 0.47542765736579895, "learning_rate": 1.1649246599650681e-05, "loss": 0.952, "step": 8784 }, { "epoch": 0.7849532021355016, "grad_norm": 0.5808351635932922, "learning_rate": 1.1639963373004691e-05, "loss": 0.8716, "step": 8785 }, { "epoch": 0.7850425536667635, "grad_norm": 0.4650845527648926, "learning_rate": 1.1630683359420652e-05, "loss": 0.9614, "step": 8786 }, { "epoch": 0.7851319051980253, "grad_norm": 0.43621885776519775, "learning_rate": 1.1621406559675873e-05, "loss": 0.8881, "step": 8787 }, { "epoch": 0.7852212567292872, "grad_norm": 0.47639355063438416, "learning_rate": 1.1612132974547379e-05, "loss": 0.9544, "step": 8788 }, { "epoch": 0.7853106082605491, "grad_norm": 0.6018533706665039, "learning_rate": 1.1602862604811955e-05, "loss": 0.8597, "step": 8789 }, { "epoch": 0.785399959791811, "grad_norm": 0.44231539964675903, "learning_rate": 1.1593595451246047e-05, "loss": 0.9587, "step": 8790 }, { "epoch": 0.7854893113230728, "grad_norm": 0.5324105024337769, "learning_rate": 1.1584331514625912e-05, "loss": 0.9463, "step": 8791 }, { "epoch": 0.7855786628543346, "grad_norm": 0.5201995372772217, "learning_rate": 1.1575070795727489e-05, "loss": 0.9776, "step": 8792 }, { "epoch": 0.7856680143855965, "grad_norm": 0.45011529326438904, "learning_rate": 1.1565813295326466e-05, "loss": 0.9372, "step": 8793 }, { "epoch": 0.7857573659168584, "grad_norm": 0.5564170479774475, "learning_rate": 1.155655901419826e-05, "loss": 0.9709, "step": 8794 }, { "epoch": 0.7858467174481203, "grad_norm": 0.4589083790779114, "learning_rate": 1.1547307953118014e-05, "loss": 0.9465, "step": 8795 }, { "epoch": 0.7859360689793822, "grad_norm": 0.5712124705314636, "learning_rate": 1.1538060112860604e-05, "loss": 0.8866, "step": 8796 }, { "epoch": 0.786025420510644, "grad_norm": 0.45000290870666504, "learning_rate": 1.1528815494200634e-05, "loss": 0.9641, "step": 8797 }, { "epoch": 0.7861147720419058, "grad_norm": 0.5889281630516052, "learning_rate": 1.1519574097912444e-05, "loss": 0.8468, "step": 8798 }, { "epoch": 0.7862041235731677, "grad_norm": 0.5337108373641968, "learning_rate": 1.1510335924770105e-05, "loss": 0.9437, "step": 8799 }, { "epoch": 0.7862934751044296, "grad_norm": 0.4237595498561859, "learning_rate": 1.1501100975547385e-05, "loss": 0.9497, "step": 8800 }, { "epoch": 0.7863828266356915, "grad_norm": 0.4925912916660309, "learning_rate": 1.1491869251017834e-05, "loss": 0.945, "step": 8801 }, { "epoch": 0.7864721781669534, "grad_norm": 0.6141911745071411, "learning_rate": 1.1482640751954699e-05, "loss": 0.8556, "step": 8802 }, { "epoch": 0.7865615296982152, "grad_norm": 0.43323883414268494, "learning_rate": 1.1473415479130962e-05, "loss": 0.9278, "step": 8803 }, { "epoch": 0.786650881229477, "grad_norm": 0.44862401485443115, "learning_rate": 1.1464193433319347e-05, "loss": 0.9978, "step": 8804 }, { "epoch": 0.7867402327607389, "grad_norm": 0.46563851833343506, "learning_rate": 1.145497461529229e-05, "loss": 0.9032, "step": 8805 }, { "epoch": 0.7868295842920008, "grad_norm": 0.6122609972953796, "learning_rate": 1.144575902582199e-05, "loss": 0.953, "step": 8806 }, { "epoch": 0.7869189358232627, "grad_norm": 0.629047691822052, "learning_rate": 1.1436546665680309e-05, "loss": 0.945, "step": 8807 }, { "epoch": 0.7870082873545245, "grad_norm": 0.47389715909957886, "learning_rate": 1.1427337535638905e-05, "loss": 0.9537, "step": 8808 }, { "epoch": 0.7870976388857864, "grad_norm": 0.4402306079864502, "learning_rate": 1.1418131636469137e-05, "loss": 1.0046, "step": 8809 }, { "epoch": 0.7871869904170483, "grad_norm": 0.4482784867286682, "learning_rate": 1.1408928968942095e-05, "loss": 0.8711, "step": 8810 }, { "epoch": 0.7872763419483101, "grad_norm": 0.4512721598148346, "learning_rate": 1.1399729533828623e-05, "loss": 0.9093, "step": 8811 }, { "epoch": 0.787365693479572, "grad_norm": 0.5375701189041138, "learning_rate": 1.1390533331899234e-05, "loss": 0.892, "step": 8812 }, { "epoch": 0.7874550450108339, "grad_norm": 0.44732967019081116, "learning_rate": 1.1381340363924226e-05, "loss": 0.9631, "step": 8813 }, { "epoch": 0.7875443965420957, "grad_norm": 0.5341693758964539, "learning_rate": 1.1372150630673606e-05, "loss": 0.8947, "step": 8814 }, { "epoch": 0.7876337480733576, "grad_norm": 0.5837507247924805, "learning_rate": 1.136296413291712e-05, "loss": 0.9357, "step": 8815 }, { "epoch": 0.7877230996046195, "grad_norm": 0.49844542145729065, "learning_rate": 1.1353780871424225e-05, "loss": 0.9402, "step": 8816 }, { "epoch": 0.7878124511358814, "grad_norm": 0.5053272247314453, "learning_rate": 1.134460084696412e-05, "loss": 0.916, "step": 8817 }, { "epoch": 0.7879018026671432, "grad_norm": 0.47650039196014404, "learning_rate": 1.1335424060305733e-05, "loss": 0.9513, "step": 8818 }, { "epoch": 0.787991154198405, "grad_norm": 0.5099322199821472, "learning_rate": 1.1326250512217728e-05, "loss": 0.9191, "step": 8819 }, { "epoch": 0.7880805057296669, "grad_norm": 0.6279272437095642, "learning_rate": 1.1317080203468462e-05, "loss": 0.935, "step": 8820 }, { "epoch": 0.7881698572609288, "grad_norm": 0.5648776888847351, "learning_rate": 1.1307913134826059e-05, "loss": 0.8982, "step": 8821 }, { "epoch": 0.7882592087921907, "grad_norm": 0.6467058062553406, "learning_rate": 1.1298749307058359e-05, "loss": 0.9648, "step": 8822 }, { "epoch": 0.7883485603234526, "grad_norm": 0.5014479756355286, "learning_rate": 1.1289588720932931e-05, "loss": 0.928, "step": 8823 }, { "epoch": 0.7884379118547145, "grad_norm": 0.5449494123458862, "learning_rate": 1.1280431377217071e-05, "loss": 1.0022, "step": 8824 }, { "epoch": 0.7885272633859762, "grad_norm": 0.46424680948257446, "learning_rate": 1.1271277276677805e-05, "loss": 0.8855, "step": 8825 }, { "epoch": 0.7886166149172381, "grad_norm": 0.548555314540863, "learning_rate": 1.1262126420081887e-05, "loss": 0.8627, "step": 8826 }, { "epoch": 0.7887059664485, "grad_norm": 0.44086822867393494, "learning_rate": 1.125297880819579e-05, "loss": 0.9702, "step": 8827 }, { "epoch": 0.7887953179797619, "grad_norm": 0.4535292088985443, "learning_rate": 1.124383444178574e-05, "loss": 0.8851, "step": 8828 }, { "epoch": 0.7888846695110238, "grad_norm": 0.5612303614616394, "learning_rate": 1.1234693321617673e-05, "loss": 0.9546, "step": 8829 }, { "epoch": 0.7889740210422856, "grad_norm": 0.467670738697052, "learning_rate": 1.1225555448457242e-05, "loss": 0.9503, "step": 8830 }, { "epoch": 0.7890633725735475, "grad_norm": 0.4849298298358917, "learning_rate": 1.1216420823069846e-05, "loss": 0.9047, "step": 8831 }, { "epoch": 0.7891527241048093, "grad_norm": 0.41470497846603394, "learning_rate": 1.1207289446220604e-05, "loss": 0.9613, "step": 8832 }, { "epoch": 0.7892420756360712, "grad_norm": 0.4530922472476959, "learning_rate": 1.1198161318674377e-05, "loss": 0.9708, "step": 8833 }, { "epoch": 0.7893314271673331, "grad_norm": 0.422997385263443, "learning_rate": 1.1189036441195733e-05, "loss": 0.9678, "step": 8834 }, { "epoch": 0.789420778698595, "grad_norm": 0.5136018395423889, "learning_rate": 1.1179914814548986e-05, "loss": 0.9182, "step": 8835 }, { "epoch": 0.7895101302298568, "grad_norm": 0.529691755771637, "learning_rate": 1.1170796439498166e-05, "loss": 0.8935, "step": 8836 }, { "epoch": 0.7895994817611187, "grad_norm": 0.47432875633239746, "learning_rate": 1.1161681316807032e-05, "loss": 0.9239, "step": 8837 }, { "epoch": 0.7896888332923806, "grad_norm": 0.563213586807251, "learning_rate": 1.1152569447239075e-05, "loss": 0.9184, "step": 8838 }, { "epoch": 0.7897781848236424, "grad_norm": 0.4394233524799347, "learning_rate": 1.1143460831557523e-05, "loss": 0.9732, "step": 8839 }, { "epoch": 0.7898675363549043, "grad_norm": 0.46250519156455994, "learning_rate": 1.1134355470525293e-05, "loss": 0.9073, "step": 8840 }, { "epoch": 0.7899568878861661, "grad_norm": 0.43355268239974976, "learning_rate": 1.1125253364905075e-05, "loss": 0.9525, "step": 8841 }, { "epoch": 0.790046239417428, "grad_norm": 0.4754622280597687, "learning_rate": 1.111615451545926e-05, "loss": 0.946, "step": 8842 }, { "epoch": 0.7901355909486899, "grad_norm": 0.47392573952674866, "learning_rate": 1.1107058922949975e-05, "loss": 0.9907, "step": 8843 }, { "epoch": 0.7902249424799518, "grad_norm": 0.4125828742980957, "learning_rate": 1.1097966588139092e-05, "loss": 0.9411, "step": 8844 }, { "epoch": 0.7903142940112137, "grad_norm": 0.5055714249610901, "learning_rate": 1.1088877511788154e-05, "loss": 0.9096, "step": 8845 }, { "epoch": 0.7904036455424754, "grad_norm": 0.5957999229431152, "learning_rate": 1.1079791694658487e-05, "loss": 0.922, "step": 8846 }, { "epoch": 0.7904929970737373, "grad_norm": 0.48999646306037903, "learning_rate": 1.1070709137511132e-05, "loss": 0.9453, "step": 8847 }, { "epoch": 0.7905823486049992, "grad_norm": 0.40712419152259827, "learning_rate": 1.1061629841106836e-05, "loss": 0.9806, "step": 8848 }, { "epoch": 0.7906717001362611, "grad_norm": 0.5603836178779602, "learning_rate": 1.1052553806206094e-05, "loss": 1.014, "step": 8849 }, { "epoch": 0.790761051667523, "grad_norm": 0.467892587184906, "learning_rate": 1.1043481033569142e-05, "loss": 0.9254, "step": 8850 }, { "epoch": 0.7908504031987849, "grad_norm": 0.5536115169525146, "learning_rate": 1.103441152395588e-05, "loss": 0.9557, "step": 8851 }, { "epoch": 0.7909397547300467, "grad_norm": 0.4258303940296173, "learning_rate": 1.1025345278125998e-05, "loss": 0.9526, "step": 8852 }, { "epoch": 0.7910291062613085, "grad_norm": 0.4268374443054199, "learning_rate": 1.1016282296838887e-05, "loss": 0.9622, "step": 8853 }, { "epoch": 0.7911184577925704, "grad_norm": 0.525230348110199, "learning_rate": 1.100722258085367e-05, "loss": 0.9293, "step": 8854 }, { "epoch": 0.7912078093238323, "grad_norm": 0.49180522561073303, "learning_rate": 1.0998166130929199e-05, "loss": 1.0217, "step": 8855 }, { "epoch": 0.7912971608550942, "grad_norm": 0.4953739643096924, "learning_rate": 1.0989112947824043e-05, "loss": 0.9168, "step": 8856 }, { "epoch": 0.791386512386356, "grad_norm": 0.4349702298641205, "learning_rate": 1.0980063032296501e-05, "loss": 0.9029, "step": 8857 }, { "epoch": 0.7914758639176179, "grad_norm": 0.4538852870464325, "learning_rate": 1.0971016385104604e-05, "loss": 0.9189, "step": 8858 }, { "epoch": 0.7915652154488797, "grad_norm": 0.466751366853714, "learning_rate": 1.0961973007006105e-05, "loss": 0.941, "step": 8859 }, { "epoch": 0.7916545669801416, "grad_norm": 0.5299864411354065, "learning_rate": 1.0952932898758494e-05, "loss": 0.9471, "step": 8860 }, { "epoch": 0.7917439185114035, "grad_norm": 0.4934852719306946, "learning_rate": 1.0943896061118953e-05, "loss": 0.8567, "step": 8861 }, { "epoch": 0.7918332700426653, "grad_norm": 0.442080557346344, "learning_rate": 1.0934862494844427e-05, "loss": 0.9867, "step": 8862 }, { "epoch": 0.7919226215739272, "grad_norm": 0.5051554441452026, "learning_rate": 1.0925832200691566e-05, "loss": 0.9596, "step": 8863 }, { "epoch": 0.7920119731051891, "grad_norm": 0.4397631883621216, "learning_rate": 1.0916805179416761e-05, "loss": 0.9463, "step": 8864 }, { "epoch": 0.792101324636451, "grad_norm": 0.4761650264263153, "learning_rate": 1.0907781431776121e-05, "loss": 0.9427, "step": 8865 }, { "epoch": 0.7921906761677128, "grad_norm": 0.5251712203025818, "learning_rate": 1.0898760958525479e-05, "loss": 0.9143, "step": 8866 }, { "epoch": 0.7922800276989747, "grad_norm": 0.4629380404949188, "learning_rate": 1.0889743760420396e-05, "loss": 0.9407, "step": 8867 }, { "epoch": 0.7923693792302365, "grad_norm": 0.3857233226299286, "learning_rate": 1.0880729838216153e-05, "loss": 0.9492, "step": 8868 }, { "epoch": 0.7924587307614984, "grad_norm": 0.5180653929710388, "learning_rate": 1.0871719192667773e-05, "loss": 0.8831, "step": 8869 }, { "epoch": 0.7925480822927603, "grad_norm": 0.49814748764038086, "learning_rate": 1.0862711824530003e-05, "loss": 0.9292, "step": 8870 }, { "epoch": 0.7926374338240222, "grad_norm": 0.4596767723560333, "learning_rate": 1.0853707734557272e-05, "loss": 0.9822, "step": 8871 }, { "epoch": 0.7927267853552841, "grad_norm": 0.4747507870197296, "learning_rate": 1.084470692350379e-05, "loss": 0.9528, "step": 8872 }, { "epoch": 0.7928161368865458, "grad_norm": 0.5321727395057678, "learning_rate": 1.0835709392123472e-05, "loss": 0.9908, "step": 8873 }, { "epoch": 0.7929054884178077, "grad_norm": 0.529675304889679, "learning_rate": 1.0826715141169947e-05, "loss": 0.9526, "step": 8874 }, { "epoch": 0.7929948399490696, "grad_norm": 0.4251299798488617, "learning_rate": 1.0817724171396592e-05, "loss": 0.9465, "step": 8875 }, { "epoch": 0.7930841914803315, "grad_norm": 0.44601455330848694, "learning_rate": 1.0808736483556487e-05, "loss": 0.9579, "step": 8876 }, { "epoch": 0.7931735430115934, "grad_norm": 0.5946657061576843, "learning_rate": 1.0799752078402469e-05, "loss": 0.9116, "step": 8877 }, { "epoch": 0.7932628945428553, "grad_norm": 0.460638165473938, "learning_rate": 1.079077095668704e-05, "loss": 0.9722, "step": 8878 }, { "epoch": 0.7933522460741171, "grad_norm": 0.4943525493144989, "learning_rate": 1.0781793119162487e-05, "loss": 0.8539, "step": 8879 }, { "epoch": 0.7934415976053789, "grad_norm": 0.45845288038253784, "learning_rate": 1.0772818566580795e-05, "loss": 0.9393, "step": 8880 }, { "epoch": 0.7935309491366408, "grad_norm": 0.515739917755127, "learning_rate": 1.0763847299693691e-05, "loss": 0.8696, "step": 8881 }, { "epoch": 0.7936203006679027, "grad_norm": 0.5935304164886475, "learning_rate": 1.0754879319252591e-05, "loss": 1.0198, "step": 8882 }, { "epoch": 0.7937096521991646, "grad_norm": 0.4660301208496094, "learning_rate": 1.0745914626008675e-05, "loss": 0.8689, "step": 8883 }, { "epoch": 0.7937990037304264, "grad_norm": 0.47837138175964355, "learning_rate": 1.0736953220712826e-05, "loss": 0.8852, "step": 8884 }, { "epoch": 0.7938883552616883, "grad_norm": 0.4837789535522461, "learning_rate": 1.0727995104115656e-05, "loss": 0.8776, "step": 8885 }, { "epoch": 0.7939777067929502, "grad_norm": 0.6152971386909485, "learning_rate": 1.0719040276967507e-05, "loss": 0.8065, "step": 8886 }, { "epoch": 0.794067058324212, "grad_norm": 0.532853364944458, "learning_rate": 1.0710088740018437e-05, "loss": 0.8949, "step": 8887 }, { "epoch": 0.7941564098554739, "grad_norm": 0.4451427161693573, "learning_rate": 1.0701140494018242e-05, "loss": 0.9532, "step": 8888 }, { "epoch": 0.7942457613867357, "grad_norm": 0.4042229652404785, "learning_rate": 1.0692195539716421e-05, "loss": 0.9972, "step": 8889 }, { "epoch": 0.7943351129179976, "grad_norm": 0.47671109437942505, "learning_rate": 1.0683253877862225e-05, "loss": 0.9228, "step": 8890 }, { "epoch": 0.7944244644492595, "grad_norm": 0.4329127073287964, "learning_rate": 1.0674315509204596e-05, "loss": 0.93, "step": 8891 }, { "epoch": 0.7945138159805214, "grad_norm": 0.47998568415641785, "learning_rate": 1.0665380434492223e-05, "loss": 0.9536, "step": 8892 }, { "epoch": 0.7946031675117833, "grad_norm": 0.5138132572174072, "learning_rate": 1.0656448654473517e-05, "loss": 0.9208, "step": 8893 }, { "epoch": 0.794692519043045, "grad_norm": 0.48235902190208435, "learning_rate": 1.0647520169896607e-05, "loss": 0.8611, "step": 8894 }, { "epoch": 0.7947818705743069, "grad_norm": 0.394228458404541, "learning_rate": 1.0638594981509348e-05, "loss": 0.974, "step": 8895 }, { "epoch": 0.7948712221055688, "grad_norm": 0.4859960675239563, "learning_rate": 1.0629673090059322e-05, "loss": 0.9407, "step": 8896 }, { "epoch": 0.7949605736368307, "grad_norm": 0.5173121690750122, "learning_rate": 1.0620754496293833e-05, "loss": 1.0492, "step": 8897 }, { "epoch": 0.7950499251680926, "grad_norm": 0.453870564699173, "learning_rate": 1.061183920095991e-05, "loss": 0.9226, "step": 8898 }, { "epoch": 0.7951392766993545, "grad_norm": 0.44933760166168213, "learning_rate": 1.0602927204804296e-05, "loss": 0.9376, "step": 8899 }, { "epoch": 0.7952286282306164, "grad_norm": 0.48174723982810974, "learning_rate": 1.0594018508573489e-05, "loss": 0.9127, "step": 8900 }, { "epoch": 0.7953179797618781, "grad_norm": 0.5078961253166199, "learning_rate": 1.0585113113013657e-05, "loss": 0.8392, "step": 8901 }, { "epoch": 0.79540733129314, "grad_norm": 0.4766399562358856, "learning_rate": 1.0576211018870736e-05, "loss": 0.9298, "step": 8902 }, { "epoch": 0.7954966828244019, "grad_norm": 0.5817358493804932, "learning_rate": 1.0567312226890364e-05, "loss": 0.9257, "step": 8903 }, { "epoch": 0.7955860343556638, "grad_norm": 0.49117544293403625, "learning_rate": 1.0558416737817916e-05, "loss": 0.9214, "step": 8904 }, { "epoch": 0.7956753858869257, "grad_norm": 0.46288153529167175, "learning_rate": 1.0549524552398488e-05, "loss": 0.9006, "step": 8905 }, { "epoch": 0.7957647374181875, "grad_norm": 0.4867618978023529, "learning_rate": 1.0540635671376885e-05, "loss": 0.9344, "step": 8906 }, { "epoch": 0.7958540889494494, "grad_norm": 0.5538333058357239, "learning_rate": 1.053175009549765e-05, "loss": 0.7893, "step": 8907 }, { "epoch": 0.7959434404807112, "grad_norm": 0.4785383939743042, "learning_rate": 1.0522867825505051e-05, "loss": 0.9209, "step": 8908 }, { "epoch": 0.7960327920119731, "grad_norm": 0.5016849637031555, "learning_rate": 1.0513988862143082e-05, "loss": 0.9328, "step": 8909 }, { "epoch": 0.796122143543235, "grad_norm": 0.4911724030971527, "learning_rate": 1.050511320615542e-05, "loss": 0.9874, "step": 8910 }, { "epoch": 0.7962114950744968, "grad_norm": 0.5558311939239502, "learning_rate": 1.0496240858285511e-05, "loss": 0.9098, "step": 8911 }, { "epoch": 0.7963008466057587, "grad_norm": 0.6585413217544556, "learning_rate": 1.0487371819276509e-05, "loss": 0.9099, "step": 8912 }, { "epoch": 0.7963901981370206, "grad_norm": 0.5155404806137085, "learning_rate": 1.0478506089871293e-05, "loss": 0.9004, "step": 8913 }, { "epoch": 0.7964795496682825, "grad_norm": 0.4853888154029846, "learning_rate": 1.0469643670812479e-05, "loss": 0.9659, "step": 8914 }, { "epoch": 0.7965689011995443, "grad_norm": 0.5303243398666382, "learning_rate": 1.0460784562842351e-05, "loss": 0.8745, "step": 8915 }, { "epoch": 0.7966582527308061, "grad_norm": 0.40654778480529785, "learning_rate": 1.0451928766702979e-05, "loss": 0.9293, "step": 8916 }, { "epoch": 0.796747604262068, "grad_norm": 0.47125494480133057, "learning_rate": 1.0443076283136122e-05, "loss": 0.9285, "step": 8917 }, { "epoch": 0.7968369557933299, "grad_norm": 0.5250342488288879, "learning_rate": 1.043422711288327e-05, "loss": 0.9306, "step": 8918 }, { "epoch": 0.7969263073245918, "grad_norm": 0.495063841342926, "learning_rate": 1.042538125668564e-05, "loss": 0.9515, "step": 8919 }, { "epoch": 0.7970156588558537, "grad_norm": 0.5038367509841919, "learning_rate": 1.041653871528418e-05, "loss": 0.9353, "step": 8920 }, { "epoch": 0.7971050103871156, "grad_norm": 0.40401169657707214, "learning_rate": 1.040769948941952e-05, "loss": 0.9591, "step": 8921 }, { "epoch": 0.7971943619183773, "grad_norm": 0.5238271951675415, "learning_rate": 1.0398863579832047e-05, "loss": 0.9321, "step": 8922 }, { "epoch": 0.7972837134496392, "grad_norm": 0.5171613097190857, "learning_rate": 1.0390030987261866e-05, "loss": 0.957, "step": 8923 }, { "epoch": 0.7973730649809011, "grad_norm": 0.49921807646751404, "learning_rate": 1.0381201712448806e-05, "loss": 0.94, "step": 8924 }, { "epoch": 0.797462416512163, "grad_norm": 0.6377806067466736, "learning_rate": 1.0372375756132408e-05, "loss": 0.8859, "step": 8925 }, { "epoch": 0.7975517680434249, "grad_norm": 0.4824712872505188, "learning_rate": 1.036355311905194e-05, "loss": 0.9774, "step": 8926 }, { "epoch": 0.7976411195746868, "grad_norm": 0.5878000855445862, "learning_rate": 1.0354733801946393e-05, "loss": 0.91, "step": 8927 }, { "epoch": 0.7977304711059485, "grad_norm": 0.5659178495407104, "learning_rate": 1.0345917805554478e-05, "loss": 0.9632, "step": 8928 }, { "epoch": 0.7978198226372104, "grad_norm": 0.5241888165473938, "learning_rate": 1.0337105130614627e-05, "loss": 0.9609, "step": 8929 }, { "epoch": 0.7979091741684723, "grad_norm": 0.4879510998725891, "learning_rate": 1.0328295777865016e-05, "loss": 0.8878, "step": 8930 }, { "epoch": 0.7979985256997342, "grad_norm": 0.5221325159072876, "learning_rate": 1.0319489748043486e-05, "loss": 0.8727, "step": 8931 }, { "epoch": 0.7980878772309961, "grad_norm": 0.4458564519882202, "learning_rate": 1.0310687041887652e-05, "loss": 0.8967, "step": 8932 }, { "epoch": 0.7981772287622579, "grad_norm": 0.6000443696975708, "learning_rate": 1.0301887660134841e-05, "loss": 0.91, "step": 8933 }, { "epoch": 0.7982665802935198, "grad_norm": 0.4376446008682251, "learning_rate": 1.0293091603522081e-05, "loss": 0.9492, "step": 8934 }, { "epoch": 0.7983559318247816, "grad_norm": 0.4281294643878937, "learning_rate": 1.0284298872786153e-05, "loss": 0.9444, "step": 8935 }, { "epoch": 0.7984452833560435, "grad_norm": 0.5830131769180298, "learning_rate": 1.027550946866353e-05, "loss": 0.8794, "step": 8936 }, { "epoch": 0.7985346348873054, "grad_norm": 0.45756039023399353, "learning_rate": 1.026672339189042e-05, "loss": 0.9598, "step": 8937 }, { "epoch": 0.7986239864185672, "grad_norm": 0.42528459429740906, "learning_rate": 1.0257940643202757e-05, "loss": 1.0198, "step": 8938 }, { "epoch": 0.7987133379498291, "grad_norm": 0.4692322015762329, "learning_rate": 1.0249161223336185e-05, "loss": 1.0496, "step": 8939 }, { "epoch": 0.798802689481091, "grad_norm": 0.5667924284934998, "learning_rate": 1.024038513302607e-05, "loss": 0.913, "step": 8940 }, { "epoch": 0.7988920410123529, "grad_norm": 0.47331857681274414, "learning_rate": 1.0231612373007521e-05, "loss": 0.9722, "step": 8941 }, { "epoch": 0.7989813925436147, "grad_norm": 0.4151710867881775, "learning_rate": 1.0222842944015326e-05, "loss": 0.9374, "step": 8942 }, { "epoch": 0.7990707440748765, "grad_norm": 0.582118570804596, "learning_rate": 1.0214076846784027e-05, "loss": 0.9451, "step": 8943 }, { "epoch": 0.7991600956061384, "grad_norm": 0.46437790989875793, "learning_rate": 1.0205314082047879e-05, "loss": 0.9203, "step": 8944 }, { "epoch": 0.7992494471374003, "grad_norm": 0.4480501711368561, "learning_rate": 1.0196554650540857e-05, "loss": 0.9466, "step": 8945 }, { "epoch": 0.7993387986686622, "grad_norm": 0.5746645331382751, "learning_rate": 1.0187798552996653e-05, "loss": 0.8555, "step": 8946 }, { "epoch": 0.7994281501999241, "grad_norm": 0.4533185064792633, "learning_rate": 1.0179045790148707e-05, "loss": 0.9209, "step": 8947 }, { "epoch": 0.799517501731186, "grad_norm": 0.4475330412387848, "learning_rate": 1.0170296362730125e-05, "loss": 0.9418, "step": 8948 }, { "epoch": 0.7996068532624477, "grad_norm": 0.4576277434825897, "learning_rate": 1.0161550271473774e-05, "loss": 0.9764, "step": 8949 }, { "epoch": 0.7996962047937096, "grad_norm": 0.5483040809631348, "learning_rate": 1.0152807517112233e-05, "loss": 0.9551, "step": 8950 }, { "epoch": 0.7997855563249715, "grad_norm": 0.49113574624061584, "learning_rate": 1.0144068100377818e-05, "loss": 0.9638, "step": 8951 }, { "epoch": 0.7998749078562334, "grad_norm": 0.4609823226928711, "learning_rate": 1.013533202200252e-05, "loss": 0.949, "step": 8952 }, { "epoch": 0.7999642593874953, "grad_norm": 0.3998883068561554, "learning_rate": 1.0126599282718096e-05, "loss": 1.0431, "step": 8953 }, { "epoch": 0.8000536109187572, "grad_norm": 0.6044648289680481, "learning_rate": 1.0117869883255998e-05, "loss": 0.8911, "step": 8954 }, { "epoch": 0.800142962450019, "grad_norm": 0.4625280499458313, "learning_rate": 1.0109143824347411e-05, "loss": 0.9561, "step": 8955 }, { "epoch": 0.8002323139812808, "grad_norm": 0.45291417837142944, "learning_rate": 1.0100421106723234e-05, "loss": 0.9568, "step": 8956 }, { "epoch": 0.8003216655125427, "grad_norm": 0.4366665780544281, "learning_rate": 1.009170173111409e-05, "loss": 1.0054, "step": 8957 }, { "epoch": 0.8004110170438046, "grad_norm": 0.5513303875923157, "learning_rate": 1.0082985698250313e-05, "loss": 0.9255, "step": 8958 }, { "epoch": 0.8005003685750665, "grad_norm": 0.42455315589904785, "learning_rate": 1.0074273008861973e-05, "loss": 0.9084, "step": 8959 }, { "epoch": 0.8005897201063283, "grad_norm": 0.4331103563308716, "learning_rate": 1.0065563663678845e-05, "loss": 0.9705, "step": 8960 }, { "epoch": 0.8006790716375902, "grad_norm": 0.4532991349697113, "learning_rate": 1.005685766343044e-05, "loss": 0.8854, "step": 8961 }, { "epoch": 0.8007684231688521, "grad_norm": 0.4681682288646698, "learning_rate": 1.0048155008845962e-05, "loss": 0.9396, "step": 8962 }, { "epoch": 0.8008577747001139, "grad_norm": 0.4511670172214508, "learning_rate": 1.003945570065436e-05, "loss": 0.8475, "step": 8963 }, { "epoch": 0.8009471262313758, "grad_norm": 0.475967675447464, "learning_rate": 1.0030759739584284e-05, "loss": 0.9374, "step": 8964 }, { "epoch": 0.8010364777626376, "grad_norm": 0.547709047794342, "learning_rate": 1.0022067126364126e-05, "loss": 1.0038, "step": 8965 }, { "epoch": 0.8011258292938995, "grad_norm": 0.44402167201042175, "learning_rate": 1.001337786172198e-05, "loss": 0.9718, "step": 8966 }, { "epoch": 0.8012151808251614, "grad_norm": 0.40516430139541626, "learning_rate": 1.0004691946385663e-05, "loss": 1.0515, "step": 8967 }, { "epoch": 0.8013045323564233, "grad_norm": 0.4301491975784302, "learning_rate": 9.996009381082717e-06, "loss": 0.9238, "step": 8968 }, { "epoch": 0.8013938838876852, "grad_norm": 0.5374286770820618, "learning_rate": 9.987330166540398e-06, "loss": 0.9712, "step": 8969 }, { "epoch": 0.801483235418947, "grad_norm": 0.453789085149765, "learning_rate": 9.97865430348568e-06, "loss": 0.9511, "step": 8970 }, { "epoch": 0.8015725869502088, "grad_norm": 0.5180380344390869, "learning_rate": 9.969981792645273e-06, "loss": 0.8728, "step": 8971 }, { "epoch": 0.8016619384814707, "grad_norm": 0.46428194642066956, "learning_rate": 9.961312634745567e-06, "loss": 0.95, "step": 8972 }, { "epoch": 0.8017512900127326, "grad_norm": 0.5181485414505005, "learning_rate": 9.952646830512712e-06, "loss": 0.9228, "step": 8973 }, { "epoch": 0.8018406415439945, "grad_norm": 0.44874072074890137, "learning_rate": 9.94398438067256e-06, "loss": 0.9679, "step": 8974 }, { "epoch": 0.8019299930752564, "grad_norm": 0.47210460901260376, "learning_rate": 9.93532528595068e-06, "loss": 0.9655, "step": 8975 }, { "epoch": 0.8020193446065182, "grad_norm": 0.5403384566307068, "learning_rate": 9.926669547072364e-06, "loss": 0.9232, "step": 8976 }, { "epoch": 0.80210869613778, "grad_norm": 0.4292760193347931, "learning_rate": 9.918017164762627e-06, "loss": 0.9782, "step": 8977 }, { "epoch": 0.8021980476690419, "grad_norm": 0.4274854362010956, "learning_rate": 9.909368139746194e-06, "loss": 0.9455, "step": 8978 }, { "epoch": 0.8022873992003038, "grad_norm": 0.4722135663032532, "learning_rate": 9.90072247274752e-06, "loss": 1.0058, "step": 8979 }, { "epoch": 0.8023767507315657, "grad_norm": 0.4381501376628876, "learning_rate": 9.892080164490774e-06, "loss": 0.9518, "step": 8980 }, { "epoch": 0.8024661022628276, "grad_norm": 0.488213449716568, "learning_rate": 9.883441215699823e-06, "loss": 0.9261, "step": 8981 }, { "epoch": 0.8025554537940894, "grad_norm": 0.5511339902877808, "learning_rate": 9.874805627098282e-06, "loss": 0.9793, "step": 8982 }, { "epoch": 0.8026448053253513, "grad_norm": 0.5285826921463013, "learning_rate": 9.866173399409474e-06, "loss": 0.9814, "step": 8983 }, { "epoch": 0.8027341568566131, "grad_norm": 0.5300377607345581, "learning_rate": 9.857544533356456e-06, "loss": 1.0165, "step": 8984 }, { "epoch": 0.802823508387875, "grad_norm": 0.4703865051269531, "learning_rate": 9.848919029661952e-06, "loss": 0.8869, "step": 8985 }, { "epoch": 0.8029128599191369, "grad_norm": 0.5108640193939209, "learning_rate": 9.840296889048462e-06, "loss": 0.9011, "step": 8986 }, { "epoch": 0.8030022114503987, "grad_norm": 0.49105626344680786, "learning_rate": 9.831678112238173e-06, "loss": 0.8887, "step": 8987 }, { "epoch": 0.8030915629816606, "grad_norm": 0.5277919769287109, "learning_rate": 9.823062699953012e-06, "loss": 0.944, "step": 8988 }, { "epoch": 0.8031809145129225, "grad_norm": 0.4541589021682739, "learning_rate": 9.81445065291461e-06, "loss": 0.9229, "step": 8989 }, { "epoch": 0.8032702660441843, "grad_norm": 0.4479905366897583, "learning_rate": 9.805841971844305e-06, "loss": 0.9027, "step": 8990 }, { "epoch": 0.8033596175754462, "grad_norm": 0.4373217225074768, "learning_rate": 9.797236657463188e-06, "loss": 0.9373, "step": 8991 }, { "epoch": 0.803448969106708, "grad_norm": 0.407236248254776, "learning_rate": 9.788634710492017e-06, "loss": 0.9314, "step": 8992 }, { "epoch": 0.8035383206379699, "grad_norm": 0.5327048301696777, "learning_rate": 9.780036131651315e-06, "loss": 0.9155, "step": 8993 }, { "epoch": 0.8036276721692318, "grad_norm": 0.5354421138763428, "learning_rate": 9.7714409216613e-06, "loss": 0.8684, "step": 8994 }, { "epoch": 0.8037170237004937, "grad_norm": 0.5037555694580078, "learning_rate": 9.762849081241915e-06, "loss": 0.9443, "step": 8995 }, { "epoch": 0.8038063752317556, "grad_norm": 0.5367538928985596, "learning_rate": 9.754260611112814e-06, "loss": 0.9278, "step": 8996 }, { "epoch": 0.8038957267630173, "grad_norm": 0.4558742046356201, "learning_rate": 9.74567551199338e-06, "loss": 0.848, "step": 8997 }, { "epoch": 0.8039850782942792, "grad_norm": 0.45363011956214905, "learning_rate": 9.737093784602697e-06, "loss": 0.9489, "step": 8998 }, { "epoch": 0.8040744298255411, "grad_norm": 0.4492855668067932, "learning_rate": 9.728515429659585e-06, "loss": 0.9211, "step": 8999 }, { "epoch": 0.804163781356803, "grad_norm": 0.44268351793289185, "learning_rate": 9.719940447882563e-06, "loss": 0.9763, "step": 9000 }, { "epoch": 0.8042531328880649, "grad_norm": 0.4664279520511627, "learning_rate": 9.711368839989905e-06, "loss": 0.9177, "step": 9001 }, { "epoch": 0.8043424844193268, "grad_norm": 0.4780477285385132, "learning_rate": 9.70280060669953e-06, "loss": 0.8817, "step": 9002 }, { "epoch": 0.8044318359505886, "grad_norm": 0.5579407811164856, "learning_rate": 9.694235748729146e-06, "loss": 0.8823, "step": 9003 }, { "epoch": 0.8045211874818504, "grad_norm": 0.47675004601478577, "learning_rate": 9.685674266796146e-06, "loss": 0.9631, "step": 9004 }, { "epoch": 0.8046105390131123, "grad_norm": 0.3818524479866028, "learning_rate": 9.67711616161765e-06, "loss": 0.937, "step": 9005 }, { "epoch": 0.8046998905443742, "grad_norm": 0.4102923572063446, "learning_rate": 9.668561433910484e-06, "loss": 0.9733, "step": 9006 }, { "epoch": 0.8047892420756361, "grad_norm": 0.41497570276260376, "learning_rate": 9.660010084391197e-06, "loss": 1.0069, "step": 9007 }, { "epoch": 0.804878593606898, "grad_norm": 0.6005346775054932, "learning_rate": 9.65146211377606e-06, "loss": 0.925, "step": 9008 }, { "epoch": 0.8049679451381598, "grad_norm": 0.5480002164840698, "learning_rate": 9.642917522781058e-06, "loss": 0.9914, "step": 9009 }, { "epoch": 0.8050572966694217, "grad_norm": 0.5394856333732605, "learning_rate": 9.63437631212189e-06, "loss": 0.8734, "step": 9010 }, { "epoch": 0.8051466482006835, "grad_norm": 0.48310962319374084, "learning_rate": 9.625838482513983e-06, "loss": 1.0201, "step": 9011 }, { "epoch": 0.8052359997319454, "grad_norm": 0.485075443983078, "learning_rate": 9.617304034672448e-06, "loss": 0.916, "step": 9012 }, { "epoch": 0.8053253512632073, "grad_norm": 0.5205706357955933, "learning_rate": 9.608772969312147e-06, "loss": 0.9511, "step": 9013 }, { "epoch": 0.8054147027944691, "grad_norm": 0.48902761936187744, "learning_rate": 9.600245287147652e-06, "loss": 0.896, "step": 9014 }, { "epoch": 0.805504054325731, "grad_norm": 0.5032730102539062, "learning_rate": 9.591720988893244e-06, "loss": 0.9751, "step": 9015 }, { "epoch": 0.8055934058569929, "grad_norm": 0.537691593170166, "learning_rate": 9.583200075262921e-06, "loss": 0.983, "step": 9016 }, { "epoch": 0.8056827573882548, "grad_norm": 0.5454861521720886, "learning_rate": 9.57468254697042e-06, "loss": 0.9127, "step": 9017 }, { "epoch": 0.8057721089195166, "grad_norm": 0.4477491080760956, "learning_rate": 9.566168404729148e-06, "loss": 0.995, "step": 9018 }, { "epoch": 0.8058614604507784, "grad_norm": 0.5274454951286316, "learning_rate": 9.55765764925226e-06, "loss": 0.8299, "step": 9019 }, { "epoch": 0.8059508119820403, "grad_norm": 0.5348902940750122, "learning_rate": 9.549150281252633e-06, "loss": 0.9079, "step": 9020 }, { "epoch": 0.8060401635133022, "grad_norm": 0.5387945771217346, "learning_rate": 9.54064630144284e-06, "loss": 0.9053, "step": 9021 }, { "epoch": 0.8061295150445641, "grad_norm": 0.5068916082382202, "learning_rate": 9.532145710535207e-06, "loss": 0.926, "step": 9022 }, { "epoch": 0.806218866575826, "grad_norm": 0.39943739771842957, "learning_rate": 9.523648509241706e-06, "loss": 0.9773, "step": 9023 }, { "epoch": 0.8063082181070879, "grad_norm": 0.676214873790741, "learning_rate": 9.515154698274093e-06, "loss": 0.8005, "step": 9024 }, { "epoch": 0.8063975696383496, "grad_norm": 0.45418787002563477, "learning_rate": 9.506664278343808e-06, "loss": 0.9662, "step": 9025 }, { "epoch": 0.8064869211696115, "grad_norm": 0.5260841846466064, "learning_rate": 9.498177250162022e-06, "loss": 1.0117, "step": 9026 }, { "epoch": 0.8065762727008734, "grad_norm": 0.502400815486908, "learning_rate": 9.489693614439605e-06, "loss": 0.8963, "step": 9027 }, { "epoch": 0.8066656242321353, "grad_norm": 0.5157961249351501, "learning_rate": 9.48121337188716e-06, "loss": 0.9942, "step": 9028 }, { "epoch": 0.8067549757633972, "grad_norm": 0.6219414472579956, "learning_rate": 9.472736523214993e-06, "loss": 0.9617, "step": 9029 }, { "epoch": 0.806844327294659, "grad_norm": 0.41551846265792847, "learning_rate": 9.464263069133134e-06, "loss": 0.9622, "step": 9030 }, { "epoch": 0.8069336788259209, "grad_norm": 0.454323947429657, "learning_rate": 9.455793010351321e-06, "loss": 0.9332, "step": 9031 }, { "epoch": 0.8070230303571827, "grad_norm": 0.4970146119594574, "learning_rate": 9.447326347579028e-06, "loss": 0.9591, "step": 9032 }, { "epoch": 0.8071123818884446, "grad_norm": 0.44244319200515747, "learning_rate": 9.438863081525396e-06, "loss": 0.9451, "step": 9033 }, { "epoch": 0.8072017334197065, "grad_norm": 0.47928622364997864, "learning_rate": 9.430403212899336e-06, "loss": 0.8969, "step": 9034 }, { "epoch": 0.8072910849509684, "grad_norm": 0.45787954330444336, "learning_rate": 9.421946742409448e-06, "loss": 0.9851, "step": 9035 }, { "epoch": 0.8073804364822302, "grad_norm": 0.603981077671051, "learning_rate": 9.41349367076405e-06, "loss": 0.9052, "step": 9036 }, { "epoch": 0.8074697880134921, "grad_norm": 0.47599250078201294, "learning_rate": 9.405043998671176e-06, "loss": 0.9525, "step": 9037 }, { "epoch": 0.807559139544754, "grad_norm": 0.4036829471588135, "learning_rate": 9.39659772683858e-06, "loss": 0.9176, "step": 9038 }, { "epoch": 0.8076484910760158, "grad_norm": 0.49439674615859985, "learning_rate": 9.38815485597373e-06, "loss": 0.9366, "step": 9039 }, { "epoch": 0.8077378426072777, "grad_norm": 0.5236937403678894, "learning_rate": 9.379715386783794e-06, "loss": 0.9086, "step": 9040 }, { "epoch": 0.8078271941385395, "grad_norm": 0.4477667808532715, "learning_rate": 9.371279319975678e-06, "loss": 0.9332, "step": 9041 }, { "epoch": 0.8079165456698014, "grad_norm": 0.509158730506897, "learning_rate": 9.362846656256008e-06, "loss": 0.9012, "step": 9042 }, { "epoch": 0.8080058972010633, "grad_norm": 0.43936219811439514, "learning_rate": 9.354417396331073e-06, "loss": 0.9957, "step": 9043 }, { "epoch": 0.8080952487323252, "grad_norm": 0.5267393589019775, "learning_rate": 9.345991540906934e-06, "loss": 0.8568, "step": 9044 }, { "epoch": 0.8081846002635871, "grad_norm": 0.5685756802558899, "learning_rate": 9.337569090689346e-06, "loss": 0.9036, "step": 9045 }, { "epoch": 0.8082739517948488, "grad_norm": 0.5792932510375977, "learning_rate": 9.329150046383772e-06, "loss": 0.8976, "step": 9046 }, { "epoch": 0.8083633033261107, "grad_norm": 0.4724472761154175, "learning_rate": 9.320734408695403e-06, "loss": 0.8995, "step": 9047 }, { "epoch": 0.8084526548573726, "grad_norm": 0.4375053346157074, "learning_rate": 9.312322178329142e-06, "loss": 1.06, "step": 9048 }, { "epoch": 0.8085420063886345, "grad_norm": 0.6214897036552429, "learning_rate": 9.303913355989596e-06, "loss": 0.9019, "step": 9049 }, { "epoch": 0.8086313579198964, "grad_norm": 0.49101778864860535, "learning_rate": 9.295507942381103e-06, "loss": 0.9121, "step": 9050 }, { "epoch": 0.8087207094511583, "grad_norm": 0.4864928424358368, "learning_rate": 9.28710593820769e-06, "loss": 0.8843, "step": 9051 }, { "epoch": 0.80881006098242, "grad_norm": 0.4251103401184082, "learning_rate": 9.278707344173126e-06, "loss": 0.9909, "step": 9052 }, { "epoch": 0.8088994125136819, "grad_norm": 0.5644087195396423, "learning_rate": 9.270312160980876e-06, "loss": 0.9755, "step": 9053 }, { "epoch": 0.8089887640449438, "grad_norm": 0.42497533559799194, "learning_rate": 9.261920389334139e-06, "loss": 0.9219, "step": 9054 }, { "epoch": 0.8090781155762057, "grad_norm": 0.37208980321884155, "learning_rate": 9.253532029935796e-06, "loss": 0.9487, "step": 9055 }, { "epoch": 0.8091674671074676, "grad_norm": 0.5022962689399719, "learning_rate": 9.245147083488476e-06, "loss": 0.9539, "step": 9056 }, { "epoch": 0.8092568186387294, "grad_norm": 0.4653746485710144, "learning_rate": 9.236765550694498e-06, "loss": 1.0132, "step": 9057 }, { "epoch": 0.8093461701699913, "grad_norm": 0.4346064329147339, "learning_rate": 9.228387432255915e-06, "loss": 0.9706, "step": 9058 }, { "epoch": 0.8094355217012531, "grad_norm": 0.6596688032150269, "learning_rate": 9.220012728874473e-06, "loss": 0.8905, "step": 9059 }, { "epoch": 0.809524873232515, "grad_norm": 0.5736172795295715, "learning_rate": 9.21164144125165e-06, "loss": 0.9162, "step": 9060 }, { "epoch": 0.8096142247637769, "grad_norm": 0.43389225006103516, "learning_rate": 9.203273570088634e-06, "loss": 0.9322, "step": 9061 }, { "epoch": 0.8097035762950388, "grad_norm": 0.516688346862793, "learning_rate": 9.194909116086325e-06, "loss": 0.9036, "step": 9062 }, { "epoch": 0.8097929278263006, "grad_norm": 0.4250909984111786, "learning_rate": 9.186548079945318e-06, "loss": 0.9199, "step": 9063 }, { "epoch": 0.8098822793575625, "grad_norm": 0.5514670014381409, "learning_rate": 9.178190462365949e-06, "loss": 0.9008, "step": 9064 }, { "epoch": 0.8099716308888244, "grad_norm": 0.529180109500885, "learning_rate": 9.169836264048259e-06, "loss": 0.8523, "step": 9065 }, { "epoch": 0.8100609824200862, "grad_norm": 0.4616588056087494, "learning_rate": 9.161485485692001e-06, "loss": 0.9791, "step": 9066 }, { "epoch": 0.8101503339513481, "grad_norm": 0.5188908576965332, "learning_rate": 9.15313812799664e-06, "loss": 0.9484, "step": 9067 }, { "epoch": 0.8102396854826099, "grad_norm": 0.43526217341423035, "learning_rate": 9.144794191661355e-06, "loss": 0.9121, "step": 9068 }, { "epoch": 0.8103290370138718, "grad_norm": 0.44872915744781494, "learning_rate": 9.136453677385043e-06, "loss": 1.0135, "step": 9069 }, { "epoch": 0.8104183885451337, "grad_norm": 0.4532172679901123, "learning_rate": 9.128116585866308e-06, "loss": 0.9565, "step": 9070 }, { "epoch": 0.8105077400763956, "grad_norm": 0.5452584028244019, "learning_rate": 9.119782917803476e-06, "loss": 0.9311, "step": 9071 }, { "epoch": 0.8105970916076575, "grad_norm": 0.5876386165618896, "learning_rate": 9.111452673894588e-06, "loss": 0.8881, "step": 9072 }, { "epoch": 0.8106864431389192, "grad_norm": 0.4356098175048828, "learning_rate": 9.103125854837362e-06, "loss": 0.9527, "step": 9073 }, { "epoch": 0.8107757946701811, "grad_norm": 0.47350767254829407, "learning_rate": 9.094802461329277e-06, "loss": 0.9068, "step": 9074 }, { "epoch": 0.810865146201443, "grad_norm": 0.4826947748661041, "learning_rate": 9.086482494067506e-06, "loss": 0.8969, "step": 9075 }, { "epoch": 0.8109544977327049, "grad_norm": 0.5265054702758789, "learning_rate": 9.078165953748936e-06, "loss": 0.9365, "step": 9076 }, { "epoch": 0.8110438492639668, "grad_norm": 0.41897475719451904, "learning_rate": 9.069852841070159e-06, "loss": 0.9803, "step": 9077 }, { "epoch": 0.8111332007952287, "grad_norm": 0.4707808196544647, "learning_rate": 9.061543156727486e-06, "loss": 0.8801, "step": 9078 }, { "epoch": 0.8112225523264905, "grad_norm": 0.5158072710037231, "learning_rate": 9.05323690141695e-06, "loss": 0.9694, "step": 9079 }, { "epoch": 0.8113119038577523, "grad_norm": 0.47056683897972107, "learning_rate": 9.044934075834289e-06, "loss": 0.9452, "step": 9080 }, { "epoch": 0.8114012553890142, "grad_norm": 0.5674601793289185, "learning_rate": 9.036634680674943e-06, "loss": 0.8907, "step": 9081 }, { "epoch": 0.8114906069202761, "grad_norm": 0.44375982880592346, "learning_rate": 9.028338716634094e-06, "loss": 1.0265, "step": 9082 }, { "epoch": 0.811579958451538, "grad_norm": 0.4866836965084076, "learning_rate": 9.020046184406593e-06, "loss": 0.9754, "step": 9083 }, { "epoch": 0.8116693099827998, "grad_norm": 0.42839664220809937, "learning_rate": 9.01175708468704e-06, "loss": 0.9104, "step": 9084 }, { "epoch": 0.8117586615140617, "grad_norm": 0.4285668432712555, "learning_rate": 9.003471418169734e-06, "loss": 0.9743, "step": 9085 }, { "epoch": 0.8118480130453236, "grad_norm": 0.6068586111068726, "learning_rate": 8.995189185548686e-06, "loss": 0.978, "step": 9086 }, { "epoch": 0.8119373645765854, "grad_norm": 0.4698917865753174, "learning_rate": 8.986910387517639e-06, "loss": 0.9434, "step": 9087 }, { "epoch": 0.8120267161078473, "grad_norm": 0.4826454222202301, "learning_rate": 8.97863502477e-06, "loss": 0.9581, "step": 9088 }, { "epoch": 0.8121160676391092, "grad_norm": 0.5964739322662354, "learning_rate": 8.970363097998936e-06, "loss": 0.8737, "step": 9089 }, { "epoch": 0.812205419170371, "grad_norm": 0.48209187388420105, "learning_rate": 8.962094607897303e-06, "loss": 0.8962, "step": 9090 }, { "epoch": 0.8122947707016329, "grad_norm": 0.5253683924674988, "learning_rate": 8.953829555157684e-06, "loss": 0.874, "step": 9091 }, { "epoch": 0.8123841222328948, "grad_norm": 0.3659655451774597, "learning_rate": 8.945567940472371e-06, "loss": 1.0097, "step": 9092 }, { "epoch": 0.8124734737641567, "grad_norm": 0.5746763944625854, "learning_rate": 8.937309764533335e-06, "loss": 0.8948, "step": 9093 }, { "epoch": 0.8125628252954185, "grad_norm": 0.5120271444320679, "learning_rate": 8.929055028032312e-06, "loss": 0.9083, "step": 9094 }, { "epoch": 0.8126521768266803, "grad_norm": 0.4482232928276062, "learning_rate": 8.92080373166071e-06, "loss": 0.8775, "step": 9095 }, { "epoch": 0.8127415283579422, "grad_norm": 0.7028616070747375, "learning_rate": 8.912555876109668e-06, "loss": 0.8673, "step": 9096 }, { "epoch": 0.8128308798892041, "grad_norm": 0.5362510681152344, "learning_rate": 8.904311462070031e-06, "loss": 0.8818, "step": 9097 }, { "epoch": 0.812920231420466, "grad_norm": 0.42716673016548157, "learning_rate": 8.89607049023236e-06, "loss": 0.9894, "step": 9098 }, { "epoch": 0.8130095829517279, "grad_norm": 0.6123215556144714, "learning_rate": 8.887832961286923e-06, "loss": 0.9649, "step": 9099 }, { "epoch": 0.8130989344829898, "grad_norm": 0.4452812671661377, "learning_rate": 8.8795988759237e-06, "loss": 0.9172, "step": 9100 }, { "epoch": 0.8131882860142515, "grad_norm": 0.49280375242233276, "learning_rate": 8.871368234832378e-06, "loss": 0.9344, "step": 9101 }, { "epoch": 0.8132776375455134, "grad_norm": 0.4815100431442261, "learning_rate": 8.863141038702382e-06, "loss": 0.8467, "step": 9102 }, { "epoch": 0.8133669890767753, "grad_norm": 0.48964032530784607, "learning_rate": 8.854917288222803e-06, "loss": 0.9917, "step": 9103 }, { "epoch": 0.8134563406080372, "grad_norm": 0.45770344138145447, "learning_rate": 8.84669698408247e-06, "loss": 0.9124, "step": 9104 }, { "epoch": 0.8135456921392991, "grad_norm": 0.49135828018188477, "learning_rate": 8.838480126969928e-06, "loss": 0.8979, "step": 9105 }, { "epoch": 0.8136350436705609, "grad_norm": 0.44308167695999146, "learning_rate": 8.830266717573427e-06, "loss": 1.0013, "step": 9106 }, { "epoch": 0.8137243952018228, "grad_norm": 0.4538305699825287, "learning_rate": 8.822056756580926e-06, "loss": 0.9082, "step": 9107 }, { "epoch": 0.8138137467330846, "grad_norm": 0.5253844857215881, "learning_rate": 8.8138502446801e-06, "loss": 0.8479, "step": 9108 }, { "epoch": 0.8139030982643465, "grad_norm": 0.47331321239471436, "learning_rate": 8.805647182558324e-06, "loss": 0.8898, "step": 9109 }, { "epoch": 0.8139924497956084, "grad_norm": 0.5414149761199951, "learning_rate": 8.797447570902695e-06, "loss": 0.9196, "step": 9110 }, { "epoch": 0.8140818013268702, "grad_norm": 0.45004722476005554, "learning_rate": 8.789251410400023e-06, "loss": 0.94, "step": 9111 }, { "epoch": 0.8141711528581321, "grad_norm": 0.4916594624519348, "learning_rate": 8.781058701736822e-06, "loss": 0.9332, "step": 9112 }, { "epoch": 0.814260504389394, "grad_norm": 0.4902633726596832, "learning_rate": 8.77286944559933e-06, "loss": 0.8747, "step": 9113 }, { "epoch": 0.8143498559206558, "grad_norm": 0.5728904008865356, "learning_rate": 8.764683642673455e-06, "loss": 0.8908, "step": 9114 }, { "epoch": 0.8144392074519177, "grad_norm": 0.5360121726989746, "learning_rate": 8.756501293644865e-06, "loss": 0.8705, "step": 9115 }, { "epoch": 0.8145285589831796, "grad_norm": 0.5026247501373291, "learning_rate": 8.748322399198916e-06, "loss": 1.0092, "step": 9116 }, { "epoch": 0.8146179105144414, "grad_norm": 0.46945619583129883, "learning_rate": 8.740146960020673e-06, "loss": 0.9574, "step": 9117 }, { "epoch": 0.8147072620457033, "grad_norm": 0.4218684434890747, "learning_rate": 8.731974976794926e-06, "loss": 0.9714, "step": 9118 }, { "epoch": 0.8147966135769652, "grad_norm": 0.5485289692878723, "learning_rate": 8.723806450206157e-06, "loss": 0.9748, "step": 9119 }, { "epoch": 0.8148859651082271, "grad_norm": 0.5682856440544128, "learning_rate": 8.715641380938583e-06, "loss": 0.9386, "step": 9120 }, { "epoch": 0.8149753166394889, "grad_norm": 0.49733036756515503, "learning_rate": 8.707479769676091e-06, "loss": 0.9663, "step": 9121 }, { "epoch": 0.8150646681707507, "grad_norm": 0.42391541600227356, "learning_rate": 8.699321617102312e-06, "loss": 0.92, "step": 9122 }, { "epoch": 0.8151540197020126, "grad_norm": 0.48684948682785034, "learning_rate": 8.691166923900585e-06, "loss": 0.96, "step": 9123 }, { "epoch": 0.8152433712332745, "grad_norm": 0.4087164103984833, "learning_rate": 8.68301569075396e-06, "loss": 0.9743, "step": 9124 }, { "epoch": 0.8153327227645364, "grad_norm": 0.5374601483345032, "learning_rate": 8.67486791834517e-06, "loss": 0.9377, "step": 9125 }, { "epoch": 0.8154220742957983, "grad_norm": 0.4423445463180542, "learning_rate": 8.66672360735668e-06, "loss": 0.9455, "step": 9126 }, { "epoch": 0.8155114258270602, "grad_norm": 0.47707852721214294, "learning_rate": 8.658582758470673e-06, "loss": 0.926, "step": 9127 }, { "epoch": 0.8156007773583219, "grad_norm": 0.4866050183773041, "learning_rate": 8.650445372369025e-06, "loss": 0.9738, "step": 9128 }, { "epoch": 0.8156901288895838, "grad_norm": 0.5553078651428223, "learning_rate": 8.642311449733331e-06, "loss": 0.8595, "step": 9129 }, { "epoch": 0.8157794804208457, "grad_norm": 0.4209044277667999, "learning_rate": 8.634180991244894e-06, "loss": 0.9365, "step": 9130 }, { "epoch": 0.8158688319521076, "grad_norm": 0.4722152352333069, "learning_rate": 8.626053997584732e-06, "loss": 0.9841, "step": 9131 }, { "epoch": 0.8159581834833695, "grad_norm": 0.5327540040016174, "learning_rate": 8.61793046943356e-06, "loss": 0.9321, "step": 9132 }, { "epoch": 0.8160475350146313, "grad_norm": 0.4482969045639038, "learning_rate": 8.609810407471824e-06, "loss": 0.9341, "step": 9133 }, { "epoch": 0.8161368865458932, "grad_norm": 0.46904319524765015, "learning_rate": 8.60169381237964e-06, "loss": 1.0379, "step": 9134 }, { "epoch": 0.816226238077155, "grad_norm": 0.5137619972229004, "learning_rate": 8.593580684836877e-06, "loss": 0.8599, "step": 9135 }, { "epoch": 0.8163155896084169, "grad_norm": 0.5669674873352051, "learning_rate": 8.58547102552309e-06, "loss": 0.857, "step": 9136 }, { "epoch": 0.8164049411396788, "grad_norm": 0.45517855882644653, "learning_rate": 8.577364835117552e-06, "loss": 1.0162, "step": 9137 }, { "epoch": 0.8164942926709406, "grad_norm": 0.5764082074165344, "learning_rate": 8.569262114299243e-06, "loss": 0.9356, "step": 9138 }, { "epoch": 0.8165836442022025, "grad_norm": 0.69561767578125, "learning_rate": 8.561162863746847e-06, "loss": 0.8668, "step": 9139 }, { "epoch": 0.8166729957334644, "grad_norm": 0.6161789298057556, "learning_rate": 8.553067084138772e-06, "loss": 0.905, "step": 9140 }, { "epoch": 0.8167623472647263, "grad_norm": 0.5391383171081543, "learning_rate": 8.544974776153124e-06, "loss": 0.9331, "step": 9141 }, { "epoch": 0.8168516987959881, "grad_norm": 0.504325270652771, "learning_rate": 8.536885940467715e-06, "loss": 0.9957, "step": 9142 }, { "epoch": 0.81694105032725, "grad_norm": 0.4798524081707001, "learning_rate": 8.528800577760083e-06, "loss": 0.9162, "step": 9143 }, { "epoch": 0.8170304018585118, "grad_norm": 0.48512718081474304, "learning_rate": 8.520718688707447e-06, "loss": 0.9431, "step": 9144 }, { "epoch": 0.8171197533897737, "grad_norm": 0.5134716033935547, "learning_rate": 8.512640273986756e-06, "loss": 0.9639, "step": 9145 }, { "epoch": 0.8172091049210356, "grad_norm": 0.3981763422489166, "learning_rate": 8.504565334274662e-06, "loss": 0.9315, "step": 9146 }, { "epoch": 0.8172984564522975, "grad_norm": 0.43053102493286133, "learning_rate": 8.496493870247535e-06, "loss": 0.9206, "step": 9147 }, { "epoch": 0.8173878079835594, "grad_norm": 0.5616499185562134, "learning_rate": 8.488425882581436e-06, "loss": 0.9045, "step": 9148 }, { "epoch": 0.8174771595148211, "grad_norm": 0.4507736265659332, "learning_rate": 8.480361371952156e-06, "loss": 0.9098, "step": 9149 }, { "epoch": 0.817566511046083, "grad_norm": 0.4418569505214691, "learning_rate": 8.472300339035178e-06, "loss": 0.9385, "step": 9150 }, { "epoch": 0.8176558625773449, "grad_norm": 0.39274096488952637, "learning_rate": 8.4642427845057e-06, "loss": 0.9688, "step": 9151 }, { "epoch": 0.8177452141086068, "grad_norm": 0.48565006256103516, "learning_rate": 8.456188709038632e-06, "loss": 0.9807, "step": 9152 }, { "epoch": 0.8178345656398687, "grad_norm": 0.5166711211204529, "learning_rate": 8.44813811330859e-06, "loss": 1.0159, "step": 9153 }, { "epoch": 0.8179239171711306, "grad_norm": 0.5695293545722961, "learning_rate": 8.440090997989885e-06, "loss": 0.936, "step": 9154 }, { "epoch": 0.8180132687023924, "grad_norm": 0.4986543655395508, "learning_rate": 8.432047363756551e-06, "loss": 0.9075, "step": 9155 }, { "epoch": 0.8181026202336542, "grad_norm": 0.5056511163711548, "learning_rate": 8.424007211282337e-06, "loss": 0.9455, "step": 9156 }, { "epoch": 0.8181919717649161, "grad_norm": 0.5469470620155334, "learning_rate": 8.415970541240697e-06, "loss": 0.8948, "step": 9157 }, { "epoch": 0.818281323296178, "grad_norm": 0.5168966054916382, "learning_rate": 8.407937354304769e-06, "loss": 0.8955, "step": 9158 }, { "epoch": 0.8183706748274399, "grad_norm": 0.4793681800365448, "learning_rate": 8.39990765114742e-06, "loss": 0.9188, "step": 9159 }, { "epoch": 0.8184600263587017, "grad_norm": 0.5143187046051025, "learning_rate": 8.39188143244124e-06, "loss": 0.9672, "step": 9160 }, { "epoch": 0.8185493778899636, "grad_norm": 0.5519688725471497, "learning_rate": 8.383858698858494e-06, "loss": 0.9495, "step": 9161 }, { "epoch": 0.8186387294212255, "grad_norm": 0.45014795660972595, "learning_rate": 8.375839451071183e-06, "loss": 0.8814, "step": 9162 }, { "epoch": 0.8187280809524873, "grad_norm": 0.4443376064300537, "learning_rate": 8.367823689751009e-06, "loss": 0.9321, "step": 9163 }, { "epoch": 0.8188174324837492, "grad_norm": 0.5371510982513428, "learning_rate": 8.359811415569352e-06, "loss": 0.9393, "step": 9164 }, { "epoch": 0.818906784015011, "grad_norm": 0.5210739374160767, "learning_rate": 8.351802629197348e-06, "loss": 0.949, "step": 9165 }, { "epoch": 0.8189961355462729, "grad_norm": 0.4387337565422058, "learning_rate": 8.343797331305809e-06, "loss": 0.9241, "step": 9166 }, { "epoch": 0.8190854870775348, "grad_norm": 0.4759118854999542, "learning_rate": 8.335795522565264e-06, "loss": 0.9132, "step": 9167 }, { "epoch": 0.8191748386087967, "grad_norm": 0.5519284605979919, "learning_rate": 8.327797203645954e-06, "loss": 0.9745, "step": 9168 }, { "epoch": 0.8192641901400586, "grad_norm": 0.4415756165981293, "learning_rate": 8.319802375217821e-06, "loss": 1.0214, "step": 9169 }, { "epoch": 0.8193535416713204, "grad_norm": 0.5409457683563232, "learning_rate": 8.311811037950522e-06, "loss": 0.979, "step": 9170 }, { "epoch": 0.8194428932025822, "grad_norm": 0.49359166622161865, "learning_rate": 8.30382319251341e-06, "loss": 0.8628, "step": 9171 }, { "epoch": 0.8195322447338441, "grad_norm": 0.45984289050102234, "learning_rate": 8.295838839575559e-06, "loss": 0.9556, "step": 9172 }, { "epoch": 0.819621596265106, "grad_norm": 0.520918607711792, "learning_rate": 8.287857979805746e-06, "loss": 0.9423, "step": 9173 }, { "epoch": 0.8197109477963679, "grad_norm": 0.4687787890434265, "learning_rate": 8.279880613872438e-06, "loss": 0.9551, "step": 9174 }, { "epoch": 0.8198002993276298, "grad_norm": 0.4171432852745056, "learning_rate": 8.271906742443836e-06, "loss": 0.9329, "step": 9175 }, { "epoch": 0.8198896508588915, "grad_norm": 0.5635870099067688, "learning_rate": 8.263936366187824e-06, "loss": 0.8315, "step": 9176 }, { "epoch": 0.8199790023901534, "grad_norm": 0.5225967764854431, "learning_rate": 8.255969485772025e-06, "loss": 0.9817, "step": 9177 }, { "epoch": 0.8200683539214153, "grad_norm": 0.47580423951148987, "learning_rate": 8.248006101863736e-06, "loss": 0.9601, "step": 9178 }, { "epoch": 0.8201577054526772, "grad_norm": 0.5381955504417419, "learning_rate": 8.240046215129982e-06, "loss": 0.9547, "step": 9179 }, { "epoch": 0.8202470569839391, "grad_norm": 0.5118776559829712, "learning_rate": 8.232089826237487e-06, "loss": 0.9135, "step": 9180 }, { "epoch": 0.820336408515201, "grad_norm": 0.6581920981407166, "learning_rate": 8.224136935852683e-06, "loss": 0.8308, "step": 9181 }, { "epoch": 0.8204257600464628, "grad_norm": 0.45607322454452515, "learning_rate": 8.216187544641706e-06, "loss": 0.881, "step": 9182 }, { "epoch": 0.8205151115777246, "grad_norm": 0.4387778639793396, "learning_rate": 8.208241653270421e-06, "loss": 0.9486, "step": 9183 }, { "epoch": 0.8206044631089865, "grad_norm": 0.5159300565719604, "learning_rate": 8.200299262404353e-06, "loss": 1.005, "step": 9184 }, { "epoch": 0.8206938146402484, "grad_norm": 0.6031007766723633, "learning_rate": 8.19236037270878e-06, "loss": 0.8316, "step": 9185 }, { "epoch": 0.8207831661715103, "grad_norm": 0.4924890995025635, "learning_rate": 8.184424984848655e-06, "loss": 0.9564, "step": 9186 }, { "epoch": 0.8208725177027721, "grad_norm": 0.4664970934391022, "learning_rate": 8.176493099488663e-06, "loss": 0.9416, "step": 9187 }, { "epoch": 0.820961869234034, "grad_norm": 0.46760138869285583, "learning_rate": 8.168564717293182e-06, "loss": 0.9566, "step": 9188 }, { "epoch": 0.8210512207652959, "grad_norm": 0.5146795511245728, "learning_rate": 8.160639838926293e-06, "loss": 0.8951, "step": 9189 }, { "epoch": 0.8211405722965577, "grad_norm": 0.4736398458480835, "learning_rate": 8.152718465051806e-06, "loss": 1.0171, "step": 9190 }, { "epoch": 0.8212299238278196, "grad_norm": 0.6582403182983398, "learning_rate": 8.144800596333196e-06, "loss": 0.9182, "step": 9191 }, { "epoch": 0.8213192753590814, "grad_norm": 0.49453866481781006, "learning_rate": 8.136886233433683e-06, "loss": 0.8465, "step": 9192 }, { "epoch": 0.8214086268903433, "grad_norm": 0.4479573965072632, "learning_rate": 8.128975377016174e-06, "loss": 0.8996, "step": 9193 }, { "epoch": 0.8214979784216052, "grad_norm": 0.5152574777603149, "learning_rate": 8.1210680277433e-06, "loss": 0.9579, "step": 9194 }, { "epoch": 0.8215873299528671, "grad_norm": 0.5516067743301392, "learning_rate": 8.113164186277367e-06, "loss": 0.9078, "step": 9195 }, { "epoch": 0.821676681484129, "grad_norm": 0.4801537096500397, "learning_rate": 8.105263853280416e-06, "loss": 1.0086, "step": 9196 }, { "epoch": 0.8217660330153908, "grad_norm": 0.4999988377094269, "learning_rate": 8.097367029414182e-06, "loss": 0.8929, "step": 9197 }, { "epoch": 0.8218553845466526, "grad_norm": 0.45241791009902954, "learning_rate": 8.089473715340107e-06, "loss": 0.9495, "step": 9198 }, { "epoch": 0.8219447360779145, "grad_norm": 0.4731530249118805, "learning_rate": 8.081583911719343e-06, "loss": 0.9343, "step": 9199 }, { "epoch": 0.8220340876091764, "grad_norm": 0.5134366154670715, "learning_rate": 8.073697619212745e-06, "loss": 0.9878, "step": 9200 }, { "epoch": 0.8221234391404383, "grad_norm": 0.4594072103500366, "learning_rate": 8.065814838480879e-06, "loss": 0.9403, "step": 9201 }, { "epoch": 0.8222127906717002, "grad_norm": 0.49172157049179077, "learning_rate": 8.057935570184e-06, "loss": 0.9553, "step": 9202 }, { "epoch": 0.822302142202962, "grad_norm": 0.44806215167045593, "learning_rate": 8.050059814982092e-06, "loss": 0.9714, "step": 9203 }, { "epoch": 0.8223914937342238, "grad_norm": 0.4985756576061249, "learning_rate": 8.042187573534836e-06, "loss": 0.9834, "step": 9204 }, { "epoch": 0.8224808452654857, "grad_norm": 0.5466139912605286, "learning_rate": 8.034318846501598e-06, "loss": 0.9727, "step": 9205 }, { "epoch": 0.8225701967967476, "grad_norm": 0.4964464008808136, "learning_rate": 8.026453634541481e-06, "loss": 0.9444, "step": 9206 }, { "epoch": 0.8226595483280095, "grad_norm": 0.49595358967781067, "learning_rate": 8.018591938313275e-06, "loss": 0.973, "step": 9207 }, { "epoch": 0.8227488998592714, "grad_norm": 0.5283821821212769, "learning_rate": 8.010733758475486e-06, "loss": 0.956, "step": 9208 }, { "epoch": 0.8228382513905332, "grad_norm": 0.4949239492416382, "learning_rate": 8.002879095686317e-06, "loss": 0.9192, "step": 9209 }, { "epoch": 0.8229276029217951, "grad_norm": 0.48823899030685425, "learning_rate": 7.995027950603683e-06, "loss": 0.8878, "step": 9210 }, { "epoch": 0.8230169544530569, "grad_norm": 0.483600378036499, "learning_rate": 7.987180323885196e-06, "loss": 0.9239, "step": 9211 }, { "epoch": 0.8231063059843188, "grad_norm": 0.4983593225479126, "learning_rate": 7.979336216188182e-06, "loss": 0.9776, "step": 9212 }, { "epoch": 0.8231956575155807, "grad_norm": 0.4743238389492035, "learning_rate": 7.971495628169667e-06, "loss": 0.9436, "step": 9213 }, { "epoch": 0.8232850090468425, "grad_norm": 0.4517209529876709, "learning_rate": 7.963658560486399e-06, "loss": 0.9313, "step": 9214 }, { "epoch": 0.8233743605781044, "grad_norm": 0.5047935247421265, "learning_rate": 7.955825013794793e-06, "loss": 0.9093, "step": 9215 }, { "epoch": 0.8234637121093663, "grad_norm": 0.603001594543457, "learning_rate": 7.947994988750995e-06, "loss": 0.9476, "step": 9216 }, { "epoch": 0.8235530636406282, "grad_norm": 0.4813896119594574, "learning_rate": 7.940168486010862e-06, "loss": 0.8947, "step": 9217 }, { "epoch": 0.82364241517189, "grad_norm": 0.5081338882446289, "learning_rate": 7.932345506229944e-06, "loss": 0.8794, "step": 9218 }, { "epoch": 0.8237317667031518, "grad_norm": 0.4924575984477997, "learning_rate": 7.924526050063497e-06, "loss": 0.9315, "step": 9219 }, { "epoch": 0.8238211182344137, "grad_norm": 0.4447862207889557, "learning_rate": 7.916710118166482e-06, "loss": 0.9681, "step": 9220 }, { "epoch": 0.8239104697656756, "grad_norm": 0.5768123269081116, "learning_rate": 7.908897711193575e-06, "loss": 0.9272, "step": 9221 }, { "epoch": 0.8239998212969375, "grad_norm": 0.6067156791687012, "learning_rate": 7.901088829799142e-06, "loss": 0.9376, "step": 9222 }, { "epoch": 0.8240891728281994, "grad_norm": 0.6196531057357788, "learning_rate": 7.893283474637275e-06, "loss": 0.8734, "step": 9223 }, { "epoch": 0.8241785243594613, "grad_norm": 0.49695178866386414, "learning_rate": 7.88548164636173e-06, "loss": 0.9231, "step": 9224 }, { "epoch": 0.824267875890723, "grad_norm": 0.4435543417930603, "learning_rate": 7.877683345626008e-06, "loss": 0.9304, "step": 9225 }, { "epoch": 0.8243572274219849, "grad_norm": 0.4490625560283661, "learning_rate": 7.869888573083294e-06, "loss": 0.9596, "step": 9226 }, { "epoch": 0.8244465789532468, "grad_norm": 0.4157819151878357, "learning_rate": 7.862097329386497e-06, "loss": 0.9515, "step": 9227 }, { "epoch": 0.8245359304845087, "grad_norm": 0.4776933491230011, "learning_rate": 7.854309615188198e-06, "loss": 0.9247, "step": 9228 }, { "epoch": 0.8246252820157706, "grad_norm": 0.4814663529396057, "learning_rate": 7.84652543114071e-06, "loss": 0.9378, "step": 9229 }, { "epoch": 0.8247146335470324, "grad_norm": 0.45337748527526855, "learning_rate": 7.83874477789604e-06, "loss": 0.9357, "step": 9230 }, { "epoch": 0.8248039850782943, "grad_norm": 0.44966641068458557, "learning_rate": 7.830967656105903e-06, "loss": 0.9343, "step": 9231 }, { "epoch": 0.8248933366095561, "grad_norm": 0.4764994978904724, "learning_rate": 7.823194066421707e-06, "loss": 0.8798, "step": 9232 }, { "epoch": 0.824982688140818, "grad_norm": 0.44345101714134216, "learning_rate": 7.815424009494588e-06, "loss": 0.9255, "step": 9233 }, { "epoch": 0.8250720396720799, "grad_norm": 0.44141438603401184, "learning_rate": 7.807657485975372e-06, "loss": 0.9607, "step": 9234 }, { "epoch": 0.8251613912033418, "grad_norm": 0.4870518147945404, "learning_rate": 7.79989449651457e-06, "loss": 0.8497, "step": 9235 }, { "epoch": 0.8252507427346036, "grad_norm": 0.6152021288871765, "learning_rate": 7.792135041762421e-06, "loss": 0.8489, "step": 9236 }, { "epoch": 0.8253400942658655, "grad_norm": 0.6551914811134338, "learning_rate": 7.784379122368873e-06, "loss": 0.9771, "step": 9237 }, { "epoch": 0.8254294457971274, "grad_norm": 0.4421270787715912, "learning_rate": 7.776626738983556e-06, "loss": 0.9964, "step": 9238 }, { "epoch": 0.8255187973283892, "grad_norm": 0.55922931432724, "learning_rate": 7.768877892255816e-06, "loss": 0.968, "step": 9239 }, { "epoch": 0.8256081488596511, "grad_norm": 0.4829465448856354, "learning_rate": 7.761132582834708e-06, "loss": 0.967, "step": 9240 }, { "epoch": 0.8256975003909129, "grad_norm": 0.5545680522918701, "learning_rate": 7.753390811368971e-06, "loss": 0.9614, "step": 9241 }, { "epoch": 0.8257868519221748, "grad_norm": 0.5379648804664612, "learning_rate": 7.745652578507079e-06, "loss": 0.846, "step": 9242 }, { "epoch": 0.8258762034534367, "grad_norm": 0.6437764167785645, "learning_rate": 7.737917884897177e-06, "loss": 0.9404, "step": 9243 }, { "epoch": 0.8259655549846986, "grad_norm": 0.4301976263523102, "learning_rate": 7.73018673118715e-06, "loss": 0.9681, "step": 9244 }, { "epoch": 0.8260549065159604, "grad_norm": 0.46342891454696655, "learning_rate": 7.722459118024538e-06, "loss": 0.9234, "step": 9245 }, { "epoch": 0.8261442580472222, "grad_norm": 0.45039889216423035, "learning_rate": 7.714735046056615e-06, "loss": 0.9904, "step": 9246 }, { "epoch": 0.8262336095784841, "grad_norm": 0.5035346746444702, "learning_rate": 7.707014515930366e-06, "loss": 0.891, "step": 9247 }, { "epoch": 0.826322961109746, "grad_norm": 0.41054171323776245, "learning_rate": 7.69929752829246e-06, "loss": 0.943, "step": 9248 }, { "epoch": 0.8264123126410079, "grad_norm": 0.4819393754005432, "learning_rate": 7.691584083789277e-06, "loss": 0.9219, "step": 9249 }, { "epoch": 0.8265016641722698, "grad_norm": 0.4592518210411072, "learning_rate": 7.683874183066903e-06, "loss": 0.9504, "step": 9250 }, { "epoch": 0.8265910157035317, "grad_norm": 0.5692583322525024, "learning_rate": 7.676167826771124e-06, "loss": 0.8824, "step": 9251 }, { "epoch": 0.8266803672347934, "grad_norm": 0.48362407088279724, "learning_rate": 7.668465015547427e-06, "loss": 0.8867, "step": 9252 }, { "epoch": 0.8267697187660553, "grad_norm": 0.5431807041168213, "learning_rate": 7.66076575004101e-06, "loss": 0.8867, "step": 9253 }, { "epoch": 0.8268590702973172, "grad_norm": 0.6217208504676819, "learning_rate": 7.653070030896774e-06, "loss": 0.9971, "step": 9254 }, { "epoch": 0.8269484218285791, "grad_norm": 0.4189354181289673, "learning_rate": 7.6453778587593e-06, "loss": 0.9719, "step": 9255 }, { "epoch": 0.827037773359841, "grad_norm": 0.50577312707901, "learning_rate": 7.637689234272899e-06, "loss": 0.9394, "step": 9256 }, { "epoch": 0.8271271248911028, "grad_norm": 0.5943676829338074, "learning_rate": 7.630004158081572e-06, "loss": 0.8626, "step": 9257 }, { "epoch": 0.8272164764223647, "grad_norm": 0.42551112174987793, "learning_rate": 7.622322630829032e-06, "loss": 0.9436, "step": 9258 }, { "epoch": 0.8273058279536265, "grad_norm": 0.6228631138801575, "learning_rate": 7.6146446531586815e-06, "loss": 0.9074, "step": 9259 }, { "epoch": 0.8273951794848884, "grad_norm": 0.4399883449077606, "learning_rate": 7.60697022571365e-06, "loss": 0.9957, "step": 9260 }, { "epoch": 0.8274845310161503, "grad_norm": 0.5171003341674805, "learning_rate": 7.599299349136723e-06, "loss": 0.9123, "step": 9261 }, { "epoch": 0.8275738825474122, "grad_norm": 0.4070376753807068, "learning_rate": 7.591632024070439e-06, "loss": 0.9231, "step": 9262 }, { "epoch": 0.827663234078674, "grad_norm": 0.47427892684936523, "learning_rate": 7.583968251157014e-06, "loss": 0.9726, "step": 9263 }, { "epoch": 0.8277525856099359, "grad_norm": 0.5160714983940125, "learning_rate": 7.576308031038381e-06, "loss": 0.9672, "step": 9264 }, { "epoch": 0.8278419371411978, "grad_norm": 0.5352666974067688, "learning_rate": 7.568651364356144e-06, "loss": 0.8548, "step": 9265 }, { "epoch": 0.8279312886724596, "grad_norm": 0.4556531310081482, "learning_rate": 7.560998251751639e-06, "loss": 0.9801, "step": 9266 }, { "epoch": 0.8280206402037215, "grad_norm": 0.46089810132980347, "learning_rate": 7.553348693865897e-06, "loss": 0.9177, "step": 9267 }, { "epoch": 0.8281099917349833, "grad_norm": 0.4947889447212219, "learning_rate": 7.545702691339657e-06, "loss": 0.9936, "step": 9268 }, { "epoch": 0.8281993432662452, "grad_norm": 0.46438440680503845, "learning_rate": 7.538060244813339e-06, "loss": 0.9395, "step": 9269 }, { "epoch": 0.8282886947975071, "grad_norm": 0.46110087633132935, "learning_rate": 7.530421354927092e-06, "loss": 0.9576, "step": 9270 }, { "epoch": 0.828378046328769, "grad_norm": 0.4900936484336853, "learning_rate": 7.522786022320744e-06, "loss": 0.9473, "step": 9271 }, { "epoch": 0.8284673978600309, "grad_norm": 0.48400017619132996, "learning_rate": 7.5151542476338485e-06, "loss": 0.9329, "step": 9272 }, { "epoch": 0.8285567493912926, "grad_norm": 0.49266934394836426, "learning_rate": 7.507526031505635e-06, "loss": 0.8979, "step": 9273 }, { "epoch": 0.8286461009225545, "grad_norm": 0.5182574987411499, "learning_rate": 7.499901374575069e-06, "loss": 0.8894, "step": 9274 }, { "epoch": 0.8287354524538164, "grad_norm": 0.5964707732200623, "learning_rate": 7.492280277480768e-06, "loss": 0.9323, "step": 9275 }, { "epoch": 0.8288248039850783, "grad_norm": 0.45841360092163086, "learning_rate": 7.4846627408610925e-06, "loss": 0.9406, "step": 9276 }, { "epoch": 0.8289141555163402, "grad_norm": 0.4415697753429413, "learning_rate": 7.477048765354094e-06, "loss": 0.9698, "step": 9277 }, { "epoch": 0.8290035070476021, "grad_norm": 0.49116966128349304, "learning_rate": 7.469438351597524e-06, "loss": 0.9453, "step": 9278 }, { "epoch": 0.829092858578864, "grad_norm": 0.5854166746139526, "learning_rate": 7.4618315002288384e-06, "loss": 0.9059, "step": 9279 }, { "epoch": 0.8291822101101257, "grad_norm": 0.46509405970573425, "learning_rate": 7.454228211885184e-06, "loss": 0.9222, "step": 9280 }, { "epoch": 0.8292715616413876, "grad_norm": 0.442025363445282, "learning_rate": 7.446628487203422e-06, "loss": 0.9237, "step": 9281 }, { "epoch": 0.8293609131726495, "grad_norm": 0.6018460392951965, "learning_rate": 7.439032326820117e-06, "loss": 0.9529, "step": 9282 }, { "epoch": 0.8294502647039114, "grad_norm": 0.48638513684272766, "learning_rate": 7.4314397313715145e-06, "loss": 0.8751, "step": 9283 }, { "epoch": 0.8295396162351732, "grad_norm": 0.4461117684841156, "learning_rate": 7.423850701493585e-06, "loss": 0.9923, "step": 9284 }, { "epoch": 0.8296289677664351, "grad_norm": 0.5634244084358215, "learning_rate": 7.416265237822001e-06, "loss": 0.9615, "step": 9285 }, { "epoch": 0.829718319297697, "grad_norm": 0.5193483233451843, "learning_rate": 7.408683340992101e-06, "loss": 0.8622, "step": 9286 }, { "epoch": 0.8298076708289588, "grad_norm": 0.5628817081451416, "learning_rate": 7.401105011638965e-06, "loss": 0.8551, "step": 9287 }, { "epoch": 0.8298970223602207, "grad_norm": 0.5427822470664978, "learning_rate": 7.3935302503973515e-06, "loss": 0.8488, "step": 9288 }, { "epoch": 0.8299863738914826, "grad_norm": 0.4602343440055847, "learning_rate": 7.385959057901737e-06, "loss": 1.054, "step": 9289 }, { "epoch": 0.8300757254227444, "grad_norm": 0.5840815305709839, "learning_rate": 7.378391434786281e-06, "loss": 0.911, "step": 9290 }, { "epoch": 0.8301650769540063, "grad_norm": 0.514077365398407, "learning_rate": 7.370827381684853e-06, "loss": 0.923, "step": 9291 }, { "epoch": 0.8302544284852682, "grad_norm": 0.5028949975967407, "learning_rate": 7.3632668992310305e-06, "loss": 0.8987, "step": 9292 }, { "epoch": 0.8303437800165301, "grad_norm": 0.3772270083427429, "learning_rate": 7.35570998805809e-06, "loss": 0.9484, "step": 9293 }, { "epoch": 0.8304331315477919, "grad_norm": 0.5475994944572449, "learning_rate": 7.348156648798981e-06, "loss": 0.9564, "step": 9294 }, { "epoch": 0.8305224830790537, "grad_norm": 0.5647354125976562, "learning_rate": 7.340606882086393e-06, "loss": 0.894, "step": 9295 }, { "epoch": 0.8306118346103156, "grad_norm": 0.5140451788902283, "learning_rate": 7.33306068855269e-06, "loss": 0.9726, "step": 9296 }, { "epoch": 0.8307011861415775, "grad_norm": 0.45875853300094604, "learning_rate": 7.325518068829967e-06, "loss": 0.9158, "step": 9297 }, { "epoch": 0.8307905376728394, "grad_norm": 0.38240689039230347, "learning_rate": 7.31797902354997e-06, "loss": 0.9517, "step": 9298 }, { "epoch": 0.8308798892041013, "grad_norm": 0.5880802869796753, "learning_rate": 7.310443553344182e-06, "loss": 0.9576, "step": 9299 }, { "epoch": 0.8309692407353632, "grad_norm": 0.5717623829841614, "learning_rate": 7.302911658843792e-06, "loss": 0.9193, "step": 9300 }, { "epoch": 0.8310585922666249, "grad_norm": 0.4772949814796448, "learning_rate": 7.2953833406796675e-06, "loss": 0.8271, "step": 9301 }, { "epoch": 0.8311479437978868, "grad_norm": 0.43257272243499756, "learning_rate": 7.287858599482383e-06, "loss": 0.946, "step": 9302 }, { "epoch": 0.8312372953291487, "grad_norm": 0.5265113115310669, "learning_rate": 7.280337435882223e-06, "loss": 0.8861, "step": 9303 }, { "epoch": 0.8313266468604106, "grad_norm": 0.4444237947463989, "learning_rate": 7.272819850509161e-06, "loss": 0.9427, "step": 9304 }, { "epoch": 0.8314159983916725, "grad_norm": 0.53780198097229, "learning_rate": 7.265305843992881e-06, "loss": 0.8518, "step": 9305 }, { "epoch": 0.8315053499229343, "grad_norm": 0.5039278268814087, "learning_rate": 7.257795416962753e-06, "loss": 0.9144, "step": 9306 }, { "epoch": 0.8315947014541961, "grad_norm": 0.5485106110572815, "learning_rate": 7.250288570047853e-06, "loss": 0.9448, "step": 9307 }, { "epoch": 0.831684052985458, "grad_norm": 0.592823326587677, "learning_rate": 7.242785303876965e-06, "loss": 0.9934, "step": 9308 }, { "epoch": 0.8317734045167199, "grad_norm": 0.42520976066589355, "learning_rate": 7.235285619078569e-06, "loss": 0.9649, "step": 9309 }, { "epoch": 0.8318627560479818, "grad_norm": 0.4947132170200348, "learning_rate": 7.2277895162808416e-06, "loss": 0.9205, "step": 9310 }, { "epoch": 0.8319521075792436, "grad_norm": 0.42473235726356506, "learning_rate": 7.220296996111658e-06, "loss": 0.9293, "step": 9311 }, { "epoch": 0.8320414591105055, "grad_norm": 0.5703517198562622, "learning_rate": 7.2128080591986e-06, "loss": 0.8687, "step": 9312 }, { "epoch": 0.8321308106417674, "grad_norm": 0.5139065980911255, "learning_rate": 7.205322706168949e-06, "loss": 0.8515, "step": 9313 }, { "epoch": 0.8322201621730292, "grad_norm": 0.49325382709503174, "learning_rate": 7.19784093764968e-06, "loss": 0.9548, "step": 9314 }, { "epoch": 0.8323095137042911, "grad_norm": 0.4829562306404114, "learning_rate": 7.190362754267477e-06, "loss": 0.923, "step": 9315 }, { "epoch": 0.832398865235553, "grad_norm": 0.4533390402793884, "learning_rate": 7.182888156648704e-06, "loss": 0.9178, "step": 9316 }, { "epoch": 0.8324882167668148, "grad_norm": 0.42005079984664917, "learning_rate": 7.175417145419444e-06, "loss": 0.912, "step": 9317 }, { "epoch": 0.8325775682980767, "grad_norm": 0.5088595747947693, "learning_rate": 7.16794972120548e-06, "loss": 0.9998, "step": 9318 }, { "epoch": 0.8326669198293386, "grad_norm": 0.4803122580051422, "learning_rate": 7.160485884632278e-06, "loss": 0.8923, "step": 9319 }, { "epoch": 0.8327562713606005, "grad_norm": 0.439439058303833, "learning_rate": 7.153025636325023e-06, "loss": 0.9342, "step": 9320 }, { "epoch": 0.8328456228918623, "grad_norm": 0.4299473464488983, "learning_rate": 7.145568976908584e-06, "loss": 0.9986, "step": 9321 }, { "epoch": 0.8329349744231241, "grad_norm": 0.46970871090888977, "learning_rate": 7.138115907007537e-06, "loss": 0.9392, "step": 9322 }, { "epoch": 0.833024325954386, "grad_norm": 0.500106692314148, "learning_rate": 7.1306664272461635e-06, "loss": 0.9441, "step": 9323 }, { "epoch": 0.8331136774856479, "grad_norm": 0.5206024050712585, "learning_rate": 7.123220538248426e-06, "loss": 0.8643, "step": 9324 }, { "epoch": 0.8332030290169098, "grad_norm": 0.4776027798652649, "learning_rate": 7.115778240638016e-06, "loss": 0.9586, "step": 9325 }, { "epoch": 0.8332923805481717, "grad_norm": 0.45335862040519714, "learning_rate": 7.1083395350382776e-06, "loss": 0.941, "step": 9326 }, { "epoch": 0.8333817320794336, "grad_norm": 0.47026875615119934, "learning_rate": 7.100904422072296e-06, "loss": 0.9022, "step": 9327 }, { "epoch": 0.8334710836106953, "grad_norm": 0.4703599810600281, "learning_rate": 7.093472902362841e-06, "loss": 1.0376, "step": 9328 }, { "epoch": 0.8335604351419572, "grad_norm": 0.48730385303497314, "learning_rate": 7.086044976532386e-06, "loss": 0.983, "step": 9329 }, { "epoch": 0.8336497866732191, "grad_norm": 0.44514140486717224, "learning_rate": 7.078620645203099e-06, "loss": 0.8899, "step": 9330 }, { "epoch": 0.833739138204481, "grad_norm": 0.4243552088737488, "learning_rate": 7.0711999089968335e-06, "loss": 0.9852, "step": 9331 }, { "epoch": 0.8338284897357429, "grad_norm": 0.49389246106147766, "learning_rate": 7.06378276853516e-06, "loss": 0.8915, "step": 9332 }, { "epoch": 0.8339178412670047, "grad_norm": 0.4797744154930115, "learning_rate": 7.056369224439352e-06, "loss": 0.9607, "step": 9333 }, { "epoch": 0.8340071927982666, "grad_norm": 0.5037931203842163, "learning_rate": 7.048959277330363e-06, "loss": 0.9384, "step": 9334 }, { "epoch": 0.8340965443295284, "grad_norm": 0.474246084690094, "learning_rate": 7.0415529278288765e-06, "loss": 1.0259, "step": 9335 }, { "epoch": 0.8341858958607903, "grad_norm": 0.46430274844169617, "learning_rate": 7.034150176555221e-06, "loss": 0.9752, "step": 9336 }, { "epoch": 0.8342752473920522, "grad_norm": 0.5320345759391785, "learning_rate": 7.0267510241294745e-06, "loss": 0.917, "step": 9337 }, { "epoch": 0.834364598923314, "grad_norm": 0.4332819879055023, "learning_rate": 7.01935547117139e-06, "loss": 0.9781, "step": 9338 }, { "epoch": 0.8344539504545759, "grad_norm": 0.4649805724620819, "learning_rate": 7.011963518300424e-06, "loss": 0.9405, "step": 9339 }, { "epoch": 0.8345433019858378, "grad_norm": 0.4542677700519562, "learning_rate": 7.004575166135735e-06, "loss": 0.9736, "step": 9340 }, { "epoch": 0.8346326535170997, "grad_norm": 0.4982972741127014, "learning_rate": 6.997190415296173e-06, "loss": 0.8426, "step": 9341 }, { "epoch": 0.8347220050483615, "grad_norm": 0.49553072452545166, "learning_rate": 6.989809266400288e-06, "loss": 0.92, "step": 9342 }, { "epoch": 0.8348113565796234, "grad_norm": 0.4943731427192688, "learning_rate": 6.982431720066335e-06, "loss": 0.9163, "step": 9343 }, { "epoch": 0.8349007081108852, "grad_norm": 0.4164564311504364, "learning_rate": 6.975057776912258e-06, "loss": 0.9058, "step": 9344 }, { "epoch": 0.8349900596421471, "grad_norm": 0.4946102797985077, "learning_rate": 6.96768743755572e-06, "loss": 0.9323, "step": 9345 }, { "epoch": 0.835079411173409, "grad_norm": 0.5377085208892822, "learning_rate": 6.960320702614037e-06, "loss": 0.8814, "step": 9346 }, { "epoch": 0.8351687627046709, "grad_norm": 0.5740790367126465, "learning_rate": 6.952957572704266e-06, "loss": 0.9291, "step": 9347 }, { "epoch": 0.8352581142359328, "grad_norm": 0.5461665987968445, "learning_rate": 6.945598048443147e-06, "loss": 0.8508, "step": 9348 }, { "epoch": 0.8353474657671945, "grad_norm": 0.48720571398735046, "learning_rate": 6.93824213044712e-06, "loss": 0.9276, "step": 9349 }, { "epoch": 0.8354368172984564, "grad_norm": 0.4371801018714905, "learning_rate": 6.93088981933232e-06, "loss": 0.9428, "step": 9350 }, { "epoch": 0.8355261688297183, "grad_norm": 0.661524772644043, "learning_rate": 6.9235411157145765e-06, "loss": 0.8726, "step": 9351 }, { "epoch": 0.8356155203609802, "grad_norm": 0.4543387293815613, "learning_rate": 6.916196020209431e-06, "loss": 0.9342, "step": 9352 }, { "epoch": 0.8357048718922421, "grad_norm": 0.40771013498306274, "learning_rate": 6.908854533432107e-06, "loss": 0.9474, "step": 9353 }, { "epoch": 0.835794223423504, "grad_norm": 0.4102661609649658, "learning_rate": 6.901516655997536e-06, "loss": 0.9604, "step": 9354 }, { "epoch": 0.8358835749547658, "grad_norm": 0.47201770544052124, "learning_rate": 6.8941823885203485e-06, "loss": 0.9408, "step": 9355 }, { "epoch": 0.8359729264860276, "grad_norm": 0.508849561214447, "learning_rate": 6.886851731614857e-06, "loss": 0.8948, "step": 9356 }, { "epoch": 0.8360622780172895, "grad_norm": 0.48625683784484863, "learning_rate": 6.879524685895084e-06, "loss": 0.9328, "step": 9357 }, { "epoch": 0.8361516295485514, "grad_norm": 0.4217790961265564, "learning_rate": 6.872201251974747e-06, "loss": 1.0213, "step": 9358 }, { "epoch": 0.8362409810798133, "grad_norm": 0.4759739637374878, "learning_rate": 6.864881430467262e-06, "loss": 0.9459, "step": 9359 }, { "epoch": 0.8363303326110751, "grad_norm": 0.4266846776008606, "learning_rate": 6.857565221985751e-06, "loss": 0.9863, "step": 9360 }, { "epoch": 0.836419684142337, "grad_norm": 0.5146978497505188, "learning_rate": 6.850252627143017e-06, "loss": 0.963, "step": 9361 }, { "epoch": 0.8365090356735989, "grad_norm": 0.4575260877609253, "learning_rate": 6.842943646551564e-06, "loss": 0.9757, "step": 9362 }, { "epoch": 0.8365983872048607, "grad_norm": 0.4543364346027374, "learning_rate": 6.835638280823614e-06, "loss": 0.9393, "step": 9363 }, { "epoch": 0.8366877387361226, "grad_norm": 0.47015100717544556, "learning_rate": 6.82833653057105e-06, "loss": 1.009, "step": 9364 }, { "epoch": 0.8367770902673844, "grad_norm": 0.4650666415691376, "learning_rate": 6.821038396405477e-06, "loss": 0.9546, "step": 9365 }, { "epoch": 0.8368664417986463, "grad_norm": 0.537955105304718, "learning_rate": 6.813743878938189e-06, "loss": 0.9242, "step": 9366 }, { "epoch": 0.8369557933299082, "grad_norm": 0.5456058979034424, "learning_rate": 6.806452978780198e-06, "loss": 0.8273, "step": 9367 }, { "epoch": 0.8370451448611701, "grad_norm": 0.48449915647506714, "learning_rate": 6.79916569654217e-06, "loss": 0.9456, "step": 9368 }, { "epoch": 0.8371344963924319, "grad_norm": 0.5084838271141052, "learning_rate": 6.791882032834501e-06, "loss": 0.9431, "step": 9369 }, { "epoch": 0.8372238479236938, "grad_norm": 0.5198284387588501, "learning_rate": 6.784601988267281e-06, "loss": 0.9717, "step": 9370 }, { "epoch": 0.8373131994549556, "grad_norm": 0.5668700933456421, "learning_rate": 6.777325563450282e-06, "loss": 0.9418, "step": 9371 }, { "epoch": 0.8374025509862175, "grad_norm": 0.6485138535499573, "learning_rate": 6.77005275899299e-06, "loss": 0.966, "step": 9372 }, { "epoch": 0.8374919025174794, "grad_norm": 0.39778807759284973, "learning_rate": 6.762783575504578e-06, "loss": 0.9455, "step": 9373 }, { "epoch": 0.8375812540487413, "grad_norm": 0.48835721611976624, "learning_rate": 6.755518013593914e-06, "loss": 0.978, "step": 9374 }, { "epoch": 0.8376706055800032, "grad_norm": 0.6808341145515442, "learning_rate": 6.748256073869575e-06, "loss": 0.8574, "step": 9375 }, { "epoch": 0.8377599571112649, "grad_norm": 0.43278858065605164, "learning_rate": 6.7409977569398265e-06, "loss": 0.964, "step": 9376 }, { "epoch": 0.8378493086425268, "grad_norm": 0.4334334433078766, "learning_rate": 6.733743063412612e-06, "loss": 0.9898, "step": 9377 }, { "epoch": 0.8379386601737887, "grad_norm": 0.4900834262371063, "learning_rate": 6.726491993895606e-06, "loss": 0.8638, "step": 9378 }, { "epoch": 0.8380280117050506, "grad_norm": 0.6439689993858337, "learning_rate": 6.719244548996157e-06, "loss": 0.8321, "step": 9379 }, { "epoch": 0.8381173632363125, "grad_norm": 0.4418420195579529, "learning_rate": 6.712000729321311e-06, "loss": 0.9594, "step": 9380 }, { "epoch": 0.8382067147675744, "grad_norm": 0.5338155031204224, "learning_rate": 6.704760535477828e-06, "loss": 0.891, "step": 9381 }, { "epoch": 0.8382960662988362, "grad_norm": 0.48710137605667114, "learning_rate": 6.697523968072139e-06, "loss": 0.9445, "step": 9382 }, { "epoch": 0.838385417830098, "grad_norm": 0.42618831992149353, "learning_rate": 6.690291027710394e-06, "loss": 0.9553, "step": 9383 }, { "epoch": 0.8384747693613599, "grad_norm": 0.4654203951358795, "learning_rate": 6.6830617149984175e-06, "loss": 0.9249, "step": 9384 }, { "epoch": 0.8385641208926218, "grad_norm": 0.5858107805252075, "learning_rate": 6.675836030541755e-06, "loss": 0.8551, "step": 9385 }, { "epoch": 0.8386534724238837, "grad_norm": 0.5533519387245178, "learning_rate": 6.668613974945631e-06, "loss": 0.9609, "step": 9386 }, { "epoch": 0.8387428239551455, "grad_norm": 0.4850926399230957, "learning_rate": 6.661395548814958e-06, "loss": 0.8791, "step": 9387 }, { "epoch": 0.8388321754864074, "grad_norm": 0.3984906077384949, "learning_rate": 6.6541807527543635e-06, "loss": 0.9768, "step": 9388 }, { "epoch": 0.8389215270176693, "grad_norm": 0.5376686453819275, "learning_rate": 6.646969587368168e-06, "loss": 0.957, "step": 9389 }, { "epoch": 0.8390108785489311, "grad_norm": 0.5016261339187622, "learning_rate": 6.6397620532603735e-06, "loss": 1.0018, "step": 9390 }, { "epoch": 0.839100230080193, "grad_norm": 0.4670695960521698, "learning_rate": 6.632558151034701e-06, "loss": 0.9015, "step": 9391 }, { "epoch": 0.8391895816114548, "grad_norm": 0.653988778591156, "learning_rate": 6.6253578812945414e-06, "loss": 0.8509, "step": 9392 }, { "epoch": 0.8392789331427167, "grad_norm": 0.5167982578277588, "learning_rate": 6.6181612446430005e-06, "loss": 0.8648, "step": 9393 }, { "epoch": 0.8393682846739786, "grad_norm": 0.5537070631980896, "learning_rate": 6.610968241682875e-06, "loss": 0.8879, "step": 9394 }, { "epoch": 0.8394576362052405, "grad_norm": 0.489687979221344, "learning_rate": 6.603778873016653e-06, "loss": 0.92, "step": 9395 }, { "epoch": 0.8395469877365024, "grad_norm": 0.4659658670425415, "learning_rate": 6.596593139246532e-06, "loss": 0.9537, "step": 9396 }, { "epoch": 0.8396363392677642, "grad_norm": 0.45842671394348145, "learning_rate": 6.589411040974369e-06, "loss": 0.9171, "step": 9397 }, { "epoch": 0.839725690799026, "grad_norm": 0.5154210329055786, "learning_rate": 6.582232578801756e-06, "loss": 0.8698, "step": 9398 }, { "epoch": 0.8398150423302879, "grad_norm": 0.5210223197937012, "learning_rate": 6.575057753329966e-06, "loss": 0.8417, "step": 9399 }, { "epoch": 0.8399043938615498, "grad_norm": 0.605532169342041, "learning_rate": 6.567886565159975e-06, "loss": 0.8945, "step": 9400 }, { "epoch": 0.8399937453928117, "grad_norm": 0.4984580874443054, "learning_rate": 6.560719014892425e-06, "loss": 0.9281, "step": 9401 }, { "epoch": 0.8400830969240736, "grad_norm": 0.4397487938404083, "learning_rate": 6.55355510312769e-06, "loss": 0.9569, "step": 9402 }, { "epoch": 0.8401724484553355, "grad_norm": 0.5144175291061401, "learning_rate": 6.546394830465819e-06, "loss": 0.9275, "step": 9403 }, { "epoch": 0.8402617999865972, "grad_norm": 0.4532061219215393, "learning_rate": 6.539238197506564e-06, "loss": 0.9112, "step": 9404 }, { "epoch": 0.8403511515178591, "grad_norm": 0.5863621234893799, "learning_rate": 6.53208520484937e-06, "loss": 0.9107, "step": 9405 }, { "epoch": 0.840440503049121, "grad_norm": 0.5393437147140503, "learning_rate": 6.524935853093383e-06, "loss": 0.8843, "step": 9406 }, { "epoch": 0.8405298545803829, "grad_norm": 0.4814126193523407, "learning_rate": 6.5177901428374154e-06, "loss": 0.9806, "step": 9407 }, { "epoch": 0.8406192061116448, "grad_norm": 0.38951998949050903, "learning_rate": 6.510648074680015e-06, "loss": 0.9301, "step": 9408 }, { "epoch": 0.8407085576429066, "grad_norm": 0.487699955701828, "learning_rate": 6.503509649219403e-06, "loss": 0.9929, "step": 9409 }, { "epoch": 0.8407979091741685, "grad_norm": 0.6564955711364746, "learning_rate": 6.496374867053495e-06, "loss": 0.8443, "step": 9410 }, { "epoch": 0.8408872607054303, "grad_norm": 0.4607921838760376, "learning_rate": 6.489243728779904e-06, "loss": 0.9666, "step": 9411 }, { "epoch": 0.8409766122366922, "grad_norm": 0.6501184105873108, "learning_rate": 6.482116234995944e-06, "loss": 0.8588, "step": 9412 }, { "epoch": 0.8410659637679541, "grad_norm": 0.48878639936447144, "learning_rate": 6.474992386298617e-06, "loss": 0.964, "step": 9413 }, { "epoch": 0.8411553152992159, "grad_norm": 0.5011094808578491, "learning_rate": 6.467872183284623e-06, "loss": 0.8937, "step": 9414 }, { "epoch": 0.8412446668304778, "grad_norm": 0.429160475730896, "learning_rate": 6.460755626550352e-06, "loss": 0.9129, "step": 9415 }, { "epoch": 0.8413340183617397, "grad_norm": 0.4783921539783478, "learning_rate": 6.453642716691905e-06, "loss": 0.9367, "step": 9416 }, { "epoch": 0.8414233698930016, "grad_norm": 0.4726746380329132, "learning_rate": 6.446533454305037e-06, "loss": 0.8902, "step": 9417 }, { "epoch": 0.8415127214242634, "grad_norm": 0.4634154438972473, "learning_rate": 6.43942783998524e-06, "loss": 0.9265, "step": 9418 }, { "epoch": 0.8416020729555252, "grad_norm": 0.44272157549858093, "learning_rate": 6.432325874327683e-06, "loss": 0.9241, "step": 9419 }, { "epoch": 0.8416914244867871, "grad_norm": 0.49922677874565125, "learning_rate": 6.425227557927232e-06, "loss": 0.9276, "step": 9420 }, { "epoch": 0.841780776018049, "grad_norm": 0.4457327723503113, "learning_rate": 6.418132891378448e-06, "loss": 0.9738, "step": 9421 }, { "epoch": 0.8418701275493109, "grad_norm": 0.48788073658943176, "learning_rate": 6.411041875275581e-06, "loss": 0.9399, "step": 9422 }, { "epoch": 0.8419594790805728, "grad_norm": 0.4666908383369446, "learning_rate": 6.4039545102125845e-06, "loss": 0.8912, "step": 9423 }, { "epoch": 0.8420488306118347, "grad_norm": 0.5896921753883362, "learning_rate": 6.396870796783095e-06, "loss": 0.8858, "step": 9424 }, { "epoch": 0.8421381821430964, "grad_norm": 0.4257669150829315, "learning_rate": 6.389790735580458e-06, "loss": 0.9384, "step": 9425 }, { "epoch": 0.8422275336743583, "grad_norm": 0.43601882457733154, "learning_rate": 6.382714327197703e-06, "loss": 1.0046, "step": 9426 }, { "epoch": 0.8423168852056202, "grad_norm": 0.5371537208557129, "learning_rate": 6.375641572227542e-06, "loss": 0.9728, "step": 9427 }, { "epoch": 0.8424062367368821, "grad_norm": 0.5477351546287537, "learning_rate": 6.368572471262402e-06, "loss": 0.9175, "step": 9428 }, { "epoch": 0.842495588268144, "grad_norm": 0.4891539216041565, "learning_rate": 6.361507024894392e-06, "loss": 0.9221, "step": 9429 }, { "epoch": 0.8425849397994059, "grad_norm": 0.5484804511070251, "learning_rate": 6.354445233715328e-06, "loss": 0.8749, "step": 9430 }, { "epoch": 0.8426742913306676, "grad_norm": 0.5287595391273499, "learning_rate": 6.347387098316698e-06, "loss": 0.9356, "step": 9431 }, { "epoch": 0.8427636428619295, "grad_norm": 0.4888148605823517, "learning_rate": 6.340332619289701e-06, "loss": 0.9794, "step": 9432 }, { "epoch": 0.8428529943931914, "grad_norm": 0.5393836498260498, "learning_rate": 6.333281797225244e-06, "loss": 0.8226, "step": 9433 }, { "epoch": 0.8429423459244533, "grad_norm": 0.5648978352546692, "learning_rate": 6.326234632713873e-06, "loss": 0.902, "step": 9434 }, { "epoch": 0.8430316974557152, "grad_norm": 0.4472680687904358, "learning_rate": 6.319191126345881e-06, "loss": 0.9544, "step": 9435 }, { "epoch": 0.843121048986977, "grad_norm": 0.4734359681606293, "learning_rate": 6.312151278711237e-06, "loss": 0.8552, "step": 9436 }, { "epoch": 0.8432104005182389, "grad_norm": 0.492206335067749, "learning_rate": 6.305115090399616e-06, "loss": 0.9213, "step": 9437 }, { "epoch": 0.8432997520495007, "grad_norm": 0.4859955608844757, "learning_rate": 6.2980825620003455e-06, "loss": 0.8452, "step": 9438 }, { "epoch": 0.8433891035807626, "grad_norm": 0.4734523296356201, "learning_rate": 6.291053694102489e-06, "loss": 0.9237, "step": 9439 }, { "epoch": 0.8434784551120245, "grad_norm": 0.5449217557907104, "learning_rate": 6.284028487294796e-06, "loss": 0.8981, "step": 9440 }, { "epoch": 0.8435678066432863, "grad_norm": 0.4198647439479828, "learning_rate": 6.277006942165692e-06, "loss": 0.9808, "step": 9441 }, { "epoch": 0.8436571581745482, "grad_norm": 0.4316485524177551, "learning_rate": 6.269989059303305e-06, "loss": 0.9737, "step": 9442 }, { "epoch": 0.8437465097058101, "grad_norm": 0.46406179666519165, "learning_rate": 6.262974839295471e-06, "loss": 0.9257, "step": 9443 }, { "epoch": 0.843835861237072, "grad_norm": 0.4460984170436859, "learning_rate": 6.255964282729692e-06, "loss": 0.9946, "step": 9444 }, { "epoch": 0.8439252127683338, "grad_norm": 0.511184573173523, "learning_rate": 6.248957390193183e-06, "loss": 0.9457, "step": 9445 }, { "epoch": 0.8440145642995956, "grad_norm": 0.5544686913490295, "learning_rate": 6.2419541622728565e-06, "loss": 0.9525, "step": 9446 }, { "epoch": 0.8441039158308575, "grad_norm": 0.44529908895492554, "learning_rate": 6.2349545995552875e-06, "loss": 0.9527, "step": 9447 }, { "epoch": 0.8441932673621194, "grad_norm": 0.5516801476478577, "learning_rate": 6.227958702626769e-06, "loss": 0.8854, "step": 9448 }, { "epoch": 0.8442826188933813, "grad_norm": 0.4624561369419098, "learning_rate": 6.220966472073287e-06, "loss": 0.9874, "step": 9449 }, { "epoch": 0.8443719704246432, "grad_norm": 0.4866270422935486, "learning_rate": 6.213977908480518e-06, "loss": 0.9601, "step": 9450 }, { "epoch": 0.8444613219559051, "grad_norm": 0.4873912036418915, "learning_rate": 6.206993012433815e-06, "loss": 0.9031, "step": 9451 }, { "epoch": 0.8445506734871668, "grad_norm": 0.4816325306892395, "learning_rate": 6.200011784518256e-06, "loss": 0.9123, "step": 9452 }, { "epoch": 0.8446400250184287, "grad_norm": 0.5002326965332031, "learning_rate": 6.193034225318578e-06, "loss": 0.8816, "step": 9453 }, { "epoch": 0.8447293765496906, "grad_norm": 0.4894864559173584, "learning_rate": 6.186060335419236e-06, "loss": 0.9751, "step": 9454 }, { "epoch": 0.8448187280809525, "grad_norm": 0.5670939683914185, "learning_rate": 6.179090115404368e-06, "loss": 0.8294, "step": 9455 }, { "epoch": 0.8449080796122144, "grad_norm": 0.553982138633728, "learning_rate": 6.172123565857796e-06, "loss": 0.8847, "step": 9456 }, { "epoch": 0.8449974311434763, "grad_norm": 0.48611873388290405, "learning_rate": 6.165160687363053e-06, "loss": 0.9528, "step": 9457 }, { "epoch": 0.8450867826747381, "grad_norm": 0.4134733974933624, "learning_rate": 6.158201480503345e-06, "loss": 1.0282, "step": 9458 }, { "epoch": 0.8451761342059999, "grad_norm": 0.6506361961364746, "learning_rate": 6.151245945861578e-06, "loss": 0.8545, "step": 9459 }, { "epoch": 0.8452654857372618, "grad_norm": 0.49033278226852417, "learning_rate": 6.1442940840203575e-06, "loss": 0.857, "step": 9460 }, { "epoch": 0.8453548372685237, "grad_norm": 0.5886410474777222, "learning_rate": 6.13734589556198e-06, "loss": 0.8042, "step": 9461 }, { "epoch": 0.8454441887997856, "grad_norm": 0.4791713058948517, "learning_rate": 6.130401381068424e-06, "loss": 0.9883, "step": 9462 }, { "epoch": 0.8455335403310474, "grad_norm": 0.5414884686470032, "learning_rate": 6.123460541121368e-06, "loss": 0.9804, "step": 9463 }, { "epoch": 0.8456228918623093, "grad_norm": 0.5500231981277466, "learning_rate": 6.11652337630218e-06, "loss": 0.9065, "step": 9464 }, { "epoch": 0.8457122433935712, "grad_norm": 0.4416286051273346, "learning_rate": 6.109589887191924e-06, "loss": 0.9156, "step": 9465 }, { "epoch": 0.845801594924833, "grad_norm": 0.4755689799785614, "learning_rate": 6.102660074371369e-06, "loss": 0.9018, "step": 9466 }, { "epoch": 0.8458909464560949, "grad_norm": 0.5172240138053894, "learning_rate": 6.095733938420928e-06, "loss": 0.9731, "step": 9467 }, { "epoch": 0.8459802979873567, "grad_norm": 0.47983941435813904, "learning_rate": 6.0888114799207586e-06, "loss": 0.99, "step": 9468 }, { "epoch": 0.8460696495186186, "grad_norm": 0.4617604911327362, "learning_rate": 6.081892699450687e-06, "loss": 0.9207, "step": 9469 }, { "epoch": 0.8461590010498805, "grad_norm": 0.43974384665489197, "learning_rate": 6.0749775975902424e-06, "loss": 0.919, "step": 9470 }, { "epoch": 0.8462483525811424, "grad_norm": 0.5544477701187134, "learning_rate": 6.068066174918624e-06, "loss": 0.9164, "step": 9471 }, { "epoch": 0.8463377041124043, "grad_norm": 0.5196595191955566, "learning_rate": 6.0611584320147465e-06, "loss": 0.9605, "step": 9472 }, { "epoch": 0.846427055643666, "grad_norm": 0.47909513115882874, "learning_rate": 6.054254369457202e-06, "loss": 0.8867, "step": 9473 }, { "epoch": 0.8465164071749279, "grad_norm": 0.5090166926383972, "learning_rate": 6.047353987824278e-06, "loss": 0.8925, "step": 9474 }, { "epoch": 0.8466057587061898, "grad_norm": 0.4992344081401825, "learning_rate": 6.040457287693963e-06, "loss": 0.9032, "step": 9475 }, { "epoch": 0.8466951102374517, "grad_norm": 0.5999143123626709, "learning_rate": 6.033564269643926e-06, "loss": 0.9008, "step": 9476 }, { "epoch": 0.8467844617687136, "grad_norm": 0.5238880515098572, "learning_rate": 6.026674934251542e-06, "loss": 0.9067, "step": 9477 }, { "epoch": 0.8468738132999755, "grad_norm": 0.5634439587593079, "learning_rate": 6.0197892820938405e-06, "loss": 0.8967, "step": 9478 }, { "epoch": 0.8469631648312373, "grad_norm": 0.4486282467842102, "learning_rate": 6.012907313747585e-06, "loss": 0.9542, "step": 9479 }, { "epoch": 0.8470525163624991, "grad_norm": 0.49240148067474365, "learning_rate": 6.006029029789206e-06, "loss": 0.9843, "step": 9480 }, { "epoch": 0.847141867893761, "grad_norm": 0.5785838961601257, "learning_rate": 5.99915443079484e-06, "loss": 0.9319, "step": 9481 }, { "epoch": 0.8472312194250229, "grad_norm": 0.6052419543266296, "learning_rate": 5.992283517340308e-06, "loss": 0.7929, "step": 9482 }, { "epoch": 0.8473205709562848, "grad_norm": 0.452104777097702, "learning_rate": 5.985416290001117e-06, "loss": 0.9212, "step": 9483 }, { "epoch": 0.8474099224875467, "grad_norm": 0.49323323369026184, "learning_rate": 5.978552749352473e-06, "loss": 0.8627, "step": 9484 }, { "epoch": 0.8474992740188085, "grad_norm": 0.5414223670959473, "learning_rate": 5.971692895969272e-06, "loss": 0.9574, "step": 9485 }, { "epoch": 0.8475886255500704, "grad_norm": 0.6366201043128967, "learning_rate": 5.964836730426099e-06, "loss": 0.9616, "step": 9486 }, { "epoch": 0.8476779770813322, "grad_norm": 0.6016992926597595, "learning_rate": 5.957984253297244e-06, "loss": 0.9459, "step": 9487 }, { "epoch": 0.8477673286125941, "grad_norm": 0.48284274339675903, "learning_rate": 5.9511354651566485e-06, "loss": 0.9174, "step": 9488 }, { "epoch": 0.847856680143856, "grad_norm": 0.42610475420951843, "learning_rate": 5.944290366577987e-06, "loss": 0.965, "step": 9489 }, { "epoch": 0.8479460316751178, "grad_norm": 0.4493615925312042, "learning_rate": 5.937448958134606e-06, "loss": 0.9472, "step": 9490 }, { "epoch": 0.8480353832063797, "grad_norm": 0.46299251914024353, "learning_rate": 5.930611240399548e-06, "loss": 0.948, "step": 9491 }, { "epoch": 0.8481247347376416, "grad_norm": 0.482902854681015, "learning_rate": 5.923777213945541e-06, "loss": 0.905, "step": 9492 }, { "epoch": 0.8482140862689035, "grad_norm": 0.5270372033119202, "learning_rate": 5.916946879345015e-06, "loss": 0.9044, "step": 9493 }, { "epoch": 0.8483034378001653, "grad_norm": 0.47987642884254456, "learning_rate": 5.91012023717008e-06, "loss": 0.9566, "step": 9494 }, { "epoch": 0.8483927893314271, "grad_norm": 0.4810824990272522, "learning_rate": 5.903297287992538e-06, "loss": 0.8675, "step": 9495 }, { "epoch": 0.848482140862689, "grad_norm": 0.5205212831497192, "learning_rate": 5.89647803238389e-06, "loss": 0.9361, "step": 9496 }, { "epoch": 0.8485714923939509, "grad_norm": 0.45572689175605774, "learning_rate": 5.889662470915319e-06, "loss": 0.9834, "step": 9497 }, { "epoch": 0.8486608439252128, "grad_norm": 0.4855954945087433, "learning_rate": 5.882850604157691e-06, "loss": 0.8503, "step": 9498 }, { "epoch": 0.8487501954564747, "grad_norm": 0.4647740423679352, "learning_rate": 5.876042432681584e-06, "loss": 0.943, "step": 9499 }, { "epoch": 0.8488395469877364, "grad_norm": 0.5627195239067078, "learning_rate": 5.869237957057244e-06, "loss": 0.8846, "step": 9500 }, { "epoch": 0.8489288985189983, "grad_norm": 0.4476568102836609, "learning_rate": 5.86243717785463e-06, "loss": 0.9631, "step": 9501 }, { "epoch": 0.8490182500502602, "grad_norm": 0.42511650919914246, "learning_rate": 5.855640095643372e-06, "loss": 0.8941, "step": 9502 }, { "epoch": 0.8491076015815221, "grad_norm": 0.6032181978225708, "learning_rate": 5.848846710992817e-06, "loss": 0.9229, "step": 9503 }, { "epoch": 0.849196953112784, "grad_norm": 0.49917519092559814, "learning_rate": 5.842057024471953e-06, "loss": 0.9726, "step": 9504 }, { "epoch": 0.8492863046440459, "grad_norm": 0.4302211105823517, "learning_rate": 5.835271036649503e-06, "loss": 0.9793, "step": 9505 }, { "epoch": 0.8493756561753077, "grad_norm": 0.6006591320037842, "learning_rate": 5.8284887480938636e-06, "loss": 0.9136, "step": 9506 }, { "epoch": 0.8494650077065695, "grad_norm": 0.5111293792724609, "learning_rate": 5.821710159373128e-06, "loss": 0.8993, "step": 9507 }, { "epoch": 0.8495543592378314, "grad_norm": 0.5666483640670776, "learning_rate": 5.814935271055083e-06, "loss": 0.921, "step": 9508 }, { "epoch": 0.8496437107690933, "grad_norm": 0.522527813911438, "learning_rate": 5.808164083707179e-06, "loss": 0.9923, "step": 9509 }, { "epoch": 0.8497330623003552, "grad_norm": 0.5020400285720825, "learning_rate": 5.8013965978965855e-06, "loss": 0.9426, "step": 9510 }, { "epoch": 0.849822413831617, "grad_norm": 0.4437236487865448, "learning_rate": 5.794632814190148e-06, "loss": 0.9501, "step": 9511 }, { "epoch": 0.8499117653628789, "grad_norm": 0.49814578890800476, "learning_rate": 5.787872733154409e-06, "loss": 0.936, "step": 9512 }, { "epoch": 0.8500011168941408, "grad_norm": 0.550423264503479, "learning_rate": 5.781116355355593e-06, "loss": 1.0181, "step": 9513 }, { "epoch": 0.8500904684254026, "grad_norm": 0.7392222285270691, "learning_rate": 5.774363681359624e-06, "loss": 0.9255, "step": 9514 }, { "epoch": 0.8501798199566645, "grad_norm": 0.5093478560447693, "learning_rate": 5.767614711732111e-06, "loss": 0.8951, "step": 9515 }, { "epoch": 0.8502691714879264, "grad_norm": 0.5823287963867188, "learning_rate": 5.760869447038348e-06, "loss": 0.861, "step": 9516 }, { "epoch": 0.8503585230191882, "grad_norm": 0.42861655354499817, "learning_rate": 5.75412788784333e-06, "loss": 0.9809, "step": 9517 }, { "epoch": 0.8504478745504501, "grad_norm": 0.44293922185897827, "learning_rate": 5.74739003471172e-06, "loss": 0.9667, "step": 9518 }, { "epoch": 0.850537226081712, "grad_norm": 0.46408700942993164, "learning_rate": 5.740655888207897e-06, "loss": 0.9613, "step": 9519 }, { "epoch": 0.8506265776129739, "grad_norm": 0.42871806025505066, "learning_rate": 5.73392544889591e-06, "loss": 0.987, "step": 9520 }, { "epoch": 0.8507159291442357, "grad_norm": 0.5720949172973633, "learning_rate": 5.727198717339511e-06, "loss": 0.8512, "step": 9521 }, { "epoch": 0.8508052806754975, "grad_norm": 0.46692514419555664, "learning_rate": 5.720475694102128e-06, "loss": 0.9157, "step": 9522 }, { "epoch": 0.8508946322067594, "grad_norm": 0.47131991386413574, "learning_rate": 5.713756379746898e-06, "loss": 0.9097, "step": 9523 }, { "epoch": 0.8509839837380213, "grad_norm": 0.5264059901237488, "learning_rate": 5.707040774836625e-06, "loss": 0.8181, "step": 9524 }, { "epoch": 0.8510733352692832, "grad_norm": 0.5986820459365845, "learning_rate": 5.70032887993382e-06, "loss": 0.9024, "step": 9525 }, { "epoch": 0.8511626868005451, "grad_norm": 0.5732765197753906, "learning_rate": 5.693620695600671e-06, "loss": 0.8888, "step": 9526 }, { "epoch": 0.851252038331807, "grad_norm": 0.5805981755256653, "learning_rate": 5.686916222399069e-06, "loss": 0.8615, "step": 9527 }, { "epoch": 0.8513413898630687, "grad_norm": 0.4108264744281769, "learning_rate": 5.68021546089057e-06, "loss": 1.0108, "step": 9528 }, { "epoch": 0.8514307413943306, "grad_norm": 0.48710349202156067, "learning_rate": 5.673518411636436e-06, "loss": 0.9861, "step": 9529 }, { "epoch": 0.8515200929255925, "grad_norm": 0.5818296670913696, "learning_rate": 5.666825075197624e-06, "loss": 0.882, "step": 9530 }, { "epoch": 0.8516094444568544, "grad_norm": 0.433623343706131, "learning_rate": 5.6601354521347685e-06, "loss": 0.9407, "step": 9531 }, { "epoch": 0.8516987959881163, "grad_norm": 0.5480315685272217, "learning_rate": 5.653449543008199e-06, "loss": 0.8762, "step": 9532 }, { "epoch": 0.8517881475193781, "grad_norm": 0.45491036772727966, "learning_rate": 5.646767348377935e-06, "loss": 0.9376, "step": 9533 }, { "epoch": 0.85187749905064, "grad_norm": 0.43504709005355835, "learning_rate": 5.640088868803673e-06, "loss": 0.9852, "step": 9534 }, { "epoch": 0.8519668505819018, "grad_norm": 0.5115997791290283, "learning_rate": 5.633414104844808e-06, "loss": 0.9546, "step": 9535 }, { "epoch": 0.8520562021131637, "grad_norm": 0.3994527757167816, "learning_rate": 5.6267430570604405e-06, "loss": 0.9405, "step": 9536 }, { "epoch": 0.8521455536444256, "grad_norm": 0.5334661602973938, "learning_rate": 5.620075726009311e-06, "loss": 0.9164, "step": 9537 }, { "epoch": 0.8522349051756875, "grad_norm": 0.41900575160980225, "learning_rate": 5.6134121122498995e-06, "loss": 0.9545, "step": 9538 }, { "epoch": 0.8523242567069493, "grad_norm": 0.583827555179596, "learning_rate": 5.606752216340349e-06, "loss": 0.8339, "step": 9539 }, { "epoch": 0.8524136082382112, "grad_norm": 0.6421988606452942, "learning_rate": 5.6000960388385095e-06, "loss": 0.8723, "step": 9540 }, { "epoch": 0.8525029597694731, "grad_norm": 0.4667465090751648, "learning_rate": 5.593443580301888e-06, "loss": 0.934, "step": 9541 }, { "epoch": 0.8525923113007349, "grad_norm": 0.48862892389297485, "learning_rate": 5.586794841287701e-06, "loss": 0.9056, "step": 9542 }, { "epoch": 0.8526816628319968, "grad_norm": 0.4161957800388336, "learning_rate": 5.5801498223528644e-06, "loss": 0.9632, "step": 9543 }, { "epoch": 0.8527710143632586, "grad_norm": 0.6144065260887146, "learning_rate": 5.573508524053955e-06, "loss": 0.8685, "step": 9544 }, { "epoch": 0.8528603658945205, "grad_norm": 0.44452551007270813, "learning_rate": 5.566870946947261e-06, "loss": 0.9958, "step": 9545 }, { "epoch": 0.8529497174257824, "grad_norm": 0.6683482527732849, "learning_rate": 5.5602370915887494e-06, "loss": 0.8042, "step": 9546 }, { "epoch": 0.8530390689570443, "grad_norm": 0.4790489971637726, "learning_rate": 5.553606958534074e-06, "loss": 0.9908, "step": 9547 }, { "epoch": 0.8531284204883062, "grad_norm": 0.4621983766555786, "learning_rate": 5.546980548338593e-06, "loss": 0.9601, "step": 9548 }, { "epoch": 0.8532177720195679, "grad_norm": 0.42011430859565735, "learning_rate": 5.54035786155731e-06, "loss": 0.965, "step": 9549 }, { "epoch": 0.8533071235508298, "grad_norm": 0.4877914488315582, "learning_rate": 5.533738898744967e-06, "loss": 0.8975, "step": 9550 }, { "epoch": 0.8533964750820917, "grad_norm": 0.49201685190200806, "learning_rate": 5.527123660455969e-06, "loss": 0.9032, "step": 9551 }, { "epoch": 0.8534858266133536, "grad_norm": 0.4355340898036957, "learning_rate": 5.5205121472444055e-06, "loss": 0.9941, "step": 9552 }, { "epoch": 0.8535751781446155, "grad_norm": 0.43532299995422363, "learning_rate": 5.513904359664074e-06, "loss": 0.9603, "step": 9553 }, { "epoch": 0.8536645296758774, "grad_norm": 0.5881833434104919, "learning_rate": 5.507300298268436e-06, "loss": 0.9591, "step": 9554 }, { "epoch": 0.8537538812071392, "grad_norm": 0.4763388931751251, "learning_rate": 5.500699963610656e-06, "loss": 0.9825, "step": 9555 }, { "epoch": 0.853843232738401, "grad_norm": 0.45608317852020264, "learning_rate": 5.49410335624358e-06, "loss": 0.9578, "step": 9556 }, { "epoch": 0.8539325842696629, "grad_norm": 0.5364043712615967, "learning_rate": 5.487510476719748e-06, "loss": 1.0419, "step": 9557 }, { "epoch": 0.8540219358009248, "grad_norm": 0.41415050625801086, "learning_rate": 5.480921325591398e-06, "loss": 0.9417, "step": 9558 }, { "epoch": 0.8541112873321867, "grad_norm": 0.42311692237854004, "learning_rate": 5.47433590341041e-06, "loss": 0.9467, "step": 9559 }, { "epoch": 0.8542006388634485, "grad_norm": 0.44652259349823, "learning_rate": 5.467754210728404e-06, "loss": 1.0331, "step": 9560 }, { "epoch": 0.8542899903947104, "grad_norm": 0.4289079010486603, "learning_rate": 5.4611762480966555e-06, "loss": 0.9254, "step": 9561 }, { "epoch": 0.8543793419259722, "grad_norm": 0.4332316219806671, "learning_rate": 5.4546020160661515e-06, "loss": 1.0019, "step": 9562 }, { "epoch": 0.8544686934572341, "grad_norm": 0.5433120727539062, "learning_rate": 5.448031515187552e-06, "loss": 0.8531, "step": 9563 }, { "epoch": 0.854558044988496, "grad_norm": 0.4383524954319, "learning_rate": 5.441464746011194e-06, "loss": 0.9369, "step": 9564 }, { "epoch": 0.8546473965197579, "grad_norm": 0.4835048019886017, "learning_rate": 5.434901709087131e-06, "loss": 0.9205, "step": 9565 }, { "epoch": 0.8547367480510197, "grad_norm": 0.44369155168533325, "learning_rate": 5.428342404965076e-06, "loss": 0.9472, "step": 9566 }, { "epoch": 0.8548260995822816, "grad_norm": 0.4760458469390869, "learning_rate": 5.421786834194442e-06, "loss": 0.8901, "step": 9567 }, { "epoch": 0.8549154511135435, "grad_norm": 0.4910159707069397, "learning_rate": 5.415234997324348e-06, "loss": 1.0034, "step": 9568 }, { "epoch": 0.8550048026448053, "grad_norm": 0.600982129573822, "learning_rate": 5.4086868949035475e-06, "loss": 0.838, "step": 9569 }, { "epoch": 0.8550941541760672, "grad_norm": 0.6115323901176453, "learning_rate": 5.4021425274805245e-06, "loss": 0.8826, "step": 9570 }, { "epoch": 0.855183505707329, "grad_norm": 0.41822290420532227, "learning_rate": 5.395601895603453e-06, "loss": 0.9599, "step": 9571 }, { "epoch": 0.8552728572385909, "grad_norm": 0.5147873163223267, "learning_rate": 5.389064999820165e-06, "loss": 0.9285, "step": 9572 }, { "epoch": 0.8553622087698528, "grad_norm": 0.5221825838088989, "learning_rate": 5.3825318406782125e-06, "loss": 0.9447, "step": 9573 }, { "epoch": 0.8554515603011147, "grad_norm": 0.4737570285797119, "learning_rate": 5.376002418724796e-06, "loss": 0.9483, "step": 9574 }, { "epoch": 0.8555409118323766, "grad_norm": 0.5008856058120728, "learning_rate": 5.3694767345068345e-06, "loss": 0.8795, "step": 9575 }, { "epoch": 0.8556302633636383, "grad_norm": 0.4690033197402954, "learning_rate": 5.362954788570928e-06, "loss": 0.9913, "step": 9576 }, { "epoch": 0.8557196148949002, "grad_norm": 0.5061206221580505, "learning_rate": 5.356436581463353e-06, "loss": 0.8482, "step": 9577 }, { "epoch": 0.8558089664261621, "grad_norm": 0.5794250965118408, "learning_rate": 5.3499221137300885e-06, "loss": 0.8922, "step": 9578 }, { "epoch": 0.855898317957424, "grad_norm": 0.5473932027816772, "learning_rate": 5.343411385916769e-06, "loss": 0.9497, "step": 9579 }, { "epoch": 0.8559876694886859, "grad_norm": 0.42786940932273865, "learning_rate": 5.3369043985687595e-06, "loss": 0.9562, "step": 9580 }, { "epoch": 0.8560770210199478, "grad_norm": 0.5070851445198059, "learning_rate": 5.330401152231074e-06, "loss": 0.9229, "step": 9581 }, { "epoch": 0.8561663725512096, "grad_norm": 0.45324572920799255, "learning_rate": 5.323901647448437e-06, "loss": 1.023, "step": 9582 }, { "epoch": 0.8562557240824714, "grad_norm": 0.6096236109733582, "learning_rate": 5.317405884765253e-06, "loss": 0.9328, "step": 9583 }, { "epoch": 0.8563450756137333, "grad_norm": 0.5863158106803894, "learning_rate": 5.310913864725609e-06, "loss": 0.915, "step": 9584 }, { "epoch": 0.8564344271449952, "grad_norm": 0.4709130525588989, "learning_rate": 5.304425587873274e-06, "loss": 0.8696, "step": 9585 }, { "epoch": 0.8565237786762571, "grad_norm": 0.607227623462677, "learning_rate": 5.29794105475172e-06, "loss": 0.924, "step": 9586 }, { "epoch": 0.856613130207519, "grad_norm": 0.4545291066169739, "learning_rate": 5.291460265904097e-06, "loss": 0.9702, "step": 9587 }, { "epoch": 0.8567024817387808, "grad_norm": 0.44768497347831726, "learning_rate": 5.284983221873241e-06, "loss": 1.0336, "step": 9588 }, { "epoch": 0.8567918332700427, "grad_norm": 0.666070282459259, "learning_rate": 5.278509923201657e-06, "loss": 0.8109, "step": 9589 }, { "epoch": 0.8568811848013045, "grad_norm": 0.5126855969429016, "learning_rate": 5.2720403704315635e-06, "loss": 0.904, "step": 9590 }, { "epoch": 0.8569705363325664, "grad_norm": 0.47559547424316406, "learning_rate": 5.265574564104858e-06, "loss": 0.9819, "step": 9591 }, { "epoch": 0.8570598878638283, "grad_norm": 0.43424925208091736, "learning_rate": 5.259112504763114e-06, "loss": 0.9388, "step": 9592 }, { "epoch": 0.8571492393950901, "grad_norm": 0.3889505863189697, "learning_rate": 5.252654192947603e-06, "loss": 1.003, "step": 9593 }, { "epoch": 0.857238590926352, "grad_norm": 0.47286322712898254, "learning_rate": 5.246199629199277e-06, "loss": 0.8823, "step": 9594 }, { "epoch": 0.8573279424576139, "grad_norm": 0.46248650550842285, "learning_rate": 5.23974881405877e-06, "loss": 0.9612, "step": 9595 }, { "epoch": 0.8574172939888758, "grad_norm": 0.5204552412033081, "learning_rate": 5.233301748066416e-06, "loss": 0.9113, "step": 9596 }, { "epoch": 0.8575066455201376, "grad_norm": 0.42434239387512207, "learning_rate": 5.226858431762216e-06, "loss": 0.9449, "step": 9597 }, { "epoch": 0.8575959970513994, "grad_norm": 0.6636669039726257, "learning_rate": 5.2204188656858775e-06, "loss": 0.9023, "step": 9598 }, { "epoch": 0.8576853485826613, "grad_norm": 0.49965164065361023, "learning_rate": 5.213983050376764e-06, "loss": 0.9547, "step": 9599 }, { "epoch": 0.8577747001139232, "grad_norm": 0.43195703625679016, "learning_rate": 5.207550986373961e-06, "loss": 0.9084, "step": 9600 }, { "epoch": 0.8578640516451851, "grad_norm": 0.4506111145019531, "learning_rate": 5.201122674216208e-06, "loss": 0.9734, "step": 9601 }, { "epoch": 0.857953403176447, "grad_norm": 0.4218018054962158, "learning_rate": 5.194698114441959e-06, "loss": 0.9299, "step": 9602 }, { "epoch": 0.8580427547077089, "grad_norm": 0.5177663564682007, "learning_rate": 5.188277307589334e-06, "loss": 0.8914, "step": 9603 }, { "epoch": 0.8581321062389706, "grad_norm": 0.5173326134681702, "learning_rate": 5.181860254196136e-06, "loss": 0.9674, "step": 9604 }, { "epoch": 0.8582214577702325, "grad_norm": 0.49760016798973083, "learning_rate": 5.175446954799873e-06, "loss": 0.8798, "step": 9605 }, { "epoch": 0.8583108093014944, "grad_norm": 0.48603054881095886, "learning_rate": 5.169037409937733e-06, "loss": 0.8737, "step": 9606 }, { "epoch": 0.8584001608327563, "grad_norm": 0.4998380243778229, "learning_rate": 5.162631620146563e-06, "loss": 0.9471, "step": 9607 }, { "epoch": 0.8584895123640182, "grad_norm": 0.5357629656791687, "learning_rate": 5.15622958596293e-06, "loss": 0.9189, "step": 9608 }, { "epoch": 0.85857886389528, "grad_norm": 0.5003303289413452, "learning_rate": 5.149831307923064e-06, "loss": 0.9853, "step": 9609 }, { "epoch": 0.8586682154265419, "grad_norm": 0.4467265009880066, "learning_rate": 5.1434367865629e-06, "loss": 0.9858, "step": 9610 }, { "epoch": 0.8587575669578037, "grad_norm": 0.6121494770050049, "learning_rate": 5.137046022418046e-06, "loss": 0.859, "step": 9611 }, { "epoch": 0.8588469184890656, "grad_norm": 0.5334439873695374, "learning_rate": 5.130659016023787e-06, "loss": 0.884, "step": 9612 }, { "epoch": 0.8589362700203275, "grad_norm": 0.4914329946041107, "learning_rate": 5.12427576791511e-06, "loss": 0.9521, "step": 9613 }, { "epoch": 0.8590256215515893, "grad_norm": 0.5462926626205444, "learning_rate": 5.117896278626671e-06, "loss": 0.9285, "step": 9614 }, { "epoch": 0.8591149730828512, "grad_norm": 0.4273386001586914, "learning_rate": 5.111520548692833e-06, "loss": 0.9606, "step": 9615 }, { "epoch": 0.8592043246141131, "grad_norm": 0.6681180596351624, "learning_rate": 5.105148578647623e-06, "loss": 0.773, "step": 9616 }, { "epoch": 0.859293676145375, "grad_norm": 0.4087236225605011, "learning_rate": 5.0987803690247635e-06, "loss": 0.9703, "step": 9617 }, { "epoch": 0.8593830276766368, "grad_norm": 0.499735027551651, "learning_rate": 5.092415920357674e-06, "loss": 0.9526, "step": 9618 }, { "epoch": 0.8594723792078987, "grad_norm": 0.482085257768631, "learning_rate": 5.086055233179421e-06, "loss": 0.8669, "step": 9619 }, { "epoch": 0.8595617307391605, "grad_norm": 0.448794960975647, "learning_rate": 5.07969830802279e-06, "loss": 0.9589, "step": 9620 }, { "epoch": 0.8596510822704224, "grad_norm": 0.464096337556839, "learning_rate": 5.073345145420238e-06, "loss": 0.9648, "step": 9621 }, { "epoch": 0.8597404338016843, "grad_norm": 0.5646736025810242, "learning_rate": 5.066995745903919e-06, "loss": 0.96, "step": 9622 }, { "epoch": 0.8598297853329462, "grad_norm": 0.5770590901374817, "learning_rate": 5.060650110005655e-06, "loss": 0.9433, "step": 9623 }, { "epoch": 0.859919136864208, "grad_norm": 0.5076577067375183, "learning_rate": 5.0543082382569666e-06, "loss": 0.8988, "step": 9624 }, { "epoch": 0.8600084883954698, "grad_norm": 0.40074893832206726, "learning_rate": 5.047970131189045e-06, "loss": 0.9946, "step": 9625 }, { "epoch": 0.8600978399267317, "grad_norm": 0.530560314655304, "learning_rate": 5.0416357893327826e-06, "loss": 1.0576, "step": 9626 }, { "epoch": 0.8601871914579936, "grad_norm": 0.5239288806915283, "learning_rate": 5.035305213218744e-06, "loss": 0.9083, "step": 9627 }, { "epoch": 0.8602765429892555, "grad_norm": 0.6042668223381042, "learning_rate": 5.028978403377182e-06, "loss": 0.8771, "step": 9628 }, { "epoch": 0.8603658945205174, "grad_norm": 0.5551727414131165, "learning_rate": 5.022655360338047e-06, "loss": 0.9087, "step": 9629 }, { "epoch": 0.8604552460517793, "grad_norm": 0.5318360328674316, "learning_rate": 5.016336084630935e-06, "loss": 0.8723, "step": 9630 }, { "epoch": 0.860544597583041, "grad_norm": 0.42179903388023376, "learning_rate": 5.010020576785174e-06, "loss": 0.9642, "step": 9631 }, { "epoch": 0.8606339491143029, "grad_norm": 0.5042697787284851, "learning_rate": 5.0037088373297455e-06, "loss": 0.8744, "step": 9632 }, { "epoch": 0.8607233006455648, "grad_norm": 0.539291501045227, "learning_rate": 4.997400866793328e-06, "loss": 0.9179, "step": 9633 }, { "epoch": 0.8608126521768267, "grad_norm": 0.5395140647888184, "learning_rate": 4.991096665704281e-06, "loss": 0.9229, "step": 9634 }, { "epoch": 0.8609020037080886, "grad_norm": 0.46536731719970703, "learning_rate": 4.984796234590644e-06, "loss": 0.9679, "step": 9635 }, { "epoch": 0.8609913552393504, "grad_norm": 0.4683972895145416, "learning_rate": 4.978499573980155e-06, "loss": 0.9381, "step": 9636 }, { "epoch": 0.8610807067706123, "grad_norm": 0.49697425961494446, "learning_rate": 4.972206684400221e-06, "loss": 0.9342, "step": 9637 }, { "epoch": 0.8611700583018741, "grad_norm": 0.5050958395004272, "learning_rate": 4.9659175663779365e-06, "loss": 0.9076, "step": 9638 }, { "epoch": 0.861259409833136, "grad_norm": 0.5287109613418579, "learning_rate": 4.959632220440097e-06, "loss": 0.8461, "step": 9639 }, { "epoch": 0.8613487613643979, "grad_norm": 0.7550068497657776, "learning_rate": 4.953350647113148e-06, "loss": 0.8207, "step": 9640 }, { "epoch": 0.8614381128956597, "grad_norm": 0.4816301763057709, "learning_rate": 4.94707284692324e-06, "loss": 0.911, "step": 9641 }, { "epoch": 0.8615274644269216, "grad_norm": 0.4800666570663452, "learning_rate": 4.940798820396214e-06, "loss": 0.8894, "step": 9642 }, { "epoch": 0.8616168159581835, "grad_norm": 0.4487608075141907, "learning_rate": 4.934528568057589e-06, "loss": 0.9399, "step": 9643 }, { "epoch": 0.8617061674894454, "grad_norm": 0.5139451026916504, "learning_rate": 4.928262090432556e-06, "loss": 0.9371, "step": 9644 }, { "epoch": 0.8617955190207072, "grad_norm": 0.44007474184036255, "learning_rate": 4.921999388045995e-06, "loss": 0.9442, "step": 9645 }, { "epoch": 0.861884870551969, "grad_norm": 0.4355262219905853, "learning_rate": 4.915740461422491e-06, "loss": 0.9202, "step": 9646 }, { "epoch": 0.8619742220832309, "grad_norm": 0.425823450088501, "learning_rate": 4.909485311086281e-06, "loss": 1.0389, "step": 9647 }, { "epoch": 0.8620635736144928, "grad_norm": 0.5076099038124084, "learning_rate": 4.903233937561308e-06, "loss": 0.9965, "step": 9648 }, { "epoch": 0.8621529251457547, "grad_norm": 0.5130895972251892, "learning_rate": 4.896986341371201e-06, "loss": 0.8541, "step": 9649 }, { "epoch": 0.8622422766770166, "grad_norm": 0.4927546977996826, "learning_rate": 4.890742523039238e-06, "loss": 0.8913, "step": 9650 }, { "epoch": 0.8623316282082785, "grad_norm": 0.6410254240036011, "learning_rate": 4.884502483088421e-06, "loss": 0.8739, "step": 9651 }, { "epoch": 0.8624209797395402, "grad_norm": 0.4251442849636078, "learning_rate": 4.8782662220414225e-06, "loss": 0.9444, "step": 9652 }, { "epoch": 0.8625103312708021, "grad_norm": 0.46380364894866943, "learning_rate": 4.872033740420584e-06, "loss": 0.9366, "step": 9653 }, { "epoch": 0.862599682802064, "grad_norm": 0.4519484043121338, "learning_rate": 4.86580503874795e-06, "loss": 0.8937, "step": 9654 }, { "epoch": 0.8626890343333259, "grad_norm": 0.6642407178878784, "learning_rate": 4.8595801175452425e-06, "loss": 0.8715, "step": 9655 }, { "epoch": 0.8627783858645878, "grad_norm": 0.6670805215835571, "learning_rate": 4.853358977333866e-06, "loss": 0.8974, "step": 9656 }, { "epoch": 0.8628677373958497, "grad_norm": 0.4275446832180023, "learning_rate": 4.847141618634898e-06, "loss": 0.9749, "step": 9657 }, { "epoch": 0.8629570889271115, "grad_norm": 0.4982154369354248, "learning_rate": 4.8409280419691176e-06, "loss": 0.9995, "step": 9658 }, { "epoch": 0.8630464404583733, "grad_norm": 0.47910749912261963, "learning_rate": 4.834718247856978e-06, "loss": 0.944, "step": 9659 }, { "epoch": 0.8631357919896352, "grad_norm": 0.633050799369812, "learning_rate": 4.828512236818611e-06, "loss": 0.863, "step": 9660 }, { "epoch": 0.8632251435208971, "grad_norm": 0.6047154068946838, "learning_rate": 4.822310009373832e-06, "loss": 0.91, "step": 9661 }, { "epoch": 0.863314495052159, "grad_norm": 0.49586907029151917, "learning_rate": 4.816111566042153e-06, "loss": 0.9588, "step": 9662 }, { "epoch": 0.8634038465834208, "grad_norm": 0.5318160057067871, "learning_rate": 4.809916907342754e-06, "loss": 0.8139, "step": 9663 }, { "epoch": 0.8634931981146827, "grad_norm": 0.5686520338058472, "learning_rate": 4.803726033794509e-06, "loss": 0.9418, "step": 9664 }, { "epoch": 0.8635825496459446, "grad_norm": 0.5216206908226013, "learning_rate": 4.797538945915958e-06, "loss": 0.9086, "step": 9665 }, { "epoch": 0.8636719011772064, "grad_norm": 0.4904700815677643, "learning_rate": 4.791355644225354e-06, "loss": 0.93, "step": 9666 }, { "epoch": 0.8637612527084683, "grad_norm": 0.4535919725894928, "learning_rate": 4.7851761292405985e-06, "loss": 0.924, "step": 9667 }, { "epoch": 0.8638506042397301, "grad_norm": 0.5180773138999939, "learning_rate": 4.7790004014793e-06, "loss": 0.9589, "step": 9668 }, { "epoch": 0.863939955770992, "grad_norm": 0.4436119496822357, "learning_rate": 4.772828461458745e-06, "loss": 0.9582, "step": 9669 }, { "epoch": 0.8640293073022539, "grad_norm": 0.41180846095085144, "learning_rate": 4.766660309695886e-06, "loss": 0.9333, "step": 9670 }, { "epoch": 0.8641186588335158, "grad_norm": 0.4437258243560791, "learning_rate": 4.7604959467073774e-06, "loss": 0.9341, "step": 9671 }, { "epoch": 0.8642080103647777, "grad_norm": 0.5144766569137573, "learning_rate": 4.754335373009555e-06, "loss": 0.9834, "step": 9672 }, { "epoch": 0.8642973618960395, "grad_norm": 0.43276622891426086, "learning_rate": 4.7481785891184295e-06, "loss": 0.9718, "step": 9673 }, { "epoch": 0.8643867134273013, "grad_norm": 0.5187088847160339, "learning_rate": 4.7420255955496925e-06, "loss": 0.9335, "step": 9674 }, { "epoch": 0.8644760649585632, "grad_norm": 0.5164356827735901, "learning_rate": 4.735876392818727e-06, "loss": 0.9012, "step": 9675 }, { "epoch": 0.8645654164898251, "grad_norm": 0.49884214997291565, "learning_rate": 4.729730981440611e-06, "loss": 0.9389, "step": 9676 }, { "epoch": 0.864654768021087, "grad_norm": 0.5525695085525513, "learning_rate": 4.723589361930053e-06, "loss": 0.9312, "step": 9677 }, { "epoch": 0.8647441195523489, "grad_norm": 0.4606095850467682, "learning_rate": 4.717451534801504e-06, "loss": 0.9609, "step": 9678 }, { "epoch": 0.8648334710836107, "grad_norm": 0.513602077960968, "learning_rate": 4.711317500569068e-06, "loss": 0.9551, "step": 9679 }, { "epoch": 0.8649228226148725, "grad_norm": 0.4917820990085602, "learning_rate": 4.70518725974653e-06, "loss": 0.878, "step": 9680 }, { "epoch": 0.8650121741461344, "grad_norm": 0.49893245100975037, "learning_rate": 4.699060812847378e-06, "loss": 0.9257, "step": 9681 }, { "epoch": 0.8651015256773963, "grad_norm": 0.5214920043945312, "learning_rate": 4.692938160384747e-06, "loss": 0.8962, "step": 9682 }, { "epoch": 0.8651908772086582, "grad_norm": 0.5985978841781616, "learning_rate": 4.686819302871481e-06, "loss": 0.8617, "step": 9683 }, { "epoch": 0.86528022873992, "grad_norm": 0.47699883580207825, "learning_rate": 4.6807042408201e-06, "loss": 0.9344, "step": 9684 }, { "epoch": 0.8653695802711819, "grad_norm": 0.4804041087627411, "learning_rate": 4.674592974742814e-06, "loss": 0.9556, "step": 9685 }, { "epoch": 0.8654589318024437, "grad_norm": 0.5328719019889832, "learning_rate": 4.668485505151498e-06, "loss": 0.9159, "step": 9686 }, { "epoch": 0.8655482833337056, "grad_norm": 0.5171124935150146, "learning_rate": 4.662381832557722e-06, "loss": 0.9119, "step": 9687 }, { "epoch": 0.8656376348649675, "grad_norm": 0.44296491146087646, "learning_rate": 4.65628195747273e-06, "loss": 0.9932, "step": 9688 }, { "epoch": 0.8657269863962294, "grad_norm": 0.5319826602935791, "learning_rate": 4.65018588040746e-06, "loss": 0.9008, "step": 9689 }, { "epoch": 0.8658163379274912, "grad_norm": 0.5790243744850159, "learning_rate": 4.644093601872513e-06, "loss": 0.8628, "step": 9690 }, { "epoch": 0.8659056894587531, "grad_norm": 0.46617162227630615, "learning_rate": 4.638005122378181e-06, "loss": 0.9073, "step": 9691 }, { "epoch": 0.865995040990015, "grad_norm": 0.4653133451938629, "learning_rate": 4.631920442434446e-06, "loss": 0.9665, "step": 9692 }, { "epoch": 0.8660843925212768, "grad_norm": 0.4677208662033081, "learning_rate": 4.625839562550965e-06, "loss": 0.8954, "step": 9693 }, { "epoch": 0.8661737440525387, "grad_norm": 0.5210303664207458, "learning_rate": 4.619762483237072e-06, "loss": 0.9758, "step": 9694 }, { "epoch": 0.8662630955838005, "grad_norm": 0.5137758255004883, "learning_rate": 4.613689205001792e-06, "loss": 0.8698, "step": 9695 }, { "epoch": 0.8663524471150624, "grad_norm": 0.45305004715919495, "learning_rate": 4.607619728353818e-06, "loss": 0.9656, "step": 9696 }, { "epoch": 0.8664417986463243, "grad_norm": 0.5791176557540894, "learning_rate": 4.601554053801549e-06, "loss": 0.8502, "step": 9697 }, { "epoch": 0.8665311501775862, "grad_norm": 0.40173932909965515, "learning_rate": 4.595492181853034e-06, "loss": 0.9574, "step": 9698 }, { "epoch": 0.8666205017088481, "grad_norm": 0.4410465955734253, "learning_rate": 4.589434113016039e-06, "loss": 0.9465, "step": 9699 }, { "epoch": 0.8667098532401099, "grad_norm": 0.49588507413864136, "learning_rate": 4.583379847797964e-06, "loss": 0.9517, "step": 9700 }, { "epoch": 0.8667992047713717, "grad_norm": 0.46362167596817017, "learning_rate": 4.577329386705942e-06, "loss": 0.9281, "step": 9701 }, { "epoch": 0.8668885563026336, "grad_norm": 0.5212817192077637, "learning_rate": 4.571282730246745e-06, "loss": 0.9243, "step": 9702 }, { "epoch": 0.8669779078338955, "grad_norm": 0.45512816309928894, "learning_rate": 4.565239878926863e-06, "loss": 0.9509, "step": 9703 }, { "epoch": 0.8670672593651574, "grad_norm": 0.5159767866134644, "learning_rate": 4.5592008332524364e-06, "loss": 0.8746, "step": 9704 }, { "epoch": 0.8671566108964193, "grad_norm": 0.4418531656265259, "learning_rate": 4.553165593729303e-06, "loss": 1.0525, "step": 9705 }, { "epoch": 0.8672459624276811, "grad_norm": 0.5434200763702393, "learning_rate": 4.547134160862981e-06, "loss": 0.8959, "step": 9706 }, { "epoch": 0.8673353139589429, "grad_norm": 0.4653039276599884, "learning_rate": 4.541106535158668e-06, "loss": 0.9446, "step": 9707 }, { "epoch": 0.8674246654902048, "grad_norm": 0.46755141019821167, "learning_rate": 4.5350827171212366e-06, "loss": 0.8746, "step": 9708 }, { "epoch": 0.8675140170214667, "grad_norm": 0.5309289693832397, "learning_rate": 4.529062707255261e-06, "loss": 0.9246, "step": 9709 }, { "epoch": 0.8676033685527286, "grad_norm": 0.5540649890899658, "learning_rate": 4.5230465060649595e-06, "loss": 0.8565, "step": 9710 }, { "epoch": 0.8676927200839905, "grad_norm": 0.5240215063095093, "learning_rate": 4.517034114054258e-06, "loss": 0.9513, "step": 9711 }, { "epoch": 0.8677820716152523, "grad_norm": 0.4945807158946991, "learning_rate": 4.5110255317267704e-06, "loss": 1.0085, "step": 9712 }, { "epoch": 0.8678714231465142, "grad_norm": 0.446004182100296, "learning_rate": 4.505020759585765e-06, "loss": 0.9658, "step": 9713 }, { "epoch": 0.867960774677776, "grad_norm": 0.4994924068450928, "learning_rate": 4.499019798134224e-06, "loss": 0.9725, "step": 9714 }, { "epoch": 0.8680501262090379, "grad_norm": 0.42142254114151, "learning_rate": 4.493022647874773e-06, "loss": 0.9469, "step": 9715 }, { "epoch": 0.8681394777402998, "grad_norm": 0.49049112200737, "learning_rate": 4.487029309309748e-06, "loss": 0.9365, "step": 9716 }, { "epoch": 0.8682288292715616, "grad_norm": 0.5305113792419434, "learning_rate": 4.481039782941143e-06, "loss": 0.9549, "step": 9717 }, { "epoch": 0.8683181808028235, "grad_norm": 0.5698569416999817, "learning_rate": 4.4750540692706625e-06, "loss": 0.9227, "step": 9718 }, { "epoch": 0.8684075323340854, "grad_norm": 0.5504754185676575, "learning_rate": 4.469072168799659e-06, "loss": 1.002, "step": 9719 }, { "epoch": 0.8684968838653473, "grad_norm": 0.4171115756034851, "learning_rate": 4.4630940820291955e-06, "loss": 0.9375, "step": 9720 }, { "epoch": 0.8685862353966091, "grad_norm": 0.5085521340370178, "learning_rate": 4.4571198094599866e-06, "loss": 0.8873, "step": 9721 }, { "epoch": 0.868675586927871, "grad_norm": 0.3779529929161072, "learning_rate": 4.451149351592437e-06, "loss": 1.0097, "step": 9722 }, { "epoch": 0.8687649384591328, "grad_norm": 0.4312131702899933, "learning_rate": 4.445182708926654e-06, "loss": 0.9988, "step": 9723 }, { "epoch": 0.8688542899903947, "grad_norm": 0.578957200050354, "learning_rate": 4.439219881962392e-06, "loss": 0.8544, "step": 9724 }, { "epoch": 0.8689436415216566, "grad_norm": 0.47734835743904114, "learning_rate": 4.433260871199113e-06, "loss": 0.9883, "step": 9725 }, { "epoch": 0.8690329930529185, "grad_norm": 0.5002614259719849, "learning_rate": 4.42730567713594e-06, "loss": 0.9303, "step": 9726 }, { "epoch": 0.8691223445841804, "grad_norm": 0.4868052303791046, "learning_rate": 4.4213543002716904e-06, "loss": 0.9285, "step": 9727 }, { "epoch": 0.8692116961154421, "grad_norm": 0.4923020005226135, "learning_rate": 4.415406741104844e-06, "loss": 0.971, "step": 9728 }, { "epoch": 0.869301047646704, "grad_norm": 0.4633644223213196, "learning_rate": 4.409463000133584e-06, "loss": 0.9052, "step": 9729 }, { "epoch": 0.8693903991779659, "grad_norm": 0.5671141147613525, "learning_rate": 4.40352307785577e-06, "loss": 0.9258, "step": 9730 }, { "epoch": 0.8694797507092278, "grad_norm": 0.40942826867103577, "learning_rate": 4.397586974768908e-06, "loss": 0.9291, "step": 9731 }, { "epoch": 0.8695691022404897, "grad_norm": 0.38688722252845764, "learning_rate": 4.391654691370229e-06, "loss": 0.9367, "step": 9732 }, { "epoch": 0.8696584537717515, "grad_norm": 0.43114298582077026, "learning_rate": 4.385726228156617e-06, "loss": 0.9637, "step": 9733 }, { "epoch": 0.8697478053030134, "grad_norm": 0.4530818462371826, "learning_rate": 4.379801585624643e-06, "loss": 0.9887, "step": 9734 }, { "epoch": 0.8698371568342752, "grad_norm": 0.582784116268158, "learning_rate": 4.373880764270566e-06, "loss": 0.9341, "step": 9735 }, { "epoch": 0.8699265083655371, "grad_norm": 0.5109930634498596, "learning_rate": 4.367963764590311e-06, "loss": 0.9377, "step": 9736 }, { "epoch": 0.870015859896799, "grad_norm": 0.5649825930595398, "learning_rate": 4.362050587079497e-06, "loss": 0.9036, "step": 9737 }, { "epoch": 0.8701052114280609, "grad_norm": 0.6133013963699341, "learning_rate": 4.356141232233413e-06, "loss": 0.8088, "step": 9738 }, { "epoch": 0.8701945629593227, "grad_norm": 0.6256003379821777, "learning_rate": 4.350235700547028e-06, "loss": 0.8784, "step": 9739 }, { "epoch": 0.8702839144905846, "grad_norm": 0.5290203094482422, "learning_rate": 4.344333992515004e-06, "loss": 0.8624, "step": 9740 }, { "epoch": 0.8703732660218465, "grad_norm": 0.44871601462364197, "learning_rate": 4.338436108631649e-06, "loss": 0.9352, "step": 9741 }, { "epoch": 0.8704626175531083, "grad_norm": 0.6028452515602112, "learning_rate": 4.332542049390992e-06, "loss": 0.8648, "step": 9742 }, { "epoch": 0.8705519690843702, "grad_norm": 0.4009053409099579, "learning_rate": 4.3266518152867145e-06, "loss": 0.9586, "step": 9743 }, { "epoch": 0.870641320615632, "grad_norm": 0.5013288259506226, "learning_rate": 4.320765406812194e-06, "loss": 0.8975, "step": 9744 }, { "epoch": 0.8707306721468939, "grad_norm": 0.5538919568061829, "learning_rate": 4.314882824460475e-06, "loss": 1.09, "step": 9745 }, { "epoch": 0.8708200236781558, "grad_norm": 0.4379560947418213, "learning_rate": 4.309004068724298e-06, "loss": 0.9737, "step": 9746 }, { "epoch": 0.8709093752094177, "grad_norm": 0.47535446286201477, "learning_rate": 4.303129140096052e-06, "loss": 0.9569, "step": 9747 }, { "epoch": 0.8709987267406795, "grad_norm": 0.5819507837295532, "learning_rate": 4.297258039067831e-06, "loss": 0.8848, "step": 9748 }, { "epoch": 0.8710880782719413, "grad_norm": 0.4755156934261322, "learning_rate": 4.291390766131409e-06, "loss": 0.8717, "step": 9749 }, { "epoch": 0.8711774298032032, "grad_norm": 0.605339527130127, "learning_rate": 4.285527321778232e-06, "loss": 0.9916, "step": 9750 }, { "epoch": 0.8712667813344651, "grad_norm": 0.6067926287651062, "learning_rate": 4.279667706499424e-06, "loss": 0.9132, "step": 9751 }, { "epoch": 0.871356132865727, "grad_norm": 0.5548110604286194, "learning_rate": 4.273811920785786e-06, "loss": 0.8592, "step": 9752 }, { "epoch": 0.8714454843969889, "grad_norm": 0.533915638923645, "learning_rate": 4.2679599651278045e-06, "loss": 0.9079, "step": 9753 }, { "epoch": 0.8715348359282508, "grad_norm": 0.5274013876914978, "learning_rate": 4.262111840015642e-06, "loss": 0.9541, "step": 9754 }, { "epoch": 0.8716241874595125, "grad_norm": 0.4624837636947632, "learning_rate": 4.256267545939147e-06, "loss": 0.8876, "step": 9755 }, { "epoch": 0.8717135389907744, "grad_norm": 0.5232068300247192, "learning_rate": 4.250427083387837e-06, "loss": 0.9834, "step": 9756 }, { "epoch": 0.8718028905220363, "grad_norm": 0.666278600692749, "learning_rate": 4.244590452850916e-06, "loss": 0.8801, "step": 9757 }, { "epoch": 0.8718922420532982, "grad_norm": 0.49066150188446045, "learning_rate": 4.2387576548172605e-06, "loss": 0.907, "step": 9758 }, { "epoch": 0.8719815935845601, "grad_norm": 0.599941074848175, "learning_rate": 4.232928689775428e-06, "loss": 0.9306, "step": 9759 }, { "epoch": 0.872070945115822, "grad_norm": 0.5583867430686951, "learning_rate": 4.227103558213674e-06, "loss": 0.8552, "step": 9760 }, { "epoch": 0.8721602966470838, "grad_norm": 0.5444775819778442, "learning_rate": 4.221282260619891e-06, "loss": 0.9385, "step": 9761 }, { "epoch": 0.8722496481783456, "grad_norm": 0.5456721782684326, "learning_rate": 4.215464797481683e-06, "loss": 0.8826, "step": 9762 }, { "epoch": 0.8723389997096075, "grad_norm": 0.5117126107215881, "learning_rate": 4.2096511692863275e-06, "loss": 0.9481, "step": 9763 }, { "epoch": 0.8724283512408694, "grad_norm": 0.5069064497947693, "learning_rate": 4.203841376520773e-06, "loss": 0.9362, "step": 9764 }, { "epoch": 0.8725177027721313, "grad_norm": 0.5353370904922485, "learning_rate": 4.198035419671658e-06, "loss": 0.8717, "step": 9765 }, { "epoch": 0.8726070543033931, "grad_norm": 0.4016374945640564, "learning_rate": 4.19223329922529e-06, "loss": 0.8964, "step": 9766 }, { "epoch": 0.872696405834655, "grad_norm": 0.4020116627216339, "learning_rate": 4.186435015667661e-06, "loss": 1.0015, "step": 9767 }, { "epoch": 0.8727857573659169, "grad_norm": 0.6577222347259521, "learning_rate": 4.180640569484434e-06, "loss": 0.9153, "step": 9768 }, { "epoch": 0.8728751088971787, "grad_norm": 0.5189597010612488, "learning_rate": 4.17484996116096e-06, "loss": 0.903, "step": 9769 }, { "epoch": 0.8729644604284406, "grad_norm": 0.44712257385253906, "learning_rate": 4.169063191182271e-06, "loss": 0.9051, "step": 9770 }, { "epoch": 0.8730538119597024, "grad_norm": 0.497765451669693, "learning_rate": 4.163280260033053e-06, "loss": 0.9542, "step": 9771 }, { "epoch": 0.8731431634909643, "grad_norm": 0.4439071714878082, "learning_rate": 4.157501168197703e-06, "loss": 0.9827, "step": 9772 }, { "epoch": 0.8732325150222262, "grad_norm": 0.4466901421546936, "learning_rate": 4.1517259161602705e-06, "loss": 0.939, "step": 9773 }, { "epoch": 0.8733218665534881, "grad_norm": 0.5371690988540649, "learning_rate": 4.145954504404498e-06, "loss": 0.9615, "step": 9774 }, { "epoch": 0.87341121808475, "grad_norm": 0.4514279067516327, "learning_rate": 4.140186933413809e-06, "loss": 0.9192, "step": 9775 }, { "epoch": 0.8735005696160117, "grad_norm": 0.47442543506622314, "learning_rate": 4.134423203671295e-06, "loss": 0.9659, "step": 9776 }, { "epoch": 0.8735899211472736, "grad_norm": 0.532922625541687, "learning_rate": 4.128663315659725e-06, "loss": 0.9492, "step": 9777 }, { "epoch": 0.8736792726785355, "grad_norm": 0.6389763355255127, "learning_rate": 4.122907269861559e-06, "loss": 0.8919, "step": 9778 }, { "epoch": 0.8737686242097974, "grad_norm": 0.4954306483268738, "learning_rate": 4.117155066758938e-06, "loss": 1.0198, "step": 9779 }, { "epoch": 0.8738579757410593, "grad_norm": 0.46348780393600464, "learning_rate": 4.111406706833637e-06, "loss": 0.9218, "step": 9780 }, { "epoch": 0.8739473272723212, "grad_norm": 0.47658228874206543, "learning_rate": 4.105662190567166e-06, "loss": 0.8703, "step": 9781 }, { "epoch": 0.874036678803583, "grad_norm": 0.5489630103111267, "learning_rate": 4.099921518440686e-06, "loss": 0.9327, "step": 9782 }, { "epoch": 0.8741260303348448, "grad_norm": 0.5429866909980774, "learning_rate": 4.094184690935038e-06, "loss": 0.8676, "step": 9783 }, { "epoch": 0.8742153818661067, "grad_norm": 0.5859069228172302, "learning_rate": 4.088451708530755e-06, "loss": 0.8897, "step": 9784 }, { "epoch": 0.8743047333973686, "grad_norm": 0.5639720559120178, "learning_rate": 4.082722571708008e-06, "loss": 0.9152, "step": 9785 }, { "epoch": 0.8743940849286305, "grad_norm": 0.4945251941680908, "learning_rate": 4.076997280946693e-06, "loss": 0.9027, "step": 9786 }, { "epoch": 0.8744834364598923, "grad_norm": 0.49459564685821533, "learning_rate": 4.071275836726357e-06, "loss": 0.8952, "step": 9787 }, { "epoch": 0.8745727879911542, "grad_norm": 0.5829160213470459, "learning_rate": 4.065558239526241e-06, "loss": 0.9146, "step": 9788 }, { "epoch": 0.8746621395224161, "grad_norm": 0.472232848405838, "learning_rate": 4.059844489825243e-06, "loss": 0.9677, "step": 9789 }, { "epoch": 0.8747514910536779, "grad_norm": 0.4521077275276184, "learning_rate": 4.054134588101965e-06, "loss": 0.9456, "step": 9790 }, { "epoch": 0.8748408425849398, "grad_norm": 0.5067082047462463, "learning_rate": 4.048428534834653e-06, "loss": 0.9167, "step": 9791 }, { "epoch": 0.8749301941162017, "grad_norm": 0.6378511786460876, "learning_rate": 4.042726330501262e-06, "loss": 0.8104, "step": 9792 }, { "epoch": 0.8750195456474635, "grad_norm": 0.483568400144577, "learning_rate": 4.03702797557941e-06, "loss": 0.9183, "step": 9793 }, { "epoch": 0.8751088971787254, "grad_norm": 0.47959351539611816, "learning_rate": 4.031333470546394e-06, "loss": 0.9048, "step": 9794 }, { "epoch": 0.8751982487099873, "grad_norm": 0.5103621482849121, "learning_rate": 4.025642815879188e-06, "loss": 0.8854, "step": 9795 }, { "epoch": 0.8752876002412492, "grad_norm": 0.4524891674518585, "learning_rate": 4.019956012054455e-06, "loss": 1.0298, "step": 9796 }, { "epoch": 0.875376951772511, "grad_norm": 0.4642522633075714, "learning_rate": 4.014273059548512e-06, "loss": 0.9861, "step": 9797 }, { "epoch": 0.8754663033037728, "grad_norm": 0.456893652677536, "learning_rate": 4.0085939588373754e-06, "loss": 0.9152, "step": 9798 }, { "epoch": 0.8755556548350347, "grad_norm": 0.491441547870636, "learning_rate": 4.0029187103967245e-06, "loss": 0.9677, "step": 9799 }, { "epoch": 0.8756450063662966, "grad_norm": 0.4507903754711151, "learning_rate": 3.997247314701935e-06, "loss": 0.9474, "step": 9800 }, { "epoch": 0.8757343578975585, "grad_norm": 0.6348841190338135, "learning_rate": 3.991579772228032e-06, "loss": 0.9522, "step": 9801 }, { "epoch": 0.8758237094288204, "grad_norm": 0.40709996223449707, "learning_rate": 3.985916083449737e-06, "loss": 0.9453, "step": 9802 }, { "epoch": 0.8759130609600823, "grad_norm": 0.46370193362236023, "learning_rate": 3.980256248841441e-06, "loss": 0.9726, "step": 9803 }, { "epoch": 0.876002412491344, "grad_norm": 0.3987545073032379, "learning_rate": 3.974600268877221e-06, "loss": 0.9591, "step": 9804 }, { "epoch": 0.8760917640226059, "grad_norm": 0.5190705060958862, "learning_rate": 3.9689481440308265e-06, "loss": 0.9205, "step": 9805 }, { "epoch": 0.8761811155538678, "grad_norm": 0.4716487228870392, "learning_rate": 3.963299874775678e-06, "loss": 0.9981, "step": 9806 }, { "epoch": 0.8762704670851297, "grad_norm": 0.4962429702281952, "learning_rate": 3.957655461584881e-06, "loss": 0.897, "step": 9807 }, { "epoch": 0.8763598186163916, "grad_norm": 0.6615248322486877, "learning_rate": 3.952014904931217e-06, "loss": 0.8475, "step": 9808 }, { "epoch": 0.8764491701476534, "grad_norm": 0.47163599729537964, "learning_rate": 3.946378205287138e-06, "loss": 0.9074, "step": 9809 }, { "epoch": 0.8765385216789153, "grad_norm": 0.6437599658966064, "learning_rate": 3.940745363124787e-06, "loss": 0.8462, "step": 9810 }, { "epoch": 0.8766278732101771, "grad_norm": 0.5201234221458435, "learning_rate": 3.935116378915971e-06, "loss": 0.9933, "step": 9811 }, { "epoch": 0.876717224741439, "grad_norm": 0.47189584374427795, "learning_rate": 3.929491253132167e-06, "loss": 0.9501, "step": 9812 }, { "epoch": 0.8768065762727009, "grad_norm": 0.41938087344169617, "learning_rate": 3.923869986244549e-06, "loss": 0.9382, "step": 9813 }, { "epoch": 0.8768959278039627, "grad_norm": 0.42869406938552856, "learning_rate": 3.918252578723952e-06, "loss": 0.9126, "step": 9814 }, { "epoch": 0.8769852793352246, "grad_norm": 0.4569510221481323, "learning_rate": 3.912639031040899e-06, "loss": 0.9354, "step": 9815 }, { "epoch": 0.8770746308664865, "grad_norm": 0.46077507734298706, "learning_rate": 3.907029343665586e-06, "loss": 0.9599, "step": 9816 }, { "epoch": 0.8771639823977483, "grad_norm": 0.5191541314125061, "learning_rate": 3.901423517067887e-06, "loss": 0.8914, "step": 9817 }, { "epoch": 0.8772533339290102, "grad_norm": 0.48782411217689514, "learning_rate": 3.895821551717338e-06, "loss": 0.8774, "step": 9818 }, { "epoch": 0.877342685460272, "grad_norm": 0.42482325434684753, "learning_rate": 3.890223448083163e-06, "loss": 0.9598, "step": 9819 }, { "epoch": 0.8774320369915339, "grad_norm": 0.3945348858833313, "learning_rate": 3.884629206634277e-06, "loss": 0.9229, "step": 9820 }, { "epoch": 0.8775213885227958, "grad_norm": 0.41309240460395813, "learning_rate": 3.879038827839255e-06, "loss": 0.975, "step": 9821 }, { "epoch": 0.8776107400540577, "grad_norm": 0.5235944390296936, "learning_rate": 3.873452312166337e-06, "loss": 0.8413, "step": 9822 }, { "epoch": 0.8777000915853196, "grad_norm": 0.4890744388103485, "learning_rate": 3.867869660083456e-06, "loss": 0.8615, "step": 9823 }, { "epoch": 0.8777894431165814, "grad_norm": 0.46650564670562744, "learning_rate": 3.862290872058233e-06, "loss": 0.9691, "step": 9824 }, { "epoch": 0.8778787946478432, "grad_norm": 0.46951133012771606, "learning_rate": 3.856715948557938e-06, "loss": 0.9661, "step": 9825 }, { "epoch": 0.8779681461791051, "grad_norm": 0.5398757457733154, "learning_rate": 3.851144890049535e-06, "loss": 0.9167, "step": 9826 }, { "epoch": 0.878057497710367, "grad_norm": 0.49195584654808044, "learning_rate": 3.845577696999659e-06, "loss": 0.9914, "step": 9827 }, { "epoch": 0.8781468492416289, "grad_norm": 0.5577439665794373, "learning_rate": 3.840014369874617e-06, "loss": 0.856, "step": 9828 }, { "epoch": 0.8782362007728908, "grad_norm": 0.5505506992340088, "learning_rate": 3.834454909140406e-06, "loss": 0.8946, "step": 9829 }, { "epoch": 0.8783255523041527, "grad_norm": 0.4931388795375824, "learning_rate": 3.828899315262685e-06, "loss": 0.9288, "step": 9830 }, { "epoch": 0.8784149038354144, "grad_norm": 0.6808313131332397, "learning_rate": 3.823347588706805e-06, "loss": 0.9366, "step": 9831 }, { "epoch": 0.8785042553666763, "grad_norm": 0.6877908706665039, "learning_rate": 3.81779972993776e-06, "loss": 0.8903, "step": 9832 }, { "epoch": 0.8785936068979382, "grad_norm": 0.5493754744529724, "learning_rate": 3.812255739420256e-06, "loss": 0.9696, "step": 9833 }, { "epoch": 0.8786829584292001, "grad_norm": 0.48021358251571655, "learning_rate": 3.8067156176186616e-06, "loss": 0.9312, "step": 9834 }, { "epoch": 0.878772309960462, "grad_norm": 0.46737149357795715, "learning_rate": 3.8011793649970207e-06, "loss": 0.9433, "step": 9835 }, { "epoch": 0.8788616614917238, "grad_norm": 0.4425142705440521, "learning_rate": 3.7956469820190465e-06, "loss": 0.9032, "step": 9836 }, { "epoch": 0.8789510130229857, "grad_norm": 0.49997785687446594, "learning_rate": 3.790118469148146e-06, "loss": 0.8658, "step": 9837 }, { "epoch": 0.8790403645542475, "grad_norm": 0.47397980093955994, "learning_rate": 3.7845938268473823e-06, "loss": 0.9049, "step": 9838 }, { "epoch": 0.8791297160855094, "grad_norm": 0.5154423713684082, "learning_rate": 3.7790730555795075e-06, "loss": 0.956, "step": 9839 }, { "epoch": 0.8792190676167713, "grad_norm": 0.5313198566436768, "learning_rate": 3.7735561558069455e-06, "loss": 0.915, "step": 9840 }, { "epoch": 0.8793084191480331, "grad_norm": 0.499161034822464, "learning_rate": 3.7680431279917994e-06, "loss": 0.8453, "step": 9841 }, { "epoch": 0.879397770679295, "grad_norm": 0.4321475028991699, "learning_rate": 3.762533972595833e-06, "loss": 0.9776, "step": 9842 }, { "epoch": 0.8794871222105569, "grad_norm": 0.52239590883255, "learning_rate": 3.7570286900804998e-06, "loss": 0.9106, "step": 9843 }, { "epoch": 0.8795764737418188, "grad_norm": 0.5335208177566528, "learning_rate": 3.7515272809069303e-06, "loss": 0.9377, "step": 9844 }, { "epoch": 0.8796658252730806, "grad_norm": 0.5441358089447021, "learning_rate": 3.746029745535923e-06, "loss": 0.8779, "step": 9845 }, { "epoch": 0.8797551768043425, "grad_norm": 0.5699974894523621, "learning_rate": 3.7405360844279537e-06, "loss": 0.8933, "step": 9846 }, { "epoch": 0.8798445283356043, "grad_norm": 0.42555707693099976, "learning_rate": 3.735046298043182e-06, "loss": 0.9373, "step": 9847 }, { "epoch": 0.8799338798668662, "grad_norm": 0.5089072585105896, "learning_rate": 3.7295603868414297e-06, "loss": 0.863, "step": 9848 }, { "epoch": 0.8800232313981281, "grad_norm": 0.45135679841041565, "learning_rate": 3.724078351282212e-06, "loss": 0.9436, "step": 9849 }, { "epoch": 0.88011258292939, "grad_norm": 0.5915647745132446, "learning_rate": 3.7186001918246893e-06, "loss": 0.8365, "step": 9850 }, { "epoch": 0.8802019344606519, "grad_norm": 0.49184906482696533, "learning_rate": 3.713125908927728e-06, "loss": 0.8851, "step": 9851 }, { "epoch": 0.8802912859919136, "grad_norm": 0.4521387815475464, "learning_rate": 3.7076555030498506e-06, "loss": 0.8955, "step": 9852 }, { "epoch": 0.8803806375231755, "grad_norm": 0.43427813053131104, "learning_rate": 3.7021889746492676e-06, "loss": 0.9594, "step": 9853 }, { "epoch": 0.8804699890544374, "grad_norm": 0.4693211019039154, "learning_rate": 3.6967263241838636e-06, "loss": 0.8923, "step": 9854 }, { "epoch": 0.8805593405856993, "grad_norm": 0.4870004653930664, "learning_rate": 3.691267552111183e-06, "loss": 0.9082, "step": 9855 }, { "epoch": 0.8806486921169612, "grad_norm": 0.4808718264102936, "learning_rate": 3.6858126588884544e-06, "loss": 0.9936, "step": 9856 }, { "epoch": 0.8807380436482231, "grad_norm": 0.602287232875824, "learning_rate": 3.6803616449725964e-06, "loss": 0.8709, "step": 9857 }, { "epoch": 0.8808273951794849, "grad_norm": 0.549919843673706, "learning_rate": 3.6749145108201766e-06, "loss": 0.9254, "step": 9858 }, { "epoch": 0.8809167467107467, "grad_norm": 0.6031754612922668, "learning_rate": 3.6694712568874577e-06, "loss": 0.8894, "step": 9859 }, { "epoch": 0.8810060982420086, "grad_norm": 0.6014052033424377, "learning_rate": 3.66403188363037e-06, "loss": 0.9024, "step": 9860 }, { "epoch": 0.8810954497732705, "grad_norm": 0.4804718792438507, "learning_rate": 3.6585963915045264e-06, "loss": 0.9533, "step": 9861 }, { "epoch": 0.8811848013045324, "grad_norm": 0.5219516754150391, "learning_rate": 3.6531647809651904e-06, "loss": 0.9346, "step": 9862 }, { "epoch": 0.8812741528357942, "grad_norm": 0.4467054307460785, "learning_rate": 3.647737052467326e-06, "loss": 0.969, "step": 9863 }, { "epoch": 0.8813635043670561, "grad_norm": 0.4762718081474304, "learning_rate": 3.642313206465564e-06, "loss": 0.9917, "step": 9864 }, { "epoch": 0.881452855898318, "grad_norm": 0.5307314395904541, "learning_rate": 3.6368932434142076e-06, "loss": 0.9542, "step": 9865 }, { "epoch": 0.8815422074295798, "grad_norm": 0.38814452290534973, "learning_rate": 3.631477163767233e-06, "loss": 0.9693, "step": 9866 }, { "epoch": 0.8816315589608417, "grad_norm": 0.4484859108924866, "learning_rate": 3.6260649679783044e-06, "loss": 0.9018, "step": 9867 }, { "epoch": 0.8817209104921035, "grad_norm": 0.6517726182937622, "learning_rate": 3.620656656500743e-06, "loss": 0.7952, "step": 9868 }, { "epoch": 0.8818102620233654, "grad_norm": 0.5478501915931702, "learning_rate": 3.615252229787558e-06, "loss": 0.8884, "step": 9869 }, { "epoch": 0.8818996135546273, "grad_norm": 0.5623330473899841, "learning_rate": 3.6098516882914213e-06, "loss": 0.9665, "step": 9870 }, { "epoch": 0.8819889650858892, "grad_norm": 0.5018959641456604, "learning_rate": 3.6044550324646987e-06, "loss": 0.9083, "step": 9871 }, { "epoch": 0.8820783166171511, "grad_norm": 0.3743535280227661, "learning_rate": 3.599062262759395e-06, "loss": 0.9624, "step": 9872 }, { "epoch": 0.8821676681484129, "grad_norm": 0.6572086215019226, "learning_rate": 3.5936733796272327e-06, "loss": 0.8587, "step": 9873 }, { "epoch": 0.8822570196796747, "grad_norm": 0.49752864241600037, "learning_rate": 3.588288383519578e-06, "loss": 0.8681, "step": 9874 }, { "epoch": 0.8823463712109366, "grad_norm": 0.5613961219787598, "learning_rate": 3.5829072748874813e-06, "loss": 0.9468, "step": 9875 }, { "epoch": 0.8824357227421985, "grad_norm": 0.5418183207511902, "learning_rate": 3.577530054181677e-06, "loss": 0.8794, "step": 9876 }, { "epoch": 0.8825250742734604, "grad_norm": 0.5422852039337158, "learning_rate": 3.5721567218525542e-06, "loss": 0.9458, "step": 9877 }, { "epoch": 0.8826144258047223, "grad_norm": 0.42068326473236084, "learning_rate": 3.566787278350192e-06, "loss": 0.9237, "step": 9878 }, { "epoch": 0.882703777335984, "grad_norm": 0.4543195068836212, "learning_rate": 3.5614217241243363e-06, "loss": 0.9494, "step": 9879 }, { "epoch": 0.8827931288672459, "grad_norm": 0.5260372757911682, "learning_rate": 3.556060059624411e-06, "loss": 0.9434, "step": 9880 }, { "epoch": 0.8828824803985078, "grad_norm": 0.5226284861564636, "learning_rate": 3.550702285299523e-06, "loss": 0.9078, "step": 9881 }, { "epoch": 0.8829718319297697, "grad_norm": 0.4836858808994293, "learning_rate": 3.5453484015984253e-06, "loss": 0.9321, "step": 9882 }, { "epoch": 0.8830611834610316, "grad_norm": 0.47180086374282837, "learning_rate": 3.539998408969569e-06, "loss": 0.885, "step": 9883 }, { "epoch": 0.8831505349922935, "grad_norm": 0.473090261220932, "learning_rate": 3.5346523078610748e-06, "loss": 0.9625, "step": 9884 }, { "epoch": 0.8832398865235553, "grad_norm": 0.4888576567173004, "learning_rate": 3.529310098720734e-06, "loss": 0.8954, "step": 9885 }, { "epoch": 0.8833292380548171, "grad_norm": 0.46975868940353394, "learning_rate": 3.5239717819960104e-06, "loss": 1.0308, "step": 9886 }, { "epoch": 0.883418589586079, "grad_norm": 0.4399878680706024, "learning_rate": 3.5186373581340636e-06, "loss": 0.9445, "step": 9887 }, { "epoch": 0.8835079411173409, "grad_norm": 0.4216008484363556, "learning_rate": 3.5133068275816806e-06, "loss": 0.9615, "step": 9888 }, { "epoch": 0.8835972926486028, "grad_norm": 0.6720105409622192, "learning_rate": 3.5079801907853648e-06, "loss": 0.8721, "step": 9889 }, { "epoch": 0.8836866441798646, "grad_norm": 0.4864836037158966, "learning_rate": 3.5026574481912767e-06, "loss": 0.9017, "step": 9890 }, { "epoch": 0.8837759957111265, "grad_norm": 0.44451501965522766, "learning_rate": 3.4973386002452535e-06, "loss": 0.9123, "step": 9891 }, { "epoch": 0.8838653472423884, "grad_norm": 0.4861214756965637, "learning_rate": 3.492023647392817e-06, "loss": 0.9691, "step": 9892 }, { "epoch": 0.8839546987736502, "grad_norm": 0.5341703295707703, "learning_rate": 3.4867125900791274e-06, "loss": 0.8504, "step": 9893 }, { "epoch": 0.8840440503049121, "grad_norm": 0.4494452178478241, "learning_rate": 3.481405428749057e-06, "loss": 0.962, "step": 9894 }, { "epoch": 0.884133401836174, "grad_norm": 0.5674958229064941, "learning_rate": 3.4761021638471337e-06, "loss": 0.9775, "step": 9895 }, { "epoch": 0.8842227533674358, "grad_norm": 0.480878621339798, "learning_rate": 3.4708027958175625e-06, "loss": 0.8908, "step": 9896 }, { "epoch": 0.8843121048986977, "grad_norm": 0.6041078567504883, "learning_rate": 3.4655073251042226e-06, "loss": 0.8541, "step": 9897 }, { "epoch": 0.8844014564299596, "grad_norm": 0.47140049934387207, "learning_rate": 3.4602157521506638e-06, "loss": 0.9046, "step": 9898 }, { "epoch": 0.8844908079612215, "grad_norm": 0.5176533460617065, "learning_rate": 3.4549280774001158e-06, "loss": 0.9528, "step": 9899 }, { "epoch": 0.8845801594924833, "grad_norm": 0.48900794982910156, "learning_rate": 3.4496443012954795e-06, "loss": 0.904, "step": 9900 }, { "epoch": 0.8846695110237451, "grad_norm": 0.4905250668525696, "learning_rate": 3.444364424279323e-06, "loss": 0.8826, "step": 9901 }, { "epoch": 0.884758862555007, "grad_norm": 0.5473768711090088, "learning_rate": 3.4390884467938978e-06, "loss": 0.9235, "step": 9902 }, { "epoch": 0.8848482140862689, "grad_norm": 0.4095969498157501, "learning_rate": 3.433816369281112e-06, "loss": 0.9529, "step": 9903 }, { "epoch": 0.8849375656175308, "grad_norm": 0.5141059756278992, "learning_rate": 3.428548192182568e-06, "loss": 0.9323, "step": 9904 }, { "epoch": 0.8850269171487927, "grad_norm": 0.5582877993583679, "learning_rate": 3.423283915939529e-06, "loss": 0.9181, "step": 9905 }, { "epoch": 0.8851162686800546, "grad_norm": 0.4510602653026581, "learning_rate": 3.418023540992932e-06, "loss": 1.0325, "step": 9906 }, { "epoch": 0.8852056202113163, "grad_norm": 0.47843095660209656, "learning_rate": 3.41276706778339e-06, "loss": 1.007, "step": 9907 }, { "epoch": 0.8852949717425782, "grad_norm": 0.5486652255058289, "learning_rate": 3.4075144967511963e-06, "loss": 0.9213, "step": 9908 }, { "epoch": 0.8853843232738401, "grad_norm": 0.4576515257358551, "learning_rate": 3.4022658283362985e-06, "loss": 0.9397, "step": 9909 }, { "epoch": 0.885473674805102, "grad_norm": 0.47343116998672485, "learning_rate": 3.397021062978334e-06, "loss": 0.9073, "step": 9910 }, { "epoch": 0.8855630263363639, "grad_norm": 0.5906479358673096, "learning_rate": 3.3917802011166067e-06, "loss": 0.867, "step": 9911 }, { "epoch": 0.8856523778676257, "grad_norm": 0.4365607798099518, "learning_rate": 3.3865432431901046e-06, "loss": 0.9728, "step": 9912 }, { "epoch": 0.8857417293988876, "grad_norm": 0.47159871459007263, "learning_rate": 3.3813101896374653e-06, "loss": 0.9237, "step": 9913 }, { "epoch": 0.8858310809301494, "grad_norm": 0.6291021704673767, "learning_rate": 3.3760810408970113e-06, "loss": 0.9258, "step": 9914 }, { "epoch": 0.8859204324614113, "grad_norm": 0.4011688530445099, "learning_rate": 3.3708557974067523e-06, "loss": 0.9217, "step": 9915 }, { "epoch": 0.8860097839926732, "grad_norm": 0.6337020397186279, "learning_rate": 3.3656344596043442e-06, "loss": 0.9362, "step": 9916 }, { "epoch": 0.886099135523935, "grad_norm": 0.47050267457962036, "learning_rate": 3.3604170279271374e-06, "loss": 0.9049, "step": 9917 }, { "epoch": 0.8861884870551969, "grad_norm": 0.5305570960044861, "learning_rate": 3.3552035028121486e-06, "loss": 0.8664, "step": 9918 }, { "epoch": 0.8862778385864588, "grad_norm": 0.4407976269721985, "learning_rate": 3.3499938846960675e-06, "loss": 0.9272, "step": 9919 }, { "epoch": 0.8863671901177207, "grad_norm": 0.4790654182434082, "learning_rate": 3.3447881740152566e-06, "loss": 0.93, "step": 9920 }, { "epoch": 0.8864565416489825, "grad_norm": 0.5529206395149231, "learning_rate": 3.3395863712057383e-06, "loss": 0.8797, "step": 9921 }, { "epoch": 0.8865458931802443, "grad_norm": 0.6508722901344299, "learning_rate": 3.334388476703226e-06, "loss": 0.8327, "step": 9922 }, { "epoch": 0.8866352447115062, "grad_norm": 0.4920750558376312, "learning_rate": 3.329194490943094e-06, "loss": 0.9091, "step": 9923 }, { "epoch": 0.8867245962427681, "grad_norm": 0.5537749528884888, "learning_rate": 3.32400441436041e-06, "loss": 0.9325, "step": 9924 }, { "epoch": 0.88681394777403, "grad_norm": 0.5373173952102661, "learning_rate": 3.3188182473898767e-06, "loss": 0.8741, "step": 9925 }, { "epoch": 0.8869032993052919, "grad_norm": 0.5053735375404358, "learning_rate": 3.313635990465902e-06, "loss": 0.8817, "step": 9926 }, { "epoch": 0.8869926508365538, "grad_norm": 0.5587336421012878, "learning_rate": 3.3084576440225555e-06, "loss": 0.9233, "step": 9927 }, { "epoch": 0.8870820023678155, "grad_norm": 0.45530253648757935, "learning_rate": 3.3032832084935795e-06, "loss": 0.9825, "step": 9928 }, { "epoch": 0.8871713538990774, "grad_norm": 0.575091540813446, "learning_rate": 3.298112684312382e-06, "loss": 0.8839, "step": 9929 }, { "epoch": 0.8872607054303393, "grad_norm": 0.5958606004714966, "learning_rate": 3.292946071912051e-06, "loss": 0.8694, "step": 9930 }, { "epoch": 0.8873500569616012, "grad_norm": 0.5950548648834229, "learning_rate": 3.2877833717253503e-06, "loss": 0.9257, "step": 9931 }, { "epoch": 0.8874394084928631, "grad_norm": 0.46099838614463806, "learning_rate": 3.282624584184718e-06, "loss": 0.9811, "step": 9932 }, { "epoch": 0.887528760024125, "grad_norm": 0.5730208158493042, "learning_rate": 3.277469709722242e-06, "loss": 0.9349, "step": 9933 }, { "epoch": 0.8876181115553868, "grad_norm": 0.4635668992996216, "learning_rate": 3.2723187487696982e-06, "loss": 0.9519, "step": 9934 }, { "epoch": 0.8877074630866486, "grad_norm": 0.5630105137825012, "learning_rate": 3.267171701758548e-06, "loss": 0.9327, "step": 9935 }, { "epoch": 0.8877968146179105, "grad_norm": 0.43999359011650085, "learning_rate": 3.262028569119896e-06, "loss": 0.9838, "step": 9936 }, { "epoch": 0.8878861661491724, "grad_norm": 0.4146654009819031, "learning_rate": 3.2568893512845477e-06, "loss": 0.9695, "step": 9937 }, { "epoch": 0.8879755176804343, "grad_norm": 0.48017647862434387, "learning_rate": 3.251754048682959e-06, "loss": 0.8955, "step": 9938 }, { "epoch": 0.8880648692116961, "grad_norm": 0.48065900802612305, "learning_rate": 3.2466226617452745e-06, "loss": 0.9756, "step": 9939 }, { "epoch": 0.888154220742958, "grad_norm": 0.5396913290023804, "learning_rate": 3.2414951909012946e-06, "loss": 0.96, "step": 9940 }, { "epoch": 0.8882435722742198, "grad_norm": 0.665141224861145, "learning_rate": 3.2363716365804984e-06, "loss": 0.887, "step": 9941 }, { "epoch": 0.8883329238054817, "grad_norm": 0.548674464225769, "learning_rate": 3.2312519992120538e-06, "loss": 0.8511, "step": 9942 }, { "epoch": 0.8884222753367436, "grad_norm": 0.6099333763122559, "learning_rate": 3.226136279224762e-06, "loss": 0.9248, "step": 9943 }, { "epoch": 0.8885116268680054, "grad_norm": 0.5216543078422546, "learning_rate": 3.2210244770471356e-06, "loss": 0.8861, "step": 9944 }, { "epoch": 0.8886009783992673, "grad_norm": 0.4764314591884613, "learning_rate": 3.215916593107332e-06, "loss": 0.945, "step": 9945 }, { "epoch": 0.8886903299305292, "grad_norm": 0.42511335015296936, "learning_rate": 3.2108126278331983e-06, "loss": 0.9197, "step": 9946 }, { "epoch": 0.8887796814617911, "grad_norm": 0.5028632283210754, "learning_rate": 3.2057125816522483e-06, "loss": 0.9817, "step": 9947 }, { "epoch": 0.8888690329930529, "grad_norm": 0.40669888257980347, "learning_rate": 3.2006164549916563e-06, "loss": 1.0339, "step": 9948 }, { "epoch": 0.8889583845243147, "grad_norm": 0.4078708291053772, "learning_rate": 3.195524248278281e-06, "loss": 0.9188, "step": 9949 }, { "epoch": 0.8890477360555766, "grad_norm": 0.45386314392089844, "learning_rate": 3.190435961938654e-06, "loss": 0.9065, "step": 9950 }, { "epoch": 0.8891370875868385, "grad_norm": 0.39312687516212463, "learning_rate": 3.185351596398961e-06, "loss": 0.9465, "step": 9951 }, { "epoch": 0.8892264391181004, "grad_norm": 0.4432995319366455, "learning_rate": 3.1802711520850957e-06, "loss": 0.9804, "step": 9952 }, { "epoch": 0.8893157906493623, "grad_norm": 0.48735445737838745, "learning_rate": 3.1751946294225733e-06, "loss": 0.8804, "step": 9953 }, { "epoch": 0.8894051421806242, "grad_norm": 0.4991060793399811, "learning_rate": 3.1701220288366197e-06, "loss": 0.9357, "step": 9954 }, { "epoch": 0.8894944937118859, "grad_norm": 0.5495728254318237, "learning_rate": 3.165053350752112e-06, "loss": 0.8973, "step": 9955 }, { "epoch": 0.8895838452431478, "grad_norm": 0.5644074082374573, "learning_rate": 3.159988595593616e-06, "loss": 0.8905, "step": 9956 }, { "epoch": 0.8896731967744097, "grad_norm": 0.5330132842063904, "learning_rate": 3.1549277637853593e-06, "loss": 0.8993, "step": 9957 }, { "epoch": 0.8897625483056716, "grad_norm": 0.4881329834461212, "learning_rate": 3.1498708557512246e-06, "loss": 0.9377, "step": 9958 }, { "epoch": 0.8898518998369335, "grad_norm": 0.43445295095443726, "learning_rate": 3.1448178719147957e-06, "loss": 0.9774, "step": 9959 }, { "epoch": 0.8899412513681954, "grad_norm": 0.5099343657493591, "learning_rate": 3.1397688126993065e-06, "loss": 0.8734, "step": 9960 }, { "epoch": 0.8900306028994572, "grad_norm": 0.4378454387187958, "learning_rate": 3.134723678527679e-06, "loss": 0.945, "step": 9961 }, { "epoch": 0.890119954430719, "grad_norm": 0.45414188504219055, "learning_rate": 3.1296824698224924e-06, "loss": 0.9504, "step": 9962 }, { "epoch": 0.8902093059619809, "grad_norm": 0.40855854749679565, "learning_rate": 3.1246451870059977e-06, "loss": 0.9502, "step": 9963 }, { "epoch": 0.8902986574932428, "grad_norm": 0.521061360836029, "learning_rate": 3.1196118305001243e-06, "loss": 0.8663, "step": 9964 }, { "epoch": 0.8903880090245047, "grad_norm": 0.4307143986225128, "learning_rate": 3.114582400726468e-06, "loss": 0.9124, "step": 9965 }, { "epoch": 0.8904773605557665, "grad_norm": 0.5008573532104492, "learning_rate": 3.109556898106297e-06, "loss": 0.9045, "step": 9966 }, { "epoch": 0.8905667120870284, "grad_norm": 0.5884765982627869, "learning_rate": 3.1045353230605535e-06, "loss": 0.9376, "step": 9967 }, { "epoch": 0.8906560636182903, "grad_norm": 0.49903714656829834, "learning_rate": 3.0995176760098445e-06, "loss": 0.9345, "step": 9968 }, { "epoch": 0.8907454151495521, "grad_norm": 0.4943690299987793, "learning_rate": 3.094503957374456e-06, "loss": 0.9906, "step": 9969 }, { "epoch": 0.890834766680814, "grad_norm": 0.5311302542686462, "learning_rate": 3.089494167574336e-06, "loss": 0.9613, "step": 9970 }, { "epoch": 0.8909241182120758, "grad_norm": 0.5208662748336792, "learning_rate": 3.08448830702911e-06, "loss": 0.9179, "step": 9971 }, { "epoch": 0.8910134697433377, "grad_norm": 0.49993789196014404, "learning_rate": 3.0794863761580805e-06, "loss": 0.8284, "step": 9972 }, { "epoch": 0.8911028212745996, "grad_norm": 0.5702754259109497, "learning_rate": 3.074488375380197e-06, "loss": 0.9488, "step": 9973 }, { "epoch": 0.8911921728058615, "grad_norm": 0.41398748755455017, "learning_rate": 3.0694943051140958e-06, "loss": 0.9195, "step": 9974 }, { "epoch": 0.8912815243371234, "grad_norm": 0.46768733859062195, "learning_rate": 3.0645041657780927e-06, "loss": 0.9542, "step": 9975 }, { "epoch": 0.8913708758683851, "grad_norm": 0.47688964009284973, "learning_rate": 3.0595179577901643e-06, "loss": 0.8927, "step": 9976 }, { "epoch": 0.891460227399647, "grad_norm": 0.501356840133667, "learning_rate": 3.054535681567955e-06, "loss": 0.9192, "step": 9977 }, { "epoch": 0.8915495789309089, "grad_norm": 0.44670113921165466, "learning_rate": 3.0495573375287854e-06, "loss": 0.9633, "step": 9978 }, { "epoch": 0.8916389304621708, "grad_norm": 0.5801134705543518, "learning_rate": 3.044582926089645e-06, "loss": 0.8739, "step": 9979 }, { "epoch": 0.8917282819934327, "grad_norm": 0.5927140712738037, "learning_rate": 3.039612447667195e-06, "loss": 0.9109, "step": 9980 }, { "epoch": 0.8918176335246946, "grad_norm": 0.49671000242233276, "learning_rate": 3.034645902677763e-06, "loss": 0.9923, "step": 9981 }, { "epoch": 0.8919069850559564, "grad_norm": 0.46051478385925293, "learning_rate": 3.0296832915373497e-06, "loss": 0.9259, "step": 9982 }, { "epoch": 0.8919963365872182, "grad_norm": 0.5306558609008789, "learning_rate": 3.024724614661639e-06, "loss": 0.913, "step": 9983 }, { "epoch": 0.8920856881184801, "grad_norm": 0.3877553939819336, "learning_rate": 3.0197698724659497e-06, "loss": 1.001, "step": 9984 }, { "epoch": 0.892175039649742, "grad_norm": 0.5105606913566589, "learning_rate": 3.0148190653653096e-06, "loss": 0.9371, "step": 9985 }, { "epoch": 0.8922643911810039, "grad_norm": 0.5007322430610657, "learning_rate": 3.009872193774399e-06, "loss": 0.9932, "step": 9986 }, { "epoch": 0.8923537427122658, "grad_norm": 0.5739557147026062, "learning_rate": 3.0049292581075692e-06, "loss": 0.8694, "step": 9987 }, { "epoch": 0.8924430942435276, "grad_norm": 0.4719444215297699, "learning_rate": 2.9999902587788507e-06, "loss": 1.0219, "step": 9988 }, { "epoch": 0.8925324457747895, "grad_norm": 0.4949089586734772, "learning_rate": 2.9950551962019293e-06, "loss": 0.9235, "step": 9989 }, { "epoch": 0.8926217973060513, "grad_norm": 0.45871758460998535, "learning_rate": 2.990124070790179e-06, "loss": 0.9429, "step": 9990 }, { "epoch": 0.8927111488373132, "grad_norm": 0.5253780484199524, "learning_rate": 2.98519688295662e-06, "loss": 0.939, "step": 9991 }, { "epoch": 0.8928005003685751, "grad_norm": 0.4314812123775482, "learning_rate": 2.9802736331139615e-06, "loss": 0.9584, "step": 9992 }, { "epoch": 0.8928898518998369, "grad_norm": 0.473132848739624, "learning_rate": 2.9753543216745784e-06, "loss": 0.9659, "step": 9993 }, { "epoch": 0.8929792034310988, "grad_norm": 0.4422551989555359, "learning_rate": 2.9704389490505303e-06, "loss": 0.9317, "step": 9994 }, { "epoch": 0.8930685549623607, "grad_norm": 0.5181037783622742, "learning_rate": 2.9655275156535103e-06, "loss": 0.9623, "step": 9995 }, { "epoch": 0.8931579064936226, "grad_norm": 0.5638878345489502, "learning_rate": 2.960620021894911e-06, "loss": 0.908, "step": 9996 }, { "epoch": 0.8932472580248844, "grad_norm": 0.5014554858207703, "learning_rate": 2.955716468185793e-06, "loss": 0.8998, "step": 9997 }, { "epoch": 0.8933366095561462, "grad_norm": 0.46494272351264954, "learning_rate": 2.950816854936872e-06, "loss": 0.9615, "step": 9998 }, { "epoch": 0.8934259610874081, "grad_norm": 0.4722679555416107, "learning_rate": 2.9459211825585475e-06, "loss": 0.9962, "step": 9999 }, { "epoch": 0.89351531261867, "grad_norm": 0.6044051051139832, "learning_rate": 2.941029451460886e-06, "loss": 0.9006, "step": 10000 }, { "epoch": 0.8936046641499319, "grad_norm": 0.4716538190841675, "learning_rate": 2.936141662053621e-06, "loss": 0.9416, "step": 10001 }, { "epoch": 0.8936940156811938, "grad_norm": 0.5190637111663818, "learning_rate": 2.931257814746158e-06, "loss": 0.8654, "step": 10002 }, { "epoch": 0.8937833672124555, "grad_norm": 0.46541985869407654, "learning_rate": 2.926377909947575e-06, "loss": 0.9391, "step": 10003 }, { "epoch": 0.8938727187437174, "grad_norm": 0.46355336904525757, "learning_rate": 2.9215019480666015e-06, "loss": 0.8925, "step": 10004 }, { "epoch": 0.8939620702749793, "grad_norm": 0.5161470770835876, "learning_rate": 2.916629929511666e-06, "loss": 0.8671, "step": 10005 }, { "epoch": 0.8940514218062412, "grad_norm": 0.544810950756073, "learning_rate": 2.911761854690842e-06, "loss": 0.911, "step": 10006 }, { "epoch": 0.8941407733375031, "grad_norm": 0.4915909469127655, "learning_rate": 2.9068977240118867e-06, "loss": 0.8727, "step": 10007 }, { "epoch": 0.894230124868765, "grad_norm": 0.658275306224823, "learning_rate": 2.9020375378822297e-06, "loss": 0.9053, "step": 10008 }, { "epoch": 0.8943194764000268, "grad_norm": 0.48863136768341064, "learning_rate": 2.897181296708951e-06, "loss": 0.8799, "step": 10009 }, { "epoch": 0.8944088279312886, "grad_norm": 0.46332526206970215, "learning_rate": 2.8923290008988193e-06, "loss": 0.9081, "step": 10010 }, { "epoch": 0.8944981794625505, "grad_norm": 0.4719219505786896, "learning_rate": 2.8874806508582652e-06, "loss": 0.9298, "step": 10011 }, { "epoch": 0.8945875309938124, "grad_norm": 0.4817638099193573, "learning_rate": 2.882636246993392e-06, "loss": 0.9128, "step": 10012 }, { "epoch": 0.8946768825250743, "grad_norm": 0.49703267216682434, "learning_rate": 2.877795789709975e-06, "loss": 0.8182, "step": 10013 }, { "epoch": 0.8947662340563362, "grad_norm": 0.4652135372161865, "learning_rate": 2.87295927941344e-06, "loss": 0.9703, "step": 10014 }, { "epoch": 0.894855585587598, "grad_norm": 0.4329572916030884, "learning_rate": 2.868126716508901e-06, "loss": 0.939, "step": 10015 }, { "epoch": 0.8949449371188599, "grad_norm": 0.4363684356212616, "learning_rate": 2.8632981014011463e-06, "loss": 0.8893, "step": 10016 }, { "epoch": 0.8950342886501217, "grad_norm": 0.564899742603302, "learning_rate": 2.8584734344946073e-06, "loss": 0.9229, "step": 10017 }, { "epoch": 0.8951236401813836, "grad_norm": 0.5711920261383057, "learning_rate": 2.853652716193417e-06, "loss": 0.8848, "step": 10018 }, { "epoch": 0.8952129917126455, "grad_norm": 0.4611048996448517, "learning_rate": 2.8488359469013514e-06, "loss": 0.8717, "step": 10019 }, { "epoch": 0.8953023432439073, "grad_norm": 0.7279905676841736, "learning_rate": 2.844023127021872e-06, "loss": 0.7405, "step": 10020 }, { "epoch": 0.8953916947751692, "grad_norm": 0.5130376219749451, "learning_rate": 2.839214256958106e-06, "loss": 0.9462, "step": 10021 }, { "epoch": 0.8954810463064311, "grad_norm": 0.43503376841545105, "learning_rate": 2.8344093371128424e-06, "loss": 0.8974, "step": 10022 }, { "epoch": 0.895570397837693, "grad_norm": 0.5059356689453125, "learning_rate": 2.8296083678885477e-06, "loss": 0.9436, "step": 10023 }, { "epoch": 0.8956597493689548, "grad_norm": 0.5730863213539124, "learning_rate": 2.8248113496873507e-06, "loss": 0.8933, "step": 10024 }, { "epoch": 0.8957491009002166, "grad_norm": 0.4714071750640869, "learning_rate": 2.8200182829110523e-06, "loss": 0.9942, "step": 10025 }, { "epoch": 0.8958384524314785, "grad_norm": 0.4940183758735657, "learning_rate": 2.8152291679611255e-06, "loss": 0.9125, "step": 10026 }, { "epoch": 0.8959278039627404, "grad_norm": 0.6629910469055176, "learning_rate": 2.810444005238716e-06, "loss": 0.8461, "step": 10027 }, { "epoch": 0.8960171554940023, "grad_norm": 0.4740060865879059, "learning_rate": 2.80566279514462e-06, "loss": 1.0388, "step": 10028 }, { "epoch": 0.8961065070252642, "grad_norm": 0.4990270733833313, "learning_rate": 2.800885538079323e-06, "loss": 0.8857, "step": 10029 }, { "epoch": 0.8961958585565261, "grad_norm": 0.4073428213596344, "learning_rate": 2.796112234442966e-06, "loss": 0.9675, "step": 10030 }, { "epoch": 0.8962852100877878, "grad_norm": 0.541317343711853, "learning_rate": 2.791342884635362e-06, "loss": 0.9653, "step": 10031 }, { "epoch": 0.8963745616190497, "grad_norm": 0.3983585834503174, "learning_rate": 2.7865774890560025e-06, "loss": 0.9158, "step": 10032 }, { "epoch": 0.8964639131503116, "grad_norm": 0.4200194776058197, "learning_rate": 2.7818160481040465e-06, "loss": 0.9852, "step": 10033 }, { "epoch": 0.8965532646815735, "grad_norm": 0.48257651925086975, "learning_rate": 2.7770585621782973e-06, "loss": 0.9329, "step": 10034 }, { "epoch": 0.8966426162128354, "grad_norm": 0.4446716010570526, "learning_rate": 2.772305031677258e-06, "loss": 0.9151, "step": 10035 }, { "epoch": 0.8967319677440972, "grad_norm": 0.4650631546974182, "learning_rate": 2.767555456999077e-06, "loss": 0.956, "step": 10036 }, { "epoch": 0.8968213192753591, "grad_norm": 0.5073647499084473, "learning_rate": 2.762809838541591e-06, "loss": 0.9766, "step": 10037 }, { "epoch": 0.8969106708066209, "grad_norm": 0.5146875977516174, "learning_rate": 2.758068176702294e-06, "loss": 0.9526, "step": 10038 }, { "epoch": 0.8970000223378828, "grad_norm": 0.5599371790885925, "learning_rate": 2.7533304718783516e-06, "loss": 0.934, "step": 10039 }, { "epoch": 0.8970893738691447, "grad_norm": 0.5773612856864929, "learning_rate": 2.74859672446659e-06, "loss": 0.9475, "step": 10040 }, { "epoch": 0.8971787254004066, "grad_norm": 0.5281670093536377, "learning_rate": 2.7438669348635202e-06, "loss": 0.9335, "step": 10041 }, { "epoch": 0.8972680769316684, "grad_norm": 0.48857590556144714, "learning_rate": 2.7391411034653094e-06, "loss": 0.8603, "step": 10042 }, { "epoch": 0.8973574284629303, "grad_norm": 0.4282703995704651, "learning_rate": 2.734419230667801e-06, "loss": 0.9902, "step": 10043 }, { "epoch": 0.8974467799941922, "grad_norm": 0.5266483426094055, "learning_rate": 2.7297013168664897e-06, "loss": 0.8483, "step": 10044 }, { "epoch": 0.897536131525454, "grad_norm": 0.5313611626625061, "learning_rate": 2.7249873624565604e-06, "loss": 0.8934, "step": 10045 }, { "epoch": 0.8976254830567159, "grad_norm": 0.4362044632434845, "learning_rate": 2.720277367832852e-06, "loss": 0.9271, "step": 10046 }, { "epoch": 0.8977148345879777, "grad_norm": 0.5316069722175598, "learning_rate": 2.7155713333898825e-06, "loss": 0.8928, "step": 10047 }, { "epoch": 0.8978041861192396, "grad_norm": 0.5463095307350159, "learning_rate": 2.7108692595218254e-06, "loss": 0.932, "step": 10048 }, { "epoch": 0.8978935376505015, "grad_norm": 0.475721150636673, "learning_rate": 2.706171146622538e-06, "loss": 0.9658, "step": 10049 }, { "epoch": 0.8979828891817634, "grad_norm": 0.4900808036327362, "learning_rate": 2.7014769950855334e-06, "loss": 0.963, "step": 10050 }, { "epoch": 0.8980722407130253, "grad_norm": 0.411359578371048, "learning_rate": 2.6967868053039913e-06, "loss": 0.972, "step": 10051 }, { "epoch": 0.898161592244287, "grad_norm": 0.5783373713493347, "learning_rate": 2.6921005776707755e-06, "loss": 0.9393, "step": 10052 }, { "epoch": 0.8982509437755489, "grad_norm": 0.4613523483276367, "learning_rate": 2.6874183125784047e-06, "loss": 0.9651, "step": 10053 }, { "epoch": 0.8983402953068108, "grad_norm": 0.5736072063446045, "learning_rate": 2.682740010419066e-06, "loss": 0.8834, "step": 10054 }, { "epoch": 0.8984296468380727, "grad_norm": 0.4773879051208496, "learning_rate": 2.678065671584612e-06, "loss": 0.9388, "step": 10055 }, { "epoch": 0.8985189983693346, "grad_norm": 0.4866504669189453, "learning_rate": 2.673395296466574e-06, "loss": 0.9726, "step": 10056 }, { "epoch": 0.8986083499005965, "grad_norm": 0.5097846984863281, "learning_rate": 2.6687288854561455e-06, "loss": 0.8879, "step": 10057 }, { "epoch": 0.8986977014318583, "grad_norm": 0.6318731307983398, "learning_rate": 2.664066438944185e-06, "loss": 0.9752, "step": 10058 }, { "epoch": 0.8987870529631201, "grad_norm": 0.47188690304756165, "learning_rate": 2.6594079573212303e-06, "loss": 0.8645, "step": 10059 }, { "epoch": 0.898876404494382, "grad_norm": 0.43872636556625366, "learning_rate": 2.654753440977481e-06, "loss": 0.9638, "step": 10060 }, { "epoch": 0.8989657560256439, "grad_norm": 0.521487295627594, "learning_rate": 2.650102890302786e-06, "loss": 0.957, "step": 10061 }, { "epoch": 0.8990551075569058, "grad_norm": 0.5581962466239929, "learning_rate": 2.6454563056866834e-06, "loss": 0.855, "step": 10062 }, { "epoch": 0.8991444590881676, "grad_norm": 0.43780362606048584, "learning_rate": 2.640813687518384e-06, "loss": 0.9888, "step": 10063 }, { "epoch": 0.8992338106194295, "grad_norm": 0.46411171555519104, "learning_rate": 2.6361750361867554e-06, "loss": 0.8972, "step": 10064 }, { "epoch": 0.8993231621506913, "grad_norm": 0.5126439332962036, "learning_rate": 2.631540352080325e-06, "loss": 0.8815, "step": 10065 }, { "epoch": 0.8994125136819532, "grad_norm": 0.449725866317749, "learning_rate": 2.626909635587299e-06, "loss": 0.9732, "step": 10066 }, { "epoch": 0.8995018652132151, "grad_norm": 0.47947466373443604, "learning_rate": 2.6222828870955505e-06, "loss": 0.8826, "step": 10067 }, { "epoch": 0.899591216744477, "grad_norm": 0.4623918831348419, "learning_rate": 2.6176601069926255e-06, "loss": 0.9146, "step": 10068 }, { "epoch": 0.8996805682757388, "grad_norm": 0.5010434985160828, "learning_rate": 2.613041295665719e-06, "loss": 0.9345, "step": 10069 }, { "epoch": 0.8997699198070007, "grad_norm": 0.5494645833969116, "learning_rate": 2.608426453501722e-06, "loss": 0.8777, "step": 10070 }, { "epoch": 0.8998592713382626, "grad_norm": 0.4975086450576782, "learning_rate": 2.6038155808871587e-06, "loss": 0.8943, "step": 10071 }, { "epoch": 0.8999486228695244, "grad_norm": 0.4562755823135376, "learning_rate": 2.5992086782082536e-06, "loss": 0.8846, "step": 10072 }, { "epoch": 0.9000379744007863, "grad_norm": 0.5983653664588928, "learning_rate": 2.5946057458508756e-06, "loss": 0.8868, "step": 10073 }, { "epoch": 0.9001273259320481, "grad_norm": 0.586626410484314, "learning_rate": 2.5900067842005772e-06, "loss": 0.8883, "step": 10074 }, { "epoch": 0.90021667746331, "grad_norm": 0.5138862729072571, "learning_rate": 2.585411793642556e-06, "loss": 0.9107, "step": 10075 }, { "epoch": 0.9003060289945719, "grad_norm": 0.45086416602134705, "learning_rate": 2.580820774561704e-06, "loss": 0.9627, "step": 10076 }, { "epoch": 0.9003953805258338, "grad_norm": 0.5956899523735046, "learning_rate": 2.576233727342564e-06, "loss": 0.9463, "step": 10077 }, { "epoch": 0.9004847320570957, "grad_norm": 0.47443798184394836, "learning_rate": 2.571650652369351e-06, "loss": 0.9167, "step": 10078 }, { "epoch": 0.9005740835883574, "grad_norm": 0.5572949051856995, "learning_rate": 2.5670715500259403e-06, "loss": 0.8379, "step": 10079 }, { "epoch": 0.9006634351196193, "grad_norm": 0.6084146499633789, "learning_rate": 2.5624964206958924e-06, "loss": 0.8469, "step": 10080 }, { "epoch": 0.9007527866508812, "grad_norm": 0.5597148537635803, "learning_rate": 2.557925264762412e-06, "loss": 0.8754, "step": 10081 }, { "epoch": 0.9008421381821431, "grad_norm": 0.5465840101242065, "learning_rate": 2.5533580826083926e-06, "loss": 0.8959, "step": 10082 }, { "epoch": 0.900931489713405, "grad_norm": 0.39822328090667725, "learning_rate": 2.5487948746163726e-06, "loss": 0.9557, "step": 10083 }, { "epoch": 0.9010208412446669, "grad_norm": 0.45787888765335083, "learning_rate": 2.544235641168585e-06, "loss": 0.8887, "step": 10084 }, { "epoch": 0.9011101927759287, "grad_norm": 0.5872011184692383, "learning_rate": 2.5396803826468975e-06, "loss": 0.8258, "step": 10085 }, { "epoch": 0.9011995443071905, "grad_norm": 0.5175109505653381, "learning_rate": 2.53512909943287e-06, "loss": 0.9667, "step": 10086 }, { "epoch": 0.9012888958384524, "grad_norm": 0.4502319097518921, "learning_rate": 2.5305817919077157e-06, "loss": 0.9462, "step": 10087 }, { "epoch": 0.9013782473697143, "grad_norm": 0.4700741171836853, "learning_rate": 2.526038460452329e-06, "loss": 0.9298, "step": 10088 }, { "epoch": 0.9014675989009762, "grad_norm": 0.4288919270038605, "learning_rate": 2.521499105447256e-06, "loss": 0.9719, "step": 10089 }, { "epoch": 0.901556950432238, "grad_norm": 0.38474833965301514, "learning_rate": 2.516963727272714e-06, "loss": 1.0002, "step": 10090 }, { "epoch": 0.9016463019634999, "grad_norm": 0.5012978911399841, "learning_rate": 2.512432326308595e-06, "loss": 0.9018, "step": 10091 }, { "epoch": 0.9017356534947618, "grad_norm": 0.5238143801689148, "learning_rate": 2.5079049029344492e-06, "loss": 0.8814, "step": 10092 }, { "epoch": 0.9018250050260236, "grad_norm": 0.4294685423374176, "learning_rate": 2.503381457529508e-06, "loss": 0.9577, "step": 10093 }, { "epoch": 0.9019143565572855, "grad_norm": 0.5884724259376526, "learning_rate": 2.498861990472634e-06, "loss": 0.8814, "step": 10094 }, { "epoch": 0.9020037080885474, "grad_norm": 0.40673425793647766, "learning_rate": 2.494346502142397e-06, "loss": 0.952, "step": 10095 }, { "epoch": 0.9020930596198092, "grad_norm": 0.4494728744029999, "learning_rate": 2.4898349929170116e-06, "loss": 0.937, "step": 10096 }, { "epoch": 0.9021824111510711, "grad_norm": 0.5147069692611694, "learning_rate": 2.4853274631743807e-06, "loss": 0.8933, "step": 10097 }, { "epoch": 0.902271762682333, "grad_norm": 0.42955800890922546, "learning_rate": 2.4808239132920297e-06, "loss": 0.9895, "step": 10098 }, { "epoch": 0.9023611142135949, "grad_norm": 0.5820243954658508, "learning_rate": 2.4763243436472016e-06, "loss": 0.8731, "step": 10099 }, { "epoch": 0.9024504657448567, "grad_norm": 0.46939095854759216, "learning_rate": 2.471828754616773e-06, "loss": 0.9266, "step": 10100 }, { "epoch": 0.9025398172761185, "grad_norm": 0.5100577473640442, "learning_rate": 2.4673371465772978e-06, "loss": 0.9469, "step": 10101 }, { "epoch": 0.9026291688073804, "grad_norm": 0.5236694812774658, "learning_rate": 2.4628495199050027e-06, "loss": 0.9135, "step": 10102 }, { "epoch": 0.9027185203386423, "grad_norm": 0.5193259716033936, "learning_rate": 2.4583658749757656e-06, "loss": 0.9942, "step": 10103 }, { "epoch": 0.9028078718699042, "grad_norm": 0.553813099861145, "learning_rate": 2.453886212165152e-06, "loss": 0.9319, "step": 10104 }, { "epoch": 0.9028972234011661, "grad_norm": 0.5031151175498962, "learning_rate": 2.4494105318483674e-06, "loss": 0.8901, "step": 10105 }, { "epoch": 0.902986574932428, "grad_norm": 0.5096839070320129, "learning_rate": 2.444938834400301e-06, "loss": 0.8819, "step": 10106 }, { "epoch": 0.9030759264636897, "grad_norm": 0.48195287585258484, "learning_rate": 2.4404711201955088e-06, "loss": 0.969, "step": 10107 }, { "epoch": 0.9031652779949516, "grad_norm": 0.46852657198905945, "learning_rate": 2.4360073896082138e-06, "loss": 0.8956, "step": 10108 }, { "epoch": 0.9032546295262135, "grad_norm": 0.4824883043766022, "learning_rate": 2.4315476430122884e-06, "loss": 0.9332, "step": 10109 }, { "epoch": 0.9033439810574754, "grad_norm": 0.4670718312263489, "learning_rate": 2.4270918807812958e-06, "loss": 0.9048, "step": 10110 }, { "epoch": 0.9034333325887373, "grad_norm": 0.4740527868270874, "learning_rate": 2.422640103288443e-06, "loss": 0.9462, "step": 10111 }, { "epoch": 0.9035226841199991, "grad_norm": 0.49257439374923706, "learning_rate": 2.418192310906625e-06, "loss": 0.9087, "step": 10112 }, { "epoch": 0.903612035651261, "grad_norm": 0.5299413204193115, "learning_rate": 2.413748504008384e-06, "loss": 0.8926, "step": 10113 }, { "epoch": 0.9037013871825228, "grad_norm": 0.46102991700172424, "learning_rate": 2.4093086829659495e-06, "loss": 0.9209, "step": 10114 }, { "epoch": 0.9037907387137847, "grad_norm": 0.5814657211303711, "learning_rate": 2.404872848151185e-06, "loss": 1.0147, "step": 10115 }, { "epoch": 0.9038800902450466, "grad_norm": 0.5453367829322815, "learning_rate": 2.4004409999356437e-06, "loss": 0.8692, "step": 10116 }, { "epoch": 0.9039694417763084, "grad_norm": 0.5346057415008545, "learning_rate": 2.396013138690545e-06, "loss": 0.9964, "step": 10117 }, { "epoch": 0.9040587933075703, "grad_norm": 0.541984498500824, "learning_rate": 2.39158926478677e-06, "loss": 0.9529, "step": 10118 }, { "epoch": 0.9041481448388322, "grad_norm": 0.43673083186149597, "learning_rate": 2.3871693785948614e-06, "loss": 1.0007, "step": 10119 }, { "epoch": 0.9042374963700941, "grad_norm": 0.42175018787384033, "learning_rate": 2.3827534804850336e-06, "loss": 0.9554, "step": 10120 }, { "epoch": 0.9043268479013559, "grad_norm": 0.5283427238464355, "learning_rate": 2.3783415708271696e-06, "loss": 0.8791, "step": 10121 }, { "epoch": 0.9044161994326178, "grad_norm": 0.41755416989326477, "learning_rate": 2.3739336499908005e-06, "loss": 0.9335, "step": 10122 }, { "epoch": 0.9045055509638796, "grad_norm": 0.4738496243953705, "learning_rate": 2.3695297183451536e-06, "loss": 0.9587, "step": 10123 }, { "epoch": 0.9045949024951415, "grad_norm": 0.4655263423919678, "learning_rate": 2.3651297762591006e-06, "loss": 0.8765, "step": 10124 }, { "epoch": 0.9046842540264034, "grad_norm": 0.4394261837005615, "learning_rate": 2.3607338241011747e-06, "loss": 0.9538, "step": 10125 }, { "epoch": 0.9047736055576653, "grad_norm": 0.4117361605167389, "learning_rate": 2.356341862239586e-06, "loss": 1.0001, "step": 10126 }, { "epoch": 0.9048629570889272, "grad_norm": 0.5142612457275391, "learning_rate": 2.3519538910422134e-06, "loss": 0.9423, "step": 10127 }, { "epoch": 0.9049523086201889, "grad_norm": 0.5198079347610474, "learning_rate": 2.3475699108765958e-06, "loss": 0.9456, "step": 10128 }, { "epoch": 0.9050416601514508, "grad_norm": 0.521613359451294, "learning_rate": 2.3431899221099342e-06, "loss": 0.9063, "step": 10129 }, { "epoch": 0.9051310116827127, "grad_norm": 0.4279034435749054, "learning_rate": 2.3388139251091067e-06, "loss": 1.0047, "step": 10130 }, { "epoch": 0.9052203632139746, "grad_norm": 0.5167081356048584, "learning_rate": 2.334441920240643e-06, "loss": 0.9027, "step": 10131 }, { "epoch": 0.9053097147452365, "grad_norm": 0.494266152381897, "learning_rate": 2.3300739078707446e-06, "loss": 0.929, "step": 10132 }, { "epoch": 0.9053990662764984, "grad_norm": 0.4619883596897125, "learning_rate": 2.3257098883652795e-06, "loss": 0.944, "step": 10133 }, { "epoch": 0.9054884178077601, "grad_norm": 0.46336403489112854, "learning_rate": 2.321349862089789e-06, "loss": 0.9754, "step": 10134 }, { "epoch": 0.905577769339022, "grad_norm": 0.5507116913795471, "learning_rate": 2.3169938294094582e-06, "loss": 0.8841, "step": 10135 }, { "epoch": 0.9056671208702839, "grad_norm": 0.49154022336006165, "learning_rate": 2.312641790689163e-06, "loss": 0.887, "step": 10136 }, { "epoch": 0.9057564724015458, "grad_norm": 0.5543971061706543, "learning_rate": 2.3082937462934274e-06, "loss": 0.876, "step": 10137 }, { "epoch": 0.9058458239328077, "grad_norm": 0.5523365139961243, "learning_rate": 2.3039496965864436e-06, "loss": 0.9618, "step": 10138 }, { "epoch": 0.9059351754640695, "grad_norm": 0.5370588898658752, "learning_rate": 2.2996096419320824e-06, "loss": 0.9194, "step": 10139 }, { "epoch": 0.9060245269953314, "grad_norm": 0.4241883158683777, "learning_rate": 2.2952735826938576e-06, "loss": 0.8904, "step": 10140 }, { "epoch": 0.9061138785265932, "grad_norm": 0.45826974511146545, "learning_rate": 2.290941519234968e-06, "loss": 0.9604, "step": 10141 }, { "epoch": 0.9062032300578551, "grad_norm": 0.4920784533023834, "learning_rate": 2.286613451918268e-06, "loss": 0.9322, "step": 10142 }, { "epoch": 0.906292581589117, "grad_norm": 0.4059630334377289, "learning_rate": 2.2822893811062786e-06, "loss": 0.9157, "step": 10143 }, { "epoch": 0.9063819331203788, "grad_norm": 0.4405556321144104, "learning_rate": 2.2779693071611986e-06, "loss": 0.9764, "step": 10144 }, { "epoch": 0.9064712846516407, "grad_norm": 0.4672204852104187, "learning_rate": 2.273653230444861e-06, "loss": 0.9871, "step": 10145 }, { "epoch": 0.9065606361829026, "grad_norm": 0.5110070109367371, "learning_rate": 2.269341151318788e-06, "loss": 0.9669, "step": 10146 }, { "epoch": 0.9066499877141645, "grad_norm": 0.6213862299919128, "learning_rate": 2.2650330701441678e-06, "loss": 0.9349, "step": 10147 }, { "epoch": 0.9067393392454263, "grad_norm": 0.5088279247283936, "learning_rate": 2.260728987281846e-06, "loss": 0.895, "step": 10148 }, { "epoch": 0.9068286907766882, "grad_norm": 0.5393834710121155, "learning_rate": 2.2564289030923393e-06, "loss": 0.9513, "step": 10149 }, { "epoch": 0.90691804230795, "grad_norm": 0.4829711318016052, "learning_rate": 2.2521328179358146e-06, "loss": 0.8979, "step": 10150 }, { "epoch": 0.9070073938392119, "grad_norm": 0.5531469583511353, "learning_rate": 2.2478407321721296e-06, "loss": 0.8851, "step": 10151 }, { "epoch": 0.9070967453704738, "grad_norm": 0.45315778255462646, "learning_rate": 2.243552646160779e-06, "loss": 0.9384, "step": 10152 }, { "epoch": 0.9071860969017357, "grad_norm": 0.5223442912101746, "learning_rate": 2.239268560260943e-06, "loss": 0.8615, "step": 10153 }, { "epoch": 0.9072754484329976, "grad_norm": 0.47916871309280396, "learning_rate": 2.234988474831462e-06, "loss": 0.9782, "step": 10154 }, { "epoch": 0.9073647999642593, "grad_norm": 0.4964684247970581, "learning_rate": 2.230712390230838e-06, "loss": 0.8816, "step": 10155 }, { "epoch": 0.9074541514955212, "grad_norm": 0.5099236965179443, "learning_rate": 2.226440306817229e-06, "loss": 0.9071, "step": 10156 }, { "epoch": 0.9075435030267831, "grad_norm": 0.48252931237220764, "learning_rate": 2.222172224948471e-06, "loss": 0.9396, "step": 10157 }, { "epoch": 0.907632854558045, "grad_norm": 0.5542041063308716, "learning_rate": 2.2179081449820672e-06, "loss": 0.901, "step": 10158 }, { "epoch": 0.9077222060893069, "grad_norm": 0.5302069783210754, "learning_rate": 2.213648067275176e-06, "loss": 0.8758, "step": 10159 }, { "epoch": 0.9078115576205688, "grad_norm": 0.6605767607688904, "learning_rate": 2.2093919921846283e-06, "loss": 0.8491, "step": 10160 }, { "epoch": 0.9079009091518306, "grad_norm": 0.47050368785858154, "learning_rate": 2.2051399200669065e-06, "loss": 0.9554, "step": 10161 }, { "epoch": 0.9079902606830924, "grad_norm": 0.49454933404922485, "learning_rate": 2.200891851278175e-06, "loss": 0.8927, "step": 10162 }, { "epoch": 0.9080796122143543, "grad_norm": 0.46944358944892883, "learning_rate": 2.1966477861742607e-06, "loss": 0.9581, "step": 10163 }, { "epoch": 0.9081689637456162, "grad_norm": 0.48867207765579224, "learning_rate": 2.1924077251106347e-06, "loss": 1.0078, "step": 10164 }, { "epoch": 0.9082583152768781, "grad_norm": 0.46018025279045105, "learning_rate": 2.1881716684424568e-06, "loss": 0.9385, "step": 10165 }, { "epoch": 0.9083476668081399, "grad_norm": 0.4881027340888977, "learning_rate": 2.183939616524533e-06, "loss": 0.9308, "step": 10166 }, { "epoch": 0.9084370183394018, "grad_norm": 0.4833429157733917, "learning_rate": 2.1797115697113624e-06, "loss": 1.0144, "step": 10167 }, { "epoch": 0.9085263698706637, "grad_norm": 0.42278674244880676, "learning_rate": 2.175487528357062e-06, "loss": 0.9192, "step": 10168 }, { "epoch": 0.9086157214019255, "grad_norm": 0.632034957408905, "learning_rate": 2.17126749281546e-06, "loss": 0.9533, "step": 10169 }, { "epoch": 0.9087050729331874, "grad_norm": 0.5221367478370667, "learning_rate": 2.1670514634400173e-06, "loss": 0.9187, "step": 10170 }, { "epoch": 0.9087944244644492, "grad_norm": 0.4272196292877197, "learning_rate": 2.1628394405838803e-06, "loss": 1.0073, "step": 10171 }, { "epoch": 0.9088837759957111, "grad_norm": 0.44251886010169983, "learning_rate": 2.1586314245998497e-06, "loss": 0.9993, "step": 10172 }, { "epoch": 0.908973127526973, "grad_norm": 0.6176732778549194, "learning_rate": 2.1544274158403877e-06, "loss": 0.8595, "step": 10173 }, { "epoch": 0.9090624790582349, "grad_norm": 0.4189254343509674, "learning_rate": 2.150227414657624e-06, "loss": 0.9154, "step": 10174 }, { "epoch": 0.9091518305894968, "grad_norm": 0.47814953327178955, "learning_rate": 2.1460314214033662e-06, "loss": 0.94, "step": 10175 }, { "epoch": 0.9092411821207586, "grad_norm": 0.4514091908931732, "learning_rate": 2.141839436429055e-06, "loss": 0.9883, "step": 10176 }, { "epoch": 0.9093305336520204, "grad_norm": 0.38325291872024536, "learning_rate": 2.137651460085821e-06, "loss": 0.9605, "step": 10177 }, { "epoch": 0.9094198851832823, "grad_norm": 0.4543987512588501, "learning_rate": 2.1334674927244556e-06, "loss": 0.9641, "step": 10178 }, { "epoch": 0.9095092367145442, "grad_norm": 0.4916648268699646, "learning_rate": 2.1292875346954123e-06, "loss": 0.9521, "step": 10179 }, { "epoch": 0.9095985882458061, "grad_norm": 0.49808797240257263, "learning_rate": 2.1251115863487934e-06, "loss": 0.9676, "step": 10180 }, { "epoch": 0.909687939777068, "grad_norm": 0.4320428669452667, "learning_rate": 2.1209396480343977e-06, "loss": 0.9669, "step": 10181 }, { "epoch": 0.9097772913083298, "grad_norm": 0.5682074427604675, "learning_rate": 2.1167717201016568e-06, "loss": 0.8171, "step": 10182 }, { "epoch": 0.9098666428395916, "grad_norm": 0.5299460291862488, "learning_rate": 2.11260780289968e-06, "loss": 0.9178, "step": 10183 }, { "epoch": 0.9099559943708535, "grad_norm": 0.4480501711368561, "learning_rate": 2.1084478967772494e-06, "loss": 0.9736, "step": 10184 }, { "epoch": 0.9100453459021154, "grad_norm": 0.5181623697280884, "learning_rate": 2.1042920020827974e-06, "loss": 0.9647, "step": 10185 }, { "epoch": 0.9101346974333773, "grad_norm": 0.6131975650787354, "learning_rate": 2.100140119164412e-06, "loss": 0.9173, "step": 10186 }, { "epoch": 0.9102240489646392, "grad_norm": 0.5780476927757263, "learning_rate": 2.095992248369871e-06, "loss": 0.9846, "step": 10187 }, { "epoch": 0.910313400495901, "grad_norm": 0.4459938108921051, "learning_rate": 2.0918483900466025e-06, "loss": 0.9798, "step": 10188 }, { "epoch": 0.9104027520271629, "grad_norm": 0.47280803322792053, "learning_rate": 2.087708544541689e-06, "loss": 0.9052, "step": 10189 }, { "epoch": 0.9104921035584247, "grad_norm": 0.5585520267486572, "learning_rate": 2.083572712201898e-06, "loss": 0.8532, "step": 10190 }, { "epoch": 0.9105814550896866, "grad_norm": 0.5118190050125122, "learning_rate": 2.079440893373641e-06, "loss": 0.9004, "step": 10191 }, { "epoch": 0.9106708066209485, "grad_norm": 0.503633975982666, "learning_rate": 2.075313088403008e-06, "loss": 0.8772, "step": 10192 }, { "epoch": 0.9107601581522103, "grad_norm": 0.45112094283103943, "learning_rate": 2.071189297635745e-06, "loss": 0.9163, "step": 10193 }, { "epoch": 0.9108495096834722, "grad_norm": 0.4446631669998169, "learning_rate": 2.067069521417264e-06, "loss": 0.9806, "step": 10194 }, { "epoch": 0.9109388612147341, "grad_norm": 0.525492250919342, "learning_rate": 2.0629537600926395e-06, "loss": 0.9365, "step": 10195 }, { "epoch": 0.9110282127459959, "grad_norm": 0.4737085998058319, "learning_rate": 2.0588420140066067e-06, "loss": 1.0117, "step": 10196 }, { "epoch": 0.9111175642772578, "grad_norm": 0.4597386121749878, "learning_rate": 2.0547342835035733e-06, "loss": 0.896, "step": 10197 }, { "epoch": 0.9112069158085196, "grad_norm": 0.5008763074874878, "learning_rate": 2.050630568927603e-06, "loss": 0.9568, "step": 10198 }, { "epoch": 0.9112962673397815, "grad_norm": 0.5139524340629578, "learning_rate": 2.0465308706224207e-06, "loss": 0.8624, "step": 10199 }, { "epoch": 0.9113856188710434, "grad_norm": 0.5094760060310364, "learning_rate": 2.0424351889314354e-06, "loss": 0.9532, "step": 10200 }, { "epoch": 0.9114749704023053, "grad_norm": 0.5391070246696472, "learning_rate": 2.038343524197689e-06, "loss": 0.8551, "step": 10201 }, { "epoch": 0.9115643219335672, "grad_norm": 0.5024666786193848, "learning_rate": 2.0342558767639074e-06, "loss": 0.8772, "step": 10202 }, { "epoch": 0.911653673464829, "grad_norm": 0.48186391592025757, "learning_rate": 2.0301722469724726e-06, "loss": 0.875, "step": 10203 }, { "epoch": 0.9117430249960908, "grad_norm": 0.44918540120124817, "learning_rate": 2.026092635165433e-06, "loss": 0.9593, "step": 10204 }, { "epoch": 0.9118323765273527, "grad_norm": 0.48004093766212463, "learning_rate": 2.022017041684504e-06, "loss": 0.9952, "step": 10205 }, { "epoch": 0.9119217280586146, "grad_norm": 0.4600532352924347, "learning_rate": 2.0179454668710575e-06, "loss": 0.9583, "step": 10206 }, { "epoch": 0.9120110795898765, "grad_norm": 0.4377675950527191, "learning_rate": 2.0138779110661252e-06, "loss": 0.8938, "step": 10207 }, { "epoch": 0.9121004311211384, "grad_norm": 0.6013002395629883, "learning_rate": 2.0098143746104135e-06, "loss": 0.7857, "step": 10208 }, { "epoch": 0.9121897826524002, "grad_norm": 0.5890505313873291, "learning_rate": 2.005754857844283e-06, "loss": 0.9102, "step": 10209 }, { "epoch": 0.912279134183662, "grad_norm": 0.48680880665779114, "learning_rate": 2.0016993611077726e-06, "loss": 0.944, "step": 10210 }, { "epoch": 0.9123684857149239, "grad_norm": 0.5501675009727478, "learning_rate": 1.997647884740561e-06, "loss": 0.872, "step": 10211 }, { "epoch": 0.9124578372461858, "grad_norm": 0.46747952699661255, "learning_rate": 1.9936004290820098e-06, "loss": 0.9101, "step": 10212 }, { "epoch": 0.9125471887774477, "grad_norm": 0.47848084568977356, "learning_rate": 1.989556994471131e-06, "loss": 0.9689, "step": 10213 }, { "epoch": 0.9126365403087096, "grad_norm": 0.42791107296943665, "learning_rate": 1.98551758124661e-06, "loss": 0.9226, "step": 10214 }, { "epoch": 0.9127258918399714, "grad_norm": 0.6010297536849976, "learning_rate": 1.9814821897467973e-06, "loss": 0.8729, "step": 10215 }, { "epoch": 0.9128152433712333, "grad_norm": 0.5505849719047546, "learning_rate": 1.977450820309684e-06, "loss": 0.9354, "step": 10216 }, { "epoch": 0.9129045949024951, "grad_norm": 0.7786350250244141, "learning_rate": 1.973423473272945e-06, "loss": 0.8643, "step": 10217 }, { "epoch": 0.912993946433757, "grad_norm": 0.596895694732666, "learning_rate": 1.9694001489739213e-06, "loss": 0.9079, "step": 10218 }, { "epoch": 0.9130832979650189, "grad_norm": 0.4010644555091858, "learning_rate": 1.9653808477496038e-06, "loss": 0.9816, "step": 10219 }, { "epoch": 0.9131726494962807, "grad_norm": 0.537714421749115, "learning_rate": 1.9613655699366464e-06, "loss": 0.9397, "step": 10220 }, { "epoch": 0.9132620010275426, "grad_norm": 0.4720018804073334, "learning_rate": 1.957354315871385e-06, "loss": 0.9971, "step": 10221 }, { "epoch": 0.9133513525588045, "grad_norm": 0.45322003960609436, "learning_rate": 1.953347085889795e-06, "loss": 0.9162, "step": 10222 }, { "epoch": 0.9134407040900664, "grad_norm": 0.5041544437408447, "learning_rate": 1.9493438803275257e-06, "loss": 0.9266, "step": 10223 }, { "epoch": 0.9135300556213282, "grad_norm": 0.5631766319274902, "learning_rate": 1.945344699519891e-06, "loss": 0.896, "step": 10224 }, { "epoch": 0.91361940715259, "grad_norm": 0.5237100124359131, "learning_rate": 1.9413495438018736e-06, "loss": 0.9955, "step": 10225 }, { "epoch": 0.9137087586838519, "grad_norm": 0.5129988193511963, "learning_rate": 1.937358413508089e-06, "loss": 0.9253, "step": 10226 }, { "epoch": 0.9137981102151138, "grad_norm": 0.45968538522720337, "learning_rate": 1.933371308972848e-06, "loss": 0.9623, "step": 10227 }, { "epoch": 0.9138874617463757, "grad_norm": 0.5253444314002991, "learning_rate": 1.929388230530116e-06, "loss": 1.0036, "step": 10228 }, { "epoch": 0.9139768132776376, "grad_norm": 0.5315740704536438, "learning_rate": 1.9254091785135153e-06, "loss": 0.89, "step": 10229 }, { "epoch": 0.9140661648088995, "grad_norm": 0.4819380044937134, "learning_rate": 1.9214341532563296e-06, "loss": 0.936, "step": 10230 }, { "epoch": 0.9141555163401612, "grad_norm": 0.5596315264701843, "learning_rate": 1.91746315509152e-06, "loss": 0.8871, "step": 10231 }, { "epoch": 0.9142448678714231, "grad_norm": 0.4711841344833374, "learning_rate": 1.913496184351693e-06, "loss": 0.9442, "step": 10232 }, { "epoch": 0.914334219402685, "grad_norm": 0.4977385997772217, "learning_rate": 1.9095332413691326e-06, "loss": 0.9563, "step": 10233 }, { "epoch": 0.9144235709339469, "grad_norm": 0.592835545539856, "learning_rate": 1.905574326475762e-06, "loss": 0.8969, "step": 10234 }, { "epoch": 0.9145129224652088, "grad_norm": 0.47582802176475525, "learning_rate": 1.9016194400031884e-06, "loss": 0.9354, "step": 10235 }, { "epoch": 0.9146022739964706, "grad_norm": 0.4953663647174835, "learning_rate": 1.8976685822826856e-06, "loss": 0.9497, "step": 10236 }, { "epoch": 0.9146916255277325, "grad_norm": 0.49911701679229736, "learning_rate": 1.8937217536451778e-06, "loss": 0.9374, "step": 10237 }, { "epoch": 0.9147809770589943, "grad_norm": 0.5435790419578552, "learning_rate": 1.8897789544212396e-06, "loss": 0.8251, "step": 10238 }, { "epoch": 0.9148703285902562, "grad_norm": 0.5200591683387756, "learning_rate": 1.8858401849411344e-06, "loss": 0.9244, "step": 10239 }, { "epoch": 0.9149596801215181, "grad_norm": 0.6190335750579834, "learning_rate": 1.8819054455347707e-06, "loss": 0.8485, "step": 10240 }, { "epoch": 0.91504903165278, "grad_norm": 0.5141425132751465, "learning_rate": 1.877974736531729e-06, "loss": 0.946, "step": 10241 }, { "epoch": 0.9151383831840418, "grad_norm": 0.5247118473052979, "learning_rate": 1.8740480582612519e-06, "loss": 0.942, "step": 10242 }, { "epoch": 0.9152277347153037, "grad_norm": 0.5821022391319275, "learning_rate": 1.8701254110522315e-06, "loss": 0.9661, "step": 10243 }, { "epoch": 0.9153170862465656, "grad_norm": 0.6091264486312866, "learning_rate": 1.8662067952332386e-06, "loss": 0.9005, "step": 10244 }, { "epoch": 0.9154064377778274, "grad_norm": 0.44241493940353394, "learning_rate": 1.8622922111324937e-06, "loss": 0.9736, "step": 10245 }, { "epoch": 0.9154957893090893, "grad_norm": 0.4991171360015869, "learning_rate": 1.8583816590778901e-06, "loss": 0.9084, "step": 10246 }, { "epoch": 0.9155851408403511, "grad_norm": 0.6471422910690308, "learning_rate": 1.8544751393969716e-06, "loss": 0.8957, "step": 10247 }, { "epoch": 0.915674492371613, "grad_norm": 0.5588064193725586, "learning_rate": 1.8505726524169598e-06, "loss": 0.8469, "step": 10248 }, { "epoch": 0.9157638439028749, "grad_norm": 0.4519459307193756, "learning_rate": 1.8466741984647151e-06, "loss": 0.9619, "step": 10249 }, { "epoch": 0.9158531954341368, "grad_norm": 0.5813220739364624, "learning_rate": 1.8427797778667931e-06, "loss": 0.9573, "step": 10250 }, { "epoch": 0.9159425469653987, "grad_norm": 0.4593013823032379, "learning_rate": 1.8388893909493775e-06, "loss": 1.0341, "step": 10251 }, { "epoch": 0.9160318984966604, "grad_norm": 0.45809662342071533, "learning_rate": 1.8350030380383355e-06, "loss": 0.957, "step": 10252 }, { "epoch": 0.9161212500279223, "grad_norm": 0.58559250831604, "learning_rate": 1.831120719459195e-06, "loss": 0.8983, "step": 10253 }, { "epoch": 0.9162106015591842, "grad_norm": 0.4311509132385254, "learning_rate": 1.8272424355371353e-06, "loss": 0.903, "step": 10254 }, { "epoch": 0.9162999530904461, "grad_norm": 0.49322575330734253, "learning_rate": 1.8233681865970077e-06, "loss": 0.937, "step": 10255 }, { "epoch": 0.916389304621708, "grad_norm": 0.4198033809661865, "learning_rate": 1.8194979729633244e-06, "loss": 1.0386, "step": 10256 }, { "epoch": 0.9164786561529699, "grad_norm": 0.4912882149219513, "learning_rate": 1.8156317949602486e-06, "loss": 0.9599, "step": 10257 }, { "epoch": 0.9165680076842316, "grad_norm": 0.42434144020080566, "learning_rate": 1.8117696529116213e-06, "loss": 0.943, "step": 10258 }, { "epoch": 0.9166573592154935, "grad_norm": 0.5602464079856873, "learning_rate": 1.807911547140928e-06, "loss": 0.851, "step": 10259 }, { "epoch": 0.9167467107467554, "grad_norm": 0.4524661898612976, "learning_rate": 1.8040574779713382e-06, "loss": 1.0004, "step": 10260 }, { "epoch": 0.9168360622780173, "grad_norm": 0.5537035465240479, "learning_rate": 1.8002074457256658e-06, "loss": 0.9157, "step": 10261 }, { "epoch": 0.9169254138092792, "grad_norm": 0.48981863260269165, "learning_rate": 1.796361450726397e-06, "loss": 0.9785, "step": 10262 }, { "epoch": 0.917014765340541, "grad_norm": 0.6159743070602417, "learning_rate": 1.7925194932956635e-06, "loss": 0.9111, "step": 10263 }, { "epoch": 0.9171041168718029, "grad_norm": 0.40217313170433044, "learning_rate": 1.7886815737552797e-06, "loss": 0.9608, "step": 10264 }, { "epoch": 0.9171934684030647, "grad_norm": 0.4525063633918762, "learning_rate": 1.784847692426711e-06, "loss": 1.0066, "step": 10265 }, { "epoch": 0.9172828199343266, "grad_norm": 0.6385212540626526, "learning_rate": 1.7810178496310891e-06, "loss": 0.8665, "step": 10266 }, { "epoch": 0.9173721714655885, "grad_norm": 0.4889377951622009, "learning_rate": 1.7771920456891966e-06, "loss": 0.9201, "step": 10267 }, { "epoch": 0.9174615229968504, "grad_norm": 0.5491172671318054, "learning_rate": 1.7733702809214825e-06, "loss": 0.8458, "step": 10268 }, { "epoch": 0.9175508745281122, "grad_norm": 0.48966601490974426, "learning_rate": 1.7695525556480686e-06, "loss": 0.9615, "step": 10269 }, { "epoch": 0.9176402260593741, "grad_norm": 0.5428771376609802, "learning_rate": 1.7657388701887379e-06, "loss": 0.9203, "step": 10270 }, { "epoch": 0.917729577590636, "grad_norm": 0.45766574144363403, "learning_rate": 1.7619292248629071e-06, "loss": 0.9307, "step": 10271 }, { "epoch": 0.9178189291218978, "grad_norm": 0.49701622128486633, "learning_rate": 1.7581236199896879e-06, "loss": 0.868, "step": 10272 }, { "epoch": 0.9179082806531597, "grad_norm": 0.4536900520324707, "learning_rate": 1.754322055887836e-06, "loss": 0.9504, "step": 10273 }, { "epoch": 0.9179976321844215, "grad_norm": 0.5021615028381348, "learning_rate": 1.750524532875769e-06, "loss": 0.9365, "step": 10274 }, { "epoch": 0.9180869837156834, "grad_norm": 0.4921415448188782, "learning_rate": 1.7467310512715774e-06, "loss": 0.966, "step": 10275 }, { "epoch": 0.9181763352469453, "grad_norm": 0.5077376961708069, "learning_rate": 1.742941611393012e-06, "loss": 0.9469, "step": 10276 }, { "epoch": 0.9182656867782072, "grad_norm": 0.45509567856788635, "learning_rate": 1.7391562135574634e-06, "loss": 0.9654, "step": 10277 }, { "epoch": 0.9183550383094691, "grad_norm": 0.4772971570491791, "learning_rate": 1.7353748580820061e-06, "loss": 0.9162, "step": 10278 }, { "epoch": 0.9184443898407308, "grad_norm": 0.5425154566764832, "learning_rate": 1.7315975452833645e-06, "loss": 0.9155, "step": 10279 }, { "epoch": 0.9185337413719927, "grad_norm": 0.5885468125343323, "learning_rate": 1.727824275477935e-06, "loss": 0.9321, "step": 10280 }, { "epoch": 0.9186230929032546, "grad_norm": 0.41930609941482544, "learning_rate": 1.7240550489817653e-06, "loss": 0.9593, "step": 10281 }, { "epoch": 0.9187124444345165, "grad_norm": 0.5219174027442932, "learning_rate": 1.7202898661105748e-06, "loss": 0.9323, "step": 10282 }, { "epoch": 0.9188017959657784, "grad_norm": 0.5876883268356323, "learning_rate": 1.716528727179728e-06, "loss": 0.9279, "step": 10283 }, { "epoch": 0.9188911474970403, "grad_norm": 0.5892945528030396, "learning_rate": 1.712771632504262e-06, "loss": 0.8964, "step": 10284 }, { "epoch": 0.9189804990283021, "grad_norm": 0.4626266062259674, "learning_rate": 1.7090185823988857e-06, "loss": 0.907, "step": 10285 }, { "epoch": 0.9190698505595639, "grad_norm": 0.6098506450653076, "learning_rate": 1.7052695771779481e-06, "loss": 0.8302, "step": 10286 }, { "epoch": 0.9191592020908258, "grad_norm": 0.5348356366157532, "learning_rate": 1.7015246171554644e-06, "loss": 0.9675, "step": 10287 }, { "epoch": 0.9192485536220877, "grad_norm": 0.46181049942970276, "learning_rate": 1.697783702645117e-06, "loss": 0.849, "step": 10288 }, { "epoch": 0.9193379051533496, "grad_norm": 0.5558209419250488, "learning_rate": 1.69404683396025e-06, "loss": 0.9733, "step": 10289 }, { "epoch": 0.9194272566846114, "grad_norm": 0.48256248235702515, "learning_rate": 1.6903140114138627e-06, "loss": 0.948, "step": 10290 }, { "epoch": 0.9195166082158733, "grad_norm": 0.6082019805908203, "learning_rate": 1.6865852353186218e-06, "loss": 0.9981, "step": 10291 }, { "epoch": 0.9196059597471352, "grad_norm": 0.4192001223564148, "learning_rate": 1.6828605059868552e-06, "loss": 1.004, "step": 10292 }, { "epoch": 0.919695311278397, "grad_norm": 0.5222572684288025, "learning_rate": 1.6791398237305412e-06, "loss": 0.9626, "step": 10293 }, { "epoch": 0.9197846628096589, "grad_norm": 0.48702099919319153, "learning_rate": 1.6754231888613304e-06, "loss": 0.9927, "step": 10294 }, { "epoch": 0.9198740143409208, "grad_norm": 0.5022723078727722, "learning_rate": 1.6717106016905348e-06, "loss": 0.9374, "step": 10295 }, { "epoch": 0.9199633658721826, "grad_norm": 0.3965294063091278, "learning_rate": 1.6680020625291227e-06, "loss": 0.9798, "step": 10296 }, { "epoch": 0.9200527174034445, "grad_norm": 0.48680707812309265, "learning_rate": 1.6642975716877118e-06, "loss": 0.9358, "step": 10297 }, { "epoch": 0.9201420689347064, "grad_norm": 0.5684248805046082, "learning_rate": 1.6605971294766044e-06, "loss": 0.848, "step": 10298 }, { "epoch": 0.9202314204659683, "grad_norm": 0.5401350855827332, "learning_rate": 1.6569007362057465e-06, "loss": 0.9419, "step": 10299 }, { "epoch": 0.9203207719972301, "grad_norm": 0.5058586001396179, "learning_rate": 1.653208392184752e-06, "loss": 0.877, "step": 10300 }, { "epoch": 0.9204101235284919, "grad_norm": 0.4828777611255646, "learning_rate": 1.6495200977228897e-06, "loss": 0.9071, "step": 10301 }, { "epoch": 0.9204994750597538, "grad_norm": 0.5211459994316101, "learning_rate": 1.6458358531291074e-06, "loss": 0.926, "step": 10302 }, { "epoch": 0.9205888265910157, "grad_norm": 0.4530414044857025, "learning_rate": 1.6421556587119913e-06, "loss": 0.9432, "step": 10303 }, { "epoch": 0.9206781781222776, "grad_norm": 0.4439500868320465, "learning_rate": 1.6384795147797894e-06, "loss": 0.9568, "step": 10304 }, { "epoch": 0.9207675296535395, "grad_norm": 0.43074101209640503, "learning_rate": 1.6348074216404273e-06, "loss": 1.1121, "step": 10305 }, { "epoch": 0.9208568811848014, "grad_norm": 0.5717933177947998, "learning_rate": 1.6311393796014819e-06, "loss": 0.8781, "step": 10306 }, { "epoch": 0.9209462327160631, "grad_norm": 0.4109342396259308, "learning_rate": 1.62747538897019e-06, "loss": 0.9698, "step": 10307 }, { "epoch": 0.921035584247325, "grad_norm": 0.4915206730365753, "learning_rate": 1.6238154500534452e-06, "loss": 0.9776, "step": 10308 }, { "epoch": 0.9211249357785869, "grad_norm": 0.5269554853439331, "learning_rate": 1.620159563157808e-06, "loss": 0.8828, "step": 10309 }, { "epoch": 0.9212142873098488, "grad_norm": 0.446888267993927, "learning_rate": 1.6165077285895002e-06, "loss": 0.9208, "step": 10310 }, { "epoch": 0.9213036388411107, "grad_norm": 0.4172409772872925, "learning_rate": 1.6128599466543993e-06, "loss": 0.9789, "step": 10311 }, { "epoch": 0.9213929903723725, "grad_norm": 0.5068113803863525, "learning_rate": 1.6092162176580494e-06, "loss": 0.8979, "step": 10312 }, { "epoch": 0.9214823419036344, "grad_norm": 0.5210028290748596, "learning_rate": 1.6055765419056456e-06, "loss": 0.9481, "step": 10313 }, { "epoch": 0.9215716934348962, "grad_norm": 0.48873379826545715, "learning_rate": 1.6019409197020607e-06, "loss": 0.9693, "step": 10314 }, { "epoch": 0.9216610449661581, "grad_norm": 0.4486122727394104, "learning_rate": 1.5983093513518066e-06, "loss": 0.946, "step": 10315 }, { "epoch": 0.92175039649742, "grad_norm": 0.38552728295326233, "learning_rate": 1.5946818371590787e-06, "loss": 0.9608, "step": 10316 }, { "epoch": 0.9218397480286818, "grad_norm": 0.5378084182739258, "learning_rate": 1.591058377427701e-06, "loss": 0.9168, "step": 10317 }, { "epoch": 0.9219290995599437, "grad_norm": 0.48840612173080444, "learning_rate": 1.587438972461186e-06, "loss": 0.9724, "step": 10318 }, { "epoch": 0.9220184510912056, "grad_norm": 0.5533527135848999, "learning_rate": 1.5838236225626968e-06, "loss": 0.8688, "step": 10319 }, { "epoch": 0.9221078026224674, "grad_norm": 0.6588174700737, "learning_rate": 1.5802123280350633e-06, "loss": 0.8956, "step": 10320 }, { "epoch": 0.9221971541537293, "grad_norm": 0.4313110411167145, "learning_rate": 1.5766050891807604e-06, "loss": 0.9738, "step": 10321 }, { "epoch": 0.9222865056849912, "grad_norm": 0.44930917024612427, "learning_rate": 1.5730019063019407e-06, "loss": 1.0021, "step": 10322 }, { "epoch": 0.922375857216253, "grad_norm": 0.6756159067153931, "learning_rate": 1.5694027797004073e-06, "loss": 0.8297, "step": 10323 }, { "epoch": 0.9224652087475149, "grad_norm": 0.5268403887748718, "learning_rate": 1.5658077096776192e-06, "loss": 0.9241, "step": 10324 }, { "epoch": 0.9225545602787768, "grad_norm": 0.4503045082092285, "learning_rate": 1.562216696534713e-06, "loss": 0.8687, "step": 10325 }, { "epoch": 0.9226439118100387, "grad_norm": 0.5141546130180359, "learning_rate": 1.5586297405724648e-06, "loss": 0.8619, "step": 10326 }, { "epoch": 0.9227332633413005, "grad_norm": 0.5367921590805054, "learning_rate": 1.5550468420913288e-06, "loss": 0.9026, "step": 10327 }, { "epoch": 0.9228226148725623, "grad_norm": 0.575573205947876, "learning_rate": 1.5514680013913984e-06, "loss": 0.897, "step": 10328 }, { "epoch": 0.9229119664038242, "grad_norm": 0.44511479139328003, "learning_rate": 1.5478932187724504e-06, "loss": 0.9417, "step": 10329 }, { "epoch": 0.9230013179350861, "grad_norm": 0.6052654981613159, "learning_rate": 1.5443224945339063e-06, "loss": 0.9232, "step": 10330 }, { "epoch": 0.923090669466348, "grad_norm": 0.4598505198955536, "learning_rate": 1.5407558289748547e-06, "loss": 1.0025, "step": 10331 }, { "epoch": 0.9231800209976099, "grad_norm": 0.5528203845024109, "learning_rate": 1.53719322239404e-06, "loss": 0.9608, "step": 10332 }, { "epoch": 0.9232693725288718, "grad_norm": 0.47037777304649353, "learning_rate": 1.5336346750898678e-06, "loss": 0.9687, "step": 10333 }, { "epoch": 0.9233587240601335, "grad_norm": 0.5850094556808472, "learning_rate": 1.530080187360411e-06, "loss": 0.9053, "step": 10334 }, { "epoch": 0.9234480755913954, "grad_norm": 0.552905797958374, "learning_rate": 1.5265297595033868e-06, "loss": 0.9073, "step": 10335 }, { "epoch": 0.9235374271226573, "grad_norm": 0.5310145020484924, "learning_rate": 1.5229833918161906e-06, "loss": 0.9366, "step": 10336 }, { "epoch": 0.9236267786539192, "grad_norm": 0.4303600490093231, "learning_rate": 1.5194410845958574e-06, "loss": 0.9336, "step": 10337 }, { "epoch": 0.9237161301851811, "grad_norm": 0.44541823863983154, "learning_rate": 1.5159028381390994e-06, "loss": 0.9732, "step": 10338 }, { "epoch": 0.9238054817164429, "grad_norm": 0.6362563967704773, "learning_rate": 1.5123686527422854e-06, "loss": 0.9495, "step": 10339 }, { "epoch": 0.9238948332477048, "grad_norm": 0.4896201491355896, "learning_rate": 1.5088385287014395e-06, "loss": 0.8939, "step": 10340 }, { "epoch": 0.9239841847789666, "grad_norm": 0.5114855766296387, "learning_rate": 1.5053124663122419e-06, "loss": 0.8782, "step": 10341 }, { "epoch": 0.9240735363102285, "grad_norm": 0.4664299190044403, "learning_rate": 1.5017904658700398e-06, "loss": 0.9548, "step": 10342 }, { "epoch": 0.9241628878414904, "grad_norm": 0.5291177034378052, "learning_rate": 1.4982725276698418e-06, "loss": 0.8842, "step": 10343 }, { "epoch": 0.9242522393727522, "grad_norm": 0.5422632694244385, "learning_rate": 1.4947586520063062e-06, "loss": 0.8602, "step": 10344 }, { "epoch": 0.9243415909040141, "grad_norm": 0.6073907613754272, "learning_rate": 1.49124883917377e-06, "loss": 0.8669, "step": 10345 }, { "epoch": 0.924430942435276, "grad_norm": 0.6418290734291077, "learning_rate": 1.4877430894662036e-06, "loss": 0.8869, "step": 10346 }, { "epoch": 0.9245202939665379, "grad_norm": 0.634617030620575, "learning_rate": 1.4842414031772612e-06, "loss": 0.8845, "step": 10347 }, { "epoch": 0.9246096454977997, "grad_norm": 0.5437284708023071, "learning_rate": 1.480743780600241e-06, "loss": 0.9179, "step": 10348 }, { "epoch": 0.9246989970290616, "grad_norm": 0.44016095995903015, "learning_rate": 1.4772502220281093e-06, "loss": 0.9627, "step": 10349 }, { "epoch": 0.9247883485603234, "grad_norm": 0.39443960785865784, "learning_rate": 1.4737607277534815e-06, "loss": 0.9157, "step": 10350 }, { "epoch": 0.9248777000915853, "grad_norm": 0.4765508770942688, "learning_rate": 1.4702752980686462e-06, "loss": 0.973, "step": 10351 }, { "epoch": 0.9249670516228472, "grad_norm": 0.5301365256309509, "learning_rate": 1.4667939332655478e-06, "loss": 0.8771, "step": 10352 }, { "epoch": 0.9250564031541091, "grad_norm": 0.48352834582328796, "learning_rate": 1.4633166336357807e-06, "loss": 0.9077, "step": 10353 }, { "epoch": 0.925145754685371, "grad_norm": 0.540468156337738, "learning_rate": 1.4598433994706117e-06, "loss": 0.9076, "step": 10354 }, { "epoch": 0.9252351062166327, "grad_norm": 0.42415082454681396, "learning_rate": 1.4563742310609529e-06, "loss": 0.9269, "step": 10355 }, { "epoch": 0.9253244577478946, "grad_norm": 0.4903179407119751, "learning_rate": 1.4529091286973995e-06, "loss": 0.933, "step": 10356 }, { "epoch": 0.9254138092791565, "grad_norm": 0.5395703315734863, "learning_rate": 1.4494480926701803e-06, "loss": 0.8883, "step": 10357 }, { "epoch": 0.9255031608104184, "grad_norm": 0.5688163042068481, "learning_rate": 1.4459911232691914e-06, "loss": 0.9469, "step": 10358 }, { "epoch": 0.9255925123416803, "grad_norm": 0.41127413511276245, "learning_rate": 1.4425382207839954e-06, "loss": 1.0157, "step": 10359 }, { "epoch": 0.9256818638729422, "grad_norm": 0.46226775646209717, "learning_rate": 1.4390893855038057e-06, "loss": 0.9535, "step": 10360 }, { "epoch": 0.925771215404204, "grad_norm": 0.5173824429512024, "learning_rate": 1.4356446177175077e-06, "loss": 0.9242, "step": 10361 }, { "epoch": 0.9258605669354658, "grad_norm": 0.518804669380188, "learning_rate": 1.432203917713626e-06, "loss": 0.9252, "step": 10362 }, { "epoch": 0.9259499184667277, "grad_norm": 0.5390114188194275, "learning_rate": 1.4287672857803636e-06, "loss": 0.8615, "step": 10363 }, { "epoch": 0.9260392699979896, "grad_norm": 0.48673346638679504, "learning_rate": 1.4253347222055735e-06, "loss": 0.9964, "step": 10364 }, { "epoch": 0.9261286215292515, "grad_norm": 0.5194094181060791, "learning_rate": 1.4219062272767703e-06, "loss": 0.9008, "step": 10365 }, { "epoch": 0.9262179730605133, "grad_norm": 0.5350120067596436, "learning_rate": 1.418481801281124e-06, "loss": 0.8815, "step": 10366 }, { "epoch": 0.9263073245917752, "grad_norm": 0.4165932536125183, "learning_rate": 1.4150614445054778e-06, "loss": 0.9316, "step": 10367 }, { "epoch": 0.9263966761230371, "grad_norm": 0.6019562482833862, "learning_rate": 1.411645157236302e-06, "loss": 0.9976, "step": 10368 }, { "epoch": 0.9264860276542989, "grad_norm": 0.5063771605491638, "learning_rate": 1.4082329397597626e-06, "loss": 0.972, "step": 10369 }, { "epoch": 0.9265753791855608, "grad_norm": 0.46804505586624146, "learning_rate": 1.4048247923616642e-06, "loss": 0.9119, "step": 10370 }, { "epoch": 0.9266647307168226, "grad_norm": 0.4946081340312958, "learning_rate": 1.4014207153274783e-06, "loss": 0.9554, "step": 10371 }, { "epoch": 0.9267540822480845, "grad_norm": 0.5020267963409424, "learning_rate": 1.3980207089423326e-06, "loss": 0.9053, "step": 10372 }, { "epoch": 0.9268434337793464, "grad_norm": 0.5129795670509338, "learning_rate": 1.3946247734910156e-06, "loss": 0.9016, "step": 10373 }, { "epoch": 0.9269327853106083, "grad_norm": 0.43974798917770386, "learning_rate": 1.3912329092579668e-06, "loss": 0.963, "step": 10374 }, { "epoch": 0.9270221368418702, "grad_norm": 0.4542407691478729, "learning_rate": 1.387845116527292e-06, "loss": 0.9474, "step": 10375 }, { "epoch": 0.927111488373132, "grad_norm": 0.418317973613739, "learning_rate": 1.3844613955827535e-06, "loss": 0.9627, "step": 10376 }, { "epoch": 0.9272008399043938, "grad_norm": 0.5148499011993408, "learning_rate": 1.3810817467077852e-06, "loss": 0.9356, "step": 10377 }, { "epoch": 0.9272901914356557, "grad_norm": 0.44719183444976807, "learning_rate": 1.377706170185461e-06, "loss": 0.9662, "step": 10378 }, { "epoch": 0.9273795429669176, "grad_norm": 0.48192569613456726, "learning_rate": 1.3743346662985157e-06, "loss": 0.9405, "step": 10379 }, { "epoch": 0.9274688944981795, "grad_norm": 0.6048439741134644, "learning_rate": 1.3709672353293568e-06, "loss": 0.9381, "step": 10380 }, { "epoch": 0.9275582460294414, "grad_norm": 0.4958132207393646, "learning_rate": 1.3676038775600364e-06, "loss": 0.9396, "step": 10381 }, { "epoch": 0.9276475975607033, "grad_norm": 0.4941558539867401, "learning_rate": 1.3642445932722792e-06, "loss": 0.9128, "step": 10382 }, { "epoch": 0.927736949091965, "grad_norm": 0.5161193609237671, "learning_rate": 1.36088938274746e-06, "loss": 0.9273, "step": 10383 }, { "epoch": 0.9278263006232269, "grad_norm": 0.47243648767471313, "learning_rate": 1.3575382462666042e-06, "loss": 0.9353, "step": 10384 }, { "epoch": 0.9279156521544888, "grad_norm": 0.432420551776886, "learning_rate": 1.3541911841104149e-06, "loss": 0.9181, "step": 10385 }, { "epoch": 0.9280050036857507, "grad_norm": 0.507323145866394, "learning_rate": 1.3508481965592401e-06, "loss": 0.8991, "step": 10386 }, { "epoch": 0.9280943552170126, "grad_norm": 0.5412149429321289, "learning_rate": 1.3475092838930947e-06, "loss": 0.8649, "step": 10387 }, { "epoch": 0.9281837067482744, "grad_norm": 0.42463141679763794, "learning_rate": 1.344174446391644e-06, "loss": 0.9389, "step": 10388 }, { "epoch": 0.9282730582795362, "grad_norm": 0.4768441319465637, "learning_rate": 1.3408436843342142e-06, "loss": 0.9983, "step": 10389 }, { "epoch": 0.9283624098107981, "grad_norm": 0.584004282951355, "learning_rate": 1.3375169979997992e-06, "loss": 0.9239, "step": 10390 }, { "epoch": 0.92845176134206, "grad_norm": 0.4398382604122162, "learning_rate": 1.3341943876670371e-06, "loss": 0.9366, "step": 10391 }, { "epoch": 0.9285411128733219, "grad_norm": 0.4435214102268219, "learning_rate": 1.3308758536142384e-06, "loss": 0.9742, "step": 10392 }, { "epoch": 0.9286304644045837, "grad_norm": 0.5455980896949768, "learning_rate": 1.327561396119359e-06, "loss": 0.953, "step": 10393 }, { "epoch": 0.9287198159358456, "grad_norm": 0.4934171438217163, "learning_rate": 1.3242510154600207e-06, "loss": 0.8277, "step": 10394 }, { "epoch": 0.9288091674671075, "grad_norm": 0.4403308629989624, "learning_rate": 1.3209447119135132e-06, "loss": 0.9771, "step": 10395 }, { "epoch": 0.9288985189983693, "grad_norm": 0.600942075252533, "learning_rate": 1.3176424857567648e-06, "loss": 0.8728, "step": 10396 }, { "epoch": 0.9289878705296312, "grad_norm": 0.6132327318191528, "learning_rate": 1.3143443372663767e-06, "loss": 0.9608, "step": 10397 }, { "epoch": 0.929077222060893, "grad_norm": 0.7262205481529236, "learning_rate": 1.3110502667185997e-06, "loss": 0.8809, "step": 10398 }, { "epoch": 0.9291665735921549, "grad_norm": 0.49000728130340576, "learning_rate": 1.3077602743893523e-06, "loss": 0.8944, "step": 10399 }, { "epoch": 0.9292559251234168, "grad_norm": 0.5410964488983154, "learning_rate": 1.3044743605541975e-06, "loss": 0.9214, "step": 10400 }, { "epoch": 0.9293452766546787, "grad_norm": 0.46295785903930664, "learning_rate": 1.3011925254883761e-06, "loss": 0.9303, "step": 10401 }, { "epoch": 0.9294346281859406, "grad_norm": 0.4503743052482605, "learning_rate": 1.2979147694667738e-06, "loss": 0.9973, "step": 10402 }, { "epoch": 0.9295239797172024, "grad_norm": 0.4440877437591553, "learning_rate": 1.2946410927639374e-06, "loss": 0.9027, "step": 10403 }, { "epoch": 0.9296133312484642, "grad_norm": 0.5882024765014648, "learning_rate": 1.29137149565407e-06, "loss": 0.8308, "step": 10404 }, { "epoch": 0.9297026827797261, "grad_norm": 0.6086094975471497, "learning_rate": 1.2881059784110362e-06, "loss": 0.9265, "step": 10405 }, { "epoch": 0.929792034310988, "grad_norm": 0.522862434387207, "learning_rate": 1.284844541308361e-06, "loss": 0.9177, "step": 10406 }, { "epoch": 0.9298813858422499, "grad_norm": 0.42290636897087097, "learning_rate": 1.2815871846192152e-06, "loss": 0.9461, "step": 10407 }, { "epoch": 0.9299707373735118, "grad_norm": 0.47796404361724854, "learning_rate": 1.278333908616447e-06, "loss": 0.9295, "step": 10408 }, { "epoch": 0.9300600889047737, "grad_norm": 0.46141406893730164, "learning_rate": 1.2750847135725496e-06, "loss": 0.9564, "step": 10409 }, { "epoch": 0.9301494404360354, "grad_norm": 0.5057287216186523, "learning_rate": 1.2718395997596833e-06, "loss": 0.9667, "step": 10410 }, { "epoch": 0.9302387919672973, "grad_norm": 0.5116837620735168, "learning_rate": 1.268598567449647e-06, "loss": 0.9412, "step": 10411 }, { "epoch": 0.9303281434985592, "grad_norm": 0.47364234924316406, "learning_rate": 1.2653616169139237e-06, "loss": 0.9878, "step": 10412 }, { "epoch": 0.9304174950298211, "grad_norm": 0.5040470957756042, "learning_rate": 1.262128748423641e-06, "loss": 1.0276, "step": 10413 }, { "epoch": 0.930506846561083, "grad_norm": 0.5426703095436096, "learning_rate": 1.2588999622495768e-06, "loss": 0.9327, "step": 10414 }, { "epoch": 0.9305961980923448, "grad_norm": 0.5746845602989197, "learning_rate": 1.2556752586621868e-06, "loss": 0.9452, "step": 10415 }, { "epoch": 0.9306855496236067, "grad_norm": 0.5168527364730835, "learning_rate": 1.2524546379315717e-06, "loss": 0.9003, "step": 10416 }, { "epoch": 0.9307749011548685, "grad_norm": 0.40214788913726807, "learning_rate": 1.2492381003274934e-06, "loss": 0.9048, "step": 10417 }, { "epoch": 0.9308642526861304, "grad_norm": 0.43776369094848633, "learning_rate": 1.2460256461193754e-06, "loss": 0.8901, "step": 10418 }, { "epoch": 0.9309536042173923, "grad_norm": 0.4549417197704315, "learning_rate": 1.2428172755762802e-06, "loss": 0.9345, "step": 10419 }, { "epoch": 0.9310429557486541, "grad_norm": 0.6516943573951721, "learning_rate": 1.2396129889669595e-06, "loss": 0.8175, "step": 10420 }, { "epoch": 0.931132307279916, "grad_norm": 0.3830028772354126, "learning_rate": 1.236412786559793e-06, "loss": 0.9797, "step": 10421 }, { "epoch": 0.9312216588111779, "grad_norm": 0.5979812741279602, "learning_rate": 1.2332166686228386e-06, "loss": 0.8904, "step": 10422 }, { "epoch": 0.9313110103424398, "grad_norm": 0.44422391057014465, "learning_rate": 1.2300246354238042e-06, "loss": 0.9199, "step": 10423 }, { "epoch": 0.9314003618737016, "grad_norm": 0.45250019431114197, "learning_rate": 1.2268366872300597e-06, "loss": 1.0105, "step": 10424 }, { "epoch": 0.9314897134049634, "grad_norm": 0.5378450155258179, "learning_rate": 1.2236528243086298e-06, "loss": 0.9126, "step": 10425 }, { "epoch": 0.9315790649362253, "grad_norm": 0.518375039100647, "learning_rate": 1.2204730469261906e-06, "loss": 0.835, "step": 10426 }, { "epoch": 0.9316684164674872, "grad_norm": 0.6360508799552917, "learning_rate": 1.217297355349084e-06, "loss": 0.7857, "step": 10427 }, { "epoch": 0.9317577679987491, "grad_norm": 0.43396756052970886, "learning_rate": 1.21412574984332e-06, "loss": 0.9583, "step": 10428 }, { "epoch": 0.931847119530011, "grad_norm": 0.4555058777332306, "learning_rate": 1.210958230674536e-06, "loss": 0.939, "step": 10429 }, { "epoch": 0.9319364710612729, "grad_norm": 0.38424840569496155, "learning_rate": 1.2077947981080584e-06, "loss": 1.0483, "step": 10430 }, { "epoch": 0.9320258225925346, "grad_norm": 0.5196928977966309, "learning_rate": 1.2046354524088477e-06, "loss": 0.901, "step": 10431 }, { "epoch": 0.9321151741237965, "grad_norm": 0.443092942237854, "learning_rate": 1.2014801938415422e-06, "loss": 0.9166, "step": 10432 }, { "epoch": 0.9322045256550584, "grad_norm": 0.6846402883529663, "learning_rate": 1.1983290226704247e-06, "loss": 0.9144, "step": 10433 }, { "epoch": 0.9322938771863203, "grad_norm": 0.5252601504325867, "learning_rate": 1.1951819391594398e-06, "loss": 0.837, "step": 10434 }, { "epoch": 0.9323832287175822, "grad_norm": 0.48273909091949463, "learning_rate": 1.1920389435721935e-06, "loss": 0.9412, "step": 10435 }, { "epoch": 0.932472580248844, "grad_norm": 0.4483881890773773, "learning_rate": 1.188900036171936e-06, "loss": 0.9079, "step": 10436 }, { "epoch": 0.9325619317801059, "grad_norm": 0.5480309128761292, "learning_rate": 1.1857652172215905e-06, "loss": 0.8815, "step": 10437 }, { "epoch": 0.9326512833113677, "grad_norm": 0.4683678150177002, "learning_rate": 1.1826344869837359e-06, "loss": 0.9372, "step": 10438 }, { "epoch": 0.9327406348426296, "grad_norm": 0.43585410714149475, "learning_rate": 1.1795078457205956e-06, "loss": 0.9516, "step": 10439 }, { "epoch": 0.9328299863738915, "grad_norm": 0.5120673775672913, "learning_rate": 1.17638529369406e-06, "loss": 0.8705, "step": 10440 }, { "epoch": 0.9329193379051534, "grad_norm": 0.4925757944583893, "learning_rate": 1.1732668311656815e-06, "loss": 0.9006, "step": 10441 }, { "epoch": 0.9330086894364152, "grad_norm": 0.5412935614585876, "learning_rate": 1.1701524583966562e-06, "loss": 0.9419, "step": 10442 }, { "epoch": 0.9330980409676771, "grad_norm": 0.5587424039840698, "learning_rate": 1.1670421756478589e-06, "loss": 0.9397, "step": 10443 }, { "epoch": 0.933187392498939, "grad_norm": 0.6443805694580078, "learning_rate": 1.163935983179798e-06, "loss": 0.9046, "step": 10444 }, { "epoch": 0.9332767440302008, "grad_norm": 0.4743395149707794, "learning_rate": 1.1608338812526487e-06, "loss": 0.8918, "step": 10445 }, { "epoch": 0.9333660955614627, "grad_norm": 0.44435209035873413, "learning_rate": 1.157735870126253e-06, "loss": 0.9825, "step": 10446 }, { "epoch": 0.9334554470927245, "grad_norm": 0.46846985816955566, "learning_rate": 1.1546419500601036e-06, "loss": 0.9809, "step": 10447 }, { "epoch": 0.9335447986239864, "grad_norm": 0.5460909008979797, "learning_rate": 1.1515521213133429e-06, "loss": 0.9302, "step": 10448 }, { "epoch": 0.9336341501552483, "grad_norm": 0.5399797558784485, "learning_rate": 1.1484663841447807e-06, "loss": 0.9177, "step": 10449 }, { "epoch": 0.9337235016865102, "grad_norm": 0.6021638512611389, "learning_rate": 1.1453847388128712e-06, "loss": 0.964, "step": 10450 }, { "epoch": 0.933812853217772, "grad_norm": 0.4933503568172455, "learning_rate": 1.1423071855757473e-06, "loss": 0.8803, "step": 10451 }, { "epoch": 0.9339022047490338, "grad_norm": 0.5711500644683838, "learning_rate": 1.13923372469118e-06, "loss": 0.9169, "step": 10452 }, { "epoch": 0.9339915562802957, "grad_norm": 0.49056991934776306, "learning_rate": 1.136164356416608e-06, "loss": 0.8962, "step": 10453 }, { "epoch": 0.9340809078115576, "grad_norm": 0.5031728148460388, "learning_rate": 1.1330990810091257e-06, "loss": 0.963, "step": 10454 }, { "epoch": 0.9341702593428195, "grad_norm": 0.42288509011268616, "learning_rate": 1.1300378987254723e-06, "loss": 1.0058, "step": 10455 }, { "epoch": 0.9342596108740814, "grad_norm": 0.4871813952922821, "learning_rate": 1.1269808098220647e-06, "loss": 0.9285, "step": 10456 }, { "epoch": 0.9343489624053433, "grad_norm": 0.537005603313446, "learning_rate": 1.1239278145549648e-06, "loss": 0.9045, "step": 10457 }, { "epoch": 0.934438313936605, "grad_norm": 0.4379669725894928, "learning_rate": 1.1208789131798958e-06, "loss": 0.9574, "step": 10458 }, { "epoch": 0.9345276654678669, "grad_norm": 0.4513357877731323, "learning_rate": 1.1178341059522256e-06, "loss": 0.9283, "step": 10459 }, { "epoch": 0.9346170169991288, "grad_norm": 0.653385579586029, "learning_rate": 1.114793393126995e-06, "loss": 0.8258, "step": 10460 }, { "epoch": 0.9347063685303907, "grad_norm": 0.44483664631843567, "learning_rate": 1.1117567749588997e-06, "loss": 0.9546, "step": 10461 }, { "epoch": 0.9347957200616526, "grad_norm": 0.4962851405143738, "learning_rate": 1.1087242517022812e-06, "loss": 0.9296, "step": 10462 }, { "epoch": 0.9348850715929145, "grad_norm": 0.4276025891304016, "learning_rate": 1.1056958236111525e-06, "loss": 0.9155, "step": 10463 }, { "epoch": 0.9349744231241763, "grad_norm": 0.5110339522361755, "learning_rate": 1.1026714909391778e-06, "loss": 0.9607, "step": 10464 }, { "epoch": 0.9350637746554381, "grad_norm": 0.42139366269111633, "learning_rate": 1.0996512539396707e-06, "loss": 1.0041, "step": 10465 }, { "epoch": 0.9351531261867, "grad_norm": 0.4701647162437439, "learning_rate": 1.0966351128656072e-06, "loss": 0.9622, "step": 10466 }, { "epoch": 0.9352424777179619, "grad_norm": 0.47479528188705444, "learning_rate": 1.093623067969629e-06, "loss": 0.9374, "step": 10467 }, { "epoch": 0.9353318292492238, "grad_norm": 0.5557815432548523, "learning_rate": 1.0906151195040294e-06, "loss": 0.92, "step": 10468 }, { "epoch": 0.9354211807804856, "grad_norm": 0.46990975737571716, "learning_rate": 1.087611267720745e-06, "loss": 0.912, "step": 10469 }, { "epoch": 0.9355105323117475, "grad_norm": 0.5090728402137756, "learning_rate": 1.0846115128713862e-06, "loss": 0.9633, "step": 10470 }, { "epoch": 0.9355998838430094, "grad_norm": 0.45003437995910645, "learning_rate": 1.081615855207213e-06, "loss": 0.9622, "step": 10471 }, { "epoch": 0.9356892353742712, "grad_norm": 0.5591026544570923, "learning_rate": 1.0786242949791415e-06, "loss": 0.9682, "step": 10472 }, { "epoch": 0.9357785869055331, "grad_norm": 0.48961174488067627, "learning_rate": 1.0756368324377542e-06, "loss": 0.9561, "step": 10473 }, { "epoch": 0.9358679384367949, "grad_norm": 0.6354184746742249, "learning_rate": 1.0726534678332733e-06, "loss": 0.8864, "step": 10474 }, { "epoch": 0.9359572899680568, "grad_norm": 0.3932569921016693, "learning_rate": 1.069674201415599e-06, "loss": 0.9948, "step": 10475 }, { "epoch": 0.9360466414993187, "grad_norm": 0.4889991283416748, "learning_rate": 1.0666990334342707e-06, "loss": 0.9149, "step": 10476 }, { "epoch": 0.9361359930305806, "grad_norm": 0.46199727058410645, "learning_rate": 1.0637279641384834e-06, "loss": 0.9391, "step": 10477 }, { "epoch": 0.9362253445618425, "grad_norm": 0.5159189701080322, "learning_rate": 1.0607609937771046e-06, "loss": 0.9332, "step": 10478 }, { "epoch": 0.9363146960931042, "grad_norm": 0.5201767086982727, "learning_rate": 1.0577981225986467e-06, "loss": 0.8602, "step": 10479 }, { "epoch": 0.9364040476243661, "grad_norm": 0.4806266725063324, "learning_rate": 1.0548393508512887e-06, "loss": 0.9179, "step": 10480 }, { "epoch": 0.936493399155628, "grad_norm": 0.4578525722026825, "learning_rate": 1.0518846787828496e-06, "loss": 0.9347, "step": 10481 }, { "epoch": 0.9365827506868899, "grad_norm": 0.4183438718318939, "learning_rate": 1.0489341066408142e-06, "loss": 0.9347, "step": 10482 }, { "epoch": 0.9366721022181518, "grad_norm": 0.5696753859519958, "learning_rate": 1.0459876346723297e-06, "loss": 0.8983, "step": 10483 }, { "epoch": 0.9367614537494137, "grad_norm": 0.46501290798187256, "learning_rate": 1.0430452631241928e-06, "loss": 0.9691, "step": 10484 }, { "epoch": 0.9368508052806755, "grad_norm": 0.5632017254829407, "learning_rate": 1.0401069922428619e-06, "loss": 0.9061, "step": 10485 }, { "epoch": 0.9369401568119373, "grad_norm": 0.4606979787349701, "learning_rate": 1.0371728222744402e-06, "loss": 0.9337, "step": 10486 }, { "epoch": 0.9370295083431992, "grad_norm": 0.42771193385124207, "learning_rate": 1.0342427534647036e-06, "loss": 0.9453, "step": 10487 }, { "epoch": 0.9371188598744611, "grad_norm": 0.49479082226753235, "learning_rate": 1.0313167860590777e-06, "loss": 0.9413, "step": 10488 }, { "epoch": 0.937208211405723, "grad_norm": 0.5891922116279602, "learning_rate": 1.0283949203026332e-06, "loss": 0.934, "step": 10489 }, { "epoch": 0.9372975629369849, "grad_norm": 0.4088734984397888, "learning_rate": 1.0254771564401189e-06, "loss": 0.9396, "step": 10490 }, { "epoch": 0.9373869144682467, "grad_norm": 0.5687556862831116, "learning_rate": 1.0225634947159223e-06, "loss": 0.9473, "step": 10491 }, { "epoch": 0.9374762659995086, "grad_norm": 0.46405094861984253, "learning_rate": 1.019653935374093e-06, "loss": 0.9307, "step": 10492 }, { "epoch": 0.9375656175307704, "grad_norm": 0.451276570558548, "learning_rate": 1.0167484786583359e-06, "loss": 0.9777, "step": 10493 }, { "epoch": 0.9376549690620323, "grad_norm": 0.46850451827049255, "learning_rate": 1.013847124812023e-06, "loss": 0.9389, "step": 10494 }, { "epoch": 0.9377443205932942, "grad_norm": 0.454631507396698, "learning_rate": 1.010949874078171e-06, "loss": 0.8892, "step": 10495 }, { "epoch": 0.937833672124556, "grad_norm": 0.6396118402481079, "learning_rate": 1.0080567266994466e-06, "loss": 0.8246, "step": 10496 }, { "epoch": 0.9379230236558179, "grad_norm": 0.5484038591384888, "learning_rate": 1.00516768291819e-06, "loss": 0.8602, "step": 10497 }, { "epoch": 0.9380123751870798, "grad_norm": 0.5306549072265625, "learning_rate": 1.0022827429763903e-06, "loss": 1.019, "step": 10498 }, { "epoch": 0.9381017267183417, "grad_norm": 0.5081790089607239, "learning_rate": 9.99401907115688e-07, "loss": 0.8561, "step": 10499 }, { "epoch": 0.9381910782496035, "grad_norm": 0.43956875801086426, "learning_rate": 9.96525175577384e-07, "loss": 0.9825, "step": 10500 }, { "epoch": 0.9382804297808653, "grad_norm": 0.6813711524009705, "learning_rate": 9.936525486024362e-07, "loss": 0.7838, "step": 10501 }, { "epoch": 0.9383697813121272, "grad_norm": 0.5399671792984009, "learning_rate": 9.907840264314572e-07, "loss": 0.943, "step": 10502 }, { "epoch": 0.9384591328433891, "grad_norm": 0.4913422763347626, "learning_rate": 9.87919609304716e-07, "loss": 0.9302, "step": 10503 }, { "epoch": 0.938548484374651, "grad_norm": 0.5128286480903625, "learning_rate": 9.850592974621375e-07, "loss": 0.9291, "step": 10504 }, { "epoch": 0.9386378359059129, "grad_norm": 0.6238715052604675, "learning_rate": 9.822030911433023e-07, "loss": 0.9145, "step": 10505 }, { "epoch": 0.9387271874371748, "grad_norm": 0.5400130748748779, "learning_rate": 9.793509905874576e-07, "loss": 0.9463, "step": 10506 }, { "epoch": 0.9388165389684365, "grad_norm": 0.534290075302124, "learning_rate": 9.765029960334849e-07, "loss": 0.9139, "step": 10507 }, { "epoch": 0.9389058904996984, "grad_norm": 0.48370060324668884, "learning_rate": 9.736591077199374e-07, "loss": 0.9766, "step": 10508 }, { "epoch": 0.9389952420309603, "grad_norm": 0.44061046838760376, "learning_rate": 9.708193258850306e-07, "loss": 0.9452, "step": 10509 }, { "epoch": 0.9390845935622222, "grad_norm": 0.4897972643375397, "learning_rate": 9.679836507666185e-07, "loss": 0.9245, "step": 10510 }, { "epoch": 0.9391739450934841, "grad_norm": 0.5403768420219421, "learning_rate": 9.65152082602211e-07, "loss": 0.9311, "step": 10511 }, { "epoch": 0.939263296624746, "grad_norm": 0.4956395626068115, "learning_rate": 9.62324621628996e-07, "loss": 0.9089, "step": 10512 }, { "epoch": 0.9393526481560077, "grad_norm": 0.5809354782104492, "learning_rate": 9.595012680838012e-07, "loss": 0.8998, "step": 10513 }, { "epoch": 0.9394419996872696, "grad_norm": 0.46071991324424744, "learning_rate": 9.566820222031036e-07, "loss": 0.8987, "step": 10514 }, { "epoch": 0.9395313512185315, "grad_norm": 0.49813321232795715, "learning_rate": 9.538668842230537e-07, "loss": 0.9903, "step": 10515 }, { "epoch": 0.9396207027497934, "grad_norm": 0.47896721959114075, "learning_rate": 9.510558543794457e-07, "loss": 1.0244, "step": 10516 }, { "epoch": 0.9397100542810553, "grad_norm": 0.4987740218639374, "learning_rate": 9.482489329077304e-07, "loss": 0.9312, "step": 10517 }, { "epoch": 0.9397994058123171, "grad_norm": 0.4500676989555359, "learning_rate": 9.454461200430253e-07, "loss": 0.9262, "step": 10518 }, { "epoch": 0.939888757343579, "grad_norm": 0.4563513398170471, "learning_rate": 9.426474160200926e-07, "loss": 0.9153, "step": 10519 }, { "epoch": 0.9399781088748408, "grad_norm": 0.4408057928085327, "learning_rate": 9.398528210733504e-07, "loss": 0.9599, "step": 10520 }, { "epoch": 0.9400674604061027, "grad_norm": 0.5273951888084412, "learning_rate": 9.370623354368779e-07, "loss": 0.9386, "step": 10521 }, { "epoch": 0.9401568119373646, "grad_norm": 0.6240958571434021, "learning_rate": 9.342759593444106e-07, "loss": 0.8736, "step": 10522 }, { "epoch": 0.9402461634686264, "grad_norm": 0.48620718717575073, "learning_rate": 9.314936930293283e-07, "loss": 0.9117, "step": 10523 }, { "epoch": 0.9403355149998883, "grad_norm": 0.45210275053977966, "learning_rate": 9.287155367246891e-07, "loss": 0.9074, "step": 10524 }, { "epoch": 0.9404248665311502, "grad_norm": 0.5615553259849548, "learning_rate": 9.259414906631791e-07, "loss": 0.9937, "step": 10525 }, { "epoch": 0.9405142180624121, "grad_norm": 0.46976426243782043, "learning_rate": 9.23171555077168e-07, "loss": 1.0001, "step": 10526 }, { "epoch": 0.9406035695936739, "grad_norm": 0.5349940061569214, "learning_rate": 9.204057301986535e-07, "loss": 0.8587, "step": 10527 }, { "epoch": 0.9406929211249357, "grad_norm": 0.48549193143844604, "learning_rate": 9.176440162593169e-07, "loss": 0.9925, "step": 10528 }, { "epoch": 0.9407822726561976, "grad_norm": 0.48987144231796265, "learning_rate": 9.148864134904733e-07, "loss": 0.9652, "step": 10529 }, { "epoch": 0.9408716241874595, "grad_norm": 0.4827273190021515, "learning_rate": 9.12132922123099e-07, "loss": 0.8644, "step": 10530 }, { "epoch": 0.9409609757187214, "grad_norm": 0.5220755934715271, "learning_rate": 9.093835423878317e-07, "loss": 0.9001, "step": 10531 }, { "epoch": 0.9410503272499833, "grad_norm": 0.4924890995025635, "learning_rate": 9.066382745149649e-07, "loss": 0.9662, "step": 10532 }, { "epoch": 0.9411396787812452, "grad_norm": 0.5399982929229736, "learning_rate": 9.038971187344369e-07, "loss": 0.8605, "step": 10533 }, { "epoch": 0.9412290303125069, "grad_norm": 0.5098146200180054, "learning_rate": 9.011600752758531e-07, "loss": 0.9553, "step": 10534 }, { "epoch": 0.9413183818437688, "grad_norm": 0.42846280336380005, "learning_rate": 8.984271443684633e-07, "loss": 0.9544, "step": 10535 }, { "epoch": 0.9414077333750307, "grad_norm": 0.5252984762191772, "learning_rate": 8.9569832624119e-07, "loss": 0.8778, "step": 10536 }, { "epoch": 0.9414970849062926, "grad_norm": 0.5305891036987305, "learning_rate": 8.929736211226003e-07, "loss": 0.8921, "step": 10537 }, { "epoch": 0.9415864364375545, "grad_norm": 0.4615854024887085, "learning_rate": 8.902530292409062e-07, "loss": 0.9217, "step": 10538 }, { "epoch": 0.9416757879688163, "grad_norm": 0.45759010314941406, "learning_rate": 8.87536550824003e-07, "loss": 0.9712, "step": 10539 }, { "epoch": 0.9417651395000782, "grad_norm": 0.48792535066604614, "learning_rate": 8.848241860994089e-07, "loss": 0.876, "step": 10540 }, { "epoch": 0.94185449103134, "grad_norm": 0.4174993932247162, "learning_rate": 8.821159352943143e-07, "loss": 0.9367, "step": 10541 }, { "epoch": 0.9419438425626019, "grad_norm": 0.4825289845466614, "learning_rate": 8.794117986355766e-07, "loss": 0.9969, "step": 10542 }, { "epoch": 0.9420331940938638, "grad_norm": 0.44167813658714294, "learning_rate": 8.767117763496813e-07, "loss": 0.8798, "step": 10543 }, { "epoch": 0.9421225456251257, "grad_norm": 0.5294803380966187, "learning_rate": 8.740158686627975e-07, "loss": 0.8875, "step": 10544 }, { "epoch": 0.9422118971563875, "grad_norm": 0.4376986622810364, "learning_rate": 8.71324075800728e-07, "loss": 0.9939, "step": 10545 }, { "epoch": 0.9423012486876494, "grad_norm": 0.5412419438362122, "learning_rate": 8.686363979889478e-07, "loss": 0.9387, "step": 10546 }, { "epoch": 0.9423906002189113, "grad_norm": 0.5909743309020996, "learning_rate": 8.659528354525603e-07, "loss": 0.8535, "step": 10547 }, { "epoch": 0.9424799517501731, "grad_norm": 0.430073082447052, "learning_rate": 8.632733884163635e-07, "loss": 0.9364, "step": 10548 }, { "epoch": 0.942569303281435, "grad_norm": 0.5627753734588623, "learning_rate": 8.605980571047723e-07, "loss": 0.9279, "step": 10549 }, { "epoch": 0.9426586548126968, "grad_norm": 0.526467502117157, "learning_rate": 8.579268417418851e-07, "loss": 0.9335, "step": 10550 }, { "epoch": 0.9427480063439587, "grad_norm": 0.5456862449645996, "learning_rate": 8.552597425514508e-07, "loss": 0.8866, "step": 10551 }, { "epoch": 0.9428373578752206, "grad_norm": 0.5069810152053833, "learning_rate": 8.525967597568463e-07, "loss": 0.9052, "step": 10552 }, { "epoch": 0.9429267094064825, "grad_norm": 0.625331461429596, "learning_rate": 8.499378935811431e-07, "loss": 0.9148, "step": 10553 }, { "epoch": 0.9430160609377444, "grad_norm": 0.5902383327484131, "learning_rate": 8.472831442470408e-07, "loss": 0.802, "step": 10554 }, { "epoch": 0.9431054124690061, "grad_norm": 0.42086061835289, "learning_rate": 8.446325119769061e-07, "loss": 0.9733, "step": 10555 }, { "epoch": 0.943194764000268, "grad_norm": 0.5746432542800903, "learning_rate": 8.419859969927557e-07, "loss": 0.8944, "step": 10556 }, { "epoch": 0.9432841155315299, "grad_norm": 0.5022989511489868, "learning_rate": 8.393435995162624e-07, "loss": 0.9403, "step": 10557 }, { "epoch": 0.9433734670627918, "grad_norm": 0.443338543176651, "learning_rate": 8.367053197687602e-07, "loss": 0.9092, "step": 10558 }, { "epoch": 0.9434628185940537, "grad_norm": 0.592194676399231, "learning_rate": 8.340711579712391e-07, "loss": 0.8213, "step": 10559 }, { "epoch": 0.9435521701253156, "grad_norm": 0.47716042399406433, "learning_rate": 8.314411143443168e-07, "loss": 0.9157, "step": 10560 }, { "epoch": 0.9436415216565774, "grad_norm": 0.49664196372032166, "learning_rate": 8.288151891083062e-07, "loss": 0.8716, "step": 10561 }, { "epoch": 0.9437308731878392, "grad_norm": 0.471483439207077, "learning_rate": 8.261933824831481e-07, "loss": 0.9732, "step": 10562 }, { "epoch": 0.9438202247191011, "grad_norm": 0.4351029396057129, "learning_rate": 8.235756946884554e-07, "loss": 1.0083, "step": 10563 }, { "epoch": 0.943909576250363, "grad_norm": 0.4930981993675232, "learning_rate": 8.209621259434753e-07, "loss": 0.9445, "step": 10564 }, { "epoch": 0.9439989277816249, "grad_norm": 0.42323318123817444, "learning_rate": 8.183526764671267e-07, "loss": 0.9145, "step": 10565 }, { "epoch": 0.9440882793128867, "grad_norm": 0.4111577570438385, "learning_rate": 8.157473464779852e-07, "loss": 0.9204, "step": 10566 }, { "epoch": 0.9441776308441486, "grad_norm": 0.6320559978485107, "learning_rate": 8.13146136194265e-07, "loss": 0.9821, "step": 10567 }, { "epoch": 0.9442669823754105, "grad_norm": 0.47050777077674866, "learning_rate": 8.105490458338527e-07, "loss": 0.9344, "step": 10568 }, { "epoch": 0.9443563339066723, "grad_norm": 0.6935842037200928, "learning_rate": 8.079560756142857e-07, "loss": 0.9204, "step": 10569 }, { "epoch": 0.9444456854379342, "grad_norm": 0.5569193363189697, "learning_rate": 8.0536722575274e-07, "loss": 0.8738, "step": 10570 }, { "epoch": 0.944535036969196, "grad_norm": 0.4778384864330292, "learning_rate": 8.027824964660646e-07, "loss": 0.924, "step": 10571 }, { "epoch": 0.9446243885004579, "grad_norm": 0.4366244375705719, "learning_rate": 8.00201887970764e-07, "loss": 0.9712, "step": 10572 }, { "epoch": 0.9447137400317198, "grad_norm": 0.6287685036659241, "learning_rate": 7.976254004829875e-07, "loss": 0.9883, "step": 10573 }, { "epoch": 0.9448030915629817, "grad_norm": 0.464234858751297, "learning_rate": 7.950530342185402e-07, "loss": 0.9353, "step": 10574 }, { "epoch": 0.9448924430942435, "grad_norm": 0.5004445314407349, "learning_rate": 7.924847893928888e-07, "loss": 0.8881, "step": 10575 }, { "epoch": 0.9449817946255054, "grad_norm": 0.5838055610656738, "learning_rate": 7.8992066622115e-07, "loss": 0.9222, "step": 10576 }, { "epoch": 0.9450711461567672, "grad_norm": 0.4792827367782593, "learning_rate": 7.873606649180965e-07, "loss": 0.9294, "step": 10577 }, { "epoch": 0.9451604976880291, "grad_norm": 0.4360794723033905, "learning_rate": 7.848047856981622e-07, "loss": 0.9488, "step": 10578 }, { "epoch": 0.945249849219291, "grad_norm": 0.5515472888946533, "learning_rate": 7.822530287754204e-07, "loss": 0.8643, "step": 10579 }, { "epoch": 0.9453392007505529, "grad_norm": 0.6533846855163574, "learning_rate": 7.797053943636112e-07, "loss": 0.9626, "step": 10580 }, { "epoch": 0.9454285522818148, "grad_norm": 0.5607351660728455, "learning_rate": 7.771618826761252e-07, "loss": 0.9153, "step": 10581 }, { "epoch": 0.9455179038130765, "grad_norm": 0.5081503987312317, "learning_rate": 7.746224939260083e-07, "loss": 0.8538, "step": 10582 }, { "epoch": 0.9456072553443384, "grad_norm": 0.5000874996185303, "learning_rate": 7.720872283259684e-07, "loss": 0.9195, "step": 10583 }, { "epoch": 0.9456966068756003, "grad_norm": 0.49381589889526367, "learning_rate": 7.695560860883467e-07, "loss": 0.9285, "step": 10584 }, { "epoch": 0.9457859584068622, "grad_norm": 0.54331374168396, "learning_rate": 7.670290674251679e-07, "loss": 0.9005, "step": 10585 }, { "epoch": 0.9458753099381241, "grad_norm": 0.5094651579856873, "learning_rate": 7.64506172548085e-07, "loss": 0.8645, "step": 10586 }, { "epoch": 0.945964661469386, "grad_norm": 0.5358061194419861, "learning_rate": 7.619874016684237e-07, "loss": 0.8771, "step": 10587 }, { "epoch": 0.9460540130006478, "grad_norm": 0.45969656109809875, "learning_rate": 7.594727549971592e-07, "loss": 0.9333, "step": 10588 }, { "epoch": 0.9461433645319096, "grad_norm": 0.5184633135795593, "learning_rate": 7.569622327449177e-07, "loss": 0.8593, "step": 10589 }, { "epoch": 0.9462327160631715, "grad_norm": 0.6350348591804504, "learning_rate": 7.54455835121981e-07, "loss": 0.8568, "step": 10590 }, { "epoch": 0.9463220675944334, "grad_norm": 0.45934930443763733, "learning_rate": 7.519535623382867e-07, "loss": 0.8905, "step": 10591 }, { "epoch": 0.9464114191256953, "grad_norm": 0.6196146607398987, "learning_rate": 7.494554146034338e-07, "loss": 0.9422, "step": 10592 }, { "epoch": 0.9465007706569571, "grad_norm": 0.4321631193161011, "learning_rate": 7.46961392126655e-07, "loss": 0.9287, "step": 10593 }, { "epoch": 0.946590122188219, "grad_norm": 0.494315505027771, "learning_rate": 7.444714951168663e-07, "loss": 0.8973, "step": 10594 }, { "epoch": 0.9466794737194809, "grad_norm": 0.4586873948574066, "learning_rate": 7.419857237826122e-07, "loss": 0.8857, "step": 10595 }, { "epoch": 0.9467688252507427, "grad_norm": 0.46470096707344055, "learning_rate": 7.395040783321039e-07, "loss": 0.9417, "step": 10596 }, { "epoch": 0.9468581767820046, "grad_norm": 0.45211324095726013, "learning_rate": 7.37026558973214e-07, "loss": 0.8929, "step": 10597 }, { "epoch": 0.9469475283132665, "grad_norm": 0.5554351210594177, "learning_rate": 7.345531659134486e-07, "loss": 0.9703, "step": 10598 }, { "epoch": 0.9470368798445283, "grad_norm": 0.46897411346435547, "learning_rate": 7.320838993599921e-07, "loss": 0.9272, "step": 10599 }, { "epoch": 0.9471262313757902, "grad_norm": 0.6147266030311584, "learning_rate": 7.29618759519668e-07, "loss": 0.98, "step": 10600 }, { "epoch": 0.9472155829070521, "grad_norm": 0.5554401278495789, "learning_rate": 7.271577465989555e-07, "loss": 0.9312, "step": 10601 }, { "epoch": 0.947304934438314, "grad_norm": 0.5745187401771545, "learning_rate": 7.247008608039952e-07, "loss": 0.8978, "step": 10602 }, { "epoch": 0.9473942859695758, "grad_norm": 0.46855631470680237, "learning_rate": 7.222481023405725e-07, "loss": 0.949, "step": 10603 }, { "epoch": 0.9474836375008376, "grad_norm": 0.5807746052742004, "learning_rate": 7.197994714141343e-07, "loss": 0.8789, "step": 10604 }, { "epoch": 0.9475729890320995, "grad_norm": 0.539803147315979, "learning_rate": 7.173549682297775e-07, "loss": 1.0313, "step": 10605 }, { "epoch": 0.9476623405633614, "grad_norm": 0.45081114768981934, "learning_rate": 7.149145929922607e-07, "loss": 0.9441, "step": 10606 }, { "epoch": 0.9477516920946233, "grad_norm": 0.44997450709342957, "learning_rate": 7.124783459059869e-07, "loss": 0.9809, "step": 10607 }, { "epoch": 0.9478410436258852, "grad_norm": 0.43726733326911926, "learning_rate": 7.100462271750153e-07, "loss": 0.9145, "step": 10608 }, { "epoch": 0.947930395157147, "grad_norm": 0.5305030345916748, "learning_rate": 7.076182370030715e-07, "loss": 0.9287, "step": 10609 }, { "epoch": 0.9480197466884088, "grad_norm": 0.3688342869281769, "learning_rate": 7.051943755935208e-07, "loss": 0.9817, "step": 10610 }, { "epoch": 0.9481090982196707, "grad_norm": 0.40400731563568115, "learning_rate": 7.027746431493787e-07, "loss": 0.9076, "step": 10611 }, { "epoch": 0.9481984497509326, "grad_norm": 0.5000659823417664, "learning_rate": 7.003590398733329e-07, "loss": 0.9145, "step": 10612 }, { "epoch": 0.9482878012821945, "grad_norm": 0.4885746240615845, "learning_rate": 6.97947565967716e-07, "loss": 0.9152, "step": 10613 }, { "epoch": 0.9483771528134564, "grad_norm": 0.48515942692756653, "learning_rate": 6.955402216345108e-07, "loss": 0.9315, "step": 10614 }, { "epoch": 0.9484665043447182, "grad_norm": 0.43972206115722656, "learning_rate": 6.931370070753618e-07, "loss": 0.9436, "step": 10615 }, { "epoch": 0.9485558558759801, "grad_norm": 0.5218010544776917, "learning_rate": 6.907379224915633e-07, "loss": 0.9323, "step": 10616 }, { "epoch": 0.9486452074072419, "grad_norm": 0.5221048593521118, "learning_rate": 6.883429680840602e-07, "loss": 0.9423, "step": 10617 }, { "epoch": 0.9487345589385038, "grad_norm": 0.5904656648635864, "learning_rate": 6.859521440534533e-07, "loss": 0.8302, "step": 10618 }, { "epoch": 0.9488239104697657, "grad_norm": 0.4238061010837555, "learning_rate": 6.8356545060001e-07, "loss": 1.0147, "step": 10619 }, { "epoch": 0.9489132620010275, "grad_norm": 0.47028911113739014, "learning_rate": 6.81182887923637e-07, "loss": 0.8621, "step": 10620 }, { "epoch": 0.9490026135322894, "grad_norm": 0.5632848143577576, "learning_rate": 6.788044562238971e-07, "loss": 0.9579, "step": 10621 }, { "epoch": 0.9490919650635513, "grad_norm": 0.6260432004928589, "learning_rate": 6.764301557000086e-07, "loss": 0.8357, "step": 10622 }, { "epoch": 0.9491813165948132, "grad_norm": 0.6236041784286499, "learning_rate": 6.740599865508457e-07, "loss": 0.8724, "step": 10623 }, { "epoch": 0.949270668126075, "grad_norm": 0.4447486400604248, "learning_rate": 6.716939489749329e-07, "loss": 0.9633, "step": 10624 }, { "epoch": 0.9493600196573369, "grad_norm": 0.43622735142707825, "learning_rate": 6.693320431704564e-07, "loss": 0.93, "step": 10625 }, { "epoch": 0.9494493711885987, "grad_norm": 0.5855457186698914, "learning_rate": 6.669742693352521e-07, "loss": 0.8923, "step": 10626 }, { "epoch": 0.9495387227198606, "grad_norm": 0.4751369059085846, "learning_rate": 6.64620627666801e-07, "loss": 0.8902, "step": 10627 }, { "epoch": 0.9496280742511225, "grad_norm": 0.5162234902381897, "learning_rate": 6.622711183622454e-07, "loss": 0.9346, "step": 10628 }, { "epoch": 0.9497174257823844, "grad_norm": 0.3996429145336151, "learning_rate": 6.599257416183946e-07, "loss": 0.9754, "step": 10629 }, { "epoch": 0.9498067773136463, "grad_norm": 0.4350222647190094, "learning_rate": 6.575844976316859e-07, "loss": 0.9303, "step": 10630 }, { "epoch": 0.949896128844908, "grad_norm": 0.4600478708744049, "learning_rate": 6.552473865982289e-07, "loss": 0.8937, "step": 10631 }, { "epoch": 0.9499854803761699, "grad_norm": 0.6685845851898193, "learning_rate": 6.52914408713784e-07, "loss": 0.9852, "step": 10632 }, { "epoch": 0.9500748319074318, "grad_norm": 0.43094709515571594, "learning_rate": 6.505855641737502e-07, "loss": 1.0011, "step": 10633 }, { "epoch": 0.9501641834386937, "grad_norm": 0.4459003806114197, "learning_rate": 6.482608531732104e-07, "loss": 0.9094, "step": 10634 }, { "epoch": 0.9502535349699556, "grad_norm": 0.4444451630115509, "learning_rate": 6.4594027590687e-07, "loss": 0.993, "step": 10635 }, { "epoch": 0.9503428865012175, "grad_norm": 0.5474777817726135, "learning_rate": 6.436238325691125e-07, "loss": 0.8881, "step": 10636 }, { "epoch": 0.9504322380324792, "grad_norm": 0.5109720826148987, "learning_rate": 6.413115233539601e-07, "loss": 0.9903, "step": 10637 }, { "epoch": 0.9505215895637411, "grad_norm": 0.4545588195323944, "learning_rate": 6.390033484550917e-07, "loss": 0.9167, "step": 10638 }, { "epoch": 0.950610941095003, "grad_norm": 0.5279081463813782, "learning_rate": 6.366993080658413e-07, "loss": 0.9207, "step": 10639 }, { "epoch": 0.9507002926262649, "grad_norm": 0.5103138089179993, "learning_rate": 6.343994023792043e-07, "loss": 0.9425, "step": 10640 }, { "epoch": 0.9507896441575268, "grad_norm": 0.49605733156204224, "learning_rate": 6.32103631587816e-07, "loss": 0.898, "step": 10641 }, { "epoch": 0.9508789956887886, "grad_norm": 0.5339604616165161, "learning_rate": 6.298119958839721e-07, "loss": 0.8929, "step": 10642 }, { "epoch": 0.9509683472200505, "grad_norm": 0.48836469650268555, "learning_rate": 6.275244954596193e-07, "loss": 0.8902, "step": 10643 }, { "epoch": 0.9510576987513123, "grad_norm": 0.4708765149116516, "learning_rate": 6.252411305063599e-07, "loss": 0.9173, "step": 10644 }, { "epoch": 0.9511470502825742, "grad_norm": 0.47018665075302124, "learning_rate": 6.229619012154575e-07, "loss": 0.9882, "step": 10645 }, { "epoch": 0.9512364018138361, "grad_norm": 0.5642026662826538, "learning_rate": 6.206868077778149e-07, "loss": 0.9281, "step": 10646 }, { "epoch": 0.951325753345098, "grad_norm": 0.6329473853111267, "learning_rate": 6.18415850384002e-07, "loss": 0.887, "step": 10647 }, { "epoch": 0.9514151048763598, "grad_norm": 0.47655338048934937, "learning_rate": 6.161490292242278e-07, "loss": 0.908, "step": 10648 }, { "epoch": 0.9515044564076217, "grad_norm": 0.4613625109195709, "learning_rate": 6.138863444883735e-07, "loss": 0.9321, "step": 10649 }, { "epoch": 0.9515938079388836, "grad_norm": 0.5077817440032959, "learning_rate": 6.116277963659489e-07, "loss": 0.986, "step": 10650 }, { "epoch": 0.9516831594701454, "grad_norm": 0.4817694425582886, "learning_rate": 6.093733850461358e-07, "loss": 0.9819, "step": 10651 }, { "epoch": 0.9517725110014073, "grad_norm": 0.6474151015281677, "learning_rate": 6.071231107177722e-07, "loss": 0.8317, "step": 10652 }, { "epoch": 0.9518618625326691, "grad_norm": 0.529495894908905, "learning_rate": 6.048769735693404e-07, "loss": 0.872, "step": 10653 }, { "epoch": 0.951951214063931, "grad_norm": 0.574099600315094, "learning_rate": 6.026349737889736e-07, "loss": 0.9059, "step": 10654 }, { "epoch": 0.9520405655951929, "grad_norm": 0.5554847717285156, "learning_rate": 6.003971115644657e-07, "loss": 0.9797, "step": 10655 }, { "epoch": 0.9521299171264548, "grad_norm": 0.5695211887359619, "learning_rate": 5.981633870832614e-07, "loss": 0.9082, "step": 10656 }, { "epoch": 0.9522192686577167, "grad_norm": 0.6171822547912598, "learning_rate": 5.959338005324611e-07, "loss": 0.9138, "step": 10657 }, { "epoch": 0.9523086201889784, "grad_norm": 0.4547090232372284, "learning_rate": 5.937083520988151e-07, "loss": 0.9651, "step": 10658 }, { "epoch": 0.9523979717202403, "grad_norm": 0.5358104705810547, "learning_rate": 5.914870419687247e-07, "loss": 0.9548, "step": 10659 }, { "epoch": 0.9524873232515022, "grad_norm": 0.5056635737419128, "learning_rate": 5.892698703282517e-07, "loss": 0.8873, "step": 10660 }, { "epoch": 0.9525766747827641, "grad_norm": 0.5599133968353271, "learning_rate": 5.870568373631091e-07, "loss": 0.8852, "step": 10661 }, { "epoch": 0.952666026314026, "grad_norm": 0.4533610939979553, "learning_rate": 5.848479432586596e-07, "loss": 0.9582, "step": 10662 }, { "epoch": 0.9527553778452879, "grad_norm": 0.42300939559936523, "learning_rate": 5.826431881999217e-07, "loss": 0.938, "step": 10663 }, { "epoch": 0.9528447293765497, "grad_norm": 0.525570273399353, "learning_rate": 5.804425723715701e-07, "loss": 0.9239, "step": 10664 }, { "epoch": 0.9529340809078115, "grad_norm": 0.5064711570739746, "learning_rate": 5.782460959579239e-07, "loss": 0.8886, "step": 10665 }, { "epoch": 0.9530234324390734, "grad_norm": 0.4518652558326721, "learning_rate": 5.760537591429694e-07, "loss": 0.9666, "step": 10666 }, { "epoch": 0.9531127839703353, "grad_norm": 0.4667336344718933, "learning_rate": 5.738655621103317e-07, "loss": 0.9292, "step": 10667 }, { "epoch": 0.9532021355015972, "grad_norm": 0.4580380916595459, "learning_rate": 5.716815050432978e-07, "loss": 0.934, "step": 10668 }, { "epoch": 0.953291487032859, "grad_norm": 0.5494727492332458, "learning_rate": 5.6950158812481e-07, "loss": 0.9544, "step": 10669 }, { "epoch": 0.9533808385641209, "grad_norm": 0.5223135948181152, "learning_rate": 5.673258115374502e-07, "loss": 0.8561, "step": 10670 }, { "epoch": 0.9534701900953828, "grad_norm": 0.5879161953926086, "learning_rate": 5.651541754634726e-07, "loss": 0.8332, "step": 10671 }, { "epoch": 0.9535595416266446, "grad_norm": 0.4340555965900421, "learning_rate": 5.629866800847649e-07, "loss": 0.9383, "step": 10672 }, { "epoch": 0.9536488931579065, "grad_norm": 0.46510595083236694, "learning_rate": 5.608233255828876e-07, "loss": 0.8908, "step": 10673 }, { "epoch": 0.9537382446891683, "grad_norm": 0.5728726983070374, "learning_rate": 5.586641121390401e-07, "loss": 0.8977, "step": 10674 }, { "epoch": 0.9538275962204302, "grad_norm": 0.4889596402645111, "learning_rate": 5.565090399340778e-07, "loss": 0.9271, "step": 10675 }, { "epoch": 0.9539169477516921, "grad_norm": 0.46741414070129395, "learning_rate": 5.543581091485117e-07, "loss": 0.9841, "step": 10676 }, { "epoch": 0.954006299282954, "grad_norm": 0.48655492067337036, "learning_rate": 5.522113199625145e-07, "loss": 0.942, "step": 10677 }, { "epoch": 0.9540956508142159, "grad_norm": 0.4470749497413635, "learning_rate": 5.500686725558868e-07, "loss": 0.9452, "step": 10678 }, { "epoch": 0.9541850023454777, "grad_norm": 0.4536135494709015, "learning_rate": 5.479301671081072e-07, "loss": 0.9409, "step": 10679 }, { "epoch": 0.9542743538767395, "grad_norm": 0.4575904905796051, "learning_rate": 5.457958037982991e-07, "loss": 0.9012, "step": 10680 }, { "epoch": 0.9543637054080014, "grad_norm": 0.4889681935310364, "learning_rate": 5.436655828052417e-07, "loss": 0.971, "step": 10681 }, { "epoch": 0.9544530569392633, "grad_norm": 0.443453311920166, "learning_rate": 5.415395043073535e-07, "loss": 0.9073, "step": 10682 }, { "epoch": 0.9545424084705252, "grad_norm": 0.4644291400909424, "learning_rate": 5.394175684827196e-07, "loss": 0.9093, "step": 10683 }, { "epoch": 0.9546317600017871, "grad_norm": 0.5638459920883179, "learning_rate": 5.372997755090759e-07, "loss": 0.8817, "step": 10684 }, { "epoch": 0.954721111533049, "grad_norm": 0.42751070857048035, "learning_rate": 5.351861255638135e-07, "loss": 1.0156, "step": 10685 }, { "epoch": 0.9548104630643107, "grad_norm": 0.5429770350456238, "learning_rate": 5.330766188239689e-07, "loss": 0.9907, "step": 10686 }, { "epoch": 0.9548998145955726, "grad_norm": 0.5348584651947021, "learning_rate": 5.309712554662338e-07, "loss": 0.8703, "step": 10687 }, { "epoch": 0.9549891661268345, "grad_norm": 0.5381792783737183, "learning_rate": 5.288700356669618e-07, "loss": 0.9238, "step": 10688 }, { "epoch": 0.9550785176580964, "grad_norm": 0.5914027690887451, "learning_rate": 5.267729596021509e-07, "loss": 0.9369, "step": 10689 }, { "epoch": 0.9551678691893583, "grad_norm": 0.573617696762085, "learning_rate": 5.246800274474439e-07, "loss": 0.8617, "step": 10690 }, { "epoch": 0.9552572207206201, "grad_norm": 0.4635196030139923, "learning_rate": 5.225912393781617e-07, "loss": 0.9357, "step": 10691 }, { "epoch": 0.955346572251882, "grad_norm": 0.4225331246852875, "learning_rate": 5.205065955692534e-07, "loss": 0.9849, "step": 10692 }, { "epoch": 0.9554359237831438, "grad_norm": 0.5266596078872681, "learning_rate": 5.184260961953236e-07, "loss": 0.9057, "step": 10693 }, { "epoch": 0.9555252753144057, "grad_norm": 0.48360317945480347, "learning_rate": 5.163497414306495e-07, "loss": 0.897, "step": 10694 }, { "epoch": 0.9556146268456676, "grad_norm": 0.44401875138282776, "learning_rate": 5.142775314491422e-07, "loss": 0.9219, "step": 10695 }, { "epoch": 0.9557039783769294, "grad_norm": 0.4794129431247711, "learning_rate": 5.122094664243681e-07, "loss": 0.9605, "step": 10696 }, { "epoch": 0.9557933299081913, "grad_norm": 0.5471349358558655, "learning_rate": 5.101455465295557e-07, "loss": 0.9078, "step": 10697 }, { "epoch": 0.9558826814394532, "grad_norm": 0.5558369755744934, "learning_rate": 5.080857719375776e-07, "loss": 0.9239, "step": 10698 }, { "epoch": 0.9559720329707151, "grad_norm": 0.39528143405914307, "learning_rate": 5.060301428209624e-07, "loss": 0.9493, "step": 10699 }, { "epoch": 0.9560613845019769, "grad_norm": 0.44044265151023865, "learning_rate": 5.039786593518892e-07, "loss": 0.9668, "step": 10700 }, { "epoch": 0.9561507360332387, "grad_norm": 0.44840747117996216, "learning_rate": 5.019313217021982e-07, "loss": 0.943, "step": 10701 }, { "epoch": 0.9562400875645006, "grad_norm": 0.4926467537879944, "learning_rate": 4.998881300433688e-07, "loss": 0.9684, "step": 10702 }, { "epoch": 0.9563294390957625, "grad_norm": 0.44300875067710876, "learning_rate": 4.978490845465367e-07, "loss": 0.9856, "step": 10703 }, { "epoch": 0.9564187906270244, "grad_norm": 0.4651431143283844, "learning_rate": 4.958141853825038e-07, "loss": 0.9457, "step": 10704 }, { "epoch": 0.9565081421582863, "grad_norm": 0.4982956051826477, "learning_rate": 4.937834327217061e-07, "loss": 0.8796, "step": 10705 }, { "epoch": 0.956597493689548, "grad_norm": 0.46135175228118896, "learning_rate": 4.917568267342465e-07, "loss": 0.8973, "step": 10706 }, { "epoch": 0.9566868452208099, "grad_norm": 0.5590856075286865, "learning_rate": 4.897343675898669e-07, "loss": 0.9275, "step": 10707 }, { "epoch": 0.9567761967520718, "grad_norm": 0.5813520550727844, "learning_rate": 4.877160554579818e-07, "loss": 0.9401, "step": 10708 }, { "epoch": 0.9568655482833337, "grad_norm": 0.44653165340423584, "learning_rate": 4.857018905076394e-07, "loss": 1.0116, "step": 10709 }, { "epoch": 0.9569548998145956, "grad_norm": 0.47590041160583496, "learning_rate": 4.836918729075435e-07, "loss": 0.9912, "step": 10710 }, { "epoch": 0.9570442513458575, "grad_norm": 0.42629966139793396, "learning_rate": 4.81686002826065e-07, "loss": 0.9783, "step": 10711 }, { "epoch": 0.9571336028771193, "grad_norm": 0.5415294766426086, "learning_rate": 4.796842804312085e-07, "loss": 0.9848, "step": 10712 }, { "epoch": 0.9572229544083811, "grad_norm": 0.5828223824501038, "learning_rate": 4.776867058906453e-07, "loss": 0.8597, "step": 10713 }, { "epoch": 0.957312305939643, "grad_norm": 0.5175209045410156, "learning_rate": 4.756932793716862e-07, "loss": 0.9513, "step": 10714 }, { "epoch": 0.9574016574709049, "grad_norm": 0.5043903589248657, "learning_rate": 4.737040010413085e-07, "loss": 0.8341, "step": 10715 }, { "epoch": 0.9574910090021668, "grad_norm": 0.47045809030532837, "learning_rate": 4.717188710661291e-07, "loss": 0.8929, "step": 10716 }, { "epoch": 0.9575803605334287, "grad_norm": 0.44731298089027405, "learning_rate": 4.697378896124316e-07, "loss": 0.9371, "step": 10717 }, { "epoch": 0.9576697120646905, "grad_norm": 0.47598275542259216, "learning_rate": 4.677610568461388e-07, "loss": 0.9035, "step": 10718 }, { "epoch": 0.9577590635959524, "grad_norm": 0.49408817291259766, "learning_rate": 4.657883729328405e-07, "loss": 0.8944, "step": 10719 }, { "epoch": 0.9578484151272142, "grad_norm": 0.41959306597709656, "learning_rate": 4.638198380377545e-07, "loss": 0.9322, "step": 10720 }, { "epoch": 0.9579377666584761, "grad_norm": 0.43749403953552246, "learning_rate": 4.6185545232577676e-07, "loss": 0.9512, "step": 10721 }, { "epoch": 0.958027118189738, "grad_norm": 0.426736980676651, "learning_rate": 4.5989521596144226e-07, "loss": 0.9449, "step": 10722 }, { "epoch": 0.9581164697209998, "grad_norm": 0.5225173830986023, "learning_rate": 4.579391291089419e-07, "loss": 0.8694, "step": 10723 }, { "epoch": 0.9582058212522617, "grad_norm": 0.6672084927558899, "learning_rate": 4.559871919321279e-07, "loss": 0.8843, "step": 10724 }, { "epoch": 0.9582951727835236, "grad_norm": 0.4476062059402466, "learning_rate": 4.5403940459448067e-07, "loss": 0.898, "step": 10725 }, { "epoch": 0.9583845243147855, "grad_norm": 0.4649253189563751, "learning_rate": 4.5209576725915304e-07, "loss": 0.8641, "step": 10726 }, { "epoch": 0.9584738758460473, "grad_norm": 0.4393841028213501, "learning_rate": 4.501562800889536e-07, "loss": 0.9845, "step": 10727 }, { "epoch": 0.9585632273773091, "grad_norm": 0.5080018639564514, "learning_rate": 4.482209432463247e-07, "loss": 0.8999, "step": 10728 }, { "epoch": 0.958652578908571, "grad_norm": 0.5580213665962219, "learning_rate": 4.46289756893381e-07, "loss": 0.9335, "step": 10729 }, { "epoch": 0.9587419304398329, "grad_norm": 0.6076408624649048, "learning_rate": 4.443627211918711e-07, "loss": 0.9558, "step": 10730 }, { "epoch": 0.9588312819710948, "grad_norm": 0.4416392147541046, "learning_rate": 4.424398363032101e-07, "loss": 1.0056, "step": 10731 }, { "epoch": 0.9589206335023567, "grad_norm": 0.4628840684890747, "learning_rate": 4.405211023884581e-07, "loss": 0.9612, "step": 10732 }, { "epoch": 0.9590099850336186, "grad_norm": 0.5556047558784485, "learning_rate": 4.3860651960832557e-07, "loss": 1.0094, "step": 10733 }, { "epoch": 0.9590993365648803, "grad_norm": 0.4615866541862488, "learning_rate": 4.3669608812318965e-07, "loss": 0.9931, "step": 10734 }, { "epoch": 0.9591886880961422, "grad_norm": 0.4226515293121338, "learning_rate": 4.347898080930557e-07, "loss": 0.909, "step": 10735 }, { "epoch": 0.9592780396274041, "grad_norm": 0.4294814169406891, "learning_rate": 4.3288767967760715e-07, "loss": 0.9615, "step": 10736 }, { "epoch": 0.959367391158666, "grad_norm": 0.5078641772270203, "learning_rate": 4.3098970303616646e-07, "loss": 0.8305, "step": 10737 }, { "epoch": 0.9594567426899279, "grad_norm": 0.5030995607376099, "learning_rate": 4.2909587832770103e-07, "loss": 1.0022, "step": 10738 }, { "epoch": 0.9595460942211897, "grad_norm": 0.5276926159858704, "learning_rate": 4.27206205710845e-07, "loss": 0.8767, "step": 10739 }, { "epoch": 0.9596354457524516, "grad_norm": 0.450339138507843, "learning_rate": 4.2532068534387737e-07, "loss": 0.9567, "step": 10740 }, { "epoch": 0.9597247972837134, "grad_norm": 0.4343004822731018, "learning_rate": 4.2343931738473284e-07, "loss": 0.9784, "step": 10741 }, { "epoch": 0.9598141488149753, "grad_norm": 0.45386016368865967, "learning_rate": 4.215621019909854e-07, "loss": 0.8928, "step": 10742 }, { "epoch": 0.9599035003462372, "grad_norm": 0.6192027926445007, "learning_rate": 4.196890393198871e-07, "loss": 0.9318, "step": 10743 }, { "epoch": 0.959992851877499, "grad_norm": 0.5403250455856323, "learning_rate": 4.1782012952831796e-07, "loss": 0.841, "step": 10744 }, { "epoch": 0.9600822034087609, "grad_norm": 0.525173544883728, "learning_rate": 4.159553727728194e-07, "loss": 0.9311, "step": 10745 }, { "epoch": 0.9601715549400228, "grad_norm": 0.5325625538825989, "learning_rate": 4.140947692095887e-07, "loss": 0.8706, "step": 10746 }, { "epoch": 0.9602609064712847, "grad_norm": 0.49182307720184326, "learning_rate": 4.1223831899446785e-07, "loss": 0.9359, "step": 10747 }, { "epoch": 0.9603502580025465, "grad_norm": 0.444618821144104, "learning_rate": 4.103860222829603e-07, "loss": 0.9907, "step": 10748 }, { "epoch": 0.9604396095338084, "grad_norm": 0.4664936065673828, "learning_rate": 4.0853787923020303e-07, "loss": 1.0142, "step": 10749 }, { "epoch": 0.9605289610650702, "grad_norm": 0.6530784964561462, "learning_rate": 4.066938899910111e-07, "loss": 0.8257, "step": 10750 }, { "epoch": 0.9606183125963321, "grad_norm": 0.5376946330070496, "learning_rate": 4.048540547198332e-07, "loss": 0.8809, "step": 10751 }, { "epoch": 0.960707664127594, "grad_norm": 0.4881064295768738, "learning_rate": 4.030183735707682e-07, "loss": 0.9446, "step": 10752 }, { "epoch": 0.9607970156588559, "grad_norm": 0.46255356073379517, "learning_rate": 4.011868466975821e-07, "loss": 0.9665, "step": 10753 }, { "epoch": 0.9608863671901178, "grad_norm": 0.5297724008560181, "learning_rate": 3.9935947425368546e-07, "loss": 0.9761, "step": 10754 }, { "epoch": 0.9609757187213795, "grad_norm": 0.5168519616127014, "learning_rate": 3.9753625639213366e-07, "loss": 0.9226, "step": 10755 }, { "epoch": 0.9610650702526414, "grad_norm": 0.5492185354232788, "learning_rate": 3.9571719326564894e-07, "loss": 0.8866, "step": 10756 }, { "epoch": 0.9611544217839033, "grad_norm": 0.5541948676109314, "learning_rate": 3.939022850265928e-07, "loss": 0.9604, "step": 10757 }, { "epoch": 0.9612437733151652, "grad_norm": 0.6160182952880859, "learning_rate": 3.920915318269824e-07, "loss": 0.8829, "step": 10758 }, { "epoch": 0.9613331248464271, "grad_norm": 0.5799499154090881, "learning_rate": 3.902849338184911e-07, "loss": 0.8777, "step": 10759 }, { "epoch": 0.961422476377689, "grad_norm": 0.5014825463294983, "learning_rate": 3.8848249115243097e-07, "loss": 0.9618, "step": 10760 }, { "epoch": 0.9615118279089508, "grad_norm": 0.4629524052143097, "learning_rate": 3.866842039797869e-07, "loss": 0.949, "step": 10761 }, { "epoch": 0.9616011794402126, "grad_norm": 0.46119171380996704, "learning_rate": 3.848900724511828e-07, "loss": 0.9926, "step": 10762 }, { "epoch": 0.9616905309714745, "grad_norm": 0.5495138764381409, "learning_rate": 3.8310009671689297e-07, "loss": 0.9226, "step": 10763 }, { "epoch": 0.9617798825027364, "grad_norm": 0.49449822306632996, "learning_rate": 3.813142769268474e-07, "loss": 0.9972, "step": 10764 }, { "epoch": 0.9618692340339983, "grad_norm": 0.4808973968029022, "learning_rate": 3.7953261323063205e-07, "loss": 0.9218, "step": 10765 }, { "epoch": 0.9619585855652601, "grad_norm": 0.45000120997428894, "learning_rate": 3.7775510577747195e-07, "loss": 0.9093, "step": 10766 }, { "epoch": 0.962047937096522, "grad_norm": 0.42627647519111633, "learning_rate": 3.759817547162536e-07, "loss": 0.9849, "step": 10767 }, { "epoch": 0.9621372886277838, "grad_norm": 0.4252340793609619, "learning_rate": 3.742125601955249e-07, "loss": 0.9683, "step": 10768 }, { "epoch": 0.9622266401590457, "grad_norm": 0.45607298612594604, "learning_rate": 3.724475223634616e-07, "loss": 1.0085, "step": 10769 }, { "epoch": 0.9623159916903076, "grad_norm": 0.5643563866615295, "learning_rate": 3.706866413679122e-07, "loss": 0.9372, "step": 10770 }, { "epoch": 0.9624053432215695, "grad_norm": 0.5665273666381836, "learning_rate": 3.689299173563643e-07, "loss": 0.9523, "step": 10771 }, { "epoch": 0.9624946947528313, "grad_norm": 0.5675973892211914, "learning_rate": 3.6717735047597233e-07, "loss": 0.8598, "step": 10772 }, { "epoch": 0.9625840462840932, "grad_norm": 0.48373866081237793, "learning_rate": 3.6542894087351896e-07, "loss": 0.9075, "step": 10773 }, { "epoch": 0.9626733978153551, "grad_norm": 0.5747187733650208, "learning_rate": 3.6368468869545926e-07, "loss": 0.94, "step": 10774 }, { "epoch": 0.9627627493466169, "grad_norm": 0.4350042939186096, "learning_rate": 3.619445940878929e-07, "loss": 0.958, "step": 10775 }, { "epoch": 0.9628521008778788, "grad_norm": 0.5669698119163513, "learning_rate": 3.6020865719657016e-07, "loss": 0.9641, "step": 10776 }, { "epoch": 0.9629414524091406, "grad_norm": 0.5079529881477356, "learning_rate": 3.584768781668968e-07, "loss": 0.9328, "step": 10777 }, { "epoch": 0.9630308039404025, "grad_norm": 0.6118546724319458, "learning_rate": 3.5674925714391796e-07, "loss": 0.8199, "step": 10778 }, { "epoch": 0.9631201554716644, "grad_norm": 0.4477030038833618, "learning_rate": 3.5502579427235673e-07, "loss": 0.9591, "step": 10779 }, { "epoch": 0.9632095070029263, "grad_norm": 0.454915314912796, "learning_rate": 3.5330648969655876e-07, "loss": 0.9534, "step": 10780 }, { "epoch": 0.9632988585341882, "grad_norm": 0.5509199500083923, "learning_rate": 3.515913435605367e-07, "loss": 0.8643, "step": 10781 }, { "epoch": 0.96338821006545, "grad_norm": 0.4031422734260559, "learning_rate": 3.4988035600795886e-07, "loss": 0.9174, "step": 10782 }, { "epoch": 0.9634775615967118, "grad_norm": 0.636800229549408, "learning_rate": 3.481735271821274e-07, "loss": 1.0021, "step": 10783 }, { "epoch": 0.9635669131279737, "grad_norm": 0.4733976721763611, "learning_rate": 3.464708572260167e-07, "loss": 0.9752, "step": 10784 }, { "epoch": 0.9636562646592356, "grad_norm": 0.4569736123085022, "learning_rate": 3.44772346282235e-07, "loss": 0.933, "step": 10785 }, { "epoch": 0.9637456161904975, "grad_norm": 0.49324387311935425, "learning_rate": 3.4307799449306286e-07, "loss": 0.9589, "step": 10786 }, { "epoch": 0.9638349677217594, "grad_norm": 0.5697484016418457, "learning_rate": 3.413878020004091e-07, "loss": 0.9579, "step": 10787 }, { "epoch": 0.9639243192530212, "grad_norm": 0.5456252694129944, "learning_rate": 3.397017689458548e-07, "loss": 0.8727, "step": 10788 }, { "epoch": 0.964013670784283, "grad_norm": 0.5522463917732239, "learning_rate": 3.3801989547061484e-07, "loss": 0.9692, "step": 10789 }, { "epoch": 0.9641030223155449, "grad_norm": 0.5635389685630798, "learning_rate": 3.363421817155654e-07, "loss": 0.8955, "step": 10790 }, { "epoch": 0.9641923738468068, "grad_norm": 0.525848388671875, "learning_rate": 3.3466862782123853e-07, "loss": 0.8466, "step": 10791 }, { "epoch": 0.9642817253780687, "grad_norm": 0.5157281160354614, "learning_rate": 3.3299923392780543e-07, "loss": 1.019, "step": 10792 }, { "epoch": 0.9643710769093305, "grad_norm": 0.6579477787017822, "learning_rate": 3.3133400017509865e-07, "loss": 0.8579, "step": 10793 }, { "epoch": 0.9644604284405924, "grad_norm": 0.5229929089546204, "learning_rate": 3.2967292670260106e-07, "loss": 0.889, "step": 10794 }, { "epoch": 0.9645497799718543, "grad_norm": 0.5078433752059937, "learning_rate": 3.280160136494459e-07, "loss": 0.9574, "step": 10795 }, { "epoch": 0.9646391315031161, "grad_norm": 0.5875293016433716, "learning_rate": 3.2636326115441097e-07, "loss": 0.8764, "step": 10796 }, { "epoch": 0.964728483034378, "grad_norm": 0.47622236609458923, "learning_rate": 3.247146693559355e-07, "loss": 0.9398, "step": 10797 }, { "epoch": 0.9648178345656399, "grad_norm": 0.5416803956031799, "learning_rate": 3.2307023839210914e-07, "loss": 0.9299, "step": 10798 }, { "epoch": 0.9649071860969017, "grad_norm": 0.47890692949295044, "learning_rate": 3.214299684006661e-07, "loss": 0.9296, "step": 10799 }, { "epoch": 0.9649965376281636, "grad_norm": 0.42355430126190186, "learning_rate": 3.197938595189964e-07, "loss": 0.9806, "step": 10800 }, { "epoch": 0.9650858891594255, "grad_norm": 0.46457356214523315, "learning_rate": 3.1816191188415166e-07, "loss": 0.9908, "step": 10801 }, { "epoch": 0.9651752406906874, "grad_norm": 0.5232931971549988, "learning_rate": 3.1653412563281135e-07, "loss": 0.8684, "step": 10802 }, { "epoch": 0.9652645922219492, "grad_norm": 0.4481331408023834, "learning_rate": 3.1491050090132757e-07, "loss": 0.9863, "step": 10803 }, { "epoch": 0.965353943753211, "grad_norm": 0.5186441540718079, "learning_rate": 3.1329103782569145e-07, "loss": 0.8764, "step": 10804 }, { "epoch": 0.9654432952844729, "grad_norm": 0.5062096118927002, "learning_rate": 3.116757365415557e-07, "loss": 0.8986, "step": 10805 }, { "epoch": 0.9655326468157348, "grad_norm": 0.42634356021881104, "learning_rate": 3.1006459718421755e-07, "loss": 0.9799, "step": 10806 }, { "epoch": 0.9656219983469967, "grad_norm": 0.4971635937690735, "learning_rate": 3.0845761988862464e-07, "loss": 0.8796, "step": 10807 }, { "epoch": 0.9657113498782586, "grad_norm": 0.5731960535049438, "learning_rate": 3.068548047893804e-07, "loss": 0.8347, "step": 10808 }, { "epoch": 0.9658007014095205, "grad_norm": 0.6698244214057922, "learning_rate": 3.0525615202073863e-07, "loss": 0.8799, "step": 10809 }, { "epoch": 0.9658900529407822, "grad_norm": 0.4804188907146454, "learning_rate": 3.036616617165977e-07, "loss": 0.959, "step": 10810 }, { "epoch": 0.9659794044720441, "grad_norm": 0.45491814613342285, "learning_rate": 3.020713340105175e-07, "loss": 0.9305, "step": 10811 }, { "epoch": 0.966068756003306, "grad_norm": 0.43385374546051025, "learning_rate": 3.0048516903571357e-07, "loss": 0.9343, "step": 10812 }, { "epoch": 0.9661581075345679, "grad_norm": 0.42967671155929565, "learning_rate": 2.989031669250297e-07, "loss": 0.978, "step": 10813 }, { "epoch": 0.9662474590658298, "grad_norm": 0.4216921031475067, "learning_rate": 2.973253278109767e-07, "loss": 1.0218, "step": 10814 }, { "epoch": 0.9663368105970916, "grad_norm": 0.45912298560142517, "learning_rate": 2.957516518257264e-07, "loss": 0.9561, "step": 10815 }, { "epoch": 0.9664261621283535, "grad_norm": 0.4412069022655487, "learning_rate": 2.9418213910107907e-07, "loss": 1.0146, "step": 10816 }, { "epoch": 0.9665155136596153, "grad_norm": 0.4734534025192261, "learning_rate": 2.9261678976850726e-07, "loss": 1.0074, "step": 10817 }, { "epoch": 0.9666048651908772, "grad_norm": 0.44548165798187256, "learning_rate": 2.910556039591228e-07, "loss": 0.9156, "step": 10818 }, { "epoch": 0.9666942167221391, "grad_norm": 0.45493119955062866, "learning_rate": 2.894985818036877e-07, "loss": 0.9426, "step": 10819 }, { "epoch": 0.966783568253401, "grad_norm": 0.4516558051109314, "learning_rate": 2.879457234326255e-07, "loss": 0.9776, "step": 10820 }, { "epoch": 0.9668729197846628, "grad_norm": 0.4548396170139313, "learning_rate": 2.8639702897599873e-07, "loss": 0.8973, "step": 10821 }, { "epoch": 0.9669622713159247, "grad_norm": 0.4782199263572693, "learning_rate": 2.8485249856353147e-07, "loss": 0.8619, "step": 10822 }, { "epoch": 0.9670516228471866, "grad_norm": 0.6326056718826294, "learning_rate": 2.833121323245924e-07, "loss": 0.8101, "step": 10823 }, { "epoch": 0.9671409743784484, "grad_norm": 0.4548356533050537, "learning_rate": 2.817759303882006e-07, "loss": 0.9549, "step": 10824 }, { "epoch": 0.9672303259097103, "grad_norm": 0.5176000595092773, "learning_rate": 2.802438928830364e-07, "loss": 0.8751, "step": 10825 }, { "epoch": 0.9673196774409721, "grad_norm": 0.4941912293434143, "learning_rate": 2.7871601993741947e-07, "loss": 0.9054, "step": 10826 }, { "epoch": 0.967409028972234, "grad_norm": 0.5169671773910522, "learning_rate": 2.7719231167933067e-07, "loss": 0.9378, "step": 10827 }, { "epoch": 0.9674983805034959, "grad_norm": 0.5568271279335022, "learning_rate": 2.7567276823639023e-07, "loss": 0.9216, "step": 10828 }, { "epoch": 0.9675877320347578, "grad_norm": 0.6154254674911499, "learning_rate": 2.741573897358796e-07, "loss": 0.7888, "step": 10829 }, { "epoch": 0.9676770835660196, "grad_norm": 0.4801642894744873, "learning_rate": 2.72646176304725e-07, "loss": 0.9863, "step": 10830 }, { "epoch": 0.9677664350972814, "grad_norm": 0.5245612263679504, "learning_rate": 2.711391280695086e-07, "loss": 0.9012, "step": 10831 }, { "epoch": 0.9678557866285433, "grad_norm": 0.5288980603218079, "learning_rate": 2.6963624515646266e-07, "loss": 0.9981, "step": 10832 }, { "epoch": 0.9679451381598052, "grad_norm": 0.3883173167705536, "learning_rate": 2.681375276914644e-07, "loss": 0.9628, "step": 10833 }, { "epoch": 0.9680344896910671, "grad_norm": 0.6051060557365417, "learning_rate": 2.666429758000577e-07, "loss": 0.8418, "step": 10834 }, { "epoch": 0.968123841222329, "grad_norm": 0.4276251196861267, "learning_rate": 2.651525896074203e-07, "loss": 0.9092, "step": 10835 }, { "epoch": 0.9682131927535909, "grad_norm": 0.48356980085372925, "learning_rate": 2.636663692383856e-07, "loss": 1.0608, "step": 10836 }, { "epoch": 0.9683025442848526, "grad_norm": 0.5755243897438049, "learning_rate": 2.62184314817443e-07, "loss": 0.831, "step": 10837 }, { "epoch": 0.9683918958161145, "grad_norm": 0.4929053783416748, "learning_rate": 2.6070642646873757e-07, "loss": 0.9273, "step": 10838 }, { "epoch": 0.9684812473473764, "grad_norm": 0.48126858472824097, "learning_rate": 2.5923270431604804e-07, "loss": 0.9546, "step": 10839 }, { "epoch": 0.9685705988786383, "grad_norm": 0.524333119392395, "learning_rate": 2.577631484828147e-07, "loss": 0.8945, "step": 10840 }, { "epoch": 0.9686599504099002, "grad_norm": 0.5595930218696594, "learning_rate": 2.5629775909213337e-07, "loss": 0.9558, "step": 10841 }, { "epoch": 0.968749301941162, "grad_norm": 0.507494330406189, "learning_rate": 2.5483653626675043e-07, "loss": 1.0194, "step": 10842 }, { "epoch": 0.9688386534724239, "grad_norm": 0.43305492401123047, "learning_rate": 2.5337948012904566e-07, "loss": 0.9712, "step": 10843 }, { "epoch": 0.9689280050036857, "grad_norm": 0.46509480476379395, "learning_rate": 2.5192659080107704e-07, "loss": 0.9215, "step": 10844 }, { "epoch": 0.9690173565349476, "grad_norm": 0.44424694776535034, "learning_rate": 2.5047786840452504e-07, "loss": 0.9837, "step": 10845 }, { "epoch": 0.9691067080662095, "grad_norm": 0.44861480593681335, "learning_rate": 2.4903331306074804e-07, "loss": 0.9725, "step": 10846 }, { "epoch": 0.9691960595974713, "grad_norm": 0.4651372730731964, "learning_rate": 2.475929248907383e-07, "loss": 0.9143, "step": 10847 }, { "epoch": 0.9692854111287332, "grad_norm": 0.5831353664398193, "learning_rate": 2.4615670401514356e-07, "loss": 0.9379, "step": 10848 }, { "epoch": 0.9693747626599951, "grad_norm": 0.45759260654449463, "learning_rate": 2.4472465055426217e-07, "loss": 0.9993, "step": 10849 }, { "epoch": 0.969464114191257, "grad_norm": 0.6619382500648499, "learning_rate": 2.432967646280426e-07, "loss": 0.8364, "step": 10850 }, { "epoch": 0.9695534657225188, "grad_norm": 0.3795022964477539, "learning_rate": 2.4187304635608923e-07, "loss": 0.9585, "step": 10851 }, { "epoch": 0.9696428172537807, "grad_norm": 0.5020806193351746, "learning_rate": 2.4045349585765097e-07, "loss": 0.9079, "step": 10852 }, { "epoch": 0.9697321687850425, "grad_norm": 0.4140533208847046, "learning_rate": 2.3903811325163283e-07, "loss": 0.9358, "step": 10853 }, { "epoch": 0.9698215203163044, "grad_norm": 0.6230595707893372, "learning_rate": 2.3762689865658438e-07, "loss": 0.8465, "step": 10854 }, { "epoch": 0.9699108718475663, "grad_norm": 0.5708101391792297, "learning_rate": 2.3621985219071108e-07, "loss": 0.8622, "step": 10855 }, { "epoch": 0.9700002233788282, "grad_norm": 0.5391978025436401, "learning_rate": 2.3481697397187418e-07, "loss": 0.905, "step": 10856 }, { "epoch": 0.9700895749100901, "grad_norm": 0.4433526396751404, "learning_rate": 2.334182641175686e-07, "loss": 0.9798, "step": 10857 }, { "epoch": 0.9701789264413518, "grad_norm": 0.669391393661499, "learning_rate": 2.320237227449562e-07, "loss": 0.8246, "step": 10858 }, { "epoch": 0.9702682779726137, "grad_norm": 0.47016438841819763, "learning_rate": 2.3063334997084907e-07, "loss": 0.8787, "step": 10859 }, { "epoch": 0.9703576295038756, "grad_norm": 0.5404382944107056, "learning_rate": 2.2924714591170403e-07, "loss": 0.9036, "step": 10860 }, { "epoch": 0.9704469810351375, "grad_norm": 0.44831082224845886, "learning_rate": 2.2786511068362826e-07, "loss": 0.9216, "step": 10861 }, { "epoch": 0.9705363325663994, "grad_norm": 0.44715404510498047, "learning_rate": 2.2648724440237913e-07, "loss": 0.9903, "step": 10862 }, { "epoch": 0.9706256840976613, "grad_norm": 0.5612984895706177, "learning_rate": 2.2511354718336986e-07, "loss": 0.8568, "step": 10863 }, { "epoch": 0.9707150356289231, "grad_norm": 0.45841625332832336, "learning_rate": 2.2374401914166953e-07, "loss": 0.9588, "step": 10864 }, { "epoch": 0.9708043871601849, "grad_norm": 0.4593586325645447, "learning_rate": 2.2237866039198085e-07, "loss": 0.9923, "step": 10865 }, { "epoch": 0.9708937386914468, "grad_norm": 0.46852678060531616, "learning_rate": 2.2101747104866788e-07, "loss": 0.9463, "step": 10866 }, { "epoch": 0.9709830902227087, "grad_norm": 0.4527819752693176, "learning_rate": 2.1966045122575052e-07, "loss": 0.9759, "step": 10867 }, { "epoch": 0.9710724417539706, "grad_norm": 0.42089274525642395, "learning_rate": 2.1830760103688784e-07, "loss": 0.9125, "step": 10868 }, { "epoch": 0.9711617932852324, "grad_norm": 0.4603272080421448, "learning_rate": 2.1695892059540035e-07, "loss": 0.9218, "step": 10869 }, { "epoch": 0.9712511448164943, "grad_norm": 0.444743275642395, "learning_rate": 2.156144100142532e-07, "loss": 0.9502, "step": 10870 }, { "epoch": 0.9713404963477562, "grad_norm": 0.5526580810546875, "learning_rate": 2.1427406940606187e-07, "loss": 0.9189, "step": 10871 }, { "epoch": 0.971429847879018, "grad_norm": 0.4762706160545349, "learning_rate": 2.1293789888309212e-07, "loss": 0.9151, "step": 10872 }, { "epoch": 0.9715191994102799, "grad_norm": 0.5218154191970825, "learning_rate": 2.116058985572711e-07, "loss": 0.8963, "step": 10873 }, { "epoch": 0.9716085509415417, "grad_norm": 0.5006052851676941, "learning_rate": 2.1027806854015954e-07, "loss": 0.9783, "step": 10874 }, { "epoch": 0.9716979024728036, "grad_norm": 0.45393121242523193, "learning_rate": 2.0895440894297402e-07, "loss": 0.9236, "step": 10875 }, { "epoch": 0.9717872540040655, "grad_norm": 0.5509636998176575, "learning_rate": 2.0763491987659812e-07, "loss": 0.9322, "step": 10876 }, { "epoch": 0.9718766055353274, "grad_norm": 0.49217623472213745, "learning_rate": 2.0631960145154338e-07, "loss": 0.9342, "step": 10877 }, { "epoch": 0.9719659570665893, "grad_norm": 0.48069292306900024, "learning_rate": 2.050084537779884e-07, "loss": 0.9461, "step": 10878 }, { "epoch": 0.972055308597851, "grad_norm": 0.4449180066585541, "learning_rate": 2.0370147696574526e-07, "loss": 0.9321, "step": 10879 }, { "epoch": 0.9721446601291129, "grad_norm": 0.5779398083686829, "learning_rate": 2.0239867112429868e-07, "loss": 0.9169, "step": 10880 }, { "epoch": 0.9722340116603748, "grad_norm": 0.5261817574501038, "learning_rate": 2.0110003636276687e-07, "loss": 0.9289, "step": 10881 }, { "epoch": 0.9723233631916367, "grad_norm": 0.5714827179908752, "learning_rate": 1.9980557278992397e-07, "loss": 0.8583, "step": 10882 }, { "epoch": 0.9724127147228986, "grad_norm": 0.5161953568458557, "learning_rate": 1.9851528051419988e-07, "loss": 0.9715, "step": 10883 }, { "epoch": 0.9725020662541605, "grad_norm": 0.5811159610748291, "learning_rate": 1.9722915964366372e-07, "loss": 0.8745, "step": 10884 }, { "epoch": 0.9725914177854224, "grad_norm": 0.4967767000198364, "learning_rate": 1.95947210286046e-07, "loss": 0.9639, "step": 10885 }, { "epoch": 0.9726807693166841, "grad_norm": 0.538653552532196, "learning_rate": 1.9466943254872193e-07, "loss": 0.9367, "step": 10886 }, { "epoch": 0.972770120847946, "grad_norm": 0.5293512344360352, "learning_rate": 1.9339582653871703e-07, "loss": 0.9583, "step": 10887 }, { "epoch": 0.9728594723792079, "grad_norm": 0.4885563254356384, "learning_rate": 1.9212639236271256e-07, "loss": 0.8537, "step": 10888 }, { "epoch": 0.9729488239104698, "grad_norm": 0.5381324291229248, "learning_rate": 1.908611301270402e-07, "loss": 0.9268, "step": 10889 }, { "epoch": 0.9730381754417317, "grad_norm": 0.550106406211853, "learning_rate": 1.8960003993767073e-07, "loss": 0.9345, "step": 10890 }, { "epoch": 0.9731275269729935, "grad_norm": 0.534343421459198, "learning_rate": 1.8834312190024183e-07, "loss": 0.8775, "step": 10891 }, { "epoch": 0.9732168785042553, "grad_norm": 0.5603974461555481, "learning_rate": 1.8709037612003045e-07, "loss": 0.9408, "step": 10892 }, { "epoch": 0.9733062300355172, "grad_norm": 0.3925410509109497, "learning_rate": 1.8584180270196926e-07, "loss": 0.9445, "step": 10893 }, { "epoch": 0.9733955815667791, "grad_norm": 0.44486695528030396, "learning_rate": 1.8459740175063577e-07, "loss": 0.9394, "step": 10894 }, { "epoch": 0.973484933098041, "grad_norm": 0.5443863868713379, "learning_rate": 1.8335717337026326e-07, "loss": 0.9071, "step": 10895 }, { "epoch": 0.9735742846293028, "grad_norm": 0.504984974861145, "learning_rate": 1.8212111766473528e-07, "loss": 0.8573, "step": 10896 }, { "epoch": 0.9736636361605647, "grad_norm": 0.49120062589645386, "learning_rate": 1.8088923473758568e-07, "loss": 0.929, "step": 10897 }, { "epoch": 0.9737529876918266, "grad_norm": 0.4665651321411133, "learning_rate": 1.7966152469199305e-07, "loss": 0.9986, "step": 10898 }, { "epoch": 0.9738423392230884, "grad_norm": 0.46804356575012207, "learning_rate": 1.7843798763079733e-07, "loss": 0.9408, "step": 10899 }, { "epoch": 0.9739316907543503, "grad_norm": 0.49248629808425903, "learning_rate": 1.7721862365647767e-07, "loss": 0.8993, "step": 10900 }, { "epoch": 0.9740210422856121, "grad_norm": 0.5523046255111694, "learning_rate": 1.7600343287116904e-07, "loss": 0.9051, "step": 10901 }, { "epoch": 0.974110393816874, "grad_norm": 0.46335333585739136, "learning_rate": 1.7479241537666225e-07, "loss": 0.9142, "step": 10902 }, { "epoch": 0.9741997453481359, "grad_norm": 0.4312766194343567, "learning_rate": 1.7358557127438723e-07, "loss": 0.9891, "step": 10903 }, { "epoch": 0.9742890968793978, "grad_norm": 0.4239676594734192, "learning_rate": 1.7238290066543538e-07, "loss": 0.9441, "step": 10904 }, { "epoch": 0.9743784484106597, "grad_norm": 0.4999157190322876, "learning_rate": 1.7118440365053722e-07, "loss": 0.9123, "step": 10905 }, { "epoch": 0.9744677999419215, "grad_norm": 0.5276448130607605, "learning_rate": 1.6999008033007913e-07, "loss": 0.8139, "step": 10906 }, { "epoch": 0.9745571514731833, "grad_norm": 0.44836434721946716, "learning_rate": 1.6879993080410327e-07, "loss": 0.9032, "step": 10907 }, { "epoch": 0.9746465030044452, "grad_norm": 0.49218374490737915, "learning_rate": 1.6761395517230215e-07, "loss": 1.0147, "step": 10908 }, { "epoch": 0.9747358545357071, "grad_norm": 0.4299551248550415, "learning_rate": 1.6643215353400188e-07, "loss": 0.9801, "step": 10909 }, { "epoch": 0.974825206066969, "grad_norm": 0.4688587784767151, "learning_rate": 1.6525452598819547e-07, "loss": 0.8839, "step": 10910 }, { "epoch": 0.9749145575982309, "grad_norm": 0.509926438331604, "learning_rate": 1.6408107263352069e-07, "loss": 0.9189, "step": 10911 }, { "epoch": 0.9750039091294928, "grad_norm": 0.6090724468231201, "learning_rate": 1.6291179356827114e-07, "loss": 0.9022, "step": 10912 }, { "epoch": 0.9750932606607545, "grad_norm": 0.5029346942901611, "learning_rate": 1.617466888903907e-07, "loss": 0.9822, "step": 10913 }, { "epoch": 0.9751826121920164, "grad_norm": 0.594288170337677, "learning_rate": 1.6058575869745686e-07, "loss": 0.8495, "step": 10914 }, { "epoch": 0.9752719637232783, "grad_norm": 0.4893123507499695, "learning_rate": 1.5942900308671405e-07, "loss": 0.9206, "step": 10915 }, { "epoch": 0.9753613152545402, "grad_norm": 0.4604051113128662, "learning_rate": 1.58276422155057e-07, "loss": 0.9293, "step": 10916 }, { "epoch": 0.9754506667858021, "grad_norm": 0.4965328872203827, "learning_rate": 1.5712801599902515e-07, "loss": 0.8888, "step": 10917 }, { "epoch": 0.9755400183170639, "grad_norm": 0.47335171699523926, "learning_rate": 1.5598378471480267e-07, "loss": 0.9663, "step": 10918 }, { "epoch": 0.9756293698483258, "grad_norm": 0.4554191827774048, "learning_rate": 1.548437283982407e-07, "loss": 0.9362, "step": 10919 }, { "epoch": 0.9757187213795876, "grad_norm": 0.4438236951828003, "learning_rate": 1.5370784714482944e-07, "loss": 0.9775, "step": 10920 }, { "epoch": 0.9758080729108495, "grad_norm": 0.5050124526023865, "learning_rate": 1.5257614104970952e-07, "loss": 0.9673, "step": 10921 }, { "epoch": 0.9758974244421114, "grad_norm": 0.4388897716999054, "learning_rate": 1.5144861020767176e-07, "loss": 0.9194, "step": 10922 }, { "epoch": 0.9759867759733732, "grad_norm": 0.6075052618980408, "learning_rate": 1.5032525471316284e-07, "loss": 0.8963, "step": 10923 }, { "epoch": 0.9760761275046351, "grad_norm": 0.4434504806995392, "learning_rate": 1.4920607466026858e-07, "loss": 0.9343, "step": 10924 }, { "epoch": 0.976165479035897, "grad_norm": 0.4541495442390442, "learning_rate": 1.4809107014274182e-07, "loss": 0.9642, "step": 10925 }, { "epoch": 0.9762548305671589, "grad_norm": 0.49039971828460693, "learning_rate": 1.4698024125396892e-07, "loss": 0.9018, "step": 10926 }, { "epoch": 0.9763441820984207, "grad_norm": 0.46627819538116455, "learning_rate": 1.4587358808699215e-07, "loss": 1.0147, "step": 10927 }, { "epoch": 0.9764335336296825, "grad_norm": 0.4772813618183136, "learning_rate": 1.447711107345151e-07, "loss": 0.9074, "step": 10928 }, { "epoch": 0.9765228851609444, "grad_norm": 0.606759250164032, "learning_rate": 1.436728092888695e-07, "loss": 0.8712, "step": 10929 }, { "epoch": 0.9766122366922063, "grad_norm": 0.4552794098854065, "learning_rate": 1.4257868384206508e-07, "loss": 0.9339, "step": 10930 }, { "epoch": 0.9767015882234682, "grad_norm": 0.5520709753036499, "learning_rate": 1.4148873448573408e-07, "loss": 0.869, "step": 10931 }, { "epoch": 0.9767909397547301, "grad_norm": 0.49193331599235535, "learning_rate": 1.4040296131117013e-07, "loss": 0.9923, "step": 10932 }, { "epoch": 0.976880291285992, "grad_norm": 0.5167152881622314, "learning_rate": 1.393213644093283e-07, "loss": 0.8921, "step": 10933 }, { "epoch": 0.9769696428172537, "grad_norm": 0.5435425639152527, "learning_rate": 1.382439438707972e-07, "loss": 0.9036, "step": 10934 }, { "epoch": 0.9770589943485156, "grad_norm": 0.5024846196174622, "learning_rate": 1.3717069978582687e-07, "loss": 0.899, "step": 10935 }, { "epoch": 0.9771483458797775, "grad_norm": 0.4377569854259491, "learning_rate": 1.361016322443065e-07, "loss": 0.9288, "step": 10936 }, { "epoch": 0.9772376974110394, "grad_norm": 0.549490213394165, "learning_rate": 1.350367413357867e-07, "loss": 0.9464, "step": 10937 }, { "epoch": 0.9773270489423013, "grad_norm": 0.5465390086174011, "learning_rate": 1.3397602714946278e-07, "loss": 0.7978, "step": 10938 }, { "epoch": 0.9774164004735632, "grad_norm": 0.4476923942565918, "learning_rate": 1.3291948977418033e-07, "loss": 0.9909, "step": 10939 }, { "epoch": 0.977505752004825, "grad_norm": 0.4725693464279175, "learning_rate": 1.318671292984297e-07, "loss": 0.9513, "step": 10940 }, { "epoch": 0.9775951035360868, "grad_norm": 0.4664715528488159, "learning_rate": 1.3081894581036813e-07, "loss": 0.9367, "step": 10941 }, { "epoch": 0.9776844550673487, "grad_norm": 0.5551668405532837, "learning_rate": 1.297749393977865e-07, "loss": 0.9184, "step": 10942 }, { "epoch": 0.9777738065986106, "grad_norm": 0.44665080308914185, "learning_rate": 1.2873511014813155e-07, "loss": 0.9797, "step": 10943 }, { "epoch": 0.9778631581298725, "grad_norm": 0.5233182907104492, "learning_rate": 1.2769945814850582e-07, "loss": 0.9347, "step": 10944 }, { "epoch": 0.9779525096611343, "grad_norm": 0.48261916637420654, "learning_rate": 1.2666798348564546e-07, "loss": 0.9018, "step": 10945 }, { "epoch": 0.9780418611923962, "grad_norm": 0.5972064733505249, "learning_rate": 1.256406862459536e-07, "loss": 0.9394, "step": 10946 }, { "epoch": 0.9781312127236581, "grad_norm": 0.49780553579330444, "learning_rate": 1.2461756651547807e-07, "loss": 0.9478, "step": 10947 }, { "epoch": 0.9782205642549199, "grad_norm": 0.48487037420272827, "learning_rate": 1.2359862437991144e-07, "loss": 0.8759, "step": 10948 }, { "epoch": 0.9783099157861818, "grad_norm": 0.6491232514381409, "learning_rate": 1.2258385992460764e-07, "loss": 0.8842, "step": 10949 }, { "epoch": 0.9783992673174436, "grad_norm": 0.4678862690925598, "learning_rate": 1.215732732345598e-07, "loss": 0.9332, "step": 10950 }, { "epoch": 0.9784886188487055, "grad_norm": 0.4849478304386139, "learning_rate": 1.205668643944169e-07, "loss": 0.9802, "step": 10951 }, { "epoch": 0.9785779703799674, "grad_norm": 0.5359340310096741, "learning_rate": 1.195646334884726e-07, "loss": 0.9359, "step": 10952 }, { "epoch": 0.9786673219112293, "grad_norm": 0.49691978096961975, "learning_rate": 1.1856658060068193e-07, "loss": 0.9549, "step": 10953 }, { "epoch": 0.9787566734424911, "grad_norm": 0.6014655232429504, "learning_rate": 1.1757270581463364e-07, "loss": 0.9857, "step": 10954 }, { "epoch": 0.978846024973753, "grad_norm": 0.4421977400779724, "learning_rate": 1.1658300921358334e-07, "loss": 0.9145, "step": 10955 }, { "epoch": 0.9789353765050148, "grad_norm": 0.552693247795105, "learning_rate": 1.1559749088042026e-07, "loss": 0.8431, "step": 10956 }, { "epoch": 0.9790247280362767, "grad_norm": 0.46435338258743286, "learning_rate": 1.1461615089770062e-07, "loss": 0.9223, "step": 10957 }, { "epoch": 0.9791140795675386, "grad_norm": 0.4661587178707123, "learning_rate": 1.1363898934761974e-07, "loss": 0.9771, "step": 10958 }, { "epoch": 0.9792034310988005, "grad_norm": 0.5485957264900208, "learning_rate": 1.1266600631202328e-07, "loss": 0.9378, "step": 10959 }, { "epoch": 0.9792927826300624, "grad_norm": 0.5378150939941406, "learning_rate": 1.1169720187240718e-07, "loss": 0.9114, "step": 10960 }, { "epoch": 0.9793821341613241, "grad_norm": 0.5579792261123657, "learning_rate": 1.1073257610991761e-07, "loss": 0.9124, "step": 10961 }, { "epoch": 0.979471485692586, "grad_norm": 0.3841468393802643, "learning_rate": 1.0977212910536217e-07, "loss": 1.0001, "step": 10962 }, { "epoch": 0.9795608372238479, "grad_norm": 0.4615533649921417, "learning_rate": 1.0881586093918205e-07, "loss": 0.918, "step": 10963 }, { "epoch": 0.9796501887551098, "grad_norm": 0.46879342198371887, "learning_rate": 1.0786377169147432e-07, "loss": 0.982, "step": 10964 }, { "epoch": 0.9797395402863717, "grad_norm": 0.41505542397499084, "learning_rate": 1.0691586144199184e-07, "loss": 0.9517, "step": 10965 }, { "epoch": 0.9798288918176336, "grad_norm": 0.5566489696502686, "learning_rate": 1.059721302701211e-07, "loss": 0.8337, "step": 10966 }, { "epoch": 0.9799182433488954, "grad_norm": 0.5348109602928162, "learning_rate": 1.0503257825492108e-07, "loss": 1.0199, "step": 10967 }, { "epoch": 0.9800075948801572, "grad_norm": 0.47354453802108765, "learning_rate": 1.040972054750844e-07, "loss": 0.9287, "step": 10968 }, { "epoch": 0.9800969464114191, "grad_norm": 0.5585243105888367, "learning_rate": 1.0316601200895948e-07, "loss": 0.9726, "step": 10969 }, { "epoch": 0.980186297942681, "grad_norm": 0.5595225095748901, "learning_rate": 1.022389979345395e-07, "loss": 0.8816, "step": 10970 }, { "epoch": 0.9802756494739429, "grad_norm": 0.5292766094207764, "learning_rate": 1.0131616332947346e-07, "loss": 0.9289, "step": 10971 }, { "epoch": 0.9803650010052047, "grad_norm": 0.42903298139572144, "learning_rate": 1.003975082710662e-07, "loss": 0.9523, "step": 10972 }, { "epoch": 0.9804543525364666, "grad_norm": 0.43353918194770813, "learning_rate": 9.948303283625615e-08, "loss": 0.9218, "step": 10973 }, { "epoch": 0.9805437040677285, "grad_norm": 0.6073578596115112, "learning_rate": 9.857273710164871e-08, "loss": 0.9989, "step": 10974 }, { "epoch": 0.9806330555989903, "grad_norm": 0.4935568869113922, "learning_rate": 9.766662114347736e-08, "loss": 0.9325, "step": 10975 }, { "epoch": 0.9807224071302522, "grad_norm": 0.6012499332427979, "learning_rate": 9.676468503765357e-08, "loss": 0.9096, "step": 10976 }, { "epoch": 0.980811758661514, "grad_norm": 0.5922300219535828, "learning_rate": 9.586692885971137e-08, "loss": 0.8704, "step": 10977 }, { "epoch": 0.9809011101927759, "grad_norm": 0.4871740937232971, "learning_rate": 9.49733526848573e-08, "loss": 0.901, "step": 10978 }, { "epoch": 0.9809904617240378, "grad_norm": 0.5010733604431152, "learning_rate": 9.4083956587937e-08, "loss": 0.9744, "step": 10979 }, { "epoch": 0.9810798132552997, "grad_norm": 0.5309439301490784, "learning_rate": 9.319874064344092e-08, "loss": 1.0236, "step": 10980 }, { "epoch": 0.9811691647865616, "grad_norm": 0.542961597442627, "learning_rate": 9.231770492552084e-08, "loss": 0.9089, "step": 10981 }, { "epoch": 0.9812585163178233, "grad_norm": 0.5267024636268616, "learning_rate": 9.144084950796772e-08, "loss": 0.9133, "step": 10982 }, { "epoch": 0.9813478678490852, "grad_norm": 0.48576876521110535, "learning_rate": 9.056817446422839e-08, "loss": 0.9561, "step": 10983 }, { "epoch": 0.9814372193803471, "grad_norm": 0.4452846944332123, "learning_rate": 8.969967986740546e-08, "loss": 1.0614, "step": 10984 }, { "epoch": 0.981526570911609, "grad_norm": 0.5701860785484314, "learning_rate": 8.883536579024077e-08, "loss": 0.8198, "step": 10985 }, { "epoch": 0.9816159224428709, "grad_norm": 0.4147273898124695, "learning_rate": 8.797523230512639e-08, "loss": 0.9686, "step": 10986 }, { "epoch": 0.9817052739741328, "grad_norm": 0.5088819861412048, "learning_rate": 8.711927948411025e-08, "loss": 0.913, "step": 10987 }, { "epoch": 0.9817946255053946, "grad_norm": 0.5437602996826172, "learning_rate": 8.626750739888501e-08, "loss": 1.0007, "step": 10988 }, { "epoch": 0.9818839770366564, "grad_norm": 0.6010596752166748, "learning_rate": 8.541991612080469e-08, "loss": 0.8717, "step": 10989 }, { "epoch": 0.9819733285679183, "grad_norm": 0.44414857029914856, "learning_rate": 8.457650572085141e-08, "loss": 0.9774, "step": 10990 }, { "epoch": 0.9820626800991802, "grad_norm": 0.4728522002696991, "learning_rate": 8.373727626967975e-08, "loss": 0.9735, "step": 10991 }, { "epoch": 0.9821520316304421, "grad_norm": 0.543158233165741, "learning_rate": 8.290222783757795e-08, "loss": 0.8637, "step": 10992 }, { "epoch": 0.982241383161704, "grad_norm": 0.4106288254261017, "learning_rate": 8.20713604944956e-08, "loss": 0.9912, "step": 10993 }, { "epoch": 0.9823307346929658, "grad_norm": 0.40964820981025696, "learning_rate": 8.124467431002148e-08, "loss": 0.9706, "step": 10994 }, { "epoch": 0.9824200862242277, "grad_norm": 0.4523217976093292, "learning_rate": 8.042216935340019e-08, "loss": 0.9801, "step": 10995 }, { "epoch": 0.9825094377554895, "grad_norm": 0.5060840249061584, "learning_rate": 7.960384569353219e-08, "loss": 0.9665, "step": 10996 }, { "epoch": 0.9825987892867514, "grad_norm": 0.5814510583877563, "learning_rate": 7.878970339894598e-08, "loss": 0.9288, "step": 10997 }, { "epoch": 0.9826881408180133, "grad_norm": 0.45717230439186096, "learning_rate": 7.797974253785367e-08, "loss": 0.9257, "step": 10998 }, { "epoch": 0.9827774923492751, "grad_norm": 0.3922685384750366, "learning_rate": 7.717396317808433e-08, "loss": 0.9438, "step": 10999 }, { "epoch": 0.982866843880537, "grad_norm": 0.5810117125511169, "learning_rate": 7.637236538713399e-08, "loss": 0.8851, "step": 11000 }, { "epoch": 0.9829561954117989, "grad_norm": 0.5308172106742859, "learning_rate": 7.557494923214337e-08, "loss": 0.8542, "step": 11001 }, { "epoch": 0.9830455469430608, "grad_norm": 0.48735228180885315, "learning_rate": 7.478171477990902e-08, "loss": 0.9284, "step": 11002 }, { "epoch": 0.9831348984743226, "grad_norm": 0.5343984365463257, "learning_rate": 7.399266209687228e-08, "loss": 0.9664, "step": 11003 }, { "epoch": 0.9832242500055844, "grad_norm": 0.40684744715690613, "learning_rate": 7.32077912491247e-08, "loss": 0.9746, "step": 11004 }, { "epoch": 0.9833136015368463, "grad_norm": 0.4683247208595276, "learning_rate": 7.242710230240257e-08, "loss": 0.9381, "step": 11005 }, { "epoch": 0.9834029530681082, "grad_norm": 0.4208630323410034, "learning_rate": 7.165059532210361e-08, "loss": 0.9214, "step": 11006 }, { "epoch": 0.9834923045993701, "grad_norm": 0.45483431220054626, "learning_rate": 7.087827037325912e-08, "loss": 0.9788, "step": 11007 }, { "epoch": 0.983581656130632, "grad_norm": 0.6926810145378113, "learning_rate": 7.011012752056733e-08, "loss": 0.826, "step": 11008 }, { "epoch": 0.9836710076618939, "grad_norm": 0.4510803818702698, "learning_rate": 6.934616682837125e-08, "loss": 0.9017, "step": 11009 }, { "epoch": 0.9837603591931556, "grad_norm": 0.5115488171577454, "learning_rate": 6.8586388360653e-08, "loss": 0.8919, "step": 11010 }, { "epoch": 0.9838497107244175, "grad_norm": 0.5512319207191467, "learning_rate": 6.783079218105614e-08, "loss": 0.9227, "step": 11011 }, { "epoch": 0.9839390622556794, "grad_norm": 0.5396715998649597, "learning_rate": 6.707937835286893e-08, "loss": 0.9214, "step": 11012 }, { "epoch": 0.9840284137869413, "grad_norm": 0.5042988061904907, "learning_rate": 6.63321469390299e-08, "loss": 1.0184, "step": 11013 }, { "epoch": 0.9841177653182032, "grad_norm": 0.5353363752365112, "learning_rate": 6.558909800212787e-08, "loss": 0.929, "step": 11014 }, { "epoch": 0.984207116849465, "grad_norm": 0.5171821117401123, "learning_rate": 6.485023160440195e-08, "loss": 0.8725, "step": 11015 }, { "epoch": 0.9842964683807269, "grad_norm": 0.44513246417045593, "learning_rate": 6.411554780774154e-08, "loss": 0.9139, "step": 11016 }, { "epoch": 0.9843858199119887, "grad_norm": 0.4491952061653137, "learning_rate": 6.338504667368072e-08, "loss": 0.9752, "step": 11017 }, { "epoch": 0.9844751714432506, "grad_norm": 0.47702059149742126, "learning_rate": 6.265872826340946e-08, "loss": 0.9612, "step": 11018 }, { "epoch": 0.9845645229745125, "grad_norm": 0.6224638223648071, "learning_rate": 6.193659263776242e-08, "loss": 0.9551, "step": 11019 }, { "epoch": 0.9846538745057744, "grad_norm": 0.521059513092041, "learning_rate": 6.121863985722454e-08, "loss": 0.9127, "step": 11020 }, { "epoch": 0.9847432260370362, "grad_norm": 0.4040971100330353, "learning_rate": 6.05048699819366e-08, "loss": 0.9459, "step": 11021 }, { "epoch": 0.9848325775682981, "grad_norm": 0.6411527395248413, "learning_rate": 5.979528307168414e-08, "loss": 0.9753, "step": 11022 }, { "epoch": 0.9849219290995599, "grad_norm": 0.469348669052124, "learning_rate": 5.908987918589737e-08, "loss": 0.883, "step": 11023 }, { "epoch": 0.9850112806308218, "grad_norm": 0.45892199873924255, "learning_rate": 5.838865838366792e-08, "loss": 0.9839, "step": 11024 }, { "epoch": 0.9851006321620837, "grad_norm": 0.5176712870597839, "learning_rate": 5.769162072373213e-08, "loss": 0.9604, "step": 11025 }, { "epoch": 0.9851899836933455, "grad_norm": 0.45386213064193726, "learning_rate": 5.699876626446554e-08, "loss": 0.9464, "step": 11026 }, { "epoch": 0.9852793352246074, "grad_norm": 0.5465309619903564, "learning_rate": 5.6310095063905056e-08, "loss": 0.8722, "step": 11027 }, { "epoch": 0.9853686867558693, "grad_norm": 0.4664804935455322, "learning_rate": 5.562560717973786e-08, "loss": 0.9386, "step": 11028 }, { "epoch": 0.9854580382871312, "grad_norm": 0.4405352771282196, "learning_rate": 5.494530266929032e-08, "loss": 0.9666, "step": 11029 }, { "epoch": 0.985547389818393, "grad_norm": 0.41485795378685, "learning_rate": 5.426918158955574e-08, "loss": 1.0112, "step": 11030 }, { "epoch": 0.9856367413496548, "grad_norm": 0.45383763313293457, "learning_rate": 5.359724399715549e-08, "loss": 1.0, "step": 11031 }, { "epoch": 0.9857260928809167, "grad_norm": 0.44838976860046387, "learning_rate": 5.2929489948377876e-08, "loss": 0.926, "step": 11032 }, { "epoch": 0.9858154444121786, "grad_norm": 0.49654853343963623, "learning_rate": 5.226591949915594e-08, "loss": 0.9731, "step": 11033 }, { "epoch": 0.9859047959434405, "grad_norm": 0.4356088936328888, "learning_rate": 5.1606532705067436e-08, "loss": 1.0049, "step": 11034 }, { "epoch": 0.9859941474747024, "grad_norm": 0.5468194484710693, "learning_rate": 5.0951329621340416e-08, "loss": 0.9498, "step": 11035 }, { "epoch": 0.9860834990059643, "grad_norm": 0.4790467321872711, "learning_rate": 5.0300310302858754e-08, "loss": 0.9816, "step": 11036 }, { "epoch": 0.986172850537226, "grad_norm": 0.4556531012058258, "learning_rate": 4.965347480415106e-08, "loss": 0.9038, "step": 11037 }, { "epoch": 0.9862622020684879, "grad_norm": 0.4872002601623535, "learning_rate": 4.901082317940176e-08, "loss": 0.9294, "step": 11038 }, { "epoch": 0.9863515535997498, "grad_norm": 0.47440412640571594, "learning_rate": 4.837235548242891e-08, "loss": 0.9039, "step": 11039 }, { "epoch": 0.9864409051310117, "grad_norm": 0.616805374622345, "learning_rate": 4.773807176672307e-08, "loss": 0.9557, "step": 11040 }, { "epoch": 0.9865302566622736, "grad_norm": 0.6758015751838684, "learning_rate": 4.710797208540285e-08, "loss": 0.8747, "step": 11041 }, { "epoch": 0.9866196081935354, "grad_norm": 0.5714487433433533, "learning_rate": 4.648205649124826e-08, "loss": 0.9604, "step": 11042 }, { "epoch": 0.9867089597247973, "grad_norm": 0.5039824843406677, "learning_rate": 4.586032503668958e-08, "loss": 0.949, "step": 11043 }, { "epoch": 0.9867983112560591, "grad_norm": 0.45489272475242615, "learning_rate": 4.5242777773801816e-08, "loss": 0.9297, "step": 11044 }, { "epoch": 0.986887662787321, "grad_norm": 0.49669596552848816, "learning_rate": 4.4629414754310264e-08, "loss": 1.0207, "step": 11045 }, { "epoch": 0.9869770143185829, "grad_norm": 0.4577721059322357, "learning_rate": 4.402023602959604e-08, "loss": 0.9492, "step": 11046 }, { "epoch": 0.9870663658498448, "grad_norm": 0.4786894619464874, "learning_rate": 4.34152416506739e-08, "loss": 0.9994, "step": 11047 }, { "epoch": 0.9871557173811066, "grad_norm": 0.49558448791503906, "learning_rate": 4.281443166822552e-08, "loss": 0.928, "step": 11048 }, { "epoch": 0.9872450689123685, "grad_norm": 0.5031788945198059, "learning_rate": 4.221780613257176e-08, "loss": 0.9041, "step": 11049 }, { "epoch": 0.9873344204436304, "grad_norm": 0.4300597310066223, "learning_rate": 4.1625365093689306e-08, "loss": 1.0044, "step": 11050 }, { "epoch": 0.9874237719748922, "grad_norm": 0.5299180746078491, "learning_rate": 4.103710860120513e-08, "loss": 0.9538, "step": 11051 }, { "epoch": 0.9875131235061541, "grad_norm": 0.4451320469379425, "learning_rate": 4.045303670438538e-08, "loss": 0.9107, "step": 11052 }, { "epoch": 0.9876024750374159, "grad_norm": 0.4782625138759613, "learning_rate": 3.987314945215204e-08, "loss": 0.962, "step": 11053 }, { "epoch": 0.9876918265686778, "grad_norm": 0.4866953194141388, "learning_rate": 3.929744689307735e-08, "loss": 0.9365, "step": 11054 }, { "epoch": 0.9877811780999397, "grad_norm": 0.5117444396018982, "learning_rate": 3.872592907538941e-08, "loss": 0.8583, "step": 11055 }, { "epoch": 0.9878705296312016, "grad_norm": 0.4728601574897766, "learning_rate": 3.815859604694993e-08, "loss": 0.8894, "step": 11056 }, { "epoch": 0.9879598811624635, "grad_norm": 0.4523528814315796, "learning_rate": 3.759544785528757e-08, "loss": 1.0043, "step": 11057 }, { "epoch": 0.9880492326937252, "grad_norm": 0.446228951215744, "learning_rate": 3.7036484547564585e-08, "loss": 0.9598, "step": 11058 }, { "epoch": 0.9881385842249871, "grad_norm": 0.5366201996803284, "learning_rate": 3.6481706170599094e-08, "loss": 0.8954, "step": 11059 }, { "epoch": 0.988227935756249, "grad_norm": 0.6366725564002991, "learning_rate": 3.593111277086503e-08, "loss": 0.9405, "step": 11060 }, { "epoch": 0.9883172872875109, "grad_norm": 0.44401809573173523, "learning_rate": 3.538470439448105e-08, "loss": 0.9049, "step": 11061 }, { "epoch": 0.9884066388187728, "grad_norm": 0.48204681277275085, "learning_rate": 3.4842481087216104e-08, "loss": 0.8971, "step": 11062 }, { "epoch": 0.9884959903500347, "grad_norm": 0.4742887616157532, "learning_rate": 3.4304442894478316e-08, "loss": 0.9513, "step": 11063 }, { "epoch": 0.9885853418812965, "grad_norm": 0.49685031175613403, "learning_rate": 3.377058986134274e-08, "loss": 0.9369, "step": 11064 }, { "epoch": 0.9886746934125583, "grad_norm": 0.41819649934768677, "learning_rate": 3.324092203251805e-08, "loss": 0.9412, "step": 11065 }, { "epoch": 0.9887640449438202, "grad_norm": 0.4875706434249878, "learning_rate": 3.271543945237987e-08, "loss": 0.8598, "step": 11066 }, { "epoch": 0.9888533964750821, "grad_norm": 0.6076412200927734, "learning_rate": 3.219414216493188e-08, "loss": 0.9209, "step": 11067 }, { "epoch": 0.988942748006344, "grad_norm": 0.5355422496795654, "learning_rate": 3.167703021384471e-08, "loss": 0.9449, "step": 11068 }, { "epoch": 0.9890320995376058, "grad_norm": 0.48543617129325867, "learning_rate": 3.1164103642428165e-08, "loss": 0.9368, "step": 11069 }, { "epoch": 0.9891214510688677, "grad_norm": 0.4472845494747162, "learning_rate": 3.0655362493647865e-08, "loss": 0.9731, "step": 11070 }, { "epoch": 0.9892108026001296, "grad_norm": 0.4854414761066437, "learning_rate": 3.015080681011972e-08, "loss": 0.9416, "step": 11071 }, { "epoch": 0.9893001541313914, "grad_norm": 0.5297408103942871, "learning_rate": 2.965043663409883e-08, "loss": 0.8901, "step": 11072 }, { "epoch": 0.9893895056626533, "grad_norm": 0.5325205326080322, "learning_rate": 2.9154252007496108e-08, "loss": 0.8408, "step": 11073 }, { "epoch": 0.9894788571939152, "grad_norm": 0.42406973242759705, "learning_rate": 2.86622529718783e-08, "loss": 0.9288, "step": 11074 }, { "epoch": 0.989568208725177, "grad_norm": 0.49581971764564514, "learning_rate": 2.817443956845689e-08, "loss": 0.9434, "step": 11075 }, { "epoch": 0.9896575602564389, "grad_norm": 0.5132964849472046, "learning_rate": 2.769081183808253e-08, "loss": 0.9745, "step": 11076 }, { "epoch": 0.9897469117877008, "grad_norm": 0.4875660538673401, "learning_rate": 2.7211369821272816e-08, "loss": 0.9265, "step": 11077 }, { "epoch": 0.9898362633189627, "grad_norm": 0.46364524960517883, "learning_rate": 2.6736113558178954e-08, "loss": 0.9082, "step": 11078 }, { "epoch": 0.9899256148502245, "grad_norm": 0.4320114254951477, "learning_rate": 2.626504308861355e-08, "loss": 0.8796, "step": 11079 }, { "epoch": 0.9900149663814863, "grad_norm": 0.471098393201828, "learning_rate": 2.5798158452033927e-08, "loss": 0.8859, "step": 11080 }, { "epoch": 0.9901043179127482, "grad_norm": 0.5627569556236267, "learning_rate": 2.533545968754214e-08, "loss": 0.8719, "step": 11081 }, { "epoch": 0.9901936694440101, "grad_norm": 0.4802547097206116, "learning_rate": 2.4876946833901628e-08, "loss": 0.949, "step": 11082 }, { "epoch": 0.990283020975272, "grad_norm": 0.4991259276866913, "learning_rate": 2.442261992950945e-08, "loss": 0.8312, "step": 11083 }, { "epoch": 0.9903723725065339, "grad_norm": 0.4693763256072998, "learning_rate": 2.3972479012429605e-08, "loss": 0.9392, "step": 11084 }, { "epoch": 0.9904617240377956, "grad_norm": 0.6424983143806458, "learning_rate": 2.3526524120354164e-08, "loss": 0.8377, "step": 11085 }, { "epoch": 0.9905510755690575, "grad_norm": 0.601347029209137, "learning_rate": 2.3084755290647685e-08, "loss": 0.8502, "step": 11086 }, { "epoch": 0.9906404271003194, "grad_norm": 0.43913698196411133, "learning_rate": 2.264717256030835e-08, "loss": 0.9496, "step": 11087 }, { "epoch": 0.9907297786315813, "grad_norm": 0.6155788898468018, "learning_rate": 2.2213775965984617e-08, "loss": 0.9325, "step": 11088 }, { "epoch": 0.9908191301628432, "grad_norm": 0.48678359389305115, "learning_rate": 2.1784565543986337e-08, "loss": 0.9062, "step": 11089 }, { "epoch": 0.9909084816941051, "grad_norm": 0.5303758978843689, "learning_rate": 2.135954133025697e-08, "loss": 0.8908, "step": 11090 }, { "epoch": 0.9909978332253669, "grad_norm": 0.4228883385658264, "learning_rate": 2.093870336040138e-08, "loss": 0.9731, "step": 11091 }, { "epoch": 0.9910871847566287, "grad_norm": 0.5669239163398743, "learning_rate": 2.052205166966914e-08, "loss": 0.9193, "step": 11092 }, { "epoch": 0.9911765362878906, "grad_norm": 0.5962837338447571, "learning_rate": 2.0109586292960116e-08, "loss": 0.8922, "step": 11093 }, { "epoch": 0.9912658878191525, "grad_norm": 0.47855356335639954, "learning_rate": 1.9701307264818893e-08, "loss": 0.8838, "step": 11094 }, { "epoch": 0.9913552393504144, "grad_norm": 0.5598346590995789, "learning_rate": 1.929721461944034e-08, "loss": 0.8942, "step": 11095 }, { "epoch": 0.9914445908816762, "grad_norm": 0.45872122049331665, "learning_rate": 1.889730839068071e-08, "loss": 1.0056, "step": 11096 }, { "epoch": 0.9915339424129381, "grad_norm": 0.5087199807167053, "learning_rate": 1.8501588612029887e-08, "loss": 0.8756, "step": 11097 }, { "epoch": 0.9916232939442, "grad_norm": 0.4826347529888153, "learning_rate": 1.8110055316633567e-08, "loss": 0.8969, "step": 11098 }, { "epoch": 0.9917126454754618, "grad_norm": 0.4179272949695587, "learning_rate": 1.7722708537293296e-08, "loss": 0.9523, "step": 11099 }, { "epoch": 0.9918019970067237, "grad_norm": 0.44711118936538696, "learning_rate": 1.7339548306449794e-08, "loss": 0.9084, "step": 11100 }, { "epoch": 0.9918913485379856, "grad_norm": 0.4555767774581909, "learning_rate": 1.69605746561885e-08, "loss": 0.9573, "step": 11101 }, { "epoch": 0.9919807000692474, "grad_norm": 0.5947062373161316, "learning_rate": 1.6585787618267346e-08, "loss": 0.8424, "step": 11102 }, { "epoch": 0.9920700516005093, "grad_norm": 0.4358064532279968, "learning_rate": 1.6215187224066787e-08, "loss": 0.911, "step": 11103 }, { "epoch": 0.9921594031317712, "grad_norm": 0.5996667742729187, "learning_rate": 1.5848773504634207e-08, "loss": 0.8718, "step": 11104 }, { "epoch": 0.9922487546630331, "grad_norm": 0.4151008129119873, "learning_rate": 1.5486546490661724e-08, "loss": 0.9816, "step": 11105 }, { "epoch": 0.9923381061942949, "grad_norm": 0.5637631416320801, "learning_rate": 1.5128506212486183e-08, "loss": 0.9081, "step": 11106 }, { "epoch": 0.9924274577255567, "grad_norm": 0.46075546741485596, "learning_rate": 1.4774652700100256e-08, "loss": 0.8814, "step": 11107 }, { "epoch": 0.9925168092568186, "grad_norm": 0.46446409821510315, "learning_rate": 1.4424985983141348e-08, "loss": 0.9606, "step": 11108 }, { "epoch": 0.9926061607880805, "grad_norm": 0.4711821377277374, "learning_rate": 1.4079506090891592e-08, "loss": 0.932, "step": 11109 }, { "epoch": 0.9926955123193424, "grad_norm": 0.5269238352775574, "learning_rate": 1.3738213052300053e-08, "loss": 0.8762, "step": 11110 }, { "epoch": 0.9927848638506043, "grad_norm": 0.4487016499042511, "learning_rate": 1.3401106895949422e-08, "loss": 0.9347, "step": 11111 }, { "epoch": 0.9928742153818662, "grad_norm": 0.6086486577987671, "learning_rate": 1.3068187650072672e-08, "loss": 0.8741, "step": 11112 }, { "epoch": 0.9929635669131279, "grad_norm": 0.540021538734436, "learning_rate": 1.2739455342558603e-08, "loss": 0.8542, "step": 11113 }, { "epoch": 0.9930529184443898, "grad_norm": 0.43035051226615906, "learning_rate": 1.241491000094075e-08, "loss": 0.8728, "step": 11114 }, { "epoch": 0.9931422699756517, "grad_norm": 0.4739319980144501, "learning_rate": 1.2094551652402919e-08, "loss": 0.9627, "step": 11115 }, { "epoch": 0.9932316215069136, "grad_norm": 0.42306095361709595, "learning_rate": 1.1778380323779203e-08, "loss": 0.9101, "step": 11116 }, { "epoch": 0.9933209730381755, "grad_norm": 0.46345555782318115, "learning_rate": 1.1466396041553973e-08, "loss": 0.8924, "step": 11117 }, { "epoch": 0.9934103245694373, "grad_norm": 0.4990481734275818, "learning_rate": 1.1158598831856326e-08, "loss": 0.9019, "step": 11118 }, { "epoch": 0.9934996761006992, "grad_norm": 0.4848730266094208, "learning_rate": 1.0854988720471193e-08, "loss": 0.9512, "step": 11119 }, { "epoch": 0.993589027631961, "grad_norm": 0.446789413690567, "learning_rate": 1.0555565732822681e-08, "loss": 0.951, "step": 11120 }, { "epoch": 0.9936783791632229, "grad_norm": 0.5458630323410034, "learning_rate": 1.0260329893996278e-08, "loss": 0.9909, "step": 11121 }, { "epoch": 0.9937677306944848, "grad_norm": 0.4132474660873413, "learning_rate": 9.969281228722204e-09, "loss": 0.9564, "step": 11122 }, { "epoch": 0.9938570822257466, "grad_norm": 0.47372639179229736, "learning_rate": 9.682419761369855e-09, "loss": 0.9558, "step": 11123 }, { "epoch": 0.9939464337570085, "grad_norm": 0.48265764117240906, "learning_rate": 9.399745515981106e-09, "loss": 0.94, "step": 11124 }, { "epoch": 0.9940357852882704, "grad_norm": 0.4469437003135681, "learning_rate": 9.121258516220366e-09, "loss": 0.9356, "step": 11125 }, { "epoch": 0.9941251368195323, "grad_norm": 0.553846538066864, "learning_rate": 8.846958785418968e-09, "loss": 0.941, "step": 11126 }, { "epoch": 0.9942144883507941, "grad_norm": 0.46764692664146423, "learning_rate": 8.57684634655298e-09, "loss": 0.981, "step": 11127 }, { "epoch": 0.994303839882056, "grad_norm": 0.5349944233894348, "learning_rate": 8.310921222243195e-09, "loss": 0.9157, "step": 11128 }, { "epoch": 0.9943931914133178, "grad_norm": 0.47920531034469604, "learning_rate": 8.04918343477179e-09, "loss": 0.9246, "step": 11129 }, { "epoch": 0.9944825429445797, "grad_norm": 0.4930896759033203, "learning_rate": 7.791633006054567e-09, "loss": 0.8867, "step": 11130 }, { "epoch": 0.9945718944758416, "grad_norm": 0.5266461372375488, "learning_rate": 7.538269957668709e-09, "loss": 0.8601, "step": 11131 }, { "epoch": 0.9946612460071035, "grad_norm": 0.5032413601875305, "learning_rate": 7.2890943108305796e-09, "loss": 0.9025, "step": 11132 }, { "epoch": 0.9947505975383654, "grad_norm": 0.49191898107528687, "learning_rate": 7.0441060864179235e-09, "loss": 0.8893, "step": 11133 }, { "epoch": 0.9948399490696271, "grad_norm": 0.46208131313323975, "learning_rate": 6.80330530494766e-09, "loss": 0.876, "step": 11134 }, { "epoch": 0.994929300600889, "grad_norm": 0.5641262531280518, "learning_rate": 6.566691986592543e-09, "loss": 0.889, "step": 11135 }, { "epoch": 0.9950186521321509, "grad_norm": 0.4556792080402374, "learning_rate": 6.334266151164503e-09, "loss": 0.933, "step": 11136 }, { "epoch": 0.9951080036634128, "grad_norm": 0.439849317073822, "learning_rate": 6.106027818136851e-09, "loss": 0.9767, "step": 11137 }, { "epoch": 0.9951973551946747, "grad_norm": 0.5620716214179993, "learning_rate": 5.881977006622075e-09, "loss": 0.8875, "step": 11138 }, { "epoch": 0.9952867067259366, "grad_norm": 0.4618760943412781, "learning_rate": 5.662113735394048e-09, "loss": 0.9241, "step": 11139 }, { "epoch": 0.9953760582571984, "grad_norm": 0.4364456236362457, "learning_rate": 5.4464380228658184e-09, "loss": 0.8809, "step": 11140 }, { "epoch": 0.9954654097884602, "grad_norm": 0.5377599000930786, "learning_rate": 5.234949887106266e-09, "loss": 0.9302, "step": 11141 }, { "epoch": 0.9955547613197221, "grad_norm": 0.4686380624771118, "learning_rate": 5.0276493458178976e-09, "loss": 0.9593, "step": 11142 }, { "epoch": 0.995644112850984, "grad_norm": 0.4569147527217865, "learning_rate": 4.8245364163757025e-09, "loss": 0.8769, "step": 11143 }, { "epoch": 0.9957334643822459, "grad_norm": 0.6316559910774231, "learning_rate": 4.6256111157882976e-09, "loss": 0.9268, "step": 11144 }, { "epoch": 0.9958228159135077, "grad_norm": 0.6083618402481079, "learning_rate": 4.430873460720131e-09, "loss": 0.8524, "step": 11145 }, { "epoch": 0.9959121674447696, "grad_norm": 0.42265886068344116, "learning_rate": 4.2403234674803785e-09, "loss": 0.9046, "step": 11146 }, { "epoch": 0.9960015189760314, "grad_norm": 0.4713272750377655, "learning_rate": 4.053961152028496e-09, "loss": 0.9119, "step": 11147 }, { "epoch": 0.9960908705072933, "grad_norm": 0.5426056385040283, "learning_rate": 3.871786529974219e-09, "loss": 0.901, "step": 11148 }, { "epoch": 0.9961802220385552, "grad_norm": 0.46449586749076843, "learning_rate": 3.6937996165831156e-09, "loss": 0.9605, "step": 11149 }, { "epoch": 0.996269573569817, "grad_norm": 0.4397094249725342, "learning_rate": 3.5200004267543773e-09, "loss": 0.9556, "step": 11150 }, { "epoch": 0.9963589251010789, "grad_norm": 0.47000452876091003, "learning_rate": 3.3503889750485796e-09, "loss": 0.9636, "step": 11151 }, { "epoch": 0.9964482766323408, "grad_norm": 0.48919039964675903, "learning_rate": 3.184965275676577e-09, "loss": 0.8975, "step": 11152 }, { "epoch": 0.9965376281636027, "grad_norm": 0.46157464385032654, "learning_rate": 3.0237293424939527e-09, "loss": 0.8855, "step": 11153 }, { "epoch": 0.9966269796948645, "grad_norm": 0.4459165036678314, "learning_rate": 2.8666811890010193e-09, "loss": 0.9519, "step": 11154 }, { "epoch": 0.9967163312261264, "grad_norm": 0.5475977063179016, "learning_rate": 2.713820828353919e-09, "loss": 0.8692, "step": 11155 }, { "epoch": 0.9968056827573882, "grad_norm": 0.40559154748916626, "learning_rate": 2.5651482733535237e-09, "loss": 0.9232, "step": 11156 }, { "epoch": 0.9968950342886501, "grad_norm": 0.4425158202648163, "learning_rate": 2.4206635364620867e-09, "loss": 0.916, "step": 11157 }, { "epoch": 0.996984385819912, "grad_norm": 0.4523138105869293, "learning_rate": 2.2803666297754876e-09, "loss": 0.8618, "step": 11158 }, { "epoch": 0.9970737373511739, "grad_norm": 0.5861170291900635, "learning_rate": 2.144257565045438e-09, "loss": 0.8668, "step": 11159 }, { "epoch": 0.9971630888824358, "grad_norm": 0.5498065948486328, "learning_rate": 2.012336353668376e-09, "loss": 0.8823, "step": 11160 }, { "epoch": 0.9972524404136975, "grad_norm": 0.5954715013504028, "learning_rate": 1.884603006702124e-09, "loss": 0.8704, "step": 11161 }, { "epoch": 0.9973417919449594, "grad_norm": 0.5423356294631958, "learning_rate": 1.7610575348436798e-09, "loss": 1.0066, "step": 11162 }, { "epoch": 0.9974311434762213, "grad_norm": 0.51182621717453, "learning_rate": 1.6416999484347716e-09, "loss": 0.9438, "step": 11163 }, { "epoch": 0.9975204950074832, "grad_norm": 0.5101816654205322, "learning_rate": 1.5265302574785089e-09, "loss": 1.0102, "step": 11164 }, { "epoch": 0.9976098465387451, "grad_norm": 0.44059595465660095, "learning_rate": 1.4155484716227296e-09, "loss": 0.9013, "step": 11165 }, { "epoch": 0.997699198070007, "grad_norm": 0.4549104869365692, "learning_rate": 1.3087546001600004e-09, "loss": 0.9969, "step": 11166 }, { "epoch": 0.9977885496012688, "grad_norm": 0.6448225378990173, "learning_rate": 1.2061486520387188e-09, "loss": 0.8289, "step": 11167 }, { "epoch": 0.9978779011325306, "grad_norm": 0.5743524432182312, "learning_rate": 1.1077306358520112e-09, "loss": 0.8468, "step": 11168 }, { "epoch": 0.9979672526637925, "grad_norm": 0.5093139410018921, "learning_rate": 1.0135005598432835e-09, "loss": 0.9001, "step": 11169 }, { "epoch": 0.9980566041950544, "grad_norm": 0.5424085855484009, "learning_rate": 9.2345843190067e-10, "loss": 0.8673, "step": 11170 }, { "epoch": 0.9981459557263163, "grad_norm": 0.4926496744155884, "learning_rate": 8.376042595736877e-10, "loss": 0.9348, "step": 11171 }, { "epoch": 0.9982353072575781, "grad_norm": 0.5678501129150391, "learning_rate": 7.559380500454794e-10, "loss": 0.9215, "step": 11172 }, { "epoch": 0.99832465878884, "grad_norm": 0.5548151135444641, "learning_rate": 6.784598101661211e-10, "loss": 0.9721, "step": 11173 }, { "epoch": 0.9984140103201019, "grad_norm": 0.5497487783432007, "learning_rate": 6.051695464193152e-10, "loss": 0.8651, "step": 11174 }, { "epoch": 0.9985033618513637, "grad_norm": 0.6130139827728271, "learning_rate": 5.360672649445952e-10, "loss": 0.8772, "step": 11175 }, { "epoch": 0.9985927133826256, "grad_norm": 0.6136727333068848, "learning_rate": 4.711529715262231e-10, "loss": 0.7813, "step": 11176 }, { "epoch": 0.9986820649138874, "grad_norm": 0.5682715773582458, "learning_rate": 4.104266716098426e-10, "loss": 0.9652, "step": 11177 }, { "epoch": 0.9987714164451493, "grad_norm": 0.4911440908908844, "learning_rate": 3.538883702747242e-10, "loss": 0.9506, "step": 11178 }, { "epoch": 0.9988607679764112, "grad_norm": 0.48630785942077637, "learning_rate": 3.015380722615202e-10, "loss": 1.0029, "step": 11179 }, { "epoch": 0.9989501195076731, "grad_norm": 0.4619069993495941, "learning_rate": 2.533757819556115e-10, "loss": 0.9795, "step": 11180 }, { "epoch": 0.999039471038935, "grad_norm": 0.47902753949165344, "learning_rate": 2.0940150338155662e-10, "loss": 0.9322, "step": 11181 }, { "epoch": 0.9991288225701968, "grad_norm": 0.4930683970451355, "learning_rate": 1.6961524023639819e-10, "loss": 0.9295, "step": 11182 }, { "epoch": 0.9992181741014586, "grad_norm": 0.6377449035644531, "learning_rate": 1.3401699583970306e-10, "loss": 0.8337, "step": 11183 }, { "epoch": 0.9993075256327205, "grad_norm": 0.47255298495292664, "learning_rate": 1.026067731835223e-10, "loss": 0.9339, "step": 11184 }, { "epoch": 0.9993968771639824, "grad_norm": 0.5022866129875183, "learning_rate": 7.538457489353334e-11, "loss": 0.892, "step": 11185 }, { "epoch": 0.9994862286952443, "grad_norm": 0.47073841094970703, "learning_rate": 5.2350403251244516e-11, "loss": 0.9153, "step": 11186 }, { "epoch": 0.9995755802265062, "grad_norm": 0.5592496395111084, "learning_rate": 3.3504260182892765e-11, "loss": 0.9546, "step": 11187 }, { "epoch": 0.999664931757768, "grad_norm": 0.41335591673851013, "learning_rate": 1.8846147276097014e-11, "loss": 0.9724, "step": 11188 }, { "epoch": 0.9997542832890298, "grad_norm": 0.6354795694351196, "learning_rate": 8.376065746551476e-12, "loss": 0.8885, "step": 11189 }, { "epoch": 0.9998436348202917, "grad_norm": 0.4729311466217041, "learning_rate": 2.094016482434569e-12, "loss": 0.897, "step": 11190 }, { "epoch": 0.9999329863515536, "grad_norm": 0.5076917409896851, "learning_rate": 0.0, "loss": 0.9054, "step": 11191 } ], "logging_steps": 1.0, "max_steps": 11191, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.597312254069965e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }