diff --git "a/marques/checkpoint-9500/trainer_state.json" "b/marques/checkpoint-9500/trainer_state.json" deleted file mode 100644--- "a/marques/checkpoint-9500/trainer_state.json" +++ /dev/null @@ -1,66534 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.3334561856122431, - "eval_steps": 500, - "global_step": 9500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 3.510065111707822e-05, - "grad_norm": 1.3214704990386963, - "learning_rate": 0.0, - "loss": 2.5456, - "step": 1 - }, - { - "epoch": 7.020130223415644e-05, - "grad_norm": 1.3258496522903442, - "learning_rate": 1.6666666666666667e-08, - "loss": 2.5931, - "step": 2 - }, - { - "epoch": 0.00010530195335123467, - "grad_norm": 1.3362666368484497, - "learning_rate": 3.3333333333333334e-08, - "loss": 2.6118, - "step": 3 - }, - { - "epoch": 0.0001404026044683129, - "grad_norm": 1.352743148803711, - "learning_rate": 5.0000000000000004e-08, - "loss": 2.6585, - "step": 4 - }, - { - "epoch": 0.0001755032555853911, - "grad_norm": 1.3314776420593262, - "learning_rate": 6.666666666666667e-08, - "loss": 2.5445, - "step": 5 - }, - { - "epoch": 0.00021060390670246933, - "grad_norm": 1.3044579029083252, - "learning_rate": 8.333333333333334e-08, - "loss": 2.6963, - "step": 6 - }, - { - "epoch": 0.00024570455781954757, - "grad_norm": 1.3040300607681274, - "learning_rate": 1.0000000000000001e-07, - "loss": 2.6423, - "step": 7 - }, - { - "epoch": 0.0002808052089366258, - "grad_norm": 1.5400934219360352, - "learning_rate": 1.1666666666666668e-07, - "loss": 2.688, - "step": 8 - }, - { - "epoch": 0.000315905860053704, - "grad_norm": 1.3288710117340088, - "learning_rate": 1.3333333333333334e-07, - "loss": 2.7358, - "step": 9 - }, - { - "epoch": 0.0003510065111707822, - "grad_norm": 1.3219934701919556, - "learning_rate": 1.5000000000000002e-07, - "loss": 2.4848, - "step": 10 - }, - { - "epoch": 0.00038610716228786046, - "grad_norm": 1.4280380010604858, - "learning_rate": 1.6666666666666668e-07, - "loss": 2.6836, - "step": 11 - }, - { - "epoch": 0.00042120781340493867, - "grad_norm": 1.3219069242477417, - "learning_rate": 1.8333333333333333e-07, - "loss": 2.663, - "step": 12 - }, - { - "epoch": 0.0004563084645220169, - "grad_norm": 1.3635289669036865, - "learning_rate": 2.0000000000000002e-07, - "loss": 2.6805, - "step": 13 - }, - { - "epoch": 0.0004914091156390951, - "grad_norm": 1.394425630569458, - "learning_rate": 2.1666666666666667e-07, - "loss": 2.553, - "step": 14 - }, - { - "epoch": 0.0005265097667561733, - "grad_norm": 1.317286729812622, - "learning_rate": 2.3333333333333336e-07, - "loss": 2.5429, - "step": 15 - }, - { - "epoch": 0.0005616104178732516, - "grad_norm": 1.3305565118789673, - "learning_rate": 2.5000000000000004e-07, - "loss": 2.6884, - "step": 16 - }, - { - "epoch": 0.0005967110689903298, - "grad_norm": 1.3051375150680542, - "learning_rate": 2.6666666666666667e-07, - "loss": 2.6097, - "step": 17 - }, - { - "epoch": 0.000631811720107408, - "grad_norm": 1.308965802192688, - "learning_rate": 2.8333333333333336e-07, - "loss": 2.4697, - "step": 18 - }, - { - "epoch": 0.0006669123712244862, - "grad_norm": 1.3310776948928833, - "learning_rate": 3.0000000000000004e-07, - "loss": 2.4734, - "step": 19 - }, - { - "epoch": 0.0007020130223415644, - "grad_norm": 1.4670143127441406, - "learning_rate": 3.1666666666666667e-07, - "loss": 2.7448, - "step": 20 - }, - { - "epoch": 0.0007371136734586427, - "grad_norm": 1.303655982017517, - "learning_rate": 3.3333333333333335e-07, - "loss": 2.6613, - "step": 21 - }, - { - "epoch": 0.0007722143245757209, - "grad_norm": 1.3009381294250488, - "learning_rate": 3.5000000000000004e-07, - "loss": 2.6894, - "step": 22 - }, - { - "epoch": 0.0008073149756927991, - "grad_norm": 1.5533232688903809, - "learning_rate": 3.6666666666666667e-07, - "loss": 2.628, - "step": 23 - }, - { - "epoch": 0.0008424156268098773, - "grad_norm": 1.3447502851486206, - "learning_rate": 3.8333333333333335e-07, - "loss": 2.6114, - "step": 24 - }, - { - "epoch": 0.0008775162779269556, - "grad_norm": 1.3052018880844116, - "learning_rate": 4.0000000000000003e-07, - "loss": 2.6201, - "step": 25 - }, - { - "epoch": 0.0009126169290440338, - "grad_norm": 1.3401448726654053, - "learning_rate": 4.1666666666666667e-07, - "loss": 2.6625, - "step": 26 - }, - { - "epoch": 0.000947717580161112, - "grad_norm": 1.3576993942260742, - "learning_rate": 4.3333333333333335e-07, - "loss": 2.7129, - "step": 27 - }, - { - "epoch": 0.0009828182312781903, - "grad_norm": 1.327500581741333, - "learning_rate": 4.5e-07, - "loss": 2.676, - "step": 28 - }, - { - "epoch": 0.0010179188823952684, - "grad_norm": 1.4109827280044556, - "learning_rate": 4.666666666666667e-07, - "loss": 2.7424, - "step": 29 - }, - { - "epoch": 0.0010530195335123466, - "grad_norm": 1.4408026933670044, - "learning_rate": 4.833333333333334e-07, - "loss": 2.7228, - "step": 30 - }, - { - "epoch": 0.001088120184629425, - "grad_norm": 1.3278839588165283, - "learning_rate": 5.000000000000001e-07, - "loss": 2.5503, - "step": 31 - }, - { - "epoch": 0.0011232208357465031, - "grad_norm": 1.2992643117904663, - "learning_rate": 5.166666666666667e-07, - "loss": 2.5652, - "step": 32 - }, - { - "epoch": 0.0011583214868635813, - "grad_norm": 1.3145183324813843, - "learning_rate": 5.333333333333333e-07, - "loss": 2.6206, - "step": 33 - }, - { - "epoch": 0.0011934221379806596, - "grad_norm": 1.3844133615493774, - "learning_rate": 5.5e-07, - "loss": 2.7305, - "step": 34 - }, - { - "epoch": 0.0012285227890977378, - "grad_norm": 1.5124484300613403, - "learning_rate": 5.666666666666667e-07, - "loss": 2.7045, - "step": 35 - }, - { - "epoch": 0.001263623440214816, - "grad_norm": 1.3393868207931519, - "learning_rate": 5.833333333333334e-07, - "loss": 2.6123, - "step": 36 - }, - { - "epoch": 0.001298724091331894, - "grad_norm": 1.3525620698928833, - "learning_rate": 6.000000000000001e-07, - "loss": 2.5713, - "step": 37 - }, - { - "epoch": 0.0013338247424489725, - "grad_norm": 1.2927671670913696, - "learning_rate": 6.166666666666667e-07, - "loss": 2.6034, - "step": 38 - }, - { - "epoch": 0.0013689253935660506, - "grad_norm": 1.3792005777359009, - "learning_rate": 6.333333333333333e-07, - "loss": 2.5869, - "step": 39 - }, - { - "epoch": 0.0014040260446831288, - "grad_norm": 1.3556491136550903, - "learning_rate": 6.5e-07, - "loss": 2.5316, - "step": 40 - }, - { - "epoch": 0.0014391266958002072, - "grad_norm": 1.3625715970993042, - "learning_rate": 6.666666666666667e-07, - "loss": 2.6613, - "step": 41 - }, - { - "epoch": 0.0014742273469172853, - "grad_norm": 1.3567153215408325, - "learning_rate": 6.833333333333334e-07, - "loss": 2.6499, - "step": 42 - }, - { - "epoch": 0.0015093279980343635, - "grad_norm": 1.42324960231781, - "learning_rate": 7.000000000000001e-07, - "loss": 2.6099, - "step": 43 - }, - { - "epoch": 0.0015444286491514418, - "grad_norm": 1.3361579179763794, - "learning_rate": 7.166666666666667e-07, - "loss": 2.5485, - "step": 44 - }, - { - "epoch": 0.00157952930026852, - "grad_norm": 1.35533607006073, - "learning_rate": 7.333333333333333e-07, - "loss": 2.5609, - "step": 45 - }, - { - "epoch": 0.0016146299513855981, - "grad_norm": 1.3744893074035645, - "learning_rate": 7.5e-07, - "loss": 2.6161, - "step": 46 - }, - { - "epoch": 0.0016497306025026765, - "grad_norm": 1.3804478645324707, - "learning_rate": 7.666666666666667e-07, - "loss": 2.7962, - "step": 47 - }, - { - "epoch": 0.0016848312536197547, - "grad_norm": 1.4268383979797363, - "learning_rate": 7.833333333333333e-07, - "loss": 2.5146, - "step": 48 - }, - { - "epoch": 0.0017199319047368328, - "grad_norm": 1.4491099119186401, - "learning_rate": 8.000000000000001e-07, - "loss": 2.6346, - "step": 49 - }, - { - "epoch": 0.0017550325558539112, - "grad_norm": 1.4292728900909424, - "learning_rate": 8.166666666666666e-07, - "loss": 2.7786, - "step": 50 - }, - { - "epoch": 0.0017901332069709894, - "grad_norm": 1.3818882703781128, - "learning_rate": 8.333333333333333e-07, - "loss": 2.5806, - "step": 51 - }, - { - "epoch": 0.0018252338580880675, - "grad_norm": 1.411785364151001, - "learning_rate": 8.500000000000001e-07, - "loss": 2.6666, - "step": 52 - }, - { - "epoch": 0.0018603345092051457, - "grad_norm": 1.4511009454727173, - "learning_rate": 8.666666666666667e-07, - "loss": 2.5428, - "step": 53 - }, - { - "epoch": 0.001895435160322224, - "grad_norm": 1.3422091007232666, - "learning_rate": 8.833333333333334e-07, - "loss": 2.6229, - "step": 54 - }, - { - "epoch": 0.0019305358114393022, - "grad_norm": 1.3750977516174316, - "learning_rate": 9e-07, - "loss": 2.64, - "step": 55 - }, - { - "epoch": 0.0019656364625563806, - "grad_norm": 1.3546466827392578, - "learning_rate": 9.166666666666667e-07, - "loss": 2.5125, - "step": 56 - }, - { - "epoch": 0.0020007371136734585, - "grad_norm": 1.3342788219451904, - "learning_rate": 9.333333333333334e-07, - "loss": 2.5709, - "step": 57 - }, - { - "epoch": 0.002035837764790537, - "grad_norm": 1.4129695892333984, - "learning_rate": 9.5e-07, - "loss": 2.6767, - "step": 58 - }, - { - "epoch": 0.0020709384159076152, - "grad_norm": 1.3398574590682983, - "learning_rate": 9.666666666666668e-07, - "loss": 2.5676, - "step": 59 - }, - { - "epoch": 0.002106039067024693, - "grad_norm": 1.4062950611114502, - "learning_rate": 9.833333333333334e-07, - "loss": 2.6757, - "step": 60 - }, - { - "epoch": 0.0021411397181417715, - "grad_norm": 1.385626196861267, - "learning_rate": 1.0000000000000002e-06, - "loss": 2.5899, - "step": 61 - }, - { - "epoch": 0.00217624036925885, - "grad_norm": 1.4252026081085205, - "learning_rate": 1.0166666666666665e-06, - "loss": 2.7198, - "step": 62 - }, - { - "epoch": 0.002211341020375928, - "grad_norm": 1.4209388494491577, - "learning_rate": 1.0333333333333333e-06, - "loss": 2.6447, - "step": 63 - }, - { - "epoch": 0.0022464416714930062, - "grad_norm": 1.3977481126785278, - "learning_rate": 1.0500000000000001e-06, - "loss": 2.6611, - "step": 64 - }, - { - "epoch": 0.0022815423226100846, - "grad_norm": 1.3982129096984863, - "learning_rate": 1.0666666666666667e-06, - "loss": 2.5228, - "step": 65 - }, - { - "epoch": 0.0023166429737271625, - "grad_norm": 1.3869361877441406, - "learning_rate": 1.0833333333333335e-06, - "loss": 2.5207, - "step": 66 - }, - { - "epoch": 0.002351743624844241, - "grad_norm": 1.4882638454437256, - "learning_rate": 1.1e-06, - "loss": 2.7911, - "step": 67 - }, - { - "epoch": 0.0023868442759613193, - "grad_norm": 1.4435547590255737, - "learning_rate": 1.1166666666666666e-06, - "loss": 2.6026, - "step": 68 - }, - { - "epoch": 0.002421944927078397, - "grad_norm": 1.4592955112457275, - "learning_rate": 1.1333333333333334e-06, - "loss": 2.6969, - "step": 69 - }, - { - "epoch": 0.0024570455781954756, - "grad_norm": 1.4256402254104614, - "learning_rate": 1.15e-06, - "loss": 2.541, - "step": 70 - }, - { - "epoch": 0.002492146229312554, - "grad_norm": 1.4301166534423828, - "learning_rate": 1.1666666666666668e-06, - "loss": 2.5921, - "step": 71 - }, - { - "epoch": 0.002527246880429632, - "grad_norm": 1.4168128967285156, - "learning_rate": 1.1833333333333334e-06, - "loss": 2.6888, - "step": 72 - }, - { - "epoch": 0.0025623475315467103, - "grad_norm": 1.4437116384506226, - "learning_rate": 1.2000000000000002e-06, - "loss": 2.4758, - "step": 73 - }, - { - "epoch": 0.002597448182663788, - "grad_norm": 1.4404387474060059, - "learning_rate": 1.2166666666666667e-06, - "loss": 2.5249, - "step": 74 - }, - { - "epoch": 0.0026325488337808666, - "grad_norm": 1.4457170963287354, - "learning_rate": 1.2333333333333333e-06, - "loss": 2.5248, - "step": 75 - }, - { - "epoch": 0.002667649484897945, - "grad_norm": 1.4326978921890259, - "learning_rate": 1.25e-06, - "loss": 2.5988, - "step": 76 - }, - { - "epoch": 0.002702750136015023, - "grad_norm": 1.482635498046875, - "learning_rate": 1.2666666666666667e-06, - "loss": 2.5656, - "step": 77 - }, - { - "epoch": 0.0027378507871321013, - "grad_norm": 1.4993175268173218, - "learning_rate": 1.2833333333333335e-06, - "loss": 2.5206, - "step": 78 - }, - { - "epoch": 0.0027729514382491796, - "grad_norm": 1.5097776651382446, - "learning_rate": 1.3e-06, - "loss": 2.4973, - "step": 79 - }, - { - "epoch": 0.0028080520893662576, - "grad_norm": 1.494784951210022, - "learning_rate": 1.3166666666666668e-06, - "loss": 2.6671, - "step": 80 - }, - { - "epoch": 0.002843152740483336, - "grad_norm": 1.4565764665603638, - "learning_rate": 1.3333333333333334e-06, - "loss": 2.5692, - "step": 81 - }, - { - "epoch": 0.0028782533916004143, - "grad_norm": 1.533872127532959, - "learning_rate": 1.35e-06, - "loss": 2.6608, - "step": 82 - }, - { - "epoch": 0.0029133540427174922, - "grad_norm": 1.4808841943740845, - "learning_rate": 1.3666666666666668e-06, - "loss": 2.7395, - "step": 83 - }, - { - "epoch": 0.0029484546938345706, - "grad_norm": 1.4748013019561768, - "learning_rate": 1.3833333333333334e-06, - "loss": 2.6319, - "step": 84 - }, - { - "epoch": 0.002983555344951649, - "grad_norm": 1.5720763206481934, - "learning_rate": 1.4000000000000001e-06, - "loss": 2.63, - "step": 85 - }, - { - "epoch": 0.003018655996068727, - "grad_norm": 1.5028517246246338, - "learning_rate": 1.4166666666666667e-06, - "loss": 2.6607, - "step": 86 - }, - { - "epoch": 0.0030537566471858053, - "grad_norm": 1.5843889713287354, - "learning_rate": 1.4333333333333333e-06, - "loss": 2.7085, - "step": 87 - }, - { - "epoch": 0.0030888572983028837, - "grad_norm": 1.5013842582702637, - "learning_rate": 1.45e-06, - "loss": 2.5905, - "step": 88 - }, - { - "epoch": 0.0031239579494199616, - "grad_norm": 1.4916142225265503, - "learning_rate": 1.4666666666666667e-06, - "loss": 2.3226, - "step": 89 - }, - { - "epoch": 0.00315905860053704, - "grad_norm": 1.516467571258545, - "learning_rate": 1.4833333333333335e-06, - "loss": 2.5548, - "step": 90 - }, - { - "epoch": 0.0031941592516541184, - "grad_norm": 1.4865124225616455, - "learning_rate": 1.5e-06, - "loss": 2.6782, - "step": 91 - }, - { - "epoch": 0.0032292599027711963, - "grad_norm": 1.4746567010879517, - "learning_rate": 1.5166666666666668e-06, - "loss": 2.4711, - "step": 92 - }, - { - "epoch": 0.0032643605538882747, - "grad_norm": 1.5625407695770264, - "learning_rate": 1.5333333333333334e-06, - "loss": 2.4659, - "step": 93 - }, - { - "epoch": 0.003299461205005353, - "grad_norm": 1.5168863534927368, - "learning_rate": 1.55e-06, - "loss": 2.6211, - "step": 94 - }, - { - "epoch": 0.003334561856122431, - "grad_norm": 1.588249921798706, - "learning_rate": 1.5666666666666666e-06, - "loss": 2.4863, - "step": 95 - }, - { - "epoch": 0.0033696625072395093, - "grad_norm": 1.5579643249511719, - "learning_rate": 1.5833333333333336e-06, - "loss": 2.6549, - "step": 96 - }, - { - "epoch": 0.0034047631583565877, - "grad_norm": 1.5403274297714233, - "learning_rate": 1.6000000000000001e-06, - "loss": 2.582, - "step": 97 - }, - { - "epoch": 0.0034398638094736656, - "grad_norm": 1.5402603149414062, - "learning_rate": 1.6166666666666667e-06, - "loss": 2.536, - "step": 98 - }, - { - "epoch": 0.003474964460590744, - "grad_norm": 1.5082433223724365, - "learning_rate": 1.6333333333333333e-06, - "loss": 2.4225, - "step": 99 - }, - { - "epoch": 0.0035100651117078224, - "grad_norm": 1.5788393020629883, - "learning_rate": 1.65e-06, - "loss": 2.5115, - "step": 100 - }, - { - "epoch": 0.0035451657628249003, - "grad_norm": 1.585938572883606, - "learning_rate": 1.6666666666666667e-06, - "loss": 2.648, - "step": 101 - }, - { - "epoch": 0.0035802664139419787, - "grad_norm": 1.5867786407470703, - "learning_rate": 1.6833333333333332e-06, - "loss": 2.6337, - "step": 102 - }, - { - "epoch": 0.0036153670650590566, - "grad_norm": 1.5875111818313599, - "learning_rate": 1.7000000000000002e-06, - "loss": 2.6877, - "step": 103 - }, - { - "epoch": 0.003650467716176135, - "grad_norm": 1.5568406581878662, - "learning_rate": 1.7166666666666668e-06, - "loss": 2.4481, - "step": 104 - }, - { - "epoch": 0.0036855683672932134, - "grad_norm": 1.6694421768188477, - "learning_rate": 1.7333333333333334e-06, - "loss": 2.5364, - "step": 105 - }, - { - "epoch": 0.0037206690184102913, - "grad_norm": 1.588732123374939, - "learning_rate": 1.7500000000000002e-06, - "loss": 2.3705, - "step": 106 - }, - { - "epoch": 0.0037557696695273697, - "grad_norm": 1.5961308479309082, - "learning_rate": 1.7666666666666668e-06, - "loss": 2.5429, - "step": 107 - }, - { - "epoch": 0.003790870320644448, - "grad_norm": 1.623733401298523, - "learning_rate": 1.7833333333333333e-06, - "loss": 2.6228, - "step": 108 - }, - { - "epoch": 0.003825970971761526, - "grad_norm": 1.620150089263916, - "learning_rate": 1.8e-06, - "loss": 2.4648, - "step": 109 - }, - { - "epoch": 0.0038610716228786044, - "grad_norm": 1.694027304649353, - "learning_rate": 1.816666666666667e-06, - "loss": 2.5526, - "step": 110 - }, - { - "epoch": 0.0038961722739956827, - "grad_norm": 1.6386096477508545, - "learning_rate": 1.8333333333333335e-06, - "loss": 2.6196, - "step": 111 - }, - { - "epoch": 0.003931272925112761, - "grad_norm": 1.6173791885375977, - "learning_rate": 1.85e-06, - "loss": 2.4797, - "step": 112 - }, - { - "epoch": 0.0039663735762298395, - "grad_norm": 1.6572661399841309, - "learning_rate": 1.8666666666666669e-06, - "loss": 2.5358, - "step": 113 - }, - { - "epoch": 0.004001474227346917, - "grad_norm": 1.6551135778427124, - "learning_rate": 1.8833333333333334e-06, - "loss": 2.4607, - "step": 114 - }, - { - "epoch": 0.004036574878463995, - "grad_norm": 1.5986158847808838, - "learning_rate": 1.9e-06, - "loss": 2.4823, - "step": 115 - }, - { - "epoch": 0.004071675529581074, - "grad_norm": 1.677937388420105, - "learning_rate": 1.916666666666667e-06, - "loss": 2.4256, - "step": 116 - }, - { - "epoch": 0.004106776180698152, - "grad_norm": 1.6852525472640991, - "learning_rate": 1.9333333333333336e-06, - "loss": 2.5308, - "step": 117 - }, - { - "epoch": 0.0041418768318152305, - "grad_norm": 1.6760343313217163, - "learning_rate": 1.95e-06, - "loss": 2.5353, - "step": 118 - }, - { - "epoch": 0.004176977482932308, - "grad_norm": 1.7016017436981201, - "learning_rate": 1.9666666666666668e-06, - "loss": 2.5854, - "step": 119 - }, - { - "epoch": 0.004212078134049386, - "grad_norm": 1.7039109468460083, - "learning_rate": 1.9833333333333335e-06, - "loss": 2.4231, - "step": 120 - }, - { - "epoch": 0.004247178785166465, - "grad_norm": 1.6607680320739746, - "learning_rate": 2.0000000000000003e-06, - "loss": 2.4374, - "step": 121 - }, - { - "epoch": 0.004282279436283543, - "grad_norm": 1.6147366762161255, - "learning_rate": 2.0166666666666667e-06, - "loss": 2.3457, - "step": 122 - }, - { - "epoch": 0.0043173800874006215, - "grad_norm": 1.6434422731399536, - "learning_rate": 2.033333333333333e-06, - "loss": 2.3679, - "step": 123 - }, - { - "epoch": 0.0043524807385177, - "grad_norm": 1.6357187032699585, - "learning_rate": 2.0500000000000003e-06, - "loss": 2.4089, - "step": 124 - }, - { - "epoch": 0.004387581389634777, - "grad_norm": 1.6774718761444092, - "learning_rate": 2.0666666666666666e-06, - "loss": 2.4289, - "step": 125 - }, - { - "epoch": 0.004422682040751856, - "grad_norm": 1.6337573528289795, - "learning_rate": 2.0833333333333334e-06, - "loss": 2.4612, - "step": 126 - }, - { - "epoch": 0.004457782691868934, - "grad_norm": 1.6393815279006958, - "learning_rate": 2.1000000000000002e-06, - "loss": 2.4476, - "step": 127 - }, - { - "epoch": 0.0044928833429860125, - "grad_norm": 1.6790637969970703, - "learning_rate": 2.1166666666666666e-06, - "loss": 2.488, - "step": 128 - }, - { - "epoch": 0.004527983994103091, - "grad_norm": 1.6586476564407349, - "learning_rate": 2.1333333333333334e-06, - "loss": 2.2845, - "step": 129 - }, - { - "epoch": 0.004563084645220169, - "grad_norm": 1.816952109336853, - "learning_rate": 2.1499999999999997e-06, - "loss": 2.3897, - "step": 130 - }, - { - "epoch": 0.004598185296337247, - "grad_norm": 1.6667451858520508, - "learning_rate": 2.166666666666667e-06, - "loss": 2.4204, - "step": 131 - }, - { - "epoch": 0.004633285947454325, - "grad_norm": 1.722793698310852, - "learning_rate": 2.1833333333333333e-06, - "loss": 2.3172, - "step": 132 - }, - { - "epoch": 0.0046683865985714034, - "grad_norm": 1.62187659740448, - "learning_rate": 2.2e-06, - "loss": 2.3501, - "step": 133 - }, - { - "epoch": 0.004703487249688482, - "grad_norm": 1.752081036567688, - "learning_rate": 2.216666666666667e-06, - "loss": 2.4828, - "step": 134 - }, - { - "epoch": 0.00473858790080556, - "grad_norm": 1.654988408088684, - "learning_rate": 2.2333333333333333e-06, - "loss": 2.3159, - "step": 135 - }, - { - "epoch": 0.0047736885519226386, - "grad_norm": 1.6875895261764526, - "learning_rate": 2.25e-06, - "loss": 2.299, - "step": 136 - }, - { - "epoch": 0.004808789203039716, - "grad_norm": 1.6921578645706177, - "learning_rate": 2.266666666666667e-06, - "loss": 2.2206, - "step": 137 - }, - { - "epoch": 0.004843889854156794, - "grad_norm": 1.6668251752853394, - "learning_rate": 2.2833333333333336e-06, - "loss": 2.3227, - "step": 138 - }, - { - "epoch": 0.004878990505273873, - "grad_norm": 1.6687926054000854, - "learning_rate": 2.3e-06, - "loss": 2.2902, - "step": 139 - }, - { - "epoch": 0.004914091156390951, - "grad_norm": 1.7063510417938232, - "learning_rate": 2.316666666666667e-06, - "loss": 2.4339, - "step": 140 - }, - { - "epoch": 0.0049491918075080295, - "grad_norm": 1.6697620153427124, - "learning_rate": 2.3333333333333336e-06, - "loss": 2.1373, - "step": 141 - }, - { - "epoch": 0.004984292458625108, - "grad_norm": 1.5861527919769287, - "learning_rate": 2.35e-06, - "loss": 2.2233, - "step": 142 - }, - { - "epoch": 0.005019393109742185, - "grad_norm": 1.6283432245254517, - "learning_rate": 2.3666666666666667e-06, - "loss": 2.2406, - "step": 143 - }, - { - "epoch": 0.005054493760859264, - "grad_norm": 1.7443870306015015, - "learning_rate": 2.3833333333333335e-06, - "loss": 2.2708, - "step": 144 - }, - { - "epoch": 0.005089594411976342, - "grad_norm": 1.6880239248275757, - "learning_rate": 2.4000000000000003e-06, - "loss": 2.3923, - "step": 145 - }, - { - "epoch": 0.0051246950630934205, - "grad_norm": 1.628446102142334, - "learning_rate": 2.4166666666666667e-06, - "loss": 2.3176, - "step": 146 - }, - { - "epoch": 0.005159795714210499, - "grad_norm": 1.7313412427902222, - "learning_rate": 2.4333333333333335e-06, - "loss": 2.2799, - "step": 147 - }, - { - "epoch": 0.005194896365327576, - "grad_norm": 1.6849360466003418, - "learning_rate": 2.4500000000000003e-06, - "loss": 2.086, - "step": 148 - }, - { - "epoch": 0.005229997016444655, - "grad_norm": 1.6572667360305786, - "learning_rate": 2.4666666666666666e-06, - "loss": 2.2343, - "step": 149 - }, - { - "epoch": 0.005265097667561733, - "grad_norm": 1.6707414388656616, - "learning_rate": 2.4833333333333334e-06, - "loss": 2.2537, - "step": 150 - }, - { - "epoch": 0.0053001983186788115, - "grad_norm": 1.6037425994873047, - "learning_rate": 2.5e-06, - "loss": 2.1693, - "step": 151 - }, - { - "epoch": 0.00533529896979589, - "grad_norm": 1.7840746641159058, - "learning_rate": 2.516666666666667e-06, - "loss": 2.1129, - "step": 152 - }, - { - "epoch": 0.005370399620912968, - "grad_norm": 1.6484638452529907, - "learning_rate": 2.5333333333333334e-06, - "loss": 2.09, - "step": 153 - }, - { - "epoch": 0.005405500272030046, - "grad_norm": 1.6631654500961304, - "learning_rate": 2.55e-06, - "loss": 1.9526, - "step": 154 - }, - { - "epoch": 0.005440600923147124, - "grad_norm": 1.7324082851409912, - "learning_rate": 2.566666666666667e-06, - "loss": 2.1543, - "step": 155 - }, - { - "epoch": 0.0054757015742642025, - "grad_norm": 1.6719396114349365, - "learning_rate": 2.5833333333333333e-06, - "loss": 1.985, - "step": 156 - }, - { - "epoch": 0.005510802225381281, - "grad_norm": 1.6808713674545288, - "learning_rate": 2.6e-06, - "loss": 2.062, - "step": 157 - }, - { - "epoch": 0.005545902876498359, - "grad_norm": 1.7197383642196655, - "learning_rate": 2.616666666666667e-06, - "loss": 2.0833, - "step": 158 - }, - { - "epoch": 0.005581003527615438, - "grad_norm": 1.7181674242019653, - "learning_rate": 2.6333333333333337e-06, - "loss": 2.1053, - "step": 159 - }, - { - "epoch": 0.005616104178732515, - "grad_norm": 1.7336634397506714, - "learning_rate": 2.65e-06, - "loss": 1.9367, - "step": 160 - }, - { - "epoch": 0.0056512048298495935, - "grad_norm": 1.7234174013137817, - "learning_rate": 2.666666666666667e-06, - "loss": 2.0573, - "step": 161 - }, - { - "epoch": 0.005686305480966672, - "grad_norm": 1.845238447189331, - "learning_rate": 2.6833333333333336e-06, - "loss": 2.1167, - "step": 162 - }, - { - "epoch": 0.00572140613208375, - "grad_norm": 1.9366627931594849, - "learning_rate": 2.7e-06, - "loss": 2.1551, - "step": 163 - }, - { - "epoch": 0.005756506783200829, - "grad_norm": 1.7695661783218384, - "learning_rate": 2.7166666666666668e-06, - "loss": 2.0239, - "step": 164 - }, - { - "epoch": 0.005791607434317907, - "grad_norm": 1.8904856443405151, - "learning_rate": 2.7333333333333336e-06, - "loss": 2.0632, - "step": 165 - }, - { - "epoch": 0.0058267080854349845, - "grad_norm": 1.865329384803772, - "learning_rate": 2.7500000000000004e-06, - "loss": 2.0688, - "step": 166 - }, - { - "epoch": 0.005861808736552063, - "grad_norm": 1.7832783460617065, - "learning_rate": 2.7666666666666667e-06, - "loss": 1.9041, - "step": 167 - }, - { - "epoch": 0.005896909387669141, - "grad_norm": 1.8187719583511353, - "learning_rate": 2.7833333333333335e-06, - "loss": 1.8907, - "step": 168 - }, - { - "epoch": 0.00593201003878622, - "grad_norm": 1.8283106088638306, - "learning_rate": 2.8000000000000003e-06, - "loss": 1.7501, - "step": 169 - }, - { - "epoch": 0.005967110689903298, - "grad_norm": 1.8078088760375977, - "learning_rate": 2.8166666666666667e-06, - "loss": 2.0106, - "step": 170 - }, - { - "epoch": 0.006002211341020376, - "grad_norm": 1.7783054113388062, - "learning_rate": 2.8333333333333335e-06, - "loss": 1.8197, - "step": 171 - }, - { - "epoch": 0.006037311992137454, - "grad_norm": 1.746432900428772, - "learning_rate": 2.8500000000000002e-06, - "loss": 1.9177, - "step": 172 - }, - { - "epoch": 0.006072412643254532, - "grad_norm": 1.6836683750152588, - "learning_rate": 2.8666666666666666e-06, - "loss": 1.7349, - "step": 173 - }, - { - "epoch": 0.006107513294371611, - "grad_norm": 1.635859489440918, - "learning_rate": 2.8833333333333334e-06, - "loss": 1.8159, - "step": 174 - }, - { - "epoch": 0.006142613945488689, - "grad_norm": 1.6566851139068604, - "learning_rate": 2.9e-06, - "loss": 1.7602, - "step": 175 - }, - { - "epoch": 0.006177714596605767, - "grad_norm": 1.7734028100967407, - "learning_rate": 2.916666666666667e-06, - "loss": 1.8053, - "step": 176 - }, - { - "epoch": 0.006212815247722845, - "grad_norm": 1.7016065120697021, - "learning_rate": 2.9333333333333333e-06, - "loss": 1.8371, - "step": 177 - }, - { - "epoch": 0.006247915898839923, - "grad_norm": 1.7356066703796387, - "learning_rate": 2.95e-06, - "loss": 1.7793, - "step": 178 - }, - { - "epoch": 0.006283016549957002, - "grad_norm": 1.7265770435333252, - "learning_rate": 2.966666666666667e-06, - "loss": 1.6967, - "step": 179 - }, - { - "epoch": 0.00631811720107408, - "grad_norm": 1.7281289100646973, - "learning_rate": 2.9833333333333333e-06, - "loss": 1.6651, - "step": 180 - }, - { - "epoch": 0.006353217852191158, - "grad_norm": 1.8002333641052246, - "learning_rate": 3e-06, - "loss": 1.7168, - "step": 181 - }, - { - "epoch": 0.006388318503308237, - "grad_norm": 1.9847668409347534, - "learning_rate": 3.016666666666667e-06, - "loss": 1.7488, - "step": 182 - }, - { - "epoch": 0.006423419154425314, - "grad_norm": 1.7108169794082642, - "learning_rate": 3.0333333333333337e-06, - "loss": 1.5508, - "step": 183 - }, - { - "epoch": 0.006458519805542393, - "grad_norm": 1.615768551826477, - "learning_rate": 3.05e-06, - "loss": 1.6756, - "step": 184 - }, - { - "epoch": 0.006493620456659471, - "grad_norm": 1.6229082345962524, - "learning_rate": 3.066666666666667e-06, - "loss": 1.6997, - "step": 185 - }, - { - "epoch": 0.006528721107776549, - "grad_norm": 1.5525057315826416, - "learning_rate": 3.0833333333333336e-06, - "loss": 1.6093, - "step": 186 - }, - { - "epoch": 0.006563821758893628, - "grad_norm": 1.5964235067367554, - "learning_rate": 3.1e-06, - "loss": 1.5063, - "step": 187 - }, - { - "epoch": 0.006598922410010706, - "grad_norm": 1.6332372426986694, - "learning_rate": 3.1166666666666668e-06, - "loss": 1.6273, - "step": 188 - }, - { - "epoch": 0.006634023061127784, - "grad_norm": 1.6180667877197266, - "learning_rate": 3.133333333333333e-06, - "loss": 1.5074, - "step": 189 - }, - { - "epoch": 0.006669123712244862, - "grad_norm": 1.749247670173645, - "learning_rate": 3.1500000000000003e-06, - "loss": 1.6974, - "step": 190 - }, - { - "epoch": 0.00670422436336194, - "grad_norm": 1.559922456741333, - "learning_rate": 3.166666666666667e-06, - "loss": 1.4422, - "step": 191 - }, - { - "epoch": 0.006739325014479019, - "grad_norm": 1.6004679203033447, - "learning_rate": 3.1833333333333335e-06, - "loss": 1.6448, - "step": 192 - }, - { - "epoch": 0.006774425665596097, - "grad_norm": 1.5353906154632568, - "learning_rate": 3.2000000000000003e-06, - "loss": 1.4762, - "step": 193 - }, - { - "epoch": 0.006809526316713175, - "grad_norm": 1.57126784324646, - "learning_rate": 3.216666666666667e-06, - "loss": 1.4222, - "step": 194 - }, - { - "epoch": 0.006844626967830253, - "grad_norm": 1.5774754285812378, - "learning_rate": 3.2333333333333334e-06, - "loss": 1.4568, - "step": 195 - }, - { - "epoch": 0.006879727618947331, - "grad_norm": 1.6749789714813232, - "learning_rate": 3.2500000000000002e-06, - "loss": 1.4025, - "step": 196 - }, - { - "epoch": 0.00691482827006441, - "grad_norm": 1.7544447183609009, - "learning_rate": 3.2666666666666666e-06, - "loss": 1.5507, - "step": 197 - }, - { - "epoch": 0.006949928921181488, - "grad_norm": 1.6086649894714355, - "learning_rate": 3.2833333333333334e-06, - "loss": 1.3299, - "step": 198 - }, - { - "epoch": 0.006985029572298566, - "grad_norm": 1.6512669324874878, - "learning_rate": 3.3e-06, - "loss": 1.4528, - "step": 199 - }, - { - "epoch": 0.007020130223415645, - "grad_norm": 1.6389107704162598, - "learning_rate": 3.3166666666666665e-06, - "loss": 1.2996, - "step": 200 - }, - { - "epoch": 0.007055230874532722, - "grad_norm": 1.7409394979476929, - "learning_rate": 3.3333333333333333e-06, - "loss": 1.4272, - "step": 201 - }, - { - "epoch": 0.007090331525649801, - "grad_norm": 1.7825279235839844, - "learning_rate": 3.3500000000000005e-06, - "loss": 1.2825, - "step": 202 - }, - { - "epoch": 0.007125432176766879, - "grad_norm": 1.8998286724090576, - "learning_rate": 3.3666666666666665e-06, - "loss": 1.1427, - "step": 203 - }, - { - "epoch": 0.007160532827883957, - "grad_norm": 1.9221959114074707, - "learning_rate": 3.3833333333333337e-06, - "loss": 1.2577, - "step": 204 - }, - { - "epoch": 0.007195633479001036, - "grad_norm": 1.9791948795318604, - "learning_rate": 3.4000000000000005e-06, - "loss": 1.2624, - "step": 205 - }, - { - "epoch": 0.007230734130118113, - "grad_norm": 2.114811420440674, - "learning_rate": 3.4166666666666664e-06, - "loss": 1.178, - "step": 206 - }, - { - "epoch": 0.007265834781235192, - "grad_norm": 2.178661346435547, - "learning_rate": 3.4333333333333336e-06, - "loss": 1.2144, - "step": 207 - }, - { - "epoch": 0.00730093543235227, - "grad_norm": 2.3571386337280273, - "learning_rate": 3.4500000000000004e-06, - "loss": 1.174, - "step": 208 - }, - { - "epoch": 0.007336036083469348, - "grad_norm": 2.4643797874450684, - "learning_rate": 3.466666666666667e-06, - "loss": 1.2923, - "step": 209 - }, - { - "epoch": 0.007371136734586427, - "grad_norm": 2.5094475746154785, - "learning_rate": 3.4833333333333336e-06, - "loss": 1.1512, - "step": 210 - }, - { - "epoch": 0.007406237385703505, - "grad_norm": 2.482947826385498, - "learning_rate": 3.5000000000000004e-06, - "loss": 1.2783, - "step": 211 - }, - { - "epoch": 0.007441338036820583, - "grad_norm": 2.4269304275512695, - "learning_rate": 3.5166666666666667e-06, - "loss": 1.0908, - "step": 212 - }, - { - "epoch": 0.007476438687937661, - "grad_norm": 2.377413034439087, - "learning_rate": 3.5333333333333335e-06, - "loss": 1.2307, - "step": 213 - }, - { - "epoch": 0.007511539339054739, - "grad_norm": 2.351454019546509, - "learning_rate": 3.55e-06, - "loss": 1.127, - "step": 214 - }, - { - "epoch": 0.007546639990171818, - "grad_norm": 2.160515308380127, - "learning_rate": 3.5666666666666667e-06, - "loss": 1.0546, - "step": 215 - }, - { - "epoch": 0.007581740641288896, - "grad_norm": 1.9296139478683472, - "learning_rate": 3.5833333333333335e-06, - "loss": 1.0938, - "step": 216 - }, - { - "epoch": 0.0076168412924059745, - "grad_norm": 1.8617360591888428, - "learning_rate": 3.6e-06, - "loss": 1.0603, - "step": 217 - }, - { - "epoch": 0.007651941943523052, - "grad_norm": 1.8148412704467773, - "learning_rate": 3.6166666666666666e-06, - "loss": 1.0523, - "step": 218 - }, - { - "epoch": 0.00768704259464013, - "grad_norm": 1.598833680152893, - "learning_rate": 3.633333333333334e-06, - "loss": 1.0182, - "step": 219 - }, - { - "epoch": 0.007722143245757209, - "grad_norm": 1.4994378089904785, - "learning_rate": 3.6499999999999998e-06, - "loss": 1.0288, - "step": 220 - }, - { - "epoch": 0.007757243896874287, - "grad_norm": 1.4672083854675293, - "learning_rate": 3.666666666666667e-06, - "loss": 0.9027, - "step": 221 - }, - { - "epoch": 0.0077923445479913655, - "grad_norm": 1.4634737968444824, - "learning_rate": 3.6833333333333338e-06, - "loss": 1.019, - "step": 222 - }, - { - "epoch": 0.007827445199108444, - "grad_norm": 1.3608646392822266, - "learning_rate": 3.7e-06, - "loss": 1.0177, - "step": 223 - }, - { - "epoch": 0.007862545850225522, - "grad_norm": 1.1849548816680908, - "learning_rate": 3.716666666666667e-06, - "loss": 0.9378, - "step": 224 - }, - { - "epoch": 0.0078976465013426, - "grad_norm": 1.422735571861267, - "learning_rate": 3.7333333333333337e-06, - "loss": 0.8742, - "step": 225 - }, - { - "epoch": 0.007932747152459679, - "grad_norm": 1.5660419464111328, - "learning_rate": 3.75e-06, - "loss": 0.9658, - "step": 226 - }, - { - "epoch": 0.007967847803576756, - "grad_norm": 1.5559109449386597, - "learning_rate": 3.766666666666667e-06, - "loss": 0.9744, - "step": 227 - }, - { - "epoch": 0.008002948454693834, - "grad_norm": 1.505341649055481, - "learning_rate": 3.7833333333333333e-06, - "loss": 0.9361, - "step": 228 - }, - { - "epoch": 0.008038049105810912, - "grad_norm": 1.5667076110839844, - "learning_rate": 3.8e-06, - "loss": 0.9046, - "step": 229 - }, - { - "epoch": 0.00807314975692799, - "grad_norm": 1.278931736946106, - "learning_rate": 3.816666666666667e-06, - "loss": 0.81, - "step": 230 - }, - { - "epoch": 0.008108250408045069, - "grad_norm": 1.242944359779358, - "learning_rate": 3.833333333333334e-06, - "loss": 0.9296, - "step": 231 - }, - { - "epoch": 0.008143351059162147, - "grad_norm": 1.1765793561935425, - "learning_rate": 3.85e-06, - "loss": 0.8238, - "step": 232 - }, - { - "epoch": 0.008178451710279226, - "grad_norm": 1.2059770822525024, - "learning_rate": 3.866666666666667e-06, - "loss": 0.9218, - "step": 233 - }, - { - "epoch": 0.008213552361396304, - "grad_norm": 1.4556992053985596, - "learning_rate": 3.883333333333333e-06, - "loss": 0.76, - "step": 234 - }, - { - "epoch": 0.008248653012513383, - "grad_norm": 1.0794739723205566, - "learning_rate": 3.9e-06, - "loss": 0.858, - "step": 235 - }, - { - "epoch": 0.008283753663630461, - "grad_norm": 1.0386093854904175, - "learning_rate": 3.916666666666667e-06, - "loss": 0.9096, - "step": 236 - }, - { - "epoch": 0.00831885431474754, - "grad_norm": 1.0348325967788696, - "learning_rate": 3.9333333333333335e-06, - "loss": 0.8614, - "step": 237 - }, - { - "epoch": 0.008353954965864616, - "grad_norm": 0.9690415263175964, - "learning_rate": 3.95e-06, - "loss": 0.8409, - "step": 238 - }, - { - "epoch": 0.008389055616981694, - "grad_norm": 0.9543929696083069, - "learning_rate": 3.966666666666667e-06, - "loss": 0.8845, - "step": 239 - }, - { - "epoch": 0.008424156268098773, - "grad_norm": 0.9254280924797058, - "learning_rate": 3.983333333333333e-06, - "loss": 0.7541, - "step": 240 - }, - { - "epoch": 0.008459256919215851, - "grad_norm": 0.9917170405387878, - "learning_rate": 4.000000000000001e-06, - "loss": 0.927, - "step": 241 - }, - { - "epoch": 0.00849435757033293, - "grad_norm": 0.886038601398468, - "learning_rate": 4.0166666666666675e-06, - "loss": 0.7548, - "step": 242 - }, - { - "epoch": 0.008529458221450008, - "grad_norm": 0.9580444097518921, - "learning_rate": 4.033333333333333e-06, - "loss": 0.839, - "step": 243 - }, - { - "epoch": 0.008564558872567086, - "grad_norm": 1.0148797035217285, - "learning_rate": 4.05e-06, - "loss": 0.771, - "step": 244 - }, - { - "epoch": 0.008599659523684165, - "grad_norm": 1.18535578250885, - "learning_rate": 4.066666666666666e-06, - "loss": 0.8939, - "step": 245 - }, - { - "epoch": 0.008634760174801243, - "grad_norm": 1.480730414390564, - "learning_rate": 4.083333333333334e-06, - "loss": 0.8127, - "step": 246 - }, - { - "epoch": 0.008669860825918321, - "grad_norm": 1.2552489042282104, - "learning_rate": 4.1000000000000006e-06, - "loss": 0.8572, - "step": 247 - }, - { - "epoch": 0.0087049614770354, - "grad_norm": 2.2307631969451904, - "learning_rate": 4.1166666666666665e-06, - "loss": 0.8313, - "step": 248 - }, - { - "epoch": 0.008740062128152478, - "grad_norm": 1.1535265445709229, - "learning_rate": 4.133333333333333e-06, - "loss": 0.7803, - "step": 249 - }, - { - "epoch": 0.008775162779269555, - "grad_norm": 0.8258942365646362, - "learning_rate": 4.15e-06, - "loss": 0.8257, - "step": 250 - }, - { - "epoch": 0.008810263430386633, - "grad_norm": 0.8490951061248779, - "learning_rate": 4.166666666666667e-06, - "loss": 0.7715, - "step": 251 - }, - { - "epoch": 0.008845364081503711, - "grad_norm": 0.9236701726913452, - "learning_rate": 4.183333333333334e-06, - "loss": 0.9389, - "step": 252 - }, - { - "epoch": 0.00888046473262079, - "grad_norm": 0.8142258524894714, - "learning_rate": 4.2000000000000004e-06, - "loss": 0.7969, - "step": 253 - }, - { - "epoch": 0.008915565383737868, - "grad_norm": 0.7428544759750366, - "learning_rate": 4.216666666666666e-06, - "loss": 0.8157, - "step": 254 - }, - { - "epoch": 0.008950666034854947, - "grad_norm": 1.0453758239746094, - "learning_rate": 4.233333333333333e-06, - "loss": 0.7079, - "step": 255 - }, - { - "epoch": 0.008985766685972025, - "grad_norm": 0.7731630802154541, - "learning_rate": 4.250000000000001e-06, - "loss": 0.633, - "step": 256 - }, - { - "epoch": 0.009020867337089103, - "grad_norm": 0.7817323207855225, - "learning_rate": 4.266666666666667e-06, - "loss": 0.8372, - "step": 257 - }, - { - "epoch": 0.009055967988206182, - "grad_norm": 0.8464375734329224, - "learning_rate": 4.2833333333333335e-06, - "loss": 0.9204, - "step": 258 - }, - { - "epoch": 0.00909106863932326, - "grad_norm": 0.8311069011688232, - "learning_rate": 4.2999999999999995e-06, - "loss": 0.9156, - "step": 259 - }, - { - "epoch": 0.009126169290440338, - "grad_norm": 0.9367508292198181, - "learning_rate": 4.316666666666667e-06, - "loss": 0.8093, - "step": 260 - }, - { - "epoch": 0.009161269941557415, - "grad_norm": 0.8207131624221802, - "learning_rate": 4.333333333333334e-06, - "loss": 0.7484, - "step": 261 - }, - { - "epoch": 0.009196370592674493, - "grad_norm": 0.7581722736358643, - "learning_rate": 4.35e-06, - "loss": 0.7525, - "step": 262 - }, - { - "epoch": 0.009231471243791572, - "grad_norm": 0.7329070568084717, - "learning_rate": 4.366666666666667e-06, - "loss": 0.8455, - "step": 263 - }, - { - "epoch": 0.00926657189490865, - "grad_norm": 0.8402296304702759, - "learning_rate": 4.3833333333333334e-06, - "loss": 0.7479, - "step": 264 - }, - { - "epoch": 0.009301672546025729, - "grad_norm": 0.7095234394073486, - "learning_rate": 4.4e-06, - "loss": 0.7173, - "step": 265 - }, - { - "epoch": 0.009336773197142807, - "grad_norm": 0.7721574902534485, - "learning_rate": 4.416666666666667e-06, - "loss": 0.8435, - "step": 266 - }, - { - "epoch": 0.009371873848259885, - "grad_norm": 0.7719969749450684, - "learning_rate": 4.433333333333334e-06, - "loss": 0.6809, - "step": 267 - }, - { - "epoch": 0.009406974499376964, - "grad_norm": 0.886405348777771, - "learning_rate": 4.45e-06, - "loss": 0.7364, - "step": 268 - }, - { - "epoch": 0.009442075150494042, - "grad_norm": 0.7722795605659485, - "learning_rate": 4.4666666666666665e-06, - "loss": 0.8274, - "step": 269 - }, - { - "epoch": 0.00947717580161112, - "grad_norm": 0.7272798418998718, - "learning_rate": 4.483333333333334e-06, - "loss": 0.8285, - "step": 270 - }, - { - "epoch": 0.009512276452728199, - "grad_norm": 0.8884292244911194, - "learning_rate": 4.5e-06, - "loss": 0.7845, - "step": 271 - }, - { - "epoch": 0.009547377103845277, - "grad_norm": 0.7760015726089478, - "learning_rate": 4.516666666666667e-06, - "loss": 0.8471, - "step": 272 - }, - { - "epoch": 0.009582477754962354, - "grad_norm": 0.7963578104972839, - "learning_rate": 4.533333333333334e-06, - "loss": 0.74, - "step": 273 - }, - { - "epoch": 0.009617578406079432, - "grad_norm": 0.964624285697937, - "learning_rate": 4.5500000000000005e-06, - "loss": 0.6235, - "step": 274 - }, - { - "epoch": 0.00965267905719651, - "grad_norm": 0.8443493843078613, - "learning_rate": 4.566666666666667e-06, - "loss": 0.6929, - "step": 275 - }, - { - "epoch": 0.009687779708313589, - "grad_norm": 0.709759533405304, - "learning_rate": 4.583333333333333e-06, - "loss": 0.6605, - "step": 276 - }, - { - "epoch": 0.009722880359430667, - "grad_norm": 0.7609758973121643, - "learning_rate": 4.6e-06, - "loss": 0.6723, - "step": 277 - }, - { - "epoch": 0.009757981010547746, - "grad_norm": 0.799526035785675, - "learning_rate": 4.616666666666667e-06, - "loss": 0.8091, - "step": 278 - }, - { - "epoch": 0.009793081661664824, - "grad_norm": 0.7876430153846741, - "learning_rate": 4.633333333333334e-06, - "loss": 0.6546, - "step": 279 - }, - { - "epoch": 0.009828182312781902, - "grad_norm": 0.7812764644622803, - "learning_rate": 4.65e-06, - "loss": 0.7997, - "step": 280 - }, - { - "epoch": 0.00986328296389898, - "grad_norm": 0.870790421962738, - "learning_rate": 4.666666666666667e-06, - "loss": 0.6906, - "step": 281 - }, - { - "epoch": 0.009898383615016059, - "grad_norm": 0.7857239842414856, - "learning_rate": 4.683333333333333e-06, - "loss": 0.7876, - "step": 282 - }, - { - "epoch": 0.009933484266133137, - "grad_norm": 0.936396598815918, - "learning_rate": 4.7e-06, - "loss": 0.693, - "step": 283 - }, - { - "epoch": 0.009968584917250216, - "grad_norm": 0.8367801308631897, - "learning_rate": 4.7166666666666675e-06, - "loss": 0.6534, - "step": 284 - }, - { - "epoch": 0.010003685568367292, - "grad_norm": 0.8706153035163879, - "learning_rate": 4.7333333333333335e-06, - "loss": 0.7938, - "step": 285 - }, - { - "epoch": 0.01003878621948437, - "grad_norm": 0.9653378129005432, - "learning_rate": 4.75e-06, - "loss": 0.7498, - "step": 286 - }, - { - "epoch": 0.01007388687060145, - "grad_norm": 0.9701070189476013, - "learning_rate": 4.766666666666667e-06, - "loss": 0.7145, - "step": 287 - }, - { - "epoch": 0.010108987521718528, - "grad_norm": 0.8391234278678894, - "learning_rate": 4.783333333333333e-06, - "loss": 0.793, - "step": 288 - }, - { - "epoch": 0.010144088172835606, - "grad_norm": 0.9332094788551331, - "learning_rate": 4.800000000000001e-06, - "loss": 0.8069, - "step": 289 - }, - { - "epoch": 0.010179188823952684, - "grad_norm": 0.7807117700576782, - "learning_rate": 4.816666666666667e-06, - "loss": 0.6464, - "step": 290 - }, - { - "epoch": 0.010214289475069763, - "grad_norm": 0.9562169909477234, - "learning_rate": 4.833333333333333e-06, - "loss": 0.76, - "step": 291 - }, - { - "epoch": 0.010249390126186841, - "grad_norm": 1.0865740776062012, - "learning_rate": 4.85e-06, - "loss": 0.719, - "step": 292 - }, - { - "epoch": 0.01028449077730392, - "grad_norm": 0.9776701331138611, - "learning_rate": 4.866666666666667e-06, - "loss": 0.7546, - "step": 293 - }, - { - "epoch": 0.010319591428420998, - "grad_norm": 0.8676882982254028, - "learning_rate": 4.883333333333334e-06, - "loss": 0.7436, - "step": 294 - }, - { - "epoch": 0.010354692079538076, - "grad_norm": 0.8488994836807251, - "learning_rate": 4.9000000000000005e-06, - "loss": 0.7222, - "step": 295 - }, - { - "epoch": 0.010389792730655153, - "grad_norm": 1.0382699966430664, - "learning_rate": 4.9166666666666665e-06, - "loss": 0.7249, - "step": 296 - }, - { - "epoch": 0.010424893381772231, - "grad_norm": 0.8003467321395874, - "learning_rate": 4.933333333333333e-06, - "loss": 0.7526, - "step": 297 - }, - { - "epoch": 0.01045999403288931, - "grad_norm": 0.8216441869735718, - "learning_rate": 4.950000000000001e-06, - "loss": 0.7505, - "step": 298 - }, - { - "epoch": 0.010495094684006388, - "grad_norm": 0.7926090359687805, - "learning_rate": 4.966666666666667e-06, - "loss": 0.6753, - "step": 299 - }, - { - "epoch": 0.010530195335123466, - "grad_norm": 0.8442890644073486, - "learning_rate": 4.983333333333334e-06, - "loss": 0.8026, - "step": 300 - }, - { - "epoch": 0.010565295986240545, - "grad_norm": 0.8300396800041199, - "learning_rate": 5e-06, - "loss": 0.6025, - "step": 301 - }, - { - "epoch": 0.010600396637357623, - "grad_norm": 0.84455406665802, - "learning_rate": 5.016666666666666e-06, - "loss": 0.6243, - "step": 302 - }, - { - "epoch": 0.010635497288474701, - "grad_norm": 0.8164980411529541, - "learning_rate": 5.033333333333334e-06, - "loss": 0.7262, - "step": 303 - }, - { - "epoch": 0.01067059793959178, - "grad_norm": 0.7655025720596313, - "learning_rate": 5.050000000000001e-06, - "loss": 0.6406, - "step": 304 - }, - { - "epoch": 0.010705698590708858, - "grad_norm": 1.0858434438705444, - "learning_rate": 5.066666666666667e-06, - "loss": 0.8288, - "step": 305 - }, - { - "epoch": 0.010740799241825937, - "grad_norm": 0.7846461534500122, - "learning_rate": 5.0833333333333335e-06, - "loss": 0.6039, - "step": 306 - }, - { - "epoch": 0.010775899892943015, - "grad_norm": 0.9359503984451294, - "learning_rate": 5.1e-06, - "loss": 0.818, - "step": 307 - }, - { - "epoch": 0.010811000544060092, - "grad_norm": 0.7881820797920227, - "learning_rate": 5.116666666666667e-06, - "loss": 0.7273, - "step": 308 - }, - { - "epoch": 0.01084610119517717, - "grad_norm": 0.954420804977417, - "learning_rate": 5.133333333333334e-06, - "loss": 0.7725, - "step": 309 - }, - { - "epoch": 0.010881201846294248, - "grad_norm": 0.7783780097961426, - "learning_rate": 5.15e-06, - "loss": 0.691, - "step": 310 - }, - { - "epoch": 0.010916302497411327, - "grad_norm": 0.8650756478309631, - "learning_rate": 5.166666666666667e-06, - "loss": 0.5987, - "step": 311 - }, - { - "epoch": 0.010951403148528405, - "grad_norm": 0.8056005239486694, - "learning_rate": 5.183333333333333e-06, - "loss": 0.7139, - "step": 312 - }, - { - "epoch": 0.010986503799645483, - "grad_norm": 0.7775111198425293, - "learning_rate": 5.2e-06, - "loss": 0.7587, - "step": 313 - }, - { - "epoch": 0.011021604450762562, - "grad_norm": 0.9487075805664062, - "learning_rate": 5.216666666666667e-06, - "loss": 0.8407, - "step": 314 - }, - { - "epoch": 0.01105670510187964, - "grad_norm": 0.7570939064025879, - "learning_rate": 5.233333333333334e-06, - "loss": 0.7347, - "step": 315 - }, - { - "epoch": 0.011091805752996719, - "grad_norm": 0.917307436466217, - "learning_rate": 5.25e-06, - "loss": 0.6127, - "step": 316 - }, - { - "epoch": 0.011126906404113797, - "grad_norm": 0.7810919284820557, - "learning_rate": 5.266666666666667e-06, - "loss": 0.78, - "step": 317 - }, - { - "epoch": 0.011162007055230875, - "grad_norm": 0.8834332227706909, - "learning_rate": 5.283333333333334e-06, - "loss": 0.8012, - "step": 318 - }, - { - "epoch": 0.011197107706347952, - "grad_norm": 0.8228054642677307, - "learning_rate": 5.3e-06, - "loss": 0.7748, - "step": 319 - }, - { - "epoch": 0.01123220835746503, - "grad_norm": 0.7978740930557251, - "learning_rate": 5.316666666666667e-06, - "loss": 0.6346, - "step": 320 - }, - { - "epoch": 0.011267309008582109, - "grad_norm": 0.8881126642227173, - "learning_rate": 5.333333333333334e-06, - "loss": 0.7234, - "step": 321 - }, - { - "epoch": 0.011302409659699187, - "grad_norm": 0.9592880010604858, - "learning_rate": 5.3500000000000004e-06, - "loss": 0.8401, - "step": 322 - }, - { - "epoch": 0.011337510310816265, - "grad_norm": 0.8281012773513794, - "learning_rate": 5.366666666666667e-06, - "loss": 0.7539, - "step": 323 - }, - { - "epoch": 0.011372610961933344, - "grad_norm": 0.840793251991272, - "learning_rate": 5.383333333333333e-06, - "loss": 0.7816, - "step": 324 - }, - { - "epoch": 0.011407711613050422, - "grad_norm": 0.9992218613624573, - "learning_rate": 5.4e-06, - "loss": 0.7114, - "step": 325 - }, - { - "epoch": 0.0114428122641675, - "grad_norm": 0.8609092831611633, - "learning_rate": 5.416666666666667e-06, - "loss": 0.7862, - "step": 326 - }, - { - "epoch": 0.011477912915284579, - "grad_norm": 0.9303110241889954, - "learning_rate": 5.4333333333333335e-06, - "loss": 0.6829, - "step": 327 - }, - { - "epoch": 0.011513013566401657, - "grad_norm": 1.1091980934143066, - "learning_rate": 5.45e-06, - "loss": 0.6345, - "step": 328 - }, - { - "epoch": 0.011548114217518736, - "grad_norm": 0.8417768478393555, - "learning_rate": 5.466666666666667e-06, - "loss": 0.6315, - "step": 329 - }, - { - "epoch": 0.011583214868635814, - "grad_norm": 0.8682668805122375, - "learning_rate": 5.483333333333333e-06, - "loss": 0.7229, - "step": 330 - }, - { - "epoch": 0.01161831551975289, - "grad_norm": 0.9902169108390808, - "learning_rate": 5.500000000000001e-06, - "loss": 0.6005, - "step": 331 - }, - { - "epoch": 0.011653416170869969, - "grad_norm": 0.9120174050331116, - "learning_rate": 5.5166666666666675e-06, - "loss": 0.7423, - "step": 332 - }, - { - "epoch": 0.011688516821987047, - "grad_norm": 0.9208419322967529, - "learning_rate": 5.5333333333333334e-06, - "loss": 0.7813, - "step": 333 - }, - { - "epoch": 0.011723617473104126, - "grad_norm": 0.8577231168746948, - "learning_rate": 5.55e-06, - "loss": 0.739, - "step": 334 - }, - { - "epoch": 0.011758718124221204, - "grad_norm": 0.8232165575027466, - "learning_rate": 5.566666666666667e-06, - "loss": 0.7388, - "step": 335 - }, - { - "epoch": 0.011793818775338282, - "grad_norm": 0.999883234500885, - "learning_rate": 5.583333333333334e-06, - "loss": 0.8464, - "step": 336 - }, - { - "epoch": 0.01182891942645536, - "grad_norm": 0.8235347867012024, - "learning_rate": 5.600000000000001e-06, - "loss": 0.7243, - "step": 337 - }, - { - "epoch": 0.01186402007757244, - "grad_norm": 0.8665674924850464, - "learning_rate": 5.6166666666666665e-06, - "loss": 0.699, - "step": 338 - }, - { - "epoch": 0.011899120728689518, - "grad_norm": 0.9395737648010254, - "learning_rate": 5.633333333333333e-06, - "loss": 0.6588, - "step": 339 - }, - { - "epoch": 0.011934221379806596, - "grad_norm": 0.8183184862136841, - "learning_rate": 5.65e-06, - "loss": 0.7649, - "step": 340 - }, - { - "epoch": 0.011969322030923674, - "grad_norm": 0.8857421278953552, - "learning_rate": 5.666666666666667e-06, - "loss": 0.6247, - "step": 341 - }, - { - "epoch": 0.012004422682040753, - "grad_norm": 0.9386955499649048, - "learning_rate": 5.683333333333334e-06, - "loss": 0.8272, - "step": 342 - }, - { - "epoch": 0.01203952333315783, - "grad_norm": 0.8459672927856445, - "learning_rate": 5.7000000000000005e-06, - "loss": 0.595, - "step": 343 - }, - { - "epoch": 0.012074623984274908, - "grad_norm": 0.9243751764297485, - "learning_rate": 5.7166666666666664e-06, - "loss": 0.797, - "step": 344 - }, - { - "epoch": 0.012109724635391986, - "grad_norm": 0.882723867893219, - "learning_rate": 5.733333333333333e-06, - "loss": 0.6712, - "step": 345 - }, - { - "epoch": 0.012144825286509064, - "grad_norm": 0.8904788494110107, - "learning_rate": 5.750000000000001e-06, - "loss": 0.7006, - "step": 346 - }, - { - "epoch": 0.012179925937626143, - "grad_norm": 1.0569589138031006, - "learning_rate": 5.766666666666667e-06, - "loss": 0.745, - "step": 347 - }, - { - "epoch": 0.012215026588743221, - "grad_norm": 1.0098443031311035, - "learning_rate": 5.783333333333334e-06, - "loss": 0.7286, - "step": 348 - }, - { - "epoch": 0.0122501272398603, - "grad_norm": 0.9146799445152283, - "learning_rate": 5.8e-06, - "loss": 0.6196, - "step": 349 - }, - { - "epoch": 0.012285227890977378, - "grad_norm": 0.988031804561615, - "learning_rate": 5.816666666666667e-06, - "loss": 0.8081, - "step": 350 - }, - { - "epoch": 0.012320328542094456, - "grad_norm": 0.9418520331382751, - "learning_rate": 5.833333333333334e-06, - "loss": 0.6797, - "step": 351 - }, - { - "epoch": 0.012355429193211535, - "grad_norm": 0.9020828008651733, - "learning_rate": 5.850000000000001e-06, - "loss": 0.7194, - "step": 352 - }, - { - "epoch": 0.012390529844328613, - "grad_norm": 0.8916172981262207, - "learning_rate": 5.866666666666667e-06, - "loss": 0.7318, - "step": 353 - }, - { - "epoch": 0.01242563049544569, - "grad_norm": 1.0686781406402588, - "learning_rate": 5.8833333333333335e-06, - "loss": 0.7286, - "step": 354 - }, - { - "epoch": 0.012460731146562768, - "grad_norm": 1.0727556943893433, - "learning_rate": 5.9e-06, - "loss": 0.7819, - "step": 355 - }, - { - "epoch": 0.012495831797679846, - "grad_norm": 0.8901796340942383, - "learning_rate": 5.916666666666667e-06, - "loss": 0.7272, - "step": 356 - }, - { - "epoch": 0.012530932448796925, - "grad_norm": 0.9587379693984985, - "learning_rate": 5.933333333333334e-06, - "loss": 0.7075, - "step": 357 - }, - { - "epoch": 0.012566033099914003, - "grad_norm": 0.9002569317817688, - "learning_rate": 5.95e-06, - "loss": 0.7323, - "step": 358 - }, - { - "epoch": 0.012601133751031082, - "grad_norm": 0.9094626307487488, - "learning_rate": 5.9666666666666666e-06, - "loss": 0.816, - "step": 359 - }, - { - "epoch": 0.01263623440214816, - "grad_norm": 1.2198059558868408, - "learning_rate": 5.983333333333334e-06, - "loss": 0.7759, - "step": 360 - }, - { - "epoch": 0.012671335053265238, - "grad_norm": 0.8315908908843994, - "learning_rate": 6e-06, - "loss": 0.6849, - "step": 361 - }, - { - "epoch": 0.012706435704382317, - "grad_norm": 0.9600279927253723, - "learning_rate": 6.016666666666667e-06, - "loss": 0.6583, - "step": 362 - }, - { - "epoch": 0.012741536355499395, - "grad_norm": 0.927463173866272, - "learning_rate": 6.033333333333334e-06, - "loss": 0.6442, - "step": 363 - }, - { - "epoch": 0.012776637006616473, - "grad_norm": 0.9484896659851074, - "learning_rate": 6.0500000000000005e-06, - "loss": 0.6289, - "step": 364 - }, - { - "epoch": 0.012811737657733552, - "grad_norm": 0.8671932220458984, - "learning_rate": 6.066666666666667e-06, - "loss": 0.6338, - "step": 365 - }, - { - "epoch": 0.012846838308850628, - "grad_norm": 0.9346337914466858, - "learning_rate": 6.083333333333334e-06, - "loss": 0.7109, - "step": 366 - }, - { - "epoch": 0.012881938959967707, - "grad_norm": 0.9063475131988525, - "learning_rate": 6.1e-06, - "loss": 0.5422, - "step": 367 - }, - { - "epoch": 0.012917039611084785, - "grad_norm": 1.0502984523773193, - "learning_rate": 6.116666666666667e-06, - "loss": 0.6971, - "step": 368 - }, - { - "epoch": 0.012952140262201864, - "grad_norm": 1.0495177507400513, - "learning_rate": 6.133333333333334e-06, - "loss": 0.6068, - "step": 369 - }, - { - "epoch": 0.012987240913318942, - "grad_norm": 0.9865358471870422, - "learning_rate": 6.15e-06, - "loss": 0.7368, - "step": 370 - }, - { - "epoch": 0.01302234156443602, - "grad_norm": 1.0409873723983765, - "learning_rate": 6.166666666666667e-06, - "loss": 0.7196, - "step": 371 - }, - { - "epoch": 0.013057442215553099, - "grad_norm": 1.1532410383224487, - "learning_rate": 6.183333333333333e-06, - "loss": 0.7371, - "step": 372 - }, - { - "epoch": 0.013092542866670177, - "grad_norm": 1.023834466934204, - "learning_rate": 6.2e-06, - "loss": 0.7233, - "step": 373 - }, - { - "epoch": 0.013127643517787255, - "grad_norm": 1.0173529386520386, - "learning_rate": 6.2166666666666676e-06, - "loss": 0.7363, - "step": 374 - }, - { - "epoch": 0.013162744168904334, - "grad_norm": 1.0602985620498657, - "learning_rate": 6.2333333333333335e-06, - "loss": 0.7341, - "step": 375 - }, - { - "epoch": 0.013197844820021412, - "grad_norm": 0.9712168574333191, - "learning_rate": 6.25e-06, - "loss": 0.5774, - "step": 376 - }, - { - "epoch": 0.013232945471138489, - "grad_norm": 1.0337426662445068, - "learning_rate": 6.266666666666666e-06, - "loss": 0.6795, - "step": 377 - }, - { - "epoch": 0.013268046122255567, - "grad_norm": 1.1107357740402222, - "learning_rate": 6.283333333333334e-06, - "loss": 0.6859, - "step": 378 - }, - { - "epoch": 0.013303146773372646, - "grad_norm": 1.078075885772705, - "learning_rate": 6.300000000000001e-06, - "loss": 0.6766, - "step": 379 - }, - { - "epoch": 0.013338247424489724, - "grad_norm": 1.1363199949264526, - "learning_rate": 6.316666666666667e-06, - "loss": 0.7178, - "step": 380 - }, - { - "epoch": 0.013373348075606802, - "grad_norm": 1.0428013801574707, - "learning_rate": 6.333333333333334e-06, - "loss": 0.7278, - "step": 381 - }, - { - "epoch": 0.01340844872672388, - "grad_norm": 1.0167087316513062, - "learning_rate": 6.35e-06, - "loss": 0.7683, - "step": 382 - }, - { - "epoch": 0.013443549377840959, - "grad_norm": 1.063559651374817, - "learning_rate": 6.366666666666667e-06, - "loss": 0.6752, - "step": 383 - }, - { - "epoch": 0.013478650028958037, - "grad_norm": 1.0544387102127075, - "learning_rate": 6.383333333333335e-06, - "loss": 0.6205, - "step": 384 - }, - { - "epoch": 0.013513750680075116, - "grad_norm": 1.0387669801712036, - "learning_rate": 6.4000000000000006e-06, - "loss": 0.6267, - "step": 385 - }, - { - "epoch": 0.013548851331192194, - "grad_norm": 1.0910186767578125, - "learning_rate": 6.4166666666666665e-06, - "loss": 0.7712, - "step": 386 - }, - { - "epoch": 0.013583951982309272, - "grad_norm": 1.0286012887954712, - "learning_rate": 6.433333333333334e-06, - "loss": 0.7358, - "step": 387 - }, - { - "epoch": 0.01361905263342635, - "grad_norm": 1.0297938585281372, - "learning_rate": 6.45e-06, - "loss": 0.7051, - "step": 388 - }, - { - "epoch": 0.013654153284543427, - "grad_norm": 0.9862657189369202, - "learning_rate": 6.466666666666667e-06, - "loss": 0.5874, - "step": 389 - }, - { - "epoch": 0.013689253935660506, - "grad_norm": 1.079933762550354, - "learning_rate": 6.4833333333333345e-06, - "loss": 0.7221, - "step": 390 - }, - { - "epoch": 0.013724354586777584, - "grad_norm": 1.1751629114151, - "learning_rate": 6.5000000000000004e-06, - "loss": 0.5966, - "step": 391 - }, - { - "epoch": 0.013759455237894663, - "grad_norm": 1.0793417692184448, - "learning_rate": 6.516666666666666e-06, - "loss": 0.5831, - "step": 392 - }, - { - "epoch": 0.013794555889011741, - "grad_norm": 1.0433924198150635, - "learning_rate": 6.533333333333333e-06, - "loss": 0.7164, - "step": 393 - }, - { - "epoch": 0.01382965654012882, - "grad_norm": 1.1008596420288086, - "learning_rate": 6.550000000000001e-06, - "loss": 0.6786, - "step": 394 - }, - { - "epoch": 0.013864757191245898, - "grad_norm": 1.1843255758285522, - "learning_rate": 6.566666666666667e-06, - "loss": 0.7748, - "step": 395 - }, - { - "epoch": 0.013899857842362976, - "grad_norm": 1.142665147781372, - "learning_rate": 6.583333333333333e-06, - "loss": 0.571, - "step": 396 - }, - { - "epoch": 0.013934958493480054, - "grad_norm": 1.1519567966461182, - "learning_rate": 6.6e-06, - "loss": 0.6107, - "step": 397 - }, - { - "epoch": 0.013970059144597133, - "grad_norm": 1.0905183553695679, - "learning_rate": 6.616666666666667e-06, - "loss": 0.7137, - "step": 398 - }, - { - "epoch": 0.014005159795714211, - "grad_norm": 1.0726505517959595, - "learning_rate": 6.633333333333333e-06, - "loss": 0.6023, - "step": 399 - }, - { - "epoch": 0.01404026044683129, - "grad_norm": 1.1566104888916016, - "learning_rate": 6.650000000000001e-06, - "loss": 0.7291, - "step": 400 - }, - { - "epoch": 0.014075361097948366, - "grad_norm": 1.1286085844039917, - "learning_rate": 6.666666666666667e-06, - "loss": 0.7202, - "step": 401 - }, - { - "epoch": 0.014110461749065445, - "grad_norm": 1.1955941915512085, - "learning_rate": 6.6833333333333334e-06, - "loss": 0.6424, - "step": 402 - }, - { - "epoch": 0.014145562400182523, - "grad_norm": 1.216745376586914, - "learning_rate": 6.700000000000001e-06, - "loss": 0.7081, - "step": 403 - }, - { - "epoch": 0.014180663051299601, - "grad_norm": 1.0637097358703613, - "learning_rate": 6.716666666666667e-06, - "loss": 0.5796, - "step": 404 - }, - { - "epoch": 0.01421576370241668, - "grad_norm": 1.1549216508865356, - "learning_rate": 6.733333333333333e-06, - "loss": 0.4727, - "step": 405 - }, - { - "epoch": 0.014250864353533758, - "grad_norm": 1.156494140625, - "learning_rate": 6.750000000000001e-06, - "loss": 0.6188, - "step": 406 - }, - { - "epoch": 0.014285965004650836, - "grad_norm": 1.1491996049880981, - "learning_rate": 6.766666666666667e-06, - "loss": 0.6702, - "step": 407 - }, - { - "epoch": 0.014321065655767915, - "grad_norm": 1.150155782699585, - "learning_rate": 6.783333333333333e-06, - "loss": 0.741, - "step": 408 - }, - { - "epoch": 0.014356166306884993, - "grad_norm": 1.2247633934020996, - "learning_rate": 6.800000000000001e-06, - "loss": 0.6495, - "step": 409 - }, - { - "epoch": 0.014391266958002072, - "grad_norm": 1.12718665599823, - "learning_rate": 6.816666666666667e-06, - "loss": 0.624, - "step": 410 - }, - { - "epoch": 0.01442636760911915, - "grad_norm": 1.1828604936599731, - "learning_rate": 6.833333333333333e-06, - "loss": 0.5882, - "step": 411 - }, - { - "epoch": 0.014461468260236227, - "grad_norm": 1.2903661727905273, - "learning_rate": 6.8500000000000005e-06, - "loss": 0.5199, - "step": 412 - }, - { - "epoch": 0.014496568911353305, - "grad_norm": 1.2523154020309448, - "learning_rate": 6.866666666666667e-06, - "loss": 0.6694, - "step": 413 - }, - { - "epoch": 0.014531669562470383, - "grad_norm": 1.2074534893035889, - "learning_rate": 6.883333333333333e-06, - "loss": 0.6363, - "step": 414 - }, - { - "epoch": 0.014566770213587462, - "grad_norm": 1.283408522605896, - "learning_rate": 6.900000000000001e-06, - "loss": 0.7172, - "step": 415 - }, - { - "epoch": 0.01460187086470454, - "grad_norm": 1.1132594347000122, - "learning_rate": 6.916666666666667e-06, - "loss": 0.6011, - "step": 416 - }, - { - "epoch": 0.014636971515821618, - "grad_norm": 1.1856974363327026, - "learning_rate": 6.933333333333334e-06, - "loss": 0.5045, - "step": 417 - }, - { - "epoch": 0.014672072166938697, - "grad_norm": 1.1515536308288574, - "learning_rate": 6.950000000000001e-06, - "loss": 0.4622, - "step": 418 - }, - { - "epoch": 0.014707172818055775, - "grad_norm": 1.1630934476852417, - "learning_rate": 6.966666666666667e-06, - "loss": 0.5821, - "step": 419 - }, - { - "epoch": 0.014742273469172854, - "grad_norm": 1.2615609169006348, - "learning_rate": 6.983333333333333e-06, - "loss": 0.6047, - "step": 420 - }, - { - "epoch": 0.014777374120289932, - "grad_norm": 1.1740374565124512, - "learning_rate": 7.000000000000001e-06, - "loss": 0.5898, - "step": 421 - }, - { - "epoch": 0.01481247477140701, - "grad_norm": 1.3241665363311768, - "learning_rate": 7.0166666666666675e-06, - "loss": 0.6924, - "step": 422 - }, - { - "epoch": 0.014847575422524089, - "grad_norm": 1.4314783811569214, - "learning_rate": 7.0333333333333335e-06, - "loss": 0.7658, - "step": 423 - }, - { - "epoch": 0.014882676073641165, - "grad_norm": 1.270350694656372, - "learning_rate": 7.049999999999999e-06, - "loss": 0.6821, - "step": 424 - }, - { - "epoch": 0.014917776724758244, - "grad_norm": 1.4139686822891235, - "learning_rate": 7.066666666666667e-06, - "loss": 0.7408, - "step": 425 - }, - { - "epoch": 0.014952877375875322, - "grad_norm": 1.277113914489746, - "learning_rate": 7.083333333333334e-06, - "loss": 0.6623, - "step": 426 - }, - { - "epoch": 0.0149879780269924, - "grad_norm": 1.2972047328948975, - "learning_rate": 7.1e-06, - "loss": 0.6572, - "step": 427 - }, - { - "epoch": 0.015023078678109479, - "grad_norm": 1.1628177165985107, - "learning_rate": 7.116666666666667e-06, - "loss": 0.5729, - "step": 428 - }, - { - "epoch": 0.015058179329226557, - "grad_norm": 1.1344221830368042, - "learning_rate": 7.133333333333333e-06, - "loss": 0.5468, - "step": 429 - }, - { - "epoch": 0.015093279980343636, - "grad_norm": 1.1982285976409912, - "learning_rate": 7.15e-06, - "loss": 0.5953, - "step": 430 - }, - { - "epoch": 0.015128380631460714, - "grad_norm": 1.2882599830627441, - "learning_rate": 7.166666666666667e-06, - "loss": 0.4917, - "step": 431 - }, - { - "epoch": 0.015163481282577792, - "grad_norm": 1.125488042831421, - "learning_rate": 7.183333333333334e-06, - "loss": 0.6116, - "step": 432 - }, - { - "epoch": 0.01519858193369487, - "grad_norm": 1.3158310651779175, - "learning_rate": 7.2e-06, - "loss": 0.6512, - "step": 433 - }, - { - "epoch": 0.015233682584811949, - "grad_norm": 1.1793767213821411, - "learning_rate": 7.216666666666667e-06, - "loss": 0.6071, - "step": 434 - }, - { - "epoch": 0.015268783235929026, - "grad_norm": 1.1246131658554077, - "learning_rate": 7.233333333333333e-06, - "loss": 0.6291, - "step": 435 - }, - { - "epoch": 0.015303883887046104, - "grad_norm": 1.1401065587997437, - "learning_rate": 7.25e-06, - "loss": 0.6652, - "step": 436 - }, - { - "epoch": 0.015338984538163182, - "grad_norm": 1.1254594326019287, - "learning_rate": 7.266666666666668e-06, - "loss": 0.6113, - "step": 437 - }, - { - "epoch": 0.01537408518928026, - "grad_norm": 1.1503032445907593, - "learning_rate": 7.283333333333334e-06, - "loss": 0.6238, - "step": 438 - }, - { - "epoch": 0.015409185840397339, - "grad_norm": 1.2066023349761963, - "learning_rate": 7.2999999999999996e-06, - "loss": 0.7469, - "step": 439 - }, - { - "epoch": 0.015444286491514417, - "grad_norm": 1.0497260093688965, - "learning_rate": 7.316666666666667e-06, - "loss": 0.526, - "step": 440 - }, - { - "epoch": 0.015479387142631496, - "grad_norm": 1.0652025938034058, - "learning_rate": 7.333333333333334e-06, - "loss": 0.6904, - "step": 441 - }, - { - "epoch": 0.015514487793748574, - "grad_norm": 1.0633105039596558, - "learning_rate": 7.35e-06, - "loss": 0.5099, - "step": 442 - }, - { - "epoch": 0.015549588444865653, - "grad_norm": 1.0627790689468384, - "learning_rate": 7.3666666666666676e-06, - "loss": 0.6, - "step": 443 - }, - { - "epoch": 0.015584689095982731, - "grad_norm": 1.2586344480514526, - "learning_rate": 7.3833333333333335e-06, - "loss": 0.5875, - "step": 444 - }, - { - "epoch": 0.01561978974709981, - "grad_norm": 1.1343268156051636, - "learning_rate": 7.4e-06, - "loss": 0.6261, - "step": 445 - }, - { - "epoch": 0.015654890398216888, - "grad_norm": 1.1174184083938599, - "learning_rate": 7.416666666666668e-06, - "loss": 0.6316, - "step": 446 - }, - { - "epoch": 0.015689991049333964, - "grad_norm": 0.9128473401069641, - "learning_rate": 7.433333333333334e-06, - "loss": 0.5585, - "step": 447 - }, - { - "epoch": 0.015725091700451044, - "grad_norm": 0.9832584857940674, - "learning_rate": 7.45e-06, - "loss": 0.5598, - "step": 448 - }, - { - "epoch": 0.01576019235156812, - "grad_norm": 1.0046968460083008, - "learning_rate": 7.4666666666666675e-06, - "loss": 0.5969, - "step": 449 - }, - { - "epoch": 0.0157952930026852, - "grad_norm": 1.4050061702728271, - "learning_rate": 7.483333333333334e-06, - "loss": 0.7326, - "step": 450 - }, - { - "epoch": 0.015830393653802278, - "grad_norm": 1.023136019706726, - "learning_rate": 7.5e-06, - "loss": 0.6683, - "step": 451 - }, - { - "epoch": 0.015865494304919358, - "grad_norm": 0.9094240069389343, - "learning_rate": 7.516666666666668e-06, - "loss": 0.5928, - "step": 452 - }, - { - "epoch": 0.015900594956036435, - "grad_norm": 0.8489618301391602, - "learning_rate": 7.533333333333334e-06, - "loss": 0.687, - "step": 453 - }, - { - "epoch": 0.01593569560715351, - "grad_norm": 0.7638569474220276, - "learning_rate": 7.55e-06, - "loss": 0.61, - "step": 454 - }, - { - "epoch": 0.01597079625827059, - "grad_norm": 0.9805753231048584, - "learning_rate": 7.5666666666666665e-06, - "loss": 0.6172, - "step": 455 - }, - { - "epoch": 0.016005896909387668, - "grad_norm": 0.9835997819900513, - "learning_rate": 7.583333333333334e-06, - "loss": 0.6591, - "step": 456 - }, - { - "epoch": 0.016040997560504748, - "grad_norm": 1.001768708229065, - "learning_rate": 7.6e-06, - "loss": 0.6956, - "step": 457 - }, - { - "epoch": 0.016076098211621825, - "grad_norm": 0.973060131072998, - "learning_rate": 7.616666666666666e-06, - "loss": 0.5476, - "step": 458 - }, - { - "epoch": 0.016111198862738905, - "grad_norm": 0.8502066731452942, - "learning_rate": 7.633333333333334e-06, - "loss": 0.6152, - "step": 459 - }, - { - "epoch": 0.01614629951385598, - "grad_norm": 1.1076459884643555, - "learning_rate": 7.65e-06, - "loss": 0.6616, - "step": 460 - }, - { - "epoch": 0.01618140016497306, - "grad_norm": 0.8689216375350952, - "learning_rate": 7.666666666666667e-06, - "loss": 0.5377, - "step": 461 - }, - { - "epoch": 0.016216500816090138, - "grad_norm": 1.400158166885376, - "learning_rate": 7.683333333333335e-06, - "loss": 0.6558, - "step": 462 - }, - { - "epoch": 0.01625160146720722, - "grad_norm": 0.9165375232696533, - "learning_rate": 7.7e-06, - "loss": 0.6927, - "step": 463 - }, - { - "epoch": 0.016286702118324295, - "grad_norm": 0.7525034546852112, - "learning_rate": 7.716666666666667e-06, - "loss": 0.5171, - "step": 464 - }, - { - "epoch": 0.01632180276944137, - "grad_norm": 0.9681416749954224, - "learning_rate": 7.733333333333334e-06, - "loss": 0.5953, - "step": 465 - }, - { - "epoch": 0.01635690342055845, - "grad_norm": 1.0100696086883545, - "learning_rate": 7.75e-06, - "loss": 0.6335, - "step": 466 - }, - { - "epoch": 0.01639200407167553, - "grad_norm": 0.8999189138412476, - "learning_rate": 7.766666666666666e-06, - "loss": 0.5858, - "step": 467 - }, - { - "epoch": 0.01642710472279261, - "grad_norm": 0.7750583291053772, - "learning_rate": 7.783333333333334e-06, - "loss": 0.5194, - "step": 468 - }, - { - "epoch": 0.016462205373909685, - "grad_norm": 0.9349814057350159, - "learning_rate": 7.8e-06, - "loss": 0.507, - "step": 469 - }, - { - "epoch": 0.016497306025026765, - "grad_norm": 0.766467273235321, - "learning_rate": 7.816666666666666e-06, - "loss": 0.5136, - "step": 470 - }, - { - "epoch": 0.016532406676143842, - "grad_norm": 0.779218316078186, - "learning_rate": 7.833333333333333e-06, - "loss": 0.6807, - "step": 471 - }, - { - "epoch": 0.016567507327260922, - "grad_norm": 1.0625845193862915, - "learning_rate": 7.850000000000001e-06, - "loss": 0.636, - "step": 472 - }, - { - "epoch": 0.016602607978378, - "grad_norm": 0.9863707423210144, - "learning_rate": 7.866666666666667e-06, - "loss": 0.6045, - "step": 473 - }, - { - "epoch": 0.01663770862949508, - "grad_norm": 0.8744222521781921, - "learning_rate": 7.883333333333335e-06, - "loss": 0.5352, - "step": 474 - }, - { - "epoch": 0.016672809280612155, - "grad_norm": 1.322165846824646, - "learning_rate": 7.9e-06, - "loss": 0.6636, - "step": 475 - }, - { - "epoch": 0.016707909931729232, - "grad_norm": 0.9002565741539001, - "learning_rate": 7.916666666666667e-06, - "loss": 0.5134, - "step": 476 - }, - { - "epoch": 0.016743010582846312, - "grad_norm": 1.260679006576538, - "learning_rate": 7.933333333333334e-06, - "loss": 0.6594, - "step": 477 - }, - { - "epoch": 0.01677811123396339, - "grad_norm": 0.9266374707221985, - "learning_rate": 7.95e-06, - "loss": 0.5992, - "step": 478 - }, - { - "epoch": 0.01681321188508047, - "grad_norm": 0.924260675907135, - "learning_rate": 7.966666666666666e-06, - "loss": 0.6994, - "step": 479 - }, - { - "epoch": 0.016848312536197545, - "grad_norm": 0.9654821753501892, - "learning_rate": 7.983333333333334e-06, - "loss": 0.6596, - "step": 480 - }, - { - "epoch": 0.016883413187314626, - "grad_norm": 0.9240251779556274, - "learning_rate": 8.000000000000001e-06, - "loss": 0.6385, - "step": 481 - }, - { - "epoch": 0.016918513838431702, - "grad_norm": 0.8140637278556824, - "learning_rate": 8.016666666666667e-06, - "loss": 0.6262, - "step": 482 - }, - { - "epoch": 0.016953614489548782, - "grad_norm": 0.9621090888977051, - "learning_rate": 8.033333333333335e-06, - "loss": 0.6215, - "step": 483 - }, - { - "epoch": 0.01698871514066586, - "grad_norm": 0.7109926342964172, - "learning_rate": 8.050000000000001e-06, - "loss": 0.6163, - "step": 484 - }, - { - "epoch": 0.01702381579178294, - "grad_norm": 0.8489733338356018, - "learning_rate": 8.066666666666667e-06, - "loss": 0.6145, - "step": 485 - }, - { - "epoch": 0.017058916442900016, - "grad_norm": 0.8760428428649902, - "learning_rate": 8.083333333333333e-06, - "loss": 0.7166, - "step": 486 - }, - { - "epoch": 0.017094017094017096, - "grad_norm": 1.0702654123306274, - "learning_rate": 8.1e-06, - "loss": 0.5567, - "step": 487 - }, - { - "epoch": 0.017129117745134172, - "grad_norm": 0.964957594871521, - "learning_rate": 8.116666666666666e-06, - "loss": 0.5569, - "step": 488 - }, - { - "epoch": 0.01716421839625125, - "grad_norm": 0.8697575926780701, - "learning_rate": 8.133333333333332e-06, - "loss": 0.6209, - "step": 489 - }, - { - "epoch": 0.01719931904736833, - "grad_norm": 0.8674750924110413, - "learning_rate": 8.15e-06, - "loss": 0.7149, - "step": 490 - }, - { - "epoch": 0.017234419698485406, - "grad_norm": 0.8723517656326294, - "learning_rate": 8.166666666666668e-06, - "loss": 0.6414, - "step": 491 - }, - { - "epoch": 0.017269520349602486, - "grad_norm": 1.0633617639541626, - "learning_rate": 8.183333333333333e-06, - "loss": 0.5391, - "step": 492 - }, - { - "epoch": 0.017304621000719562, - "grad_norm": 1.3720223903656006, - "learning_rate": 8.200000000000001e-06, - "loss": 0.638, - "step": 493 - }, - { - "epoch": 0.017339721651836643, - "grad_norm": 0.9360631108283997, - "learning_rate": 8.216666666666667e-06, - "loss": 0.653, - "step": 494 - }, - { - "epoch": 0.01737482230295372, - "grad_norm": 0.8658445477485657, - "learning_rate": 8.233333333333333e-06, - "loss": 0.5226, - "step": 495 - }, - { - "epoch": 0.0174099229540708, - "grad_norm": 0.9256001114845276, - "learning_rate": 8.25e-06, - "loss": 0.631, - "step": 496 - }, - { - "epoch": 0.017445023605187876, - "grad_norm": 1.0080300569534302, - "learning_rate": 8.266666666666667e-06, - "loss": 0.6835, - "step": 497 - }, - { - "epoch": 0.017480124256304956, - "grad_norm": 0.9362330436706543, - "learning_rate": 8.283333333333333e-06, - "loss": 0.5956, - "step": 498 - }, - { - "epoch": 0.017515224907422033, - "grad_norm": 0.923521876335144, - "learning_rate": 8.3e-06, - "loss": 0.5808, - "step": 499 - }, - { - "epoch": 0.01755032555853911, - "grad_norm": 1.0345176458358765, - "learning_rate": 8.316666666666668e-06, - "loss": 0.7081, - "step": 500 - }, - { - "epoch": 0.01758542620965619, - "grad_norm": 1.1013809442520142, - "learning_rate": 8.333333333333334e-06, - "loss": 0.6112, - "step": 501 - }, - { - "epoch": 0.017620526860773266, - "grad_norm": 1.0645540952682495, - "learning_rate": 8.350000000000001e-06, - "loss": 0.5393, - "step": 502 - }, - { - "epoch": 0.017655627511890346, - "grad_norm": 1.012903094291687, - "learning_rate": 8.366666666666667e-06, - "loss": 0.5782, - "step": 503 - }, - { - "epoch": 0.017690728163007423, - "grad_norm": 0.8243297934532166, - "learning_rate": 8.383333333333333e-06, - "loss": 0.658, - "step": 504 - }, - { - "epoch": 0.017725828814124503, - "grad_norm": 0.8322799205780029, - "learning_rate": 8.400000000000001e-06, - "loss": 0.6001, - "step": 505 - }, - { - "epoch": 0.01776092946524158, - "grad_norm": 0.8744947910308838, - "learning_rate": 8.416666666666667e-06, - "loss": 0.6603, - "step": 506 - }, - { - "epoch": 0.01779603011635866, - "grad_norm": 0.9537670612335205, - "learning_rate": 8.433333333333333e-06, - "loss": 0.5923, - "step": 507 - }, - { - "epoch": 0.017831130767475736, - "grad_norm": 0.8564189076423645, - "learning_rate": 8.45e-06, - "loss": 0.5559, - "step": 508 - }, - { - "epoch": 0.017866231418592816, - "grad_norm": 0.9584269523620605, - "learning_rate": 8.466666666666666e-06, - "loss": 0.5655, - "step": 509 - }, - { - "epoch": 0.017901332069709893, - "grad_norm": 0.8163449168205261, - "learning_rate": 8.483333333333334e-06, - "loss": 0.6009, - "step": 510 - }, - { - "epoch": 0.01793643272082697, - "grad_norm": 0.9601420164108276, - "learning_rate": 8.500000000000002e-06, - "loss": 0.5098, - "step": 511 - }, - { - "epoch": 0.01797153337194405, - "grad_norm": 1.096222996711731, - "learning_rate": 8.516666666666668e-06, - "loss": 0.7286, - "step": 512 - }, - { - "epoch": 0.018006634023061126, - "grad_norm": 0.8880419731140137, - "learning_rate": 8.533333333333334e-06, - "loss": 0.5652, - "step": 513 - }, - { - "epoch": 0.018041734674178207, - "grad_norm": 1.004822850227356, - "learning_rate": 8.550000000000001e-06, - "loss": 0.6306, - "step": 514 - }, - { - "epoch": 0.018076835325295283, - "grad_norm": 1.1892567873001099, - "learning_rate": 8.566666666666667e-06, - "loss": 0.6654, - "step": 515 - }, - { - "epoch": 0.018111935976412363, - "grad_norm": 0.9374802112579346, - "learning_rate": 8.583333333333333e-06, - "loss": 0.6044, - "step": 516 - }, - { - "epoch": 0.01814703662752944, - "grad_norm": 1.0226093530654907, - "learning_rate": 8.599999999999999e-06, - "loss": 0.5507, - "step": 517 - }, - { - "epoch": 0.01818213727864652, - "grad_norm": 0.9727879166603088, - "learning_rate": 8.616666666666667e-06, - "loss": 0.5256, - "step": 518 - }, - { - "epoch": 0.018217237929763597, - "grad_norm": 0.9751061797142029, - "learning_rate": 8.633333333333334e-06, - "loss": 0.5383, - "step": 519 - }, - { - "epoch": 0.018252338580880677, - "grad_norm": 1.0009385347366333, - "learning_rate": 8.65e-06, - "loss": 0.5985, - "step": 520 - }, - { - "epoch": 0.018287439231997753, - "grad_norm": 1.0737996101379395, - "learning_rate": 8.666666666666668e-06, - "loss": 0.6387, - "step": 521 - }, - { - "epoch": 0.01832253988311483, - "grad_norm": 1.2864314317703247, - "learning_rate": 8.683333333333334e-06, - "loss": 0.5459, - "step": 522 - }, - { - "epoch": 0.01835764053423191, - "grad_norm": 1.104248285293579, - "learning_rate": 8.7e-06, - "loss": 0.5614, - "step": 523 - }, - { - "epoch": 0.018392741185348987, - "grad_norm": 1.1225610971450806, - "learning_rate": 8.716666666666667e-06, - "loss": 0.661, - "step": 524 - }, - { - "epoch": 0.018427841836466067, - "grad_norm": 0.8852440118789673, - "learning_rate": 8.733333333333333e-06, - "loss": 0.5772, - "step": 525 - }, - { - "epoch": 0.018462942487583144, - "grad_norm": 0.8552767634391785, - "learning_rate": 8.75e-06, - "loss": 0.598, - "step": 526 - }, - { - "epoch": 0.018498043138700224, - "grad_norm": 1.131404995918274, - "learning_rate": 8.766666666666667e-06, - "loss": 0.5801, - "step": 527 - }, - { - "epoch": 0.0185331437898173, - "grad_norm": 0.9262281656265259, - "learning_rate": 8.783333333333335e-06, - "loss": 0.7197, - "step": 528 - }, - { - "epoch": 0.01856824444093438, - "grad_norm": 1.0418230295181274, - "learning_rate": 8.8e-06, - "loss": 0.6094, - "step": 529 - }, - { - "epoch": 0.018603345092051457, - "grad_norm": 0.8845667243003845, - "learning_rate": 8.816666666666668e-06, - "loss": 0.652, - "step": 530 - }, - { - "epoch": 0.018638445743168537, - "grad_norm": 0.8226301074028015, - "learning_rate": 8.833333333333334e-06, - "loss": 0.6124, - "step": 531 - }, - { - "epoch": 0.018673546394285614, - "grad_norm": 0.8966876864433289, - "learning_rate": 8.85e-06, - "loss": 0.6468, - "step": 532 - }, - { - "epoch": 0.018708647045402694, - "grad_norm": 1.1389936208724976, - "learning_rate": 8.866666666666668e-06, - "loss": 0.5144, - "step": 533 - }, - { - "epoch": 0.01874374769651977, - "grad_norm": 1.0347756147384644, - "learning_rate": 8.883333333333334e-06, - "loss": 0.6071, - "step": 534 - }, - { - "epoch": 0.018778848347636847, - "grad_norm": 0.9363552927970886, - "learning_rate": 8.9e-06, - "loss": 0.6096, - "step": 535 - }, - { - "epoch": 0.018813948998753927, - "grad_norm": 0.8461397886276245, - "learning_rate": 8.916666666666667e-06, - "loss": 0.6998, - "step": 536 - }, - { - "epoch": 0.018849049649871004, - "grad_norm": 1.0320388078689575, - "learning_rate": 8.933333333333333e-06, - "loss": 0.6423, - "step": 537 - }, - { - "epoch": 0.018884150300988084, - "grad_norm": 0.8768509030342102, - "learning_rate": 8.95e-06, - "loss": 0.5372, - "step": 538 - }, - { - "epoch": 0.01891925095210516, - "grad_norm": 0.8577126264572144, - "learning_rate": 8.966666666666668e-06, - "loss": 0.5328, - "step": 539 - }, - { - "epoch": 0.01895435160322224, - "grad_norm": 1.314351201057434, - "learning_rate": 8.983333333333334e-06, - "loss": 0.5216, - "step": 540 - }, - { - "epoch": 0.018989452254339317, - "grad_norm": 0.9700373411178589, - "learning_rate": 9e-06, - "loss": 0.6599, - "step": 541 - }, - { - "epoch": 0.019024552905456397, - "grad_norm": 1.0834518671035767, - "learning_rate": 9.016666666666668e-06, - "loss": 0.6987, - "step": 542 - }, - { - "epoch": 0.019059653556573474, - "grad_norm": 0.8142768740653992, - "learning_rate": 9.033333333333334e-06, - "loss": 0.5624, - "step": 543 - }, - { - "epoch": 0.019094754207690554, - "grad_norm": 0.915524959564209, - "learning_rate": 9.05e-06, - "loss": 0.6174, - "step": 544 - }, - { - "epoch": 0.01912985485880763, - "grad_norm": 0.9785704016685486, - "learning_rate": 9.066666666666667e-06, - "loss": 0.585, - "step": 545 - }, - { - "epoch": 0.019164955509924708, - "grad_norm": 1.106635570526123, - "learning_rate": 9.083333333333333e-06, - "loss": 0.5987, - "step": 546 - }, - { - "epoch": 0.019200056161041788, - "grad_norm": 0.9003202319145203, - "learning_rate": 9.100000000000001e-06, - "loss": 0.6468, - "step": 547 - }, - { - "epoch": 0.019235156812158864, - "grad_norm": 1.0753730535507202, - "learning_rate": 9.116666666666667e-06, - "loss": 0.5213, - "step": 548 - }, - { - "epoch": 0.019270257463275944, - "grad_norm": 0.9911580085754395, - "learning_rate": 9.133333333333335e-06, - "loss": 0.639, - "step": 549 - }, - { - "epoch": 0.01930535811439302, - "grad_norm": 0.8952308297157288, - "learning_rate": 9.15e-06, - "loss": 0.5343, - "step": 550 - }, - { - "epoch": 0.0193404587655101, - "grad_norm": 0.8737993836402893, - "learning_rate": 9.166666666666666e-06, - "loss": 0.6456, - "step": 551 - }, - { - "epoch": 0.019375559416627178, - "grad_norm": 0.9945151805877686, - "learning_rate": 9.183333333333334e-06, - "loss": 0.6176, - "step": 552 - }, - { - "epoch": 0.019410660067744258, - "grad_norm": 1.002377986907959, - "learning_rate": 9.2e-06, - "loss": 0.6527, - "step": 553 - }, - { - "epoch": 0.019445760718861334, - "grad_norm": 1.3646693229675293, - "learning_rate": 9.216666666666666e-06, - "loss": 0.6781, - "step": 554 - }, - { - "epoch": 0.019480861369978415, - "grad_norm": 0.9139904975891113, - "learning_rate": 9.233333333333334e-06, - "loss": 0.507, - "step": 555 - }, - { - "epoch": 0.01951596202109549, - "grad_norm": 0.9778136014938354, - "learning_rate": 9.25e-06, - "loss": 0.6368, - "step": 556 - }, - { - "epoch": 0.019551062672212568, - "grad_norm": 1.0487507581710815, - "learning_rate": 9.266666666666667e-06, - "loss": 0.6116, - "step": 557 - }, - { - "epoch": 0.019586163323329648, - "grad_norm": 0.9676331281661987, - "learning_rate": 9.283333333333335e-06, - "loss": 0.6859, - "step": 558 - }, - { - "epoch": 0.019621263974446725, - "grad_norm": 0.7723503112792969, - "learning_rate": 9.3e-06, - "loss": 0.6056, - "step": 559 - }, - { - "epoch": 0.019656364625563805, - "grad_norm": 0.9119293689727783, - "learning_rate": 9.316666666666667e-06, - "loss": 0.6005, - "step": 560 - }, - { - "epoch": 0.01969146527668088, - "grad_norm": 0.8378137946128845, - "learning_rate": 9.333333333333334e-06, - "loss": 0.5153, - "step": 561 - }, - { - "epoch": 0.01972656592779796, - "grad_norm": 0.9101772904396057, - "learning_rate": 9.35e-06, - "loss": 0.6567, - "step": 562 - }, - { - "epoch": 0.019761666578915038, - "grad_norm": 0.8841044902801514, - "learning_rate": 9.366666666666666e-06, - "loss": 0.661, - "step": 563 - }, - { - "epoch": 0.019796767230032118, - "grad_norm": 1.039009928703308, - "learning_rate": 9.383333333333334e-06, - "loss": 0.5479, - "step": 564 - }, - { - "epoch": 0.019831867881149195, - "grad_norm": 0.8715111613273621, - "learning_rate": 9.4e-06, - "loss": 0.6384, - "step": 565 - }, - { - "epoch": 0.019866968532266275, - "grad_norm": 1.6248489618301392, - "learning_rate": 9.416666666666667e-06, - "loss": 0.6162, - "step": 566 - }, - { - "epoch": 0.01990206918338335, - "grad_norm": 0.9106292128562927, - "learning_rate": 9.433333333333335e-06, - "loss": 0.6164, - "step": 567 - }, - { - "epoch": 0.01993716983450043, - "grad_norm": 1.178511142730713, - "learning_rate": 9.450000000000001e-06, - "loss": 0.5691, - "step": 568 - }, - { - "epoch": 0.01997227048561751, - "grad_norm": 1.0117709636688232, - "learning_rate": 9.466666666666667e-06, - "loss": 0.6639, - "step": 569 - }, - { - "epoch": 0.020007371136734585, - "grad_norm": 1.0925737619400024, - "learning_rate": 9.483333333333335e-06, - "loss": 0.3326, - "step": 570 - }, - { - "epoch": 0.020042471787851665, - "grad_norm": 0.8340603709220886, - "learning_rate": 9.5e-06, - "loss": 0.6424, - "step": 571 - }, - { - "epoch": 0.02007757243896874, - "grad_norm": 0.7880210280418396, - "learning_rate": 9.516666666666666e-06, - "loss": 0.6427, - "step": 572 - }, - { - "epoch": 0.020112673090085822, - "grad_norm": 1.0672916173934937, - "learning_rate": 9.533333333333334e-06, - "loss": 0.5276, - "step": 573 - }, - { - "epoch": 0.0201477737412029, - "grad_norm": 0.8741814494132996, - "learning_rate": 9.55e-06, - "loss": 0.6311, - "step": 574 - }, - { - "epoch": 0.02018287439231998, - "grad_norm": 1.053268551826477, - "learning_rate": 9.566666666666666e-06, - "loss": 0.6646, - "step": 575 - }, - { - "epoch": 0.020217975043437055, - "grad_norm": 0.9222052097320557, - "learning_rate": 9.583333333333334e-06, - "loss": 0.5519, - "step": 576 - }, - { - "epoch": 0.020253075694554135, - "grad_norm": 0.8408103585243225, - "learning_rate": 9.600000000000001e-06, - "loss": 0.5551, - "step": 577 - }, - { - "epoch": 0.020288176345671212, - "grad_norm": 1.0086537599563599, - "learning_rate": 9.616666666666667e-06, - "loss": 0.5831, - "step": 578 - }, - { - "epoch": 0.020323276996788292, - "grad_norm": 0.9166975617408752, - "learning_rate": 9.633333333333335e-06, - "loss": 0.6178, - "step": 579 - }, - { - "epoch": 0.02035837764790537, - "grad_norm": 0.9390255212783813, - "learning_rate": 9.65e-06, - "loss": 0.6488, - "step": 580 - }, - { - "epoch": 0.020393478299022445, - "grad_norm": 0.9074947834014893, - "learning_rate": 9.666666666666667e-06, - "loss": 0.6076, - "step": 581 - }, - { - "epoch": 0.020428578950139525, - "grad_norm": 0.9572161436080933, - "learning_rate": 9.683333333333333e-06, - "loss": 0.5823, - "step": 582 - }, - { - "epoch": 0.020463679601256602, - "grad_norm": 0.8706708550453186, - "learning_rate": 9.7e-06, - "loss": 0.6361, - "step": 583 - }, - { - "epoch": 0.020498780252373682, - "grad_norm": 1.1245841979980469, - "learning_rate": 9.716666666666666e-06, - "loss": 0.5548, - "step": 584 - }, - { - "epoch": 0.02053388090349076, - "grad_norm": 0.8558626770973206, - "learning_rate": 9.733333333333334e-06, - "loss": 0.625, - "step": 585 - }, - { - "epoch": 0.02056898155460784, - "grad_norm": 0.9347841143608093, - "learning_rate": 9.750000000000002e-06, - "loss": 0.6281, - "step": 586 - }, - { - "epoch": 0.020604082205724916, - "grad_norm": 0.7697385549545288, - "learning_rate": 9.766666666666667e-06, - "loss": 0.6182, - "step": 587 - }, - { - "epoch": 0.020639182856841996, - "grad_norm": 0.8728403449058533, - "learning_rate": 9.783333333333333e-06, - "loss": 0.6294, - "step": 588 - }, - { - "epoch": 0.020674283507959072, - "grad_norm": 1.0015891790390015, - "learning_rate": 9.800000000000001e-06, - "loss": 0.6399, - "step": 589 - }, - { - "epoch": 0.020709384159076152, - "grad_norm": 0.8906582593917847, - "learning_rate": 9.816666666666667e-06, - "loss": 0.6065, - "step": 590 - }, - { - "epoch": 0.02074448481019323, - "grad_norm": 1.0701593160629272, - "learning_rate": 9.833333333333333e-06, - "loss": 0.5758, - "step": 591 - }, - { - "epoch": 0.020779585461310306, - "grad_norm": 1.0955392122268677, - "learning_rate": 9.85e-06, - "loss": 0.7245, - "step": 592 - }, - { - "epoch": 0.020814686112427386, - "grad_norm": 0.8800115585327148, - "learning_rate": 9.866666666666667e-06, - "loss": 0.508, - "step": 593 - }, - { - "epoch": 0.020849786763544462, - "grad_norm": 0.8054641485214233, - "learning_rate": 9.883333333333334e-06, - "loss": 0.608, - "step": 594 - }, - { - "epoch": 0.020884887414661542, - "grad_norm": 1.0237234830856323, - "learning_rate": 9.900000000000002e-06, - "loss": 0.6369, - "step": 595 - }, - { - "epoch": 0.02091998806577862, - "grad_norm": 1.0052982568740845, - "learning_rate": 9.916666666666668e-06, - "loss": 0.4956, - "step": 596 - }, - { - "epoch": 0.0209550887168957, - "grad_norm": 0.9854535460472107, - "learning_rate": 9.933333333333334e-06, - "loss": 0.6613, - "step": 597 - }, - { - "epoch": 0.020990189368012776, - "grad_norm": 1.0364254713058472, - "learning_rate": 9.950000000000001e-06, - "loss": 0.7312, - "step": 598 - }, - { - "epoch": 0.021025290019129856, - "grad_norm": 1.0579527616500854, - "learning_rate": 9.966666666666667e-06, - "loss": 0.6637, - "step": 599 - }, - { - "epoch": 0.021060390670246933, - "grad_norm": 0.8525680303573608, - "learning_rate": 9.983333333333333e-06, - "loss": 0.5986, - "step": 600 - }, - { - "epoch": 0.021095491321364013, - "grad_norm": 0.9275010824203491, - "learning_rate": 1e-05, - "loss": 0.4289, - "step": 601 - }, - { - "epoch": 0.02113059197248109, - "grad_norm": 1.2061110734939575, - "learning_rate": 1.0016666666666667e-05, - "loss": 0.681, - "step": 602 - }, - { - "epoch": 0.02116569262359817, - "grad_norm": 0.9286341667175293, - "learning_rate": 1.0033333333333333e-05, - "loss": 0.6417, - "step": 603 - }, - { - "epoch": 0.021200793274715246, - "grad_norm": 0.8527871966362, - "learning_rate": 1.005e-05, - "loss": 0.5693, - "step": 604 - }, - { - "epoch": 0.021235893925832323, - "grad_norm": 0.931303083896637, - "learning_rate": 1.0066666666666668e-05, - "loss": 0.6173, - "step": 605 - }, - { - "epoch": 0.021270994576949403, - "grad_norm": 0.8897165656089783, - "learning_rate": 1.0083333333333334e-05, - "loss": 0.6511, - "step": 606 - }, - { - "epoch": 0.02130609522806648, - "grad_norm": 0.919098973274231, - "learning_rate": 1.0100000000000002e-05, - "loss": 0.6153, - "step": 607 - }, - { - "epoch": 0.02134119587918356, - "grad_norm": 1.0162445306777954, - "learning_rate": 1.0116666666666667e-05, - "loss": 0.5267, - "step": 608 - }, - { - "epoch": 0.021376296530300636, - "grad_norm": 1.3944861888885498, - "learning_rate": 1.0133333333333333e-05, - "loss": 0.6423, - "step": 609 - }, - { - "epoch": 0.021411397181417716, - "grad_norm": 1.162306547164917, - "learning_rate": 1.0150000000000001e-05, - "loss": 0.7103, - "step": 610 - }, - { - "epoch": 0.021446497832534793, - "grad_norm": 1.290745496749878, - "learning_rate": 1.0166666666666667e-05, - "loss": 0.6585, - "step": 611 - }, - { - "epoch": 0.021481598483651873, - "grad_norm": 0.9385931491851807, - "learning_rate": 1.0183333333333333e-05, - "loss": 0.515, - "step": 612 - }, - { - "epoch": 0.02151669913476895, - "grad_norm": 0.9629005789756775, - "learning_rate": 1.02e-05, - "loss": 0.6097, - "step": 613 - }, - { - "epoch": 0.02155179978588603, - "grad_norm": 1.3584367036819458, - "learning_rate": 1.0216666666666668e-05, - "loss": 0.693, - "step": 614 - }, - { - "epoch": 0.021586900437003106, - "grad_norm": 1.038100004196167, - "learning_rate": 1.0233333333333334e-05, - "loss": 0.5273, - "step": 615 - }, - { - "epoch": 0.021622001088120183, - "grad_norm": 1.0167081356048584, - "learning_rate": 1.025e-05, - "loss": 0.6672, - "step": 616 - }, - { - "epoch": 0.021657101739237263, - "grad_norm": 1.0161933898925781, - "learning_rate": 1.0266666666666668e-05, - "loss": 0.6596, - "step": 617 - }, - { - "epoch": 0.02169220239035434, - "grad_norm": 0.8233750462532043, - "learning_rate": 1.0283333333333334e-05, - "loss": 0.5884, - "step": 618 - }, - { - "epoch": 0.02172730304147142, - "grad_norm": 1.0802810192108154, - "learning_rate": 1.03e-05, - "loss": 0.6126, - "step": 619 - }, - { - "epoch": 0.021762403692588497, - "grad_norm": 0.7562347054481506, - "learning_rate": 1.0316666666666667e-05, - "loss": 0.6631, - "step": 620 - }, - { - "epoch": 0.021797504343705577, - "grad_norm": 0.9656702280044556, - "learning_rate": 1.0333333333333333e-05, - "loss": 0.4001, - "step": 621 - }, - { - "epoch": 0.021832604994822653, - "grad_norm": 1.4866695404052734, - "learning_rate": 1.035e-05, - "loss": 0.5834, - "step": 622 - }, - { - "epoch": 0.021867705645939733, - "grad_norm": 1.167631983757019, - "learning_rate": 1.0366666666666667e-05, - "loss": 0.6803, - "step": 623 - }, - { - "epoch": 0.02190280629705681, - "grad_norm": 0.9970325827598572, - "learning_rate": 1.0383333333333334e-05, - "loss": 0.6531, - "step": 624 - }, - { - "epoch": 0.02193790694817389, - "grad_norm": 0.868802547454834, - "learning_rate": 1.04e-05, - "loss": 0.5987, - "step": 625 - }, - { - "epoch": 0.021973007599290967, - "grad_norm": 0.9121021628379822, - "learning_rate": 1.0416666666666668e-05, - "loss": 0.6379, - "step": 626 - }, - { - "epoch": 0.022008108250408043, - "grad_norm": 1.1371960639953613, - "learning_rate": 1.0433333333333334e-05, - "loss": 0.6122, - "step": 627 - }, - { - "epoch": 0.022043208901525124, - "grad_norm": 0.9022966027259827, - "learning_rate": 1.045e-05, - "loss": 0.5873, - "step": 628 - }, - { - "epoch": 0.0220783095526422, - "grad_norm": 0.8861680030822754, - "learning_rate": 1.0466666666666668e-05, - "loss": 0.549, - "step": 629 - }, - { - "epoch": 0.02211341020375928, - "grad_norm": 1.0784804821014404, - "learning_rate": 1.0483333333333333e-05, - "loss": 0.5279, - "step": 630 - }, - { - "epoch": 0.022148510854876357, - "grad_norm": 0.943938672542572, - "learning_rate": 1.05e-05, - "loss": 0.61, - "step": 631 - }, - { - "epoch": 0.022183611505993437, - "grad_norm": 1.0902332067489624, - "learning_rate": 1.0516666666666667e-05, - "loss": 0.5469, - "step": 632 - }, - { - "epoch": 0.022218712157110514, - "grad_norm": 1.0747830867767334, - "learning_rate": 1.0533333333333335e-05, - "loss": 0.539, - "step": 633 - }, - { - "epoch": 0.022253812808227594, - "grad_norm": 0.8802452683448792, - "learning_rate": 1.055e-05, - "loss": 0.5256, - "step": 634 - }, - { - "epoch": 0.02228891345934467, - "grad_norm": 1.0567326545715332, - "learning_rate": 1.0566666666666668e-05, - "loss": 0.606, - "step": 635 - }, - { - "epoch": 0.02232401411046175, - "grad_norm": 0.9563356637954712, - "learning_rate": 1.0583333333333334e-05, - "loss": 0.5556, - "step": 636 - }, - { - "epoch": 0.022359114761578827, - "grad_norm": 1.0935349464416504, - "learning_rate": 1.06e-05, - "loss": 0.5054, - "step": 637 - }, - { - "epoch": 0.022394215412695904, - "grad_norm": 1.0036141872406006, - "learning_rate": 1.0616666666666668e-05, - "loss": 0.6126, - "step": 638 - }, - { - "epoch": 0.022429316063812984, - "grad_norm": 1.4181396961212158, - "learning_rate": 1.0633333333333334e-05, - "loss": 0.5308, - "step": 639 - }, - { - "epoch": 0.02246441671493006, - "grad_norm": 0.95639568567276, - "learning_rate": 1.065e-05, - "loss": 0.5581, - "step": 640 - }, - { - "epoch": 0.02249951736604714, - "grad_norm": 0.9342384934425354, - "learning_rate": 1.0666666666666667e-05, - "loss": 0.6103, - "step": 641 - }, - { - "epoch": 0.022534618017164217, - "grad_norm": 0.9774160385131836, - "learning_rate": 1.0683333333333333e-05, - "loss": 0.6138, - "step": 642 - }, - { - "epoch": 0.022569718668281297, - "grad_norm": 1.0911550521850586, - "learning_rate": 1.0700000000000001e-05, - "loss": 0.5136, - "step": 643 - }, - { - "epoch": 0.022604819319398374, - "grad_norm": 0.9475198984146118, - "learning_rate": 1.0716666666666667e-05, - "loss": 0.6665, - "step": 644 - }, - { - "epoch": 0.022639919970515454, - "grad_norm": 0.9182398915290833, - "learning_rate": 1.0733333333333334e-05, - "loss": 0.6732, - "step": 645 - }, - { - "epoch": 0.02267502062163253, - "grad_norm": 0.9509775042533875, - "learning_rate": 1.075e-05, - "loss": 0.5908, - "step": 646 - }, - { - "epoch": 0.02271012127274961, - "grad_norm": 1.0147144794464111, - "learning_rate": 1.0766666666666666e-05, - "loss": 0.5684, - "step": 647 - }, - { - "epoch": 0.022745221923866688, - "grad_norm": 1.1849620342254639, - "learning_rate": 1.0783333333333334e-05, - "loss": 0.51, - "step": 648 - }, - { - "epoch": 0.022780322574983768, - "grad_norm": 1.0161203145980835, - "learning_rate": 1.08e-05, - "loss": 0.5756, - "step": 649 - }, - { - "epoch": 0.022815423226100844, - "grad_norm": 1.0142561197280884, - "learning_rate": 1.0816666666666666e-05, - "loss": 0.5911, - "step": 650 - }, - { - "epoch": 0.02285052387721792, - "grad_norm": 1.3482136726379395, - "learning_rate": 1.0833333333333334e-05, - "loss": 0.7144, - "step": 651 - }, - { - "epoch": 0.022885624528335, - "grad_norm": 1.1933956146240234, - "learning_rate": 1.0850000000000001e-05, - "loss": 0.6041, - "step": 652 - }, - { - "epoch": 0.022920725179452078, - "grad_norm": 1.0066577196121216, - "learning_rate": 1.0866666666666667e-05, - "loss": 0.4402, - "step": 653 - }, - { - "epoch": 0.022955825830569158, - "grad_norm": 1.0669445991516113, - "learning_rate": 1.0883333333333335e-05, - "loss": 0.5802, - "step": 654 - }, - { - "epoch": 0.022990926481686234, - "grad_norm": 1.1072990894317627, - "learning_rate": 1.09e-05, - "loss": 0.5919, - "step": 655 - }, - { - "epoch": 0.023026027132803314, - "grad_norm": 0.9607362151145935, - "learning_rate": 1.0916666666666667e-05, - "loss": 0.5904, - "step": 656 - }, - { - "epoch": 0.02306112778392039, - "grad_norm": 0.9523122906684875, - "learning_rate": 1.0933333333333334e-05, - "loss": 0.6686, - "step": 657 - }, - { - "epoch": 0.02309622843503747, - "grad_norm": 0.9108603000640869, - "learning_rate": 1.095e-05, - "loss": 0.5454, - "step": 658 - }, - { - "epoch": 0.023131329086154548, - "grad_norm": 1.0565656423568726, - "learning_rate": 1.0966666666666666e-05, - "loss": 0.5992, - "step": 659 - }, - { - "epoch": 0.023166429737271628, - "grad_norm": 1.0518982410430908, - "learning_rate": 1.0983333333333334e-05, - "loss": 0.5305, - "step": 660 - }, - { - "epoch": 0.023201530388388705, - "grad_norm": 1.1661075353622437, - "learning_rate": 1.1000000000000001e-05, - "loss": 0.5529, - "step": 661 - }, - { - "epoch": 0.02323663103950578, - "grad_norm": 1.287306308746338, - "learning_rate": 1.1016666666666667e-05, - "loss": 0.5796, - "step": 662 - }, - { - "epoch": 0.02327173169062286, - "grad_norm": 1.0319486856460571, - "learning_rate": 1.1033333333333335e-05, - "loss": 0.6112, - "step": 663 - }, - { - "epoch": 0.023306832341739938, - "grad_norm": 0.9688727855682373, - "learning_rate": 1.1050000000000001e-05, - "loss": 0.5523, - "step": 664 - }, - { - "epoch": 0.023341932992857018, - "grad_norm": 0.965029239654541, - "learning_rate": 1.1066666666666667e-05, - "loss": 0.6126, - "step": 665 - }, - { - "epoch": 0.023377033643974095, - "grad_norm": 0.9012778401374817, - "learning_rate": 1.1083333333333335e-05, - "loss": 0.4922, - "step": 666 - }, - { - "epoch": 0.023412134295091175, - "grad_norm": 0.9865382313728333, - "learning_rate": 1.11e-05, - "loss": 0.6194, - "step": 667 - }, - { - "epoch": 0.02344723494620825, - "grad_norm": 0.8062460422515869, - "learning_rate": 1.1116666666666666e-05, - "loss": 0.5891, - "step": 668 - }, - { - "epoch": 0.02348233559732533, - "grad_norm": 1.089154601097107, - "learning_rate": 1.1133333333333334e-05, - "loss": 0.6467, - "step": 669 - }, - { - "epoch": 0.023517436248442408, - "grad_norm": 0.9915698766708374, - "learning_rate": 1.115e-05, - "loss": 0.6542, - "step": 670 - }, - { - "epoch": 0.02355253689955949, - "grad_norm": 0.9813034534454346, - "learning_rate": 1.1166666666666668e-05, - "loss": 0.6518, - "step": 671 - }, - { - "epoch": 0.023587637550676565, - "grad_norm": 1.028455376625061, - "learning_rate": 1.1183333333333335e-05, - "loss": 0.5711, - "step": 672 - }, - { - "epoch": 0.02362273820179364, - "grad_norm": 0.9291154146194458, - "learning_rate": 1.1200000000000001e-05, - "loss": 0.5603, - "step": 673 - }, - { - "epoch": 0.02365783885291072, - "grad_norm": 0.9011480212211609, - "learning_rate": 1.1216666666666667e-05, - "loss": 0.5792, - "step": 674 - }, - { - "epoch": 0.0236929395040278, - "grad_norm": 0.856713056564331, - "learning_rate": 1.1233333333333333e-05, - "loss": 0.6527, - "step": 675 - }, - { - "epoch": 0.02372804015514488, - "grad_norm": 1.07389497756958, - "learning_rate": 1.125e-05, - "loss": 0.7182, - "step": 676 - }, - { - "epoch": 0.023763140806261955, - "grad_norm": 0.9553548693656921, - "learning_rate": 1.1266666666666667e-05, - "loss": 0.5832, - "step": 677 - }, - { - "epoch": 0.023798241457379035, - "grad_norm": 1.075767159461975, - "learning_rate": 1.1283333333333333e-05, - "loss": 0.6301, - "step": 678 - }, - { - "epoch": 0.023833342108496112, - "grad_norm": 1.0410617589950562, - "learning_rate": 1.13e-05, - "loss": 0.7015, - "step": 679 - }, - { - "epoch": 0.023868442759613192, - "grad_norm": 0.8720484972000122, - "learning_rate": 1.1316666666666668e-05, - "loss": 0.5172, - "step": 680 - }, - { - "epoch": 0.02390354341073027, - "grad_norm": 1.0325586795806885, - "learning_rate": 1.1333333333333334e-05, - "loss": 0.5985, - "step": 681 - }, - { - "epoch": 0.02393864406184735, - "grad_norm": 0.9808174967765808, - "learning_rate": 1.1350000000000001e-05, - "loss": 0.6162, - "step": 682 - }, - { - "epoch": 0.023973744712964425, - "grad_norm": 0.9918460249900818, - "learning_rate": 1.1366666666666667e-05, - "loss": 0.4101, - "step": 683 - }, - { - "epoch": 0.024008845364081505, - "grad_norm": 0.7662786841392517, - "learning_rate": 1.1383333333333333e-05, - "loss": 0.5683, - "step": 684 - }, - { - "epoch": 0.024043946015198582, - "grad_norm": 0.9496008157730103, - "learning_rate": 1.1400000000000001e-05, - "loss": 0.4938, - "step": 685 - }, - { - "epoch": 0.02407904666631566, - "grad_norm": 1.5666351318359375, - "learning_rate": 1.1416666666666667e-05, - "loss": 0.6881, - "step": 686 - }, - { - "epoch": 0.02411414731743274, - "grad_norm": 0.9467529058456421, - "learning_rate": 1.1433333333333333e-05, - "loss": 0.4846, - "step": 687 - }, - { - "epoch": 0.024149247968549815, - "grad_norm": 1.2340805530548096, - "learning_rate": 1.145e-05, - "loss": 0.6427, - "step": 688 - }, - { - "epoch": 0.024184348619666896, - "grad_norm": 0.9724809527397156, - "learning_rate": 1.1466666666666666e-05, - "loss": 0.639, - "step": 689 - }, - { - "epoch": 0.024219449270783972, - "grad_norm": 0.87039715051651, - "learning_rate": 1.1483333333333334e-05, - "loss": 0.5288, - "step": 690 - }, - { - "epoch": 0.024254549921901052, - "grad_norm": 0.8920450806617737, - "learning_rate": 1.1500000000000002e-05, - "loss": 0.4972, - "step": 691 - }, - { - "epoch": 0.02428965057301813, - "grad_norm": 0.935610830783844, - "learning_rate": 1.1516666666666668e-05, - "loss": 0.5216, - "step": 692 - }, - { - "epoch": 0.02432475122413521, - "grad_norm": 0.9545224905014038, - "learning_rate": 1.1533333333333334e-05, - "loss": 0.5933, - "step": 693 - }, - { - "epoch": 0.024359851875252286, - "grad_norm": 0.7749422788619995, - "learning_rate": 1.1550000000000001e-05, - "loss": 0.484, - "step": 694 - }, - { - "epoch": 0.024394952526369366, - "grad_norm": 1.4642022848129272, - "learning_rate": 1.1566666666666667e-05, - "loss": 0.6889, - "step": 695 - }, - { - "epoch": 0.024430053177486442, - "grad_norm": 1.0278215408325195, - "learning_rate": 1.1583333333333333e-05, - "loss": 0.6445, - "step": 696 - }, - { - "epoch": 0.02446515382860352, - "grad_norm": 1.0251096487045288, - "learning_rate": 1.16e-05, - "loss": 0.666, - "step": 697 - }, - { - "epoch": 0.0245002544797206, - "grad_norm": 1.1746734380722046, - "learning_rate": 1.1616666666666667e-05, - "loss": 0.5867, - "step": 698 - }, - { - "epoch": 0.024535355130837676, - "grad_norm": 1.1448838710784912, - "learning_rate": 1.1633333333333334e-05, - "loss": 0.6583, - "step": 699 - }, - { - "epoch": 0.024570455781954756, - "grad_norm": 0.8148424029350281, - "learning_rate": 1.1650000000000002e-05, - "loss": 0.6255, - "step": 700 - }, - { - "epoch": 0.024605556433071833, - "grad_norm": 0.8810437321662903, - "learning_rate": 1.1666666666666668e-05, - "loss": 0.644, - "step": 701 - }, - { - "epoch": 0.024640657084188913, - "grad_norm": 1.0027213096618652, - "learning_rate": 1.1683333333333334e-05, - "loss": 0.627, - "step": 702 - }, - { - "epoch": 0.02467575773530599, - "grad_norm": 0.9059099555015564, - "learning_rate": 1.1700000000000001e-05, - "loss": 0.6655, - "step": 703 - }, - { - "epoch": 0.02471085838642307, - "grad_norm": 0.8976470828056335, - "learning_rate": 1.1716666666666667e-05, - "loss": 0.5199, - "step": 704 - }, - { - "epoch": 0.024745959037540146, - "grad_norm": 0.7602828741073608, - "learning_rate": 1.1733333333333333e-05, - "loss": 0.6293, - "step": 705 - }, - { - "epoch": 0.024781059688657226, - "grad_norm": 1.2034651041030884, - "learning_rate": 1.175e-05, - "loss": 0.5091, - "step": 706 - }, - { - "epoch": 0.024816160339774303, - "grad_norm": 1.109535813331604, - "learning_rate": 1.1766666666666667e-05, - "loss": 0.6606, - "step": 707 - }, - { - "epoch": 0.02485126099089138, - "grad_norm": 0.9311160445213318, - "learning_rate": 1.1783333333333333e-05, - "loss": 0.6366, - "step": 708 - }, - { - "epoch": 0.02488636164200846, - "grad_norm": 1.19136643409729, - "learning_rate": 1.18e-05, - "loss": 0.6443, - "step": 709 - }, - { - "epoch": 0.024921462293125536, - "grad_norm": 0.9227035045623779, - "learning_rate": 1.1816666666666668e-05, - "loss": 0.5546, - "step": 710 - }, - { - "epoch": 0.024956562944242616, - "grad_norm": 0.958421528339386, - "learning_rate": 1.1833333333333334e-05, - "loss": 0.5788, - "step": 711 - }, - { - "epoch": 0.024991663595359693, - "grad_norm": 0.8536099195480347, - "learning_rate": 1.185e-05, - "loss": 0.5736, - "step": 712 - }, - { - "epoch": 0.025026764246476773, - "grad_norm": 1.2013400793075562, - "learning_rate": 1.1866666666666668e-05, - "loss": 0.622, - "step": 713 - }, - { - "epoch": 0.02506186489759385, - "grad_norm": 1.037121295928955, - "learning_rate": 1.1883333333333334e-05, - "loss": 0.6709, - "step": 714 - }, - { - "epoch": 0.02509696554871093, - "grad_norm": 1.0345181226730347, - "learning_rate": 1.19e-05, - "loss": 0.5395, - "step": 715 - }, - { - "epoch": 0.025132066199828006, - "grad_norm": 1.424540638923645, - "learning_rate": 1.1916666666666667e-05, - "loss": 0.5877, - "step": 716 - }, - { - "epoch": 0.025167166850945086, - "grad_norm": 1.033006191253662, - "learning_rate": 1.1933333333333333e-05, - "loss": 0.715, - "step": 717 - }, - { - "epoch": 0.025202267502062163, - "grad_norm": 1.0266960859298706, - "learning_rate": 1.195e-05, - "loss": 0.6428, - "step": 718 - }, - { - "epoch": 0.025237368153179243, - "grad_norm": 0.9285753965377808, - "learning_rate": 1.1966666666666668e-05, - "loss": 0.5083, - "step": 719 - }, - { - "epoch": 0.02527246880429632, - "grad_norm": 0.8545864820480347, - "learning_rate": 1.1983333333333334e-05, - "loss": 0.6902, - "step": 720 - }, - { - "epoch": 0.025307569455413396, - "grad_norm": 0.9641414880752563, - "learning_rate": 1.2e-05, - "loss": 0.6734, - "step": 721 - }, - { - "epoch": 0.025342670106530477, - "grad_norm": 1.1507606506347656, - "learning_rate": 1.2016666666666668e-05, - "loss": 0.7026, - "step": 722 - }, - { - "epoch": 0.025377770757647553, - "grad_norm": 0.8300370573997498, - "learning_rate": 1.2033333333333334e-05, - "loss": 0.624, - "step": 723 - }, - { - "epoch": 0.025412871408764633, - "grad_norm": 0.9982704520225525, - "learning_rate": 1.205e-05, - "loss": 0.5323, - "step": 724 - }, - { - "epoch": 0.02544797205988171, - "grad_norm": 1.018902063369751, - "learning_rate": 1.2066666666666667e-05, - "loss": 0.6668, - "step": 725 - }, - { - "epoch": 0.02548307271099879, - "grad_norm": 1.0369715690612793, - "learning_rate": 1.2083333333333333e-05, - "loss": 0.5301, - "step": 726 - }, - { - "epoch": 0.025518173362115867, - "grad_norm": 0.9668406844139099, - "learning_rate": 1.2100000000000001e-05, - "loss": 0.5518, - "step": 727 - }, - { - "epoch": 0.025553274013232947, - "grad_norm": 0.7166993021965027, - "learning_rate": 1.2116666666666669e-05, - "loss": 0.6317, - "step": 728 - }, - { - "epoch": 0.025588374664350023, - "grad_norm": 0.9985421299934387, - "learning_rate": 1.2133333333333335e-05, - "loss": 0.5496, - "step": 729 - }, - { - "epoch": 0.025623475315467104, - "grad_norm": 0.9921954274177551, - "learning_rate": 1.215e-05, - "loss": 0.6052, - "step": 730 - }, - { - "epoch": 0.02565857596658418, - "grad_norm": 1.082417607307434, - "learning_rate": 1.2166666666666668e-05, - "loss": 0.6682, - "step": 731 - }, - { - "epoch": 0.025693676617701257, - "grad_norm": 0.9536468386650085, - "learning_rate": 1.2183333333333334e-05, - "loss": 0.5927, - "step": 732 - }, - { - "epoch": 0.025728777268818337, - "grad_norm": 0.8978720307350159, - "learning_rate": 1.22e-05, - "loss": 0.6552, - "step": 733 - }, - { - "epoch": 0.025763877919935414, - "grad_norm": 0.8055136203765869, - "learning_rate": 1.2216666666666668e-05, - "loss": 0.5952, - "step": 734 - }, - { - "epoch": 0.025798978571052494, - "grad_norm": 1.1340339183807373, - "learning_rate": 1.2233333333333334e-05, - "loss": 0.6247, - "step": 735 - }, - { - "epoch": 0.02583407922216957, - "grad_norm": 0.8621813058853149, - "learning_rate": 1.225e-05, - "loss": 0.662, - "step": 736 - }, - { - "epoch": 0.02586917987328665, - "grad_norm": 0.915064811706543, - "learning_rate": 1.2266666666666667e-05, - "loss": 0.5154, - "step": 737 - }, - { - "epoch": 0.025904280524403727, - "grad_norm": 0.822255551815033, - "learning_rate": 1.2283333333333335e-05, - "loss": 0.6019, - "step": 738 - }, - { - "epoch": 0.025939381175520807, - "grad_norm": 1.380184292793274, - "learning_rate": 1.23e-05, - "loss": 0.6948, - "step": 739 - }, - { - "epoch": 0.025974481826637884, - "grad_norm": 0.7636292576789856, - "learning_rate": 1.2316666666666667e-05, - "loss": 0.5759, - "step": 740 - }, - { - "epoch": 0.026009582477754964, - "grad_norm": 0.8857672810554504, - "learning_rate": 1.2333333333333334e-05, - "loss": 0.474, - "step": 741 - }, - { - "epoch": 0.02604468312887204, - "grad_norm": 1.055988073348999, - "learning_rate": 1.235e-05, - "loss": 0.72, - "step": 742 - }, - { - "epoch": 0.026079783779989117, - "grad_norm": 1.0068556070327759, - "learning_rate": 1.2366666666666666e-05, - "loss": 0.6195, - "step": 743 - }, - { - "epoch": 0.026114884431106197, - "grad_norm": 0.813576877117157, - "learning_rate": 1.2383333333333334e-05, - "loss": 0.613, - "step": 744 - }, - { - "epoch": 0.026149985082223274, - "grad_norm": 0.8425983786582947, - "learning_rate": 1.24e-05, - "loss": 0.6116, - "step": 745 - }, - { - "epoch": 0.026185085733340354, - "grad_norm": 1.422468900680542, - "learning_rate": 1.2416666666666667e-05, - "loss": 0.6661, - "step": 746 - }, - { - "epoch": 0.02622018638445743, - "grad_norm": 1.3028035163879395, - "learning_rate": 1.2433333333333335e-05, - "loss": 0.6593, - "step": 747 - }, - { - "epoch": 0.02625528703557451, - "grad_norm": 0.9752406477928162, - "learning_rate": 1.2450000000000001e-05, - "loss": 0.5264, - "step": 748 - }, - { - "epoch": 0.026290387686691587, - "grad_norm": 1.2601510286331177, - "learning_rate": 1.2466666666666667e-05, - "loss": 0.5846, - "step": 749 - }, - { - "epoch": 0.026325488337808668, - "grad_norm": 1.015480399131775, - "learning_rate": 1.2483333333333335e-05, - "loss": 0.5827, - "step": 750 - }, - { - "epoch": 0.026360588988925744, - "grad_norm": 0.9317602515220642, - "learning_rate": 1.25e-05, - "loss": 0.5262, - "step": 751 - }, - { - "epoch": 0.026395689640042824, - "grad_norm": 1.1067959070205688, - "learning_rate": 1.2516666666666668e-05, - "loss": 0.7215, - "step": 752 - }, - { - "epoch": 0.0264307902911599, - "grad_norm": 0.8901679515838623, - "learning_rate": 1.2533333333333332e-05, - "loss": 0.7391, - "step": 753 - }, - { - "epoch": 0.026465890942276978, - "grad_norm": 1.2453365325927734, - "learning_rate": 1.255e-05, - "loss": 0.7129, - "step": 754 - }, - { - "epoch": 0.026500991593394058, - "grad_norm": 0.8864213824272156, - "learning_rate": 1.2566666666666668e-05, - "loss": 0.5234, - "step": 755 - }, - { - "epoch": 0.026536092244511134, - "grad_norm": 1.0965079069137573, - "learning_rate": 1.2583333333333334e-05, - "loss": 0.7174, - "step": 756 - }, - { - "epoch": 0.026571192895628214, - "grad_norm": 1.2259900569915771, - "learning_rate": 1.2600000000000001e-05, - "loss": 0.5444, - "step": 757 - }, - { - "epoch": 0.02660629354674529, - "grad_norm": 1.1042605638504028, - "learning_rate": 1.2616666666666669e-05, - "loss": 0.6481, - "step": 758 - }, - { - "epoch": 0.02664139419786237, - "grad_norm": 0.8764416575431824, - "learning_rate": 1.2633333333333333e-05, - "loss": 0.6082, - "step": 759 - }, - { - "epoch": 0.026676494848979448, - "grad_norm": 1.0908573865890503, - "learning_rate": 1.2650000000000001e-05, - "loss": 0.6022, - "step": 760 - }, - { - "epoch": 0.026711595500096528, - "grad_norm": 1.1708296537399292, - "learning_rate": 1.2666666666666668e-05, - "loss": 0.6465, - "step": 761 - }, - { - "epoch": 0.026746696151213604, - "grad_norm": 0.8845819234848022, - "learning_rate": 1.2683333333333333e-05, - "loss": 0.6527, - "step": 762 - }, - { - "epoch": 0.026781796802330685, - "grad_norm": 1.326408863067627, - "learning_rate": 1.27e-05, - "loss": 0.7366, - "step": 763 - }, - { - "epoch": 0.02681689745344776, - "grad_norm": 1.086714267730713, - "learning_rate": 1.2716666666666668e-05, - "loss": 0.5589, - "step": 764 - }, - { - "epoch": 0.02685199810456484, - "grad_norm": 0.8319637179374695, - "learning_rate": 1.2733333333333334e-05, - "loss": 0.4679, - "step": 765 - }, - { - "epoch": 0.026887098755681918, - "grad_norm": 0.9143708348274231, - "learning_rate": 1.2750000000000002e-05, - "loss": 0.6157, - "step": 766 - }, - { - "epoch": 0.026922199406798995, - "grad_norm": 0.884982168674469, - "learning_rate": 1.276666666666667e-05, - "loss": 0.6706, - "step": 767 - }, - { - "epoch": 0.026957300057916075, - "grad_norm": 1.0044327974319458, - "learning_rate": 1.2783333333333333e-05, - "loss": 0.6347, - "step": 768 - }, - { - "epoch": 0.02699240070903315, - "grad_norm": 0.8781887292861938, - "learning_rate": 1.2800000000000001e-05, - "loss": 0.5701, - "step": 769 - }, - { - "epoch": 0.02702750136015023, - "grad_norm": 1.0674799680709839, - "learning_rate": 1.2816666666666669e-05, - "loss": 0.5727, - "step": 770 - }, - { - "epoch": 0.027062602011267308, - "grad_norm": 1.0415328741073608, - "learning_rate": 1.2833333333333333e-05, - "loss": 0.5454, - "step": 771 - }, - { - "epoch": 0.027097702662384388, - "grad_norm": 0.96322101354599, - "learning_rate": 1.285e-05, - "loss": 0.6109, - "step": 772 - }, - { - "epoch": 0.027132803313501465, - "grad_norm": 0.772331953048706, - "learning_rate": 1.2866666666666668e-05, - "loss": 0.6148, - "step": 773 - }, - { - "epoch": 0.027167903964618545, - "grad_norm": 0.9610581398010254, - "learning_rate": 1.2883333333333333e-05, - "loss": 0.5777, - "step": 774 - }, - { - "epoch": 0.02720300461573562, - "grad_norm": 0.8241354823112488, - "learning_rate": 1.29e-05, - "loss": 0.4586, - "step": 775 - }, - { - "epoch": 0.0272381052668527, - "grad_norm": 1.0315505266189575, - "learning_rate": 1.2916666666666668e-05, - "loss": 0.5172, - "step": 776 - }, - { - "epoch": 0.02727320591796978, - "grad_norm": 0.9936224818229675, - "learning_rate": 1.2933333333333334e-05, - "loss": 0.4708, - "step": 777 - }, - { - "epoch": 0.027308306569086855, - "grad_norm": 1.2409054040908813, - "learning_rate": 1.2950000000000001e-05, - "loss": 0.5101, - "step": 778 - }, - { - "epoch": 0.027343407220203935, - "grad_norm": 1.0505166053771973, - "learning_rate": 1.2966666666666669e-05, - "loss": 0.6259, - "step": 779 - }, - { - "epoch": 0.02737850787132101, - "grad_norm": 1.2441704273223877, - "learning_rate": 1.2983333333333333e-05, - "loss": 0.6467, - "step": 780 - }, - { - "epoch": 0.027413608522438092, - "grad_norm": 0.8379360437393188, - "learning_rate": 1.3000000000000001e-05, - "loss": 0.5609, - "step": 781 - }, - { - "epoch": 0.02744870917355517, - "grad_norm": 1.0697416067123413, - "learning_rate": 1.3016666666666669e-05, - "loss": 0.5539, - "step": 782 - }, - { - "epoch": 0.02748380982467225, - "grad_norm": 1.0635101795196533, - "learning_rate": 1.3033333333333333e-05, - "loss": 0.5278, - "step": 783 - }, - { - "epoch": 0.027518910475789325, - "grad_norm": 0.9322614669799805, - "learning_rate": 1.305e-05, - "loss": 0.6916, - "step": 784 - }, - { - "epoch": 0.027554011126906405, - "grad_norm": 0.9332512617111206, - "learning_rate": 1.3066666666666666e-05, - "loss": 0.5854, - "step": 785 - }, - { - "epoch": 0.027589111778023482, - "grad_norm": 1.05270254611969, - "learning_rate": 1.3083333333333334e-05, - "loss": 0.6719, - "step": 786 - }, - { - "epoch": 0.027624212429140562, - "grad_norm": 1.0886476039886475, - "learning_rate": 1.3100000000000002e-05, - "loss": 0.5726, - "step": 787 - }, - { - "epoch": 0.02765931308025764, - "grad_norm": 0.8410332798957825, - "learning_rate": 1.3116666666666666e-05, - "loss": 0.5326, - "step": 788 - }, - { - "epoch": 0.027694413731374715, - "grad_norm": 0.9487334489822388, - "learning_rate": 1.3133333333333334e-05, - "loss": 0.6288, - "step": 789 - }, - { - "epoch": 0.027729514382491795, - "grad_norm": 1.0704911947250366, - "learning_rate": 1.3150000000000001e-05, - "loss": 0.6707, - "step": 790 - }, - { - "epoch": 0.027764615033608872, - "grad_norm": 1.0350197553634644, - "learning_rate": 1.3166666666666665e-05, - "loss": 0.6146, - "step": 791 - }, - { - "epoch": 0.027799715684725952, - "grad_norm": 1.1455434560775757, - "learning_rate": 1.3183333333333333e-05, - "loss": 0.6565, - "step": 792 - }, - { - "epoch": 0.02783481633584303, - "grad_norm": 1.2005776166915894, - "learning_rate": 1.32e-05, - "loss": 0.6492, - "step": 793 - }, - { - "epoch": 0.02786991698696011, - "grad_norm": 1.006037950515747, - "learning_rate": 1.3216666666666667e-05, - "loss": 0.5451, - "step": 794 - }, - { - "epoch": 0.027905017638077186, - "grad_norm": 1.0440253019332886, - "learning_rate": 1.3233333333333334e-05, - "loss": 0.6155, - "step": 795 - }, - { - "epoch": 0.027940118289194266, - "grad_norm": 0.9793239235877991, - "learning_rate": 1.3250000000000002e-05, - "loss": 0.6326, - "step": 796 - }, - { - "epoch": 0.027975218940311342, - "grad_norm": 1.0628732442855835, - "learning_rate": 1.3266666666666666e-05, - "loss": 0.7578, - "step": 797 - }, - { - "epoch": 0.028010319591428422, - "grad_norm": 1.0508205890655518, - "learning_rate": 1.3283333333333334e-05, - "loss": 0.7174, - "step": 798 - }, - { - "epoch": 0.0280454202425455, - "grad_norm": 1.0056098699569702, - "learning_rate": 1.3300000000000001e-05, - "loss": 0.6242, - "step": 799 - }, - { - "epoch": 0.02808052089366258, - "grad_norm": 1.0134036540985107, - "learning_rate": 1.3316666666666666e-05, - "loss": 0.5268, - "step": 800 - }, - { - "epoch": 0.028115621544779656, - "grad_norm": 1.0485649108886719, - "learning_rate": 1.3333333333333333e-05, - "loss": 0.6293, - "step": 801 - }, - { - "epoch": 0.028150722195896732, - "grad_norm": 0.925115704536438, - "learning_rate": 1.3350000000000001e-05, - "loss": 0.6019, - "step": 802 - }, - { - "epoch": 0.028185822847013813, - "grad_norm": 0.7955142855644226, - "learning_rate": 1.3366666666666667e-05, - "loss": 0.5405, - "step": 803 - }, - { - "epoch": 0.02822092349813089, - "grad_norm": 0.9358482956886292, - "learning_rate": 1.3383333333333335e-05, - "loss": 0.6317, - "step": 804 - }, - { - "epoch": 0.02825602414924797, - "grad_norm": 1.0600048303604126, - "learning_rate": 1.3400000000000002e-05, - "loss": 0.6228, - "step": 805 - }, - { - "epoch": 0.028291124800365046, - "grad_norm": 1.048999309539795, - "learning_rate": 1.3416666666666666e-05, - "loss": 0.5525, - "step": 806 - }, - { - "epoch": 0.028326225451482126, - "grad_norm": 0.9001591205596924, - "learning_rate": 1.3433333333333334e-05, - "loss": 0.5337, - "step": 807 - }, - { - "epoch": 0.028361326102599203, - "grad_norm": 0.9517924785614014, - "learning_rate": 1.3450000000000002e-05, - "loss": 0.5841, - "step": 808 - }, - { - "epoch": 0.028396426753716283, - "grad_norm": 1.066624402999878, - "learning_rate": 1.3466666666666666e-05, - "loss": 0.5356, - "step": 809 - }, - { - "epoch": 0.02843152740483336, - "grad_norm": 0.9107909798622131, - "learning_rate": 1.3483333333333334e-05, - "loss": 0.4049, - "step": 810 - }, - { - "epoch": 0.02846662805595044, - "grad_norm": 0.962810754776001, - "learning_rate": 1.3500000000000001e-05, - "loss": 0.5826, - "step": 811 - }, - { - "epoch": 0.028501728707067516, - "grad_norm": 1.0954890251159668, - "learning_rate": 1.3516666666666667e-05, - "loss": 0.5616, - "step": 812 - }, - { - "epoch": 0.028536829358184593, - "grad_norm": 1.017521858215332, - "learning_rate": 1.3533333333333335e-05, - "loss": 0.6612, - "step": 813 - }, - { - "epoch": 0.028571930009301673, - "grad_norm": 1.1435023546218872, - "learning_rate": 1.3550000000000002e-05, - "loss": 0.6079, - "step": 814 - }, - { - "epoch": 0.02860703066041875, - "grad_norm": 1.1919277906417847, - "learning_rate": 1.3566666666666667e-05, - "loss": 0.6498, - "step": 815 - }, - { - "epoch": 0.02864213131153583, - "grad_norm": 0.7939985990524292, - "learning_rate": 1.3583333333333334e-05, - "loss": 0.5117, - "step": 816 - }, - { - "epoch": 0.028677231962652906, - "grad_norm": 1.1839039325714111, - "learning_rate": 1.3600000000000002e-05, - "loss": 0.6423, - "step": 817 - }, - { - "epoch": 0.028712332613769986, - "grad_norm": 0.8231754302978516, - "learning_rate": 1.3616666666666666e-05, - "loss": 0.6446, - "step": 818 - }, - { - "epoch": 0.028747433264887063, - "grad_norm": 0.8469293117523193, - "learning_rate": 1.3633333333333334e-05, - "loss": 0.6566, - "step": 819 - }, - { - "epoch": 0.028782533916004143, - "grad_norm": 0.9139358401298523, - "learning_rate": 1.3650000000000001e-05, - "loss": 0.6733, - "step": 820 - }, - { - "epoch": 0.02881763456712122, - "grad_norm": 1.077144980430603, - "learning_rate": 1.3666666666666666e-05, - "loss": 0.6787, - "step": 821 - }, - { - "epoch": 0.0288527352182383, - "grad_norm": 0.8924608826637268, - "learning_rate": 1.3683333333333333e-05, - "loss": 0.6434, - "step": 822 - }, - { - "epoch": 0.028887835869355376, - "grad_norm": 1.005375862121582, - "learning_rate": 1.3700000000000001e-05, - "loss": 0.4474, - "step": 823 - }, - { - "epoch": 0.028922936520472453, - "grad_norm": 1.0390379428863525, - "learning_rate": 1.3716666666666667e-05, - "loss": 0.6256, - "step": 824 - }, - { - "epoch": 0.028958037171589533, - "grad_norm": 0.9136461615562439, - "learning_rate": 1.3733333333333335e-05, - "loss": 0.6353, - "step": 825 - }, - { - "epoch": 0.02899313782270661, - "grad_norm": 0.8225098252296448, - "learning_rate": 1.3750000000000002e-05, - "loss": 0.6197, - "step": 826 - }, - { - "epoch": 0.02902823847382369, - "grad_norm": 1.0780084133148193, - "learning_rate": 1.3766666666666666e-05, - "loss": 0.489, - "step": 827 - }, - { - "epoch": 0.029063339124940767, - "grad_norm": 1.130454421043396, - "learning_rate": 1.3783333333333334e-05, - "loss": 0.4865, - "step": 828 - }, - { - "epoch": 0.029098439776057847, - "grad_norm": 1.1053262948989868, - "learning_rate": 1.3800000000000002e-05, - "loss": 0.5767, - "step": 829 - }, - { - "epoch": 0.029133540427174923, - "grad_norm": 0.9139864444732666, - "learning_rate": 1.3816666666666666e-05, - "loss": 0.5643, - "step": 830 - }, - { - "epoch": 0.029168641078292003, - "grad_norm": 0.9921054840087891, - "learning_rate": 1.3833333333333334e-05, - "loss": 0.6298, - "step": 831 - }, - { - "epoch": 0.02920374172940908, - "grad_norm": 1.3478199243545532, - "learning_rate": 1.3850000000000001e-05, - "loss": 0.634, - "step": 832 - }, - { - "epoch": 0.02923884238052616, - "grad_norm": 1.1272192001342773, - "learning_rate": 1.3866666666666667e-05, - "loss": 0.4957, - "step": 833 - }, - { - "epoch": 0.029273943031643237, - "grad_norm": 1.0357730388641357, - "learning_rate": 1.3883333333333335e-05, - "loss": 0.6912, - "step": 834 - }, - { - "epoch": 0.029309043682760317, - "grad_norm": 1.2913200855255127, - "learning_rate": 1.3900000000000002e-05, - "loss": 0.6538, - "step": 835 - }, - { - "epoch": 0.029344144333877394, - "grad_norm": 1.028536319732666, - "learning_rate": 1.3916666666666667e-05, - "loss": 0.6877, - "step": 836 - }, - { - "epoch": 0.02937924498499447, - "grad_norm": 1.0354609489440918, - "learning_rate": 1.3933333333333334e-05, - "loss": 0.6208, - "step": 837 - }, - { - "epoch": 0.02941434563611155, - "grad_norm": 1.0116552114486694, - "learning_rate": 1.3950000000000002e-05, - "loss": 0.6542, - "step": 838 - }, - { - "epoch": 0.029449446287228627, - "grad_norm": 0.9813959002494812, - "learning_rate": 1.3966666666666666e-05, - "loss": 0.6933, - "step": 839 - }, - { - "epoch": 0.029484546938345707, - "grad_norm": 0.9651138782501221, - "learning_rate": 1.3983333333333334e-05, - "loss": 0.6897, - "step": 840 - }, - { - "epoch": 0.029519647589462784, - "grad_norm": 0.9314088821411133, - "learning_rate": 1.4000000000000001e-05, - "loss": 0.62, - "step": 841 - }, - { - "epoch": 0.029554748240579864, - "grad_norm": 0.9322080016136169, - "learning_rate": 1.4016666666666667e-05, - "loss": 0.4933, - "step": 842 - }, - { - "epoch": 0.02958984889169694, - "grad_norm": 1.2339155673980713, - "learning_rate": 1.4033333333333335e-05, - "loss": 0.6775, - "step": 843 - }, - { - "epoch": 0.02962494954281402, - "grad_norm": 1.107587218284607, - "learning_rate": 1.4050000000000003e-05, - "loss": 0.513, - "step": 844 - }, - { - "epoch": 0.029660050193931097, - "grad_norm": 0.9127965569496155, - "learning_rate": 1.4066666666666667e-05, - "loss": 0.5144, - "step": 845 - }, - { - "epoch": 0.029695150845048177, - "grad_norm": 0.958262026309967, - "learning_rate": 1.4083333333333335e-05, - "loss": 0.4555, - "step": 846 - }, - { - "epoch": 0.029730251496165254, - "grad_norm": 0.9273630976676941, - "learning_rate": 1.4099999999999999e-05, - "loss": 0.5668, - "step": 847 - }, - { - "epoch": 0.02976535214728233, - "grad_norm": 1.0010329484939575, - "learning_rate": 1.4116666666666666e-05, - "loss": 0.6032, - "step": 848 - }, - { - "epoch": 0.02980045279839941, - "grad_norm": 0.9038178324699402, - "learning_rate": 1.4133333333333334e-05, - "loss": 0.5074, - "step": 849 - }, - { - "epoch": 0.029835553449516487, - "grad_norm": 1.2543113231658936, - "learning_rate": 1.415e-05, - "loss": 0.6549, - "step": 850 - }, - { - "epoch": 0.029870654100633567, - "grad_norm": 0.9433026313781738, - "learning_rate": 1.4166666666666668e-05, - "loss": 0.5987, - "step": 851 - }, - { - "epoch": 0.029905754751750644, - "grad_norm": 1.052535057067871, - "learning_rate": 1.4183333333333335e-05, - "loss": 0.5545, - "step": 852 - }, - { - "epoch": 0.029940855402867724, - "grad_norm": 0.9284372329711914, - "learning_rate": 1.42e-05, - "loss": 0.5122, - "step": 853 - }, - { - "epoch": 0.0299759560539848, - "grad_norm": 1.0561364889144897, - "learning_rate": 1.4216666666666667e-05, - "loss": 0.6223, - "step": 854 - }, - { - "epoch": 0.03001105670510188, - "grad_norm": 0.9172783493995667, - "learning_rate": 1.4233333333333335e-05, - "loss": 0.5398, - "step": 855 - }, - { - "epoch": 0.030046157356218958, - "grad_norm": 0.865747332572937, - "learning_rate": 1.4249999999999999e-05, - "loss": 0.601, - "step": 856 - }, - { - "epoch": 0.030081258007336038, - "grad_norm": 1.0111786127090454, - "learning_rate": 1.4266666666666667e-05, - "loss": 0.4309, - "step": 857 - }, - { - "epoch": 0.030116358658453114, - "grad_norm": 0.9191696643829346, - "learning_rate": 1.4283333333333334e-05, - "loss": 0.5824, - "step": 858 - }, - { - "epoch": 0.03015145930957019, - "grad_norm": 0.816463828086853, - "learning_rate": 1.43e-05, - "loss": 0.5853, - "step": 859 - }, - { - "epoch": 0.03018655996068727, - "grad_norm": 0.9260498285293579, - "learning_rate": 1.4316666666666668e-05, - "loss": 0.706, - "step": 860 - }, - { - "epoch": 0.030221660611804348, - "grad_norm": 1.04400634765625, - "learning_rate": 1.4333333333333334e-05, - "loss": 0.6063, - "step": 861 - }, - { - "epoch": 0.030256761262921428, - "grad_norm": 0.9023379683494568, - "learning_rate": 1.435e-05, - "loss": 0.5787, - "step": 862 - }, - { - "epoch": 0.030291861914038504, - "grad_norm": 1.139204978942871, - "learning_rate": 1.4366666666666667e-05, - "loss": 0.5905, - "step": 863 - }, - { - "epoch": 0.030326962565155585, - "grad_norm": 0.862968921661377, - "learning_rate": 1.4383333333333335e-05, - "loss": 0.7496, - "step": 864 - }, - { - "epoch": 0.03036206321627266, - "grad_norm": 0.955241858959198, - "learning_rate": 1.44e-05, - "loss": 0.5377, - "step": 865 - }, - { - "epoch": 0.03039716386738974, - "grad_norm": 1.06700599193573, - "learning_rate": 1.4416666666666667e-05, - "loss": 0.5983, - "step": 866 - }, - { - "epoch": 0.030432264518506818, - "grad_norm": 0.8584571480751038, - "learning_rate": 1.4433333333333335e-05, - "loss": 0.5677, - "step": 867 - }, - { - "epoch": 0.030467365169623898, - "grad_norm": 0.8186190128326416, - "learning_rate": 1.4449999999999999e-05, - "loss": 0.6588, - "step": 868 - }, - { - "epoch": 0.030502465820740975, - "grad_norm": 0.8638167977333069, - "learning_rate": 1.4466666666666667e-05, - "loss": 0.3921, - "step": 869 - }, - { - "epoch": 0.03053756647185805, - "grad_norm": 0.8464345335960388, - "learning_rate": 1.4483333333333334e-05, - "loss": 0.6136, - "step": 870 - }, - { - "epoch": 0.03057266712297513, - "grad_norm": 0.9997843503952026, - "learning_rate": 1.45e-05, - "loss": 0.4904, - "step": 871 - }, - { - "epoch": 0.030607767774092208, - "grad_norm": 0.9466270804405212, - "learning_rate": 1.4516666666666668e-05, - "loss": 0.667, - "step": 872 - }, - { - "epoch": 0.030642868425209288, - "grad_norm": 1.013338565826416, - "learning_rate": 1.4533333333333335e-05, - "loss": 0.568, - "step": 873 - }, - { - "epoch": 0.030677969076326365, - "grad_norm": 1.1407853364944458, - "learning_rate": 1.455e-05, - "loss": 0.5235, - "step": 874 - }, - { - "epoch": 0.030713069727443445, - "grad_norm": 0.8981014490127563, - "learning_rate": 1.4566666666666667e-05, - "loss": 0.6264, - "step": 875 - }, - { - "epoch": 0.03074817037856052, - "grad_norm": 1.0555576086044312, - "learning_rate": 1.4583333333333335e-05, - "loss": 0.5886, - "step": 876 - }, - { - "epoch": 0.0307832710296776, - "grad_norm": 1.050808072090149, - "learning_rate": 1.4599999999999999e-05, - "loss": 0.5951, - "step": 877 - }, - { - "epoch": 0.030818371680794678, - "grad_norm": 1.1925806999206543, - "learning_rate": 1.4616666666666667e-05, - "loss": 0.6124, - "step": 878 - }, - { - "epoch": 0.03085347233191176, - "grad_norm": 0.88880455493927, - "learning_rate": 1.4633333333333334e-05, - "loss": 0.5351, - "step": 879 - }, - { - "epoch": 0.030888572983028835, - "grad_norm": 2.320607900619507, - "learning_rate": 1.465e-05, - "loss": 0.5574, - "step": 880 - }, - { - "epoch": 0.030923673634145915, - "grad_norm": 0.926405668258667, - "learning_rate": 1.4666666666666668e-05, - "loss": 0.5544, - "step": 881 - }, - { - "epoch": 0.03095877428526299, - "grad_norm": 1.0072410106658936, - "learning_rate": 1.4683333333333336e-05, - "loss": 0.5878, - "step": 882 - }, - { - "epoch": 0.03099387493638007, - "grad_norm": 0.9528069496154785, - "learning_rate": 1.47e-05, - "loss": 0.6212, - "step": 883 - }, - { - "epoch": 0.03102897558749715, - "grad_norm": 1.0547586679458618, - "learning_rate": 1.4716666666666668e-05, - "loss": 0.5255, - "step": 884 - }, - { - "epoch": 0.031064076238614225, - "grad_norm": 0.8748328685760498, - "learning_rate": 1.4733333333333335e-05, - "loss": 0.6126, - "step": 885 - }, - { - "epoch": 0.031099176889731305, - "grad_norm": 0.886745035648346, - "learning_rate": 1.475e-05, - "loss": 0.6288, - "step": 886 - }, - { - "epoch": 0.031134277540848382, - "grad_norm": 0.9105363488197327, - "learning_rate": 1.4766666666666667e-05, - "loss": 0.5518, - "step": 887 - }, - { - "epoch": 0.031169378191965462, - "grad_norm": 0.9734959006309509, - "learning_rate": 1.4783333333333335e-05, - "loss": 0.5928, - "step": 888 - }, - { - "epoch": 0.03120447884308254, - "grad_norm": 1.1372627019882202, - "learning_rate": 1.48e-05, - "loss": 0.5929, - "step": 889 - }, - { - "epoch": 0.03123957949419962, - "grad_norm": 0.9855411648750305, - "learning_rate": 1.4816666666666668e-05, - "loss": 0.6051, - "step": 890 - }, - { - "epoch": 0.031274680145316695, - "grad_norm": 1.0753607749938965, - "learning_rate": 1.4833333333333336e-05, - "loss": 0.451, - "step": 891 - }, - { - "epoch": 0.031309780796433775, - "grad_norm": 0.838467001914978, - "learning_rate": 1.485e-05, - "loss": 0.6061, - "step": 892 - }, - { - "epoch": 0.031344881447550856, - "grad_norm": 1.1696242094039917, - "learning_rate": 1.4866666666666668e-05, - "loss": 0.6038, - "step": 893 - }, - { - "epoch": 0.03137998209866793, - "grad_norm": 0.9310009479522705, - "learning_rate": 1.4883333333333335e-05, - "loss": 0.5685, - "step": 894 - }, - { - "epoch": 0.03141508274978501, - "grad_norm": 1.2832084894180298, - "learning_rate": 1.49e-05, - "loss": 0.6351, - "step": 895 - }, - { - "epoch": 0.03145018340090209, - "grad_norm": 1.112697720527649, - "learning_rate": 1.4916666666666667e-05, - "loss": 0.625, - "step": 896 - }, - { - "epoch": 0.03148528405201916, - "grad_norm": 0.8581767082214355, - "learning_rate": 1.4933333333333335e-05, - "loss": 0.5092, - "step": 897 - }, - { - "epoch": 0.03152038470313624, - "grad_norm": 1.0171732902526855, - "learning_rate": 1.4950000000000001e-05, - "loss": 0.6598, - "step": 898 - }, - { - "epoch": 0.03155548535425332, - "grad_norm": 1.0230075120925903, - "learning_rate": 1.4966666666666668e-05, - "loss": 0.6211, - "step": 899 - }, - { - "epoch": 0.0315905860053704, - "grad_norm": 1.0944479703903198, - "learning_rate": 1.4983333333333336e-05, - "loss": 0.537, - "step": 900 - }, - { - "epoch": 0.031625686656487476, - "grad_norm": 0.862521767616272, - "learning_rate": 1.5e-05, - "loss": 0.5929, - "step": 901 - }, - { - "epoch": 0.031660787307604556, - "grad_norm": 0.8932321071624756, - "learning_rate": 1.5016666666666668e-05, - "loss": 0.5954, - "step": 902 - }, - { - "epoch": 0.031695887958721636, - "grad_norm": 0.8971567749977112, - "learning_rate": 1.5033333333333336e-05, - "loss": 0.4906, - "step": 903 - }, - { - "epoch": 0.031730988609838716, - "grad_norm": 0.8993833065032959, - "learning_rate": 1.505e-05, - "loss": 0.6698, - "step": 904 - }, - { - "epoch": 0.03176608926095579, - "grad_norm": 1.0415412187576294, - "learning_rate": 1.5066666666666668e-05, - "loss": 0.6492, - "step": 905 - }, - { - "epoch": 0.03180118991207287, - "grad_norm": 0.9636504650115967, - "learning_rate": 1.5083333333333335e-05, - "loss": 0.7088, - "step": 906 - }, - { - "epoch": 0.03183629056318995, - "grad_norm": 0.9391382336616516, - "learning_rate": 1.51e-05, - "loss": 0.5511, - "step": 907 - }, - { - "epoch": 0.03187139121430702, - "grad_norm": 1.386468529701233, - "learning_rate": 1.5116666666666667e-05, - "loss": 0.5734, - "step": 908 - }, - { - "epoch": 0.0319064918654241, - "grad_norm": 1.0533735752105713, - "learning_rate": 1.5133333333333333e-05, - "loss": 0.6156, - "step": 909 - }, - { - "epoch": 0.03194159251654118, - "grad_norm": 1.4174695014953613, - "learning_rate": 1.515e-05, - "loss": 0.6058, - "step": 910 - }, - { - "epoch": 0.03197669316765826, - "grad_norm": 0.8735809326171875, - "learning_rate": 1.5166666666666668e-05, - "loss": 0.5471, - "step": 911 - }, - { - "epoch": 0.032011793818775336, - "grad_norm": 0.9226352572441101, - "learning_rate": 1.5183333333333333e-05, - "loss": 0.5272, - "step": 912 - }, - { - "epoch": 0.032046894469892416, - "grad_norm": 0.8569856286048889, - "learning_rate": 1.52e-05, - "loss": 0.5662, - "step": 913 - }, - { - "epoch": 0.032081995121009496, - "grad_norm": 0.9716305732727051, - "learning_rate": 1.5216666666666668e-05, - "loss": 0.5552, - "step": 914 - }, - { - "epoch": 0.032117095772126576, - "grad_norm": 1.2622570991516113, - "learning_rate": 1.5233333333333332e-05, - "loss": 0.5787, - "step": 915 - }, - { - "epoch": 0.03215219642324365, - "grad_norm": 1.0370113849639893, - "learning_rate": 1.525e-05, - "loss": 0.5832, - "step": 916 - }, - { - "epoch": 0.03218729707436073, - "grad_norm": 0.9171684980392456, - "learning_rate": 1.5266666666666667e-05, - "loss": 0.6101, - "step": 917 - }, - { - "epoch": 0.03222239772547781, - "grad_norm": 1.1236262321472168, - "learning_rate": 1.5283333333333333e-05, - "loss": 0.5668, - "step": 918 - }, - { - "epoch": 0.03225749837659488, - "grad_norm": 1.0135213136672974, - "learning_rate": 1.53e-05, - "loss": 0.552, - "step": 919 - }, - { - "epoch": 0.03229259902771196, - "grad_norm": 0.8461011648178101, - "learning_rate": 1.531666666666667e-05, - "loss": 0.6019, - "step": 920 - }, - { - "epoch": 0.03232769967882904, - "grad_norm": 0.9246734380722046, - "learning_rate": 1.5333333333333334e-05, - "loss": 0.5658, - "step": 921 - }, - { - "epoch": 0.03236280032994612, - "grad_norm": 0.9452496767044067, - "learning_rate": 1.535e-05, - "loss": 0.6174, - "step": 922 - }, - { - "epoch": 0.032397900981063196, - "grad_norm": 0.9882787466049194, - "learning_rate": 1.536666666666667e-05, - "loss": 0.5863, - "step": 923 - }, - { - "epoch": 0.032433001632180276, - "grad_norm": 0.9319274425506592, - "learning_rate": 1.5383333333333332e-05, - "loss": 0.5139, - "step": 924 - }, - { - "epoch": 0.032468102283297356, - "grad_norm": 0.8428853750228882, - "learning_rate": 1.54e-05, - "loss": 0.5947, - "step": 925 - }, - { - "epoch": 0.03250320293441444, - "grad_norm": 0.8910307288169861, - "learning_rate": 1.5416666666666668e-05, - "loss": 0.6362, - "step": 926 - }, - { - "epoch": 0.03253830358553151, - "grad_norm": 0.9212327003479004, - "learning_rate": 1.5433333333333334e-05, - "loss": 0.672, - "step": 927 - }, - { - "epoch": 0.03257340423664859, - "grad_norm": 0.9944688081741333, - "learning_rate": 1.545e-05, - "loss": 0.4599, - "step": 928 - }, - { - "epoch": 0.03260850488776567, - "grad_norm": 0.9040385484695435, - "learning_rate": 1.546666666666667e-05, - "loss": 0.5909, - "step": 929 - }, - { - "epoch": 0.03264360553888274, - "grad_norm": 1.018808126449585, - "learning_rate": 1.548333333333333e-05, - "loss": 0.6082, - "step": 930 - }, - { - "epoch": 0.03267870618999982, - "grad_norm": 0.7645334005355835, - "learning_rate": 1.55e-05, - "loss": 0.6881, - "step": 931 - }, - { - "epoch": 0.0327138068411169, - "grad_norm": 0.8324993252754211, - "learning_rate": 1.5516666666666667e-05, - "loss": 0.6656, - "step": 932 - }, - { - "epoch": 0.03274890749223398, - "grad_norm": 0.841802179813385, - "learning_rate": 1.5533333333333333e-05, - "loss": 0.4607, - "step": 933 - }, - { - "epoch": 0.03278400814335106, - "grad_norm": 0.8607341647148132, - "learning_rate": 1.5550000000000002e-05, - "loss": 0.5571, - "step": 934 - }, - { - "epoch": 0.03281910879446814, - "grad_norm": 1.2200024127960205, - "learning_rate": 1.5566666666666668e-05, - "loss": 0.6178, - "step": 935 - }, - { - "epoch": 0.03285420944558522, - "grad_norm": 0.8964154124259949, - "learning_rate": 1.5583333333333334e-05, - "loss": 0.5921, - "step": 936 - }, - { - "epoch": 0.0328893100967023, - "grad_norm": 0.9233154654502869, - "learning_rate": 1.56e-05, - "loss": 0.6456, - "step": 937 - }, - { - "epoch": 0.03292441074781937, - "grad_norm": 0.8749010562896729, - "learning_rate": 1.561666666666667e-05, - "loss": 0.5353, - "step": 938 - }, - { - "epoch": 0.03295951139893645, - "grad_norm": 0.9857833981513977, - "learning_rate": 1.563333333333333e-05, - "loss": 0.529, - "step": 939 - }, - { - "epoch": 0.03299461205005353, - "grad_norm": 0.9726569056510925, - "learning_rate": 1.565e-05, - "loss": 0.4737, - "step": 940 - }, - { - "epoch": 0.033029712701170603, - "grad_norm": 0.9627752304077148, - "learning_rate": 1.5666666666666667e-05, - "loss": 0.6093, - "step": 941 - }, - { - "epoch": 0.033064813352287684, - "grad_norm": 1.0519294738769531, - "learning_rate": 1.5683333333333333e-05, - "loss": 0.6688, - "step": 942 - }, - { - "epoch": 0.033099914003404764, - "grad_norm": 1.0391918420791626, - "learning_rate": 1.5700000000000002e-05, - "loss": 0.5603, - "step": 943 - }, - { - "epoch": 0.033135014654521844, - "grad_norm": 1.3510538339614868, - "learning_rate": 1.5716666666666668e-05, - "loss": 0.6265, - "step": 944 - }, - { - "epoch": 0.03317011530563892, - "grad_norm": 2.2737343311309814, - "learning_rate": 1.5733333333333334e-05, - "loss": 0.6035, - "step": 945 - }, - { - "epoch": 0.033205215956756, - "grad_norm": 0.9815682172775269, - "learning_rate": 1.575e-05, - "loss": 0.5118, - "step": 946 - }, - { - "epoch": 0.03324031660787308, - "grad_norm": 0.9648652076721191, - "learning_rate": 1.576666666666667e-05, - "loss": 0.5505, - "step": 947 - }, - { - "epoch": 0.03327541725899016, - "grad_norm": 0.8823598027229309, - "learning_rate": 1.5783333333333332e-05, - "loss": 0.6392, - "step": 948 - }, - { - "epoch": 0.03331051791010723, - "grad_norm": 1.0880920886993408, - "learning_rate": 1.58e-05, - "loss": 0.4613, - "step": 949 - }, - { - "epoch": 0.03334561856122431, - "grad_norm": 1.054494023323059, - "learning_rate": 1.5816666666666667e-05, - "loss": 0.7157, - "step": 950 - }, - { - "epoch": 0.03338071921234139, - "grad_norm": 0.8579971194267273, - "learning_rate": 1.5833333333333333e-05, - "loss": 0.5403, - "step": 951 - }, - { - "epoch": 0.033415819863458464, - "grad_norm": 0.97617107629776, - "learning_rate": 1.5850000000000002e-05, - "loss": 0.6328, - "step": 952 - }, - { - "epoch": 0.033450920514575544, - "grad_norm": 1.009897232055664, - "learning_rate": 1.586666666666667e-05, - "loss": 0.6336, - "step": 953 - }, - { - "epoch": 0.033486021165692624, - "grad_norm": 0.791948676109314, - "learning_rate": 1.5883333333333334e-05, - "loss": 0.4551, - "step": 954 - }, - { - "epoch": 0.033521121816809704, - "grad_norm": 1.2766231298446655, - "learning_rate": 1.59e-05, - "loss": 0.5249, - "step": 955 - }, - { - "epoch": 0.03355622246792678, - "grad_norm": 0.9439748525619507, - "learning_rate": 1.591666666666667e-05, - "loss": 0.5702, - "step": 956 - }, - { - "epoch": 0.03359132311904386, - "grad_norm": 0.9156375527381897, - "learning_rate": 1.5933333333333332e-05, - "loss": 0.5004, - "step": 957 - }, - { - "epoch": 0.03362642377016094, - "grad_norm": 0.8501600623130798, - "learning_rate": 1.595e-05, - "loss": 0.6575, - "step": 958 - }, - { - "epoch": 0.03366152442127802, - "grad_norm": 1.0339219570159912, - "learning_rate": 1.5966666666666667e-05, - "loss": 0.6446, - "step": 959 - }, - { - "epoch": 0.03369662507239509, - "grad_norm": 0.9778133630752563, - "learning_rate": 1.5983333333333333e-05, - "loss": 0.5811, - "step": 960 - }, - { - "epoch": 0.03373172572351217, - "grad_norm": 0.8759364485740662, - "learning_rate": 1.6000000000000003e-05, - "loss": 0.6015, - "step": 961 - }, - { - "epoch": 0.03376682637462925, - "grad_norm": 0.9957571625709534, - "learning_rate": 1.601666666666667e-05, - "loss": 0.5356, - "step": 962 - }, - { - "epoch": 0.033801927025746324, - "grad_norm": 1.0240576267242432, - "learning_rate": 1.6033333333333335e-05, - "loss": 0.6193, - "step": 963 - }, - { - "epoch": 0.033837027676863404, - "grad_norm": 1.0095261335372925, - "learning_rate": 1.605e-05, - "loss": 0.5468, - "step": 964 - }, - { - "epoch": 0.033872128327980484, - "grad_norm": 1.2280914783477783, - "learning_rate": 1.606666666666667e-05, - "loss": 0.5873, - "step": 965 - }, - { - "epoch": 0.033907228979097565, - "grad_norm": 0.9030550122261047, - "learning_rate": 1.6083333333333332e-05, - "loss": 0.577, - "step": 966 - }, - { - "epoch": 0.03394232963021464, - "grad_norm": 1.1108384132385254, - "learning_rate": 1.6100000000000002e-05, - "loss": 0.5378, - "step": 967 - }, - { - "epoch": 0.03397743028133172, - "grad_norm": 0.9789435863494873, - "learning_rate": 1.6116666666666668e-05, - "loss": 0.6219, - "step": 968 - }, - { - "epoch": 0.0340125309324488, - "grad_norm": 0.9857867360115051, - "learning_rate": 1.6133333333333334e-05, - "loss": 0.6418, - "step": 969 - }, - { - "epoch": 0.03404763158356588, - "grad_norm": 0.9818567633628845, - "learning_rate": 1.6150000000000003e-05, - "loss": 0.5311, - "step": 970 - }, - { - "epoch": 0.03408273223468295, - "grad_norm": 0.9742870926856995, - "learning_rate": 1.6166666666666665e-05, - "loss": 0.6193, - "step": 971 - }, - { - "epoch": 0.03411783288580003, - "grad_norm": 0.8891617059707642, - "learning_rate": 1.6183333333333335e-05, - "loss": 0.5156, - "step": 972 - }, - { - "epoch": 0.03415293353691711, - "grad_norm": 1.1438515186309814, - "learning_rate": 1.62e-05, - "loss": 0.6199, - "step": 973 - }, - { - "epoch": 0.03418803418803419, - "grad_norm": 0.9580617547035217, - "learning_rate": 1.6216666666666667e-05, - "loss": 0.6806, - "step": 974 - }, - { - "epoch": 0.034223134839151265, - "grad_norm": 1.0753464698791504, - "learning_rate": 1.6233333333333333e-05, - "loss": 0.4862, - "step": 975 - }, - { - "epoch": 0.034258235490268345, - "grad_norm": 1.0305813550949097, - "learning_rate": 1.6250000000000002e-05, - "loss": 0.6348, - "step": 976 - }, - { - "epoch": 0.034293336141385425, - "grad_norm": 1.2136017084121704, - "learning_rate": 1.6266666666666665e-05, - "loss": 0.6478, - "step": 977 - }, - { - "epoch": 0.0343284367925025, - "grad_norm": 0.9190363883972168, - "learning_rate": 1.6283333333333334e-05, - "loss": 0.5486, - "step": 978 - }, - { - "epoch": 0.03436353744361958, - "grad_norm": 0.9230664372444153, - "learning_rate": 1.63e-05, - "loss": 0.5121, - "step": 979 - }, - { - "epoch": 0.03439863809473666, - "grad_norm": 0.8631748557090759, - "learning_rate": 1.6316666666666666e-05, - "loss": 0.6334, - "step": 980 - }, - { - "epoch": 0.03443373874585374, - "grad_norm": 0.8901913166046143, - "learning_rate": 1.6333333333333335e-05, - "loss": 0.5826, - "step": 981 - }, - { - "epoch": 0.03446883939697081, - "grad_norm": 0.8605450987815857, - "learning_rate": 1.635e-05, - "loss": 0.6045, - "step": 982 - }, - { - "epoch": 0.03450394004808789, - "grad_norm": 0.9340412616729736, - "learning_rate": 1.6366666666666667e-05, - "loss": 0.5221, - "step": 983 - }, - { - "epoch": 0.03453904069920497, - "grad_norm": 0.8894437551498413, - "learning_rate": 1.6383333333333333e-05, - "loss": 0.5909, - "step": 984 - }, - { - "epoch": 0.03457414135032205, - "grad_norm": 1.0453128814697266, - "learning_rate": 1.6400000000000002e-05, - "loss": 0.623, - "step": 985 - }, - { - "epoch": 0.034609242001439125, - "grad_norm": 1.1560142040252686, - "learning_rate": 1.6416666666666665e-05, - "loss": 0.5634, - "step": 986 - }, - { - "epoch": 0.034644342652556205, - "grad_norm": 1.0617189407348633, - "learning_rate": 1.6433333333333334e-05, - "loss": 0.4866, - "step": 987 - }, - { - "epoch": 0.034679443303673285, - "grad_norm": 1.0079429149627686, - "learning_rate": 1.645e-05, - "loss": 0.5526, - "step": 988 - }, - { - "epoch": 0.03471454395479036, - "grad_norm": 1.1586683988571167, - "learning_rate": 1.6466666666666666e-05, - "loss": 0.4929, - "step": 989 - }, - { - "epoch": 0.03474964460590744, - "grad_norm": 0.9742299318313599, - "learning_rate": 1.6483333333333335e-05, - "loss": 0.6039, - "step": 990 - }, - { - "epoch": 0.03478474525702452, - "grad_norm": 1.072942852973938, - "learning_rate": 1.65e-05, - "loss": 0.5981, - "step": 991 - }, - { - "epoch": 0.0348198459081416, - "grad_norm": 0.8040679693222046, - "learning_rate": 1.6516666666666667e-05, - "loss": 0.6727, - "step": 992 - }, - { - "epoch": 0.03485494655925867, - "grad_norm": 1.0388846397399902, - "learning_rate": 1.6533333333333333e-05, - "loss": 0.6541, - "step": 993 - }, - { - "epoch": 0.03489004721037575, - "grad_norm": 1.2212457656860352, - "learning_rate": 1.6550000000000002e-05, - "loss": 0.5504, - "step": 994 - }, - { - "epoch": 0.03492514786149283, - "grad_norm": 0.905949592590332, - "learning_rate": 1.6566666666666665e-05, - "loss": 0.656, - "step": 995 - }, - { - "epoch": 0.03496024851260991, - "grad_norm": 0.9137271642684937, - "learning_rate": 1.6583333333333334e-05, - "loss": 0.5251, - "step": 996 - }, - { - "epoch": 0.034995349163726985, - "grad_norm": 0.8318852782249451, - "learning_rate": 1.66e-05, - "loss": 0.5484, - "step": 997 - }, - { - "epoch": 0.035030449814844065, - "grad_norm": 0.9636765718460083, - "learning_rate": 1.6616666666666666e-05, - "loss": 0.5682, - "step": 998 - }, - { - "epoch": 0.035065550465961146, - "grad_norm": 0.7860920429229736, - "learning_rate": 1.6633333333333336e-05, - "loss": 0.525, - "step": 999 - }, - { - "epoch": 0.03510065111707822, - "grad_norm": 0.8540359139442444, - "learning_rate": 1.665e-05, - "loss": 0.545, - "step": 1000 - }, - { - "epoch": 0.0351357517681953, - "grad_norm": 1.0459446907043457, - "learning_rate": 1.6666666666666667e-05, - "loss": 0.629, - "step": 1001 - }, - { - "epoch": 0.03517085241931238, - "grad_norm": 0.9724981784820557, - "learning_rate": 1.6683333333333333e-05, - "loss": 0.5809, - "step": 1002 - }, - { - "epoch": 0.03520595307042946, - "grad_norm": 0.8261970281600952, - "learning_rate": 1.6700000000000003e-05, - "loss": 0.5604, - "step": 1003 - }, - { - "epoch": 0.03524105372154653, - "grad_norm": 1.4648427963256836, - "learning_rate": 1.6716666666666665e-05, - "loss": 0.6187, - "step": 1004 - }, - { - "epoch": 0.03527615437266361, - "grad_norm": 0.915421724319458, - "learning_rate": 1.6733333333333335e-05, - "loss": 0.5705, - "step": 1005 - }, - { - "epoch": 0.03531125502378069, - "grad_norm": 0.827420175075531, - "learning_rate": 1.675e-05, - "loss": 0.5335, - "step": 1006 - }, - { - "epoch": 0.03534635567489777, - "grad_norm": 0.8957952260971069, - "learning_rate": 1.6766666666666667e-05, - "loss": 0.5909, - "step": 1007 - }, - { - "epoch": 0.035381456326014846, - "grad_norm": 0.9339390993118286, - "learning_rate": 1.6783333333333336e-05, - "loss": 0.5526, - "step": 1008 - }, - { - "epoch": 0.035416556977131926, - "grad_norm": 0.9966162443161011, - "learning_rate": 1.6800000000000002e-05, - "loss": 0.6211, - "step": 1009 - }, - { - "epoch": 0.035451657628249006, - "grad_norm": 0.8126536011695862, - "learning_rate": 1.6816666666666668e-05, - "loss": 0.5778, - "step": 1010 - }, - { - "epoch": 0.03548675827936608, - "grad_norm": 1.0551860332489014, - "learning_rate": 1.6833333333333334e-05, - "loss": 0.5533, - "step": 1011 - }, - { - "epoch": 0.03552185893048316, - "grad_norm": 0.8555046319961548, - "learning_rate": 1.6850000000000003e-05, - "loss": 0.5402, - "step": 1012 - }, - { - "epoch": 0.03555695958160024, - "grad_norm": 0.9422745704650879, - "learning_rate": 1.6866666666666666e-05, - "loss": 0.647, - "step": 1013 - }, - { - "epoch": 0.03559206023271732, - "grad_norm": 0.9827373027801514, - "learning_rate": 1.6883333333333335e-05, - "loss": 0.6143, - "step": 1014 - }, - { - "epoch": 0.03562716088383439, - "grad_norm": 0.9243713617324829, - "learning_rate": 1.69e-05, - "loss": 0.6365, - "step": 1015 - }, - { - "epoch": 0.03566226153495147, - "grad_norm": 0.9701935052871704, - "learning_rate": 1.6916666666666667e-05, - "loss": 0.5793, - "step": 1016 - }, - { - "epoch": 0.03569736218606855, - "grad_norm": 1.123908281326294, - "learning_rate": 1.6933333333333333e-05, - "loss": 0.5431, - "step": 1017 - }, - { - "epoch": 0.03573246283718563, - "grad_norm": 0.8895246982574463, - "learning_rate": 1.6950000000000002e-05, - "loss": 0.5851, - "step": 1018 - }, - { - "epoch": 0.035767563488302706, - "grad_norm": 0.7936455607414246, - "learning_rate": 1.6966666666666668e-05, - "loss": 0.5443, - "step": 1019 - }, - { - "epoch": 0.035802664139419786, - "grad_norm": 0.9007326364517212, - "learning_rate": 1.6983333333333334e-05, - "loss": 0.6915, - "step": 1020 - }, - { - "epoch": 0.035837764790536866, - "grad_norm": 0.9408074021339417, - "learning_rate": 1.7000000000000003e-05, - "loss": 0.528, - "step": 1021 - }, - { - "epoch": 0.03587286544165394, - "grad_norm": 1.2493603229522705, - "learning_rate": 1.7016666666666666e-05, - "loss": 0.6084, - "step": 1022 - }, - { - "epoch": 0.03590796609277102, - "grad_norm": 1.1960853338241577, - "learning_rate": 1.7033333333333335e-05, - "loss": 0.5659, - "step": 1023 - }, - { - "epoch": 0.0359430667438881, - "grad_norm": 1.0739424228668213, - "learning_rate": 1.705e-05, - "loss": 0.5945, - "step": 1024 - }, - { - "epoch": 0.03597816739500518, - "grad_norm": 1.0549860000610352, - "learning_rate": 1.7066666666666667e-05, - "loss": 0.5521, - "step": 1025 - }, - { - "epoch": 0.03601326804612225, - "grad_norm": 1.3232887983322144, - "learning_rate": 1.7083333333333333e-05, - "loss": 0.605, - "step": 1026 - }, - { - "epoch": 0.03604836869723933, - "grad_norm": 0.8592387437820435, - "learning_rate": 1.7100000000000002e-05, - "loss": 0.6172, - "step": 1027 - }, - { - "epoch": 0.03608346934835641, - "grad_norm": 1.389203667640686, - "learning_rate": 1.7116666666666668e-05, - "loss": 0.6508, - "step": 1028 - }, - { - "epoch": 0.03611856999947349, - "grad_norm": 0.8611961603164673, - "learning_rate": 1.7133333333333334e-05, - "loss": 0.6287, - "step": 1029 - }, - { - "epoch": 0.036153670650590566, - "grad_norm": 0.8547701239585876, - "learning_rate": 1.7150000000000004e-05, - "loss": 0.4494, - "step": 1030 - }, - { - "epoch": 0.036188771301707646, - "grad_norm": 0.939764678478241, - "learning_rate": 1.7166666666666666e-05, - "loss": 0.5247, - "step": 1031 - }, - { - "epoch": 0.03622387195282473, - "grad_norm": 0.8439621329307556, - "learning_rate": 1.7183333333333335e-05, - "loss": 0.5624, - "step": 1032 - }, - { - "epoch": 0.0362589726039418, - "grad_norm": 1.0787298679351807, - "learning_rate": 1.7199999999999998e-05, - "loss": 0.5681, - "step": 1033 - }, - { - "epoch": 0.03629407325505888, - "grad_norm": 1.0583066940307617, - "learning_rate": 1.7216666666666667e-05, - "loss": 0.5821, - "step": 1034 - }, - { - "epoch": 0.03632917390617596, - "grad_norm": 0.9632483124732971, - "learning_rate": 1.7233333333333333e-05, - "loss": 0.6243, - "step": 1035 - }, - { - "epoch": 0.03636427455729304, - "grad_norm": 0.967204749584198, - "learning_rate": 1.725e-05, - "loss": 0.5519, - "step": 1036 - }, - { - "epoch": 0.03639937520841011, - "grad_norm": 1.124489665031433, - "learning_rate": 1.726666666666667e-05, - "loss": 0.6208, - "step": 1037 - }, - { - "epoch": 0.03643447585952719, - "grad_norm": 0.9415506720542908, - "learning_rate": 1.7283333333333334e-05, - "loss": 0.5752, - "step": 1038 - }, - { - "epoch": 0.036469576510644273, - "grad_norm": 0.8510027527809143, - "learning_rate": 1.73e-05, - "loss": 0.5486, - "step": 1039 - }, - { - "epoch": 0.036504677161761354, - "grad_norm": 0.9924975037574768, - "learning_rate": 1.7316666666666666e-05, - "loss": 0.4871, - "step": 1040 - }, - { - "epoch": 0.03653977781287843, - "grad_norm": 1.0993822813034058, - "learning_rate": 1.7333333333333336e-05, - "loss": 0.6001, - "step": 1041 - }, - { - "epoch": 0.03657487846399551, - "grad_norm": 0.9219257831573486, - "learning_rate": 1.7349999999999998e-05, - "loss": 0.4936, - "step": 1042 - }, - { - "epoch": 0.03660997911511259, - "grad_norm": 1.2107486724853516, - "learning_rate": 1.7366666666666668e-05, - "loss": 0.6127, - "step": 1043 - }, - { - "epoch": 0.03664507976622966, - "grad_norm": 1.303966760635376, - "learning_rate": 1.7383333333333333e-05, - "loss": 0.6294, - "step": 1044 - }, - { - "epoch": 0.03668018041734674, - "grad_norm": 1.1637053489685059, - "learning_rate": 1.74e-05, - "loss": 0.6846, - "step": 1045 - }, - { - "epoch": 0.03671528106846382, - "grad_norm": 0.8200995326042175, - "learning_rate": 1.741666666666667e-05, - "loss": 0.6224, - "step": 1046 - }, - { - "epoch": 0.0367503817195809, - "grad_norm": 0.9877166748046875, - "learning_rate": 1.7433333333333335e-05, - "loss": 0.6075, - "step": 1047 - }, - { - "epoch": 0.036785482370697974, - "grad_norm": 0.8307560086250305, - "learning_rate": 1.745e-05, - "loss": 0.6219, - "step": 1048 - }, - { - "epoch": 0.036820583021815054, - "grad_norm": 1.0402482748031616, - "learning_rate": 1.7466666666666667e-05, - "loss": 0.6434, - "step": 1049 - }, - { - "epoch": 0.036855683672932134, - "grad_norm": 1.2060072422027588, - "learning_rate": 1.7483333333333336e-05, - "loss": 0.6487, - "step": 1050 - }, - { - "epoch": 0.036890784324049214, - "grad_norm": 0.9890388250350952, - "learning_rate": 1.75e-05, - "loss": 0.6523, - "step": 1051 - }, - { - "epoch": 0.03692588497516629, - "grad_norm": 0.8402560353279114, - "learning_rate": 1.7516666666666668e-05, - "loss": 0.7164, - "step": 1052 - }, - { - "epoch": 0.03696098562628337, - "grad_norm": 0.8804049491882324, - "learning_rate": 1.7533333333333334e-05, - "loss": 0.5442, - "step": 1053 - }, - { - "epoch": 0.03699608627740045, - "grad_norm": 1.2492446899414062, - "learning_rate": 1.755e-05, - "loss": 0.6401, - "step": 1054 - }, - { - "epoch": 0.03703118692851753, - "grad_norm": 0.9256028532981873, - "learning_rate": 1.756666666666667e-05, - "loss": 0.5286, - "step": 1055 - }, - { - "epoch": 0.0370662875796346, - "grad_norm": 1.0653115510940552, - "learning_rate": 1.7583333333333335e-05, - "loss": 0.6064, - "step": 1056 - }, - { - "epoch": 0.03710138823075168, - "grad_norm": 0.7797328233718872, - "learning_rate": 1.76e-05, - "loss": 0.6069, - "step": 1057 - }, - { - "epoch": 0.03713648888186876, - "grad_norm": 1.0193705558776855, - "learning_rate": 1.7616666666666667e-05, - "loss": 0.5749, - "step": 1058 - }, - { - "epoch": 0.037171589532985834, - "grad_norm": 1.0267952680587769, - "learning_rate": 1.7633333333333336e-05, - "loss": 0.5559, - "step": 1059 - }, - { - "epoch": 0.037206690184102914, - "grad_norm": 0.964346706867218, - "learning_rate": 1.765e-05, - "loss": 0.6086, - "step": 1060 - }, - { - "epoch": 0.037241790835219994, - "grad_norm": 1.0263025760650635, - "learning_rate": 1.7666666666666668e-05, - "loss": 0.7036, - "step": 1061 - }, - { - "epoch": 0.037276891486337074, - "grad_norm": 0.9133209586143494, - "learning_rate": 1.7683333333333334e-05, - "loss": 0.666, - "step": 1062 - }, - { - "epoch": 0.03731199213745415, - "grad_norm": 1.0560188293457031, - "learning_rate": 1.77e-05, - "loss": 0.5945, - "step": 1063 - }, - { - "epoch": 0.03734709278857123, - "grad_norm": 1.0671626329421997, - "learning_rate": 1.7716666666666666e-05, - "loss": 0.6643, - "step": 1064 - }, - { - "epoch": 0.03738219343968831, - "grad_norm": 1.0666981935501099, - "learning_rate": 1.7733333333333335e-05, - "loss": 0.6294, - "step": 1065 - }, - { - "epoch": 0.03741729409080539, - "grad_norm": 1.0421277284622192, - "learning_rate": 1.775e-05, - "loss": 0.6459, - "step": 1066 - }, - { - "epoch": 0.03745239474192246, - "grad_norm": 0.8643361330032349, - "learning_rate": 1.7766666666666667e-05, - "loss": 0.625, - "step": 1067 - }, - { - "epoch": 0.03748749539303954, - "grad_norm": 1.0993927717208862, - "learning_rate": 1.7783333333333336e-05, - "loss": 0.5155, - "step": 1068 - }, - { - "epoch": 0.03752259604415662, - "grad_norm": 0.9344600439071655, - "learning_rate": 1.78e-05, - "loss": 0.4708, - "step": 1069 - }, - { - "epoch": 0.037557696695273694, - "grad_norm": 1.098231554031372, - "learning_rate": 1.781666666666667e-05, - "loss": 0.5553, - "step": 1070 - }, - { - "epoch": 0.037592797346390774, - "grad_norm": 0.8188855648040771, - "learning_rate": 1.7833333333333334e-05, - "loss": 0.6412, - "step": 1071 - }, - { - "epoch": 0.037627897997507855, - "grad_norm": 0.8533406853675842, - "learning_rate": 1.785e-05, - "loss": 0.665, - "step": 1072 - }, - { - "epoch": 0.037662998648624935, - "grad_norm": 0.8994888067245483, - "learning_rate": 1.7866666666666666e-05, - "loss": 0.6109, - "step": 1073 - }, - { - "epoch": 0.03769809929974201, - "grad_norm": 1.008533000946045, - "learning_rate": 1.7883333333333335e-05, - "loss": 0.6544, - "step": 1074 - }, - { - "epoch": 0.03773319995085909, - "grad_norm": 0.8494242429733276, - "learning_rate": 1.79e-05, - "loss": 0.6269, - "step": 1075 - }, - { - "epoch": 0.03776830060197617, - "grad_norm": 0.8976731896400452, - "learning_rate": 1.7916666666666667e-05, - "loss": 0.5478, - "step": 1076 - }, - { - "epoch": 0.03780340125309325, - "grad_norm": 1.083275318145752, - "learning_rate": 1.7933333333333337e-05, - "loss": 0.6589, - "step": 1077 - }, - { - "epoch": 0.03783850190421032, - "grad_norm": 0.8663831353187561, - "learning_rate": 1.795e-05, - "loss": 0.6223, - "step": 1078 - }, - { - "epoch": 0.0378736025553274, - "grad_norm": 0.932680070400238, - "learning_rate": 1.796666666666667e-05, - "loss": 0.6409, - "step": 1079 - }, - { - "epoch": 0.03790870320644448, - "grad_norm": 1.1589884757995605, - "learning_rate": 1.7983333333333335e-05, - "loss": 0.6162, - "step": 1080 - }, - { - "epoch": 0.037943803857561555, - "grad_norm": 0.9089801907539368, - "learning_rate": 1.8e-05, - "loss": 0.5958, - "step": 1081 - }, - { - "epoch": 0.037978904508678635, - "grad_norm": 0.8988527059555054, - "learning_rate": 1.8016666666666666e-05, - "loss": 0.5816, - "step": 1082 - }, - { - "epoch": 0.038014005159795715, - "grad_norm": 1.1050105094909668, - "learning_rate": 1.8033333333333336e-05, - "loss": 0.5455, - "step": 1083 - }, - { - "epoch": 0.038049105810912795, - "grad_norm": 1.0836914777755737, - "learning_rate": 1.805e-05, - "loss": 0.6394, - "step": 1084 - }, - { - "epoch": 0.03808420646202987, - "grad_norm": 0.9687957763671875, - "learning_rate": 1.8066666666666668e-05, - "loss": 0.4948, - "step": 1085 - }, - { - "epoch": 0.03811930711314695, - "grad_norm": 1.1304610967636108, - "learning_rate": 1.8083333333333337e-05, - "loss": 0.601, - "step": 1086 - }, - { - "epoch": 0.03815440776426403, - "grad_norm": 1.0763933658599854, - "learning_rate": 1.81e-05, - "loss": 0.4171, - "step": 1087 - }, - { - "epoch": 0.03818950841538111, - "grad_norm": 0.846434473991394, - "learning_rate": 1.811666666666667e-05, - "loss": 0.5666, - "step": 1088 - }, - { - "epoch": 0.03822460906649818, - "grad_norm": 0.8940535187721252, - "learning_rate": 1.8133333333333335e-05, - "loss": 0.5997, - "step": 1089 - }, - { - "epoch": 0.03825970971761526, - "grad_norm": 1.0281331539154053, - "learning_rate": 1.815e-05, - "loss": 0.6383, - "step": 1090 - }, - { - "epoch": 0.03829481036873234, - "grad_norm": 0.8268880248069763, - "learning_rate": 1.8166666666666667e-05, - "loss": 0.6405, - "step": 1091 - }, - { - "epoch": 0.038329911019849415, - "grad_norm": 1.0023624897003174, - "learning_rate": 1.8183333333333336e-05, - "loss": 0.616, - "step": 1092 - }, - { - "epoch": 0.038365011670966495, - "grad_norm": 0.7972894906997681, - "learning_rate": 1.8200000000000002e-05, - "loss": 0.6116, - "step": 1093 - }, - { - "epoch": 0.038400112322083575, - "grad_norm": 0.8800451159477234, - "learning_rate": 1.8216666666666668e-05, - "loss": 0.5813, - "step": 1094 - }, - { - "epoch": 0.038435212973200655, - "grad_norm": 1.0819505453109741, - "learning_rate": 1.8233333333333334e-05, - "loss": 0.6066, - "step": 1095 - }, - { - "epoch": 0.03847031362431773, - "grad_norm": 0.8768135905265808, - "learning_rate": 1.825e-05, - "loss": 0.6443, - "step": 1096 - }, - { - "epoch": 0.03850541427543481, - "grad_norm": 0.9781245589256287, - "learning_rate": 1.826666666666667e-05, - "loss": 0.5943, - "step": 1097 - }, - { - "epoch": 0.03854051492655189, - "grad_norm": 0.9826093912124634, - "learning_rate": 1.828333333333333e-05, - "loss": 0.6061, - "step": 1098 - }, - { - "epoch": 0.03857561557766897, - "grad_norm": 0.908022403717041, - "learning_rate": 1.83e-05, - "loss": 0.6297, - "step": 1099 - }, - { - "epoch": 0.03861071622878604, - "grad_norm": 1.0357130765914917, - "learning_rate": 1.8316666666666667e-05, - "loss": 0.5443, - "step": 1100 - }, - { - "epoch": 0.03864581687990312, - "grad_norm": 1.074162483215332, - "learning_rate": 1.8333333333333333e-05, - "loss": 0.5858, - "step": 1101 - }, - { - "epoch": 0.0386809175310202, - "grad_norm": 0.9841240048408508, - "learning_rate": 1.8350000000000002e-05, - "loss": 0.5717, - "step": 1102 - }, - { - "epoch": 0.038716018182137275, - "grad_norm": 0.812080979347229, - "learning_rate": 1.8366666666666668e-05, - "loss": 0.6184, - "step": 1103 - }, - { - "epoch": 0.038751118833254355, - "grad_norm": 0.9340435266494751, - "learning_rate": 1.8383333333333334e-05, - "loss": 0.5301, - "step": 1104 - }, - { - "epoch": 0.038786219484371436, - "grad_norm": 1.041313886642456, - "learning_rate": 1.84e-05, - "loss": 0.6268, - "step": 1105 - }, - { - "epoch": 0.038821320135488516, - "grad_norm": 0.8807110786437988, - "learning_rate": 1.841666666666667e-05, - "loss": 0.4909, - "step": 1106 - }, - { - "epoch": 0.03885642078660559, - "grad_norm": 0.9660021066665649, - "learning_rate": 1.8433333333333332e-05, - "loss": 0.5804, - "step": 1107 - }, - { - "epoch": 0.03889152143772267, - "grad_norm": 0.981268584728241, - "learning_rate": 1.845e-05, - "loss": 0.5698, - "step": 1108 - }, - { - "epoch": 0.03892662208883975, - "grad_norm": 0.9013524651527405, - "learning_rate": 1.8466666666666667e-05, - "loss": 0.6183, - "step": 1109 - }, - { - "epoch": 0.03896172273995683, - "grad_norm": 0.7768324613571167, - "learning_rate": 1.8483333333333333e-05, - "loss": 0.6002, - "step": 1110 - }, - { - "epoch": 0.0389968233910739, - "grad_norm": 0.9130654335021973, - "learning_rate": 1.85e-05, - "loss": 0.6249, - "step": 1111 - }, - { - "epoch": 0.03903192404219098, - "grad_norm": 1.2218343019485474, - "learning_rate": 1.851666666666667e-05, - "loss": 0.5259, - "step": 1112 - }, - { - "epoch": 0.03906702469330806, - "grad_norm": 0.8794834017753601, - "learning_rate": 1.8533333333333334e-05, - "loss": 0.5302, - "step": 1113 - }, - { - "epoch": 0.039102125344425136, - "grad_norm": 1.012821078300476, - "learning_rate": 1.855e-05, - "loss": 0.6333, - "step": 1114 - }, - { - "epoch": 0.039137225995542216, - "grad_norm": 1.0025798082351685, - "learning_rate": 1.856666666666667e-05, - "loss": 0.6733, - "step": 1115 - }, - { - "epoch": 0.039172326646659296, - "grad_norm": 0.9215095639228821, - "learning_rate": 1.8583333333333332e-05, - "loss": 0.5705, - "step": 1116 - }, - { - "epoch": 0.039207427297776376, - "grad_norm": 0.9281803965568542, - "learning_rate": 1.86e-05, - "loss": 0.5656, - "step": 1117 - }, - { - "epoch": 0.03924252794889345, - "grad_norm": 1.1983158588409424, - "learning_rate": 1.8616666666666667e-05, - "loss": 0.5879, - "step": 1118 - }, - { - "epoch": 0.03927762860001053, - "grad_norm": 0.9853391051292419, - "learning_rate": 1.8633333333333333e-05, - "loss": 0.5793, - "step": 1119 - }, - { - "epoch": 0.03931272925112761, - "grad_norm": 0.893787682056427, - "learning_rate": 1.865e-05, - "loss": 0.5396, - "step": 1120 - }, - { - "epoch": 0.03934782990224469, - "grad_norm": 0.8866139650344849, - "learning_rate": 1.866666666666667e-05, - "loss": 0.6153, - "step": 1121 - }, - { - "epoch": 0.03938293055336176, - "grad_norm": 0.8313519954681396, - "learning_rate": 1.8683333333333335e-05, - "loss": 0.5253, - "step": 1122 - }, - { - "epoch": 0.03941803120447884, - "grad_norm": 1.138990879058838, - "learning_rate": 1.87e-05, - "loss": 0.6468, - "step": 1123 - }, - { - "epoch": 0.03945313185559592, - "grad_norm": 0.9034054279327393, - "learning_rate": 1.871666666666667e-05, - "loss": 0.5706, - "step": 1124 - }, - { - "epoch": 0.039488232506712996, - "grad_norm": 1.0490405559539795, - "learning_rate": 1.8733333333333332e-05, - "loss": 0.5699, - "step": 1125 - }, - { - "epoch": 0.039523333157830076, - "grad_norm": 0.8973144888877869, - "learning_rate": 1.8750000000000002e-05, - "loss": 0.4728, - "step": 1126 - }, - { - "epoch": 0.039558433808947156, - "grad_norm": 0.8238698244094849, - "learning_rate": 1.8766666666666668e-05, - "loss": 0.4058, - "step": 1127 - }, - { - "epoch": 0.039593534460064236, - "grad_norm": 1.0868406295776367, - "learning_rate": 1.8783333333333334e-05, - "loss": 0.568, - "step": 1128 - }, - { - "epoch": 0.03962863511118131, - "grad_norm": 1.1839690208435059, - "learning_rate": 1.88e-05, - "loss": 0.6951, - "step": 1129 - }, - { - "epoch": 0.03966373576229839, - "grad_norm": 1.2430944442749023, - "learning_rate": 1.881666666666667e-05, - "loss": 0.6827, - "step": 1130 - }, - { - "epoch": 0.03969883641341547, - "grad_norm": 1.036939024925232, - "learning_rate": 1.8833333333333335e-05, - "loss": 0.5281, - "step": 1131 - }, - { - "epoch": 0.03973393706453255, - "grad_norm": 0.8819296360015869, - "learning_rate": 1.885e-05, - "loss": 0.6357, - "step": 1132 - }, - { - "epoch": 0.03976903771564962, - "grad_norm": 0.9491674900054932, - "learning_rate": 1.886666666666667e-05, - "loss": 0.5781, - "step": 1133 - }, - { - "epoch": 0.0398041383667667, - "grad_norm": 0.989338755607605, - "learning_rate": 1.8883333333333333e-05, - "loss": 0.6925, - "step": 1134 - }, - { - "epoch": 0.03983923901788378, - "grad_norm": 0.6631282567977905, - "learning_rate": 1.8900000000000002e-05, - "loss": 0.5015, - "step": 1135 - }, - { - "epoch": 0.03987433966900086, - "grad_norm": 0.7043457627296448, - "learning_rate": 1.8916666666666668e-05, - "loss": 0.6263, - "step": 1136 - }, - { - "epoch": 0.039909440320117937, - "grad_norm": 0.87305748462677, - "learning_rate": 1.8933333333333334e-05, - "loss": 0.5473, - "step": 1137 - }, - { - "epoch": 0.03994454097123502, - "grad_norm": 1.1566425561904907, - "learning_rate": 1.895e-05, - "loss": 0.5137, - "step": 1138 - }, - { - "epoch": 0.0399796416223521, - "grad_norm": 0.9493715763092041, - "learning_rate": 1.896666666666667e-05, - "loss": 0.5601, - "step": 1139 - }, - { - "epoch": 0.04001474227346917, - "grad_norm": 1.0280978679656982, - "learning_rate": 1.8983333333333335e-05, - "loss": 0.5113, - "step": 1140 - }, - { - "epoch": 0.04004984292458625, - "grad_norm": 0.9715282917022705, - "learning_rate": 1.9e-05, - "loss": 0.4918, - "step": 1141 - }, - { - "epoch": 0.04008494357570333, - "grad_norm": 1.1167824268341064, - "learning_rate": 1.901666666666667e-05, - "loss": 0.6613, - "step": 1142 - }, - { - "epoch": 0.04012004422682041, - "grad_norm": 1.1052043437957764, - "learning_rate": 1.9033333333333333e-05, - "loss": 0.4591, - "step": 1143 - }, - { - "epoch": 0.04015514487793748, - "grad_norm": 0.9028419256210327, - "learning_rate": 1.9050000000000002e-05, - "loss": 0.5451, - "step": 1144 - }, - { - "epoch": 0.040190245529054563, - "grad_norm": 0.8470709323883057, - "learning_rate": 1.9066666666666668e-05, - "loss": 0.457, - "step": 1145 - }, - { - "epoch": 0.040225346180171644, - "grad_norm": 0.8566228151321411, - "learning_rate": 1.9083333333333334e-05, - "loss": 0.4994, - "step": 1146 - }, - { - "epoch": 0.040260446831288724, - "grad_norm": 1.1415870189666748, - "learning_rate": 1.91e-05, - "loss": 0.6052, - "step": 1147 - }, - { - "epoch": 0.0402955474824058, - "grad_norm": 0.9783337116241455, - "learning_rate": 1.911666666666667e-05, - "loss": 0.6696, - "step": 1148 - }, - { - "epoch": 0.04033064813352288, - "grad_norm": 1.2492997646331787, - "learning_rate": 1.9133333333333332e-05, - "loss": 0.5599, - "step": 1149 - }, - { - "epoch": 0.04036574878463996, - "grad_norm": 1.083591341972351, - "learning_rate": 1.915e-05, - "loss": 0.5951, - "step": 1150 - }, - { - "epoch": 0.04040084943575703, - "grad_norm": 0.8037967085838318, - "learning_rate": 1.9166666666666667e-05, - "loss": 0.6087, - "step": 1151 - }, - { - "epoch": 0.04043595008687411, - "grad_norm": 0.9791350960731506, - "learning_rate": 1.9183333333333333e-05, - "loss": 0.5333, - "step": 1152 - }, - { - "epoch": 0.04047105073799119, - "grad_norm": 0.9623364806175232, - "learning_rate": 1.9200000000000003e-05, - "loss": 0.5837, - "step": 1153 - }, - { - "epoch": 0.04050615138910827, - "grad_norm": 0.8411163687705994, - "learning_rate": 1.921666666666667e-05, - "loss": 0.5808, - "step": 1154 - }, - { - "epoch": 0.040541252040225344, - "grad_norm": 0.8408946394920349, - "learning_rate": 1.9233333333333334e-05, - "loss": 0.614, - "step": 1155 - }, - { - "epoch": 0.040576352691342424, - "grad_norm": 1.012041449546814, - "learning_rate": 1.925e-05, - "loss": 0.5341, - "step": 1156 - }, - { - "epoch": 0.040611453342459504, - "grad_norm": 0.8592766523361206, - "learning_rate": 1.926666666666667e-05, - "loss": 0.4679, - "step": 1157 - }, - { - "epoch": 0.040646553993576584, - "grad_norm": 1.0088168382644653, - "learning_rate": 1.9283333333333332e-05, - "loss": 0.4188, - "step": 1158 - }, - { - "epoch": 0.04068165464469366, - "grad_norm": 0.8713597059249878, - "learning_rate": 1.93e-05, - "loss": 0.6215, - "step": 1159 - }, - { - "epoch": 0.04071675529581074, - "grad_norm": 0.9707948565483093, - "learning_rate": 1.9316666666666668e-05, - "loss": 0.458, - "step": 1160 - }, - { - "epoch": 0.04075185594692782, - "grad_norm": 1.105762243270874, - "learning_rate": 1.9333333333333333e-05, - "loss": 0.5059, - "step": 1161 - }, - { - "epoch": 0.04078695659804489, - "grad_norm": 0.9623876214027405, - "learning_rate": 1.9350000000000003e-05, - "loss": 0.565, - "step": 1162 - }, - { - "epoch": 0.04082205724916197, - "grad_norm": 0.9897580146789551, - "learning_rate": 1.9366666666666665e-05, - "loss": 0.5092, - "step": 1163 - }, - { - "epoch": 0.04085715790027905, - "grad_norm": 1.1104151010513306, - "learning_rate": 1.9383333333333335e-05, - "loss": 0.6956, - "step": 1164 - }, - { - "epoch": 0.04089225855139613, - "grad_norm": 0.9994053840637207, - "learning_rate": 1.94e-05, - "loss": 0.6848, - "step": 1165 - }, - { - "epoch": 0.040927359202513204, - "grad_norm": 0.989762008190155, - "learning_rate": 1.9416666666666667e-05, - "loss": 0.5265, - "step": 1166 - }, - { - "epoch": 0.040962459853630284, - "grad_norm": 0.8993902802467346, - "learning_rate": 1.9433333333333332e-05, - "loss": 0.6445, - "step": 1167 - }, - { - "epoch": 0.040997560504747364, - "grad_norm": 1.1658049821853638, - "learning_rate": 1.9450000000000002e-05, - "loss": 0.5918, - "step": 1168 - }, - { - "epoch": 0.041032661155864444, - "grad_norm": 1.1489572525024414, - "learning_rate": 1.9466666666666668e-05, - "loss": 0.5383, - "step": 1169 - }, - { - "epoch": 0.04106776180698152, - "grad_norm": 1.1149494647979736, - "learning_rate": 1.9483333333333334e-05, - "loss": 0.5269, - "step": 1170 - }, - { - "epoch": 0.0411028624580986, - "grad_norm": 1.0343719720840454, - "learning_rate": 1.9500000000000003e-05, - "loss": 0.5108, - "step": 1171 - }, - { - "epoch": 0.04113796310921568, - "grad_norm": 0.9062416553497314, - "learning_rate": 1.9516666666666666e-05, - "loss": 0.5675, - "step": 1172 - }, - { - "epoch": 0.04117306376033275, - "grad_norm": 1.0966885089874268, - "learning_rate": 1.9533333333333335e-05, - "loss": 0.532, - "step": 1173 - }, - { - "epoch": 0.04120816441144983, - "grad_norm": 0.9024336338043213, - "learning_rate": 1.955e-05, - "loss": 0.6261, - "step": 1174 - }, - { - "epoch": 0.04124326506256691, - "grad_norm": 0.9701692461967468, - "learning_rate": 1.9566666666666667e-05, - "loss": 0.5539, - "step": 1175 - }, - { - "epoch": 0.04127836571368399, - "grad_norm": 0.9694427847862244, - "learning_rate": 1.9583333333333333e-05, - "loss": 0.6065, - "step": 1176 - }, - { - "epoch": 0.041313466364801064, - "grad_norm": 0.853252649307251, - "learning_rate": 1.9600000000000002e-05, - "loss": 0.6132, - "step": 1177 - }, - { - "epoch": 0.041348567015918145, - "grad_norm": 0.7746585011482239, - "learning_rate": 1.9616666666666668e-05, - "loss": 0.5931, - "step": 1178 - }, - { - "epoch": 0.041383667667035225, - "grad_norm": 0.9380737543106079, - "learning_rate": 1.9633333333333334e-05, - "loss": 0.5525, - "step": 1179 - }, - { - "epoch": 0.041418768318152305, - "grad_norm": 0.8563441038131714, - "learning_rate": 1.9650000000000003e-05, - "loss": 0.6567, - "step": 1180 - }, - { - "epoch": 0.04145386896926938, - "grad_norm": 0.8974596261978149, - "learning_rate": 1.9666666666666666e-05, - "loss": 0.6465, - "step": 1181 - }, - { - "epoch": 0.04148896962038646, - "grad_norm": 1.0097354650497437, - "learning_rate": 1.9683333333333335e-05, - "loss": 0.6693, - "step": 1182 - }, - { - "epoch": 0.04152407027150354, - "grad_norm": 1.3103739023208618, - "learning_rate": 1.97e-05, - "loss": 0.6867, - "step": 1183 - }, - { - "epoch": 0.04155917092262061, - "grad_norm": 1.1153186559677124, - "learning_rate": 1.9716666666666667e-05, - "loss": 0.5555, - "step": 1184 - }, - { - "epoch": 0.04159427157373769, - "grad_norm": 1.0118658542633057, - "learning_rate": 1.9733333333333333e-05, - "loss": 0.6122, - "step": 1185 - }, - { - "epoch": 0.04162937222485477, - "grad_norm": 0.8657587766647339, - "learning_rate": 1.9750000000000002e-05, - "loss": 0.604, - "step": 1186 - }, - { - "epoch": 0.04166447287597185, - "grad_norm": 1.0339405536651611, - "learning_rate": 1.9766666666666668e-05, - "loss": 0.608, - "step": 1187 - }, - { - "epoch": 0.041699573527088925, - "grad_norm": 0.8082770109176636, - "learning_rate": 1.9783333333333334e-05, - "loss": 0.581, - "step": 1188 - }, - { - "epoch": 0.041734674178206005, - "grad_norm": 1.0485087633132935, - "learning_rate": 1.9800000000000004e-05, - "loss": 0.5849, - "step": 1189 - }, - { - "epoch": 0.041769774829323085, - "grad_norm": 0.8721499443054199, - "learning_rate": 1.9816666666666666e-05, - "loss": 0.7044, - "step": 1190 - }, - { - "epoch": 0.041804875480440165, - "grad_norm": 1.1909102201461792, - "learning_rate": 1.9833333333333335e-05, - "loss": 0.4049, - "step": 1191 - }, - { - "epoch": 0.04183997613155724, - "grad_norm": 0.8456947207450867, - "learning_rate": 1.985e-05, - "loss": 0.5881, - "step": 1192 - }, - { - "epoch": 0.04187507678267432, - "grad_norm": 1.0174843072891235, - "learning_rate": 1.9866666666666667e-05, - "loss": 0.5316, - "step": 1193 - }, - { - "epoch": 0.0419101774337914, - "grad_norm": 1.117897391319275, - "learning_rate": 1.9883333333333333e-05, - "loss": 0.64, - "step": 1194 - }, - { - "epoch": 0.04194527808490847, - "grad_norm": 0.8840462565422058, - "learning_rate": 1.9900000000000003e-05, - "loss": 0.5681, - "step": 1195 - }, - { - "epoch": 0.04198037873602555, - "grad_norm": 0.8642598390579224, - "learning_rate": 1.9916666666666665e-05, - "loss": 0.5261, - "step": 1196 - }, - { - "epoch": 0.04201547938714263, - "grad_norm": 1.118917465209961, - "learning_rate": 1.9933333333333334e-05, - "loss": 0.5868, - "step": 1197 - }, - { - "epoch": 0.04205058003825971, - "grad_norm": 0.8755786418914795, - "learning_rate": 1.995e-05, - "loss": 0.5074, - "step": 1198 - }, - { - "epoch": 0.042085680689376785, - "grad_norm": 0.8636272549629211, - "learning_rate": 1.9966666666666666e-05, - "loss": 0.5602, - "step": 1199 - }, - { - "epoch": 0.042120781340493865, - "grad_norm": 1.0090816020965576, - "learning_rate": 1.9983333333333336e-05, - "loss": 0.5985, - "step": 1200 - }, - { - "epoch": 0.042155881991610945, - "grad_norm": 0.9463249444961548, - "learning_rate": 2e-05, - "loss": 0.6263, - "step": 1201 - }, - { - "epoch": 0.042190982642728025, - "grad_norm": 1.1058014631271362, - "learning_rate": 2.0016666666666668e-05, - "loss": 0.5143, - "step": 1202 - }, - { - "epoch": 0.0422260832938451, - "grad_norm": 0.9966798424720764, - "learning_rate": 2.0033333333333334e-05, - "loss": 0.7118, - "step": 1203 - }, - { - "epoch": 0.04226118394496218, - "grad_norm": 0.9342330098152161, - "learning_rate": 2.0050000000000003e-05, - "loss": 0.5481, - "step": 1204 - }, - { - "epoch": 0.04229628459607926, - "grad_norm": 0.9840129613876343, - "learning_rate": 2.0066666666666665e-05, - "loss": 0.4936, - "step": 1205 - }, - { - "epoch": 0.04233138524719634, - "grad_norm": 0.9503723978996277, - "learning_rate": 2.0083333333333335e-05, - "loss": 0.6088, - "step": 1206 - }, - { - "epoch": 0.04236648589831341, - "grad_norm": 1.0956673622131348, - "learning_rate": 2.01e-05, - "loss": 0.5948, - "step": 1207 - }, - { - "epoch": 0.04240158654943049, - "grad_norm": 0.7648499011993408, - "learning_rate": 2.0116666666666667e-05, - "loss": 0.6107, - "step": 1208 - }, - { - "epoch": 0.04243668720054757, - "grad_norm": 1.0190852880477905, - "learning_rate": 2.0133333333333336e-05, - "loss": 0.5138, - "step": 1209 - }, - { - "epoch": 0.042471787851664645, - "grad_norm": 0.8170036673545837, - "learning_rate": 2.0150000000000002e-05, - "loss": 0.574, - "step": 1210 - }, - { - "epoch": 0.042506888502781726, - "grad_norm": 0.8929511904716492, - "learning_rate": 2.0166666666666668e-05, - "loss": 0.6135, - "step": 1211 - }, - { - "epoch": 0.042541989153898806, - "grad_norm": 0.8172008395195007, - "learning_rate": 2.0183333333333334e-05, - "loss": 0.5812, - "step": 1212 - }, - { - "epoch": 0.042577089805015886, - "grad_norm": 0.7644974589347839, - "learning_rate": 2.0200000000000003e-05, - "loss": 0.6221, - "step": 1213 - }, - { - "epoch": 0.04261219045613296, - "grad_norm": 0.8804694414138794, - "learning_rate": 2.0216666666666666e-05, - "loss": 0.6391, - "step": 1214 - }, - { - "epoch": 0.04264729110725004, - "grad_norm": 0.9264209270477295, - "learning_rate": 2.0233333333333335e-05, - "loss": 0.5968, - "step": 1215 - }, - { - "epoch": 0.04268239175836712, - "grad_norm": 0.8453667163848877, - "learning_rate": 2.025e-05, - "loss": 0.5202, - "step": 1216 - }, - { - "epoch": 0.0427174924094842, - "grad_norm": 0.9284541606903076, - "learning_rate": 2.0266666666666667e-05, - "loss": 0.5484, - "step": 1217 - }, - { - "epoch": 0.04275259306060127, - "grad_norm": 0.8387140035629272, - "learning_rate": 2.0283333333333336e-05, - "loss": 0.6218, - "step": 1218 - }, - { - "epoch": 0.04278769371171835, - "grad_norm": 0.9756208658218384, - "learning_rate": 2.0300000000000002e-05, - "loss": 0.5704, - "step": 1219 - }, - { - "epoch": 0.04282279436283543, - "grad_norm": 0.8407394886016846, - "learning_rate": 2.0316666666666668e-05, - "loss": 0.574, - "step": 1220 - }, - { - "epoch": 0.042857895013952506, - "grad_norm": 0.8965904712677002, - "learning_rate": 2.0333333333333334e-05, - "loss": 0.5461, - "step": 1221 - }, - { - "epoch": 0.042892995665069586, - "grad_norm": 1.124571442604065, - "learning_rate": 2.035e-05, - "loss": 0.567, - "step": 1222 - }, - { - "epoch": 0.042928096316186666, - "grad_norm": 1.0436081886291504, - "learning_rate": 2.0366666666666666e-05, - "loss": 0.5859, - "step": 1223 - }, - { - "epoch": 0.042963196967303746, - "grad_norm": 0.8768441677093506, - "learning_rate": 2.0383333333333335e-05, - "loss": 0.605, - "step": 1224 - }, - { - "epoch": 0.04299829761842082, - "grad_norm": 1.043534755706787, - "learning_rate": 2.04e-05, - "loss": 0.6454, - "step": 1225 - }, - { - "epoch": 0.0430333982695379, - "grad_norm": 1.0274196863174438, - "learning_rate": 2.0416666666666667e-05, - "loss": 0.5079, - "step": 1226 - }, - { - "epoch": 0.04306849892065498, - "grad_norm": 1.4208531379699707, - "learning_rate": 2.0433333333333336e-05, - "loss": 0.565, - "step": 1227 - }, - { - "epoch": 0.04310359957177206, - "grad_norm": 0.8124593496322632, - "learning_rate": 2.045e-05, - "loss": 0.6029, - "step": 1228 - }, - { - "epoch": 0.04313870022288913, - "grad_norm": 0.8806735277175903, - "learning_rate": 2.046666666666667e-05, - "loss": 0.6163, - "step": 1229 - }, - { - "epoch": 0.04317380087400621, - "grad_norm": 1.0440969467163086, - "learning_rate": 2.0483333333333334e-05, - "loss": 0.597, - "step": 1230 - }, - { - "epoch": 0.04320890152512329, - "grad_norm": 0.9867247939109802, - "learning_rate": 2.05e-05, - "loss": 0.6264, - "step": 1231 - }, - { - "epoch": 0.043244002176240366, - "grad_norm": 0.956729531288147, - "learning_rate": 2.0516666666666666e-05, - "loss": 0.6417, - "step": 1232 - }, - { - "epoch": 0.043279102827357446, - "grad_norm": 1.1763876676559448, - "learning_rate": 2.0533333333333336e-05, - "loss": 0.665, - "step": 1233 - }, - { - "epoch": 0.043314203478474526, - "grad_norm": 0.8519653081893921, - "learning_rate": 2.055e-05, - "loss": 0.6127, - "step": 1234 - }, - { - "epoch": 0.043349304129591607, - "grad_norm": 0.8583745360374451, - "learning_rate": 2.0566666666666667e-05, - "loss": 0.4267, - "step": 1235 - }, - { - "epoch": 0.04338440478070868, - "grad_norm": 0.8305136561393738, - "learning_rate": 2.0583333333333333e-05, - "loss": 0.5562, - "step": 1236 - }, - { - "epoch": 0.04341950543182576, - "grad_norm": 0.7600401043891907, - "learning_rate": 2.06e-05, - "loss": 0.5618, - "step": 1237 - }, - { - "epoch": 0.04345460608294284, - "grad_norm": 1.0506978034973145, - "learning_rate": 2.061666666666667e-05, - "loss": 0.6101, - "step": 1238 - }, - { - "epoch": 0.04348970673405992, - "grad_norm": 0.8699408173561096, - "learning_rate": 2.0633333333333335e-05, - "loss": 0.5861, - "step": 1239 - }, - { - "epoch": 0.04352480738517699, - "grad_norm": 0.9041525721549988, - "learning_rate": 2.065e-05, - "loss": 0.5909, - "step": 1240 - }, - { - "epoch": 0.04355990803629407, - "grad_norm": 0.9315203428268433, - "learning_rate": 2.0666666666666666e-05, - "loss": 0.5754, - "step": 1241 - }, - { - "epoch": 0.04359500868741115, - "grad_norm": 0.7866604924201965, - "learning_rate": 2.0683333333333336e-05, - "loss": 0.5929, - "step": 1242 - }, - { - "epoch": 0.04363010933852823, - "grad_norm": 1.1668622493743896, - "learning_rate": 2.07e-05, - "loss": 0.5672, - "step": 1243 - }, - { - "epoch": 0.04366520998964531, - "grad_norm": 0.9067161679267883, - "learning_rate": 2.0716666666666668e-05, - "loss": 0.5603, - "step": 1244 - }, - { - "epoch": 0.04370031064076239, - "grad_norm": 0.9792014360427856, - "learning_rate": 2.0733333333333334e-05, - "loss": 0.5938, - "step": 1245 - }, - { - "epoch": 0.04373541129187947, - "grad_norm": 1.0255589485168457, - "learning_rate": 2.075e-05, - "loss": 0.5888, - "step": 1246 - }, - { - "epoch": 0.04377051194299654, - "grad_norm": 1.0449928045272827, - "learning_rate": 2.076666666666667e-05, - "loss": 0.6043, - "step": 1247 - }, - { - "epoch": 0.04380561259411362, - "grad_norm": 0.935329258441925, - "learning_rate": 2.0783333333333335e-05, - "loss": 0.6053, - "step": 1248 - }, - { - "epoch": 0.0438407132452307, - "grad_norm": 0.9620431065559387, - "learning_rate": 2.08e-05, - "loss": 0.5615, - "step": 1249 - }, - { - "epoch": 0.04387581389634778, - "grad_norm": 1.150026559829712, - "learning_rate": 2.0816666666666667e-05, - "loss": 0.6583, - "step": 1250 - }, - { - "epoch": 0.043910914547464854, - "grad_norm": 0.9072456359863281, - "learning_rate": 2.0833333333333336e-05, - "loss": 0.627, - "step": 1251 - }, - { - "epoch": 0.043946015198581934, - "grad_norm": 1.2282532453536987, - "learning_rate": 2.085e-05, - "loss": 0.6541, - "step": 1252 - }, - { - "epoch": 0.043981115849699014, - "grad_norm": 1.0507006645202637, - "learning_rate": 2.0866666666666668e-05, - "loss": 0.5933, - "step": 1253 - }, - { - "epoch": 0.04401621650081609, - "grad_norm": 0.9516247510910034, - "learning_rate": 2.0883333333333334e-05, - "loss": 0.5071, - "step": 1254 - }, - { - "epoch": 0.04405131715193317, - "grad_norm": 0.9996647834777832, - "learning_rate": 2.09e-05, - "loss": 0.5241, - "step": 1255 - }, - { - "epoch": 0.04408641780305025, - "grad_norm": 0.8334883451461792, - "learning_rate": 2.091666666666667e-05, - "loss": 0.6786, - "step": 1256 - }, - { - "epoch": 0.04412151845416733, - "grad_norm": 0.7434655427932739, - "learning_rate": 2.0933333333333335e-05, - "loss": 0.6154, - "step": 1257 - }, - { - "epoch": 0.0441566191052844, - "grad_norm": 1.0884755849838257, - "learning_rate": 2.095e-05, - "loss": 0.5791, - "step": 1258 - }, - { - "epoch": 0.04419171975640148, - "grad_norm": 1.323236107826233, - "learning_rate": 2.0966666666666667e-05, - "loss": 0.5175, - "step": 1259 - }, - { - "epoch": 0.04422682040751856, - "grad_norm": 1.105530858039856, - "learning_rate": 2.0983333333333336e-05, - "loss": 0.6319, - "step": 1260 - }, - { - "epoch": 0.04426192105863564, - "grad_norm": 0.7992721199989319, - "learning_rate": 2.1e-05, - "loss": 0.4805, - "step": 1261 - }, - { - "epoch": 0.044297021709752714, - "grad_norm": 1.2161036729812622, - "learning_rate": 2.1016666666666668e-05, - "loss": 0.5419, - "step": 1262 - }, - { - "epoch": 0.044332122360869794, - "grad_norm": 1.1513391733169556, - "learning_rate": 2.1033333333333334e-05, - "loss": 0.579, - "step": 1263 - }, - { - "epoch": 0.044367223011986874, - "grad_norm": 0.9124073386192322, - "learning_rate": 2.105e-05, - "loss": 0.4829, - "step": 1264 - }, - { - "epoch": 0.04440232366310395, - "grad_norm": 1.0916827917099, - "learning_rate": 2.106666666666667e-05, - "loss": 0.5532, - "step": 1265 - }, - { - "epoch": 0.04443742431422103, - "grad_norm": 1.2989686727523804, - "learning_rate": 2.1083333333333335e-05, - "loss": 0.5215, - "step": 1266 - }, - { - "epoch": 0.04447252496533811, - "grad_norm": 1.7528316974639893, - "learning_rate": 2.11e-05, - "loss": 0.641, - "step": 1267 - }, - { - "epoch": 0.04450762561645519, - "grad_norm": 1.1326812505722046, - "learning_rate": 2.1116666666666667e-05, - "loss": 0.5772, - "step": 1268 - }, - { - "epoch": 0.04454272626757226, - "grad_norm": 1.186155080795288, - "learning_rate": 2.1133333333333337e-05, - "loss": 0.6255, - "step": 1269 - }, - { - "epoch": 0.04457782691868934, - "grad_norm": 0.8946954011917114, - "learning_rate": 2.115e-05, - "loss": 0.5465, - "step": 1270 - }, - { - "epoch": 0.04461292756980642, - "grad_norm": 1.137293815612793, - "learning_rate": 2.116666666666667e-05, - "loss": 0.593, - "step": 1271 - }, - { - "epoch": 0.0446480282209235, - "grad_norm": 0.9652631878852844, - "learning_rate": 2.1183333333333334e-05, - "loss": 0.5016, - "step": 1272 - }, - { - "epoch": 0.044683128872040574, - "grad_norm": 1.034254789352417, - "learning_rate": 2.12e-05, - "loss": 0.6194, - "step": 1273 - }, - { - "epoch": 0.044718229523157654, - "grad_norm": 1.0079013109207153, - "learning_rate": 2.121666666666667e-05, - "loss": 0.6048, - "step": 1274 - }, - { - "epoch": 0.044753330174274734, - "grad_norm": 0.86722332239151, - "learning_rate": 2.1233333333333336e-05, - "loss": 0.66, - "step": 1275 - }, - { - "epoch": 0.04478843082539181, - "grad_norm": 0.874142050743103, - "learning_rate": 2.125e-05, - "loss": 0.5475, - "step": 1276 - }, - { - "epoch": 0.04482353147650889, - "grad_norm": 0.7891244292259216, - "learning_rate": 2.1266666666666667e-05, - "loss": 0.5936, - "step": 1277 - }, - { - "epoch": 0.04485863212762597, - "grad_norm": 0.8186230063438416, - "learning_rate": 2.1283333333333337e-05, - "loss": 0.5668, - "step": 1278 - }, - { - "epoch": 0.04489373277874305, - "grad_norm": 0.7901942729949951, - "learning_rate": 2.13e-05, - "loss": 0.51, - "step": 1279 - }, - { - "epoch": 0.04492883342986012, - "grad_norm": 0.7573047280311584, - "learning_rate": 2.131666666666667e-05, - "loss": 0.5565, - "step": 1280 - }, - { - "epoch": 0.0449639340809772, - "grad_norm": 0.848042905330658, - "learning_rate": 2.1333333333333335e-05, - "loss": 0.6521, - "step": 1281 - }, - { - "epoch": 0.04499903473209428, - "grad_norm": 0.8993608951568604, - "learning_rate": 2.135e-05, - "loss": 0.6206, - "step": 1282 - }, - { - "epoch": 0.04503413538321136, - "grad_norm": 0.811018168926239, - "learning_rate": 2.1366666666666667e-05, - "loss": 0.6201, - "step": 1283 - }, - { - "epoch": 0.045069236034328435, - "grad_norm": 0.96208655834198, - "learning_rate": 2.1383333333333332e-05, - "loss": 0.6214, - "step": 1284 - }, - { - "epoch": 0.045104336685445515, - "grad_norm": 1.235467791557312, - "learning_rate": 2.1400000000000002e-05, - "loss": 0.4757, - "step": 1285 - }, - { - "epoch": 0.045139437336562595, - "grad_norm": 0.8466498255729675, - "learning_rate": 2.1416666666666668e-05, - "loss": 0.6103, - "step": 1286 - }, - { - "epoch": 0.045174537987679675, - "grad_norm": 1.0111896991729736, - "learning_rate": 2.1433333333333334e-05, - "loss": 0.6829, - "step": 1287 - }, - { - "epoch": 0.04520963863879675, - "grad_norm": 0.8784310221672058, - "learning_rate": 2.145e-05, - "loss": 0.5692, - "step": 1288 - }, - { - "epoch": 0.04524473928991383, - "grad_norm": 0.923785388469696, - "learning_rate": 2.146666666666667e-05, - "loss": 0.5915, - "step": 1289 - }, - { - "epoch": 0.04527983994103091, - "grad_norm": 0.9720158576965332, - "learning_rate": 2.148333333333333e-05, - "loss": 0.6657, - "step": 1290 - }, - { - "epoch": 0.04531494059214798, - "grad_norm": 0.8979562520980835, - "learning_rate": 2.15e-05, - "loss": 0.6009, - "step": 1291 - }, - { - "epoch": 0.04535004124326506, - "grad_norm": 0.8532235026359558, - "learning_rate": 2.1516666666666667e-05, - "loss": 0.5452, - "step": 1292 - }, - { - "epoch": 0.04538514189438214, - "grad_norm": 0.8392691612243652, - "learning_rate": 2.1533333333333333e-05, - "loss": 0.5926, - "step": 1293 - }, - { - "epoch": 0.04542024254549922, - "grad_norm": 0.9789480566978455, - "learning_rate": 2.1550000000000002e-05, - "loss": 0.5065, - "step": 1294 - }, - { - "epoch": 0.045455343196616295, - "grad_norm": 0.8148399591445923, - "learning_rate": 2.1566666666666668e-05, - "loss": 0.5822, - "step": 1295 - }, - { - "epoch": 0.045490443847733375, - "grad_norm": 0.8900694847106934, - "learning_rate": 2.1583333333333334e-05, - "loss": 0.6077, - "step": 1296 - }, - { - "epoch": 0.045525544498850455, - "grad_norm": 0.9025644063949585, - "learning_rate": 2.16e-05, - "loss": 0.6141, - "step": 1297 - }, - { - "epoch": 0.045560645149967535, - "grad_norm": 0.9687007665634155, - "learning_rate": 2.161666666666667e-05, - "loss": 0.4895, - "step": 1298 - }, - { - "epoch": 0.04559574580108461, - "grad_norm": 1.0527963638305664, - "learning_rate": 2.1633333333333332e-05, - "loss": 0.6124, - "step": 1299 - }, - { - "epoch": 0.04563084645220169, - "grad_norm": 0.9732885956764221, - "learning_rate": 2.165e-05, - "loss": 0.6227, - "step": 1300 - }, - { - "epoch": 0.04566594710331877, - "grad_norm": 0.8811717629432678, - "learning_rate": 2.1666666666666667e-05, - "loss": 0.6199, - "step": 1301 - }, - { - "epoch": 0.04570104775443584, - "grad_norm": 0.8965359330177307, - "learning_rate": 2.1683333333333333e-05, - "loss": 0.5091, - "step": 1302 - }, - { - "epoch": 0.04573614840555292, - "grad_norm": 0.799170196056366, - "learning_rate": 2.1700000000000002e-05, - "loss": 0.5146, - "step": 1303 - }, - { - "epoch": 0.04577124905667, - "grad_norm": 1.1216000318527222, - "learning_rate": 2.1716666666666668e-05, - "loss": 0.6334, - "step": 1304 - }, - { - "epoch": 0.04580634970778708, - "grad_norm": 1.066213607788086, - "learning_rate": 2.1733333333333334e-05, - "loss": 0.684, - "step": 1305 - }, - { - "epoch": 0.045841450358904155, - "grad_norm": 1.005852460861206, - "learning_rate": 2.175e-05, - "loss": 0.5378, - "step": 1306 - }, - { - "epoch": 0.045876551010021235, - "grad_norm": 0.8573705554008484, - "learning_rate": 2.176666666666667e-05, - "loss": 0.6351, - "step": 1307 - }, - { - "epoch": 0.045911651661138315, - "grad_norm": 0.9931483268737793, - "learning_rate": 2.1783333333333332e-05, - "loss": 0.674, - "step": 1308 - }, - { - "epoch": 0.045946752312255396, - "grad_norm": 1.0524840354919434, - "learning_rate": 2.18e-05, - "loss": 0.4829, - "step": 1309 - }, - { - "epoch": 0.04598185296337247, - "grad_norm": 1.0602960586547852, - "learning_rate": 2.1816666666666667e-05, - "loss": 0.5894, - "step": 1310 - }, - { - "epoch": 0.04601695361448955, - "grad_norm": 0.8225466012954712, - "learning_rate": 2.1833333333333333e-05, - "loss": 0.4943, - "step": 1311 - }, - { - "epoch": 0.04605205426560663, - "grad_norm": 0.9279763102531433, - "learning_rate": 2.1850000000000003e-05, - "loss": 0.5728, - "step": 1312 - }, - { - "epoch": 0.0460871549167237, - "grad_norm": 0.9208652973175049, - "learning_rate": 2.186666666666667e-05, - "loss": 0.5806, - "step": 1313 - }, - { - "epoch": 0.04612225556784078, - "grad_norm": 0.891963541507721, - "learning_rate": 2.1883333333333334e-05, - "loss": 0.4505, - "step": 1314 - }, - { - "epoch": 0.04615735621895786, - "grad_norm": 1.0481077432632446, - "learning_rate": 2.19e-05, - "loss": 0.579, - "step": 1315 - }, - { - "epoch": 0.04619245687007494, - "grad_norm": 0.9770382642745972, - "learning_rate": 2.191666666666667e-05, - "loss": 0.6175, - "step": 1316 - }, - { - "epoch": 0.046227557521192016, - "grad_norm": 0.8830778002738953, - "learning_rate": 2.1933333333333332e-05, - "loss": 0.5809, - "step": 1317 - }, - { - "epoch": 0.046262658172309096, - "grad_norm": 0.9987735152244568, - "learning_rate": 2.195e-05, - "loss": 0.6055, - "step": 1318 - }, - { - "epoch": 0.046297758823426176, - "grad_norm": 0.9642265439033508, - "learning_rate": 2.1966666666666668e-05, - "loss": 0.5671, - "step": 1319 - }, - { - "epoch": 0.046332859474543256, - "grad_norm": 1.0046539306640625, - "learning_rate": 2.1983333333333333e-05, - "loss": 0.6336, - "step": 1320 - }, - { - "epoch": 0.04636796012566033, - "grad_norm": 1.0598657131195068, - "learning_rate": 2.2000000000000003e-05, - "loss": 0.6491, - "step": 1321 - }, - { - "epoch": 0.04640306077677741, - "grad_norm": 0.8845835328102112, - "learning_rate": 2.201666666666667e-05, - "loss": 0.5907, - "step": 1322 - }, - { - "epoch": 0.04643816142789449, - "grad_norm": 1.0653579235076904, - "learning_rate": 2.2033333333333335e-05, - "loss": 0.6382, - "step": 1323 - }, - { - "epoch": 0.04647326207901156, - "grad_norm": 1.2952213287353516, - "learning_rate": 2.205e-05, - "loss": 0.6017, - "step": 1324 - }, - { - "epoch": 0.04650836273012864, - "grad_norm": 1.0288969278335571, - "learning_rate": 2.206666666666667e-05, - "loss": 0.6398, - "step": 1325 - }, - { - "epoch": 0.04654346338124572, - "grad_norm": 0.8833808898925781, - "learning_rate": 2.2083333333333333e-05, - "loss": 0.5453, - "step": 1326 - }, - { - "epoch": 0.0465785640323628, - "grad_norm": 0.9019023180007935, - "learning_rate": 2.2100000000000002e-05, - "loss": 0.6035, - "step": 1327 - }, - { - "epoch": 0.046613664683479876, - "grad_norm": 0.8831346035003662, - "learning_rate": 2.2116666666666668e-05, - "loss": 0.5774, - "step": 1328 - }, - { - "epoch": 0.046648765334596956, - "grad_norm": 0.8821845650672913, - "learning_rate": 2.2133333333333334e-05, - "loss": 0.5548, - "step": 1329 - }, - { - "epoch": 0.046683865985714036, - "grad_norm": 1.0253621339797974, - "learning_rate": 2.215e-05, - "loss": 0.6303, - "step": 1330 - }, - { - "epoch": 0.046718966636831116, - "grad_norm": 0.8783879280090332, - "learning_rate": 2.216666666666667e-05, - "loss": 0.6801, - "step": 1331 - }, - { - "epoch": 0.04675406728794819, - "grad_norm": 0.9857313632965088, - "learning_rate": 2.2183333333333335e-05, - "loss": 0.5945, - "step": 1332 - }, - { - "epoch": 0.04678916793906527, - "grad_norm": 1.0179704427719116, - "learning_rate": 2.22e-05, - "loss": 0.5994, - "step": 1333 - }, - { - "epoch": 0.04682426859018235, - "grad_norm": 0.7670969367027283, - "learning_rate": 2.221666666666667e-05, - "loss": 0.5958, - "step": 1334 - }, - { - "epoch": 0.04685936924129942, - "grad_norm": 0.715133011341095, - "learning_rate": 2.2233333333333333e-05, - "loss": 0.5877, - "step": 1335 - }, - { - "epoch": 0.0468944698924165, - "grad_norm": 0.8861752152442932, - "learning_rate": 2.2250000000000002e-05, - "loss": 0.5464, - "step": 1336 - }, - { - "epoch": 0.04692957054353358, - "grad_norm": 0.7214004993438721, - "learning_rate": 2.2266666666666668e-05, - "loss": 0.4957, - "step": 1337 - }, - { - "epoch": 0.04696467119465066, - "grad_norm": 1.162130355834961, - "learning_rate": 2.2283333333333334e-05, - "loss": 0.4967, - "step": 1338 - }, - { - "epoch": 0.046999771845767736, - "grad_norm": 0.8951762914657593, - "learning_rate": 2.23e-05, - "loss": 0.6276, - "step": 1339 - }, - { - "epoch": 0.047034872496884816, - "grad_norm": 0.9130322337150574, - "learning_rate": 2.231666666666667e-05, - "loss": 0.5763, - "step": 1340 - }, - { - "epoch": 0.047069973148001897, - "grad_norm": 0.7825527191162109, - "learning_rate": 2.2333333333333335e-05, - "loss": 0.6213, - "step": 1341 - }, - { - "epoch": 0.04710507379911898, - "grad_norm": 1.002081274986267, - "learning_rate": 2.235e-05, - "loss": 0.6848, - "step": 1342 - }, - { - "epoch": 0.04714017445023605, - "grad_norm": 1.045775294303894, - "learning_rate": 2.236666666666667e-05, - "loss": 0.5935, - "step": 1343 - }, - { - "epoch": 0.04717527510135313, - "grad_norm": 1.1247576475143433, - "learning_rate": 2.2383333333333333e-05, - "loss": 0.6278, - "step": 1344 - }, - { - "epoch": 0.04721037575247021, - "grad_norm": 0.9264076352119446, - "learning_rate": 2.2400000000000002e-05, - "loss": 0.4853, - "step": 1345 - }, - { - "epoch": 0.04724547640358728, - "grad_norm": 0.9032841324806213, - "learning_rate": 2.2416666666666665e-05, - "loss": 0.5793, - "step": 1346 - }, - { - "epoch": 0.04728057705470436, - "grad_norm": 0.7691323757171631, - "learning_rate": 2.2433333333333334e-05, - "loss": 0.6159, - "step": 1347 - }, - { - "epoch": 0.04731567770582144, - "grad_norm": 1.0699636936187744, - "learning_rate": 2.245e-05, - "loss": 0.4947, - "step": 1348 - }, - { - "epoch": 0.047350778356938523, - "grad_norm": 1.007291316986084, - "learning_rate": 2.2466666666666666e-05, - "loss": 0.6581, - "step": 1349 - }, - { - "epoch": 0.0473858790080556, - "grad_norm": 0.9178420901298523, - "learning_rate": 2.2483333333333335e-05, - "loss": 0.6394, - "step": 1350 - }, - { - "epoch": 0.04742097965917268, - "grad_norm": 0.9966664910316467, - "learning_rate": 2.25e-05, - "loss": 0.6738, - "step": 1351 - }, - { - "epoch": 0.04745608031028976, - "grad_norm": 0.8448407053947449, - "learning_rate": 2.2516666666666667e-05, - "loss": 0.5313, - "step": 1352 - }, - { - "epoch": 0.04749118096140684, - "grad_norm": 0.8473799228668213, - "learning_rate": 2.2533333333333333e-05, - "loss": 0.5713, - "step": 1353 - }, - { - "epoch": 0.04752628161252391, - "grad_norm": 0.8104460835456848, - "learning_rate": 2.2550000000000003e-05, - "loss": 0.5666, - "step": 1354 - }, - { - "epoch": 0.04756138226364099, - "grad_norm": 1.0103565454483032, - "learning_rate": 2.2566666666666665e-05, - "loss": 0.5776, - "step": 1355 - }, - { - "epoch": 0.04759648291475807, - "grad_norm": 1.008154273033142, - "learning_rate": 2.2583333333333335e-05, - "loss": 0.6663, - "step": 1356 - }, - { - "epoch": 0.047631583565875144, - "grad_norm": 0.7677988409996033, - "learning_rate": 2.26e-05, - "loss": 0.5339, - "step": 1357 - }, - { - "epoch": 0.047666684216992224, - "grad_norm": 0.9283496141433716, - "learning_rate": 2.2616666666666666e-05, - "loss": 0.6081, - "step": 1358 - }, - { - "epoch": 0.047701784868109304, - "grad_norm": 0.9555026888847351, - "learning_rate": 2.2633333333333336e-05, - "loss": 0.5211, - "step": 1359 - }, - { - "epoch": 0.047736885519226384, - "grad_norm": 0.8146477937698364, - "learning_rate": 2.265e-05, - "loss": 0.583, - "step": 1360 - }, - { - "epoch": 0.04777198617034346, - "grad_norm": 0.8224829435348511, - "learning_rate": 2.2666666666666668e-05, - "loss": 0.5105, - "step": 1361 - }, - { - "epoch": 0.04780708682146054, - "grad_norm": 1.0573444366455078, - "learning_rate": 2.2683333333333334e-05, - "loss": 0.6022, - "step": 1362 - }, - { - "epoch": 0.04784218747257762, - "grad_norm": 0.8850184679031372, - "learning_rate": 2.2700000000000003e-05, - "loss": 0.5846, - "step": 1363 - }, - { - "epoch": 0.0478772881236947, - "grad_norm": 0.9310876131057739, - "learning_rate": 2.2716666666666665e-05, - "loss": 0.6226, - "step": 1364 - }, - { - "epoch": 0.04791238877481177, - "grad_norm": 0.9875400066375732, - "learning_rate": 2.2733333333333335e-05, - "loss": 0.5805, - "step": 1365 - }, - { - "epoch": 0.04794748942592885, - "grad_norm": 0.9192889928817749, - "learning_rate": 2.275e-05, - "loss": 0.5655, - "step": 1366 - }, - { - "epoch": 0.04798259007704593, - "grad_norm": 1.09389328956604, - "learning_rate": 2.2766666666666667e-05, - "loss": 0.5371, - "step": 1367 - }, - { - "epoch": 0.04801769072816301, - "grad_norm": 0.8843206763267517, - "learning_rate": 2.2783333333333336e-05, - "loss": 0.5346, - "step": 1368 - }, - { - "epoch": 0.048052791379280084, - "grad_norm": 0.9545069336891174, - "learning_rate": 2.2800000000000002e-05, - "loss": 0.606, - "step": 1369 - }, - { - "epoch": 0.048087892030397164, - "grad_norm": 1.0261815786361694, - "learning_rate": 2.2816666666666668e-05, - "loss": 0.6597, - "step": 1370 - }, - { - "epoch": 0.048122992681514244, - "grad_norm": 0.8468955755233765, - "learning_rate": 2.2833333333333334e-05, - "loss": 0.5891, - "step": 1371 - }, - { - "epoch": 0.04815809333263132, - "grad_norm": 0.8354367613792419, - "learning_rate": 2.2850000000000003e-05, - "loss": 0.6448, - "step": 1372 - }, - { - "epoch": 0.0481931939837484, - "grad_norm": 0.9705619812011719, - "learning_rate": 2.2866666666666666e-05, - "loss": 0.6928, - "step": 1373 - }, - { - "epoch": 0.04822829463486548, - "grad_norm": 0.8287313580513, - "learning_rate": 2.2883333333333335e-05, - "loss": 0.5379, - "step": 1374 - }, - { - "epoch": 0.04826339528598256, - "grad_norm": 0.7645323872566223, - "learning_rate": 2.29e-05, - "loss": 0.4621, - "step": 1375 - }, - { - "epoch": 0.04829849593709963, - "grad_norm": 0.8514662384986877, - "learning_rate": 2.2916666666666667e-05, - "loss": 0.6341, - "step": 1376 - }, - { - "epoch": 0.04833359658821671, - "grad_norm": 0.913595974445343, - "learning_rate": 2.2933333333333333e-05, - "loss": 0.5494, - "step": 1377 - }, - { - "epoch": 0.04836869723933379, - "grad_norm": 0.8810891509056091, - "learning_rate": 2.2950000000000002e-05, - "loss": 0.6556, - "step": 1378 - }, - { - "epoch": 0.04840379789045087, - "grad_norm": 0.8694616556167603, - "learning_rate": 2.2966666666666668e-05, - "loss": 0.6405, - "step": 1379 - }, - { - "epoch": 0.048438898541567944, - "grad_norm": 0.7635499835014343, - "learning_rate": 2.2983333333333334e-05, - "loss": 0.6274, - "step": 1380 - }, - { - "epoch": 0.048473999192685024, - "grad_norm": 1.034641146659851, - "learning_rate": 2.3000000000000003e-05, - "loss": 0.4503, - "step": 1381 - }, - { - "epoch": 0.048509099843802105, - "grad_norm": 1.090796709060669, - "learning_rate": 2.3016666666666666e-05, - "loss": 0.622, - "step": 1382 - }, - { - "epoch": 0.04854420049491918, - "grad_norm": 0.9189488887786865, - "learning_rate": 2.3033333333333335e-05, - "loss": 0.5076, - "step": 1383 - }, - { - "epoch": 0.04857930114603626, - "grad_norm": 0.9259627461433411, - "learning_rate": 2.305e-05, - "loss": 0.5534, - "step": 1384 - }, - { - "epoch": 0.04861440179715334, - "grad_norm": 0.9400174617767334, - "learning_rate": 2.3066666666666667e-05, - "loss": 0.5899, - "step": 1385 - }, - { - "epoch": 0.04864950244827042, - "grad_norm": 0.9215295910835266, - "learning_rate": 2.3083333333333333e-05, - "loss": 0.594, - "step": 1386 - }, - { - "epoch": 0.04868460309938749, - "grad_norm": 0.9168936014175415, - "learning_rate": 2.3100000000000002e-05, - "loss": 0.5778, - "step": 1387 - }, - { - "epoch": 0.04871970375050457, - "grad_norm": 0.932648241519928, - "learning_rate": 2.311666666666667e-05, - "loss": 0.515, - "step": 1388 - }, - { - "epoch": 0.04875480440162165, - "grad_norm": 0.9155400991439819, - "learning_rate": 2.3133333333333334e-05, - "loss": 0.6072, - "step": 1389 - }, - { - "epoch": 0.04878990505273873, - "grad_norm": 0.7297822833061218, - "learning_rate": 2.3150000000000004e-05, - "loss": 0.533, - "step": 1390 - }, - { - "epoch": 0.048825005703855805, - "grad_norm": 0.7923974394798279, - "learning_rate": 2.3166666666666666e-05, - "loss": 0.5281, - "step": 1391 - }, - { - "epoch": 0.048860106354972885, - "grad_norm": 0.8310854434967041, - "learning_rate": 2.3183333333333336e-05, - "loss": 0.4972, - "step": 1392 - }, - { - "epoch": 0.048895207006089965, - "grad_norm": 1.2057256698608398, - "learning_rate": 2.32e-05, - "loss": 0.661, - "step": 1393 - }, - { - "epoch": 0.04893030765720704, - "grad_norm": 0.9190142154693604, - "learning_rate": 2.3216666666666667e-05, - "loss": 0.4572, - "step": 1394 - }, - { - "epoch": 0.04896540830832412, - "grad_norm": 0.8839457631111145, - "learning_rate": 2.3233333333333333e-05, - "loss": 0.6664, - "step": 1395 - }, - { - "epoch": 0.0490005089594412, - "grad_norm": 0.745145320892334, - "learning_rate": 2.3250000000000003e-05, - "loss": 0.4594, - "step": 1396 - }, - { - "epoch": 0.04903560961055828, - "grad_norm": 1.1367541551589966, - "learning_rate": 2.326666666666667e-05, - "loss": 0.6129, - "step": 1397 - }, - { - "epoch": 0.04907071026167535, - "grad_norm": 0.9885472655296326, - "learning_rate": 2.3283333333333335e-05, - "loss": 0.4493, - "step": 1398 - }, - { - "epoch": 0.04910581091279243, - "grad_norm": 0.7785638570785522, - "learning_rate": 2.3300000000000004e-05, - "loss": 0.5987, - "step": 1399 - }, - { - "epoch": 0.04914091156390951, - "grad_norm": 0.7895349860191345, - "learning_rate": 2.3316666666666666e-05, - "loss": 0.6269, - "step": 1400 - }, - { - "epoch": 0.04917601221502659, - "grad_norm": 0.9859976768493652, - "learning_rate": 2.3333333333333336e-05, - "loss": 0.6728, - "step": 1401 - }, - { - "epoch": 0.049211112866143665, - "grad_norm": 0.8850532174110413, - "learning_rate": 2.3350000000000002e-05, - "loss": 0.5804, - "step": 1402 - }, - { - "epoch": 0.049246213517260745, - "grad_norm": 0.8255146741867065, - "learning_rate": 2.3366666666666668e-05, - "loss": 0.5163, - "step": 1403 - }, - { - "epoch": 0.049281314168377825, - "grad_norm": 0.8226147890090942, - "learning_rate": 2.3383333333333334e-05, - "loss": 0.6239, - "step": 1404 - }, - { - "epoch": 0.0493164148194949, - "grad_norm": 0.904064953327179, - "learning_rate": 2.3400000000000003e-05, - "loss": 0.6598, - "step": 1405 - }, - { - "epoch": 0.04935151547061198, - "grad_norm": 0.829043984413147, - "learning_rate": 2.341666666666667e-05, - "loss": 0.5867, - "step": 1406 - }, - { - "epoch": 0.04938661612172906, - "grad_norm": 0.8868791460990906, - "learning_rate": 2.3433333333333335e-05, - "loss": 0.5867, - "step": 1407 - }, - { - "epoch": 0.04942171677284614, - "grad_norm": 0.820468544960022, - "learning_rate": 2.345e-05, - "loss": 0.5973, - "step": 1408 - }, - { - "epoch": 0.04945681742396321, - "grad_norm": 0.9773980379104614, - "learning_rate": 2.3466666666666667e-05, - "loss": 0.6355, - "step": 1409 - }, - { - "epoch": 0.04949191807508029, - "grad_norm": 0.8319428563117981, - "learning_rate": 2.3483333333333336e-05, - "loss": 0.4181, - "step": 1410 - }, - { - "epoch": 0.04952701872619737, - "grad_norm": 0.8425382971763611, - "learning_rate": 2.35e-05, - "loss": 0.6565, - "step": 1411 - }, - { - "epoch": 0.04956211937731445, - "grad_norm": 0.8136534690856934, - "learning_rate": 2.3516666666666668e-05, - "loss": 0.5034, - "step": 1412 - }, - { - "epoch": 0.049597220028431525, - "grad_norm": 0.7443097233772278, - "learning_rate": 2.3533333333333334e-05, - "loss": 0.6263, - "step": 1413 - }, - { - "epoch": 0.049632320679548605, - "grad_norm": 0.888260006904602, - "learning_rate": 2.355e-05, - "loss": 0.5257, - "step": 1414 - }, - { - "epoch": 0.049667421330665686, - "grad_norm": 0.9726781845092773, - "learning_rate": 2.3566666666666666e-05, - "loss": 0.6056, - "step": 1415 - }, - { - "epoch": 0.04970252198178276, - "grad_norm": 0.934015154838562, - "learning_rate": 2.3583333333333335e-05, - "loss": 0.5097, - "step": 1416 - }, - { - "epoch": 0.04973762263289984, - "grad_norm": 0.7699724435806274, - "learning_rate": 2.36e-05, - "loss": 0.4799, - "step": 1417 - }, - { - "epoch": 0.04977272328401692, - "grad_norm": 0.8072660565376282, - "learning_rate": 2.3616666666666667e-05, - "loss": 0.5183, - "step": 1418 - }, - { - "epoch": 0.049807823935134, - "grad_norm": 0.9367466568946838, - "learning_rate": 2.3633333333333336e-05, - "loss": 0.4728, - "step": 1419 - }, - { - "epoch": 0.04984292458625107, - "grad_norm": 0.8968836069107056, - "learning_rate": 2.365e-05, - "loss": 0.5465, - "step": 1420 - }, - { - "epoch": 0.04987802523736815, - "grad_norm": 0.9370712637901306, - "learning_rate": 2.3666666666666668e-05, - "loss": 0.5547, - "step": 1421 - }, - { - "epoch": 0.04991312588848523, - "grad_norm": 0.8985634446144104, - "learning_rate": 2.3683333333333334e-05, - "loss": 0.476, - "step": 1422 - }, - { - "epoch": 0.04994822653960231, - "grad_norm": 1.1236416101455688, - "learning_rate": 2.37e-05, - "loss": 0.5049, - "step": 1423 - }, - { - "epoch": 0.049983327190719386, - "grad_norm": 1.1350032091140747, - "learning_rate": 2.3716666666666666e-05, - "loss": 0.6458, - "step": 1424 - }, - { - "epoch": 0.050018427841836466, - "grad_norm": 1.0265923738479614, - "learning_rate": 2.3733333333333335e-05, - "loss": 0.6081, - "step": 1425 - }, - { - "epoch": 0.050053528492953546, - "grad_norm": 0.9249345064163208, - "learning_rate": 2.375e-05, - "loss": 0.5693, - "step": 1426 - }, - { - "epoch": 0.05008862914407062, - "grad_norm": 0.8873357772827148, - "learning_rate": 2.3766666666666667e-05, - "loss": 0.6651, - "step": 1427 - }, - { - "epoch": 0.0501237297951877, - "grad_norm": 0.8991969227790833, - "learning_rate": 2.3783333333333337e-05, - "loss": 0.526, - "step": 1428 - }, - { - "epoch": 0.05015883044630478, - "grad_norm": 0.9085362553596497, - "learning_rate": 2.38e-05, - "loss": 0.6598, - "step": 1429 - }, - { - "epoch": 0.05019393109742186, - "grad_norm": 0.7426455020904541, - "learning_rate": 2.381666666666667e-05, - "loss": 0.4764, - "step": 1430 - }, - { - "epoch": 0.05022903174853893, - "grad_norm": 0.7136308550834656, - "learning_rate": 2.3833333333333334e-05, - "loss": 0.5505, - "step": 1431 - }, - { - "epoch": 0.05026413239965601, - "grad_norm": 0.8349494934082031, - "learning_rate": 2.385e-05, - "loss": 0.439, - "step": 1432 - }, - { - "epoch": 0.05029923305077309, - "grad_norm": 0.9426866769790649, - "learning_rate": 2.3866666666666666e-05, - "loss": 0.6767, - "step": 1433 - }, - { - "epoch": 0.05033433370189017, - "grad_norm": 0.9961262345314026, - "learning_rate": 2.3883333333333336e-05, - "loss": 0.5162, - "step": 1434 - }, - { - "epoch": 0.050369434353007246, - "grad_norm": 0.9317672252655029, - "learning_rate": 2.39e-05, - "loss": 0.6645, - "step": 1435 - }, - { - "epoch": 0.050404535004124326, - "grad_norm": 0.8165339827537537, - "learning_rate": 2.3916666666666668e-05, - "loss": 0.653, - "step": 1436 - }, - { - "epoch": 0.050439635655241406, - "grad_norm": 0.8679570555686951, - "learning_rate": 2.3933333333333337e-05, - "loss": 0.5885, - "step": 1437 - }, - { - "epoch": 0.050474736306358486, - "grad_norm": 0.9226828217506409, - "learning_rate": 2.395e-05, - "loss": 0.5655, - "step": 1438 - }, - { - "epoch": 0.05050983695747556, - "grad_norm": 0.9950264096260071, - "learning_rate": 2.396666666666667e-05, - "loss": 0.4902, - "step": 1439 - }, - { - "epoch": 0.05054493760859264, - "grad_norm": 0.9637367725372314, - "learning_rate": 2.3983333333333335e-05, - "loss": 0.5962, - "step": 1440 - }, - { - "epoch": 0.05058003825970972, - "grad_norm": 0.7300596237182617, - "learning_rate": 2.4e-05, - "loss": 0.4908, - "step": 1441 - }, - { - "epoch": 0.05061513891082679, - "grad_norm": 0.8011408448219299, - "learning_rate": 2.4016666666666667e-05, - "loss": 0.4899, - "step": 1442 - }, - { - "epoch": 0.05065023956194387, - "grad_norm": 0.8831098079681396, - "learning_rate": 2.4033333333333336e-05, - "loss": 0.4966, - "step": 1443 - }, - { - "epoch": 0.05068534021306095, - "grad_norm": 1.0501912832260132, - "learning_rate": 2.4050000000000002e-05, - "loss": 0.5751, - "step": 1444 - }, - { - "epoch": 0.05072044086417803, - "grad_norm": 1.1154884099960327, - "learning_rate": 2.4066666666666668e-05, - "loss": 0.6022, - "step": 1445 - }, - { - "epoch": 0.050755541515295106, - "grad_norm": 0.9770199060440063, - "learning_rate": 2.4083333333333337e-05, - "loss": 0.6011, - "step": 1446 - }, - { - "epoch": 0.05079064216641219, - "grad_norm": 1.0678211450576782, - "learning_rate": 2.41e-05, - "loss": 0.6062, - "step": 1447 - }, - { - "epoch": 0.05082574281752927, - "grad_norm": 0.8812564015388489, - "learning_rate": 2.411666666666667e-05, - "loss": 0.6392, - "step": 1448 - }, - { - "epoch": 0.05086084346864635, - "grad_norm": 1.0071412324905396, - "learning_rate": 2.4133333333333335e-05, - "loss": 0.4438, - "step": 1449 - }, - { - "epoch": 0.05089594411976342, - "grad_norm": 0.9238632917404175, - "learning_rate": 2.415e-05, - "loss": 0.5655, - "step": 1450 - }, - { - "epoch": 0.0509310447708805, - "grad_norm": 0.9648122191429138, - "learning_rate": 2.4166666666666667e-05, - "loss": 0.5773, - "step": 1451 - }, - { - "epoch": 0.05096614542199758, - "grad_norm": 0.9602773189544678, - "learning_rate": 2.4183333333333336e-05, - "loss": 0.5006, - "step": 1452 - }, - { - "epoch": 0.05100124607311465, - "grad_norm": 1.3500280380249023, - "learning_rate": 2.4200000000000002e-05, - "loss": 0.4953, - "step": 1453 - }, - { - "epoch": 0.05103634672423173, - "grad_norm": 0.8189380764961243, - "learning_rate": 2.4216666666666668e-05, - "loss": 0.6714, - "step": 1454 - }, - { - "epoch": 0.051071447375348814, - "grad_norm": 0.7656849026679993, - "learning_rate": 2.4233333333333337e-05, - "loss": 0.6312, - "step": 1455 - }, - { - "epoch": 0.051106548026465894, - "grad_norm": 0.8716220259666443, - "learning_rate": 2.425e-05, - "loss": 0.5718, - "step": 1456 - }, - { - "epoch": 0.05114164867758297, - "grad_norm": 0.8098880052566528, - "learning_rate": 2.426666666666667e-05, - "loss": 0.6164, - "step": 1457 - }, - { - "epoch": 0.05117674932870005, - "grad_norm": 1.0608261823654175, - "learning_rate": 2.4283333333333335e-05, - "loss": 0.568, - "step": 1458 - }, - { - "epoch": 0.05121184997981713, - "grad_norm": 1.1452842950820923, - "learning_rate": 2.43e-05, - "loss": 0.5158, - "step": 1459 - }, - { - "epoch": 0.05124695063093421, - "grad_norm": 0.9255029559135437, - "learning_rate": 2.4316666666666667e-05, - "loss": 0.5856, - "step": 1460 - }, - { - "epoch": 0.05128205128205128, - "grad_norm": 0.911535382270813, - "learning_rate": 2.4333333333333336e-05, - "loss": 0.5499, - "step": 1461 - }, - { - "epoch": 0.05131715193316836, - "grad_norm": 0.8715035319328308, - "learning_rate": 2.435e-05, - "loss": 0.5767, - "step": 1462 - }, - { - "epoch": 0.05135225258428544, - "grad_norm": 0.8597701787948608, - "learning_rate": 2.4366666666666668e-05, - "loss": 0.5206, - "step": 1463 - }, - { - "epoch": 0.051387353235402514, - "grad_norm": 0.8434522151947021, - "learning_rate": 2.4383333333333334e-05, - "loss": 0.5894, - "step": 1464 - }, - { - "epoch": 0.051422453886519594, - "grad_norm": 1.078916311264038, - "learning_rate": 2.44e-05, - "loss": 0.6305, - "step": 1465 - }, - { - "epoch": 0.051457554537636674, - "grad_norm": 0.9632884860038757, - "learning_rate": 2.441666666666667e-05, - "loss": 0.5119, - "step": 1466 - }, - { - "epoch": 0.051492655188753754, - "grad_norm": 0.9038316011428833, - "learning_rate": 2.4433333333333335e-05, - "loss": 0.6363, - "step": 1467 - }, - { - "epoch": 0.05152775583987083, - "grad_norm": 1.4983078241348267, - "learning_rate": 2.445e-05, - "loss": 0.6463, - "step": 1468 - }, - { - "epoch": 0.05156285649098791, - "grad_norm": 0.8505057096481323, - "learning_rate": 2.4466666666666667e-05, - "loss": 0.5272, - "step": 1469 - }, - { - "epoch": 0.05159795714210499, - "grad_norm": 0.9734725952148438, - "learning_rate": 2.4483333333333333e-05, - "loss": 0.5695, - "step": 1470 - }, - { - "epoch": 0.05163305779322207, - "grad_norm": 0.9427980184555054, - "learning_rate": 2.45e-05, - "loss": 0.5807, - "step": 1471 - }, - { - "epoch": 0.05166815844433914, - "grad_norm": 1.1969058513641357, - "learning_rate": 2.451666666666667e-05, - "loss": 0.6016, - "step": 1472 - }, - { - "epoch": 0.05170325909545622, - "grad_norm": 1.0406696796417236, - "learning_rate": 2.4533333333333334e-05, - "loss": 0.5363, - "step": 1473 - }, - { - "epoch": 0.0517383597465733, - "grad_norm": 1.0298492908477783, - "learning_rate": 2.455e-05, - "loss": 0.5378, - "step": 1474 - }, - { - "epoch": 0.051773460397690374, - "grad_norm": 0.9241925477981567, - "learning_rate": 2.456666666666667e-05, - "loss": 0.5349, - "step": 1475 - }, - { - "epoch": 0.051808561048807454, - "grad_norm": 0.9663549065589905, - "learning_rate": 2.4583333333333332e-05, - "loss": 0.6703, - "step": 1476 - }, - { - "epoch": 0.051843661699924534, - "grad_norm": 0.9351133704185486, - "learning_rate": 2.46e-05, - "loss": 0.584, - "step": 1477 - }, - { - "epoch": 0.051878762351041614, - "grad_norm": 0.8211937546730042, - "learning_rate": 2.4616666666666668e-05, - "loss": 0.5647, - "step": 1478 - }, - { - "epoch": 0.05191386300215869, - "grad_norm": 1.1139233112335205, - "learning_rate": 2.4633333333333334e-05, - "loss": 0.4035, - "step": 1479 - }, - { - "epoch": 0.05194896365327577, - "grad_norm": 1.0714863538742065, - "learning_rate": 2.465e-05, - "loss": 0.5377, - "step": 1480 - }, - { - "epoch": 0.05198406430439285, - "grad_norm": 0.8968372344970703, - "learning_rate": 2.466666666666667e-05, - "loss": 0.5067, - "step": 1481 - }, - { - "epoch": 0.05201916495550993, - "grad_norm": 0.8226032853126526, - "learning_rate": 2.4683333333333335e-05, - "loss": 0.5496, - "step": 1482 - }, - { - "epoch": 0.052054265606627, - "grad_norm": 1.1523706912994385, - "learning_rate": 2.47e-05, - "loss": 0.5987, - "step": 1483 - }, - { - "epoch": 0.05208936625774408, - "grad_norm": 0.9169896841049194, - "learning_rate": 2.471666666666667e-05, - "loss": 0.5802, - "step": 1484 - }, - { - "epoch": 0.05212446690886116, - "grad_norm": 1.1325315237045288, - "learning_rate": 2.4733333333333333e-05, - "loss": 0.6196, - "step": 1485 - }, - { - "epoch": 0.052159567559978234, - "grad_norm": 1.1404129266738892, - "learning_rate": 2.4750000000000002e-05, - "loss": 0.6945, - "step": 1486 - }, - { - "epoch": 0.052194668211095314, - "grad_norm": 0.9190216660499573, - "learning_rate": 2.4766666666666668e-05, - "loss": 0.5951, - "step": 1487 - }, - { - "epoch": 0.052229768862212395, - "grad_norm": 0.9223252534866333, - "learning_rate": 2.4783333333333334e-05, - "loss": 0.6129, - "step": 1488 - }, - { - "epoch": 0.052264869513329475, - "grad_norm": 0.7985851168632507, - "learning_rate": 2.48e-05, - "loss": 0.5505, - "step": 1489 - }, - { - "epoch": 0.05229997016444655, - "grad_norm": 0.8588102459907532, - "learning_rate": 2.481666666666667e-05, - "loss": 0.5974, - "step": 1490 - }, - { - "epoch": 0.05233507081556363, - "grad_norm": 0.9576802849769592, - "learning_rate": 2.4833333333333335e-05, - "loss": 0.6335, - "step": 1491 - }, - { - "epoch": 0.05237017146668071, - "grad_norm": 0.9025812149047852, - "learning_rate": 2.485e-05, - "loss": 0.6873, - "step": 1492 - }, - { - "epoch": 0.05240527211779779, - "grad_norm": 0.9183788299560547, - "learning_rate": 2.486666666666667e-05, - "loss": 0.5139, - "step": 1493 - }, - { - "epoch": 0.05244037276891486, - "grad_norm": 0.7240175604820251, - "learning_rate": 2.4883333333333333e-05, - "loss": 0.5031, - "step": 1494 - }, - { - "epoch": 0.05247547342003194, - "grad_norm": 0.8669688105583191, - "learning_rate": 2.4900000000000002e-05, - "loss": 0.6114, - "step": 1495 - }, - { - "epoch": 0.05251057407114902, - "grad_norm": 0.8632715940475464, - "learning_rate": 2.4916666666666668e-05, - "loss": 0.5778, - "step": 1496 - }, - { - "epoch": 0.052545674722266095, - "grad_norm": 0.8068783283233643, - "learning_rate": 2.4933333333333334e-05, - "loss": 0.597, - "step": 1497 - }, - { - "epoch": 0.052580775373383175, - "grad_norm": 0.8584094047546387, - "learning_rate": 2.495e-05, - "loss": 0.4858, - "step": 1498 - }, - { - "epoch": 0.052615876024500255, - "grad_norm": 0.6980915665626526, - "learning_rate": 2.496666666666667e-05, - "loss": 0.5388, - "step": 1499 - }, - { - "epoch": 0.052650976675617335, - "grad_norm": 0.8205957412719727, - "learning_rate": 2.4983333333333335e-05, - "loss": 0.5531, - "step": 1500 - }, - { - "epoch": 0.05268607732673441, - "grad_norm": 0.7044770121574402, - "learning_rate": 2.5e-05, - "loss": 0.4022, - "step": 1501 - }, - { - "epoch": 0.05272117797785149, - "grad_norm": 0.8833944797515869, - "learning_rate": 2.5016666666666667e-05, - "loss": 0.4781, - "step": 1502 - }, - { - "epoch": 0.05275627862896857, - "grad_norm": 0.8832343816757202, - "learning_rate": 2.5033333333333336e-05, - "loss": 0.5714, - "step": 1503 - }, - { - "epoch": 0.05279137928008565, - "grad_norm": 1.3227026462554932, - "learning_rate": 2.5050000000000002e-05, - "loss": 0.4341, - "step": 1504 - }, - { - "epoch": 0.05282647993120272, - "grad_norm": 0.7450274229049683, - "learning_rate": 2.5066666666666665e-05, - "loss": 0.5222, - "step": 1505 - }, - { - "epoch": 0.0528615805823198, - "grad_norm": 0.906936764717102, - "learning_rate": 2.5083333333333338e-05, - "loss": 0.5757, - "step": 1506 - }, - { - "epoch": 0.05289668123343688, - "grad_norm": 1.2139792442321777, - "learning_rate": 2.51e-05, - "loss": 0.4954, - "step": 1507 - }, - { - "epoch": 0.052931781884553955, - "grad_norm": 1.0227335691452026, - "learning_rate": 2.5116666666666666e-05, - "loss": 0.6476, - "step": 1508 - }, - { - "epoch": 0.052966882535671035, - "grad_norm": 0.8952595591545105, - "learning_rate": 2.5133333333333336e-05, - "loss": 0.5495, - "step": 1509 - }, - { - "epoch": 0.053001983186788115, - "grad_norm": 0.8526520133018494, - "learning_rate": 2.515e-05, - "loss": 0.5921, - "step": 1510 - }, - { - "epoch": 0.053037083837905195, - "grad_norm": 0.9406465291976929, - "learning_rate": 2.5166666666666667e-05, - "loss": 0.6461, - "step": 1511 - }, - { - "epoch": 0.05307218448902227, - "grad_norm": 0.9828279614448547, - "learning_rate": 2.5183333333333337e-05, - "loss": 0.4667, - "step": 1512 - }, - { - "epoch": 0.05310728514013935, - "grad_norm": 1.2353652715682983, - "learning_rate": 2.5200000000000003e-05, - "loss": 0.5424, - "step": 1513 - }, - { - "epoch": 0.05314238579125643, - "grad_norm": 1.0070971250534058, - "learning_rate": 2.5216666666666665e-05, - "loss": 0.603, - "step": 1514 - }, - { - "epoch": 0.05317748644237351, - "grad_norm": 1.1331793069839478, - "learning_rate": 2.5233333333333338e-05, - "loss": 0.4568, - "step": 1515 - }, - { - "epoch": 0.05321258709349058, - "grad_norm": 1.2636204957962036, - "learning_rate": 2.525e-05, - "loss": 0.4653, - "step": 1516 - }, - { - "epoch": 0.05324768774460766, - "grad_norm": 1.137778639793396, - "learning_rate": 2.5266666666666666e-05, - "loss": 0.5428, - "step": 1517 - }, - { - "epoch": 0.05328278839572474, - "grad_norm": 0.6592631340026855, - "learning_rate": 2.5283333333333336e-05, - "loss": 0.4709, - "step": 1518 - }, - { - "epoch": 0.05331788904684182, - "grad_norm": 0.8651885986328125, - "learning_rate": 2.5300000000000002e-05, - "loss": 0.5616, - "step": 1519 - }, - { - "epoch": 0.053352989697958896, - "grad_norm": 1.2772464752197266, - "learning_rate": 2.5316666666666668e-05, - "loss": 0.6228, - "step": 1520 - }, - { - "epoch": 0.053388090349075976, - "grad_norm": 1.0781004428863525, - "learning_rate": 2.5333333333333337e-05, - "loss": 0.5652, - "step": 1521 - }, - { - "epoch": 0.053423191000193056, - "grad_norm": 0.7070196270942688, - "learning_rate": 2.5350000000000003e-05, - "loss": 0.5083, - "step": 1522 - }, - { - "epoch": 0.05345829165131013, - "grad_norm": 1.0119154453277588, - "learning_rate": 2.5366666666666665e-05, - "loss": 0.6022, - "step": 1523 - }, - { - "epoch": 0.05349339230242721, - "grad_norm": 0.9012966156005859, - "learning_rate": 2.5383333333333338e-05, - "loss": 0.4689, - "step": 1524 - }, - { - "epoch": 0.05352849295354429, - "grad_norm": 0.9807010889053345, - "learning_rate": 2.54e-05, - "loss": 0.5991, - "step": 1525 - }, - { - "epoch": 0.05356359360466137, - "grad_norm": 0.8261582851409912, - "learning_rate": 2.5416666666666667e-05, - "loss": 0.5915, - "step": 1526 - }, - { - "epoch": 0.05359869425577844, - "grad_norm": 0.9033578634262085, - "learning_rate": 2.5433333333333336e-05, - "loss": 0.4492, - "step": 1527 - }, - { - "epoch": 0.05363379490689552, - "grad_norm": 0.8827459812164307, - "learning_rate": 2.5450000000000002e-05, - "loss": 0.5892, - "step": 1528 - }, - { - "epoch": 0.0536688955580126, - "grad_norm": 0.7585636973381042, - "learning_rate": 2.5466666666666668e-05, - "loss": 0.5017, - "step": 1529 - }, - { - "epoch": 0.05370399620912968, - "grad_norm": 0.8670980334281921, - "learning_rate": 2.5483333333333337e-05, - "loss": 0.6932, - "step": 1530 - }, - { - "epoch": 0.053739096860246756, - "grad_norm": 0.8173544406890869, - "learning_rate": 2.5500000000000003e-05, - "loss": 0.6716, - "step": 1531 - }, - { - "epoch": 0.053774197511363836, - "grad_norm": 0.7903043627738953, - "learning_rate": 2.5516666666666666e-05, - "loss": 0.5511, - "step": 1532 - }, - { - "epoch": 0.053809298162480916, - "grad_norm": 0.7819637656211853, - "learning_rate": 2.553333333333334e-05, - "loss": 0.5641, - "step": 1533 - }, - { - "epoch": 0.05384439881359799, - "grad_norm": 1.135581374168396, - "learning_rate": 2.555e-05, - "loss": 0.6257, - "step": 1534 - }, - { - "epoch": 0.05387949946471507, - "grad_norm": 0.8057026863098145, - "learning_rate": 2.5566666666666667e-05, - "loss": 0.5824, - "step": 1535 - }, - { - "epoch": 0.05391460011583215, - "grad_norm": 0.9141386151313782, - "learning_rate": 2.5583333333333336e-05, - "loss": 0.5913, - "step": 1536 - }, - { - "epoch": 0.05394970076694923, - "grad_norm": 1.023053765296936, - "learning_rate": 2.5600000000000002e-05, - "loss": 0.5872, - "step": 1537 - }, - { - "epoch": 0.0539848014180663, - "grad_norm": 0.8817396759986877, - "learning_rate": 2.5616666666666668e-05, - "loss": 0.6555, - "step": 1538 - }, - { - "epoch": 0.05401990206918338, - "grad_norm": 0.9328076839447021, - "learning_rate": 2.5633333333333338e-05, - "loss": 0.6691, - "step": 1539 - }, - { - "epoch": 0.05405500272030046, - "grad_norm": 0.8951240181922913, - "learning_rate": 2.5650000000000003e-05, - "loss": 0.6517, - "step": 1540 - }, - { - "epoch": 0.05409010337141754, - "grad_norm": 0.9209489226341248, - "learning_rate": 2.5666666666666666e-05, - "loss": 0.4486, - "step": 1541 - }, - { - "epoch": 0.054125204022534616, - "grad_norm": 0.739060640335083, - "learning_rate": 2.5683333333333335e-05, - "loss": 0.5507, - "step": 1542 - }, - { - "epoch": 0.054160304673651696, - "grad_norm": 0.9737457036972046, - "learning_rate": 2.57e-05, - "loss": 0.4977, - "step": 1543 - }, - { - "epoch": 0.054195405324768776, - "grad_norm": 0.7628976702690125, - "learning_rate": 2.5716666666666667e-05, - "loss": 0.5919, - "step": 1544 - }, - { - "epoch": 0.05423050597588585, - "grad_norm": 0.8171901106834412, - "learning_rate": 2.5733333333333337e-05, - "loss": 0.5487, - "step": 1545 - }, - { - "epoch": 0.05426560662700293, - "grad_norm": 0.8544654250144958, - "learning_rate": 2.5750000000000002e-05, - "loss": 0.6379, - "step": 1546 - }, - { - "epoch": 0.05430070727812001, - "grad_norm": 0.9182475805282593, - "learning_rate": 2.5766666666666665e-05, - "loss": 0.5088, - "step": 1547 - }, - { - "epoch": 0.05433580792923709, - "grad_norm": 2.2327253818511963, - "learning_rate": 2.5783333333333338e-05, - "loss": 0.5962, - "step": 1548 - }, - { - "epoch": 0.05437090858035416, - "grad_norm": 0.860800564289093, - "learning_rate": 2.58e-05, - "loss": 0.6356, - "step": 1549 - }, - { - "epoch": 0.05440600923147124, - "grad_norm": 0.8559394478797913, - "learning_rate": 2.5816666666666666e-05, - "loss": 0.4781, - "step": 1550 - }, - { - "epoch": 0.05444110988258832, - "grad_norm": 0.9324066638946533, - "learning_rate": 2.5833333333333336e-05, - "loss": 0.4601, - "step": 1551 - }, - { - "epoch": 0.0544762105337054, - "grad_norm": 0.7692175507545471, - "learning_rate": 2.585e-05, - "loss": 0.577, - "step": 1552 - }, - { - "epoch": 0.05451131118482248, - "grad_norm": 0.8642932772636414, - "learning_rate": 2.5866666666666667e-05, - "loss": 0.6, - "step": 1553 - }, - { - "epoch": 0.05454641183593956, - "grad_norm": 0.8588842749595642, - "learning_rate": 2.5883333333333337e-05, - "loss": 0.5401, - "step": 1554 - }, - { - "epoch": 0.05458151248705664, - "grad_norm": 0.9186115860939026, - "learning_rate": 2.5900000000000003e-05, - "loss": 0.591, - "step": 1555 - }, - { - "epoch": 0.05461661313817371, - "grad_norm": 0.7131514549255371, - "learning_rate": 2.5916666666666665e-05, - "loss": 0.6195, - "step": 1556 - }, - { - "epoch": 0.05465171378929079, - "grad_norm": 0.8255966305732727, - "learning_rate": 2.5933333333333338e-05, - "loss": 0.4954, - "step": 1557 - }, - { - "epoch": 0.05468681444040787, - "grad_norm": 0.7736409902572632, - "learning_rate": 2.595e-05, - "loss": 0.6126, - "step": 1558 - }, - { - "epoch": 0.05472191509152495, - "grad_norm": 0.9837097525596619, - "learning_rate": 2.5966666666666667e-05, - "loss": 0.7245, - "step": 1559 - }, - { - "epoch": 0.05475701574264202, - "grad_norm": 0.792003333568573, - "learning_rate": 2.5983333333333336e-05, - "loss": 0.5447, - "step": 1560 - }, - { - "epoch": 0.054792116393759104, - "grad_norm": 0.9907065629959106, - "learning_rate": 2.6000000000000002e-05, - "loss": 0.5563, - "step": 1561 - }, - { - "epoch": 0.054827217044876184, - "grad_norm": 0.7174595594406128, - "learning_rate": 2.6016666666666668e-05, - "loss": 0.5083, - "step": 1562 - }, - { - "epoch": 0.054862317695993264, - "grad_norm": 0.9422925710678101, - "learning_rate": 2.6033333333333337e-05, - "loss": 0.6196, - "step": 1563 - }, - { - "epoch": 0.05489741834711034, - "grad_norm": 0.9489427208900452, - "learning_rate": 2.6050000000000003e-05, - "loss": 0.5535, - "step": 1564 - }, - { - "epoch": 0.05493251899822742, - "grad_norm": 0.8384038805961609, - "learning_rate": 2.6066666666666666e-05, - "loss": 0.5801, - "step": 1565 - }, - { - "epoch": 0.0549676196493445, - "grad_norm": 0.716360330581665, - "learning_rate": 2.608333333333333e-05, - "loss": 0.6442, - "step": 1566 - }, - { - "epoch": 0.05500272030046157, - "grad_norm": 0.859251081943512, - "learning_rate": 2.61e-05, - "loss": 0.5036, - "step": 1567 - }, - { - "epoch": 0.05503782095157865, - "grad_norm": 0.7934287190437317, - "learning_rate": 2.6116666666666667e-05, - "loss": 0.5741, - "step": 1568 - }, - { - "epoch": 0.05507292160269573, - "grad_norm": 0.819107711315155, - "learning_rate": 2.6133333333333333e-05, - "loss": 0.5731, - "step": 1569 - }, - { - "epoch": 0.05510802225381281, - "grad_norm": 0.9189621806144714, - "learning_rate": 2.6150000000000002e-05, - "loss": 0.6446, - "step": 1570 - }, - { - "epoch": 0.055143122904929884, - "grad_norm": 0.8865830302238464, - "learning_rate": 2.6166666666666668e-05, - "loss": 0.5828, - "step": 1571 - }, - { - "epoch": 0.055178223556046964, - "grad_norm": 0.8061898946762085, - "learning_rate": 2.618333333333333e-05, - "loss": 0.5223, - "step": 1572 - }, - { - "epoch": 0.055213324207164044, - "grad_norm": 0.7666097283363342, - "learning_rate": 2.6200000000000003e-05, - "loss": 0.5283, - "step": 1573 - }, - { - "epoch": 0.055248424858281124, - "grad_norm": 0.8229867219924927, - "learning_rate": 2.6216666666666666e-05, - "loss": 0.6017, - "step": 1574 - }, - { - "epoch": 0.0552835255093982, - "grad_norm": 0.7606106400489807, - "learning_rate": 2.6233333333333332e-05, - "loss": 0.5485, - "step": 1575 - }, - { - "epoch": 0.05531862616051528, - "grad_norm": 0.9296096563339233, - "learning_rate": 2.625e-05, - "loss": 0.6334, - "step": 1576 - }, - { - "epoch": 0.05535372681163236, - "grad_norm": 1.0402617454528809, - "learning_rate": 2.6266666666666667e-05, - "loss": 0.6127, - "step": 1577 - }, - { - "epoch": 0.05538882746274943, - "grad_norm": 1.0092535018920898, - "learning_rate": 2.6283333333333333e-05, - "loss": 0.6211, - "step": 1578 - }, - { - "epoch": 0.05542392811386651, - "grad_norm": 0.8297025561332703, - "learning_rate": 2.6300000000000002e-05, - "loss": 0.6405, - "step": 1579 - }, - { - "epoch": 0.05545902876498359, - "grad_norm": 0.9090064167976379, - "learning_rate": 2.6316666666666668e-05, - "loss": 0.505, - "step": 1580 - }, - { - "epoch": 0.05549412941610067, - "grad_norm": 0.8663822412490845, - "learning_rate": 2.633333333333333e-05, - "loss": 0.4136, - "step": 1581 - }, - { - "epoch": 0.055529230067217744, - "grad_norm": 0.9022305011749268, - "learning_rate": 2.6350000000000004e-05, - "loss": 0.5303, - "step": 1582 - }, - { - "epoch": 0.055564330718334824, - "grad_norm": 0.6345568299293518, - "learning_rate": 2.6366666666666666e-05, - "loss": 0.512, - "step": 1583 - }, - { - "epoch": 0.055599431369451904, - "grad_norm": 0.7334537506103516, - "learning_rate": 2.6383333333333332e-05, - "loss": 0.6312, - "step": 1584 - }, - { - "epoch": 0.055634532020568984, - "grad_norm": 0.8477878570556641, - "learning_rate": 2.64e-05, - "loss": 0.4951, - "step": 1585 - }, - { - "epoch": 0.05566963267168606, - "grad_norm": 0.853717029094696, - "learning_rate": 2.6416666666666667e-05, - "loss": 0.6546, - "step": 1586 - }, - { - "epoch": 0.05570473332280314, - "grad_norm": 0.8018850684165955, - "learning_rate": 2.6433333333333333e-05, - "loss": 0.5654, - "step": 1587 - }, - { - "epoch": 0.05573983397392022, - "grad_norm": 0.6719748377799988, - "learning_rate": 2.6450000000000003e-05, - "loss": 0.5293, - "step": 1588 - }, - { - "epoch": 0.05577493462503729, - "grad_norm": 0.8983669877052307, - "learning_rate": 2.646666666666667e-05, - "loss": 0.6011, - "step": 1589 - }, - { - "epoch": 0.05581003527615437, - "grad_norm": 0.8444874286651611, - "learning_rate": 2.648333333333333e-05, - "loss": 0.5761, - "step": 1590 - }, - { - "epoch": 0.05584513592727145, - "grad_norm": 1.0006234645843506, - "learning_rate": 2.6500000000000004e-05, - "loss": 0.5463, - "step": 1591 - }, - { - "epoch": 0.05588023657838853, - "grad_norm": 0.7869793772697449, - "learning_rate": 2.6516666666666666e-05, - "loss": 0.611, - "step": 1592 - }, - { - "epoch": 0.055915337229505604, - "grad_norm": 0.8124833106994629, - "learning_rate": 2.6533333333333332e-05, - "loss": 0.5851, - "step": 1593 - }, - { - "epoch": 0.055950437880622685, - "grad_norm": 1.0046567916870117, - "learning_rate": 2.655e-05, - "loss": 0.5127, - "step": 1594 - }, - { - "epoch": 0.055985538531739765, - "grad_norm": 0.7066105604171753, - "learning_rate": 2.6566666666666668e-05, - "loss": 0.5023, - "step": 1595 - }, - { - "epoch": 0.056020639182856845, - "grad_norm": 0.8923646807670593, - "learning_rate": 2.6583333333333333e-05, - "loss": 0.5775, - "step": 1596 - }, - { - "epoch": 0.05605573983397392, - "grad_norm": 0.8404573798179626, - "learning_rate": 2.6600000000000003e-05, - "loss": 0.5662, - "step": 1597 - }, - { - "epoch": 0.056090840485091, - "grad_norm": 0.815059244632721, - "learning_rate": 2.661666666666667e-05, - "loss": 0.6052, - "step": 1598 - }, - { - "epoch": 0.05612594113620808, - "grad_norm": 0.872988224029541, - "learning_rate": 2.663333333333333e-05, - "loss": 0.6346, - "step": 1599 - }, - { - "epoch": 0.05616104178732516, - "grad_norm": 1.0520364046096802, - "learning_rate": 2.6650000000000004e-05, - "loss": 0.5217, - "step": 1600 - }, - { - "epoch": 0.05619614243844223, - "grad_norm": 0.8525280356407166, - "learning_rate": 2.6666666666666667e-05, - "loss": 0.6113, - "step": 1601 - }, - { - "epoch": 0.05623124308955931, - "grad_norm": 0.7703519463539124, - "learning_rate": 2.6683333333333333e-05, - "loss": 0.4889, - "step": 1602 - }, - { - "epoch": 0.05626634374067639, - "grad_norm": 0.7067528963088989, - "learning_rate": 2.6700000000000002e-05, - "loss": 0.6334, - "step": 1603 - }, - { - "epoch": 0.056301444391793465, - "grad_norm": 0.7190594673156738, - "learning_rate": 2.6716666666666668e-05, - "loss": 0.5926, - "step": 1604 - }, - { - "epoch": 0.056336545042910545, - "grad_norm": 0.8615397214889526, - "learning_rate": 2.6733333333333334e-05, - "loss": 0.5052, - "step": 1605 - }, - { - "epoch": 0.056371645694027625, - "grad_norm": 0.9560925960540771, - "learning_rate": 2.6750000000000003e-05, - "loss": 0.5805, - "step": 1606 - }, - { - "epoch": 0.056406746345144705, - "grad_norm": 0.8827159404754639, - "learning_rate": 2.676666666666667e-05, - "loss": 0.6436, - "step": 1607 - }, - { - "epoch": 0.05644184699626178, - "grad_norm": 0.7900975346565247, - "learning_rate": 2.678333333333333e-05, - "loss": 0.3975, - "step": 1608 - }, - { - "epoch": 0.05647694764737886, - "grad_norm": 0.7604160904884338, - "learning_rate": 2.6800000000000004e-05, - "loss": 0.5456, - "step": 1609 - }, - { - "epoch": 0.05651204829849594, - "grad_norm": 0.8024663329124451, - "learning_rate": 2.6816666666666667e-05, - "loss": 0.4587, - "step": 1610 - }, - { - "epoch": 0.05654714894961302, - "grad_norm": 1.0114924907684326, - "learning_rate": 2.6833333333333333e-05, - "loss": 0.5864, - "step": 1611 - }, - { - "epoch": 0.05658224960073009, - "grad_norm": 0.9237247705459595, - "learning_rate": 2.6850000000000002e-05, - "loss": 0.6094, - "step": 1612 - }, - { - "epoch": 0.05661735025184717, - "grad_norm": 1.013271450996399, - "learning_rate": 2.6866666666666668e-05, - "loss": 0.6641, - "step": 1613 - }, - { - "epoch": 0.05665245090296425, - "grad_norm": 0.9568613767623901, - "learning_rate": 2.6883333333333334e-05, - "loss": 0.6127, - "step": 1614 - }, - { - "epoch": 0.056687551554081325, - "grad_norm": 0.8756184577941895, - "learning_rate": 2.6900000000000003e-05, - "loss": 0.5275, - "step": 1615 - }, - { - "epoch": 0.056722652205198405, - "grad_norm": 0.8299587965011597, - "learning_rate": 2.691666666666667e-05, - "loss": 0.5521, - "step": 1616 - }, - { - "epoch": 0.056757752856315485, - "grad_norm": 0.8527237772941589, - "learning_rate": 2.6933333333333332e-05, - "loss": 0.509, - "step": 1617 - }, - { - "epoch": 0.056792853507432565, - "grad_norm": 0.734512984752655, - "learning_rate": 2.6950000000000005e-05, - "loss": 0.5114, - "step": 1618 - }, - { - "epoch": 0.05682795415854964, - "grad_norm": 0.9348254799842834, - "learning_rate": 2.6966666666666667e-05, - "loss": 0.6435, - "step": 1619 - }, - { - "epoch": 0.05686305480966672, - "grad_norm": 0.8855149149894714, - "learning_rate": 2.6983333333333333e-05, - "loss": 0.4943, - "step": 1620 - }, - { - "epoch": 0.0568981554607838, - "grad_norm": 0.9278663992881775, - "learning_rate": 2.7000000000000002e-05, - "loss": 0.476, - "step": 1621 - }, - { - "epoch": 0.05693325611190088, - "grad_norm": 0.7646121382713318, - "learning_rate": 2.701666666666667e-05, - "loss": 0.4753, - "step": 1622 - }, - { - "epoch": 0.05696835676301795, - "grad_norm": 0.8605278730392456, - "learning_rate": 2.7033333333333334e-05, - "loss": 0.4575, - "step": 1623 - }, - { - "epoch": 0.05700345741413503, - "grad_norm": 1.0712898969650269, - "learning_rate": 2.7050000000000004e-05, - "loss": 0.604, - "step": 1624 - }, - { - "epoch": 0.05703855806525211, - "grad_norm": 0.9088006019592285, - "learning_rate": 2.706666666666667e-05, - "loss": 0.495, - "step": 1625 - }, - { - "epoch": 0.057073658716369186, - "grad_norm": 1.0011342763900757, - "learning_rate": 2.7083333333333332e-05, - "loss": 0.6515, - "step": 1626 - }, - { - "epoch": 0.057108759367486266, - "grad_norm": 1.2058277130126953, - "learning_rate": 2.7100000000000005e-05, - "loss": 0.5859, - "step": 1627 - }, - { - "epoch": 0.057143860018603346, - "grad_norm": 0.8988670706748962, - "learning_rate": 2.7116666666666667e-05, - "loss": 0.5666, - "step": 1628 - }, - { - "epoch": 0.057178960669720426, - "grad_norm": 0.9090272784233093, - "learning_rate": 2.7133333333333333e-05, - "loss": 0.449, - "step": 1629 - }, - { - "epoch": 0.0572140613208375, - "grad_norm": 0.8152803182601929, - "learning_rate": 2.7150000000000003e-05, - "loss": 0.5808, - "step": 1630 - }, - { - "epoch": 0.05724916197195458, - "grad_norm": 1.1594698429107666, - "learning_rate": 2.716666666666667e-05, - "loss": 0.6467, - "step": 1631 - }, - { - "epoch": 0.05728426262307166, - "grad_norm": 0.7938334941864014, - "learning_rate": 2.7183333333333335e-05, - "loss": 0.5578, - "step": 1632 - }, - { - "epoch": 0.05731936327418874, - "grad_norm": 1.0903970003128052, - "learning_rate": 2.7200000000000004e-05, - "loss": 0.5689, - "step": 1633 - }, - { - "epoch": 0.05735446392530581, - "grad_norm": 0.9512434601783752, - "learning_rate": 2.7216666666666666e-05, - "loss": 0.5751, - "step": 1634 - }, - { - "epoch": 0.05738956457642289, - "grad_norm": 0.7708196640014648, - "learning_rate": 2.7233333333333332e-05, - "loss": 0.6019, - "step": 1635 - }, - { - "epoch": 0.05742466522753997, - "grad_norm": 0.7847676873207092, - "learning_rate": 2.725e-05, - "loss": 0.5817, - "step": 1636 - }, - { - "epoch": 0.057459765878657046, - "grad_norm": 0.7649686336517334, - "learning_rate": 2.7266666666666668e-05, - "loss": 0.5614, - "step": 1637 - }, - { - "epoch": 0.057494866529774126, - "grad_norm": 1.0303431749343872, - "learning_rate": 2.7283333333333334e-05, - "loss": 0.5752, - "step": 1638 - }, - { - "epoch": 0.057529967180891206, - "grad_norm": 0.8989413380622864, - "learning_rate": 2.7300000000000003e-05, - "loss": 0.6035, - "step": 1639 - }, - { - "epoch": 0.057565067832008286, - "grad_norm": 0.7569693922996521, - "learning_rate": 2.731666666666667e-05, - "loss": 0.4393, - "step": 1640 - }, - { - "epoch": 0.05760016848312536, - "grad_norm": 0.9440147876739502, - "learning_rate": 2.733333333333333e-05, - "loss": 0.4263, - "step": 1641 - }, - { - "epoch": 0.05763526913424244, - "grad_norm": 0.8054575324058533, - "learning_rate": 2.7350000000000004e-05, - "loss": 0.5579, - "step": 1642 - }, - { - "epoch": 0.05767036978535952, - "grad_norm": 1.237614631652832, - "learning_rate": 2.7366666666666667e-05, - "loss": 0.5301, - "step": 1643 - }, - { - "epoch": 0.0577054704364766, - "grad_norm": 0.7855312824249268, - "learning_rate": 2.7383333333333333e-05, - "loss": 0.6159, - "step": 1644 - }, - { - "epoch": 0.05774057108759367, - "grad_norm": 0.9251673221588135, - "learning_rate": 2.7400000000000002e-05, - "loss": 0.6347, - "step": 1645 - }, - { - "epoch": 0.05777567173871075, - "grad_norm": 0.9640317559242249, - "learning_rate": 2.7416666666666668e-05, - "loss": 0.5894, - "step": 1646 - }, - { - "epoch": 0.05781077238982783, - "grad_norm": 1.1192435026168823, - "learning_rate": 2.7433333333333334e-05, - "loss": 0.5383, - "step": 1647 - }, - { - "epoch": 0.057845873040944906, - "grad_norm": 0.8605307936668396, - "learning_rate": 2.7450000000000003e-05, - "loss": 0.6424, - "step": 1648 - }, - { - "epoch": 0.057880973692061986, - "grad_norm": 0.9872875213623047, - "learning_rate": 2.746666666666667e-05, - "loss": 0.4592, - "step": 1649 - }, - { - "epoch": 0.057916074343179066, - "grad_norm": 0.816878616809845, - "learning_rate": 2.748333333333333e-05, - "loss": 0.5046, - "step": 1650 - }, - { - "epoch": 0.05795117499429615, - "grad_norm": 0.7380409240722656, - "learning_rate": 2.7500000000000004e-05, - "loss": 0.4595, - "step": 1651 - }, - { - "epoch": 0.05798627564541322, - "grad_norm": 0.7057638764381409, - "learning_rate": 2.7516666666666667e-05, - "loss": 0.5415, - "step": 1652 - }, - { - "epoch": 0.0580213762965303, - "grad_norm": 0.7509571313858032, - "learning_rate": 2.7533333333333333e-05, - "loss": 0.4958, - "step": 1653 - }, - { - "epoch": 0.05805647694764738, - "grad_norm": 1.024704933166504, - "learning_rate": 2.7550000000000002e-05, - "loss": 0.5834, - "step": 1654 - }, - { - "epoch": 0.05809157759876446, - "grad_norm": 0.769654393196106, - "learning_rate": 2.7566666666666668e-05, - "loss": 0.4713, - "step": 1655 - }, - { - "epoch": 0.05812667824988153, - "grad_norm": 0.8875621557235718, - "learning_rate": 2.7583333333333334e-05, - "loss": 0.4925, - "step": 1656 - }, - { - "epoch": 0.05816177890099861, - "grad_norm": 0.994264543056488, - "learning_rate": 2.7600000000000003e-05, - "loss": 0.4629, - "step": 1657 - }, - { - "epoch": 0.05819687955211569, - "grad_norm": 0.9958389401435852, - "learning_rate": 2.761666666666667e-05, - "loss": 0.5762, - "step": 1658 - }, - { - "epoch": 0.05823198020323277, - "grad_norm": 1.0203615427017212, - "learning_rate": 2.7633333333333332e-05, - "loss": 0.5362, - "step": 1659 - }, - { - "epoch": 0.05826708085434985, - "grad_norm": 0.7708565592765808, - "learning_rate": 2.7650000000000005e-05, - "loss": 0.6134, - "step": 1660 - }, - { - "epoch": 0.05830218150546693, - "grad_norm": 0.8765611052513123, - "learning_rate": 2.7666666666666667e-05, - "loss": 0.5464, - "step": 1661 - }, - { - "epoch": 0.05833728215658401, - "grad_norm": 0.7693695425987244, - "learning_rate": 2.7683333333333333e-05, - "loss": 0.5704, - "step": 1662 - }, - { - "epoch": 0.05837238280770108, - "grad_norm": 0.8315569758415222, - "learning_rate": 2.7700000000000002e-05, - "loss": 0.5561, - "step": 1663 - }, - { - "epoch": 0.05840748345881816, - "grad_norm": 0.8944202065467834, - "learning_rate": 2.771666666666667e-05, - "loss": 0.5249, - "step": 1664 - }, - { - "epoch": 0.05844258410993524, - "grad_norm": 0.9080599546432495, - "learning_rate": 2.7733333333333334e-05, - "loss": 0.5591, - "step": 1665 - }, - { - "epoch": 0.05847768476105232, - "grad_norm": 0.781814694404602, - "learning_rate": 2.7750000000000004e-05, - "loss": 0.5455, - "step": 1666 - }, - { - "epoch": 0.058512785412169394, - "grad_norm": 0.9808908700942993, - "learning_rate": 2.776666666666667e-05, - "loss": 0.578, - "step": 1667 - }, - { - "epoch": 0.058547886063286474, - "grad_norm": 0.9039526581764221, - "learning_rate": 2.7783333333333332e-05, - "loss": 0.5827, - "step": 1668 - }, - { - "epoch": 0.058582986714403554, - "grad_norm": 0.8152556419372559, - "learning_rate": 2.7800000000000005e-05, - "loss": 0.5566, - "step": 1669 - }, - { - "epoch": 0.058618087365520634, - "grad_norm": 0.9781185984611511, - "learning_rate": 2.7816666666666667e-05, - "loss": 0.594, - "step": 1670 - }, - { - "epoch": 0.05865318801663771, - "grad_norm": 0.8482365012168884, - "learning_rate": 2.7833333333333333e-05, - "loss": 0.5946, - "step": 1671 - }, - { - "epoch": 0.05868828866775479, - "grad_norm": 0.8915314078330994, - "learning_rate": 2.7850000000000003e-05, - "loss": 0.5962, - "step": 1672 - }, - { - "epoch": 0.05872338931887187, - "grad_norm": 0.9585595726966858, - "learning_rate": 2.786666666666667e-05, - "loss": 0.5946, - "step": 1673 - }, - { - "epoch": 0.05875848996998894, - "grad_norm": 0.8826923966407776, - "learning_rate": 2.7883333333333335e-05, - "loss": 0.5472, - "step": 1674 - }, - { - "epoch": 0.05879359062110602, - "grad_norm": 0.820868194103241, - "learning_rate": 2.7900000000000004e-05, - "loss": 0.6861, - "step": 1675 - }, - { - "epoch": 0.0588286912722231, - "grad_norm": 0.9991664290428162, - "learning_rate": 2.791666666666667e-05, - "loss": 0.4727, - "step": 1676 - }, - { - "epoch": 0.05886379192334018, - "grad_norm": 0.9735100269317627, - "learning_rate": 2.7933333333333332e-05, - "loss": 0.626, - "step": 1677 - }, - { - "epoch": 0.058898892574457254, - "grad_norm": 0.8279062509536743, - "learning_rate": 2.7950000000000005e-05, - "loss": 0.6314, - "step": 1678 - }, - { - "epoch": 0.058933993225574334, - "grad_norm": 1.049379825592041, - "learning_rate": 2.7966666666666668e-05, - "loss": 0.6485, - "step": 1679 - }, - { - "epoch": 0.058969093876691414, - "grad_norm": 0.9463269114494324, - "learning_rate": 2.7983333333333334e-05, - "loss": 0.6174, - "step": 1680 - }, - { - "epoch": 0.059004194527808494, - "grad_norm": 0.9673007130622864, - "learning_rate": 2.8000000000000003e-05, - "loss": 0.6582, - "step": 1681 - }, - { - "epoch": 0.05903929517892557, - "grad_norm": 0.7928126454353333, - "learning_rate": 2.801666666666667e-05, - "loss": 0.6302, - "step": 1682 - }, - { - "epoch": 0.05907439583004265, - "grad_norm": 0.7132440209388733, - "learning_rate": 2.8033333333333335e-05, - "loss": 0.5146, - "step": 1683 - }, - { - "epoch": 0.05910949648115973, - "grad_norm": 0.892551600933075, - "learning_rate": 2.8050000000000004e-05, - "loss": 0.6413, - "step": 1684 - }, - { - "epoch": 0.0591445971322768, - "grad_norm": 0.8217902183532715, - "learning_rate": 2.806666666666667e-05, - "loss": 0.5223, - "step": 1685 - }, - { - "epoch": 0.05917969778339388, - "grad_norm": 0.8981382846832275, - "learning_rate": 2.8083333333333333e-05, - "loss": 0.5238, - "step": 1686 - }, - { - "epoch": 0.05921479843451096, - "grad_norm": 0.870939314365387, - "learning_rate": 2.8100000000000005e-05, - "loss": 0.6296, - "step": 1687 - }, - { - "epoch": 0.05924989908562804, - "grad_norm": 0.8301160335540771, - "learning_rate": 2.8116666666666668e-05, - "loss": 0.5855, - "step": 1688 - }, - { - "epoch": 0.059284999736745114, - "grad_norm": 0.773914635181427, - "learning_rate": 2.8133333333333334e-05, - "loss": 0.5395, - "step": 1689 - }, - { - "epoch": 0.059320100387862194, - "grad_norm": 0.850540280342102, - "learning_rate": 2.815e-05, - "loss": 0.5307, - "step": 1690 - }, - { - "epoch": 0.059355201038979274, - "grad_norm": 0.8549738526344299, - "learning_rate": 2.816666666666667e-05, - "loss": 0.6208, - "step": 1691 - }, - { - "epoch": 0.059390301690096355, - "grad_norm": 0.8738875985145569, - "learning_rate": 2.8183333333333335e-05, - "loss": 0.4611, - "step": 1692 - }, - { - "epoch": 0.05942540234121343, - "grad_norm": 0.9244881272315979, - "learning_rate": 2.8199999999999998e-05, - "loss": 0.5846, - "step": 1693 - }, - { - "epoch": 0.05946050299233051, - "grad_norm": 1.128974437713623, - "learning_rate": 2.821666666666667e-05, - "loss": 0.5093, - "step": 1694 - }, - { - "epoch": 0.05949560364344759, - "grad_norm": 0.8586655259132385, - "learning_rate": 2.8233333333333333e-05, - "loss": 0.6305, - "step": 1695 - }, - { - "epoch": 0.05953070429456466, - "grad_norm": 1.0308798551559448, - "learning_rate": 2.825e-05, - "loss": 0.5525, - "step": 1696 - }, - { - "epoch": 0.05956580494568174, - "grad_norm": 0.8977118134498596, - "learning_rate": 2.8266666666666668e-05, - "loss": 0.5437, - "step": 1697 - }, - { - "epoch": 0.05960090559679882, - "grad_norm": 0.7872234582901001, - "learning_rate": 2.8283333333333334e-05, - "loss": 0.4527, - "step": 1698 - }, - { - "epoch": 0.0596360062479159, - "grad_norm": 0.7523535490036011, - "learning_rate": 2.83e-05, - "loss": 0.3998, - "step": 1699 - }, - { - "epoch": 0.059671106899032975, - "grad_norm": 1.1569147109985352, - "learning_rate": 2.831666666666667e-05, - "loss": 0.5527, - "step": 1700 - }, - { - "epoch": 0.059706207550150055, - "grad_norm": 0.7714337110519409, - "learning_rate": 2.8333333333333335e-05, - "loss": 0.6474, - "step": 1701 - }, - { - "epoch": 0.059741308201267135, - "grad_norm": 1.1045012474060059, - "learning_rate": 2.8349999999999998e-05, - "loss": 0.4758, - "step": 1702 - }, - { - "epoch": 0.059776408852384215, - "grad_norm": 0.9084939956665039, - "learning_rate": 2.836666666666667e-05, - "loss": 0.4265, - "step": 1703 - }, - { - "epoch": 0.05981150950350129, - "grad_norm": 0.8292708396911621, - "learning_rate": 2.8383333333333333e-05, - "loss": 0.6135, - "step": 1704 - }, - { - "epoch": 0.05984661015461837, - "grad_norm": 0.8671690821647644, - "learning_rate": 2.84e-05, - "loss": 0.5331, - "step": 1705 - }, - { - "epoch": 0.05988171080573545, - "grad_norm": 0.8129920363426208, - "learning_rate": 2.841666666666667e-05, - "loss": 0.4186, - "step": 1706 - }, - { - "epoch": 0.05991681145685252, - "grad_norm": 0.9554848670959473, - "learning_rate": 2.8433333333333334e-05, - "loss": 0.6037, - "step": 1707 - }, - { - "epoch": 0.0599519121079696, - "grad_norm": 1.1521918773651123, - "learning_rate": 2.845e-05, - "loss": 0.6491, - "step": 1708 - }, - { - "epoch": 0.05998701275908668, - "grad_norm": 0.8753706812858582, - "learning_rate": 2.846666666666667e-05, - "loss": 0.6352, - "step": 1709 - }, - { - "epoch": 0.06002211341020376, - "grad_norm": 1.0067591667175293, - "learning_rate": 2.8483333333333336e-05, - "loss": 0.5078, - "step": 1710 - }, - { - "epoch": 0.060057214061320835, - "grad_norm": 0.8123511075973511, - "learning_rate": 2.8499999999999998e-05, - "loss": 0.6341, - "step": 1711 - }, - { - "epoch": 0.060092314712437915, - "grad_norm": 0.9136712551116943, - "learning_rate": 2.851666666666667e-05, - "loss": 0.4464, - "step": 1712 - }, - { - "epoch": 0.060127415363554995, - "grad_norm": 0.7776134014129639, - "learning_rate": 2.8533333333333333e-05, - "loss": 0.6144, - "step": 1713 - }, - { - "epoch": 0.060162516014672075, - "grad_norm": 0.7525821328163147, - "learning_rate": 2.855e-05, - "loss": 0.6429, - "step": 1714 - }, - { - "epoch": 0.06019761666578915, - "grad_norm": 0.879554808139801, - "learning_rate": 2.856666666666667e-05, - "loss": 0.5836, - "step": 1715 - }, - { - "epoch": 0.06023271731690623, - "grad_norm": 0.8130171895027161, - "learning_rate": 2.8583333333333335e-05, - "loss": 0.6034, - "step": 1716 - }, - { - "epoch": 0.06026781796802331, - "grad_norm": 0.9702152013778687, - "learning_rate": 2.86e-05, - "loss": 0.5859, - "step": 1717 - }, - { - "epoch": 0.06030291861914038, - "grad_norm": 1.2795990705490112, - "learning_rate": 2.861666666666667e-05, - "loss": 0.5423, - "step": 1718 - }, - { - "epoch": 0.06033801927025746, - "grad_norm": 1.1050628423690796, - "learning_rate": 2.8633333333333336e-05, - "loss": 0.4644, - "step": 1719 - }, - { - "epoch": 0.06037311992137454, - "grad_norm": 1.0486819744110107, - "learning_rate": 2.865e-05, - "loss": 0.612, - "step": 1720 - }, - { - "epoch": 0.06040822057249162, - "grad_norm": 0.7843863368034363, - "learning_rate": 2.8666666666666668e-05, - "loss": 0.5915, - "step": 1721 - }, - { - "epoch": 0.060443321223608695, - "grad_norm": 1.083356499671936, - "learning_rate": 2.8683333333333334e-05, - "loss": 0.6176, - "step": 1722 - }, - { - "epoch": 0.060478421874725775, - "grad_norm": 0.7916292548179626, - "learning_rate": 2.87e-05, - "loss": 0.6504, - "step": 1723 - }, - { - "epoch": 0.060513522525842856, - "grad_norm": 1.0492959022521973, - "learning_rate": 2.871666666666667e-05, - "loss": 0.5563, - "step": 1724 - }, - { - "epoch": 0.060548623176959936, - "grad_norm": 0.8875330090522766, - "learning_rate": 2.8733333333333335e-05, - "loss": 0.6173, - "step": 1725 - }, - { - "epoch": 0.06058372382807701, - "grad_norm": 0.7309126853942871, - "learning_rate": 2.8749999999999997e-05, - "loss": 0.5522, - "step": 1726 - }, - { - "epoch": 0.06061882447919409, - "grad_norm": 0.7485297322273254, - "learning_rate": 2.876666666666667e-05, - "loss": 0.5643, - "step": 1727 - }, - { - "epoch": 0.06065392513031117, - "grad_norm": 0.8746162056922913, - "learning_rate": 2.8783333333333333e-05, - "loss": 0.5879, - "step": 1728 - }, - { - "epoch": 0.06068902578142824, - "grad_norm": 0.7454932332038879, - "learning_rate": 2.88e-05, - "loss": 0.6587, - "step": 1729 - }, - { - "epoch": 0.06072412643254532, - "grad_norm": 0.9040213227272034, - "learning_rate": 2.8816666666666668e-05, - "loss": 0.6453, - "step": 1730 - }, - { - "epoch": 0.0607592270836624, - "grad_norm": 0.8961483240127563, - "learning_rate": 2.8833333333333334e-05, - "loss": 0.6061, - "step": 1731 - }, - { - "epoch": 0.06079432773477948, - "grad_norm": 0.9070466160774231, - "learning_rate": 2.885e-05, - "loss": 0.4704, - "step": 1732 - }, - { - "epoch": 0.060829428385896556, - "grad_norm": 1.0121585130691528, - "learning_rate": 2.886666666666667e-05, - "loss": 0.5529, - "step": 1733 - }, - { - "epoch": 0.060864529037013636, - "grad_norm": 0.7246527075767517, - "learning_rate": 2.8883333333333335e-05, - "loss": 0.5408, - "step": 1734 - }, - { - "epoch": 0.060899629688130716, - "grad_norm": 0.6835054755210876, - "learning_rate": 2.8899999999999998e-05, - "loss": 0.5404, - "step": 1735 - }, - { - "epoch": 0.060934730339247796, - "grad_norm": 0.7371531128883362, - "learning_rate": 2.891666666666667e-05, - "loss": 0.6656, - "step": 1736 - }, - { - "epoch": 0.06096983099036487, - "grad_norm": 0.6782920956611633, - "learning_rate": 2.8933333333333333e-05, - "loss": 0.5979, - "step": 1737 - }, - { - "epoch": 0.06100493164148195, - "grad_norm": 0.7242698669433594, - "learning_rate": 2.895e-05, - "loss": 0.5994, - "step": 1738 - }, - { - "epoch": 0.06104003229259903, - "grad_norm": 0.7604151368141174, - "learning_rate": 2.8966666666666668e-05, - "loss": 0.4961, - "step": 1739 - }, - { - "epoch": 0.0610751329437161, - "grad_norm": 0.8417724370956421, - "learning_rate": 2.8983333333333334e-05, - "loss": 0.7032, - "step": 1740 - }, - { - "epoch": 0.06111023359483318, - "grad_norm": 1.158736228942871, - "learning_rate": 2.9e-05, - "loss": 0.5341, - "step": 1741 - }, - { - "epoch": 0.06114533424595026, - "grad_norm": 0.7428897023200989, - "learning_rate": 2.901666666666667e-05, - "loss": 0.6037, - "step": 1742 - }, - { - "epoch": 0.06118043489706734, - "grad_norm": 0.7622668743133545, - "learning_rate": 2.9033333333333335e-05, - "loss": 0.6561, - "step": 1743 - }, - { - "epoch": 0.061215535548184416, - "grad_norm": 0.9846168756484985, - "learning_rate": 2.9049999999999998e-05, - "loss": 0.5181, - "step": 1744 - }, - { - "epoch": 0.061250636199301496, - "grad_norm": 0.7325814962387085, - "learning_rate": 2.906666666666667e-05, - "loss": 0.5475, - "step": 1745 - }, - { - "epoch": 0.061285736850418576, - "grad_norm": 0.778376042842865, - "learning_rate": 2.9083333333333333e-05, - "loss": 0.5478, - "step": 1746 - }, - { - "epoch": 0.061320837501535656, - "grad_norm": 0.721563994884491, - "learning_rate": 2.91e-05, - "loss": 0.3967, - "step": 1747 - }, - { - "epoch": 0.06135593815265273, - "grad_norm": 0.7100737690925598, - "learning_rate": 2.911666666666667e-05, - "loss": 0.6814, - "step": 1748 - }, - { - "epoch": 0.06139103880376981, - "grad_norm": 0.8389009237289429, - "learning_rate": 2.9133333333333334e-05, - "loss": 0.5914, - "step": 1749 - }, - { - "epoch": 0.06142613945488689, - "grad_norm": 0.8072444796562195, - "learning_rate": 2.915e-05, - "loss": 0.5656, - "step": 1750 - }, - { - "epoch": 0.06146124010600397, - "grad_norm": 0.9113303422927856, - "learning_rate": 2.916666666666667e-05, - "loss": 0.3581, - "step": 1751 - }, - { - "epoch": 0.06149634075712104, - "grad_norm": 0.8248709440231323, - "learning_rate": 2.9183333333333336e-05, - "loss": 0.651, - "step": 1752 - }, - { - "epoch": 0.06153144140823812, - "grad_norm": 0.7624828219413757, - "learning_rate": 2.9199999999999998e-05, - "loss": 0.5729, - "step": 1753 - }, - { - "epoch": 0.0615665420593552, - "grad_norm": 0.9370071887969971, - "learning_rate": 2.921666666666667e-05, - "loss": 0.6724, - "step": 1754 - }, - { - "epoch": 0.061601642710472276, - "grad_norm": 0.7407233715057373, - "learning_rate": 2.9233333333333334e-05, - "loss": 0.5722, - "step": 1755 - }, - { - "epoch": 0.061636743361589356, - "grad_norm": 0.8683093786239624, - "learning_rate": 2.925e-05, - "loss": 0.6051, - "step": 1756 - }, - { - "epoch": 0.06167184401270644, - "grad_norm": 0.7697977423667908, - "learning_rate": 2.926666666666667e-05, - "loss": 0.617, - "step": 1757 - }, - { - "epoch": 0.06170694466382352, - "grad_norm": 1.2314720153808594, - "learning_rate": 2.9283333333333335e-05, - "loss": 0.6552, - "step": 1758 - }, - { - "epoch": 0.06174204531494059, - "grad_norm": 0.6858367919921875, - "learning_rate": 2.93e-05, - "loss": 0.5726, - "step": 1759 - }, - { - "epoch": 0.06177714596605767, - "grad_norm": 0.7631691098213196, - "learning_rate": 2.931666666666667e-05, - "loss": 0.5156, - "step": 1760 - }, - { - "epoch": 0.06181224661717475, - "grad_norm": 0.8702123761177063, - "learning_rate": 2.9333333333333336e-05, - "loss": 0.6233, - "step": 1761 - }, - { - "epoch": 0.06184734726829183, - "grad_norm": 0.7742347717285156, - "learning_rate": 2.935e-05, - "loss": 0.5091, - "step": 1762 - }, - { - "epoch": 0.0618824479194089, - "grad_norm": 0.7589830756187439, - "learning_rate": 2.936666666666667e-05, - "loss": 0.5847, - "step": 1763 - }, - { - "epoch": 0.06191754857052598, - "grad_norm": 0.770355224609375, - "learning_rate": 2.9383333333333334e-05, - "loss": 0.6335, - "step": 1764 - }, - { - "epoch": 0.061952649221643064, - "grad_norm": 0.6903135776519775, - "learning_rate": 2.94e-05, - "loss": 0.503, - "step": 1765 - }, - { - "epoch": 0.06198774987276014, - "grad_norm": 0.7834055423736572, - "learning_rate": 2.941666666666667e-05, - "loss": 0.5389, - "step": 1766 - }, - { - "epoch": 0.06202285052387722, - "grad_norm": 0.8828336596488953, - "learning_rate": 2.9433333333333335e-05, - "loss": 0.5064, - "step": 1767 - }, - { - "epoch": 0.0620579511749943, - "grad_norm": 0.8641542196273804, - "learning_rate": 2.945e-05, - "loss": 0.5481, - "step": 1768 - }, - { - "epoch": 0.06209305182611138, - "grad_norm": 0.8307515382766724, - "learning_rate": 2.946666666666667e-05, - "loss": 0.5951, - "step": 1769 - }, - { - "epoch": 0.06212815247722845, - "grad_norm": 0.8695902228355408, - "learning_rate": 2.9483333333333336e-05, - "loss": 0.6114, - "step": 1770 - }, - { - "epoch": 0.06216325312834553, - "grad_norm": 0.8251100182533264, - "learning_rate": 2.95e-05, - "loss": 0.5214, - "step": 1771 - }, - { - "epoch": 0.06219835377946261, - "grad_norm": 0.8173295259475708, - "learning_rate": 2.951666666666667e-05, - "loss": 0.4747, - "step": 1772 - }, - { - "epoch": 0.06223345443057969, - "grad_norm": 0.8599511981010437, - "learning_rate": 2.9533333333333334e-05, - "loss": 0.4462, - "step": 1773 - }, - { - "epoch": 0.062268555081696764, - "grad_norm": 0.8339107632637024, - "learning_rate": 2.955e-05, - "loss": 0.591, - "step": 1774 - }, - { - "epoch": 0.062303655732813844, - "grad_norm": 0.8350062370300293, - "learning_rate": 2.956666666666667e-05, - "loss": 0.5094, - "step": 1775 - }, - { - "epoch": 0.062338756383930924, - "grad_norm": 0.869197428226471, - "learning_rate": 2.9583333333333335e-05, - "loss": 0.3766, - "step": 1776 - }, - { - "epoch": 0.062373857035048, - "grad_norm": 0.9232282638549805, - "learning_rate": 2.96e-05, - "loss": 0.671, - "step": 1777 - }, - { - "epoch": 0.06240895768616508, - "grad_norm": 0.7261525988578796, - "learning_rate": 2.961666666666667e-05, - "loss": 0.4389, - "step": 1778 - }, - { - "epoch": 0.06244405833728216, - "grad_norm": 0.8898705840110779, - "learning_rate": 2.9633333333333336e-05, - "loss": 0.5393, - "step": 1779 - }, - { - "epoch": 0.06247915898839924, - "grad_norm": 1.023830771446228, - "learning_rate": 2.965e-05, - "loss": 0.6196, - "step": 1780 - }, - { - "epoch": 0.06251425963951632, - "grad_norm": 0.8531644940376282, - "learning_rate": 2.9666666666666672e-05, - "loss": 0.6064, - "step": 1781 - }, - { - "epoch": 0.06254936029063339, - "grad_norm": 0.9280110597610474, - "learning_rate": 2.9683333333333334e-05, - "loss": 0.6397, - "step": 1782 - }, - { - "epoch": 0.06258446094175046, - "grad_norm": 0.9614513516426086, - "learning_rate": 2.97e-05, - "loss": 0.5938, - "step": 1783 - }, - { - "epoch": 0.06261956159286755, - "grad_norm": 1.1399403810501099, - "learning_rate": 2.971666666666667e-05, - "loss": 0.614, - "step": 1784 - }, - { - "epoch": 0.06265466224398462, - "grad_norm": 0.8278588652610779, - "learning_rate": 2.9733333333333336e-05, - "loss": 0.5308, - "step": 1785 - }, - { - "epoch": 0.06268976289510171, - "grad_norm": 0.9221890568733215, - "learning_rate": 2.975e-05, - "loss": 0.4907, - "step": 1786 - }, - { - "epoch": 0.06272486354621878, - "grad_norm": 0.9902821779251099, - "learning_rate": 2.976666666666667e-05, - "loss": 0.5492, - "step": 1787 - }, - { - "epoch": 0.06275996419733586, - "grad_norm": 1.0970113277435303, - "learning_rate": 2.9783333333333337e-05, - "loss": 0.5699, - "step": 1788 - }, - { - "epoch": 0.06279506484845294, - "grad_norm": 1.0750397443771362, - "learning_rate": 2.98e-05, - "loss": 0.6011, - "step": 1789 - }, - { - "epoch": 0.06283016549957002, - "grad_norm": 0.9217466115951538, - "learning_rate": 2.9816666666666672e-05, - "loss": 0.5507, - "step": 1790 - }, - { - "epoch": 0.06286526615068709, - "grad_norm": 0.8816239237785339, - "learning_rate": 2.9833333333333335e-05, - "loss": 0.5616, - "step": 1791 - }, - { - "epoch": 0.06290036680180418, - "grad_norm": 1.1753729581832886, - "learning_rate": 2.985e-05, - "loss": 0.6255, - "step": 1792 - }, - { - "epoch": 0.06293546745292125, - "grad_norm": 0.9463215470314026, - "learning_rate": 2.986666666666667e-05, - "loss": 0.521, - "step": 1793 - }, - { - "epoch": 0.06297056810403832, - "grad_norm": 0.81819087266922, - "learning_rate": 2.9883333333333336e-05, - "loss": 0.5598, - "step": 1794 - }, - { - "epoch": 0.06300566875515541, - "grad_norm": 0.765758752822876, - "learning_rate": 2.9900000000000002e-05, - "loss": 0.5543, - "step": 1795 - }, - { - "epoch": 0.06304076940627248, - "grad_norm": 0.857505202293396, - "learning_rate": 2.991666666666667e-05, - "loss": 0.4586, - "step": 1796 - }, - { - "epoch": 0.06307587005738957, - "grad_norm": 0.7262391448020935, - "learning_rate": 2.9933333333333337e-05, - "loss": 0.6009, - "step": 1797 - }, - { - "epoch": 0.06311097070850664, - "grad_norm": 0.8083949089050293, - "learning_rate": 2.995e-05, - "loss": 0.3858, - "step": 1798 - }, - { - "epoch": 0.06314607135962372, - "grad_norm": 0.8829843997955322, - "learning_rate": 2.9966666666666672e-05, - "loss": 0.6711, - "step": 1799 - }, - { - "epoch": 0.0631811720107408, - "grad_norm": 0.9163005352020264, - "learning_rate": 2.9983333333333335e-05, - "loss": 0.569, - "step": 1800 - }, - { - "epoch": 0.06321627266185788, - "grad_norm": 0.9020576477050781, - "learning_rate": 3e-05, - "loss": 0.5529, - "step": 1801 - }, - { - "epoch": 0.06325137331297495, - "grad_norm": 0.7227227091789246, - "learning_rate": 3.001666666666667e-05, - "loss": 0.6152, - "step": 1802 - }, - { - "epoch": 0.06328647396409204, - "grad_norm": 0.8037997484207153, - "learning_rate": 3.0033333333333336e-05, - "loss": 0.5869, - "step": 1803 - }, - { - "epoch": 0.06332157461520911, - "grad_norm": 0.9063264727592468, - "learning_rate": 3.0050000000000002e-05, - "loss": 0.6009, - "step": 1804 - }, - { - "epoch": 0.06335667526632618, - "grad_norm": 0.6953176856040955, - "learning_rate": 3.006666666666667e-05, - "loss": 0.4462, - "step": 1805 - }, - { - "epoch": 0.06339177591744327, - "grad_norm": 0.6950365900993347, - "learning_rate": 3.0083333333333337e-05, - "loss": 0.6337, - "step": 1806 - }, - { - "epoch": 0.06342687656856034, - "grad_norm": 0.6474564075469971, - "learning_rate": 3.01e-05, - "loss": 0.563, - "step": 1807 - }, - { - "epoch": 0.06346197721967743, - "grad_norm": 0.7492498159408569, - "learning_rate": 3.011666666666667e-05, - "loss": 0.5928, - "step": 1808 - }, - { - "epoch": 0.0634970778707945, - "grad_norm": 0.817744791507721, - "learning_rate": 3.0133333333333335e-05, - "loss": 0.5899, - "step": 1809 - }, - { - "epoch": 0.06353217852191158, - "grad_norm": 0.6524613499641418, - "learning_rate": 3.015e-05, - "loss": 0.5579, - "step": 1810 - }, - { - "epoch": 0.06356727917302867, - "grad_norm": 0.6341206431388855, - "learning_rate": 3.016666666666667e-05, - "loss": 0.4951, - "step": 1811 - }, - { - "epoch": 0.06360237982414574, - "grad_norm": 0.7473127245903015, - "learning_rate": 3.0183333333333336e-05, - "loss": 0.5776, - "step": 1812 - }, - { - "epoch": 0.06363748047526281, - "grad_norm": 0.8515803217887878, - "learning_rate": 3.02e-05, - "loss": 0.5759, - "step": 1813 - }, - { - "epoch": 0.0636725811263799, - "grad_norm": 0.8946070671081543, - "learning_rate": 3.0216666666666665e-05, - "loss": 0.6058, - "step": 1814 - }, - { - "epoch": 0.06370768177749697, - "grad_norm": 0.755203127861023, - "learning_rate": 3.0233333333333334e-05, - "loss": 0.6159, - "step": 1815 - }, - { - "epoch": 0.06374278242861404, - "grad_norm": 0.7588019967079163, - "learning_rate": 3.025e-05, - "loss": 0.6283, - "step": 1816 - }, - { - "epoch": 0.06377788307973113, - "grad_norm": 0.7684946060180664, - "learning_rate": 3.0266666666666666e-05, - "loss": 0.4106, - "step": 1817 - }, - { - "epoch": 0.0638129837308482, - "grad_norm": 0.7672116160392761, - "learning_rate": 3.0283333333333335e-05, - "loss": 0.6344, - "step": 1818 - }, - { - "epoch": 0.06384808438196529, - "grad_norm": 0.8291484117507935, - "learning_rate": 3.03e-05, - "loss": 0.4832, - "step": 1819 - }, - { - "epoch": 0.06388318503308237, - "grad_norm": 0.7767817974090576, - "learning_rate": 3.0316666666666664e-05, - "loss": 0.5063, - "step": 1820 - }, - { - "epoch": 0.06391828568419944, - "grad_norm": 0.8940779566764832, - "learning_rate": 3.0333333333333337e-05, - "loss": 0.4509, - "step": 1821 - }, - { - "epoch": 0.06395338633531653, - "grad_norm": 0.8179513216018677, - "learning_rate": 3.035e-05, - "loss": 0.5185, - "step": 1822 - }, - { - "epoch": 0.0639884869864336, - "grad_norm": 0.7610150575637817, - "learning_rate": 3.0366666666666665e-05, - "loss": 0.5145, - "step": 1823 - }, - { - "epoch": 0.06402358763755067, - "grad_norm": 0.7387018203735352, - "learning_rate": 3.0383333333333334e-05, - "loss": 0.4692, - "step": 1824 - }, - { - "epoch": 0.06405868828866776, - "grad_norm": 0.911463737487793, - "learning_rate": 3.04e-05, - "loss": 0.5352, - "step": 1825 - }, - { - "epoch": 0.06409378893978483, - "grad_norm": 0.7533367276191711, - "learning_rate": 3.0416666666666666e-05, - "loss": 0.6119, - "step": 1826 - }, - { - "epoch": 0.0641288895909019, - "grad_norm": 0.6884397864341736, - "learning_rate": 3.0433333333333336e-05, - "loss": 0.5381, - "step": 1827 - }, - { - "epoch": 0.06416399024201899, - "grad_norm": 0.8816990256309509, - "learning_rate": 3.045e-05, - "loss": 0.6114, - "step": 1828 - }, - { - "epoch": 0.06419909089313607, - "grad_norm": 0.7746396660804749, - "learning_rate": 3.0466666666666664e-05, - "loss": 0.4192, - "step": 1829 - }, - { - "epoch": 0.06423419154425315, - "grad_norm": 0.784048855304718, - "learning_rate": 3.0483333333333337e-05, - "loss": 0.5495, - "step": 1830 - }, - { - "epoch": 0.06426929219537023, - "grad_norm": 0.8856515884399414, - "learning_rate": 3.05e-05, - "loss": 0.5744, - "step": 1831 - }, - { - "epoch": 0.0643043928464873, - "grad_norm": 0.8966047763824463, - "learning_rate": 3.0516666666666665e-05, - "loss": 0.3864, - "step": 1832 - }, - { - "epoch": 0.06433949349760439, - "grad_norm": 1.1532541513442993, - "learning_rate": 3.0533333333333335e-05, - "loss": 0.647, - "step": 1833 - }, - { - "epoch": 0.06437459414872146, - "grad_norm": 0.8023368120193481, - "learning_rate": 3.0550000000000004e-05, - "loss": 0.5825, - "step": 1834 - }, - { - "epoch": 0.06440969479983853, - "grad_norm": 0.8466277718544006, - "learning_rate": 3.0566666666666667e-05, - "loss": 0.6051, - "step": 1835 - }, - { - "epoch": 0.06444479545095562, - "grad_norm": 1.0002416372299194, - "learning_rate": 3.0583333333333336e-05, - "loss": 0.5872, - "step": 1836 - }, - { - "epoch": 0.06447989610207269, - "grad_norm": 1.0670493841171265, - "learning_rate": 3.06e-05, - "loss": 0.5781, - "step": 1837 - }, - { - "epoch": 0.06451499675318977, - "grad_norm": 0.9404459595680237, - "learning_rate": 3.061666666666667e-05, - "loss": 0.5281, - "step": 1838 - }, - { - "epoch": 0.06455009740430685, - "grad_norm": 0.9593863487243652, - "learning_rate": 3.063333333333334e-05, - "loss": 0.6327, - "step": 1839 - }, - { - "epoch": 0.06458519805542393, - "grad_norm": 0.9086483120918274, - "learning_rate": 3.065e-05, - "loss": 0.6305, - "step": 1840 - }, - { - "epoch": 0.06462029870654101, - "grad_norm": 0.8836398720741272, - "learning_rate": 3.066666666666667e-05, - "loss": 0.637, - "step": 1841 - }, - { - "epoch": 0.06465539935765809, - "grad_norm": 0.8199813961982727, - "learning_rate": 3.068333333333334e-05, - "loss": 0.4688, - "step": 1842 - }, - { - "epoch": 0.06469050000877516, - "grad_norm": 0.7582228779792786, - "learning_rate": 3.07e-05, - "loss": 0.5194, - "step": 1843 - }, - { - "epoch": 0.06472560065989225, - "grad_norm": 0.8415733575820923, - "learning_rate": 3.0716666666666663e-05, - "loss": 0.6395, - "step": 1844 - }, - { - "epoch": 0.06476070131100932, - "grad_norm": 0.8264052271842957, - "learning_rate": 3.073333333333334e-05, - "loss": 0.7126, - "step": 1845 - }, - { - "epoch": 0.06479580196212639, - "grad_norm": 0.7457237839698792, - "learning_rate": 3.075e-05, - "loss": 0.5919, - "step": 1846 - }, - { - "epoch": 0.06483090261324348, - "grad_norm": 0.6747273802757263, - "learning_rate": 3.0766666666666665e-05, - "loss": 0.5318, - "step": 1847 - }, - { - "epoch": 0.06486600326436055, - "grad_norm": 0.8826310634613037, - "learning_rate": 3.0783333333333334e-05, - "loss": 0.4943, - "step": 1848 - }, - { - "epoch": 0.06490110391547763, - "grad_norm": 0.6286559700965881, - "learning_rate": 3.08e-05, - "loss": 0.5602, - "step": 1849 - }, - { - "epoch": 0.06493620456659471, - "grad_norm": 0.8328421711921692, - "learning_rate": 3.0816666666666666e-05, - "loss": 0.6111, - "step": 1850 - }, - { - "epoch": 0.06497130521771179, - "grad_norm": 0.7489180564880371, - "learning_rate": 3.0833333333333335e-05, - "loss": 0.4415, - "step": 1851 - }, - { - "epoch": 0.06500640586882887, - "grad_norm": 0.7744174003601074, - "learning_rate": 3.0850000000000004e-05, - "loss": 0.5733, - "step": 1852 - }, - { - "epoch": 0.06504150651994595, - "grad_norm": 0.6597766280174255, - "learning_rate": 3.086666666666667e-05, - "loss": 0.4803, - "step": 1853 - }, - { - "epoch": 0.06507660717106302, - "grad_norm": 0.6211796998977661, - "learning_rate": 3.0883333333333336e-05, - "loss": 0.5375, - "step": 1854 - }, - { - "epoch": 0.0651117078221801, - "grad_norm": 0.908808708190918, - "learning_rate": 3.09e-05, - "loss": 0.6193, - "step": 1855 - }, - { - "epoch": 0.06514680847329718, - "grad_norm": 0.7301246523857117, - "learning_rate": 3.091666666666667e-05, - "loss": 0.5148, - "step": 1856 - }, - { - "epoch": 0.06518190912441425, - "grad_norm": 0.7789900302886963, - "learning_rate": 3.093333333333334e-05, - "loss": 0.6269, - "step": 1857 - }, - { - "epoch": 0.06521700977553134, - "grad_norm": 0.9326147437095642, - "learning_rate": 3.095e-05, - "loss": 0.611, - "step": 1858 - }, - { - "epoch": 0.06525211042664841, - "grad_norm": 0.8848814368247986, - "learning_rate": 3.096666666666666e-05, - "loss": 0.4946, - "step": 1859 - }, - { - "epoch": 0.06528721107776549, - "grad_norm": 0.8850508332252502, - "learning_rate": 3.098333333333334e-05, - "loss": 0.5867, - "step": 1860 - }, - { - "epoch": 0.06532231172888257, - "grad_norm": 0.763200044631958, - "learning_rate": 3.1e-05, - "loss": 0.633, - "step": 1861 - }, - { - "epoch": 0.06535741237999965, - "grad_norm": 0.7311384677886963, - "learning_rate": 3.1016666666666664e-05, - "loss": 0.6011, - "step": 1862 - }, - { - "epoch": 0.06539251303111673, - "grad_norm": 0.8791167140007019, - "learning_rate": 3.103333333333333e-05, - "loss": 0.5545, - "step": 1863 - }, - { - "epoch": 0.0654276136822338, - "grad_norm": 0.7872679829597473, - "learning_rate": 3.105e-05, - "loss": 0.5107, - "step": 1864 - }, - { - "epoch": 0.06546271433335088, - "grad_norm": 0.8561465740203857, - "learning_rate": 3.1066666666666665e-05, - "loss": 0.4955, - "step": 1865 - }, - { - "epoch": 0.06549781498446797, - "grad_norm": 0.7971600294113159, - "learning_rate": 3.1083333333333334e-05, - "loss": 0.5614, - "step": 1866 - }, - { - "epoch": 0.06553291563558504, - "grad_norm": 0.8307240605354309, - "learning_rate": 3.1100000000000004e-05, - "loss": 0.5012, - "step": 1867 - }, - { - "epoch": 0.06556801628670211, - "grad_norm": 0.7176113128662109, - "learning_rate": 3.1116666666666666e-05, - "loss": 0.5266, - "step": 1868 - }, - { - "epoch": 0.0656031169378192, - "grad_norm": 0.7387281656265259, - "learning_rate": 3.1133333333333336e-05, - "loss": 0.6088, - "step": 1869 - }, - { - "epoch": 0.06563821758893627, - "grad_norm": 0.8318613171577454, - "learning_rate": 3.115e-05, - "loss": 0.525, - "step": 1870 - }, - { - "epoch": 0.06567331824005335, - "grad_norm": 0.7993846535682678, - "learning_rate": 3.116666666666667e-05, - "loss": 0.4761, - "step": 1871 - }, - { - "epoch": 0.06570841889117043, - "grad_norm": 0.8571208119392395, - "learning_rate": 3.118333333333334e-05, - "loss": 0.5535, - "step": 1872 - }, - { - "epoch": 0.0657435195422875, - "grad_norm": 0.7446001768112183, - "learning_rate": 3.12e-05, - "loss": 0.6156, - "step": 1873 - }, - { - "epoch": 0.0657786201934046, - "grad_norm": 0.8127099871635437, - "learning_rate": 3.121666666666667e-05, - "loss": 0.6283, - "step": 1874 - }, - { - "epoch": 0.06581372084452167, - "grad_norm": 0.8139795660972595, - "learning_rate": 3.123333333333334e-05, - "loss": 0.635, - "step": 1875 - }, - { - "epoch": 0.06584882149563874, - "grad_norm": 0.7437220811843872, - "learning_rate": 3.125e-05, - "loss": 0.6302, - "step": 1876 - }, - { - "epoch": 0.06588392214675583, - "grad_norm": 0.7806976437568665, - "learning_rate": 3.126666666666666e-05, - "loss": 0.5166, - "step": 1877 - }, - { - "epoch": 0.0659190227978729, - "grad_norm": 0.8086317777633667, - "learning_rate": 3.128333333333334e-05, - "loss": 0.6596, - "step": 1878 - }, - { - "epoch": 0.06595412344898997, - "grad_norm": 0.8119486570358276, - "learning_rate": 3.13e-05, - "loss": 0.6358, - "step": 1879 - }, - { - "epoch": 0.06598922410010706, - "grad_norm": 0.7849818468093872, - "learning_rate": 3.1316666666666664e-05, - "loss": 0.6424, - "step": 1880 - }, - { - "epoch": 0.06602432475122413, - "grad_norm": 0.7665835022926331, - "learning_rate": 3.1333333333333334e-05, - "loss": 0.4543, - "step": 1881 - }, - { - "epoch": 0.06605942540234121, - "grad_norm": 0.7948185205459595, - "learning_rate": 3.135e-05, - "loss": 0.5326, - "step": 1882 - }, - { - "epoch": 0.0660945260534583, - "grad_norm": 0.7795671224594116, - "learning_rate": 3.1366666666666666e-05, - "loss": 0.5483, - "step": 1883 - }, - { - "epoch": 0.06612962670457537, - "grad_norm": 0.7621694803237915, - "learning_rate": 3.1383333333333335e-05, - "loss": 0.6119, - "step": 1884 - }, - { - "epoch": 0.06616472735569245, - "grad_norm": 0.7098021507263184, - "learning_rate": 3.1400000000000004e-05, - "loss": 0.5035, - "step": 1885 - }, - { - "epoch": 0.06619982800680953, - "grad_norm": 0.7221153378486633, - "learning_rate": 3.141666666666667e-05, - "loss": 0.5986, - "step": 1886 - }, - { - "epoch": 0.0662349286579266, - "grad_norm": 1.0827208757400513, - "learning_rate": 3.1433333333333336e-05, - "loss": 0.6768, - "step": 1887 - }, - { - "epoch": 0.06627002930904369, - "grad_norm": 0.9476728439331055, - "learning_rate": 3.145e-05, - "loss": 0.5755, - "step": 1888 - }, - { - "epoch": 0.06630512996016076, - "grad_norm": 0.8737848401069641, - "learning_rate": 3.146666666666667e-05, - "loss": 0.5749, - "step": 1889 - }, - { - "epoch": 0.06634023061127783, - "grad_norm": 0.8301693201065063, - "learning_rate": 3.148333333333334e-05, - "loss": 0.5363, - "step": 1890 - }, - { - "epoch": 0.06637533126239492, - "grad_norm": 0.8370927572250366, - "learning_rate": 3.15e-05, - "loss": 0.6555, - "step": 1891 - }, - { - "epoch": 0.066410431913512, - "grad_norm": 0.8028042316436768, - "learning_rate": 3.151666666666667e-05, - "loss": 0.5612, - "step": 1892 - }, - { - "epoch": 0.06644553256462907, - "grad_norm": 0.7858577370643616, - "learning_rate": 3.153333333333334e-05, - "loss": 0.5633, - "step": 1893 - }, - { - "epoch": 0.06648063321574615, - "grad_norm": 0.9522932171821594, - "learning_rate": 3.155e-05, - "loss": 0.5544, - "step": 1894 - }, - { - "epoch": 0.06651573386686323, - "grad_norm": 0.9142866730690002, - "learning_rate": 3.1566666666666664e-05, - "loss": 0.4062, - "step": 1895 - }, - { - "epoch": 0.06655083451798031, - "grad_norm": 0.9126763939857483, - "learning_rate": 3.158333333333334e-05, - "loss": 0.61, - "step": 1896 - }, - { - "epoch": 0.06658593516909739, - "grad_norm": 0.736941397190094, - "learning_rate": 3.16e-05, - "loss": 0.5513, - "step": 1897 - }, - { - "epoch": 0.06662103582021446, - "grad_norm": 0.7622309327125549, - "learning_rate": 3.1616666666666665e-05, - "loss": 0.6328, - "step": 1898 - }, - { - "epoch": 0.06665613647133155, - "grad_norm": 0.8357309699058533, - "learning_rate": 3.1633333333333334e-05, - "loss": 0.6073, - "step": 1899 - }, - { - "epoch": 0.06669123712244862, - "grad_norm": 0.7216735482215881, - "learning_rate": 3.1650000000000004e-05, - "loss": 0.4774, - "step": 1900 - }, - { - "epoch": 0.0667263377735657, - "grad_norm": 0.7899258136749268, - "learning_rate": 3.1666666666666666e-05, - "loss": 0.5671, - "step": 1901 - }, - { - "epoch": 0.06676143842468278, - "grad_norm": 0.9340733885765076, - "learning_rate": 3.1683333333333335e-05, - "loss": 0.6814, - "step": 1902 - }, - { - "epoch": 0.06679653907579985, - "grad_norm": 1.0456535816192627, - "learning_rate": 3.1700000000000005e-05, - "loss": 0.6719, - "step": 1903 - }, - { - "epoch": 0.06683163972691693, - "grad_norm": 0.6841208934783936, - "learning_rate": 3.171666666666667e-05, - "loss": 0.4332, - "step": 1904 - }, - { - "epoch": 0.06686674037803401, - "grad_norm": 0.8201801776885986, - "learning_rate": 3.173333333333334e-05, - "loss": 0.6006, - "step": 1905 - }, - { - "epoch": 0.06690184102915109, - "grad_norm": 0.7807548642158508, - "learning_rate": 3.175e-05, - "loss": 0.4165, - "step": 1906 - }, - { - "epoch": 0.06693694168026817, - "grad_norm": 0.8358427882194519, - "learning_rate": 3.176666666666667e-05, - "loss": 0.58, - "step": 1907 - }, - { - "epoch": 0.06697204233138525, - "grad_norm": 0.8372209072113037, - "learning_rate": 3.178333333333334e-05, - "loss": 0.6462, - "step": 1908 - }, - { - "epoch": 0.06700714298250232, - "grad_norm": 0.7869482040405273, - "learning_rate": 3.18e-05, - "loss": 0.6532, - "step": 1909 - }, - { - "epoch": 0.06704224363361941, - "grad_norm": 1.0508146286010742, - "learning_rate": 3.181666666666667e-05, - "loss": 0.4818, - "step": 1910 - }, - { - "epoch": 0.06707734428473648, - "grad_norm": 0.8419238924980164, - "learning_rate": 3.183333333333334e-05, - "loss": 0.6489, - "step": 1911 - }, - { - "epoch": 0.06711244493585355, - "grad_norm": 0.767579972743988, - "learning_rate": 3.185e-05, - "loss": 0.5389, - "step": 1912 - }, - { - "epoch": 0.06714754558697064, - "grad_norm": 0.8072236776351929, - "learning_rate": 3.1866666666666664e-05, - "loss": 0.5489, - "step": 1913 - }, - { - "epoch": 0.06718264623808771, - "grad_norm": 0.9269421696662903, - "learning_rate": 3.188333333333334e-05, - "loss": 0.5634, - "step": 1914 - }, - { - "epoch": 0.06721774688920479, - "grad_norm": 1.2076356410980225, - "learning_rate": 3.19e-05, - "loss": 0.6821, - "step": 1915 - }, - { - "epoch": 0.06725284754032188, - "grad_norm": 0.9056288003921509, - "learning_rate": 3.1916666666666665e-05, - "loss": 0.5864, - "step": 1916 - }, - { - "epoch": 0.06728794819143895, - "grad_norm": 0.7668805718421936, - "learning_rate": 3.1933333333333335e-05, - "loss": 0.5696, - "step": 1917 - }, - { - "epoch": 0.06732304884255604, - "grad_norm": 0.915939450263977, - "learning_rate": 3.1950000000000004e-05, - "loss": 0.5844, - "step": 1918 - }, - { - "epoch": 0.06735814949367311, - "grad_norm": 0.8159973621368408, - "learning_rate": 3.196666666666667e-05, - "loss": 0.5119, - "step": 1919 - }, - { - "epoch": 0.06739325014479018, - "grad_norm": 0.6929640769958496, - "learning_rate": 3.1983333333333336e-05, - "loss": 0.6286, - "step": 1920 - }, - { - "epoch": 0.06742835079590727, - "grad_norm": 0.7779014110565186, - "learning_rate": 3.2000000000000005e-05, - "loss": 0.5678, - "step": 1921 - }, - { - "epoch": 0.06746345144702434, - "grad_norm": 0.9116969704627991, - "learning_rate": 3.201666666666667e-05, - "loss": 0.6441, - "step": 1922 - }, - { - "epoch": 0.06749855209814141, - "grad_norm": 0.7807540893554688, - "learning_rate": 3.203333333333334e-05, - "loss": 0.6633, - "step": 1923 - }, - { - "epoch": 0.0675336527492585, - "grad_norm": 0.744918942451477, - "learning_rate": 3.205e-05, - "loss": 0.6455, - "step": 1924 - }, - { - "epoch": 0.06756875340037558, - "grad_norm": 0.7129676938056946, - "learning_rate": 3.206666666666667e-05, - "loss": 0.4902, - "step": 1925 - }, - { - "epoch": 0.06760385405149265, - "grad_norm": 0.8227726817131042, - "learning_rate": 3.208333333333334e-05, - "loss": 0.57, - "step": 1926 - }, - { - "epoch": 0.06763895470260974, - "grad_norm": 0.7451854348182678, - "learning_rate": 3.21e-05, - "loss": 0.6318, - "step": 1927 - }, - { - "epoch": 0.06767405535372681, - "grad_norm": 0.8081383109092712, - "learning_rate": 3.211666666666667e-05, - "loss": 0.6222, - "step": 1928 - }, - { - "epoch": 0.0677091560048439, - "grad_norm": 0.9103675484657288, - "learning_rate": 3.213333333333334e-05, - "loss": 0.6039, - "step": 1929 - }, - { - "epoch": 0.06774425665596097, - "grad_norm": 0.8621140718460083, - "learning_rate": 3.215e-05, - "loss": 0.5101, - "step": 1930 - }, - { - "epoch": 0.06777935730707804, - "grad_norm": 0.7594343423843384, - "learning_rate": 3.2166666666666665e-05, - "loss": 0.4955, - "step": 1931 - }, - { - "epoch": 0.06781445795819513, - "grad_norm": 0.791481077671051, - "learning_rate": 3.218333333333334e-05, - "loss": 0.6543, - "step": 1932 - }, - { - "epoch": 0.0678495586093122, - "grad_norm": 1.0861701965332031, - "learning_rate": 3.2200000000000003e-05, - "loss": 0.5558, - "step": 1933 - }, - { - "epoch": 0.06788465926042928, - "grad_norm": 0.8678163886070251, - "learning_rate": 3.2216666666666666e-05, - "loss": 0.5311, - "step": 1934 - }, - { - "epoch": 0.06791975991154636, - "grad_norm": 0.8307566046714783, - "learning_rate": 3.2233333333333335e-05, - "loss": 0.6258, - "step": 1935 - }, - { - "epoch": 0.06795486056266344, - "grad_norm": 0.850312352180481, - "learning_rate": 3.2250000000000005e-05, - "loss": 0.6235, - "step": 1936 - }, - { - "epoch": 0.06798996121378051, - "grad_norm": 0.6883727312088013, - "learning_rate": 3.226666666666667e-05, - "loss": 0.6288, - "step": 1937 - }, - { - "epoch": 0.0680250618648976, - "grad_norm": 0.6939675211906433, - "learning_rate": 3.2283333333333337e-05, - "loss": 0.6205, - "step": 1938 - }, - { - "epoch": 0.06806016251601467, - "grad_norm": 0.7434590458869934, - "learning_rate": 3.2300000000000006e-05, - "loss": 0.5353, - "step": 1939 - }, - { - "epoch": 0.06809526316713176, - "grad_norm": 0.895688533782959, - "learning_rate": 3.231666666666667e-05, - "loss": 0.4819, - "step": 1940 - }, - { - "epoch": 0.06813036381824883, - "grad_norm": 0.9775886535644531, - "learning_rate": 3.233333333333333e-05, - "loss": 0.6985, - "step": 1941 - }, - { - "epoch": 0.0681654644693659, - "grad_norm": 0.8531865477561951, - "learning_rate": 3.235e-05, - "loss": 0.4684, - "step": 1942 - }, - { - "epoch": 0.06820056512048299, - "grad_norm": 0.861181378364563, - "learning_rate": 3.236666666666667e-05, - "loss": 0.6122, - "step": 1943 - }, - { - "epoch": 0.06823566577160006, - "grad_norm": 0.8767728805541992, - "learning_rate": 3.238333333333333e-05, - "loss": 0.5318, - "step": 1944 - }, - { - "epoch": 0.06827076642271714, - "grad_norm": 0.943699061870575, - "learning_rate": 3.24e-05, - "loss": 0.5339, - "step": 1945 - }, - { - "epoch": 0.06830586707383422, - "grad_norm": 0.661409854888916, - "learning_rate": 3.2416666666666664e-05, - "loss": 0.5726, - "step": 1946 - }, - { - "epoch": 0.0683409677249513, - "grad_norm": 0.7223817110061646, - "learning_rate": 3.243333333333333e-05, - "loss": 0.5794, - "step": 1947 - }, - { - "epoch": 0.06837606837606838, - "grad_norm": 0.8460460305213928, - "learning_rate": 3.245e-05, - "loss": 0.4863, - "step": 1948 - }, - { - "epoch": 0.06841116902718546, - "grad_norm": 0.8054212927818298, - "learning_rate": 3.2466666666666665e-05, - "loss": 0.6597, - "step": 1949 - }, - { - "epoch": 0.06844626967830253, - "grad_norm": 0.7382675409317017, - "learning_rate": 3.2483333333333335e-05, - "loss": 0.5765, - "step": 1950 - }, - { - "epoch": 0.06848137032941962, - "grad_norm": 0.7212059497833252, - "learning_rate": 3.2500000000000004e-05, - "loss": 0.5732, - "step": 1951 - }, - { - "epoch": 0.06851647098053669, - "grad_norm": 0.9416758418083191, - "learning_rate": 3.2516666666666666e-05, - "loss": 0.5252, - "step": 1952 - }, - { - "epoch": 0.06855157163165376, - "grad_norm": 0.6506751179695129, - "learning_rate": 3.253333333333333e-05, - "loss": 0.5094, - "step": 1953 - }, - { - "epoch": 0.06858667228277085, - "grad_norm": 0.8954200744628906, - "learning_rate": 3.2550000000000005e-05, - "loss": 0.5857, - "step": 1954 - }, - { - "epoch": 0.06862177293388792, - "grad_norm": 0.8127887845039368, - "learning_rate": 3.256666666666667e-05, - "loss": 0.5523, - "step": 1955 - }, - { - "epoch": 0.068656873585005, - "grad_norm": 0.8125292062759399, - "learning_rate": 3.258333333333333e-05, - "loss": 0.4646, - "step": 1956 - }, - { - "epoch": 0.06869197423612208, - "grad_norm": 0.7837854027748108, - "learning_rate": 3.26e-05, - "loss": 0.5332, - "step": 1957 - }, - { - "epoch": 0.06872707488723916, - "grad_norm": 0.8488165736198425, - "learning_rate": 3.261666666666667e-05, - "loss": 0.5591, - "step": 1958 - }, - { - "epoch": 0.06876217553835624, - "grad_norm": 0.8304761648178101, - "learning_rate": 3.263333333333333e-05, - "loss": 0.5806, - "step": 1959 - }, - { - "epoch": 0.06879727618947332, - "grad_norm": 0.9049697518348694, - "learning_rate": 3.265e-05, - "loss": 0.5172, - "step": 1960 - }, - { - "epoch": 0.06883237684059039, - "grad_norm": 0.8829930424690247, - "learning_rate": 3.266666666666667e-05, - "loss": 0.5824, - "step": 1961 - }, - { - "epoch": 0.06886747749170748, - "grad_norm": 0.6192549467086792, - "learning_rate": 3.268333333333333e-05, - "loss": 0.5379, - "step": 1962 - }, - { - "epoch": 0.06890257814282455, - "grad_norm": 0.837633490562439, - "learning_rate": 3.27e-05, - "loss": 0.6367, - "step": 1963 - }, - { - "epoch": 0.06893767879394162, - "grad_norm": 0.7671380639076233, - "learning_rate": 3.2716666666666665e-05, - "loss": 0.5044, - "step": 1964 - }, - { - "epoch": 0.06897277944505871, - "grad_norm": 0.8990729451179504, - "learning_rate": 3.2733333333333334e-05, - "loss": 0.5756, - "step": 1965 - }, - { - "epoch": 0.06900788009617578, - "grad_norm": 0.6952524781227112, - "learning_rate": 3.275e-05, - "loss": 0.4545, - "step": 1966 - }, - { - "epoch": 0.06904298074729286, - "grad_norm": 0.7732763290405273, - "learning_rate": 3.2766666666666666e-05, - "loss": 0.5046, - "step": 1967 - }, - { - "epoch": 0.06907808139840994, - "grad_norm": 1.0858538150787354, - "learning_rate": 3.2783333333333335e-05, - "loss": 0.6112, - "step": 1968 - }, - { - "epoch": 0.06911318204952702, - "grad_norm": 0.8591503500938416, - "learning_rate": 3.2800000000000004e-05, - "loss": 0.487, - "step": 1969 - }, - { - "epoch": 0.0691482827006441, - "grad_norm": 0.7938820719718933, - "learning_rate": 3.281666666666667e-05, - "loss": 0.5326, - "step": 1970 - }, - { - "epoch": 0.06918338335176118, - "grad_norm": 0.8219045996665955, - "learning_rate": 3.283333333333333e-05, - "loss": 0.5366, - "step": 1971 - }, - { - "epoch": 0.06921848400287825, - "grad_norm": 0.7894295454025269, - "learning_rate": 3.2850000000000006e-05, - "loss": 0.6232, - "step": 1972 - }, - { - "epoch": 0.06925358465399534, - "grad_norm": 0.7418105006217957, - "learning_rate": 3.286666666666667e-05, - "loss": 0.5232, - "step": 1973 - }, - { - "epoch": 0.06928868530511241, - "grad_norm": 0.7490347027778625, - "learning_rate": 3.288333333333333e-05, - "loss": 0.5691, - "step": 1974 - }, - { - "epoch": 0.06932378595622948, - "grad_norm": 0.7386009097099304, - "learning_rate": 3.29e-05, - "loss": 0.4932, - "step": 1975 - }, - { - "epoch": 0.06935888660734657, - "grad_norm": 0.7403868436813354, - "learning_rate": 3.291666666666667e-05, - "loss": 0.5427, - "step": 1976 - }, - { - "epoch": 0.06939398725846364, - "grad_norm": 0.8204106092453003, - "learning_rate": 3.293333333333333e-05, - "loss": 0.657, - "step": 1977 - }, - { - "epoch": 0.06942908790958072, - "grad_norm": 0.9087108969688416, - "learning_rate": 3.295e-05, - "loss": 0.6642, - "step": 1978 - }, - { - "epoch": 0.0694641885606978, - "grad_norm": 0.837213397026062, - "learning_rate": 3.296666666666667e-05, - "loss": 0.6254, - "step": 1979 - }, - { - "epoch": 0.06949928921181488, - "grad_norm": 0.7432148456573486, - "learning_rate": 3.298333333333333e-05, - "loss": 0.5907, - "step": 1980 - }, - { - "epoch": 0.06953438986293196, - "grad_norm": 0.6581439971923828, - "learning_rate": 3.3e-05, - "loss": 0.6546, - "step": 1981 - }, - { - "epoch": 0.06956949051404904, - "grad_norm": 0.9265547394752502, - "learning_rate": 3.3016666666666665e-05, - "loss": 0.632, - "step": 1982 - }, - { - "epoch": 0.06960459116516611, - "grad_norm": 0.6786394715309143, - "learning_rate": 3.3033333333333334e-05, - "loss": 0.5785, - "step": 1983 - }, - { - "epoch": 0.0696396918162832, - "grad_norm": 0.8280029296875, - "learning_rate": 3.3050000000000004e-05, - "loss": 0.5846, - "step": 1984 - }, - { - "epoch": 0.06967479246740027, - "grad_norm": 0.7738841772079468, - "learning_rate": 3.3066666666666666e-05, - "loss": 0.6384, - "step": 1985 - }, - { - "epoch": 0.06970989311851734, - "grad_norm": 0.7423191666603088, - "learning_rate": 3.3083333333333336e-05, - "loss": 0.5848, - "step": 1986 - }, - { - "epoch": 0.06974499376963443, - "grad_norm": 0.7991020083427429, - "learning_rate": 3.3100000000000005e-05, - "loss": 0.5318, - "step": 1987 - }, - { - "epoch": 0.0697800944207515, - "grad_norm": 0.7569445967674255, - "learning_rate": 3.311666666666667e-05, - "loss": 0.5165, - "step": 1988 - }, - { - "epoch": 0.06981519507186858, - "grad_norm": 0.8478513360023499, - "learning_rate": 3.313333333333333e-05, - "loss": 0.5954, - "step": 1989 - }, - { - "epoch": 0.06985029572298566, - "grad_norm": 0.6857832670211792, - "learning_rate": 3.3150000000000006e-05, - "loss": 0.4983, - "step": 1990 - }, - { - "epoch": 0.06988539637410274, - "grad_norm": 0.9485779404640198, - "learning_rate": 3.316666666666667e-05, - "loss": 0.5532, - "step": 1991 - }, - { - "epoch": 0.06992049702521982, - "grad_norm": 0.6793798804283142, - "learning_rate": 3.318333333333333e-05, - "loss": 0.5208, - "step": 1992 - }, - { - "epoch": 0.0699555976763369, - "grad_norm": 0.9474567174911499, - "learning_rate": 3.32e-05, - "loss": 0.5715, - "step": 1993 - }, - { - "epoch": 0.06999069832745397, - "grad_norm": 1.0585821866989136, - "learning_rate": 3.321666666666667e-05, - "loss": 0.7064, - "step": 1994 - }, - { - "epoch": 0.07002579897857106, - "grad_norm": 1.0955852270126343, - "learning_rate": 3.323333333333333e-05, - "loss": 0.6431, - "step": 1995 - }, - { - "epoch": 0.07006089962968813, - "grad_norm": 0.6483298540115356, - "learning_rate": 3.325e-05, - "loss": 0.5455, - "step": 1996 - }, - { - "epoch": 0.0700960002808052, - "grad_norm": 0.9294613599777222, - "learning_rate": 3.326666666666667e-05, - "loss": 0.5843, - "step": 1997 - }, - { - "epoch": 0.07013110093192229, - "grad_norm": 0.8238900303840637, - "learning_rate": 3.3283333333333334e-05, - "loss": 0.57, - "step": 1998 - }, - { - "epoch": 0.07016620158303936, - "grad_norm": 0.7127041220664978, - "learning_rate": 3.33e-05, - "loss": 0.5394, - "step": 1999 - }, - { - "epoch": 0.07020130223415644, - "grad_norm": 0.7443059682846069, - "learning_rate": 3.3316666666666666e-05, - "loss": 0.6399, - "step": 2000 - }, - { - "epoch": 0.07023640288527352, - "grad_norm": 0.9109698534011841, - "learning_rate": 3.3333333333333335e-05, - "loss": 0.6596, - "step": 2001 - }, - { - "epoch": 0.0702715035363906, - "grad_norm": 1.0399954319000244, - "learning_rate": 3.3350000000000004e-05, - "loss": 0.5941, - "step": 2002 - }, - { - "epoch": 0.07030660418750768, - "grad_norm": 0.6853341460227966, - "learning_rate": 3.336666666666667e-05, - "loss": 0.5742, - "step": 2003 - }, - { - "epoch": 0.07034170483862476, - "grad_norm": 0.7907604575157166, - "learning_rate": 3.3383333333333336e-05, - "loss": 0.5137, - "step": 2004 - }, - { - "epoch": 0.07037680548974183, - "grad_norm": 0.9720898270606995, - "learning_rate": 3.3400000000000005e-05, - "loss": 0.6585, - "step": 2005 - }, - { - "epoch": 0.07041190614085892, - "grad_norm": 0.9500999450683594, - "learning_rate": 3.341666666666667e-05, - "loss": 0.5758, - "step": 2006 - }, - { - "epoch": 0.07044700679197599, - "grad_norm": 0.7743890881538391, - "learning_rate": 3.343333333333333e-05, - "loss": 0.5782, - "step": 2007 - }, - { - "epoch": 0.07048210744309306, - "grad_norm": 0.8283418416976929, - "learning_rate": 3.345000000000001e-05, - "loss": 0.4591, - "step": 2008 - }, - { - "epoch": 0.07051720809421015, - "grad_norm": 0.7590608596801758, - "learning_rate": 3.346666666666667e-05, - "loss": 0.5768, - "step": 2009 - }, - { - "epoch": 0.07055230874532722, - "grad_norm": 0.9352790117263794, - "learning_rate": 3.348333333333333e-05, - "loss": 0.5389, - "step": 2010 - }, - { - "epoch": 0.0705874093964443, - "grad_norm": 0.7843517661094666, - "learning_rate": 3.35e-05, - "loss": 0.5398, - "step": 2011 - }, - { - "epoch": 0.07062251004756138, - "grad_norm": 0.7295296788215637, - "learning_rate": 3.351666666666667e-05, - "loss": 0.4898, - "step": 2012 - }, - { - "epoch": 0.07065761069867846, - "grad_norm": 0.6961238980293274, - "learning_rate": 3.353333333333333e-05, - "loss": 0.6428, - "step": 2013 - }, - { - "epoch": 0.07069271134979555, - "grad_norm": 1.0248783826828003, - "learning_rate": 3.355e-05, - "loss": 0.6008, - "step": 2014 - }, - { - "epoch": 0.07072781200091262, - "grad_norm": 1.227441430091858, - "learning_rate": 3.356666666666667e-05, - "loss": 0.598, - "step": 2015 - }, - { - "epoch": 0.07076291265202969, - "grad_norm": 0.8519283533096313, - "learning_rate": 3.3583333333333334e-05, - "loss": 0.5784, - "step": 2016 - }, - { - "epoch": 0.07079801330314678, - "grad_norm": 0.8552387356758118, - "learning_rate": 3.3600000000000004e-05, - "loss": 0.4822, - "step": 2017 - }, - { - "epoch": 0.07083311395426385, - "grad_norm": 0.9949544668197632, - "learning_rate": 3.3616666666666666e-05, - "loss": 0.5516, - "step": 2018 - }, - { - "epoch": 0.07086821460538092, - "grad_norm": 0.7517937421798706, - "learning_rate": 3.3633333333333335e-05, - "loss": 0.5895, - "step": 2019 - }, - { - "epoch": 0.07090331525649801, - "grad_norm": 0.6484950184822083, - "learning_rate": 3.3650000000000005e-05, - "loss": 0.4859, - "step": 2020 - }, - { - "epoch": 0.07093841590761508, - "grad_norm": 0.6981530785560608, - "learning_rate": 3.366666666666667e-05, - "loss": 0.56, - "step": 2021 - }, - { - "epoch": 0.07097351655873216, - "grad_norm": 0.7985936999320984, - "learning_rate": 3.368333333333334e-05, - "loss": 0.619, - "step": 2022 - }, - { - "epoch": 0.07100861720984925, - "grad_norm": 0.7247318029403687, - "learning_rate": 3.3700000000000006e-05, - "loss": 0.4711, - "step": 2023 - }, - { - "epoch": 0.07104371786096632, - "grad_norm": 0.6601803302764893, - "learning_rate": 3.371666666666667e-05, - "loss": 0.4853, - "step": 2024 - }, - { - "epoch": 0.0710788185120834, - "grad_norm": 0.8003655672073364, - "learning_rate": 3.373333333333333e-05, - "loss": 0.6484, - "step": 2025 - }, - { - "epoch": 0.07111391916320048, - "grad_norm": 0.8628774285316467, - "learning_rate": 3.375000000000001e-05, - "loss": 0.6321, - "step": 2026 - }, - { - "epoch": 0.07114901981431755, - "grad_norm": 0.8663702011108398, - "learning_rate": 3.376666666666667e-05, - "loss": 0.5437, - "step": 2027 - }, - { - "epoch": 0.07118412046543464, - "grad_norm": 0.8554595708847046, - "learning_rate": 3.378333333333333e-05, - "loss": 0.5373, - "step": 2028 - }, - { - "epoch": 0.07121922111655171, - "grad_norm": 0.7867055535316467, - "learning_rate": 3.38e-05, - "loss": 0.5983, - "step": 2029 - }, - { - "epoch": 0.07125432176766879, - "grad_norm": 0.9063835144042969, - "learning_rate": 3.381666666666667e-05, - "loss": 0.5526, - "step": 2030 - }, - { - "epoch": 0.07128942241878587, - "grad_norm": 0.8749638795852661, - "learning_rate": 3.3833333333333334e-05, - "loss": 0.6198, - "step": 2031 - }, - { - "epoch": 0.07132452306990295, - "grad_norm": 0.8498993515968323, - "learning_rate": 3.385e-05, - "loss": 0.4558, - "step": 2032 - }, - { - "epoch": 0.07135962372102002, - "grad_norm": 0.7225214838981628, - "learning_rate": 3.3866666666666665e-05, - "loss": 0.6125, - "step": 2033 - }, - { - "epoch": 0.0713947243721371, - "grad_norm": 0.9027663469314575, - "learning_rate": 3.3883333333333335e-05, - "loss": 0.5431, - "step": 2034 - }, - { - "epoch": 0.07142982502325418, - "grad_norm": 0.9803597927093506, - "learning_rate": 3.3900000000000004e-05, - "loss": 0.622, - "step": 2035 - }, - { - "epoch": 0.07146492567437127, - "grad_norm": 0.7759371399879456, - "learning_rate": 3.391666666666667e-05, - "loss": 0.4492, - "step": 2036 - }, - { - "epoch": 0.07150002632548834, - "grad_norm": 0.8563437461853027, - "learning_rate": 3.3933333333333336e-05, - "loss": 0.5046, - "step": 2037 - }, - { - "epoch": 0.07153512697660541, - "grad_norm": 0.7520809173583984, - "learning_rate": 3.3950000000000005e-05, - "loss": 0.3989, - "step": 2038 - }, - { - "epoch": 0.0715702276277225, - "grad_norm": 0.7853280901908875, - "learning_rate": 3.396666666666667e-05, - "loss": 0.6385, - "step": 2039 - }, - { - "epoch": 0.07160532827883957, - "grad_norm": 0.7605171203613281, - "learning_rate": 3.398333333333333e-05, - "loss": 0.6058, - "step": 2040 - }, - { - "epoch": 0.07164042892995665, - "grad_norm": 0.7568932771682739, - "learning_rate": 3.4000000000000007e-05, - "loss": 0.6021, - "step": 2041 - }, - { - "epoch": 0.07167552958107373, - "grad_norm": 0.908478856086731, - "learning_rate": 3.401666666666667e-05, - "loss": 0.5639, - "step": 2042 - }, - { - "epoch": 0.0717106302321908, - "grad_norm": 0.8123347759246826, - "learning_rate": 3.403333333333333e-05, - "loss": 0.5848, - "step": 2043 - }, - { - "epoch": 0.07174573088330788, - "grad_norm": 0.7543589472770691, - "learning_rate": 3.405e-05, - "loss": 0.5708, - "step": 2044 - }, - { - "epoch": 0.07178083153442497, - "grad_norm": 0.7130060195922852, - "learning_rate": 3.406666666666667e-05, - "loss": 0.5564, - "step": 2045 - }, - { - "epoch": 0.07181593218554204, - "grad_norm": 0.859442412853241, - "learning_rate": 3.408333333333333e-05, - "loss": 0.675, - "step": 2046 - }, - { - "epoch": 0.07185103283665913, - "grad_norm": 0.8244993090629578, - "learning_rate": 3.41e-05, - "loss": 0.4957, - "step": 2047 - }, - { - "epoch": 0.0718861334877762, - "grad_norm": 0.8745046854019165, - "learning_rate": 3.411666666666667e-05, - "loss": 0.625, - "step": 2048 - }, - { - "epoch": 0.07192123413889327, - "grad_norm": 0.7713401913642883, - "learning_rate": 3.4133333333333334e-05, - "loss": 0.5427, - "step": 2049 - }, - { - "epoch": 0.07195633479001036, - "grad_norm": 0.6444798707962036, - "learning_rate": 3.415e-05, - "loss": 0.4128, - "step": 2050 - }, - { - "epoch": 0.07199143544112743, - "grad_norm": 0.8196809887886047, - "learning_rate": 3.4166666666666666e-05, - "loss": 0.5719, - "step": 2051 - }, - { - "epoch": 0.0720265360922445, - "grad_norm": 0.9262065291404724, - "learning_rate": 3.4183333333333335e-05, - "loss": 0.6583, - "step": 2052 - }, - { - "epoch": 0.07206163674336159, - "grad_norm": 0.771190881729126, - "learning_rate": 3.4200000000000005e-05, - "loss": 0.4685, - "step": 2053 - }, - { - "epoch": 0.07209673739447867, - "grad_norm": 0.7347545623779297, - "learning_rate": 3.421666666666667e-05, - "loss": 0.4458, - "step": 2054 - }, - { - "epoch": 0.07213183804559574, - "grad_norm": 0.6310091614723206, - "learning_rate": 3.4233333333333336e-05, - "loss": 0.6142, - "step": 2055 - }, - { - "epoch": 0.07216693869671283, - "grad_norm": 0.6907777786254883, - "learning_rate": 3.4250000000000006e-05, - "loss": 0.581, - "step": 2056 - }, - { - "epoch": 0.0722020393478299, - "grad_norm": 0.8350146412849426, - "learning_rate": 3.426666666666667e-05, - "loss": 0.5337, - "step": 2057 - }, - { - "epoch": 0.07223713999894699, - "grad_norm": 0.7044351100921631, - "learning_rate": 3.428333333333333e-05, - "loss": 0.6501, - "step": 2058 - }, - { - "epoch": 0.07227224065006406, - "grad_norm": 0.9584517478942871, - "learning_rate": 3.430000000000001e-05, - "loss": 0.5857, - "step": 2059 - }, - { - "epoch": 0.07230734130118113, - "grad_norm": 0.7614437937736511, - "learning_rate": 3.431666666666667e-05, - "loss": 0.6184, - "step": 2060 - }, - { - "epoch": 0.07234244195229822, - "grad_norm": 0.7863118648529053, - "learning_rate": 3.433333333333333e-05, - "loss": 0.4645, - "step": 2061 - }, - { - "epoch": 0.07237754260341529, - "grad_norm": 0.8786917328834534, - "learning_rate": 3.435e-05, - "loss": 0.519, - "step": 2062 - }, - { - "epoch": 0.07241264325453237, - "grad_norm": 0.6854803562164307, - "learning_rate": 3.436666666666667e-05, - "loss": 0.5213, - "step": 2063 - }, - { - "epoch": 0.07244774390564945, - "grad_norm": 1.1818560361862183, - "learning_rate": 3.438333333333333e-05, - "loss": 0.5751, - "step": 2064 - }, - { - "epoch": 0.07248284455676653, - "grad_norm": 0.9576982855796814, - "learning_rate": 3.4399999999999996e-05, - "loss": 0.6072, - "step": 2065 - }, - { - "epoch": 0.0725179452078836, - "grad_norm": 0.7695790529251099, - "learning_rate": 3.441666666666667e-05, - "loss": 0.5004, - "step": 2066 - }, - { - "epoch": 0.07255304585900069, - "grad_norm": 1.0148605108261108, - "learning_rate": 3.4433333333333335e-05, - "loss": 0.528, - "step": 2067 - }, - { - "epoch": 0.07258814651011776, - "grad_norm": 0.7785366177558899, - "learning_rate": 3.445e-05, - "loss": 0.5887, - "step": 2068 - }, - { - "epoch": 0.07262324716123485, - "grad_norm": 1.1074315309524536, - "learning_rate": 3.4466666666666666e-05, - "loss": 0.4712, - "step": 2069 - }, - { - "epoch": 0.07265834781235192, - "grad_norm": 0.9337692260742188, - "learning_rate": 3.4483333333333336e-05, - "loss": 0.6498, - "step": 2070 - }, - { - "epoch": 0.072693448463469, - "grad_norm": 0.9755414724349976, - "learning_rate": 3.45e-05, - "loss": 0.6558, - "step": 2071 - }, - { - "epoch": 0.07272854911458608, - "grad_norm": 1.0787190198898315, - "learning_rate": 3.451666666666667e-05, - "loss": 0.6811, - "step": 2072 - }, - { - "epoch": 0.07276364976570315, - "grad_norm": 0.5997499227523804, - "learning_rate": 3.453333333333334e-05, - "loss": 0.5581, - "step": 2073 - }, - { - "epoch": 0.07279875041682023, - "grad_norm": 0.8069697618484497, - "learning_rate": 3.455e-05, - "loss": 0.5705, - "step": 2074 - }, - { - "epoch": 0.07283385106793731, - "grad_norm": 1.0357967615127563, - "learning_rate": 3.456666666666667e-05, - "loss": 0.5871, - "step": 2075 - }, - { - "epoch": 0.07286895171905439, - "grad_norm": 0.7173290252685547, - "learning_rate": 3.458333333333333e-05, - "loss": 0.5328, - "step": 2076 - }, - { - "epoch": 0.07290405237017146, - "grad_norm": 0.6961866021156311, - "learning_rate": 3.46e-05, - "loss": 0.4654, - "step": 2077 - }, - { - "epoch": 0.07293915302128855, - "grad_norm": 0.7530010342597961, - "learning_rate": 3.461666666666667e-05, - "loss": 0.5232, - "step": 2078 - }, - { - "epoch": 0.07297425367240562, - "grad_norm": 0.7027971744537354, - "learning_rate": 3.463333333333333e-05, - "loss": 0.5803, - "step": 2079 - }, - { - "epoch": 0.07300935432352271, - "grad_norm": 0.8408620357513428, - "learning_rate": 3.465e-05, - "loss": 0.6066, - "step": 2080 - }, - { - "epoch": 0.07304445497463978, - "grad_norm": 0.7428988814353943, - "learning_rate": 3.466666666666667e-05, - "loss": 0.6052, - "step": 2081 - }, - { - "epoch": 0.07307955562575685, - "grad_norm": 0.6913266181945801, - "learning_rate": 3.4683333333333334e-05, - "loss": 0.5865, - "step": 2082 - }, - { - "epoch": 0.07311465627687394, - "grad_norm": 0.8748499155044556, - "learning_rate": 3.4699999999999996e-05, - "loss": 0.4949, - "step": 2083 - }, - { - "epoch": 0.07314975692799101, - "grad_norm": 0.8470097184181213, - "learning_rate": 3.471666666666667e-05, - "loss": 0.5711, - "step": 2084 - }, - { - "epoch": 0.07318485757910809, - "grad_norm": 0.7705821394920349, - "learning_rate": 3.4733333333333335e-05, - "loss": 0.557, - "step": 2085 - }, - { - "epoch": 0.07321995823022517, - "grad_norm": 0.6817398071289062, - "learning_rate": 3.475e-05, - "loss": 0.4609, - "step": 2086 - }, - { - "epoch": 0.07325505888134225, - "grad_norm": 0.7529863715171814, - "learning_rate": 3.476666666666667e-05, - "loss": 0.5805, - "step": 2087 - }, - { - "epoch": 0.07329015953245932, - "grad_norm": 0.7706541419029236, - "learning_rate": 3.4783333333333336e-05, - "loss": 0.4619, - "step": 2088 - }, - { - "epoch": 0.07332526018357641, - "grad_norm": 0.9617740511894226, - "learning_rate": 3.48e-05, - "loss": 0.6628, - "step": 2089 - }, - { - "epoch": 0.07336036083469348, - "grad_norm": 0.7068691849708557, - "learning_rate": 3.481666666666667e-05, - "loss": 0.539, - "step": 2090 - }, - { - "epoch": 0.07339546148581057, - "grad_norm": 0.7176676988601685, - "learning_rate": 3.483333333333334e-05, - "loss": 0.4985, - "step": 2091 - }, - { - "epoch": 0.07343056213692764, - "grad_norm": 0.6520867943763733, - "learning_rate": 3.485e-05, - "loss": 0.5244, - "step": 2092 - }, - { - "epoch": 0.07346566278804471, - "grad_norm": 0.7456715106964111, - "learning_rate": 3.486666666666667e-05, - "loss": 0.5759, - "step": 2093 - }, - { - "epoch": 0.0735007634391618, - "grad_norm": 0.6622182726860046, - "learning_rate": 3.488333333333333e-05, - "loss": 0.3795, - "step": 2094 - }, - { - "epoch": 0.07353586409027887, - "grad_norm": 0.6189080476760864, - "learning_rate": 3.49e-05, - "loss": 0.528, - "step": 2095 - }, - { - "epoch": 0.07357096474139595, - "grad_norm": 0.809302568435669, - "learning_rate": 3.491666666666667e-05, - "loss": 0.5764, - "step": 2096 - }, - { - "epoch": 0.07360606539251303, - "grad_norm": 0.7465280890464783, - "learning_rate": 3.493333333333333e-05, - "loss": 0.5074, - "step": 2097 - }, - { - "epoch": 0.07364116604363011, - "grad_norm": 0.8541216254234314, - "learning_rate": 3.495e-05, - "loss": 0.6856, - "step": 2098 - }, - { - "epoch": 0.0736762666947472, - "grad_norm": 0.7714167833328247, - "learning_rate": 3.496666666666667e-05, - "loss": 0.5308, - "step": 2099 - }, - { - "epoch": 0.07371136734586427, - "grad_norm": 0.7811244130134583, - "learning_rate": 3.4983333333333334e-05, - "loss": 0.58, - "step": 2100 - }, - { - "epoch": 0.07374646799698134, - "grad_norm": 0.8182535171508789, - "learning_rate": 3.5e-05, - "loss": 0.5986, - "step": 2101 - }, - { - "epoch": 0.07378156864809843, - "grad_norm": 0.8494198322296143, - "learning_rate": 3.501666666666667e-05, - "loss": 0.6197, - "step": 2102 - }, - { - "epoch": 0.0738166692992155, - "grad_norm": 0.8508967757225037, - "learning_rate": 3.5033333333333336e-05, - "loss": 0.6089, - "step": 2103 - }, - { - "epoch": 0.07385176995033257, - "grad_norm": 0.8030626773834229, - "learning_rate": 3.505e-05, - "loss": 0.4714, - "step": 2104 - }, - { - "epoch": 0.07388687060144966, - "grad_norm": 0.7187404632568359, - "learning_rate": 3.506666666666667e-05, - "loss": 0.5252, - "step": 2105 - }, - { - "epoch": 0.07392197125256673, - "grad_norm": 0.7031899690628052, - "learning_rate": 3.508333333333334e-05, - "loss": 0.5545, - "step": 2106 - }, - { - "epoch": 0.07395707190368381, - "grad_norm": 0.7800685167312622, - "learning_rate": 3.51e-05, - "loss": 0.58, - "step": 2107 - }, - { - "epoch": 0.0739921725548009, - "grad_norm": 0.7981323599815369, - "learning_rate": 3.511666666666667e-05, - "loss": 0.545, - "step": 2108 - }, - { - "epoch": 0.07402727320591797, - "grad_norm": 0.879671037197113, - "learning_rate": 3.513333333333334e-05, - "loss": 0.5726, - "step": 2109 - }, - { - "epoch": 0.07406237385703505, - "grad_norm": 0.9708579778671265, - "learning_rate": 3.515e-05, - "loss": 0.5059, - "step": 2110 - }, - { - "epoch": 0.07409747450815213, - "grad_norm": 0.7893533110618591, - "learning_rate": 3.516666666666667e-05, - "loss": 0.6809, - "step": 2111 - }, - { - "epoch": 0.0741325751592692, - "grad_norm": 0.6041672229766846, - "learning_rate": 3.518333333333333e-05, - "loss": 0.6437, - "step": 2112 - }, - { - "epoch": 0.07416767581038629, - "grad_norm": 0.7589817047119141, - "learning_rate": 3.52e-05, - "loss": 0.5075, - "step": 2113 - }, - { - "epoch": 0.07420277646150336, - "grad_norm": 0.8484549522399902, - "learning_rate": 3.521666666666667e-05, - "loss": 0.6007, - "step": 2114 - }, - { - "epoch": 0.07423787711262043, - "grad_norm": 0.7746132016181946, - "learning_rate": 3.5233333333333334e-05, - "loss": 0.5064, - "step": 2115 - }, - { - "epoch": 0.07427297776373752, - "grad_norm": 0.9150781035423279, - "learning_rate": 3.525e-05, - "loss": 0.6053, - "step": 2116 - }, - { - "epoch": 0.0743080784148546, - "grad_norm": 0.9150809645652771, - "learning_rate": 3.526666666666667e-05, - "loss": 0.5716, - "step": 2117 - }, - { - "epoch": 0.07434317906597167, - "grad_norm": 0.856972873210907, - "learning_rate": 3.5283333333333335e-05, - "loss": 0.494, - "step": 2118 - }, - { - "epoch": 0.07437827971708875, - "grad_norm": 0.8792679905891418, - "learning_rate": 3.53e-05, - "loss": 0.4903, - "step": 2119 - }, - { - "epoch": 0.07441338036820583, - "grad_norm": 0.7545060515403748, - "learning_rate": 3.531666666666667e-05, - "loss": 0.5827, - "step": 2120 - }, - { - "epoch": 0.07444848101932292, - "grad_norm": 0.8906727433204651, - "learning_rate": 3.5333333333333336e-05, - "loss": 0.6036, - "step": 2121 - }, - { - "epoch": 0.07448358167043999, - "grad_norm": 0.9564309120178223, - "learning_rate": 3.535e-05, - "loss": 0.5145, - "step": 2122 - }, - { - "epoch": 0.07451868232155706, - "grad_norm": 0.8494366407394409, - "learning_rate": 3.536666666666667e-05, - "loss": 0.6965, - "step": 2123 - }, - { - "epoch": 0.07455378297267415, - "grad_norm": 0.7552554607391357, - "learning_rate": 3.538333333333334e-05, - "loss": 0.5099, - "step": 2124 - }, - { - "epoch": 0.07458888362379122, - "grad_norm": 0.851131021976471, - "learning_rate": 3.54e-05, - "loss": 0.5404, - "step": 2125 - }, - { - "epoch": 0.0746239842749083, - "grad_norm": 0.6921199560165405, - "learning_rate": 3.541666666666667e-05, - "loss": 0.6285, - "step": 2126 - }, - { - "epoch": 0.07465908492602538, - "grad_norm": 0.8893712162971497, - "learning_rate": 3.543333333333333e-05, - "loss": 0.5643, - "step": 2127 - }, - { - "epoch": 0.07469418557714246, - "grad_norm": 0.7482878565788269, - "learning_rate": 3.545e-05, - "loss": 0.4885, - "step": 2128 - }, - { - "epoch": 0.07472928622825953, - "grad_norm": 0.7227964997291565, - "learning_rate": 3.546666666666667e-05, - "loss": 0.6078, - "step": 2129 - }, - { - "epoch": 0.07476438687937662, - "grad_norm": 0.8027764558792114, - "learning_rate": 3.548333333333333e-05, - "loss": 0.5849, - "step": 2130 - }, - { - "epoch": 0.07479948753049369, - "grad_norm": 0.7268373966217041, - "learning_rate": 3.55e-05, - "loss": 0.5306, - "step": 2131 - }, - { - "epoch": 0.07483458818161078, - "grad_norm": 0.9068865776062012, - "learning_rate": 3.551666666666667e-05, - "loss": 0.6059, - "step": 2132 - }, - { - "epoch": 0.07486968883272785, - "grad_norm": 0.8272049427032471, - "learning_rate": 3.5533333333333334e-05, - "loss": 0.5534, - "step": 2133 - }, - { - "epoch": 0.07490478948384492, - "grad_norm": 0.7602357268333435, - "learning_rate": 3.555e-05, - "loss": 0.5272, - "step": 2134 - }, - { - "epoch": 0.07493989013496201, - "grad_norm": 0.8995108008384705, - "learning_rate": 3.556666666666667e-05, - "loss": 0.52, - "step": 2135 - }, - { - "epoch": 0.07497499078607908, - "grad_norm": 0.8890154957771301, - "learning_rate": 3.5583333333333335e-05, - "loss": 0.5498, - "step": 2136 - }, - { - "epoch": 0.07501009143719616, - "grad_norm": 0.7440714240074158, - "learning_rate": 3.56e-05, - "loss": 0.5799, - "step": 2137 - }, - { - "epoch": 0.07504519208831324, - "grad_norm": 0.8879970908164978, - "learning_rate": 3.561666666666667e-05, - "loss": 0.6472, - "step": 2138 - }, - { - "epoch": 0.07508029273943032, - "grad_norm": 0.8416216373443604, - "learning_rate": 3.563333333333334e-05, - "loss": 0.6374, - "step": 2139 - }, - { - "epoch": 0.07511539339054739, - "grad_norm": 0.8991538882255554, - "learning_rate": 3.565e-05, - "loss": 0.5562, - "step": 2140 - }, - { - "epoch": 0.07515049404166448, - "grad_norm": 0.7219961881637573, - "learning_rate": 3.566666666666667e-05, - "loss": 0.478, - "step": 2141 - }, - { - "epoch": 0.07518559469278155, - "grad_norm": 0.7459020018577576, - "learning_rate": 3.568333333333334e-05, - "loss": 0.5468, - "step": 2142 - }, - { - "epoch": 0.07522069534389864, - "grad_norm": 0.6780352592468262, - "learning_rate": 3.57e-05, - "loss": 0.5353, - "step": 2143 - }, - { - "epoch": 0.07525579599501571, - "grad_norm": 0.7579954862594604, - "learning_rate": 3.571666666666667e-05, - "loss": 0.5161, - "step": 2144 - }, - { - "epoch": 0.07529089664613278, - "grad_norm": 0.9592366814613342, - "learning_rate": 3.573333333333333e-05, - "loss": 0.5586, - "step": 2145 - }, - { - "epoch": 0.07532599729724987, - "grad_norm": 0.823238730430603, - "learning_rate": 3.575e-05, - "loss": 0.5893, - "step": 2146 - }, - { - "epoch": 0.07536109794836694, - "grad_norm": 0.882710337638855, - "learning_rate": 3.576666666666667e-05, - "loss": 0.6023, - "step": 2147 - }, - { - "epoch": 0.07539619859948402, - "grad_norm": 0.74812251329422, - "learning_rate": 3.5783333333333333e-05, - "loss": 0.5202, - "step": 2148 - }, - { - "epoch": 0.0754312992506011, - "grad_norm": 0.7439372539520264, - "learning_rate": 3.58e-05, - "loss": 0.4898, - "step": 2149 - }, - { - "epoch": 0.07546639990171818, - "grad_norm": 0.742073118686676, - "learning_rate": 3.581666666666667e-05, - "loss": 0.5531, - "step": 2150 - }, - { - "epoch": 0.07550150055283525, - "grad_norm": 0.7700799703598022, - "learning_rate": 3.5833333333333335e-05, - "loss": 0.5034, - "step": 2151 - }, - { - "epoch": 0.07553660120395234, - "grad_norm": 0.7042312622070312, - "learning_rate": 3.585e-05, - "loss": 0.6677, - "step": 2152 - }, - { - "epoch": 0.07557170185506941, - "grad_norm": 0.9024224281311035, - "learning_rate": 3.586666666666667e-05, - "loss": 0.5153, - "step": 2153 - }, - { - "epoch": 0.0756068025061865, - "grad_norm": 0.781620979309082, - "learning_rate": 3.5883333333333336e-05, - "loss": 0.6633, - "step": 2154 - }, - { - "epoch": 0.07564190315730357, - "grad_norm": 0.7566316723823547, - "learning_rate": 3.59e-05, - "loss": 0.5664, - "step": 2155 - }, - { - "epoch": 0.07567700380842064, - "grad_norm": 0.6882869601249695, - "learning_rate": 3.591666666666667e-05, - "loss": 0.494, - "step": 2156 - }, - { - "epoch": 0.07571210445953773, - "grad_norm": 0.7702744603157043, - "learning_rate": 3.593333333333334e-05, - "loss": 0.5454, - "step": 2157 - }, - { - "epoch": 0.0757472051106548, - "grad_norm": 0.7371748685836792, - "learning_rate": 3.595e-05, - "loss": 0.383, - "step": 2158 - }, - { - "epoch": 0.07578230576177188, - "grad_norm": 0.7292578220367432, - "learning_rate": 3.596666666666667e-05, - "loss": 0.5183, - "step": 2159 - }, - { - "epoch": 0.07581740641288896, - "grad_norm": 0.7964637875556946, - "learning_rate": 3.598333333333334e-05, - "loss": 0.5299, - "step": 2160 - }, - { - "epoch": 0.07585250706400604, - "grad_norm": 0.6916497945785522, - "learning_rate": 3.6e-05, - "loss": 0.5769, - "step": 2161 - }, - { - "epoch": 0.07588760771512311, - "grad_norm": 0.9018362164497375, - "learning_rate": 3.601666666666667e-05, - "loss": 0.4941, - "step": 2162 - }, - { - "epoch": 0.0759227083662402, - "grad_norm": 0.6992202997207642, - "learning_rate": 3.603333333333333e-05, - "loss": 0.5614, - "step": 2163 - }, - { - "epoch": 0.07595780901735727, - "grad_norm": 0.8575785756111145, - "learning_rate": 3.605e-05, - "loss": 0.5844, - "step": 2164 - }, - { - "epoch": 0.07599290966847436, - "grad_norm": 0.7805685997009277, - "learning_rate": 3.606666666666667e-05, - "loss": 0.4449, - "step": 2165 - }, - { - "epoch": 0.07602801031959143, - "grad_norm": 0.7467162013053894, - "learning_rate": 3.6083333333333334e-05, - "loss": 0.531, - "step": 2166 - }, - { - "epoch": 0.0760631109707085, - "grad_norm": 0.7604694962501526, - "learning_rate": 3.61e-05, - "loss": 0.5017, - "step": 2167 - }, - { - "epoch": 0.07609821162182559, - "grad_norm": 0.711988091468811, - "learning_rate": 3.611666666666667e-05, - "loss": 0.5102, - "step": 2168 - }, - { - "epoch": 0.07613331227294266, - "grad_norm": 0.826841413974762, - "learning_rate": 3.6133333333333335e-05, - "loss": 0.5335, - "step": 2169 - }, - { - "epoch": 0.07616841292405974, - "grad_norm": 0.7697293758392334, - "learning_rate": 3.615e-05, - "loss": 0.6179, - "step": 2170 - }, - { - "epoch": 0.07620351357517682, - "grad_norm": 1.0296145677566528, - "learning_rate": 3.6166666666666674e-05, - "loss": 0.5057, - "step": 2171 - }, - { - "epoch": 0.0762386142262939, - "grad_norm": 0.8230093717575073, - "learning_rate": 3.6183333333333336e-05, - "loss": 0.5982, - "step": 2172 - }, - { - "epoch": 0.07627371487741097, - "grad_norm": 0.8089206218719482, - "learning_rate": 3.62e-05, - "loss": 0.455, - "step": 2173 - }, - { - "epoch": 0.07630881552852806, - "grad_norm": 0.7948326468467712, - "learning_rate": 3.621666666666667e-05, - "loss": 0.5372, - "step": 2174 - }, - { - "epoch": 0.07634391617964513, - "grad_norm": 1.0101865530014038, - "learning_rate": 3.623333333333334e-05, - "loss": 0.5996, - "step": 2175 - }, - { - "epoch": 0.07637901683076222, - "grad_norm": 0.6938397884368896, - "learning_rate": 3.625e-05, - "loss": 0.639, - "step": 2176 - }, - { - "epoch": 0.07641411748187929, - "grad_norm": 0.7769777178764343, - "learning_rate": 3.626666666666667e-05, - "loss": 0.6708, - "step": 2177 - }, - { - "epoch": 0.07644921813299636, - "grad_norm": 0.7729928493499756, - "learning_rate": 3.628333333333334e-05, - "loss": 0.5289, - "step": 2178 - }, - { - "epoch": 0.07648431878411345, - "grad_norm": 0.7576045393943787, - "learning_rate": 3.63e-05, - "loss": 0.5454, - "step": 2179 - }, - { - "epoch": 0.07651941943523052, - "grad_norm": 0.7550916075706482, - "learning_rate": 3.631666666666667e-05, - "loss": 0.4929, - "step": 2180 - }, - { - "epoch": 0.0765545200863476, - "grad_norm": 0.7301676869392395, - "learning_rate": 3.633333333333333e-05, - "loss": 0.4951, - "step": 2181 - }, - { - "epoch": 0.07658962073746468, - "grad_norm": 0.7349069118499756, - "learning_rate": 3.635e-05, - "loss": 0.5518, - "step": 2182 - }, - { - "epoch": 0.07662472138858176, - "grad_norm": 0.7225417494773865, - "learning_rate": 3.636666666666667e-05, - "loss": 0.4153, - "step": 2183 - }, - { - "epoch": 0.07665982203969883, - "grad_norm": 0.7492383718490601, - "learning_rate": 3.6383333333333335e-05, - "loss": 0.5917, - "step": 2184 - }, - { - "epoch": 0.07669492269081592, - "grad_norm": 0.7366249561309814, - "learning_rate": 3.6400000000000004e-05, - "loss": 0.5279, - "step": 2185 - }, - { - "epoch": 0.07673002334193299, - "grad_norm": 0.8797443509101868, - "learning_rate": 3.641666666666667e-05, - "loss": 0.507, - "step": 2186 - }, - { - "epoch": 0.07676512399305008, - "grad_norm": 0.7121625542640686, - "learning_rate": 3.6433333333333336e-05, - "loss": 0.6027, - "step": 2187 - }, - { - "epoch": 0.07680022464416715, - "grad_norm": 0.853376567363739, - "learning_rate": 3.645e-05, - "loss": 0.5231, - "step": 2188 - }, - { - "epoch": 0.07683532529528422, - "grad_norm": 0.7478623986244202, - "learning_rate": 3.646666666666667e-05, - "loss": 0.55, - "step": 2189 - }, - { - "epoch": 0.07687042594640131, - "grad_norm": 0.8315578103065491, - "learning_rate": 3.648333333333334e-05, - "loss": 0.4831, - "step": 2190 - }, - { - "epoch": 0.07690552659751838, - "grad_norm": 0.9318643808364868, - "learning_rate": 3.65e-05, - "loss": 0.6127, - "step": 2191 - }, - { - "epoch": 0.07694062724863546, - "grad_norm": 1.2410143613815308, - "learning_rate": 3.651666666666667e-05, - "loss": 0.5397, - "step": 2192 - }, - { - "epoch": 0.07697572789975254, - "grad_norm": 0.6230559349060059, - "learning_rate": 3.653333333333334e-05, - "loss": 0.5763, - "step": 2193 - }, - { - "epoch": 0.07701082855086962, - "grad_norm": 0.7796958088874817, - "learning_rate": 3.655e-05, - "loss": 0.6743, - "step": 2194 - }, - { - "epoch": 0.07704592920198669, - "grad_norm": 0.9190065264701843, - "learning_rate": 3.656666666666666e-05, - "loss": 0.6576, - "step": 2195 - }, - { - "epoch": 0.07708102985310378, - "grad_norm": 0.8774913549423218, - "learning_rate": 3.658333333333334e-05, - "loss": 0.57, - "step": 2196 - }, - { - "epoch": 0.07711613050422085, - "grad_norm": 0.7319844365119934, - "learning_rate": 3.66e-05, - "loss": 0.5092, - "step": 2197 - }, - { - "epoch": 0.07715123115533794, - "grad_norm": 0.9354243278503418, - "learning_rate": 3.6616666666666664e-05, - "loss": 0.507, - "step": 2198 - }, - { - "epoch": 0.07718633180645501, - "grad_norm": 0.800602912902832, - "learning_rate": 3.6633333333333334e-05, - "loss": 0.5728, - "step": 2199 - }, - { - "epoch": 0.07722143245757208, - "grad_norm": 0.8301639556884766, - "learning_rate": 3.665e-05, - "loss": 0.5946, - "step": 2200 - }, - { - "epoch": 0.07725653310868917, - "grad_norm": 0.7890861630439758, - "learning_rate": 3.6666666666666666e-05, - "loss": 0.4892, - "step": 2201 - }, - { - "epoch": 0.07729163375980624, - "grad_norm": 0.8486902117729187, - "learning_rate": 3.6683333333333335e-05, - "loss": 0.5795, - "step": 2202 - }, - { - "epoch": 0.07732673441092332, - "grad_norm": 0.9671634435653687, - "learning_rate": 3.6700000000000004e-05, - "loss": 0.4871, - "step": 2203 - }, - { - "epoch": 0.0773618350620404, - "grad_norm": 0.9384043216705322, - "learning_rate": 3.671666666666667e-05, - "loss": 0.5498, - "step": 2204 - }, - { - "epoch": 0.07739693571315748, - "grad_norm": 0.7141161561012268, - "learning_rate": 3.6733333333333336e-05, - "loss": 0.5651, - "step": 2205 - }, - { - "epoch": 0.07743203636427455, - "grad_norm": 0.8361902236938477, - "learning_rate": 3.675e-05, - "loss": 0.492, - "step": 2206 - }, - { - "epoch": 0.07746713701539164, - "grad_norm": 0.7120230793952942, - "learning_rate": 3.676666666666667e-05, - "loss": 0.5368, - "step": 2207 - }, - { - "epoch": 0.07750223766650871, - "grad_norm": 0.6863588094711304, - "learning_rate": 3.678333333333334e-05, - "loss": 0.5077, - "step": 2208 - }, - { - "epoch": 0.0775373383176258, - "grad_norm": 0.6332901120185852, - "learning_rate": 3.68e-05, - "loss": 0.6221, - "step": 2209 - }, - { - "epoch": 0.07757243896874287, - "grad_norm": 0.7178941965103149, - "learning_rate": 3.681666666666667e-05, - "loss": 0.5749, - "step": 2210 - }, - { - "epoch": 0.07760753961985994, - "grad_norm": 0.7471216917037964, - "learning_rate": 3.683333333333334e-05, - "loss": 0.5869, - "step": 2211 - }, - { - "epoch": 0.07764264027097703, - "grad_norm": 0.924140989780426, - "learning_rate": 3.685e-05, - "loss": 0.5508, - "step": 2212 - }, - { - "epoch": 0.0776777409220941, - "grad_norm": 0.6945390105247498, - "learning_rate": 3.6866666666666664e-05, - "loss": 0.6381, - "step": 2213 - }, - { - "epoch": 0.07771284157321118, - "grad_norm": 0.9160926342010498, - "learning_rate": 3.688333333333333e-05, - "loss": 0.6557, - "step": 2214 - }, - { - "epoch": 0.07774794222432826, - "grad_norm": 0.7674309015274048, - "learning_rate": 3.69e-05, - "loss": 0.5329, - "step": 2215 - }, - { - "epoch": 0.07778304287544534, - "grad_norm": 0.7598112225532532, - "learning_rate": 3.6916666666666665e-05, - "loss": 0.5817, - "step": 2216 - }, - { - "epoch": 0.07781814352656241, - "grad_norm": 0.6250936985015869, - "learning_rate": 3.6933333333333334e-05, - "loss": 0.4513, - "step": 2217 - }, - { - "epoch": 0.0778532441776795, - "grad_norm": 0.7116743326187134, - "learning_rate": 3.6950000000000004e-05, - "loss": 0.5479, - "step": 2218 - }, - { - "epoch": 0.07788834482879657, - "grad_norm": 0.7151616215705872, - "learning_rate": 3.6966666666666666e-05, - "loss": 0.564, - "step": 2219 - }, - { - "epoch": 0.07792344547991366, - "grad_norm": 0.6935098767280579, - "learning_rate": 3.6983333333333336e-05, - "loss": 0.5491, - "step": 2220 - }, - { - "epoch": 0.07795854613103073, - "grad_norm": 0.8330838084220886, - "learning_rate": 3.7e-05, - "loss": 0.3992, - "step": 2221 - }, - { - "epoch": 0.0779936467821478, - "grad_norm": 0.6862971782684326, - "learning_rate": 3.701666666666667e-05, - "loss": 0.5798, - "step": 2222 - }, - { - "epoch": 0.07802874743326489, - "grad_norm": 0.778533935546875, - "learning_rate": 3.703333333333334e-05, - "loss": 0.6203, - "step": 2223 - }, - { - "epoch": 0.07806384808438196, - "grad_norm": 0.7325842380523682, - "learning_rate": 3.705e-05, - "loss": 0.6057, - "step": 2224 - }, - { - "epoch": 0.07809894873549904, - "grad_norm": 0.8332782983779907, - "learning_rate": 3.706666666666667e-05, - "loss": 0.6773, - "step": 2225 - }, - { - "epoch": 0.07813404938661613, - "grad_norm": 0.7655009627342224, - "learning_rate": 3.708333333333334e-05, - "loss": 0.5657, - "step": 2226 - }, - { - "epoch": 0.0781691500377332, - "grad_norm": 0.9526976943016052, - "learning_rate": 3.71e-05, - "loss": 0.5524, - "step": 2227 - }, - { - "epoch": 0.07820425068885027, - "grad_norm": 0.7546918988227844, - "learning_rate": 3.711666666666666e-05, - "loss": 0.484, - "step": 2228 - }, - { - "epoch": 0.07823935133996736, - "grad_norm": 0.7579228281974792, - "learning_rate": 3.713333333333334e-05, - "loss": 0.5696, - "step": 2229 - }, - { - "epoch": 0.07827445199108443, - "grad_norm": 0.9136479496955872, - "learning_rate": 3.715e-05, - "loss": 0.6414, - "step": 2230 - }, - { - "epoch": 0.07830955264220152, - "grad_norm": 0.8219105005264282, - "learning_rate": 3.7166666666666664e-05, - "loss": 0.6046, - "step": 2231 - }, - { - "epoch": 0.07834465329331859, - "grad_norm": 0.7353741526603699, - "learning_rate": 3.7183333333333334e-05, - "loss": 0.618, - "step": 2232 - }, - { - "epoch": 0.07837975394443567, - "grad_norm": 0.7659191489219666, - "learning_rate": 3.72e-05, - "loss": 0.4778, - "step": 2233 - }, - { - "epoch": 0.07841485459555275, - "grad_norm": 1.063459038734436, - "learning_rate": 3.7216666666666666e-05, - "loss": 0.4467, - "step": 2234 - }, - { - "epoch": 0.07844995524666983, - "grad_norm": 0.7490857243537903, - "learning_rate": 3.7233333333333335e-05, - "loss": 0.6027, - "step": 2235 - }, - { - "epoch": 0.0784850558977869, - "grad_norm": 0.6932900547981262, - "learning_rate": 3.7250000000000004e-05, - "loss": 0.5522, - "step": 2236 - }, - { - "epoch": 0.07852015654890399, - "grad_norm": 0.7601192593574524, - "learning_rate": 3.726666666666667e-05, - "loss": 0.5388, - "step": 2237 - }, - { - "epoch": 0.07855525720002106, - "grad_norm": 0.6286331415176392, - "learning_rate": 3.7283333333333336e-05, - "loss": 0.5318, - "step": 2238 - }, - { - "epoch": 0.07859035785113813, - "grad_norm": 0.83624666929245, - "learning_rate": 3.73e-05, - "loss": 0.6125, - "step": 2239 - }, - { - "epoch": 0.07862545850225522, - "grad_norm": 1.5692675113677979, - "learning_rate": 3.731666666666667e-05, - "loss": 0.552, - "step": 2240 - }, - { - "epoch": 0.07866055915337229, - "grad_norm": 0.7515231370925903, - "learning_rate": 3.733333333333334e-05, - "loss": 0.4765, - "step": 2241 - }, - { - "epoch": 0.07869565980448938, - "grad_norm": 0.6803705096244812, - "learning_rate": 3.735e-05, - "loss": 0.5174, - "step": 2242 - }, - { - "epoch": 0.07873076045560645, - "grad_norm": 0.8735167384147644, - "learning_rate": 3.736666666666667e-05, - "loss": 0.569, - "step": 2243 - }, - { - "epoch": 0.07876586110672353, - "grad_norm": 0.702824592590332, - "learning_rate": 3.738333333333334e-05, - "loss": 0.5639, - "step": 2244 - }, - { - "epoch": 0.07880096175784061, - "grad_norm": 0.8496579527854919, - "learning_rate": 3.74e-05, - "loss": 0.5143, - "step": 2245 - }, - { - "epoch": 0.07883606240895769, - "grad_norm": 0.6988188028335571, - "learning_rate": 3.7416666666666664e-05, - "loss": 0.4784, - "step": 2246 - }, - { - "epoch": 0.07887116306007476, - "grad_norm": 0.7612179517745972, - "learning_rate": 3.743333333333334e-05, - "loss": 0.6302, - "step": 2247 - }, - { - "epoch": 0.07890626371119185, - "grad_norm": 0.7722770571708679, - "learning_rate": 3.745e-05, - "loss": 0.559, - "step": 2248 - }, - { - "epoch": 0.07894136436230892, - "grad_norm": 0.7844948172569275, - "learning_rate": 3.7466666666666665e-05, - "loss": 0.6542, - "step": 2249 - }, - { - "epoch": 0.07897646501342599, - "grad_norm": 0.6720234751701355, - "learning_rate": 3.7483333333333334e-05, - "loss": 0.4439, - "step": 2250 - }, - { - "epoch": 0.07901156566454308, - "grad_norm": 0.7401400804519653, - "learning_rate": 3.7500000000000003e-05, - "loss": 0.5141, - "step": 2251 - }, - { - "epoch": 0.07904666631566015, - "grad_norm": 0.6829811930656433, - "learning_rate": 3.7516666666666666e-05, - "loss": 0.5115, - "step": 2252 - }, - { - "epoch": 0.07908176696677724, - "grad_norm": 0.8589451909065247, - "learning_rate": 3.7533333333333335e-05, - "loss": 0.3994, - "step": 2253 - }, - { - "epoch": 0.07911686761789431, - "grad_norm": 0.7754709720611572, - "learning_rate": 3.7550000000000005e-05, - "loss": 0.6428, - "step": 2254 - }, - { - "epoch": 0.07915196826901139, - "grad_norm": 0.7704675197601318, - "learning_rate": 3.756666666666667e-05, - "loss": 0.6618, - "step": 2255 - }, - { - "epoch": 0.07918706892012847, - "grad_norm": 0.7967194318771362, - "learning_rate": 3.7583333333333337e-05, - "loss": 0.5135, - "step": 2256 - }, - { - "epoch": 0.07922216957124555, - "grad_norm": 1.0106990337371826, - "learning_rate": 3.76e-05, - "loss": 0.5906, - "step": 2257 - }, - { - "epoch": 0.07925727022236262, - "grad_norm": 0.701863169670105, - "learning_rate": 3.761666666666667e-05, - "loss": 0.6622, - "step": 2258 - }, - { - "epoch": 0.0792923708734797, - "grad_norm": 0.7264806628227234, - "learning_rate": 3.763333333333334e-05, - "loss": 0.5966, - "step": 2259 - }, - { - "epoch": 0.07932747152459678, - "grad_norm": 0.8818365335464478, - "learning_rate": 3.765e-05, - "loss": 0.6381, - "step": 2260 - }, - { - "epoch": 0.07936257217571387, - "grad_norm": 0.7045902609825134, - "learning_rate": 3.766666666666667e-05, - "loss": 0.6196, - "step": 2261 - }, - { - "epoch": 0.07939767282683094, - "grad_norm": 0.6589072942733765, - "learning_rate": 3.768333333333334e-05, - "loss": 0.562, - "step": 2262 - }, - { - "epoch": 0.07943277347794801, - "grad_norm": 0.9042130708694458, - "learning_rate": 3.77e-05, - "loss": 0.5459, - "step": 2263 - }, - { - "epoch": 0.0794678741290651, - "grad_norm": 0.7253658771514893, - "learning_rate": 3.7716666666666664e-05, - "loss": 0.537, - "step": 2264 - }, - { - "epoch": 0.07950297478018217, - "grad_norm": 0.6376678943634033, - "learning_rate": 3.773333333333334e-05, - "loss": 0.5646, - "step": 2265 - }, - { - "epoch": 0.07953807543129925, - "grad_norm": 0.8313679695129395, - "learning_rate": 3.775e-05, - "loss": 0.6691, - "step": 2266 - }, - { - "epoch": 0.07957317608241633, - "grad_norm": 0.7108116745948792, - "learning_rate": 3.7766666666666665e-05, - "loss": 0.5442, - "step": 2267 - }, - { - "epoch": 0.0796082767335334, - "grad_norm": 0.7295188307762146, - "learning_rate": 3.7783333333333335e-05, - "loss": 0.5628, - "step": 2268 - }, - { - "epoch": 0.07964337738465048, - "grad_norm": 0.6742207407951355, - "learning_rate": 3.7800000000000004e-05, - "loss": 0.5414, - "step": 2269 - }, - { - "epoch": 0.07967847803576757, - "grad_norm": 0.6865862011909485, - "learning_rate": 3.7816666666666667e-05, - "loss": 0.639, - "step": 2270 - }, - { - "epoch": 0.07971357868688464, - "grad_norm": 0.8072473406791687, - "learning_rate": 3.7833333333333336e-05, - "loss": 0.4993, - "step": 2271 - }, - { - "epoch": 0.07974867933800173, - "grad_norm": 0.6760308742523193, - "learning_rate": 3.7850000000000005e-05, - "loss": 0.617, - "step": 2272 - }, - { - "epoch": 0.0797837799891188, - "grad_norm": 0.6714460849761963, - "learning_rate": 3.786666666666667e-05, - "loss": 0.5872, - "step": 2273 - }, - { - "epoch": 0.07981888064023587, - "grad_norm": 0.8576647043228149, - "learning_rate": 3.788333333333334e-05, - "loss": 0.5525, - "step": 2274 - }, - { - "epoch": 0.07985398129135296, - "grad_norm": 0.8594738841056824, - "learning_rate": 3.79e-05, - "loss": 0.5383, - "step": 2275 - }, - { - "epoch": 0.07988908194247003, - "grad_norm": 0.7621399760246277, - "learning_rate": 3.791666666666667e-05, - "loss": 0.501, - "step": 2276 - }, - { - "epoch": 0.0799241825935871, - "grad_norm": 0.842941403388977, - "learning_rate": 3.793333333333334e-05, - "loss": 0.5548, - "step": 2277 - }, - { - "epoch": 0.0799592832447042, - "grad_norm": 0.8408876061439514, - "learning_rate": 3.795e-05, - "loss": 0.5178, - "step": 2278 - }, - { - "epoch": 0.07999438389582127, - "grad_norm": 0.7721808552742004, - "learning_rate": 3.796666666666667e-05, - "loss": 0.4687, - "step": 2279 - }, - { - "epoch": 0.08002948454693834, - "grad_norm": 0.7637141942977905, - "learning_rate": 3.798333333333334e-05, - "loss": 0.6028, - "step": 2280 - }, - { - "epoch": 0.08006458519805543, - "grad_norm": 0.7112342119216919, - "learning_rate": 3.8e-05, - "loss": 0.5207, - "step": 2281 - }, - { - "epoch": 0.0800996858491725, - "grad_norm": 0.7972661852836609, - "learning_rate": 3.8016666666666665e-05, - "loss": 0.4934, - "step": 2282 - }, - { - "epoch": 0.08013478650028959, - "grad_norm": 0.9022185206413269, - "learning_rate": 3.803333333333334e-05, - "loss": 0.6649, - "step": 2283 - }, - { - "epoch": 0.08016988715140666, - "grad_norm": 0.9949752688407898, - "learning_rate": 3.805e-05, - "loss": 0.5598, - "step": 2284 - }, - { - "epoch": 0.08020498780252373, - "grad_norm": 0.8010836243629456, - "learning_rate": 3.8066666666666666e-05, - "loss": 0.5622, - "step": 2285 - }, - { - "epoch": 0.08024008845364082, - "grad_norm": 0.8060246706008911, - "learning_rate": 3.8083333333333335e-05, - "loss": 0.4689, - "step": 2286 - }, - { - "epoch": 0.0802751891047579, - "grad_norm": 0.7555766701698303, - "learning_rate": 3.8100000000000005e-05, - "loss": 0.4955, - "step": 2287 - }, - { - "epoch": 0.08031028975587497, - "grad_norm": 0.6752068400382996, - "learning_rate": 3.811666666666667e-05, - "loss": 0.5472, - "step": 2288 - }, - { - "epoch": 0.08034539040699205, - "grad_norm": 0.7871788740158081, - "learning_rate": 3.8133333333333336e-05, - "loss": 0.5993, - "step": 2289 - }, - { - "epoch": 0.08038049105810913, - "grad_norm": 0.7692066431045532, - "learning_rate": 3.8150000000000006e-05, - "loss": 0.422, - "step": 2290 - }, - { - "epoch": 0.0804155917092262, - "grad_norm": 0.7720059752464294, - "learning_rate": 3.816666666666667e-05, - "loss": 0.5302, - "step": 2291 - }, - { - "epoch": 0.08045069236034329, - "grad_norm": 0.6240893602371216, - "learning_rate": 3.818333333333334e-05, - "loss": 0.4816, - "step": 2292 - }, - { - "epoch": 0.08048579301146036, - "grad_norm": 0.7865394949913025, - "learning_rate": 3.82e-05, - "loss": 0.5986, - "step": 2293 - }, - { - "epoch": 0.08052089366257745, - "grad_norm": 0.8140631914138794, - "learning_rate": 3.821666666666667e-05, - "loss": 0.467, - "step": 2294 - }, - { - "epoch": 0.08055599431369452, - "grad_norm": 0.7148425579071045, - "learning_rate": 3.823333333333334e-05, - "loss": 0.4906, - "step": 2295 - }, - { - "epoch": 0.0805910949648116, - "grad_norm": 0.762346625328064, - "learning_rate": 3.825e-05, - "loss": 0.61, - "step": 2296 - }, - { - "epoch": 0.08062619561592868, - "grad_norm": 0.881294310092926, - "learning_rate": 3.8266666666666664e-05, - "loss": 0.6161, - "step": 2297 - }, - { - "epoch": 0.08066129626704575, - "grad_norm": 0.908348798751831, - "learning_rate": 3.828333333333334e-05, - "loss": 0.5221, - "step": 2298 - }, - { - "epoch": 0.08069639691816283, - "grad_norm": 0.8757265210151672, - "learning_rate": 3.83e-05, - "loss": 0.4266, - "step": 2299 - }, - { - "epoch": 0.08073149756927991, - "grad_norm": 0.8359132409095764, - "learning_rate": 3.8316666666666665e-05, - "loss": 0.5016, - "step": 2300 - }, - { - "epoch": 0.08076659822039699, - "grad_norm": 0.7498669624328613, - "learning_rate": 3.8333333333333334e-05, - "loss": 0.6307, - "step": 2301 - }, - { - "epoch": 0.08080169887151406, - "grad_norm": 0.6886937022209167, - "learning_rate": 3.8350000000000004e-05, - "loss": 0.4744, - "step": 2302 - }, - { - "epoch": 0.08083679952263115, - "grad_norm": 1.0419338941574097, - "learning_rate": 3.8366666666666666e-05, - "loss": 0.5129, - "step": 2303 - }, - { - "epoch": 0.08087190017374822, - "grad_norm": 0.800762414932251, - "learning_rate": 3.8383333333333336e-05, - "loss": 0.5602, - "step": 2304 - }, - { - "epoch": 0.08090700082486531, - "grad_norm": 0.6630125641822815, - "learning_rate": 3.8400000000000005e-05, - "loss": 0.5494, - "step": 2305 - }, - { - "epoch": 0.08094210147598238, - "grad_norm": 0.7351178526878357, - "learning_rate": 3.841666666666667e-05, - "loss": 0.5837, - "step": 2306 - }, - { - "epoch": 0.08097720212709945, - "grad_norm": 0.7743481993675232, - "learning_rate": 3.843333333333334e-05, - "loss": 0.6019, - "step": 2307 - }, - { - "epoch": 0.08101230277821654, - "grad_norm": 0.6816107630729675, - "learning_rate": 3.845e-05, - "loss": 0.6159, - "step": 2308 - }, - { - "epoch": 0.08104740342933361, - "grad_norm": 0.591511070728302, - "learning_rate": 3.846666666666667e-05, - "loss": 0.5197, - "step": 2309 - }, - { - "epoch": 0.08108250408045069, - "grad_norm": 0.8406704664230347, - "learning_rate": 3.848333333333334e-05, - "loss": 0.5257, - "step": 2310 - }, - { - "epoch": 0.08111760473156777, - "grad_norm": 0.9103978276252747, - "learning_rate": 3.85e-05, - "loss": 0.586, - "step": 2311 - }, - { - "epoch": 0.08115270538268485, - "grad_norm": 0.8435265421867371, - "learning_rate": 3.851666666666667e-05, - "loss": 0.5468, - "step": 2312 - }, - { - "epoch": 0.08118780603380192, - "grad_norm": 0.8463445901870728, - "learning_rate": 3.853333333333334e-05, - "loss": 0.5146, - "step": 2313 - }, - { - "epoch": 0.08122290668491901, - "grad_norm": 0.8945830464363098, - "learning_rate": 3.855e-05, - "loss": 0.5799, - "step": 2314 - }, - { - "epoch": 0.08125800733603608, - "grad_norm": 0.9025808572769165, - "learning_rate": 3.8566666666666664e-05, - "loss": 0.5646, - "step": 2315 - }, - { - "epoch": 0.08129310798715317, - "grad_norm": 0.6859649419784546, - "learning_rate": 3.8583333333333334e-05, - "loss": 0.5075, - "step": 2316 - }, - { - "epoch": 0.08132820863827024, - "grad_norm": 0.8615952134132385, - "learning_rate": 3.86e-05, - "loss": 0.624, - "step": 2317 - }, - { - "epoch": 0.08136330928938731, - "grad_norm": 0.7217848300933838, - "learning_rate": 3.8616666666666666e-05, - "loss": 0.4851, - "step": 2318 - }, - { - "epoch": 0.0813984099405044, - "grad_norm": 0.6811777353286743, - "learning_rate": 3.8633333333333335e-05, - "loss": 0.5303, - "step": 2319 - }, - { - "epoch": 0.08143351059162147, - "grad_norm": 0.8858216404914856, - "learning_rate": 3.8650000000000004e-05, - "loss": 0.604, - "step": 2320 - }, - { - "epoch": 0.08146861124273855, - "grad_norm": 0.706606388092041, - "learning_rate": 3.866666666666667e-05, - "loss": 0.5698, - "step": 2321 - }, - { - "epoch": 0.08150371189385563, - "grad_norm": 0.7640691995620728, - "learning_rate": 3.868333333333333e-05, - "loss": 0.469, - "step": 2322 - }, - { - "epoch": 0.08153881254497271, - "grad_norm": 0.6956092715263367, - "learning_rate": 3.8700000000000006e-05, - "loss": 0.5504, - "step": 2323 - }, - { - "epoch": 0.08157391319608978, - "grad_norm": 0.6697455644607544, - "learning_rate": 3.871666666666667e-05, - "loss": 0.5202, - "step": 2324 - }, - { - "epoch": 0.08160901384720687, - "grad_norm": 0.6048200726509094, - "learning_rate": 3.873333333333333e-05, - "loss": 0.5189, - "step": 2325 - }, - { - "epoch": 0.08164411449832394, - "grad_norm": 0.7242048978805542, - "learning_rate": 3.875e-05, - "loss": 0.4643, - "step": 2326 - }, - { - "epoch": 0.08167921514944103, - "grad_norm": 0.7634289264678955, - "learning_rate": 3.876666666666667e-05, - "loss": 0.5636, - "step": 2327 - }, - { - "epoch": 0.0817143158005581, - "grad_norm": 0.7629936933517456, - "learning_rate": 3.878333333333333e-05, - "loss": 0.5375, - "step": 2328 - }, - { - "epoch": 0.08174941645167517, - "grad_norm": 0.735228955745697, - "learning_rate": 3.88e-05, - "loss": 0.5303, - "step": 2329 - }, - { - "epoch": 0.08178451710279226, - "grad_norm": 0.8243984580039978, - "learning_rate": 3.881666666666667e-05, - "loss": 0.555, - "step": 2330 - }, - { - "epoch": 0.08181961775390934, - "grad_norm": 0.6730183959007263, - "learning_rate": 3.883333333333333e-05, - "loss": 0.5657, - "step": 2331 - }, - { - "epoch": 0.08185471840502641, - "grad_norm": 0.6689878106117249, - "learning_rate": 3.885e-05, - "loss": 0.4667, - "step": 2332 - }, - { - "epoch": 0.0818898190561435, - "grad_norm": 0.8474567532539368, - "learning_rate": 3.8866666666666665e-05, - "loss": 0.6233, - "step": 2333 - }, - { - "epoch": 0.08192491970726057, - "grad_norm": 0.7553229331970215, - "learning_rate": 3.8883333333333334e-05, - "loss": 0.4714, - "step": 2334 - }, - { - "epoch": 0.08196002035837764, - "grad_norm": 0.6403676867485046, - "learning_rate": 3.8900000000000004e-05, - "loss": 0.4898, - "step": 2335 - }, - { - "epoch": 0.08199512100949473, - "grad_norm": 0.8084325790405273, - "learning_rate": 3.8916666666666666e-05, - "loss": 0.5095, - "step": 2336 - }, - { - "epoch": 0.0820302216606118, - "grad_norm": 0.7792364358901978, - "learning_rate": 3.8933333333333336e-05, - "loss": 0.4984, - "step": 2337 - }, - { - "epoch": 0.08206532231172889, - "grad_norm": 0.7916306257247925, - "learning_rate": 3.8950000000000005e-05, - "loss": 0.486, - "step": 2338 - }, - { - "epoch": 0.08210042296284596, - "grad_norm": 0.6701837778091431, - "learning_rate": 3.896666666666667e-05, - "loss": 0.5404, - "step": 2339 - }, - { - "epoch": 0.08213552361396304, - "grad_norm": 0.7499209046363831, - "learning_rate": 3.898333333333333e-05, - "loss": 0.6033, - "step": 2340 - }, - { - "epoch": 0.08217062426508012, - "grad_norm": 0.7737659811973572, - "learning_rate": 3.9000000000000006e-05, - "loss": 0.5941, - "step": 2341 - }, - { - "epoch": 0.0822057249161972, - "grad_norm": 0.7042847871780396, - "learning_rate": 3.901666666666667e-05, - "loss": 0.4859, - "step": 2342 - }, - { - "epoch": 0.08224082556731427, - "grad_norm": 0.7350881099700928, - "learning_rate": 3.903333333333333e-05, - "loss": 0.4116, - "step": 2343 - }, - { - "epoch": 0.08227592621843136, - "grad_norm": 0.8371569514274597, - "learning_rate": 3.905e-05, - "loss": 0.5608, - "step": 2344 - }, - { - "epoch": 0.08231102686954843, - "grad_norm": 0.7238355875015259, - "learning_rate": 3.906666666666667e-05, - "loss": 0.6189, - "step": 2345 - }, - { - "epoch": 0.0823461275206655, - "grad_norm": 0.7036373019218445, - "learning_rate": 3.908333333333333e-05, - "loss": 0.6648, - "step": 2346 - }, - { - "epoch": 0.08238122817178259, - "grad_norm": 0.7287402749061584, - "learning_rate": 3.91e-05, - "loss": 0.55, - "step": 2347 - }, - { - "epoch": 0.08241632882289966, - "grad_norm": 0.7568923830986023, - "learning_rate": 3.911666666666667e-05, - "loss": 0.6254, - "step": 2348 - }, - { - "epoch": 0.08245142947401675, - "grad_norm": 0.7093027234077454, - "learning_rate": 3.9133333333333334e-05, - "loss": 0.6834, - "step": 2349 - }, - { - "epoch": 0.08248653012513382, - "grad_norm": 0.6959288120269775, - "learning_rate": 3.915e-05, - "loss": 0.624, - "step": 2350 - }, - { - "epoch": 0.0825216307762509, - "grad_norm": 1.150103211402893, - "learning_rate": 3.9166666666666665e-05, - "loss": 0.6395, - "step": 2351 - }, - { - "epoch": 0.08255673142736798, - "grad_norm": 0.617337703704834, - "learning_rate": 3.9183333333333335e-05, - "loss": 0.5093, - "step": 2352 - }, - { - "epoch": 0.08259183207848506, - "grad_norm": 0.6569388508796692, - "learning_rate": 3.9200000000000004e-05, - "loss": 0.4928, - "step": 2353 - }, - { - "epoch": 0.08262693272960213, - "grad_norm": 0.7391181588172913, - "learning_rate": 3.921666666666667e-05, - "loss": 0.4855, - "step": 2354 - }, - { - "epoch": 0.08266203338071922, - "grad_norm": 0.8651422262191772, - "learning_rate": 3.9233333333333336e-05, - "loss": 0.5437, - "step": 2355 - }, - { - "epoch": 0.08269713403183629, - "grad_norm": 0.724470317363739, - "learning_rate": 3.9250000000000005e-05, - "loss": 0.4772, - "step": 2356 - }, - { - "epoch": 0.08273223468295336, - "grad_norm": 0.7466826438903809, - "learning_rate": 3.926666666666667e-05, - "loss": 0.5026, - "step": 2357 - }, - { - "epoch": 0.08276733533407045, - "grad_norm": 0.9567835330963135, - "learning_rate": 3.928333333333333e-05, - "loss": 0.6331, - "step": 2358 - }, - { - "epoch": 0.08280243598518752, - "grad_norm": 0.6492432951927185, - "learning_rate": 3.9300000000000007e-05, - "loss": 0.5049, - "step": 2359 - }, - { - "epoch": 0.08283753663630461, - "grad_norm": 0.6319663524627686, - "learning_rate": 3.931666666666667e-05, - "loss": 0.5167, - "step": 2360 - }, - { - "epoch": 0.08287263728742168, - "grad_norm": 0.6036595702171326, - "learning_rate": 3.933333333333333e-05, - "loss": 0.4852, - "step": 2361 - }, - { - "epoch": 0.08290773793853876, - "grad_norm": 0.7527871131896973, - "learning_rate": 3.935e-05, - "loss": 0.6632, - "step": 2362 - }, - { - "epoch": 0.08294283858965584, - "grad_norm": 0.7550980448722839, - "learning_rate": 3.936666666666667e-05, - "loss": 0.6067, - "step": 2363 - }, - { - "epoch": 0.08297793924077292, - "grad_norm": 0.7350654602050781, - "learning_rate": 3.938333333333333e-05, - "loss": 0.5287, - "step": 2364 - }, - { - "epoch": 0.08301303989188999, - "grad_norm": 0.7125895023345947, - "learning_rate": 3.94e-05, - "loss": 0.5179, - "step": 2365 - }, - { - "epoch": 0.08304814054300708, - "grad_norm": 0.8176701664924622, - "learning_rate": 3.941666666666667e-05, - "loss": 0.4856, - "step": 2366 - }, - { - "epoch": 0.08308324119412415, - "grad_norm": 0.8117053508758545, - "learning_rate": 3.9433333333333334e-05, - "loss": 0.5289, - "step": 2367 - }, - { - "epoch": 0.08311834184524122, - "grad_norm": 0.8342973589897156, - "learning_rate": 3.9450000000000003e-05, - "loss": 0.4801, - "step": 2368 - }, - { - "epoch": 0.08315344249635831, - "grad_norm": 0.7611440420150757, - "learning_rate": 3.9466666666666666e-05, - "loss": 0.6236, - "step": 2369 - }, - { - "epoch": 0.08318854314747538, - "grad_norm": 0.7349371910095215, - "learning_rate": 3.9483333333333335e-05, - "loss": 0.5097, - "step": 2370 - }, - { - "epoch": 0.08322364379859247, - "grad_norm": 0.6961541771888733, - "learning_rate": 3.9500000000000005e-05, - "loss": 0.5038, - "step": 2371 - }, - { - "epoch": 0.08325874444970954, - "grad_norm": 0.6769289374351501, - "learning_rate": 3.951666666666667e-05, - "loss": 0.5261, - "step": 2372 - }, - { - "epoch": 0.08329384510082662, - "grad_norm": 0.8962070345878601, - "learning_rate": 3.9533333333333337e-05, - "loss": 0.3823, - "step": 2373 - }, - { - "epoch": 0.0833289457519437, - "grad_norm": 0.7772852182388306, - "learning_rate": 3.9550000000000006e-05, - "loss": 0.4957, - "step": 2374 - }, - { - "epoch": 0.08336404640306078, - "grad_norm": 1.068327784538269, - "learning_rate": 3.956666666666667e-05, - "loss": 0.4986, - "step": 2375 - }, - { - "epoch": 0.08339914705417785, - "grad_norm": 0.742225706577301, - "learning_rate": 3.958333333333333e-05, - "loss": 0.539, - "step": 2376 - }, - { - "epoch": 0.08343424770529494, - "grad_norm": 0.725978434085846, - "learning_rate": 3.960000000000001e-05, - "loss": 0.5858, - "step": 2377 - }, - { - "epoch": 0.08346934835641201, - "grad_norm": 0.7082867622375488, - "learning_rate": 3.961666666666667e-05, - "loss": 0.4896, - "step": 2378 - }, - { - "epoch": 0.08350444900752908, - "grad_norm": 0.6538575887680054, - "learning_rate": 3.963333333333333e-05, - "loss": 0.5608, - "step": 2379 - }, - { - "epoch": 0.08353954965864617, - "grad_norm": 0.7007297277450562, - "learning_rate": 3.965e-05, - "loss": 0.5355, - "step": 2380 - }, - { - "epoch": 0.08357465030976324, - "grad_norm": 0.7016144394874573, - "learning_rate": 3.966666666666667e-05, - "loss": 0.5922, - "step": 2381 - }, - { - "epoch": 0.08360975096088033, - "grad_norm": 0.6564974784851074, - "learning_rate": 3.9683333333333333e-05, - "loss": 0.4897, - "step": 2382 - }, - { - "epoch": 0.0836448516119974, - "grad_norm": 0.7456164360046387, - "learning_rate": 3.97e-05, - "loss": 0.4337, - "step": 2383 - }, - { - "epoch": 0.08367995226311448, - "grad_norm": 0.6611679196357727, - "learning_rate": 3.9716666666666665e-05, - "loss": 0.4882, - "step": 2384 - }, - { - "epoch": 0.08371505291423156, - "grad_norm": 0.6389467120170593, - "learning_rate": 3.9733333333333335e-05, - "loss": 0.616, - "step": 2385 - }, - { - "epoch": 0.08375015356534864, - "grad_norm": 0.6544708609580994, - "learning_rate": 3.9750000000000004e-05, - "loss": 0.5792, - "step": 2386 - }, - { - "epoch": 0.08378525421646571, - "grad_norm": 0.6319180727005005, - "learning_rate": 3.9766666666666667e-05, - "loss": 0.4616, - "step": 2387 - }, - { - "epoch": 0.0838203548675828, - "grad_norm": 0.7553228139877319, - "learning_rate": 3.9783333333333336e-05, - "loss": 0.4951, - "step": 2388 - }, - { - "epoch": 0.08385545551869987, - "grad_norm": 0.6589532494544983, - "learning_rate": 3.9800000000000005e-05, - "loss": 0.5444, - "step": 2389 - }, - { - "epoch": 0.08389055616981694, - "grad_norm": 0.7552146911621094, - "learning_rate": 3.981666666666667e-05, - "loss": 0.4894, - "step": 2390 - }, - { - "epoch": 0.08392565682093403, - "grad_norm": 0.7688384056091309, - "learning_rate": 3.983333333333333e-05, - "loss": 0.571, - "step": 2391 - }, - { - "epoch": 0.0839607574720511, - "grad_norm": 0.8385567665100098, - "learning_rate": 3.9850000000000006e-05, - "loss": 0.5869, - "step": 2392 - }, - { - "epoch": 0.08399585812316819, - "grad_norm": 0.7879346609115601, - "learning_rate": 3.986666666666667e-05, - "loss": 0.5673, - "step": 2393 - }, - { - "epoch": 0.08403095877428526, - "grad_norm": 0.9681922793388367, - "learning_rate": 3.988333333333333e-05, - "loss": 0.6857, - "step": 2394 - }, - { - "epoch": 0.08406605942540234, - "grad_norm": 0.6379744410514832, - "learning_rate": 3.99e-05, - "loss": 0.5606, - "step": 2395 - }, - { - "epoch": 0.08410116007651942, - "grad_norm": 0.7651694416999817, - "learning_rate": 3.991666666666667e-05, - "loss": 0.5567, - "step": 2396 - }, - { - "epoch": 0.0841362607276365, - "grad_norm": 0.8447965979576111, - "learning_rate": 3.993333333333333e-05, - "loss": 0.5478, - "step": 2397 - }, - { - "epoch": 0.08417136137875357, - "grad_norm": 0.8490372896194458, - "learning_rate": 3.995e-05, - "loss": 0.5992, - "step": 2398 - }, - { - "epoch": 0.08420646202987066, - "grad_norm": 0.8255260586738586, - "learning_rate": 3.996666666666667e-05, - "loss": 0.6317, - "step": 2399 - }, - { - "epoch": 0.08424156268098773, - "grad_norm": 0.7952541708946228, - "learning_rate": 3.9983333333333334e-05, - "loss": 0.5425, - "step": 2400 - }, - { - "epoch": 0.0842766633321048, - "grad_norm": 0.912874698638916, - "learning_rate": 4e-05, - "loss": 0.6481, - "step": 2401 - }, - { - "epoch": 0.08431176398322189, - "grad_norm": 0.8904337286949158, - "learning_rate": 4.0016666666666666e-05, - "loss": 0.5968, - "step": 2402 - }, - { - "epoch": 0.08434686463433896, - "grad_norm": 0.8512853384017944, - "learning_rate": 4.0033333333333335e-05, - "loss": 0.5387, - "step": 2403 - }, - { - "epoch": 0.08438196528545605, - "grad_norm": 0.9175641536712646, - "learning_rate": 4.0050000000000004e-05, - "loss": 0.5954, - "step": 2404 - }, - { - "epoch": 0.08441706593657312, - "grad_norm": 0.67563396692276, - "learning_rate": 4.006666666666667e-05, - "loss": 0.6264, - "step": 2405 - }, - { - "epoch": 0.0844521665876902, - "grad_norm": 0.6931411027908325, - "learning_rate": 4.0083333333333336e-05, - "loss": 0.5661, - "step": 2406 - }, - { - "epoch": 0.08448726723880728, - "grad_norm": 0.7110955119132996, - "learning_rate": 4.0100000000000006e-05, - "loss": 0.3611, - "step": 2407 - }, - { - "epoch": 0.08452236788992436, - "grad_norm": 0.7594624161720276, - "learning_rate": 4.011666666666667e-05, - "loss": 0.6579, - "step": 2408 - }, - { - "epoch": 0.08455746854104143, - "grad_norm": 0.6691012978553772, - "learning_rate": 4.013333333333333e-05, - "loss": 0.5526, - "step": 2409 - }, - { - "epoch": 0.08459256919215852, - "grad_norm": 0.6471546292304993, - "learning_rate": 4.015000000000001e-05, - "loss": 0.4351, - "step": 2410 - }, - { - "epoch": 0.08462766984327559, - "grad_norm": 0.7325376868247986, - "learning_rate": 4.016666666666667e-05, - "loss": 0.4086, - "step": 2411 - }, - { - "epoch": 0.08466277049439268, - "grad_norm": 0.7475745677947998, - "learning_rate": 4.018333333333333e-05, - "loss": 0.4674, - "step": 2412 - }, - { - "epoch": 0.08469787114550975, - "grad_norm": 0.7345950603485107, - "learning_rate": 4.02e-05, - "loss": 0.4611, - "step": 2413 - }, - { - "epoch": 0.08473297179662682, - "grad_norm": 0.8580730557441711, - "learning_rate": 4.021666666666667e-05, - "loss": 0.5784, - "step": 2414 - }, - { - "epoch": 0.08476807244774391, - "grad_norm": 0.6650412082672119, - "learning_rate": 4.023333333333333e-05, - "loss": 0.4036, - "step": 2415 - }, - { - "epoch": 0.08480317309886098, - "grad_norm": 0.6672228574752808, - "learning_rate": 4.025e-05, - "loss": 0.4568, - "step": 2416 - }, - { - "epoch": 0.08483827374997806, - "grad_norm": 0.9235221743583679, - "learning_rate": 4.026666666666667e-05, - "loss": 0.6251, - "step": 2417 - }, - { - "epoch": 0.08487337440109514, - "grad_norm": 0.7988741397857666, - "learning_rate": 4.0283333333333334e-05, - "loss": 0.5318, - "step": 2418 - }, - { - "epoch": 0.08490847505221222, - "grad_norm": 0.7415744066238403, - "learning_rate": 4.0300000000000004e-05, - "loss": 0.5565, - "step": 2419 - }, - { - "epoch": 0.08494357570332929, - "grad_norm": 0.7187644839286804, - "learning_rate": 4.0316666666666666e-05, - "loss": 0.5691, - "step": 2420 - }, - { - "epoch": 0.08497867635444638, - "grad_norm": 0.6821092963218689, - "learning_rate": 4.0333333333333336e-05, - "loss": 0.588, - "step": 2421 - }, - { - "epoch": 0.08501377700556345, - "grad_norm": 0.7220823168754578, - "learning_rate": 4.0350000000000005e-05, - "loss": 0.5536, - "step": 2422 - }, - { - "epoch": 0.08504887765668054, - "grad_norm": 0.7581253051757812, - "learning_rate": 4.036666666666667e-05, - "loss": 0.582, - "step": 2423 - }, - { - "epoch": 0.08508397830779761, - "grad_norm": 0.5757447481155396, - "learning_rate": 4.038333333333334e-05, - "loss": 0.6375, - "step": 2424 - }, - { - "epoch": 0.08511907895891468, - "grad_norm": 0.6429451704025269, - "learning_rate": 4.0400000000000006e-05, - "loss": 0.466, - "step": 2425 - }, - { - "epoch": 0.08515417961003177, - "grad_norm": 0.8387114405632019, - "learning_rate": 4.041666666666667e-05, - "loss": 0.4785, - "step": 2426 - }, - { - "epoch": 0.08518928026114884, - "grad_norm": 0.6611273288726807, - "learning_rate": 4.043333333333333e-05, - "loss": 0.6025, - "step": 2427 - }, - { - "epoch": 0.08522438091226592, - "grad_norm": 0.6814500093460083, - "learning_rate": 4.045000000000001e-05, - "loss": 0.6107, - "step": 2428 - }, - { - "epoch": 0.085259481563383, - "grad_norm": 0.6847195625305176, - "learning_rate": 4.046666666666667e-05, - "loss": 0.6309, - "step": 2429 - }, - { - "epoch": 0.08529458221450008, - "grad_norm": 0.8384342789649963, - "learning_rate": 4.048333333333333e-05, - "loss": 0.6582, - "step": 2430 - }, - { - "epoch": 0.08532968286561715, - "grad_norm": 0.7498215436935425, - "learning_rate": 4.05e-05, - "loss": 0.5537, - "step": 2431 - }, - { - "epoch": 0.08536478351673424, - "grad_norm": 0.7326194643974304, - "learning_rate": 4.051666666666667e-05, - "loss": 0.3982, - "step": 2432 - }, - { - "epoch": 0.08539988416785131, - "grad_norm": 0.6498636603355408, - "learning_rate": 4.0533333333333334e-05, - "loss": 0.5507, - "step": 2433 - }, - { - "epoch": 0.0854349848189684, - "grad_norm": 0.7056525945663452, - "learning_rate": 4.055e-05, - "loss": 0.5795, - "step": 2434 - }, - { - "epoch": 0.08547008547008547, - "grad_norm": 0.6815606951713562, - "learning_rate": 4.056666666666667e-05, - "loss": 0.6259, - "step": 2435 - }, - { - "epoch": 0.08550518612120254, - "grad_norm": 0.7273479700088501, - "learning_rate": 4.0583333333333335e-05, - "loss": 0.5033, - "step": 2436 - }, - { - "epoch": 0.08554028677231963, - "grad_norm": 0.6475807428359985, - "learning_rate": 4.0600000000000004e-05, - "loss": 0.4824, - "step": 2437 - }, - { - "epoch": 0.0855753874234367, - "grad_norm": 0.7082532644271851, - "learning_rate": 4.061666666666667e-05, - "loss": 0.5938, - "step": 2438 - }, - { - "epoch": 0.08561048807455378, - "grad_norm": 0.6900319457054138, - "learning_rate": 4.0633333333333336e-05, - "loss": 0.5592, - "step": 2439 - }, - { - "epoch": 0.08564558872567087, - "grad_norm": 0.6771111488342285, - "learning_rate": 4.065e-05, - "loss": 0.4894, - "step": 2440 - }, - { - "epoch": 0.08568068937678794, - "grad_norm": 0.9472949504852295, - "learning_rate": 4.066666666666667e-05, - "loss": 0.5407, - "step": 2441 - }, - { - "epoch": 0.08571579002790501, - "grad_norm": 0.7385139465332031, - "learning_rate": 4.068333333333334e-05, - "loss": 0.4126, - "step": 2442 - }, - { - "epoch": 0.0857508906790221, - "grad_norm": 0.7839069366455078, - "learning_rate": 4.07e-05, - "loss": 0.63, - "step": 2443 - }, - { - "epoch": 0.08578599133013917, - "grad_norm": 0.6328029632568359, - "learning_rate": 4.071666666666667e-05, - "loss": 0.4959, - "step": 2444 - }, - { - "epoch": 0.08582109198125626, - "grad_norm": 0.697523832321167, - "learning_rate": 4.073333333333333e-05, - "loss": 0.6168, - "step": 2445 - }, - { - "epoch": 0.08585619263237333, - "grad_norm": 0.898102343082428, - "learning_rate": 4.075e-05, - "loss": 0.5903, - "step": 2446 - }, - { - "epoch": 0.0858912932834904, - "grad_norm": 0.701063334941864, - "learning_rate": 4.076666666666667e-05, - "loss": 0.5589, - "step": 2447 - }, - { - "epoch": 0.08592639393460749, - "grad_norm": 0.7042207717895508, - "learning_rate": 4.078333333333333e-05, - "loss": 0.5814, - "step": 2448 - }, - { - "epoch": 0.08596149458572457, - "grad_norm": 0.8321166634559631, - "learning_rate": 4.08e-05, - "loss": 0.5422, - "step": 2449 - }, - { - "epoch": 0.08599659523684164, - "grad_norm": 0.90151047706604, - "learning_rate": 4.081666666666667e-05, - "loss": 0.6377, - "step": 2450 - }, - { - "epoch": 0.08603169588795873, - "grad_norm": 0.6720162034034729, - "learning_rate": 4.0833333333333334e-05, - "loss": 0.5517, - "step": 2451 - }, - { - "epoch": 0.0860667965390758, - "grad_norm": 0.6451565623283386, - "learning_rate": 4.085e-05, - "loss": 0.5627, - "step": 2452 - }, - { - "epoch": 0.08610189719019287, - "grad_norm": 0.8113126754760742, - "learning_rate": 4.086666666666667e-05, - "loss": 0.5155, - "step": 2453 - }, - { - "epoch": 0.08613699784130996, - "grad_norm": 0.6410455703735352, - "learning_rate": 4.0883333333333335e-05, - "loss": 0.582, - "step": 2454 - }, - { - "epoch": 0.08617209849242703, - "grad_norm": 0.8279411196708679, - "learning_rate": 4.09e-05, - "loss": 0.5584, - "step": 2455 - }, - { - "epoch": 0.08620719914354412, - "grad_norm": 0.6895954012870789, - "learning_rate": 4.091666666666667e-05, - "loss": 0.5853, - "step": 2456 - }, - { - "epoch": 0.08624229979466119, - "grad_norm": 0.7134543657302856, - "learning_rate": 4.093333333333334e-05, - "loss": 0.5376, - "step": 2457 - }, - { - "epoch": 0.08627740044577827, - "grad_norm": 0.5527628064155579, - "learning_rate": 4.095e-05, - "loss": 0.5465, - "step": 2458 - }, - { - "epoch": 0.08631250109689535, - "grad_norm": 0.9429477453231812, - "learning_rate": 4.096666666666667e-05, - "loss": 0.6193, - "step": 2459 - }, - { - "epoch": 0.08634760174801243, - "grad_norm": 0.8874958157539368, - "learning_rate": 4.098333333333334e-05, - "loss": 0.6654, - "step": 2460 - }, - { - "epoch": 0.0863827023991295, - "grad_norm": 0.6251516342163086, - "learning_rate": 4.1e-05, - "loss": 0.6294, - "step": 2461 - }, - { - "epoch": 0.08641780305024659, - "grad_norm": 0.8232916593551636, - "learning_rate": 4.101666666666667e-05, - "loss": 0.522, - "step": 2462 - }, - { - "epoch": 0.08645290370136366, - "grad_norm": 0.8793765902519226, - "learning_rate": 4.103333333333333e-05, - "loss": 0.5341, - "step": 2463 - }, - { - "epoch": 0.08648800435248073, - "grad_norm": 0.8470953106880188, - "learning_rate": 4.105e-05, - "loss": 0.5724, - "step": 2464 - }, - { - "epoch": 0.08652310500359782, - "grad_norm": 0.6992962956428528, - "learning_rate": 4.106666666666667e-05, - "loss": 0.4312, - "step": 2465 - }, - { - "epoch": 0.08655820565471489, - "grad_norm": 0.802210807800293, - "learning_rate": 4.1083333333333334e-05, - "loss": 0.5868, - "step": 2466 - }, - { - "epoch": 0.08659330630583198, - "grad_norm": 0.6599482297897339, - "learning_rate": 4.11e-05, - "loss": 0.5695, - "step": 2467 - }, - { - "epoch": 0.08662840695694905, - "grad_norm": 0.8056846261024475, - "learning_rate": 4.111666666666667e-05, - "loss": 0.6047, - "step": 2468 - }, - { - "epoch": 0.08666350760806613, - "grad_norm": 0.664377748966217, - "learning_rate": 4.1133333333333335e-05, - "loss": 0.5408, - "step": 2469 - }, - { - "epoch": 0.08669860825918321, - "grad_norm": 0.7053443789482117, - "learning_rate": 4.115e-05, - "loss": 0.5418, - "step": 2470 - }, - { - "epoch": 0.08673370891030029, - "grad_norm": 0.6932326555252075, - "learning_rate": 4.116666666666667e-05, - "loss": 0.5635, - "step": 2471 - }, - { - "epoch": 0.08676880956141736, - "grad_norm": 0.6610416769981384, - "learning_rate": 4.1183333333333336e-05, - "loss": 0.4635, - "step": 2472 - }, - { - "epoch": 0.08680391021253445, - "grad_norm": 0.6999430656433105, - "learning_rate": 4.12e-05, - "loss": 0.4422, - "step": 2473 - }, - { - "epoch": 0.08683901086365152, - "grad_norm": 0.7630894184112549, - "learning_rate": 4.121666666666667e-05, - "loss": 0.5902, - "step": 2474 - }, - { - "epoch": 0.08687411151476859, - "grad_norm": 0.8787630200386047, - "learning_rate": 4.123333333333334e-05, - "loss": 0.5897, - "step": 2475 - }, - { - "epoch": 0.08690921216588568, - "grad_norm": 0.7308582663536072, - "learning_rate": 4.125e-05, - "loss": 0.5674, - "step": 2476 - }, - { - "epoch": 0.08694431281700275, - "grad_norm": 0.7342019081115723, - "learning_rate": 4.126666666666667e-05, - "loss": 0.426, - "step": 2477 - }, - { - "epoch": 0.08697941346811984, - "grad_norm": 0.6050177216529846, - "learning_rate": 4.128333333333333e-05, - "loss": 0.4957, - "step": 2478 - }, - { - "epoch": 0.08701451411923691, - "grad_norm": 0.7296125292778015, - "learning_rate": 4.13e-05, - "loss": 0.4706, - "step": 2479 - }, - { - "epoch": 0.08704961477035399, - "grad_norm": 0.703024685382843, - "learning_rate": 4.131666666666667e-05, - "loss": 0.5361, - "step": 2480 - }, - { - "epoch": 0.08708471542147107, - "grad_norm": 0.908774197101593, - "learning_rate": 4.133333333333333e-05, - "loss": 0.5465, - "step": 2481 - }, - { - "epoch": 0.08711981607258815, - "grad_norm": 0.7785829901695251, - "learning_rate": 4.135e-05, - "loss": 0.5366, - "step": 2482 - }, - { - "epoch": 0.08715491672370522, - "grad_norm": 0.8898082971572876, - "learning_rate": 4.136666666666667e-05, - "loss": 0.5785, - "step": 2483 - }, - { - "epoch": 0.0871900173748223, - "grad_norm": 0.750759482383728, - "learning_rate": 4.1383333333333334e-05, - "loss": 0.5981, - "step": 2484 - }, - { - "epoch": 0.08722511802593938, - "grad_norm": 0.7883542776107788, - "learning_rate": 4.14e-05, - "loss": 0.5971, - "step": 2485 - }, - { - "epoch": 0.08726021867705645, - "grad_norm": 0.9240295886993408, - "learning_rate": 4.141666666666667e-05, - "loss": 0.5601, - "step": 2486 - }, - { - "epoch": 0.08729531932817354, - "grad_norm": 0.8082268834114075, - "learning_rate": 4.1433333333333335e-05, - "loss": 0.6398, - "step": 2487 - }, - { - "epoch": 0.08733041997929061, - "grad_norm": 0.7449907660484314, - "learning_rate": 4.145e-05, - "loss": 0.4597, - "step": 2488 - }, - { - "epoch": 0.0873655206304077, - "grad_norm": 0.6862127780914307, - "learning_rate": 4.146666666666667e-05, - "loss": 0.6014, - "step": 2489 - }, - { - "epoch": 0.08740062128152477, - "grad_norm": 0.5955716967582703, - "learning_rate": 4.1483333333333337e-05, - "loss": 0.5272, - "step": 2490 - }, - { - "epoch": 0.08743572193264185, - "grad_norm": 1.038345456123352, - "learning_rate": 4.15e-05, - "loss": 0.6098, - "step": 2491 - }, - { - "epoch": 0.08747082258375893, - "grad_norm": 0.6355138421058655, - "learning_rate": 4.151666666666667e-05, - "loss": 0.5662, - "step": 2492 - }, - { - "epoch": 0.087505923234876, - "grad_norm": 0.6959314346313477, - "learning_rate": 4.153333333333334e-05, - "loss": 0.6242, - "step": 2493 - }, - { - "epoch": 0.08754102388599308, - "grad_norm": 0.6592915058135986, - "learning_rate": 4.155e-05, - "loss": 0.5965, - "step": 2494 - }, - { - "epoch": 0.08757612453711017, - "grad_norm": 0.8016124367713928, - "learning_rate": 4.156666666666667e-05, - "loss": 0.5108, - "step": 2495 - }, - { - "epoch": 0.08761122518822724, - "grad_norm": 0.9338623285293579, - "learning_rate": 4.158333333333333e-05, - "loss": 0.6171, - "step": 2496 - }, - { - "epoch": 0.08764632583934431, - "grad_norm": 1.0530892610549927, - "learning_rate": 4.16e-05, - "loss": 0.5557, - "step": 2497 - }, - { - "epoch": 0.0876814264904614, - "grad_norm": 0.6329345107078552, - "learning_rate": 4.161666666666667e-05, - "loss": 0.5422, - "step": 2498 - }, - { - "epoch": 0.08771652714157847, - "grad_norm": 0.6518897414207458, - "learning_rate": 4.1633333333333333e-05, - "loss": 0.5337, - "step": 2499 - }, - { - "epoch": 0.08775162779269556, - "grad_norm": 0.7592974901199341, - "learning_rate": 4.165e-05, - "loss": 0.61, - "step": 2500 - }, - { - "epoch": 0.08778672844381263, - "grad_norm": 0.6995757222175598, - "learning_rate": 4.166666666666667e-05, - "loss": 0.5346, - "step": 2501 - }, - { - "epoch": 0.08782182909492971, - "grad_norm": 0.9000418782234192, - "learning_rate": 4.1683333333333335e-05, - "loss": 0.6444, - "step": 2502 - }, - { - "epoch": 0.0878569297460468, - "grad_norm": 0.7109729647636414, - "learning_rate": 4.17e-05, - "loss": 0.635, - "step": 2503 - }, - { - "epoch": 0.08789203039716387, - "grad_norm": 0.7527068257331848, - "learning_rate": 4.171666666666667e-05, - "loss": 0.5243, - "step": 2504 - }, - { - "epoch": 0.08792713104828094, - "grad_norm": 0.7282922863960266, - "learning_rate": 4.1733333333333336e-05, - "loss": 0.5259, - "step": 2505 - }, - { - "epoch": 0.08796223169939803, - "grad_norm": 0.8327860236167908, - "learning_rate": 4.175e-05, - "loss": 0.5889, - "step": 2506 - }, - { - "epoch": 0.0879973323505151, - "grad_norm": 0.6968662142753601, - "learning_rate": 4.176666666666667e-05, - "loss": 0.6064, - "step": 2507 - }, - { - "epoch": 0.08803243300163217, - "grad_norm": 0.9057127237319946, - "learning_rate": 4.178333333333334e-05, - "loss": 0.5565, - "step": 2508 - }, - { - "epoch": 0.08806753365274926, - "grad_norm": 0.8575260043144226, - "learning_rate": 4.18e-05, - "loss": 0.5464, - "step": 2509 - }, - { - "epoch": 0.08810263430386633, - "grad_norm": 0.6735507249832153, - "learning_rate": 4.181666666666667e-05, - "loss": 0.5503, - "step": 2510 - }, - { - "epoch": 0.08813773495498342, - "grad_norm": 0.6560800075531006, - "learning_rate": 4.183333333333334e-05, - "loss": 0.5763, - "step": 2511 - }, - { - "epoch": 0.0881728356061005, - "grad_norm": 0.6627293229103088, - "learning_rate": 4.185e-05, - "loss": 0.5656, - "step": 2512 - }, - { - "epoch": 0.08820793625721757, - "grad_norm": 0.6440991759300232, - "learning_rate": 4.186666666666667e-05, - "loss": 0.4115, - "step": 2513 - }, - { - "epoch": 0.08824303690833465, - "grad_norm": 0.8348912000656128, - "learning_rate": 4.188333333333333e-05, - "loss": 0.6263, - "step": 2514 - }, - { - "epoch": 0.08827813755945173, - "grad_norm": 0.8451579213142395, - "learning_rate": 4.19e-05, - "loss": 0.5787, - "step": 2515 - }, - { - "epoch": 0.0883132382105688, - "grad_norm": 0.7366482615470886, - "learning_rate": 4.191666666666667e-05, - "loss": 0.4428, - "step": 2516 - }, - { - "epoch": 0.08834833886168589, - "grad_norm": 0.5577110052108765, - "learning_rate": 4.1933333333333334e-05, - "loss": 0.4075, - "step": 2517 - }, - { - "epoch": 0.08838343951280296, - "grad_norm": 0.6520463824272156, - "learning_rate": 4.195e-05, - "loss": 0.6248, - "step": 2518 - }, - { - "epoch": 0.08841854016392003, - "grad_norm": 0.9133307337760925, - "learning_rate": 4.196666666666667e-05, - "loss": 0.5335, - "step": 2519 - }, - { - "epoch": 0.08845364081503712, - "grad_norm": 0.7297801375389099, - "learning_rate": 4.1983333333333335e-05, - "loss": 0.4593, - "step": 2520 - }, - { - "epoch": 0.0884887414661542, - "grad_norm": 0.6633549928665161, - "learning_rate": 4.2e-05, - "loss": 0.6221, - "step": 2521 - }, - { - "epoch": 0.08852384211727128, - "grad_norm": 0.7260506749153137, - "learning_rate": 4.2016666666666674e-05, - "loss": 0.5638, - "step": 2522 - }, - { - "epoch": 0.08855894276838835, - "grad_norm": 0.7439989447593689, - "learning_rate": 4.2033333333333336e-05, - "loss": 0.5576, - "step": 2523 - }, - { - "epoch": 0.08859404341950543, - "grad_norm": 0.714114785194397, - "learning_rate": 4.205e-05, - "loss": 0.4715, - "step": 2524 - }, - { - "epoch": 0.08862914407062251, - "grad_norm": 0.7002841830253601, - "learning_rate": 4.206666666666667e-05, - "loss": 0.5764, - "step": 2525 - }, - { - "epoch": 0.08866424472173959, - "grad_norm": 0.6846678853034973, - "learning_rate": 4.208333333333334e-05, - "loss": 0.5437, - "step": 2526 - }, - { - "epoch": 0.08869934537285666, - "grad_norm": 0.798199474811554, - "learning_rate": 4.21e-05, - "loss": 0.6348, - "step": 2527 - }, - { - "epoch": 0.08873444602397375, - "grad_norm": 0.7236700654029846, - "learning_rate": 4.211666666666667e-05, - "loss": 0.5322, - "step": 2528 - }, - { - "epoch": 0.08876954667509082, - "grad_norm": 0.6753543615341187, - "learning_rate": 4.213333333333334e-05, - "loss": 0.5128, - "step": 2529 - }, - { - "epoch": 0.0888046473262079, - "grad_norm": 0.7070162296295166, - "learning_rate": 4.215e-05, - "loss": 0.5847, - "step": 2530 - }, - { - "epoch": 0.08883974797732498, - "grad_norm": 0.7884312868118286, - "learning_rate": 4.216666666666667e-05, - "loss": 0.6224, - "step": 2531 - }, - { - "epoch": 0.08887484862844205, - "grad_norm": 0.5992742776870728, - "learning_rate": 4.218333333333333e-05, - "loss": 0.6268, - "step": 2532 - }, - { - "epoch": 0.08890994927955914, - "grad_norm": 0.7348172664642334, - "learning_rate": 4.22e-05, - "loss": 0.5522, - "step": 2533 - }, - { - "epoch": 0.08894504993067621, - "grad_norm": 0.7309276461601257, - "learning_rate": 4.221666666666667e-05, - "loss": 0.4783, - "step": 2534 - }, - { - "epoch": 0.08898015058179329, - "grad_norm": 0.5966473817825317, - "learning_rate": 4.2233333333333334e-05, - "loss": 0.465, - "step": 2535 - }, - { - "epoch": 0.08901525123291038, - "grad_norm": 0.6354944109916687, - "learning_rate": 4.2250000000000004e-05, - "loss": 0.656, - "step": 2536 - }, - { - "epoch": 0.08905035188402745, - "grad_norm": 0.6773764491081238, - "learning_rate": 4.226666666666667e-05, - "loss": 0.5918, - "step": 2537 - }, - { - "epoch": 0.08908545253514452, - "grad_norm": 0.6622337102890015, - "learning_rate": 4.2283333333333336e-05, - "loss": 0.5168, - "step": 2538 - }, - { - "epoch": 0.08912055318626161, - "grad_norm": 0.6476207971572876, - "learning_rate": 4.23e-05, - "loss": 0.5315, - "step": 2539 - }, - { - "epoch": 0.08915565383737868, - "grad_norm": 0.6963481903076172, - "learning_rate": 4.2316666666666674e-05, - "loss": 0.6279, - "step": 2540 - }, - { - "epoch": 0.08919075448849575, - "grad_norm": 0.6867284774780273, - "learning_rate": 4.233333333333334e-05, - "loss": 0.5552, - "step": 2541 - }, - { - "epoch": 0.08922585513961284, - "grad_norm": 0.7501063942909241, - "learning_rate": 4.235e-05, - "loss": 0.5593, - "step": 2542 - }, - { - "epoch": 0.08926095579072992, - "grad_norm": 0.7013742923736572, - "learning_rate": 4.236666666666667e-05, - "loss": 0.5145, - "step": 2543 - }, - { - "epoch": 0.089296056441847, - "grad_norm": 0.6915982365608215, - "learning_rate": 4.238333333333334e-05, - "loss": 0.5562, - "step": 2544 - }, - { - "epoch": 0.08933115709296408, - "grad_norm": 0.6582024693489075, - "learning_rate": 4.24e-05, - "loss": 0.4972, - "step": 2545 - }, - { - "epoch": 0.08936625774408115, - "grad_norm": 0.6858877539634705, - "learning_rate": 4.241666666666667e-05, - "loss": 0.5865, - "step": 2546 - }, - { - "epoch": 0.08940135839519824, - "grad_norm": 0.7726238965988159, - "learning_rate": 4.243333333333334e-05, - "loss": 0.682, - "step": 2547 - }, - { - "epoch": 0.08943645904631531, - "grad_norm": 0.6227906942367554, - "learning_rate": 4.245e-05, - "loss": 0.5373, - "step": 2548 - }, - { - "epoch": 0.08947155969743238, - "grad_norm": 0.6766523122787476, - "learning_rate": 4.246666666666667e-05, - "loss": 0.5754, - "step": 2549 - }, - { - "epoch": 0.08950666034854947, - "grad_norm": 0.6514480710029602, - "learning_rate": 4.2483333333333334e-05, - "loss": 0.5127, - "step": 2550 - }, - { - "epoch": 0.08954176099966654, - "grad_norm": 0.845949113368988, - "learning_rate": 4.25e-05, - "loss": 0.4961, - "step": 2551 - }, - { - "epoch": 0.08957686165078362, - "grad_norm": 0.8868273496627808, - "learning_rate": 4.251666666666667e-05, - "loss": 0.5699, - "step": 2552 - }, - { - "epoch": 0.0896119623019007, - "grad_norm": 0.738898515701294, - "learning_rate": 4.2533333333333335e-05, - "loss": 0.5738, - "step": 2553 - }, - { - "epoch": 0.08964706295301778, - "grad_norm": 0.761687695980072, - "learning_rate": 4.2550000000000004e-05, - "loss": 0.598, - "step": 2554 - }, - { - "epoch": 0.08968216360413486, - "grad_norm": 0.7953792214393616, - "learning_rate": 4.2566666666666674e-05, - "loss": 0.6142, - "step": 2555 - }, - { - "epoch": 0.08971726425525194, - "grad_norm": 0.655120849609375, - "learning_rate": 4.2583333333333336e-05, - "loss": 0.5396, - "step": 2556 - }, - { - "epoch": 0.08975236490636901, - "grad_norm": 0.8023091554641724, - "learning_rate": 4.26e-05, - "loss": 0.6208, - "step": 2557 - }, - { - "epoch": 0.0897874655574861, - "grad_norm": 0.8692325949668884, - "learning_rate": 4.261666666666667e-05, - "loss": 0.5275, - "step": 2558 - }, - { - "epoch": 0.08982256620860317, - "grad_norm": 0.7006253004074097, - "learning_rate": 4.263333333333334e-05, - "loss": 0.6092, - "step": 2559 - }, - { - "epoch": 0.08985766685972024, - "grad_norm": 0.638313353061676, - "learning_rate": 4.265e-05, - "loss": 0.501, - "step": 2560 - }, - { - "epoch": 0.08989276751083733, - "grad_norm": 0.7721964716911316, - "learning_rate": 4.266666666666667e-05, - "loss": 0.4721, - "step": 2561 - }, - { - "epoch": 0.0899278681619544, - "grad_norm": 0.8489258289337158, - "learning_rate": 4.268333333333334e-05, - "loss": 0.6211, - "step": 2562 - }, - { - "epoch": 0.08996296881307149, - "grad_norm": 0.7676384449005127, - "learning_rate": 4.27e-05, - "loss": 0.6091, - "step": 2563 - }, - { - "epoch": 0.08999806946418856, - "grad_norm": 0.6678386926651001, - "learning_rate": 4.2716666666666664e-05, - "loss": 0.4734, - "step": 2564 - }, - { - "epoch": 0.09003317011530564, - "grad_norm": 0.7306602001190186, - "learning_rate": 4.273333333333333e-05, - "loss": 0.5729, - "step": 2565 - }, - { - "epoch": 0.09006827076642272, - "grad_norm": 0.8476908802986145, - "learning_rate": 4.275e-05, - "loss": 0.544, - "step": 2566 - }, - { - "epoch": 0.0901033714175398, - "grad_norm": 0.6399706602096558, - "learning_rate": 4.2766666666666665e-05, - "loss": 0.4296, - "step": 2567 - }, - { - "epoch": 0.09013847206865687, - "grad_norm": 0.8751822113990784, - "learning_rate": 4.2783333333333334e-05, - "loss": 0.5951, - "step": 2568 - }, - { - "epoch": 0.09017357271977396, - "grad_norm": 0.7030075192451477, - "learning_rate": 4.2800000000000004e-05, - "loss": 0.5538, - "step": 2569 - }, - { - "epoch": 0.09020867337089103, - "grad_norm": 0.8252291083335876, - "learning_rate": 4.2816666666666666e-05, - "loss": 0.4613, - "step": 2570 - }, - { - "epoch": 0.0902437740220081, - "grad_norm": 0.727439284324646, - "learning_rate": 4.2833333333333335e-05, - "loss": 0.6025, - "step": 2571 - }, - { - "epoch": 0.09027887467312519, - "grad_norm": 0.6818931698799133, - "learning_rate": 4.285e-05, - "loss": 0.558, - "step": 2572 - }, - { - "epoch": 0.09031397532424226, - "grad_norm": 0.8012758493423462, - "learning_rate": 4.286666666666667e-05, - "loss": 0.5517, - "step": 2573 - }, - { - "epoch": 0.09034907597535935, - "grad_norm": 0.8243826627731323, - "learning_rate": 4.288333333333334e-05, - "loss": 0.4649, - "step": 2574 - }, - { - "epoch": 0.09038417662647642, - "grad_norm": 0.6729589104652405, - "learning_rate": 4.29e-05, - "loss": 0.501, - "step": 2575 - }, - { - "epoch": 0.0904192772775935, - "grad_norm": 0.7642138004302979, - "learning_rate": 4.291666666666667e-05, - "loss": 0.6492, - "step": 2576 - }, - { - "epoch": 0.09045437792871058, - "grad_norm": 0.7365584969520569, - "learning_rate": 4.293333333333334e-05, - "loss": 0.4546, - "step": 2577 - }, - { - "epoch": 0.09048947857982766, - "grad_norm": 0.6718725562095642, - "learning_rate": 4.295e-05, - "loss": 0.5681, - "step": 2578 - }, - { - "epoch": 0.09052457923094473, - "grad_norm": 0.658265233039856, - "learning_rate": 4.296666666666666e-05, - "loss": 0.3992, - "step": 2579 - }, - { - "epoch": 0.09055967988206182, - "grad_norm": 0.6213750243186951, - "learning_rate": 4.298333333333334e-05, - "loss": 0.483, - "step": 2580 - }, - { - "epoch": 0.09059478053317889, - "grad_norm": 0.7259891033172607, - "learning_rate": 4.3e-05, - "loss": 0.5611, - "step": 2581 - }, - { - "epoch": 0.09062988118429596, - "grad_norm": 0.726986289024353, - "learning_rate": 4.3016666666666664e-05, - "loss": 0.4587, - "step": 2582 - }, - { - "epoch": 0.09066498183541305, - "grad_norm": 0.8851428031921387, - "learning_rate": 4.3033333333333334e-05, - "loss": 0.57, - "step": 2583 - }, - { - "epoch": 0.09070008248653012, - "grad_norm": 0.7349922060966492, - "learning_rate": 4.305e-05, - "loss": 0.5382, - "step": 2584 - }, - { - "epoch": 0.09073518313764721, - "grad_norm": 0.8317297697067261, - "learning_rate": 4.3066666666666665e-05, - "loss": 0.5938, - "step": 2585 - }, - { - "epoch": 0.09077028378876428, - "grad_norm": 0.714537501335144, - "learning_rate": 4.3083333333333335e-05, - "loss": 0.589, - "step": 2586 - }, - { - "epoch": 0.09080538443988136, - "grad_norm": 0.7151386141777039, - "learning_rate": 4.3100000000000004e-05, - "loss": 0.6766, - "step": 2587 - }, - { - "epoch": 0.09084048509099844, - "grad_norm": 0.6885514259338379, - "learning_rate": 4.311666666666667e-05, - "loss": 0.5778, - "step": 2588 - }, - { - "epoch": 0.09087558574211552, - "grad_norm": 0.6938579082489014, - "learning_rate": 4.3133333333333336e-05, - "loss": 0.6329, - "step": 2589 - }, - { - "epoch": 0.09091068639323259, - "grad_norm": 0.6717303991317749, - "learning_rate": 4.315e-05, - "loss": 0.5098, - "step": 2590 - }, - { - "epoch": 0.09094578704434968, - "grad_norm": 0.6527897715568542, - "learning_rate": 4.316666666666667e-05, - "loss": 0.5404, - "step": 2591 - }, - { - "epoch": 0.09098088769546675, - "grad_norm": 0.7306952476501465, - "learning_rate": 4.318333333333334e-05, - "loss": 0.4793, - "step": 2592 - }, - { - "epoch": 0.09101598834658382, - "grad_norm": 0.7674599885940552, - "learning_rate": 4.32e-05, - "loss": 0.5194, - "step": 2593 - }, - { - "epoch": 0.09105108899770091, - "grad_norm": 0.6781083345413208, - "learning_rate": 4.321666666666667e-05, - "loss": 0.579, - "step": 2594 - }, - { - "epoch": 0.09108618964881798, - "grad_norm": 0.7285536527633667, - "learning_rate": 4.323333333333334e-05, - "loss": 0.6091, - "step": 2595 - }, - { - "epoch": 0.09112129029993507, - "grad_norm": 0.835934042930603, - "learning_rate": 4.325e-05, - "loss": 0.6287, - "step": 2596 - }, - { - "epoch": 0.09115639095105214, - "grad_norm": 0.7045307755470276, - "learning_rate": 4.3266666666666664e-05, - "loss": 0.5876, - "step": 2597 - }, - { - "epoch": 0.09119149160216922, - "grad_norm": 0.7886543273925781, - "learning_rate": 4.328333333333334e-05, - "loss": 0.6049, - "step": 2598 - }, - { - "epoch": 0.0912265922532863, - "grad_norm": 0.6835835576057434, - "learning_rate": 4.33e-05, - "loss": 0.5129, - "step": 2599 - }, - { - "epoch": 0.09126169290440338, - "grad_norm": 0.718490719795227, - "learning_rate": 4.3316666666666665e-05, - "loss": 0.457, - "step": 2600 - }, - { - "epoch": 0.09129679355552045, - "grad_norm": 0.727260947227478, - "learning_rate": 4.3333333333333334e-05, - "loss": 0.4903, - "step": 2601 - }, - { - "epoch": 0.09133189420663754, - "grad_norm": 0.9537283778190613, - "learning_rate": 4.335e-05, - "loss": 0.6081, - "step": 2602 - }, - { - "epoch": 0.09136699485775461, - "grad_norm": 0.5571185946464539, - "learning_rate": 4.3366666666666666e-05, - "loss": 0.5435, - "step": 2603 - }, - { - "epoch": 0.09140209550887168, - "grad_norm": 0.6961998343467712, - "learning_rate": 4.3383333333333335e-05, - "loss": 0.5151, - "step": 2604 - }, - { - "epoch": 0.09143719615998877, - "grad_norm": 0.5736442804336548, - "learning_rate": 4.3400000000000005e-05, - "loss": 0.5511, - "step": 2605 - }, - { - "epoch": 0.09147229681110584, - "grad_norm": 0.626470685005188, - "learning_rate": 4.341666666666667e-05, - "loss": 0.5537, - "step": 2606 - }, - { - "epoch": 0.09150739746222293, - "grad_norm": 0.6622349619865417, - "learning_rate": 4.3433333333333336e-05, - "loss": 0.5234, - "step": 2607 - }, - { - "epoch": 0.09154249811334, - "grad_norm": 0.7772030830383301, - "learning_rate": 4.345e-05, - "loss": 0.6198, - "step": 2608 - }, - { - "epoch": 0.09157759876445708, - "grad_norm": 0.7392779588699341, - "learning_rate": 4.346666666666667e-05, - "loss": 0.5841, - "step": 2609 - }, - { - "epoch": 0.09161269941557416, - "grad_norm": 0.6466121077537537, - "learning_rate": 4.348333333333334e-05, - "loss": 0.626, - "step": 2610 - }, - { - "epoch": 0.09164780006669124, - "grad_norm": 0.6485302448272705, - "learning_rate": 4.35e-05, - "loss": 0.5507, - "step": 2611 - }, - { - "epoch": 0.09168290071780831, - "grad_norm": 0.6960524916648865, - "learning_rate": 4.351666666666667e-05, - "loss": 0.6187, - "step": 2612 - }, - { - "epoch": 0.0917180013689254, - "grad_norm": 0.7055206894874573, - "learning_rate": 4.353333333333334e-05, - "loss": 0.5568, - "step": 2613 - }, - { - "epoch": 0.09175310202004247, - "grad_norm": 0.8697220683097839, - "learning_rate": 4.355e-05, - "loss": 0.4365, - "step": 2614 - }, - { - "epoch": 0.09178820267115954, - "grad_norm": 0.7108759880065918, - "learning_rate": 4.3566666666666664e-05, - "loss": 0.4644, - "step": 2615 - }, - { - "epoch": 0.09182330332227663, - "grad_norm": 0.6188262104988098, - "learning_rate": 4.358333333333334e-05, - "loss": 0.4921, - "step": 2616 - }, - { - "epoch": 0.0918584039733937, - "grad_norm": 0.8458303809165955, - "learning_rate": 4.36e-05, - "loss": 0.5708, - "step": 2617 - }, - { - "epoch": 0.09189350462451079, - "grad_norm": 0.7081383466720581, - "learning_rate": 4.3616666666666665e-05, - "loss": 0.5067, - "step": 2618 - }, - { - "epoch": 0.09192860527562786, - "grad_norm": 0.5474262237548828, - "learning_rate": 4.3633333333333335e-05, - "loss": 0.4854, - "step": 2619 - }, - { - "epoch": 0.09196370592674494, - "grad_norm": 0.6053456664085388, - "learning_rate": 4.3650000000000004e-05, - "loss": 0.4435, - "step": 2620 - }, - { - "epoch": 0.09199880657786202, - "grad_norm": 0.6961937546730042, - "learning_rate": 4.3666666666666666e-05, - "loss": 0.6437, - "step": 2621 - }, - { - "epoch": 0.0920339072289791, - "grad_norm": 0.7558527588844299, - "learning_rate": 4.3683333333333336e-05, - "loss": 0.5025, - "step": 2622 - }, - { - "epoch": 0.09206900788009617, - "grad_norm": 0.6647387146949768, - "learning_rate": 4.3700000000000005e-05, - "loss": 0.5689, - "step": 2623 - }, - { - "epoch": 0.09210410853121326, - "grad_norm": 0.7135533690452576, - "learning_rate": 4.371666666666667e-05, - "loss": 0.389, - "step": 2624 - }, - { - "epoch": 0.09213920918233033, - "grad_norm": 0.7769681215286255, - "learning_rate": 4.373333333333334e-05, - "loss": 0.6227, - "step": 2625 - }, - { - "epoch": 0.0921743098334474, - "grad_norm": 0.9529311060905457, - "learning_rate": 4.375e-05, - "loss": 0.45, - "step": 2626 - }, - { - "epoch": 0.09220941048456449, - "grad_norm": 0.9330646991729736, - "learning_rate": 4.376666666666667e-05, - "loss": 0.5476, - "step": 2627 - }, - { - "epoch": 0.09224451113568156, - "grad_norm": 0.926125705242157, - "learning_rate": 4.378333333333334e-05, - "loss": 0.618, - "step": 2628 - }, - { - "epoch": 0.09227961178679865, - "grad_norm": 0.7334752082824707, - "learning_rate": 4.38e-05, - "loss": 0.4948, - "step": 2629 - }, - { - "epoch": 0.09231471243791572, - "grad_norm": 0.7631013989448547, - "learning_rate": 4.381666666666667e-05, - "loss": 0.6034, - "step": 2630 - }, - { - "epoch": 0.0923498130890328, - "grad_norm": 0.731987476348877, - "learning_rate": 4.383333333333334e-05, - "loss": 0.4188, - "step": 2631 - }, - { - "epoch": 0.09238491374014988, - "grad_norm": 0.7657735347747803, - "learning_rate": 4.385e-05, - "loss": 0.5318, - "step": 2632 - }, - { - "epoch": 0.09242001439126696, - "grad_norm": 0.7355462312698364, - "learning_rate": 4.3866666666666665e-05, - "loss": 0.5998, - "step": 2633 - }, - { - "epoch": 0.09245511504238403, - "grad_norm": 0.700467050075531, - "learning_rate": 4.388333333333334e-05, - "loss": 0.6403, - "step": 2634 - }, - { - "epoch": 0.09249021569350112, - "grad_norm": 0.634898841381073, - "learning_rate": 4.39e-05, - "loss": 0.6129, - "step": 2635 - }, - { - "epoch": 0.09252531634461819, - "grad_norm": 0.646999180316925, - "learning_rate": 4.3916666666666666e-05, - "loss": 0.5766, - "step": 2636 - }, - { - "epoch": 0.09256041699573526, - "grad_norm": 0.6841120719909668, - "learning_rate": 4.3933333333333335e-05, - "loss": 0.6421, - "step": 2637 - }, - { - "epoch": 0.09259551764685235, - "grad_norm": 0.7717334032058716, - "learning_rate": 4.3950000000000004e-05, - "loss": 0.5683, - "step": 2638 - }, - { - "epoch": 0.09263061829796942, - "grad_norm": 0.9416913986206055, - "learning_rate": 4.396666666666667e-05, - "loss": 0.6534, - "step": 2639 - }, - { - "epoch": 0.09266571894908651, - "grad_norm": 0.7571461796760559, - "learning_rate": 4.3983333333333336e-05, - "loss": 0.4083, - "step": 2640 - }, - { - "epoch": 0.09270081960020359, - "grad_norm": 0.7976679801940918, - "learning_rate": 4.4000000000000006e-05, - "loss": 0.6967, - "step": 2641 - }, - { - "epoch": 0.09273592025132066, - "grad_norm": 0.7388980388641357, - "learning_rate": 4.401666666666667e-05, - "loss": 0.6374, - "step": 2642 - }, - { - "epoch": 0.09277102090243775, - "grad_norm": 0.6196393370628357, - "learning_rate": 4.403333333333334e-05, - "loss": 0.5739, - "step": 2643 - }, - { - "epoch": 0.09280612155355482, - "grad_norm": 0.757447361946106, - "learning_rate": 4.405e-05, - "loss": 0.5015, - "step": 2644 - }, - { - "epoch": 0.09284122220467189, - "grad_norm": 0.7102518677711487, - "learning_rate": 4.406666666666667e-05, - "loss": 0.5428, - "step": 2645 - }, - { - "epoch": 0.09287632285578898, - "grad_norm": 0.7623802423477173, - "learning_rate": 4.408333333333334e-05, - "loss": 0.4731, - "step": 2646 - }, - { - "epoch": 0.09291142350690605, - "grad_norm": 0.7529585361480713, - "learning_rate": 4.41e-05, - "loss": 0.5376, - "step": 2647 - }, - { - "epoch": 0.09294652415802312, - "grad_norm": 0.8127328753471375, - "learning_rate": 4.411666666666667e-05, - "loss": 0.654, - "step": 2648 - }, - { - "epoch": 0.09298162480914021, - "grad_norm": 0.6153225302696228, - "learning_rate": 4.413333333333334e-05, - "loss": 0.6289, - "step": 2649 - }, - { - "epoch": 0.09301672546025729, - "grad_norm": 0.643932044506073, - "learning_rate": 4.415e-05, - "loss": 0.5732, - "step": 2650 - }, - { - "epoch": 0.09305182611137437, - "grad_norm": 0.6812381744384766, - "learning_rate": 4.4166666666666665e-05, - "loss": 0.5722, - "step": 2651 - }, - { - "epoch": 0.09308692676249145, - "grad_norm": 0.723708987236023, - "learning_rate": 4.4183333333333334e-05, - "loss": 0.6456, - "step": 2652 - }, - { - "epoch": 0.09312202741360852, - "grad_norm": 0.7173773050308228, - "learning_rate": 4.4200000000000004e-05, - "loss": 0.5988, - "step": 2653 - }, - { - "epoch": 0.0931571280647256, - "grad_norm": 0.6097594499588013, - "learning_rate": 4.4216666666666666e-05, - "loss": 0.5727, - "step": 2654 - }, - { - "epoch": 0.09319222871584268, - "grad_norm": 0.6757858991622925, - "learning_rate": 4.4233333333333336e-05, - "loss": 0.5759, - "step": 2655 - }, - { - "epoch": 0.09322732936695975, - "grad_norm": 0.670283317565918, - "learning_rate": 4.4250000000000005e-05, - "loss": 0.5358, - "step": 2656 - }, - { - "epoch": 0.09326243001807684, - "grad_norm": 0.7086200714111328, - "learning_rate": 4.426666666666667e-05, - "loss": 0.6162, - "step": 2657 - }, - { - "epoch": 0.09329753066919391, - "grad_norm": 0.8046087622642517, - "learning_rate": 4.428333333333334e-05, - "loss": 0.5813, - "step": 2658 - }, - { - "epoch": 0.09333263132031099, - "grad_norm": 0.6473639011383057, - "learning_rate": 4.43e-05, - "loss": 0.5652, - "step": 2659 - }, - { - "epoch": 0.09336773197142807, - "grad_norm": 0.8590714931488037, - "learning_rate": 4.431666666666667e-05, - "loss": 0.6375, - "step": 2660 - }, - { - "epoch": 0.09340283262254515, - "grad_norm": 0.591759443283081, - "learning_rate": 4.433333333333334e-05, - "loss": 0.5617, - "step": 2661 - }, - { - "epoch": 0.09343793327366223, - "grad_norm": 0.5920177698135376, - "learning_rate": 4.435e-05, - "loss": 0.5755, - "step": 2662 - }, - { - "epoch": 0.0934730339247793, - "grad_norm": 0.6698595285415649, - "learning_rate": 4.436666666666667e-05, - "loss": 0.6452, - "step": 2663 - }, - { - "epoch": 0.09350813457589638, - "grad_norm": 0.759893536567688, - "learning_rate": 4.438333333333334e-05, - "loss": 0.6383, - "step": 2664 - }, - { - "epoch": 0.09354323522701347, - "grad_norm": 0.5978102087974548, - "learning_rate": 4.44e-05, - "loss": 0.6124, - "step": 2665 - }, - { - "epoch": 0.09357833587813054, - "grad_norm": 0.7173013687133789, - "learning_rate": 4.4416666666666664e-05, - "loss": 0.5462, - "step": 2666 - }, - { - "epoch": 0.09361343652924761, - "grad_norm": 0.825029194355011, - "learning_rate": 4.443333333333334e-05, - "loss": 0.4773, - "step": 2667 - }, - { - "epoch": 0.0936485371803647, - "grad_norm": 0.5800117254257202, - "learning_rate": 4.445e-05, - "loss": 0.5727, - "step": 2668 - }, - { - "epoch": 0.09368363783148177, - "grad_norm": 0.6288424134254456, - "learning_rate": 4.4466666666666666e-05, - "loss": 0.5973, - "step": 2669 - }, - { - "epoch": 0.09371873848259885, - "grad_norm": 0.670842707157135, - "learning_rate": 4.4483333333333335e-05, - "loss": 0.511, - "step": 2670 - }, - { - "epoch": 0.09375383913371593, - "grad_norm": 0.6182045936584473, - "learning_rate": 4.4500000000000004e-05, - "loss": 0.4548, - "step": 2671 - }, - { - "epoch": 0.093788939784833, - "grad_norm": 0.6534830331802368, - "learning_rate": 4.451666666666667e-05, - "loss": 0.5852, - "step": 2672 - }, - { - "epoch": 0.09382404043595009, - "grad_norm": 0.5707035064697266, - "learning_rate": 4.4533333333333336e-05, - "loss": 0.6009, - "step": 2673 - }, - { - "epoch": 0.09385914108706717, - "grad_norm": 0.6514356732368469, - "learning_rate": 4.4550000000000005e-05, - "loss": 0.5513, - "step": 2674 - }, - { - "epoch": 0.09389424173818424, - "grad_norm": 0.6770808100700378, - "learning_rate": 4.456666666666667e-05, - "loss": 0.487, - "step": 2675 - }, - { - "epoch": 0.09392934238930133, - "grad_norm": 0.6938658952713013, - "learning_rate": 4.458333333333334e-05, - "loss": 0.55, - "step": 2676 - }, - { - "epoch": 0.0939644430404184, - "grad_norm": 0.6999446749687195, - "learning_rate": 4.46e-05, - "loss": 0.5226, - "step": 2677 - }, - { - "epoch": 0.09399954369153547, - "grad_norm": 0.6150930523872375, - "learning_rate": 4.461666666666667e-05, - "loss": 0.5449, - "step": 2678 - }, - { - "epoch": 0.09403464434265256, - "grad_norm": 0.7643689513206482, - "learning_rate": 4.463333333333334e-05, - "loss": 0.4732, - "step": 2679 - }, - { - "epoch": 0.09406974499376963, - "grad_norm": 0.5293813943862915, - "learning_rate": 4.465e-05, - "loss": 0.6218, - "step": 2680 - }, - { - "epoch": 0.0941048456448867, - "grad_norm": 0.7024825811386108, - "learning_rate": 4.466666666666667e-05, - "loss": 0.5612, - "step": 2681 - }, - { - "epoch": 0.09413994629600379, - "grad_norm": 0.78353351354599, - "learning_rate": 4.468333333333334e-05, - "loss": 0.3527, - "step": 2682 - }, - { - "epoch": 0.09417504694712087, - "grad_norm": 0.8565510511398315, - "learning_rate": 4.47e-05, - "loss": 0.5145, - "step": 2683 - }, - { - "epoch": 0.09421014759823795, - "grad_norm": 0.8242422342300415, - "learning_rate": 4.4716666666666665e-05, - "loss": 0.4912, - "step": 2684 - }, - { - "epoch": 0.09424524824935503, - "grad_norm": 0.9229152798652649, - "learning_rate": 4.473333333333334e-05, - "loss": 0.6247, - "step": 2685 - }, - { - "epoch": 0.0942803489004721, - "grad_norm": 0.7405350804328918, - "learning_rate": 4.4750000000000004e-05, - "loss": 0.4871, - "step": 2686 - }, - { - "epoch": 0.09431544955158919, - "grad_norm": 0.6970462799072266, - "learning_rate": 4.4766666666666666e-05, - "loss": 0.6596, - "step": 2687 - }, - { - "epoch": 0.09435055020270626, - "grad_norm": 0.7106477618217468, - "learning_rate": 4.4783333333333335e-05, - "loss": 0.5084, - "step": 2688 - }, - { - "epoch": 0.09438565085382333, - "grad_norm": 0.6873270869255066, - "learning_rate": 4.4800000000000005e-05, - "loss": 0.6065, - "step": 2689 - }, - { - "epoch": 0.09442075150494042, - "grad_norm": 0.6278554797172546, - "learning_rate": 4.481666666666667e-05, - "loss": 0.5908, - "step": 2690 - }, - { - "epoch": 0.0944558521560575, - "grad_norm": 0.7556796669960022, - "learning_rate": 4.483333333333333e-05, - "loss": 0.6044, - "step": 2691 - }, - { - "epoch": 0.09449095280717457, - "grad_norm": 0.6613122224807739, - "learning_rate": 4.4850000000000006e-05, - "loss": 0.5645, - "step": 2692 - }, - { - "epoch": 0.09452605345829165, - "grad_norm": 0.718525767326355, - "learning_rate": 4.486666666666667e-05, - "loss": 0.5924, - "step": 2693 - }, - { - "epoch": 0.09456115410940873, - "grad_norm": 0.6385703086853027, - "learning_rate": 4.488333333333333e-05, - "loss": 0.5006, - "step": 2694 - }, - { - "epoch": 0.09459625476052581, - "grad_norm": 0.7501863241195679, - "learning_rate": 4.49e-05, - "loss": 0.618, - "step": 2695 - }, - { - "epoch": 0.09463135541164289, - "grad_norm": 0.626573383808136, - "learning_rate": 4.491666666666667e-05, - "loss": 0.5664, - "step": 2696 - }, - { - "epoch": 0.09466645606275996, - "grad_norm": 0.6502801179885864, - "learning_rate": 4.493333333333333e-05, - "loss": 0.5717, - "step": 2697 - }, - { - "epoch": 0.09470155671387705, - "grad_norm": 0.6235475540161133, - "learning_rate": 4.495e-05, - "loss": 0.4549, - "step": 2698 - }, - { - "epoch": 0.09473665736499412, - "grad_norm": 0.6917382478713989, - "learning_rate": 4.496666666666667e-05, - "loss": 0.5319, - "step": 2699 - }, - { - "epoch": 0.0947717580161112, - "grad_norm": 0.7189970016479492, - "learning_rate": 4.4983333333333334e-05, - "loss": 0.53, - "step": 2700 - }, - { - "epoch": 0.09480685866722828, - "grad_norm": 0.5789535641670227, - "learning_rate": 4.5e-05, - "loss": 0.482, - "step": 2701 - }, - { - "epoch": 0.09484195931834535, - "grad_norm": 0.5161978006362915, - "learning_rate": 4.5016666666666665e-05, - "loss": 0.5671, - "step": 2702 - }, - { - "epoch": 0.09487705996946243, - "grad_norm": 0.6552004218101501, - "learning_rate": 4.5033333333333335e-05, - "loss": 0.561, - "step": 2703 - }, - { - "epoch": 0.09491216062057951, - "grad_norm": 0.7567232847213745, - "learning_rate": 4.5050000000000004e-05, - "loss": 0.5385, - "step": 2704 - }, - { - "epoch": 0.09494726127169659, - "grad_norm": 0.7214070558547974, - "learning_rate": 4.5066666666666667e-05, - "loss": 0.6248, - "step": 2705 - }, - { - "epoch": 0.09498236192281367, - "grad_norm": 0.6023358106613159, - "learning_rate": 4.5083333333333336e-05, - "loss": 0.4759, - "step": 2706 - }, - { - "epoch": 0.09501746257393075, - "grad_norm": 0.6131018400192261, - "learning_rate": 4.5100000000000005e-05, - "loss": 0.4312, - "step": 2707 - }, - { - "epoch": 0.09505256322504782, - "grad_norm": 0.589123547077179, - "learning_rate": 4.511666666666667e-05, - "loss": 0.6471, - "step": 2708 - }, - { - "epoch": 0.09508766387616491, - "grad_norm": 0.7670140266418457, - "learning_rate": 4.513333333333333e-05, - "loss": 0.4918, - "step": 2709 - }, - { - "epoch": 0.09512276452728198, - "grad_norm": 0.8858130574226379, - "learning_rate": 4.5150000000000006e-05, - "loss": 0.6334, - "step": 2710 - }, - { - "epoch": 0.09515786517839905, - "grad_norm": 0.6052109599113464, - "learning_rate": 4.516666666666667e-05, - "loss": 0.6436, - "step": 2711 - }, - { - "epoch": 0.09519296582951614, - "grad_norm": 0.6272503733634949, - "learning_rate": 4.518333333333333e-05, - "loss": 0.5887, - "step": 2712 - }, - { - "epoch": 0.09522806648063321, - "grad_norm": 0.7486005425453186, - "learning_rate": 4.52e-05, - "loss": 0.4941, - "step": 2713 - }, - { - "epoch": 0.09526316713175029, - "grad_norm": 0.6724404096603394, - "learning_rate": 4.521666666666667e-05, - "loss": 0.5094, - "step": 2714 - }, - { - "epoch": 0.09529826778286737, - "grad_norm": 0.6400233507156372, - "learning_rate": 4.523333333333333e-05, - "loss": 0.6204, - "step": 2715 - }, - { - "epoch": 0.09533336843398445, - "grad_norm": 0.6011627912521362, - "learning_rate": 4.525e-05, - "loss": 0.5792, - "step": 2716 - }, - { - "epoch": 0.09536846908510153, - "grad_norm": 0.6405631899833679, - "learning_rate": 4.526666666666667e-05, - "loss": 0.4448, - "step": 2717 - }, - { - "epoch": 0.09540356973621861, - "grad_norm": 0.7741261720657349, - "learning_rate": 4.5283333333333334e-05, - "loss": 0.463, - "step": 2718 - }, - { - "epoch": 0.09543867038733568, - "grad_norm": 0.9355655312538147, - "learning_rate": 4.53e-05, - "loss": 0.4567, - "step": 2719 - }, - { - "epoch": 0.09547377103845277, - "grad_norm": 0.8547265529632568, - "learning_rate": 4.5316666666666666e-05, - "loss": 0.6217, - "step": 2720 - }, - { - "epoch": 0.09550887168956984, - "grad_norm": 0.6662641167640686, - "learning_rate": 4.5333333333333335e-05, - "loss": 0.5225, - "step": 2721 - }, - { - "epoch": 0.09554397234068691, - "grad_norm": 0.7422149777412415, - "learning_rate": 4.5350000000000005e-05, - "loss": 0.5566, - "step": 2722 - }, - { - "epoch": 0.095579072991804, - "grad_norm": 0.7963353991508484, - "learning_rate": 4.536666666666667e-05, - "loss": 0.6717, - "step": 2723 - }, - { - "epoch": 0.09561417364292107, - "grad_norm": 0.6972795128822327, - "learning_rate": 4.5383333333333336e-05, - "loss": 0.5688, - "step": 2724 - }, - { - "epoch": 0.09564927429403816, - "grad_norm": 0.7453543543815613, - "learning_rate": 4.5400000000000006e-05, - "loss": 0.6312, - "step": 2725 - }, - { - "epoch": 0.09568437494515523, - "grad_norm": 0.6451449394226074, - "learning_rate": 4.541666666666667e-05, - "loss": 0.4842, - "step": 2726 - }, - { - "epoch": 0.09571947559627231, - "grad_norm": 0.6998327374458313, - "learning_rate": 4.543333333333333e-05, - "loss": 0.471, - "step": 2727 - }, - { - "epoch": 0.0957545762473894, - "grad_norm": 0.8091192245483398, - "learning_rate": 4.545000000000001e-05, - "loss": 0.6416, - "step": 2728 - }, - { - "epoch": 0.09578967689850647, - "grad_norm": 0.7667819857597351, - "learning_rate": 4.546666666666667e-05, - "loss": 0.4433, - "step": 2729 - }, - { - "epoch": 0.09582477754962354, - "grad_norm": 0.6079297065734863, - "learning_rate": 4.548333333333333e-05, - "loss": 0.5292, - "step": 2730 - }, - { - "epoch": 0.09585987820074063, - "grad_norm": 0.7188242077827454, - "learning_rate": 4.55e-05, - "loss": 0.6196, - "step": 2731 - }, - { - "epoch": 0.0958949788518577, - "grad_norm": 0.849205493927002, - "learning_rate": 4.551666666666667e-05, - "loss": 0.5927, - "step": 2732 - }, - { - "epoch": 0.09593007950297477, - "grad_norm": 0.6869365572929382, - "learning_rate": 4.553333333333333e-05, - "loss": 0.4854, - "step": 2733 - }, - { - "epoch": 0.09596518015409186, - "grad_norm": 0.872492790222168, - "learning_rate": 4.555e-05, - "loss": 0.5739, - "step": 2734 - }, - { - "epoch": 0.09600028080520893, - "grad_norm": 0.82460618019104, - "learning_rate": 4.556666666666667e-05, - "loss": 0.6424, - "step": 2735 - }, - { - "epoch": 0.09603538145632602, - "grad_norm": 0.6506249308586121, - "learning_rate": 4.5583333333333335e-05, - "loss": 0.5756, - "step": 2736 - }, - { - "epoch": 0.0960704821074431, - "grad_norm": 0.7644635438919067, - "learning_rate": 4.5600000000000004e-05, - "loss": 0.5584, - "step": 2737 - }, - { - "epoch": 0.09610558275856017, - "grad_norm": 0.6691493988037109, - "learning_rate": 4.5616666666666666e-05, - "loss": 0.5675, - "step": 2738 - }, - { - "epoch": 0.09614068340967726, - "grad_norm": 0.6079164743423462, - "learning_rate": 4.5633333333333336e-05, - "loss": 0.4578, - "step": 2739 - }, - { - "epoch": 0.09617578406079433, - "grad_norm": 0.6338250041007996, - "learning_rate": 4.5650000000000005e-05, - "loss": 0.5822, - "step": 2740 - }, - { - "epoch": 0.0962108847119114, - "grad_norm": 0.6763618588447571, - "learning_rate": 4.566666666666667e-05, - "loss": 0.5488, - "step": 2741 - }, - { - "epoch": 0.09624598536302849, - "grad_norm": 0.6810805201530457, - "learning_rate": 4.568333333333333e-05, - "loss": 0.488, - "step": 2742 - }, - { - "epoch": 0.09628108601414556, - "grad_norm": 0.7187394499778748, - "learning_rate": 4.5700000000000006e-05, - "loss": 0.5631, - "step": 2743 - }, - { - "epoch": 0.09631618666526263, - "grad_norm": 0.6526506543159485, - "learning_rate": 4.571666666666667e-05, - "loss": 0.505, - "step": 2744 - }, - { - "epoch": 0.09635128731637972, - "grad_norm": 0.6537788510322571, - "learning_rate": 4.573333333333333e-05, - "loss": 0.5198, - "step": 2745 - }, - { - "epoch": 0.0963863879674968, - "grad_norm": 0.7346085906028748, - "learning_rate": 4.575e-05, - "loss": 0.5308, - "step": 2746 - }, - { - "epoch": 0.09642148861861388, - "grad_norm": 0.5221970677375793, - "learning_rate": 4.576666666666667e-05, - "loss": 0.4886, - "step": 2747 - }, - { - "epoch": 0.09645658926973096, - "grad_norm": 0.7957785129547119, - "learning_rate": 4.578333333333333e-05, - "loss": 0.5634, - "step": 2748 - }, - { - "epoch": 0.09649168992084803, - "grad_norm": 0.7125843167304993, - "learning_rate": 4.58e-05, - "loss": 0.5405, - "step": 2749 - }, - { - "epoch": 0.09652679057196512, - "grad_norm": 0.750221312046051, - "learning_rate": 4.581666666666667e-05, - "loss": 0.5933, - "step": 2750 - }, - { - "epoch": 0.09656189122308219, - "grad_norm": 0.5304624438285828, - "learning_rate": 4.5833333333333334e-05, - "loss": 0.472, - "step": 2751 - }, - { - "epoch": 0.09659699187419926, - "grad_norm": 0.6788982152938843, - "learning_rate": 4.585e-05, - "loss": 0.4958, - "step": 2752 - }, - { - "epoch": 0.09663209252531635, - "grad_norm": 0.7003751993179321, - "learning_rate": 4.5866666666666666e-05, - "loss": 0.5693, - "step": 2753 - }, - { - "epoch": 0.09666719317643342, - "grad_norm": 0.6299011707305908, - "learning_rate": 4.5883333333333335e-05, - "loss": 0.4662, - "step": 2754 - }, - { - "epoch": 0.0967022938275505, - "grad_norm": 0.7687483429908752, - "learning_rate": 4.5900000000000004e-05, - "loss": 0.5212, - "step": 2755 - }, - { - "epoch": 0.09673739447866758, - "grad_norm": 0.6642699241638184, - "learning_rate": 4.591666666666667e-05, - "loss": 0.5151, - "step": 2756 - }, - { - "epoch": 0.09677249512978466, - "grad_norm": 0.7607114911079407, - "learning_rate": 4.5933333333333336e-05, - "loss": 0.5508, - "step": 2757 - }, - { - "epoch": 0.09680759578090174, - "grad_norm": 0.6598204970359802, - "learning_rate": 4.5950000000000006e-05, - "loss": 0.6122, - "step": 2758 - }, - { - "epoch": 0.09684269643201882, - "grad_norm": 0.7667787671089172, - "learning_rate": 4.596666666666667e-05, - "loss": 0.5179, - "step": 2759 - }, - { - "epoch": 0.09687779708313589, - "grad_norm": 0.7900807857513428, - "learning_rate": 4.598333333333333e-05, - "loss": 0.5027, - "step": 2760 - }, - { - "epoch": 0.09691289773425298, - "grad_norm": 0.7208337187767029, - "learning_rate": 4.600000000000001e-05, - "loss": 0.6284, - "step": 2761 - }, - { - "epoch": 0.09694799838537005, - "grad_norm": 0.6771347522735596, - "learning_rate": 4.601666666666667e-05, - "loss": 0.5663, - "step": 2762 - }, - { - "epoch": 0.09698309903648712, - "grad_norm": 0.6520406007766724, - "learning_rate": 4.603333333333333e-05, - "loss": 0.5435, - "step": 2763 - }, - { - "epoch": 0.09701819968760421, - "grad_norm": 0.6423272490501404, - "learning_rate": 4.605e-05, - "loss": 0.5247, - "step": 2764 - }, - { - "epoch": 0.09705330033872128, - "grad_norm": 0.7683864831924438, - "learning_rate": 4.606666666666667e-05, - "loss": 0.4699, - "step": 2765 - }, - { - "epoch": 0.09708840098983836, - "grad_norm": 0.693656861782074, - "learning_rate": 4.608333333333333e-05, - "loss": 0.5798, - "step": 2766 - }, - { - "epoch": 0.09712350164095544, - "grad_norm": 0.7854080200195312, - "learning_rate": 4.61e-05, - "loss": 0.6296, - "step": 2767 - }, - { - "epoch": 0.09715860229207252, - "grad_norm": 0.6789932250976562, - "learning_rate": 4.611666666666667e-05, - "loss": 0.5827, - "step": 2768 - }, - { - "epoch": 0.0971937029431896, - "grad_norm": 0.6182605624198914, - "learning_rate": 4.6133333333333334e-05, - "loss": 0.6304, - "step": 2769 - }, - { - "epoch": 0.09722880359430668, - "grad_norm": 0.7891863584518433, - "learning_rate": 4.6150000000000004e-05, - "loss": 0.5739, - "step": 2770 - }, - { - "epoch": 0.09726390424542375, - "grad_norm": 0.6538584232330322, - "learning_rate": 4.6166666666666666e-05, - "loss": 0.5895, - "step": 2771 - }, - { - "epoch": 0.09729900489654084, - "grad_norm": 0.6807616353034973, - "learning_rate": 4.6183333333333336e-05, - "loss": 0.5622, - "step": 2772 - }, - { - "epoch": 0.09733410554765791, - "grad_norm": 0.7434574961662292, - "learning_rate": 4.6200000000000005e-05, - "loss": 0.5198, - "step": 2773 - }, - { - "epoch": 0.09736920619877498, - "grad_norm": 0.6960951685905457, - "learning_rate": 4.621666666666667e-05, - "loss": 0.6101, - "step": 2774 - }, - { - "epoch": 0.09740430684989207, - "grad_norm": 0.650725781917572, - "learning_rate": 4.623333333333334e-05, - "loss": 0.4996, - "step": 2775 - }, - { - "epoch": 0.09743940750100914, - "grad_norm": 0.778532087802887, - "learning_rate": 4.6250000000000006e-05, - "loss": 0.5853, - "step": 2776 - }, - { - "epoch": 0.09747450815212622, - "grad_norm": 0.6690479516983032, - "learning_rate": 4.626666666666667e-05, - "loss": 0.535, - "step": 2777 - }, - { - "epoch": 0.0975096088032433, - "grad_norm": 0.7917090654373169, - "learning_rate": 4.628333333333333e-05, - "loss": 0.5029, - "step": 2778 - }, - { - "epoch": 0.09754470945436038, - "grad_norm": 0.692444384098053, - "learning_rate": 4.630000000000001e-05, - "loss": 0.477, - "step": 2779 - }, - { - "epoch": 0.09757981010547746, - "grad_norm": 0.6689179539680481, - "learning_rate": 4.631666666666667e-05, - "loss": 0.5686, - "step": 2780 - }, - { - "epoch": 0.09761491075659454, - "grad_norm": 0.715208888053894, - "learning_rate": 4.633333333333333e-05, - "loss": 0.5326, - "step": 2781 - }, - { - "epoch": 0.09765001140771161, - "grad_norm": 0.6990243792533875, - "learning_rate": 4.635e-05, - "loss": 0.6336, - "step": 2782 - }, - { - "epoch": 0.0976851120588287, - "grad_norm": 0.8440490365028381, - "learning_rate": 4.636666666666667e-05, - "loss": 0.4287, - "step": 2783 - }, - { - "epoch": 0.09772021270994577, - "grad_norm": 0.6148600578308105, - "learning_rate": 4.6383333333333334e-05, - "loss": 0.5988, - "step": 2784 - }, - { - "epoch": 0.09775531336106284, - "grad_norm": 0.702797532081604, - "learning_rate": 4.64e-05, - "loss": 0.6292, - "step": 2785 - }, - { - "epoch": 0.09779041401217993, - "grad_norm": 0.6686221957206726, - "learning_rate": 4.641666666666667e-05, - "loss": 0.518, - "step": 2786 - }, - { - "epoch": 0.097825514663297, - "grad_norm": 0.7075492739677429, - "learning_rate": 4.6433333333333335e-05, - "loss": 0.5425, - "step": 2787 - }, - { - "epoch": 0.09786061531441408, - "grad_norm": 0.6815154552459717, - "learning_rate": 4.6450000000000004e-05, - "loss": 0.5097, - "step": 2788 - }, - { - "epoch": 0.09789571596553116, - "grad_norm": 0.6945807337760925, - "learning_rate": 4.646666666666667e-05, - "loss": 0.5517, - "step": 2789 - }, - { - "epoch": 0.09793081661664824, - "grad_norm": 0.6198239326477051, - "learning_rate": 4.6483333333333336e-05, - "loss": 0.5357, - "step": 2790 - }, - { - "epoch": 0.09796591726776532, - "grad_norm": 0.6894412636756897, - "learning_rate": 4.6500000000000005e-05, - "loss": 0.6353, - "step": 2791 - }, - { - "epoch": 0.0980010179188824, - "grad_norm": 0.7672602534294128, - "learning_rate": 4.651666666666667e-05, - "loss": 0.5028, - "step": 2792 - }, - { - "epoch": 0.09803611856999947, - "grad_norm": 0.8373517394065857, - "learning_rate": 4.653333333333334e-05, - "loss": 0.423, - "step": 2793 - }, - { - "epoch": 0.09807121922111656, - "grad_norm": 0.6964385509490967, - "learning_rate": 4.655000000000001e-05, - "loss": 0.6123, - "step": 2794 - }, - { - "epoch": 0.09810631987223363, - "grad_norm": 0.6066370606422424, - "learning_rate": 4.656666666666667e-05, - "loss": 0.4363, - "step": 2795 - }, - { - "epoch": 0.0981414205233507, - "grad_norm": 0.7381938099861145, - "learning_rate": 4.658333333333333e-05, - "loss": 0.462, - "step": 2796 - }, - { - "epoch": 0.09817652117446779, - "grad_norm": 0.6768075823783875, - "learning_rate": 4.660000000000001e-05, - "loss": 0.5885, - "step": 2797 - }, - { - "epoch": 0.09821162182558486, - "grad_norm": 0.8107766509056091, - "learning_rate": 4.661666666666667e-05, - "loss": 0.5871, - "step": 2798 - }, - { - "epoch": 0.09824672247670194, - "grad_norm": 0.7111032605171204, - "learning_rate": 4.663333333333333e-05, - "loss": 0.5129, - "step": 2799 - }, - { - "epoch": 0.09828182312781902, - "grad_norm": 0.6218859553337097, - "learning_rate": 4.665e-05, - "loss": 0.5163, - "step": 2800 - }, - { - "epoch": 0.0983169237789361, - "grad_norm": 0.7160350680351257, - "learning_rate": 4.666666666666667e-05, - "loss": 0.5473, - "step": 2801 - }, - { - "epoch": 0.09835202443005318, - "grad_norm": 0.6916956305503845, - "learning_rate": 4.6683333333333334e-05, - "loss": 0.4316, - "step": 2802 - }, - { - "epoch": 0.09838712508117026, - "grad_norm": 0.6618161201477051, - "learning_rate": 4.6700000000000003e-05, - "loss": 0.6486, - "step": 2803 - }, - { - "epoch": 0.09842222573228733, - "grad_norm": 0.669982373714447, - "learning_rate": 4.671666666666667e-05, - "loss": 0.5701, - "step": 2804 - }, - { - "epoch": 0.09845732638340442, - "grad_norm": 0.6980327367782593, - "learning_rate": 4.6733333333333335e-05, - "loss": 0.436, - "step": 2805 - }, - { - "epoch": 0.09849242703452149, - "grad_norm": 0.6544129848480225, - "learning_rate": 4.6750000000000005e-05, - "loss": 0.6312, - "step": 2806 - }, - { - "epoch": 0.09852752768563856, - "grad_norm": 0.6224889755249023, - "learning_rate": 4.676666666666667e-05, - "loss": 0.5822, - "step": 2807 - }, - { - "epoch": 0.09856262833675565, - "grad_norm": 0.6049771904945374, - "learning_rate": 4.6783333333333337e-05, - "loss": 0.4748, - "step": 2808 - }, - { - "epoch": 0.09859772898787272, - "grad_norm": 0.6286863088607788, - "learning_rate": 4.6800000000000006e-05, - "loss": 0.4779, - "step": 2809 - }, - { - "epoch": 0.0986328296389898, - "grad_norm": 0.6159301400184631, - "learning_rate": 4.681666666666667e-05, - "loss": 0.5645, - "step": 2810 - }, - { - "epoch": 0.09866793029010688, - "grad_norm": 0.5812424421310425, - "learning_rate": 4.683333333333334e-05, - "loss": 0.5468, - "step": 2811 - }, - { - "epoch": 0.09870303094122396, - "grad_norm": 0.6692481637001038, - "learning_rate": 4.685000000000001e-05, - "loss": 0.5322, - "step": 2812 - }, - { - "epoch": 0.09873813159234104, - "grad_norm": 0.6440367698669434, - "learning_rate": 4.686666666666667e-05, - "loss": 0.613, - "step": 2813 - }, - { - "epoch": 0.09877323224345812, - "grad_norm": 0.6919503808021545, - "learning_rate": 4.688333333333333e-05, - "loss": 0.5298, - "step": 2814 - }, - { - "epoch": 0.09880833289457519, - "grad_norm": 0.6829193234443665, - "learning_rate": 4.69e-05, - "loss": 0.5078, - "step": 2815 - }, - { - "epoch": 0.09884343354569228, - "grad_norm": 0.6358184814453125, - "learning_rate": 4.691666666666667e-05, - "loss": 0.4827, - "step": 2816 - }, - { - "epoch": 0.09887853419680935, - "grad_norm": 0.6279439926147461, - "learning_rate": 4.6933333333333333e-05, - "loss": 0.4357, - "step": 2817 - }, - { - "epoch": 0.09891363484792642, - "grad_norm": 0.8188804388046265, - "learning_rate": 4.695e-05, - "loss": 0.5536, - "step": 2818 - }, - { - "epoch": 0.09894873549904351, - "grad_norm": 0.6322032809257507, - "learning_rate": 4.696666666666667e-05, - "loss": 0.5293, - "step": 2819 - }, - { - "epoch": 0.09898383615016058, - "grad_norm": 1.0169817209243774, - "learning_rate": 4.6983333333333335e-05, - "loss": 0.6205, - "step": 2820 - }, - { - "epoch": 0.09901893680127766, - "grad_norm": 0.6601396799087524, - "learning_rate": 4.7e-05, - "loss": 0.4417, - "step": 2821 - }, - { - "epoch": 0.09905403745239474, - "grad_norm": 0.7128034830093384, - "learning_rate": 4.701666666666667e-05, - "loss": 0.5627, - "step": 2822 - }, - { - "epoch": 0.09908913810351182, - "grad_norm": 0.7533126473426819, - "learning_rate": 4.7033333333333336e-05, - "loss": 0.6302, - "step": 2823 - }, - { - "epoch": 0.0991242387546289, - "grad_norm": 0.8674376010894775, - "learning_rate": 4.705e-05, - "loss": 0.5027, - "step": 2824 - }, - { - "epoch": 0.09915933940574598, - "grad_norm": 0.6870859265327454, - "learning_rate": 4.706666666666667e-05, - "loss": 0.5385, - "step": 2825 - }, - { - "epoch": 0.09919444005686305, - "grad_norm": 0.6536357402801514, - "learning_rate": 4.708333333333334e-05, - "loss": 0.5827, - "step": 2826 - }, - { - "epoch": 0.09922954070798014, - "grad_norm": 0.7247697114944458, - "learning_rate": 4.71e-05, - "loss": 0.52, - "step": 2827 - }, - { - "epoch": 0.09926464135909721, - "grad_norm": 0.7005322575569153, - "learning_rate": 4.711666666666667e-05, - "loss": 0.4475, - "step": 2828 - }, - { - "epoch": 0.09929974201021428, - "grad_norm": 0.6740981936454773, - "learning_rate": 4.713333333333333e-05, - "loss": 0.6563, - "step": 2829 - }, - { - "epoch": 0.09933484266133137, - "grad_norm": 0.8399416208267212, - "learning_rate": 4.715e-05, - "loss": 0.4827, - "step": 2830 - }, - { - "epoch": 0.09936994331244844, - "grad_norm": 0.7832150459289551, - "learning_rate": 4.716666666666667e-05, - "loss": 0.6623, - "step": 2831 - }, - { - "epoch": 0.09940504396356552, - "grad_norm": 0.6542021036148071, - "learning_rate": 4.718333333333333e-05, - "loss": 0.5074, - "step": 2832 - }, - { - "epoch": 0.0994401446146826, - "grad_norm": 0.6672087907791138, - "learning_rate": 4.72e-05, - "loss": 0.5044, - "step": 2833 - }, - { - "epoch": 0.09947524526579968, - "grad_norm": 1.2631577253341675, - "learning_rate": 4.721666666666667e-05, - "loss": 0.6977, - "step": 2834 - }, - { - "epoch": 0.09951034591691676, - "grad_norm": 0.7517412900924683, - "learning_rate": 4.7233333333333334e-05, - "loss": 0.4845, - "step": 2835 - }, - { - "epoch": 0.09954544656803384, - "grad_norm": 0.6209465861320496, - "learning_rate": 4.7249999999999997e-05, - "loss": 0.4593, - "step": 2836 - }, - { - "epoch": 0.09958054721915091, - "grad_norm": 0.5780912041664124, - "learning_rate": 4.726666666666667e-05, - "loss": 0.4552, - "step": 2837 - }, - { - "epoch": 0.099615647870268, - "grad_norm": 0.6587122678756714, - "learning_rate": 4.7283333333333335e-05, - "loss": 0.6563, - "step": 2838 - }, - { - "epoch": 0.09965074852138507, - "grad_norm": 0.7026985287666321, - "learning_rate": 4.73e-05, - "loss": 0.41, - "step": 2839 - }, - { - "epoch": 0.09968584917250214, - "grad_norm": 0.6990641355514526, - "learning_rate": 4.731666666666667e-05, - "loss": 0.5709, - "step": 2840 - }, - { - "epoch": 0.09972094982361923, - "grad_norm": 0.7025068998336792, - "learning_rate": 4.7333333333333336e-05, - "loss": 0.4942, - "step": 2841 - }, - { - "epoch": 0.0997560504747363, - "grad_norm": 0.7048470973968506, - "learning_rate": 4.735e-05, - "loss": 0.4795, - "step": 2842 - }, - { - "epoch": 0.09979115112585338, - "grad_norm": 0.8115831613540649, - "learning_rate": 4.736666666666667e-05, - "loss": 0.5806, - "step": 2843 - }, - { - "epoch": 0.09982625177697046, - "grad_norm": 0.6225636005401611, - "learning_rate": 4.738333333333334e-05, - "loss": 0.4607, - "step": 2844 - }, - { - "epoch": 0.09986135242808754, - "grad_norm": 0.6407011151313782, - "learning_rate": 4.74e-05, - "loss": 0.5865, - "step": 2845 - }, - { - "epoch": 0.09989645307920463, - "grad_norm": 0.8740142583847046, - "learning_rate": 4.741666666666667e-05, - "loss": 0.5679, - "step": 2846 - }, - { - "epoch": 0.0999315537303217, - "grad_norm": 0.660116970539093, - "learning_rate": 4.743333333333333e-05, - "loss": 0.4974, - "step": 2847 - }, - { - "epoch": 0.09996665438143877, - "grad_norm": 0.7601189613342285, - "learning_rate": 4.745e-05, - "loss": 0.5122, - "step": 2848 - }, - { - "epoch": 0.10000175503255586, - "grad_norm": 0.7605468034744263, - "learning_rate": 4.746666666666667e-05, - "loss": 0.5602, - "step": 2849 - }, - { - "epoch": 0.10003685568367293, - "grad_norm": 0.6459551453590393, - "learning_rate": 4.748333333333333e-05, - "loss": 0.5313, - "step": 2850 - }, - { - "epoch": 0.10007195633479, - "grad_norm": 0.8451781868934631, - "learning_rate": 4.75e-05, - "loss": 0.5883, - "step": 2851 - }, - { - "epoch": 0.10010705698590709, - "grad_norm": 0.7370774745941162, - "learning_rate": 4.751666666666667e-05, - "loss": 0.6367, - "step": 2852 - }, - { - "epoch": 0.10014215763702417, - "grad_norm": 0.6960186958312988, - "learning_rate": 4.7533333333333334e-05, - "loss": 0.5496, - "step": 2853 - }, - { - "epoch": 0.10017725828814124, - "grad_norm": 0.7016911506652832, - "learning_rate": 4.755e-05, - "loss": 0.5457, - "step": 2854 - }, - { - "epoch": 0.10021235893925833, - "grad_norm": 0.7205179333686829, - "learning_rate": 4.756666666666667e-05, - "loss": 0.5877, - "step": 2855 - }, - { - "epoch": 0.1002474595903754, - "grad_norm": 1.0431588888168335, - "learning_rate": 4.7583333333333336e-05, - "loss": 0.5242, - "step": 2856 - }, - { - "epoch": 0.10028256024149249, - "grad_norm": 0.5862993001937866, - "learning_rate": 4.76e-05, - "loss": 0.5468, - "step": 2857 - }, - { - "epoch": 0.10031766089260956, - "grad_norm": 0.7520979046821594, - "learning_rate": 4.761666666666667e-05, - "loss": 0.5756, - "step": 2858 - }, - { - "epoch": 0.10035276154372663, - "grad_norm": 0.8270582556724548, - "learning_rate": 4.763333333333334e-05, - "loss": 0.4714, - "step": 2859 - }, - { - "epoch": 0.10038786219484372, - "grad_norm": 0.7526752948760986, - "learning_rate": 4.765e-05, - "loss": 0.5083, - "step": 2860 - }, - { - "epoch": 0.10042296284596079, - "grad_norm": 0.6865964531898499, - "learning_rate": 4.766666666666667e-05, - "loss": 0.445, - "step": 2861 - }, - { - "epoch": 0.10045806349707787, - "grad_norm": 0.57756507396698, - "learning_rate": 4.768333333333334e-05, - "loss": 0.5293, - "step": 2862 - }, - { - "epoch": 0.10049316414819495, - "grad_norm": 0.6815480589866638, - "learning_rate": 4.77e-05, - "loss": 0.5401, - "step": 2863 - }, - { - "epoch": 0.10052826479931203, - "grad_norm": 0.7253823280334473, - "learning_rate": 4.771666666666667e-05, - "loss": 0.565, - "step": 2864 - }, - { - "epoch": 0.1005633654504291, - "grad_norm": 0.6658036708831787, - "learning_rate": 4.773333333333333e-05, - "loss": 0.5721, - "step": 2865 - }, - { - "epoch": 0.10059846610154619, - "grad_norm": 0.5818290114402771, - "learning_rate": 4.775e-05, - "loss": 0.4403, - "step": 2866 - }, - { - "epoch": 0.10063356675266326, - "grad_norm": 0.5628954768180847, - "learning_rate": 4.776666666666667e-05, - "loss": 0.3861, - "step": 2867 - }, - { - "epoch": 0.10066866740378035, - "grad_norm": 0.7717320322990417, - "learning_rate": 4.7783333333333334e-05, - "loss": 0.5101, - "step": 2868 - }, - { - "epoch": 0.10070376805489742, - "grad_norm": 0.6317097544670105, - "learning_rate": 4.78e-05, - "loss": 0.4988, - "step": 2869 - }, - { - "epoch": 0.10073886870601449, - "grad_norm": 0.7216052412986755, - "learning_rate": 4.781666666666667e-05, - "loss": 0.604, - "step": 2870 - }, - { - "epoch": 0.10077396935713158, - "grad_norm": 0.6070287823677063, - "learning_rate": 4.7833333333333335e-05, - "loss": 0.5549, - "step": 2871 - }, - { - "epoch": 0.10080907000824865, - "grad_norm": 0.806025505065918, - "learning_rate": 4.785e-05, - "loss": 0.5433, - "step": 2872 - }, - { - "epoch": 0.10084417065936573, - "grad_norm": 0.8454792499542236, - "learning_rate": 4.7866666666666674e-05, - "loss": 0.4907, - "step": 2873 - }, - { - "epoch": 0.10087927131048281, - "grad_norm": 0.6476381421089172, - "learning_rate": 4.7883333333333336e-05, - "loss": 0.6111, - "step": 2874 - }, - { - "epoch": 0.10091437196159989, - "grad_norm": 0.7004950642585754, - "learning_rate": 4.79e-05, - "loss": 0.6125, - "step": 2875 - }, - { - "epoch": 0.10094947261271697, - "grad_norm": 0.6373005509376526, - "learning_rate": 4.791666666666667e-05, - "loss": 0.5713, - "step": 2876 - }, - { - "epoch": 0.10098457326383405, - "grad_norm": 1.018912434577942, - "learning_rate": 4.793333333333334e-05, - "loss": 0.6371, - "step": 2877 - }, - { - "epoch": 0.10101967391495112, - "grad_norm": 0.7587532997131348, - "learning_rate": 4.795e-05, - "loss": 0.5666, - "step": 2878 - }, - { - "epoch": 0.1010547745660682, - "grad_norm": 0.7520046234130859, - "learning_rate": 4.796666666666667e-05, - "loss": 0.6942, - "step": 2879 - }, - { - "epoch": 0.10108987521718528, - "grad_norm": 0.693989098072052, - "learning_rate": 4.798333333333334e-05, - "loss": 0.5148, - "step": 2880 - }, - { - "epoch": 0.10112497586830235, - "grad_norm": 0.7826319932937622, - "learning_rate": 4.8e-05, - "loss": 0.4616, - "step": 2881 - }, - { - "epoch": 0.10116007651941944, - "grad_norm": 0.7451441884040833, - "learning_rate": 4.801666666666667e-05, - "loss": 0.5418, - "step": 2882 - }, - { - "epoch": 0.10119517717053651, - "grad_norm": 0.6215334534645081, - "learning_rate": 4.803333333333333e-05, - "loss": 0.449, - "step": 2883 - }, - { - "epoch": 0.10123027782165359, - "grad_norm": 0.6908101439476013, - "learning_rate": 4.805e-05, - "loss": 0.4081, - "step": 2884 - }, - { - "epoch": 0.10126537847277067, - "grad_norm": 0.6983919739723206, - "learning_rate": 4.806666666666667e-05, - "loss": 0.5867, - "step": 2885 - }, - { - "epoch": 0.10130047912388775, - "grad_norm": 0.5930376052856445, - "learning_rate": 4.8083333333333334e-05, - "loss": 0.6178, - "step": 2886 - }, - { - "epoch": 0.10133557977500483, - "grad_norm": 0.5716097950935364, - "learning_rate": 4.8100000000000004e-05, - "loss": 0.4678, - "step": 2887 - }, - { - "epoch": 0.1013706804261219, - "grad_norm": 0.7293802499771118, - "learning_rate": 4.811666666666667e-05, - "loss": 0.5717, - "step": 2888 - }, - { - "epoch": 0.10140578107723898, - "grad_norm": 0.5754700899124146, - "learning_rate": 4.8133333333333336e-05, - "loss": 0.4251, - "step": 2889 - }, - { - "epoch": 0.10144088172835607, - "grad_norm": 0.6425073742866516, - "learning_rate": 4.815e-05, - "loss": 0.4878, - "step": 2890 - }, - { - "epoch": 0.10147598237947314, - "grad_norm": 0.725957989692688, - "learning_rate": 4.8166666666666674e-05, - "loss": 0.4476, - "step": 2891 - }, - { - "epoch": 0.10151108303059021, - "grad_norm": 0.71188884973526, - "learning_rate": 4.818333333333334e-05, - "loss": 0.5554, - "step": 2892 - }, - { - "epoch": 0.1015461836817073, - "grad_norm": 0.6096182465553284, - "learning_rate": 4.82e-05, - "loss": 0.5406, - "step": 2893 - }, - { - "epoch": 0.10158128433282437, - "grad_norm": 0.6356244683265686, - "learning_rate": 4.821666666666667e-05, - "loss": 0.5081, - "step": 2894 - }, - { - "epoch": 0.10161638498394145, - "grad_norm": 0.8047494888305664, - "learning_rate": 4.823333333333334e-05, - "loss": 0.598, - "step": 2895 - }, - { - "epoch": 0.10165148563505853, - "grad_norm": 0.6052554249763489, - "learning_rate": 4.825e-05, - "loss": 0.4719, - "step": 2896 - }, - { - "epoch": 0.1016865862861756, - "grad_norm": 0.6990760564804077, - "learning_rate": 4.826666666666667e-05, - "loss": 0.6269, - "step": 2897 - }, - { - "epoch": 0.1017216869372927, - "grad_norm": 0.675819993019104, - "learning_rate": 4.828333333333334e-05, - "loss": 0.5236, - "step": 2898 - }, - { - "epoch": 0.10175678758840977, - "grad_norm": 0.5972265601158142, - "learning_rate": 4.83e-05, - "loss": 0.3915, - "step": 2899 - }, - { - "epoch": 0.10179188823952684, - "grad_norm": 0.5433881282806396, - "learning_rate": 4.831666666666667e-05, - "loss": 0.5001, - "step": 2900 - }, - { - "epoch": 0.10182698889064393, - "grad_norm": 0.6600993871688843, - "learning_rate": 4.8333333333333334e-05, - "loss": 0.7097, - "step": 2901 - }, - { - "epoch": 0.101862089541761, - "grad_norm": 0.6581844091415405, - "learning_rate": 4.835e-05, - "loss": 0.5862, - "step": 2902 - }, - { - "epoch": 0.10189719019287807, - "grad_norm": 0.6303026676177979, - "learning_rate": 4.836666666666667e-05, - "loss": 0.5088, - "step": 2903 - }, - { - "epoch": 0.10193229084399516, - "grad_norm": 0.6541187763214111, - "learning_rate": 4.8383333333333335e-05, - "loss": 0.5972, - "step": 2904 - }, - { - "epoch": 0.10196739149511223, - "grad_norm": 0.6858148574829102, - "learning_rate": 4.8400000000000004e-05, - "loss": 0.5818, - "step": 2905 - }, - { - "epoch": 0.1020024921462293, - "grad_norm": 0.6088797450065613, - "learning_rate": 4.8416666666666673e-05, - "loss": 0.5075, - "step": 2906 - }, - { - "epoch": 0.1020375927973464, - "grad_norm": 0.7248406410217285, - "learning_rate": 4.8433333333333336e-05, - "loss": 0.6048, - "step": 2907 - }, - { - "epoch": 0.10207269344846347, - "grad_norm": 0.6527935862541199, - "learning_rate": 4.845e-05, - "loss": 0.4615, - "step": 2908 - }, - { - "epoch": 0.10210779409958055, - "grad_norm": 0.6892026662826538, - "learning_rate": 4.8466666666666675e-05, - "loss": 0.4726, - "step": 2909 - }, - { - "epoch": 0.10214289475069763, - "grad_norm": 0.6045893430709839, - "learning_rate": 4.848333333333334e-05, - "loss": 0.4231, - "step": 2910 - }, - { - "epoch": 0.1021779954018147, - "grad_norm": 0.7701821327209473, - "learning_rate": 4.85e-05, - "loss": 0.4309, - "step": 2911 - }, - { - "epoch": 0.10221309605293179, - "grad_norm": 0.8160079121589661, - "learning_rate": 4.851666666666667e-05, - "loss": 0.6775, - "step": 2912 - }, - { - "epoch": 0.10224819670404886, - "grad_norm": 0.6486470699310303, - "learning_rate": 4.853333333333334e-05, - "loss": 0.5508, - "step": 2913 - }, - { - "epoch": 0.10228329735516593, - "grad_norm": 0.883394718170166, - "learning_rate": 4.855e-05, - "loss": 0.585, - "step": 2914 - }, - { - "epoch": 0.10231839800628302, - "grad_norm": 0.687179446220398, - "learning_rate": 4.856666666666667e-05, - "loss": 0.5054, - "step": 2915 - }, - { - "epoch": 0.1023534986574001, - "grad_norm": 0.6792639493942261, - "learning_rate": 4.858333333333333e-05, - "loss": 0.5935, - "step": 2916 - }, - { - "epoch": 0.10238859930851717, - "grad_norm": 0.683747410774231, - "learning_rate": 4.86e-05, - "loss": 0.4815, - "step": 2917 - }, - { - "epoch": 0.10242369995963425, - "grad_norm": 0.7211899161338806, - "learning_rate": 4.861666666666667e-05, - "loss": 0.548, - "step": 2918 - }, - { - "epoch": 0.10245880061075133, - "grad_norm": 0.7007673978805542, - "learning_rate": 4.8633333333333334e-05, - "loss": 0.5023, - "step": 2919 - }, - { - "epoch": 0.10249390126186841, - "grad_norm": 0.96873539686203, - "learning_rate": 4.8650000000000003e-05, - "loss": 0.5939, - "step": 2920 - }, - { - "epoch": 0.10252900191298549, - "grad_norm": 0.8816741108894348, - "learning_rate": 4.866666666666667e-05, - "loss": 0.6045, - "step": 2921 - }, - { - "epoch": 0.10256410256410256, - "grad_norm": 0.6406329274177551, - "learning_rate": 4.8683333333333335e-05, - "loss": 0.4548, - "step": 2922 - }, - { - "epoch": 0.10259920321521965, - "grad_norm": 0.6410990357398987, - "learning_rate": 4.87e-05, - "loss": 0.5467, - "step": 2923 - }, - { - "epoch": 0.10263430386633672, - "grad_norm": 0.6164471507072449, - "learning_rate": 4.8716666666666674e-05, - "loss": 0.5036, - "step": 2924 - }, - { - "epoch": 0.1026694045174538, - "grad_norm": 0.7089288234710693, - "learning_rate": 4.8733333333333337e-05, - "loss": 0.5655, - "step": 2925 - }, - { - "epoch": 0.10270450516857088, - "grad_norm": 1.3691662549972534, - "learning_rate": 4.875e-05, - "loss": 0.6766, - "step": 2926 - }, - { - "epoch": 0.10273960581968795, - "grad_norm": 0.6338435411453247, - "learning_rate": 4.876666666666667e-05, - "loss": 0.6146, - "step": 2927 - }, - { - "epoch": 0.10277470647080503, - "grad_norm": 0.6471400260925293, - "learning_rate": 4.878333333333334e-05, - "loss": 0.4877, - "step": 2928 - }, - { - "epoch": 0.10280980712192211, - "grad_norm": 0.659509003162384, - "learning_rate": 4.88e-05, - "loss": 0.608, - "step": 2929 - }, - { - "epoch": 0.10284490777303919, - "grad_norm": 0.6619380116462708, - "learning_rate": 4.881666666666667e-05, - "loss": 0.5301, - "step": 2930 - }, - { - "epoch": 0.10288000842415627, - "grad_norm": 0.7228443622589111, - "learning_rate": 4.883333333333334e-05, - "loss": 0.5656, - "step": 2931 - }, - { - "epoch": 0.10291510907527335, - "grad_norm": 0.590498149394989, - "learning_rate": 4.885e-05, - "loss": 0.5237, - "step": 2932 - }, - { - "epoch": 0.10295020972639042, - "grad_norm": 0.6266579627990723, - "learning_rate": 4.886666666666667e-05, - "loss": 0.61, - "step": 2933 - }, - { - "epoch": 0.10298531037750751, - "grad_norm": 0.8121914863586426, - "learning_rate": 4.8883333333333333e-05, - "loss": 0.5302, - "step": 2934 - }, - { - "epoch": 0.10302041102862458, - "grad_norm": 0.6263863444328308, - "learning_rate": 4.89e-05, - "loss": 0.5497, - "step": 2935 - }, - { - "epoch": 0.10305551167974165, - "grad_norm": 1.051953911781311, - "learning_rate": 4.891666666666667e-05, - "loss": 0.6468, - "step": 2936 - }, - { - "epoch": 0.10309061233085874, - "grad_norm": 0.7298235297203064, - "learning_rate": 4.8933333333333335e-05, - "loss": 0.5453, - "step": 2937 - }, - { - "epoch": 0.10312571298197581, - "grad_norm": 0.682137131690979, - "learning_rate": 4.8950000000000004e-05, - "loss": 0.5411, - "step": 2938 - }, - { - "epoch": 0.10316081363309289, - "grad_norm": 0.6014999747276306, - "learning_rate": 4.8966666666666667e-05, - "loss": 0.5007, - "step": 2939 - }, - { - "epoch": 0.10319591428420997, - "grad_norm": 0.5795685052871704, - "learning_rate": 4.8983333333333336e-05, - "loss": 0.5439, - "step": 2940 - }, - { - "epoch": 0.10323101493532705, - "grad_norm": 1.051537275314331, - "learning_rate": 4.9e-05, - "loss": 0.5773, - "step": 2941 - }, - { - "epoch": 0.10326611558644413, - "grad_norm": 0.6638875007629395, - "learning_rate": 4.901666666666667e-05, - "loss": 0.5789, - "step": 2942 - }, - { - "epoch": 0.10330121623756121, - "grad_norm": 0.7315919995307922, - "learning_rate": 4.903333333333334e-05, - "loss": 0.5641, - "step": 2943 - }, - { - "epoch": 0.10333631688867828, - "grad_norm": 0.7672233581542969, - "learning_rate": 4.905e-05, - "loss": 0.4652, - "step": 2944 - }, - { - "epoch": 0.10337141753979537, - "grad_norm": 0.7486818432807922, - "learning_rate": 4.906666666666667e-05, - "loss": 0.4275, - "step": 2945 - }, - { - "epoch": 0.10340651819091244, - "grad_norm": 0.5999351143836975, - "learning_rate": 4.908333333333334e-05, - "loss": 0.5384, - "step": 2946 - }, - { - "epoch": 0.10344161884202951, - "grad_norm": 0.7558861970901489, - "learning_rate": 4.91e-05, - "loss": 0.5971, - "step": 2947 - }, - { - "epoch": 0.1034767194931466, - "grad_norm": 0.818730890750885, - "learning_rate": 4.9116666666666663e-05, - "loss": 0.6555, - "step": 2948 - }, - { - "epoch": 0.10351182014426367, - "grad_norm": 0.7085680365562439, - "learning_rate": 4.913333333333334e-05, - "loss": 0.6477, - "step": 2949 - }, - { - "epoch": 0.10354692079538075, - "grad_norm": 0.7544760704040527, - "learning_rate": 4.915e-05, - "loss": 0.5201, - "step": 2950 - }, - { - "epoch": 0.10358202144649784, - "grad_norm": 0.6897989511489868, - "learning_rate": 4.9166666666666665e-05, - "loss": 0.5992, - "step": 2951 - }, - { - "epoch": 0.10361712209761491, - "grad_norm": 0.6799570322036743, - "learning_rate": 4.9183333333333334e-05, - "loss": 0.4089, - "step": 2952 - }, - { - "epoch": 0.103652222748732, - "grad_norm": 0.5951196551322937, - "learning_rate": 4.92e-05, - "loss": 0.5272, - "step": 2953 - }, - { - "epoch": 0.10368732339984907, - "grad_norm": 0.7390590906143188, - "learning_rate": 4.9216666666666666e-05, - "loss": 0.4836, - "step": 2954 - }, - { - "epoch": 0.10372242405096614, - "grad_norm": 0.6598067879676819, - "learning_rate": 4.9233333333333335e-05, - "loss": 0.4181, - "step": 2955 - }, - { - "epoch": 0.10375752470208323, - "grad_norm": 0.7463845610618591, - "learning_rate": 4.9250000000000004e-05, - "loss": 0.5488, - "step": 2956 - }, - { - "epoch": 0.1037926253532003, - "grad_norm": 1.0755946636199951, - "learning_rate": 4.926666666666667e-05, - "loss": 0.5879, - "step": 2957 - }, - { - "epoch": 0.10382772600431737, - "grad_norm": 0.6041322350502014, - "learning_rate": 4.9283333333333336e-05, - "loss": 0.5147, - "step": 2958 - }, - { - "epoch": 0.10386282665543446, - "grad_norm": 0.7376183867454529, - "learning_rate": 4.93e-05, - "loss": 0.6356, - "step": 2959 - }, - { - "epoch": 0.10389792730655154, - "grad_norm": 0.7502118349075317, - "learning_rate": 4.931666666666667e-05, - "loss": 0.559, - "step": 2960 - }, - { - "epoch": 0.10393302795766861, - "grad_norm": 0.663476288318634, - "learning_rate": 4.933333333333334e-05, - "loss": 0.6335, - "step": 2961 - }, - { - "epoch": 0.1039681286087857, - "grad_norm": 0.65740567445755, - "learning_rate": 4.935e-05, - "loss": 0.529, - "step": 2962 - }, - { - "epoch": 0.10400322925990277, - "grad_norm": 0.6586572527885437, - "learning_rate": 4.936666666666667e-05, - "loss": 0.4976, - "step": 2963 - }, - { - "epoch": 0.10403832991101986, - "grad_norm": 0.8447552919387817, - "learning_rate": 4.938333333333334e-05, - "loss": 0.6991, - "step": 2964 - }, - { - "epoch": 0.10407343056213693, - "grad_norm": 0.7009012699127197, - "learning_rate": 4.94e-05, - "loss": 0.5012, - "step": 2965 - }, - { - "epoch": 0.104108531213254, - "grad_norm": 0.5254390835762024, - "learning_rate": 4.9416666666666664e-05, - "loss": 0.4634, - "step": 2966 - }, - { - "epoch": 0.10414363186437109, - "grad_norm": 0.7568767070770264, - "learning_rate": 4.943333333333334e-05, - "loss": 0.5595, - "step": 2967 - }, - { - "epoch": 0.10417873251548816, - "grad_norm": 0.8155142664909363, - "learning_rate": 4.945e-05, - "loss": 0.5512, - "step": 2968 - }, - { - "epoch": 0.10421383316660524, - "grad_norm": 0.5824988484382629, - "learning_rate": 4.9466666666666665e-05, - "loss": 0.3507, - "step": 2969 - }, - { - "epoch": 0.10424893381772232, - "grad_norm": 0.6394709348678589, - "learning_rate": 4.9483333333333334e-05, - "loss": 0.5277, - "step": 2970 - }, - { - "epoch": 0.1042840344688394, - "grad_norm": 0.6002989411354065, - "learning_rate": 4.9500000000000004e-05, - "loss": 0.5062, - "step": 2971 - }, - { - "epoch": 0.10431913511995647, - "grad_norm": 0.7172956466674805, - "learning_rate": 4.9516666666666666e-05, - "loss": 0.6296, - "step": 2972 - }, - { - "epoch": 0.10435423577107356, - "grad_norm": 0.9529057145118713, - "learning_rate": 4.9533333333333336e-05, - "loss": 0.6383, - "step": 2973 - }, - { - "epoch": 0.10438933642219063, - "grad_norm": 0.7884171605110168, - "learning_rate": 4.9550000000000005e-05, - "loss": 0.4702, - "step": 2974 - }, - { - "epoch": 0.10442443707330772, - "grad_norm": 0.8065502643585205, - "learning_rate": 4.956666666666667e-05, - "loss": 0.5556, - "step": 2975 - }, - { - "epoch": 0.10445953772442479, - "grad_norm": 0.712104082107544, - "learning_rate": 4.958333333333334e-05, - "loss": 0.5108, - "step": 2976 - }, - { - "epoch": 0.10449463837554186, - "grad_norm": 0.6508561968803406, - "learning_rate": 4.96e-05, - "loss": 0.6127, - "step": 2977 - }, - { - "epoch": 0.10452973902665895, - "grad_norm": 0.629836916923523, - "learning_rate": 4.961666666666667e-05, - "loss": 0.6153, - "step": 2978 - }, - { - "epoch": 0.10456483967777602, - "grad_norm": 0.5880177617073059, - "learning_rate": 4.963333333333334e-05, - "loss": 0.6394, - "step": 2979 - }, - { - "epoch": 0.1045999403288931, - "grad_norm": 0.7795565724372864, - "learning_rate": 4.965e-05, - "loss": 0.5483, - "step": 2980 - }, - { - "epoch": 0.10463504098001018, - "grad_norm": 0.6918127536773682, - "learning_rate": 4.966666666666667e-05, - "loss": 0.6031, - "step": 2981 - }, - { - "epoch": 0.10467014163112726, - "grad_norm": 1.0652241706848145, - "learning_rate": 4.968333333333334e-05, - "loss": 0.5352, - "step": 2982 - }, - { - "epoch": 0.10470524228224433, - "grad_norm": 0.6096606850624084, - "learning_rate": 4.97e-05, - "loss": 0.5198, - "step": 2983 - }, - { - "epoch": 0.10474034293336142, - "grad_norm": 0.7173064947128296, - "learning_rate": 4.9716666666666664e-05, - "loss": 0.5443, - "step": 2984 - }, - { - "epoch": 0.10477544358447849, - "grad_norm": 0.7393389344215393, - "learning_rate": 4.973333333333334e-05, - "loss": 0.5594, - "step": 2985 - }, - { - "epoch": 0.10481054423559558, - "grad_norm": 0.6488310098648071, - "learning_rate": 4.975e-05, - "loss": 0.5515, - "step": 2986 - }, - { - "epoch": 0.10484564488671265, - "grad_norm": 0.6470093727111816, - "learning_rate": 4.9766666666666666e-05, - "loss": 0.5102, - "step": 2987 - }, - { - "epoch": 0.10488074553782972, - "grad_norm": 0.6964113116264343, - "learning_rate": 4.9783333333333335e-05, - "loss": 0.5356, - "step": 2988 - }, - { - "epoch": 0.10491584618894681, - "grad_norm": 0.616308331489563, - "learning_rate": 4.9800000000000004e-05, - "loss": 0.6119, - "step": 2989 - }, - { - "epoch": 0.10495094684006388, - "grad_norm": 0.6174660921096802, - "learning_rate": 4.981666666666667e-05, - "loss": 0.483, - "step": 2990 - }, - { - "epoch": 0.10498604749118096, - "grad_norm": 0.6972153782844543, - "learning_rate": 4.9833333333333336e-05, - "loss": 0.5117, - "step": 2991 - }, - { - "epoch": 0.10502114814229804, - "grad_norm": 0.6776761412620544, - "learning_rate": 4.9850000000000006e-05, - "loss": 0.5713, - "step": 2992 - }, - { - "epoch": 0.10505624879341512, - "grad_norm": 0.6164352297782898, - "learning_rate": 4.986666666666667e-05, - "loss": 0.4335, - "step": 2993 - }, - { - "epoch": 0.10509134944453219, - "grad_norm": 0.7239603400230408, - "learning_rate": 4.988333333333334e-05, - "loss": 0.6429, - "step": 2994 - }, - { - "epoch": 0.10512645009564928, - "grad_norm": 0.8498432636260986, - "learning_rate": 4.99e-05, - "loss": 0.585, - "step": 2995 - }, - { - "epoch": 0.10516155074676635, - "grad_norm": 0.716417133808136, - "learning_rate": 4.991666666666667e-05, - "loss": 0.4957, - "step": 2996 - }, - { - "epoch": 0.10519665139788344, - "grad_norm": 0.8459712862968445, - "learning_rate": 4.993333333333334e-05, - "loss": 0.6081, - "step": 2997 - }, - { - "epoch": 0.10523175204900051, - "grad_norm": 0.6196990013122559, - "learning_rate": 4.995e-05, - "loss": 0.5128, - "step": 2998 - }, - { - "epoch": 0.10526685270011758, - "grad_norm": 1.0793451070785522, - "learning_rate": 4.996666666666667e-05, - "loss": 0.5936, - "step": 2999 - }, - { - "epoch": 0.10530195335123467, - "grad_norm": 0.6978861689567566, - "learning_rate": 4.998333333333334e-05, - "loss": 0.4356, - "step": 3000 - }, - { - "epoch": 0.10533705400235174, - "grad_norm": 0.8536586761474609, - "learning_rate": 5e-05, - "loss": 0.5466, - "step": 3001 - }, - { - "epoch": 0.10537215465346882, - "grad_norm": 0.603816568851471, - "learning_rate": 4.999814814814815e-05, - "loss": 0.5805, - "step": 3002 - }, - { - "epoch": 0.1054072553045859, - "grad_norm": 0.9095315337181091, - "learning_rate": 4.9996296296296296e-05, - "loss": 0.5894, - "step": 3003 - }, - { - "epoch": 0.10544235595570298, - "grad_norm": 0.6745477914810181, - "learning_rate": 4.9994444444444446e-05, - "loss": 0.5422, - "step": 3004 - }, - { - "epoch": 0.10547745660682005, - "grad_norm": 0.608215868473053, - "learning_rate": 4.9992592592592596e-05, - "loss": 0.5681, - "step": 3005 - }, - { - "epoch": 0.10551255725793714, - "grad_norm": 0.7895101308822632, - "learning_rate": 4.9990740740740746e-05, - "loss": 0.5333, - "step": 3006 - }, - { - "epoch": 0.10554765790905421, - "grad_norm": 0.8462368845939636, - "learning_rate": 4.998888888888889e-05, - "loss": 0.5906, - "step": 3007 - }, - { - "epoch": 0.1055827585601713, - "grad_norm": 0.5428255200386047, - "learning_rate": 4.998703703703704e-05, - "loss": 0.4409, - "step": 3008 - }, - { - "epoch": 0.10561785921128837, - "grad_norm": 0.6652342677116394, - "learning_rate": 4.998518518518518e-05, - "loss": 0.4923, - "step": 3009 - }, - { - "epoch": 0.10565295986240544, - "grad_norm": 0.6487823128700256, - "learning_rate": 4.998333333333334e-05, - "loss": 0.5548, - "step": 3010 - }, - { - "epoch": 0.10568806051352253, - "grad_norm": 0.6037496328353882, - "learning_rate": 4.998148148148148e-05, - "loss": 0.6009, - "step": 3011 - }, - { - "epoch": 0.1057231611646396, - "grad_norm": 0.5417963266372681, - "learning_rate": 4.997962962962963e-05, - "loss": 0.6204, - "step": 3012 - }, - { - "epoch": 0.10575826181575668, - "grad_norm": 0.6369830369949341, - "learning_rate": 4.997777777777778e-05, - "loss": 0.5785, - "step": 3013 - }, - { - "epoch": 0.10579336246687376, - "grad_norm": 0.7027493119239807, - "learning_rate": 4.997592592592593e-05, - "loss": 0.5861, - "step": 3014 - }, - { - "epoch": 0.10582846311799084, - "grad_norm": 0.7829379439353943, - "learning_rate": 4.997407407407408e-05, - "loss": 0.5523, - "step": 3015 - }, - { - "epoch": 0.10586356376910791, - "grad_norm": 0.6312464475631714, - "learning_rate": 4.997222222222223e-05, - "loss": 0.5288, - "step": 3016 - }, - { - "epoch": 0.105898664420225, - "grad_norm": 0.5445833206176758, - "learning_rate": 4.997037037037037e-05, - "loss": 0.5224, - "step": 3017 - }, - { - "epoch": 0.10593376507134207, - "grad_norm": 0.6575988531112671, - "learning_rate": 4.996851851851852e-05, - "loss": 0.6789, - "step": 3018 - }, - { - "epoch": 0.10596886572245916, - "grad_norm": 0.6032088398933411, - "learning_rate": 4.996666666666667e-05, - "loss": 0.5558, - "step": 3019 - }, - { - "epoch": 0.10600396637357623, - "grad_norm": 0.512489378452301, - "learning_rate": 4.996481481481482e-05, - "loss": 0.5031, - "step": 3020 - }, - { - "epoch": 0.1060390670246933, - "grad_norm": 0.6850407719612122, - "learning_rate": 4.9962962962962964e-05, - "loss": 0.5482, - "step": 3021 - }, - { - "epoch": 0.10607416767581039, - "grad_norm": 0.6066628098487854, - "learning_rate": 4.9961111111111114e-05, - "loss": 0.4902, - "step": 3022 - }, - { - "epoch": 0.10610926832692746, - "grad_norm": 0.6193882822990417, - "learning_rate": 4.9959259259259264e-05, - "loss": 0.5374, - "step": 3023 - }, - { - "epoch": 0.10614436897804454, - "grad_norm": 0.7051039338111877, - "learning_rate": 4.995740740740741e-05, - "loss": 0.5514, - "step": 3024 - }, - { - "epoch": 0.10617946962916162, - "grad_norm": 0.5327629446983337, - "learning_rate": 4.995555555555556e-05, - "loss": 0.4801, - "step": 3025 - }, - { - "epoch": 0.1062145702802787, - "grad_norm": 0.6739999055862427, - "learning_rate": 4.995370370370371e-05, - "loss": 0.5688, - "step": 3026 - }, - { - "epoch": 0.10624967093139577, - "grad_norm": 0.6417441964149475, - "learning_rate": 4.995185185185186e-05, - "loss": 0.586, - "step": 3027 - }, - { - "epoch": 0.10628477158251286, - "grad_norm": 0.7391411662101746, - "learning_rate": 4.995e-05, - "loss": 0.5239, - "step": 3028 - }, - { - "epoch": 0.10631987223362993, - "grad_norm": 0.6683998703956604, - "learning_rate": 4.994814814814815e-05, - "loss": 0.5194, - "step": 3029 - }, - { - "epoch": 0.10635497288474702, - "grad_norm": 0.6370558738708496, - "learning_rate": 4.9946296296296295e-05, - "loss": 0.4837, - "step": 3030 - }, - { - "epoch": 0.10639007353586409, - "grad_norm": 0.6855101585388184, - "learning_rate": 4.994444444444445e-05, - "loss": 0.5288, - "step": 3031 - }, - { - "epoch": 0.10642517418698116, - "grad_norm": 0.6214173436164856, - "learning_rate": 4.9942592592592595e-05, - "loss": 0.5294, - "step": 3032 - }, - { - "epoch": 0.10646027483809825, - "grad_norm": 0.7040107846260071, - "learning_rate": 4.9940740740740745e-05, - "loss": 0.4745, - "step": 3033 - }, - { - "epoch": 0.10649537548921532, - "grad_norm": 0.5997017025947571, - "learning_rate": 4.993888888888889e-05, - "loss": 0.5322, - "step": 3034 - }, - { - "epoch": 0.1065304761403324, - "grad_norm": 0.8231281042098999, - "learning_rate": 4.993703703703704e-05, - "loss": 0.5934, - "step": 3035 - }, - { - "epoch": 0.10656557679144948, - "grad_norm": 0.7713268399238586, - "learning_rate": 4.993518518518518e-05, - "loss": 0.4944, - "step": 3036 - }, - { - "epoch": 0.10660067744256656, - "grad_norm": 0.5714576840400696, - "learning_rate": 4.993333333333334e-05, - "loss": 0.5336, - "step": 3037 - }, - { - "epoch": 0.10663577809368364, - "grad_norm": 0.5519785284996033, - "learning_rate": 4.993148148148148e-05, - "loss": 0.5235, - "step": 3038 - }, - { - "epoch": 0.10667087874480072, - "grad_norm": 0.8070365786552429, - "learning_rate": 4.992962962962963e-05, - "loss": 0.4761, - "step": 3039 - }, - { - "epoch": 0.10670597939591779, - "grad_norm": 0.6372881531715393, - "learning_rate": 4.992777777777778e-05, - "loss": 0.5061, - "step": 3040 - }, - { - "epoch": 0.10674108004703488, - "grad_norm": 0.6388071179389954, - "learning_rate": 4.9925925925925926e-05, - "loss": 0.5074, - "step": 3041 - }, - { - "epoch": 0.10677618069815195, - "grad_norm": 0.727430522441864, - "learning_rate": 4.9924074074074076e-05, - "loss": 0.5399, - "step": 3042 - }, - { - "epoch": 0.10681128134926902, - "grad_norm": 0.6776041388511658, - "learning_rate": 4.9922222222222226e-05, - "loss": 0.6054, - "step": 3043 - }, - { - "epoch": 0.10684638200038611, - "grad_norm": 0.6620975136756897, - "learning_rate": 4.9920370370370376e-05, - "loss": 0.4712, - "step": 3044 - }, - { - "epoch": 0.10688148265150318, - "grad_norm": 0.6466237306594849, - "learning_rate": 4.991851851851852e-05, - "loss": 0.5182, - "step": 3045 - }, - { - "epoch": 0.10691658330262026, - "grad_norm": 0.5723544359207153, - "learning_rate": 4.991666666666667e-05, - "loss": 0.3761, - "step": 3046 - }, - { - "epoch": 0.10695168395373734, - "grad_norm": 0.7456693649291992, - "learning_rate": 4.991481481481482e-05, - "loss": 0.5967, - "step": 3047 - }, - { - "epoch": 0.10698678460485442, - "grad_norm": 0.6624867916107178, - "learning_rate": 4.991296296296297e-05, - "loss": 0.5044, - "step": 3048 - }, - { - "epoch": 0.1070218852559715, - "grad_norm": 0.7067692279815674, - "learning_rate": 4.991111111111111e-05, - "loss": 0.5713, - "step": 3049 - }, - { - "epoch": 0.10705698590708858, - "grad_norm": 0.6433124542236328, - "learning_rate": 4.990925925925926e-05, - "loss": 0.5107, - "step": 3050 - }, - { - "epoch": 0.10709208655820565, - "grad_norm": 0.6541153192520142, - "learning_rate": 4.9907407407407406e-05, - "loss": 0.5747, - "step": 3051 - }, - { - "epoch": 0.10712718720932274, - "grad_norm": 0.6012939214706421, - "learning_rate": 4.9905555555555556e-05, - "loss": 0.4144, - "step": 3052 - }, - { - "epoch": 0.10716228786043981, - "grad_norm": 0.8033766150474548, - "learning_rate": 4.9903703703703707e-05, - "loss": 0.5806, - "step": 3053 - }, - { - "epoch": 0.10719738851155688, - "grad_norm": 0.6887876391410828, - "learning_rate": 4.990185185185186e-05, - "loss": 0.5263, - "step": 3054 - }, - { - "epoch": 0.10723248916267397, - "grad_norm": 0.7334967851638794, - "learning_rate": 4.99e-05, - "loss": 0.5285, - "step": 3055 - }, - { - "epoch": 0.10726758981379104, - "grad_norm": 0.7198358178138733, - "learning_rate": 4.989814814814815e-05, - "loss": 0.6379, - "step": 3056 - }, - { - "epoch": 0.10730269046490812, - "grad_norm": 0.7758725881576538, - "learning_rate": 4.9896296296296293e-05, - "loss": 0.5452, - "step": 3057 - }, - { - "epoch": 0.1073377911160252, - "grad_norm": 0.5773873925209045, - "learning_rate": 4.989444444444445e-05, - "loss": 0.5576, - "step": 3058 - }, - { - "epoch": 0.10737289176714228, - "grad_norm": 0.7840684056282043, - "learning_rate": 4.9892592592592594e-05, - "loss": 0.6335, - "step": 3059 - }, - { - "epoch": 0.10740799241825937, - "grad_norm": 0.6644937992095947, - "learning_rate": 4.9890740740740744e-05, - "loss": 0.4344, - "step": 3060 - }, - { - "epoch": 0.10744309306937644, - "grad_norm": 0.6554849147796631, - "learning_rate": 4.9888888888888894e-05, - "loss": 0.596, - "step": 3061 - }, - { - "epoch": 0.10747819372049351, - "grad_norm": 0.5906491875648499, - "learning_rate": 4.988703703703704e-05, - "loss": 0.4318, - "step": 3062 - }, - { - "epoch": 0.1075132943716106, - "grad_norm": 0.7156143188476562, - "learning_rate": 4.988518518518519e-05, - "loss": 0.5037, - "step": 3063 - }, - { - "epoch": 0.10754839502272767, - "grad_norm": 0.6230971813201904, - "learning_rate": 4.988333333333334e-05, - "loss": 0.5041, - "step": 3064 - }, - { - "epoch": 0.10758349567384475, - "grad_norm": 0.6249761581420898, - "learning_rate": 4.988148148148149e-05, - "loss": 0.5372, - "step": 3065 - }, - { - "epoch": 0.10761859632496183, - "grad_norm": 0.6845327615737915, - "learning_rate": 4.987962962962963e-05, - "loss": 0.5614, - "step": 3066 - }, - { - "epoch": 0.1076536969760789, - "grad_norm": 0.8372601866722107, - "learning_rate": 4.987777777777778e-05, - "loss": 0.5963, - "step": 3067 - }, - { - "epoch": 0.10768879762719598, - "grad_norm": 0.5042539834976196, - "learning_rate": 4.9875925925925924e-05, - "loss": 0.5984, - "step": 3068 - }, - { - "epoch": 0.10772389827831307, - "grad_norm": 0.709041178226471, - "learning_rate": 4.987407407407408e-05, - "loss": 0.5011, - "step": 3069 - }, - { - "epoch": 0.10775899892943014, - "grad_norm": 0.7672573328018188, - "learning_rate": 4.9872222222222225e-05, - "loss": 0.7308, - "step": 3070 - }, - { - "epoch": 0.10779409958054723, - "grad_norm": 0.5623196959495544, - "learning_rate": 4.9870370370370375e-05, - "loss": 0.5442, - "step": 3071 - }, - { - "epoch": 0.1078292002316643, - "grad_norm": 0.6262000203132629, - "learning_rate": 4.986851851851852e-05, - "loss": 0.6304, - "step": 3072 - }, - { - "epoch": 0.10786430088278137, - "grad_norm": 0.5918208360671997, - "learning_rate": 4.986666666666667e-05, - "loss": 0.5421, - "step": 3073 - }, - { - "epoch": 0.10789940153389846, - "grad_norm": 0.6179586052894592, - "learning_rate": 4.986481481481482e-05, - "loss": 0.583, - "step": 3074 - }, - { - "epoch": 0.10793450218501553, - "grad_norm": 0.562579333782196, - "learning_rate": 4.986296296296297e-05, - "loss": 0.5694, - "step": 3075 - }, - { - "epoch": 0.1079696028361326, - "grad_norm": 0.5938811898231506, - "learning_rate": 4.986111111111111e-05, - "loss": 0.5166, - "step": 3076 - }, - { - "epoch": 0.10800470348724969, - "grad_norm": 0.5616680979728699, - "learning_rate": 4.985925925925926e-05, - "loss": 0.4916, - "step": 3077 - }, - { - "epoch": 0.10803980413836677, - "grad_norm": 0.5889248847961426, - "learning_rate": 4.9857407407407405e-05, - "loss": 0.5761, - "step": 3078 - }, - { - "epoch": 0.10807490478948384, - "grad_norm": 0.6474159955978394, - "learning_rate": 4.9855555555555555e-05, - "loss": 0.5477, - "step": 3079 - }, - { - "epoch": 0.10811000544060093, - "grad_norm": 0.6113816499710083, - "learning_rate": 4.9853703703703705e-05, - "loss": 0.5375, - "step": 3080 - }, - { - "epoch": 0.108145106091718, - "grad_norm": 0.67293781042099, - "learning_rate": 4.9851851851851855e-05, - "loss": 0.4252, - "step": 3081 - }, - { - "epoch": 0.10818020674283509, - "grad_norm": 0.6976415514945984, - "learning_rate": 4.9850000000000006e-05, - "loss": 0.5817, - "step": 3082 - }, - { - "epoch": 0.10821530739395216, - "grad_norm": 0.7594155669212341, - "learning_rate": 4.984814814814815e-05, - "loss": 0.4997, - "step": 3083 - }, - { - "epoch": 0.10825040804506923, - "grad_norm": 0.5919487476348877, - "learning_rate": 4.98462962962963e-05, - "loss": 0.5233, - "step": 3084 - }, - { - "epoch": 0.10828550869618632, - "grad_norm": 0.623344898223877, - "learning_rate": 4.984444444444445e-05, - "loss": 0.5947, - "step": 3085 - }, - { - "epoch": 0.10832060934730339, - "grad_norm": 0.6456923484802246, - "learning_rate": 4.98425925925926e-05, - "loss": 0.4631, - "step": 3086 - }, - { - "epoch": 0.10835570999842047, - "grad_norm": 0.6497933864593506, - "learning_rate": 4.984074074074074e-05, - "loss": 0.4839, - "step": 3087 - }, - { - "epoch": 0.10839081064953755, - "grad_norm": 0.5570430755615234, - "learning_rate": 4.983888888888889e-05, - "loss": 0.5385, - "step": 3088 - }, - { - "epoch": 0.10842591130065463, - "grad_norm": 0.6699201464653015, - "learning_rate": 4.9837037037037036e-05, - "loss": 0.5743, - "step": 3089 - }, - { - "epoch": 0.1084610119517717, - "grad_norm": 0.726135790348053, - "learning_rate": 4.983518518518519e-05, - "loss": 0.6346, - "step": 3090 - }, - { - "epoch": 0.10849611260288879, - "grad_norm": 0.626787543296814, - "learning_rate": 4.9833333333333336e-05, - "loss": 0.5288, - "step": 3091 - }, - { - "epoch": 0.10853121325400586, - "grad_norm": 0.6667259931564331, - "learning_rate": 4.9831481481481486e-05, - "loss": 0.5949, - "step": 3092 - }, - { - "epoch": 0.10856631390512295, - "grad_norm": 0.6025003790855408, - "learning_rate": 4.982962962962963e-05, - "loss": 0.4592, - "step": 3093 - }, - { - "epoch": 0.10860141455624002, - "grad_norm": 0.5527742505073547, - "learning_rate": 4.982777777777778e-05, - "loss": 0.531, - "step": 3094 - }, - { - "epoch": 0.10863651520735709, - "grad_norm": 0.625763475894928, - "learning_rate": 4.982592592592592e-05, - "loss": 0.4462, - "step": 3095 - }, - { - "epoch": 0.10867161585847418, - "grad_norm": 0.6368997693061829, - "learning_rate": 4.982407407407408e-05, - "loss": 0.6156, - "step": 3096 - }, - { - "epoch": 0.10870671650959125, - "grad_norm": 0.6927420496940613, - "learning_rate": 4.982222222222222e-05, - "loss": 0.5085, - "step": 3097 - }, - { - "epoch": 0.10874181716070833, - "grad_norm": 0.6651814579963684, - "learning_rate": 4.9820370370370373e-05, - "loss": 0.5574, - "step": 3098 - }, - { - "epoch": 0.10877691781182541, - "grad_norm": 0.6036892533302307, - "learning_rate": 4.981851851851852e-05, - "loss": 0.5091, - "step": 3099 - }, - { - "epoch": 0.10881201846294249, - "grad_norm": 0.6069902181625366, - "learning_rate": 4.981666666666667e-05, - "loss": 0.4732, - "step": 3100 - }, - { - "epoch": 0.10884711911405956, - "grad_norm": 0.746080756187439, - "learning_rate": 4.981481481481482e-05, - "loss": 0.6123, - "step": 3101 - }, - { - "epoch": 0.10888221976517665, - "grad_norm": 0.6058173179626465, - "learning_rate": 4.981296296296297e-05, - "loss": 0.4227, - "step": 3102 - }, - { - "epoch": 0.10891732041629372, - "grad_norm": 0.8945167064666748, - "learning_rate": 4.981111111111112e-05, - "loss": 0.3908, - "step": 3103 - }, - { - "epoch": 0.1089524210674108, - "grad_norm": 0.7544126510620117, - "learning_rate": 4.980925925925926e-05, - "loss": 0.4941, - "step": 3104 - }, - { - "epoch": 0.10898752171852788, - "grad_norm": 0.6898295283317566, - "learning_rate": 4.980740740740741e-05, - "loss": 0.5772, - "step": 3105 - }, - { - "epoch": 0.10902262236964495, - "grad_norm": 0.6202386617660522, - "learning_rate": 4.9805555555555554e-05, - "loss": 0.6116, - "step": 3106 - }, - { - "epoch": 0.10905772302076204, - "grad_norm": 0.6592008471488953, - "learning_rate": 4.980370370370371e-05, - "loss": 0.6534, - "step": 3107 - }, - { - "epoch": 0.10909282367187911, - "grad_norm": 0.5409555435180664, - "learning_rate": 4.9801851851851854e-05, - "loss": 0.5405, - "step": 3108 - }, - { - "epoch": 0.10912792432299619, - "grad_norm": 0.9660138487815857, - "learning_rate": 4.9800000000000004e-05, - "loss": 0.6933, - "step": 3109 - }, - { - "epoch": 0.10916302497411327, - "grad_norm": 0.7172825336456299, - "learning_rate": 4.979814814814815e-05, - "loss": 0.4555, - "step": 3110 - }, - { - "epoch": 0.10919812562523035, - "grad_norm": 0.6054369211196899, - "learning_rate": 4.97962962962963e-05, - "loss": 0.5496, - "step": 3111 - }, - { - "epoch": 0.10923322627634742, - "grad_norm": 0.7011042237281799, - "learning_rate": 4.979444444444445e-05, - "loss": 0.6773, - "step": 3112 - }, - { - "epoch": 0.1092683269274645, - "grad_norm": 0.7385786771774292, - "learning_rate": 4.97925925925926e-05, - "loss": 0.5299, - "step": 3113 - }, - { - "epoch": 0.10930342757858158, - "grad_norm": 0.6241104006767273, - "learning_rate": 4.979074074074074e-05, - "loss": 0.6189, - "step": 3114 - }, - { - "epoch": 0.10933852822969867, - "grad_norm": 0.5386983752250671, - "learning_rate": 4.978888888888889e-05, - "loss": 0.4899, - "step": 3115 - }, - { - "epoch": 0.10937362888081574, - "grad_norm": 0.5874850749969482, - "learning_rate": 4.9787037037037035e-05, - "loss": 0.5167, - "step": 3116 - }, - { - "epoch": 0.10940872953193281, - "grad_norm": 0.6154995560646057, - "learning_rate": 4.978518518518519e-05, - "loss": 0.6067, - "step": 3117 - }, - { - "epoch": 0.1094438301830499, - "grad_norm": 0.5852852463722229, - "learning_rate": 4.9783333333333335e-05, - "loss": 0.5718, - "step": 3118 - }, - { - "epoch": 0.10947893083416697, - "grad_norm": 0.7220240831375122, - "learning_rate": 4.9781481481481485e-05, - "loss": 0.546, - "step": 3119 - }, - { - "epoch": 0.10951403148528405, - "grad_norm": 0.6582068800926208, - "learning_rate": 4.977962962962963e-05, - "loss": 0.573, - "step": 3120 - }, - { - "epoch": 0.10954913213640113, - "grad_norm": 0.5980414748191833, - "learning_rate": 4.977777777777778e-05, - "loss": 0.4781, - "step": 3121 - }, - { - "epoch": 0.10958423278751821, - "grad_norm": 0.7111872434616089, - "learning_rate": 4.977592592592593e-05, - "loss": 0.5342, - "step": 3122 - }, - { - "epoch": 0.10961933343863528, - "grad_norm": 0.7463699579238892, - "learning_rate": 4.977407407407408e-05, - "loss": 0.5648, - "step": 3123 - }, - { - "epoch": 0.10965443408975237, - "grad_norm": 0.7045801281929016, - "learning_rate": 4.977222222222223e-05, - "loss": 0.5539, - "step": 3124 - }, - { - "epoch": 0.10968953474086944, - "grad_norm": 0.7246782183647156, - "learning_rate": 4.977037037037037e-05, - "loss": 0.5325, - "step": 3125 - }, - { - "epoch": 0.10972463539198653, - "grad_norm": 0.598635733127594, - "learning_rate": 4.976851851851852e-05, - "loss": 0.5676, - "step": 3126 - }, - { - "epoch": 0.1097597360431036, - "grad_norm": 0.6831518411636353, - "learning_rate": 4.9766666666666666e-05, - "loss": 0.4678, - "step": 3127 - }, - { - "epoch": 0.10979483669422067, - "grad_norm": 0.6582784056663513, - "learning_rate": 4.976481481481482e-05, - "loss": 0.6123, - "step": 3128 - }, - { - "epoch": 0.10982993734533776, - "grad_norm": 0.6393179297447205, - "learning_rate": 4.9762962962962966e-05, - "loss": 0.6379, - "step": 3129 - }, - { - "epoch": 0.10986503799645483, - "grad_norm": 0.6067506670951843, - "learning_rate": 4.9761111111111116e-05, - "loss": 0.4993, - "step": 3130 - }, - { - "epoch": 0.10990013864757191, - "grad_norm": 0.5738722681999207, - "learning_rate": 4.975925925925926e-05, - "loss": 0.4613, - "step": 3131 - }, - { - "epoch": 0.109935239298689, - "grad_norm": 0.5307616591453552, - "learning_rate": 4.975740740740741e-05, - "loss": 0.5702, - "step": 3132 - }, - { - "epoch": 0.10997033994980607, - "grad_norm": 0.6324464678764343, - "learning_rate": 4.975555555555555e-05, - "loss": 0.4603, - "step": 3133 - }, - { - "epoch": 0.11000544060092314, - "grad_norm": 0.7165551781654358, - "learning_rate": 4.975370370370371e-05, - "loss": 0.5182, - "step": 3134 - }, - { - "epoch": 0.11004054125204023, - "grad_norm": 0.5732969641685486, - "learning_rate": 4.975185185185185e-05, - "loss": 0.5837, - "step": 3135 - }, - { - "epoch": 0.1100756419031573, - "grad_norm": 0.6228199601173401, - "learning_rate": 4.975e-05, - "loss": 0.4785, - "step": 3136 - }, - { - "epoch": 0.11011074255427439, - "grad_norm": 0.6281245350837708, - "learning_rate": 4.9748148148148146e-05, - "loss": 0.5302, - "step": 3137 - }, - { - "epoch": 0.11014584320539146, - "grad_norm": 0.6962692737579346, - "learning_rate": 4.9746296296296297e-05, - "loss": 0.5905, - "step": 3138 - }, - { - "epoch": 0.11018094385650853, - "grad_norm": 0.5870905518531799, - "learning_rate": 4.974444444444445e-05, - "loss": 0.501, - "step": 3139 - }, - { - "epoch": 0.11021604450762562, - "grad_norm": 0.5430238246917725, - "learning_rate": 4.97425925925926e-05, - "loss": 0.5464, - "step": 3140 - }, - { - "epoch": 0.1102511451587427, - "grad_norm": 0.6421905159950256, - "learning_rate": 4.974074074074074e-05, - "loss": 0.5186, - "step": 3141 - }, - { - "epoch": 0.11028624580985977, - "grad_norm": 0.7252295613288879, - "learning_rate": 4.973888888888889e-05, - "loss": 0.6483, - "step": 3142 - }, - { - "epoch": 0.11032134646097685, - "grad_norm": 0.6682125926017761, - "learning_rate": 4.973703703703704e-05, - "loss": 0.4835, - "step": 3143 - }, - { - "epoch": 0.11035644711209393, - "grad_norm": 0.6421388387680054, - "learning_rate": 4.973518518518519e-05, - "loss": 0.5195, - "step": 3144 - }, - { - "epoch": 0.110391547763211, - "grad_norm": 0.6797918081283569, - "learning_rate": 4.973333333333334e-05, - "loss": 0.4562, - "step": 3145 - }, - { - "epoch": 0.11042664841432809, - "grad_norm": 0.6733700633049011, - "learning_rate": 4.9731481481481484e-05, - "loss": 0.6455, - "step": 3146 - }, - { - "epoch": 0.11046174906544516, - "grad_norm": 0.6191102266311646, - "learning_rate": 4.9729629629629634e-05, - "loss": 0.4346, - "step": 3147 - }, - { - "epoch": 0.11049684971656225, - "grad_norm": 0.6790646910667419, - "learning_rate": 4.972777777777778e-05, - "loss": 0.4349, - "step": 3148 - }, - { - "epoch": 0.11053195036767932, - "grad_norm": 0.7792980074882507, - "learning_rate": 4.972592592592593e-05, - "loss": 0.543, - "step": 3149 - }, - { - "epoch": 0.1105670510187964, - "grad_norm": 0.571169912815094, - "learning_rate": 4.972407407407408e-05, - "loss": 0.4621, - "step": 3150 - }, - { - "epoch": 0.11060215166991348, - "grad_norm": 0.6672490239143372, - "learning_rate": 4.972222222222223e-05, - "loss": 0.5328, - "step": 3151 - }, - { - "epoch": 0.11063725232103055, - "grad_norm": 0.594989538192749, - "learning_rate": 4.972037037037037e-05, - "loss": 0.5642, - "step": 3152 - }, - { - "epoch": 0.11067235297214763, - "grad_norm": 0.5816904902458191, - "learning_rate": 4.971851851851852e-05, - "loss": 0.5742, - "step": 3153 - }, - { - "epoch": 0.11070745362326471, - "grad_norm": 0.6483237147331238, - "learning_rate": 4.9716666666666664e-05, - "loss": 0.4595, - "step": 3154 - }, - { - "epoch": 0.11074255427438179, - "grad_norm": 0.7015228271484375, - "learning_rate": 4.971481481481482e-05, - "loss": 0.6174, - "step": 3155 - }, - { - "epoch": 0.11077765492549886, - "grad_norm": 0.5922762751579285, - "learning_rate": 4.9712962962962965e-05, - "loss": 0.5568, - "step": 3156 - }, - { - "epoch": 0.11081275557661595, - "grad_norm": 0.5735580325126648, - "learning_rate": 4.9711111111111115e-05, - "loss": 0.4191, - "step": 3157 - }, - { - "epoch": 0.11084785622773302, - "grad_norm": 0.6604952812194824, - "learning_rate": 4.970925925925926e-05, - "loss": 0.4567, - "step": 3158 - }, - { - "epoch": 0.11088295687885011, - "grad_norm": 0.7019994854927063, - "learning_rate": 4.970740740740741e-05, - "loss": 0.5155, - "step": 3159 - }, - { - "epoch": 0.11091805752996718, - "grad_norm": 0.7013636827468872, - "learning_rate": 4.970555555555556e-05, - "loss": 0.5272, - "step": 3160 - }, - { - "epoch": 0.11095315818108425, - "grad_norm": 0.7259323596954346, - "learning_rate": 4.970370370370371e-05, - "loss": 0.6217, - "step": 3161 - }, - { - "epoch": 0.11098825883220134, - "grad_norm": 0.5762435793876648, - "learning_rate": 4.970185185185185e-05, - "loss": 0.4993, - "step": 3162 - }, - { - "epoch": 0.11102335948331842, - "grad_norm": 0.6803324818611145, - "learning_rate": 4.97e-05, - "loss": 0.6373, - "step": 3163 - }, - { - "epoch": 0.11105846013443549, - "grad_norm": 0.6699590682983398, - "learning_rate": 4.969814814814815e-05, - "loss": 0.5998, - "step": 3164 - }, - { - "epoch": 0.11109356078555258, - "grad_norm": 0.5822440981864929, - "learning_rate": 4.9696296296296295e-05, - "loss": 0.4467, - "step": 3165 - }, - { - "epoch": 0.11112866143666965, - "grad_norm": 0.6254309415817261, - "learning_rate": 4.969444444444445e-05, - "loss": 0.4997, - "step": 3166 - }, - { - "epoch": 0.11116376208778672, - "grad_norm": 0.6829240322113037, - "learning_rate": 4.9692592592592596e-05, - "loss": 0.5127, - "step": 3167 - }, - { - "epoch": 0.11119886273890381, - "grad_norm": 0.705875039100647, - "learning_rate": 4.9690740740740746e-05, - "loss": 0.4147, - "step": 3168 - }, - { - "epoch": 0.11123396339002088, - "grad_norm": 0.7180333733558655, - "learning_rate": 4.968888888888889e-05, - "loss": 0.5978, - "step": 3169 - }, - { - "epoch": 0.11126906404113797, - "grad_norm": 0.6133614778518677, - "learning_rate": 4.968703703703704e-05, - "loss": 0.4521, - "step": 3170 - }, - { - "epoch": 0.11130416469225504, - "grad_norm": 0.5918648838996887, - "learning_rate": 4.968518518518519e-05, - "loss": 0.523, - "step": 3171 - }, - { - "epoch": 0.11133926534337212, - "grad_norm": 0.6551830172538757, - "learning_rate": 4.968333333333334e-05, - "loss": 0.4203, - "step": 3172 - }, - { - "epoch": 0.1113743659944892, - "grad_norm": 0.6802245378494263, - "learning_rate": 4.968148148148148e-05, - "loss": 0.5873, - "step": 3173 - }, - { - "epoch": 0.11140946664560628, - "grad_norm": 0.7048672437667847, - "learning_rate": 4.967962962962963e-05, - "loss": 0.5425, - "step": 3174 - }, - { - "epoch": 0.11144456729672335, - "grad_norm": 0.6276256442070007, - "learning_rate": 4.9677777777777776e-05, - "loss": 0.6608, - "step": 3175 - }, - { - "epoch": 0.11147966794784044, - "grad_norm": 0.646041214466095, - "learning_rate": 4.9675925925925926e-05, - "loss": 0.4523, - "step": 3176 - }, - { - "epoch": 0.11151476859895751, - "grad_norm": 0.8295732736587524, - "learning_rate": 4.9674074074074076e-05, - "loss": 0.5404, - "step": 3177 - }, - { - "epoch": 0.11154986925007458, - "grad_norm": 0.720611035823822, - "learning_rate": 4.9672222222222226e-05, - "loss": 0.6052, - "step": 3178 - }, - { - "epoch": 0.11158496990119167, - "grad_norm": 0.6849273443222046, - "learning_rate": 4.967037037037037e-05, - "loss": 0.6257, - "step": 3179 - }, - { - "epoch": 0.11162007055230874, - "grad_norm": 0.7209822535514832, - "learning_rate": 4.966851851851852e-05, - "loss": 0.6071, - "step": 3180 - }, - { - "epoch": 0.11165517120342583, - "grad_norm": 0.6274434328079224, - "learning_rate": 4.966666666666667e-05, - "loss": 0.5207, - "step": 3181 - }, - { - "epoch": 0.1116902718545429, - "grad_norm": 1.1096237897872925, - "learning_rate": 4.966481481481482e-05, - "loss": 0.6011, - "step": 3182 - }, - { - "epoch": 0.11172537250565998, - "grad_norm": 0.595501720905304, - "learning_rate": 4.9662962962962963e-05, - "loss": 0.6079, - "step": 3183 - }, - { - "epoch": 0.11176047315677706, - "grad_norm": 0.6058116555213928, - "learning_rate": 4.9661111111111114e-05, - "loss": 0.5562, - "step": 3184 - }, - { - "epoch": 0.11179557380789414, - "grad_norm": 0.6850024461746216, - "learning_rate": 4.9659259259259264e-05, - "loss": 0.5226, - "step": 3185 - }, - { - "epoch": 0.11183067445901121, - "grad_norm": 0.9823969602584839, - "learning_rate": 4.965740740740741e-05, - "loss": 0.5262, - "step": 3186 - }, - { - "epoch": 0.1118657751101283, - "grad_norm": 0.73008131980896, - "learning_rate": 4.965555555555556e-05, - "loss": 0.5661, - "step": 3187 - }, - { - "epoch": 0.11190087576124537, - "grad_norm": 0.7372958064079285, - "learning_rate": 4.965370370370371e-05, - "loss": 0.3493, - "step": 3188 - }, - { - "epoch": 0.11193597641236246, - "grad_norm": 0.5917587876319885, - "learning_rate": 4.965185185185186e-05, - "loss": 0.5335, - "step": 3189 - }, - { - "epoch": 0.11197107706347953, - "grad_norm": 0.7486703395843506, - "learning_rate": 4.965e-05, - "loss": 0.5173, - "step": 3190 - }, - { - "epoch": 0.1120061777145966, - "grad_norm": 0.9657439589500427, - "learning_rate": 4.964814814814815e-05, - "loss": 0.642, - "step": 3191 - }, - { - "epoch": 0.11204127836571369, - "grad_norm": 1.405655026435852, - "learning_rate": 4.9646296296296294e-05, - "loss": 0.6172, - "step": 3192 - }, - { - "epoch": 0.11207637901683076, - "grad_norm": 0.6593519449234009, - "learning_rate": 4.964444444444445e-05, - "loss": 0.5417, - "step": 3193 - }, - { - "epoch": 0.11211147966794784, - "grad_norm": 0.7179496884346008, - "learning_rate": 4.9642592592592594e-05, - "loss": 0.5381, - "step": 3194 - }, - { - "epoch": 0.11214658031906492, - "grad_norm": 0.668103814125061, - "learning_rate": 4.9640740740740744e-05, - "loss": 0.5631, - "step": 3195 - }, - { - "epoch": 0.112181680970182, - "grad_norm": 0.6913663744926453, - "learning_rate": 4.963888888888889e-05, - "loss": 0.5247, - "step": 3196 - }, - { - "epoch": 0.11221678162129907, - "grad_norm": 1.1723670959472656, - "learning_rate": 4.963703703703704e-05, - "loss": 0.5711, - "step": 3197 - }, - { - "epoch": 0.11225188227241616, - "grad_norm": 0.5021218657493591, - "learning_rate": 4.963518518518519e-05, - "loss": 0.4212, - "step": 3198 - }, - { - "epoch": 0.11228698292353323, - "grad_norm": 0.5818294882774353, - "learning_rate": 4.963333333333334e-05, - "loss": 0.5642, - "step": 3199 - }, - { - "epoch": 0.11232208357465032, - "grad_norm": 0.6688151955604553, - "learning_rate": 4.963148148148148e-05, - "loss": 0.6324, - "step": 3200 - }, - { - "epoch": 0.11235718422576739, - "grad_norm": 0.6832871437072754, - "learning_rate": 4.962962962962963e-05, - "loss": 0.4779, - "step": 3201 - }, - { - "epoch": 0.11239228487688446, - "grad_norm": 1.4192994832992554, - "learning_rate": 4.962777777777778e-05, - "loss": 0.6324, - "step": 3202 - }, - { - "epoch": 0.11242738552800155, - "grad_norm": 0.945139467716217, - "learning_rate": 4.9625925925925925e-05, - "loss": 0.5609, - "step": 3203 - }, - { - "epoch": 0.11246248617911862, - "grad_norm": 0.7766860127449036, - "learning_rate": 4.9624074074074075e-05, - "loss": 0.6436, - "step": 3204 - }, - { - "epoch": 0.1124975868302357, - "grad_norm": 0.7832661867141724, - "learning_rate": 4.9622222222222225e-05, - "loss": 0.5023, - "step": 3205 - }, - { - "epoch": 0.11253268748135278, - "grad_norm": 0.5861284732818604, - "learning_rate": 4.9620370370370375e-05, - "loss": 0.5416, - "step": 3206 - }, - { - "epoch": 0.11256778813246986, - "grad_norm": 1.4301286935806274, - "learning_rate": 4.961851851851852e-05, - "loss": 0.6946, - "step": 3207 - }, - { - "epoch": 0.11260288878358693, - "grad_norm": 0.50128173828125, - "learning_rate": 4.961666666666667e-05, - "loss": 0.4521, - "step": 3208 - }, - { - "epoch": 0.11263798943470402, - "grad_norm": 0.661767840385437, - "learning_rate": 4.961481481481482e-05, - "loss": 0.6349, - "step": 3209 - }, - { - "epoch": 0.11267309008582109, - "grad_norm": 0.5829587578773499, - "learning_rate": 4.961296296296297e-05, - "loss": 0.5208, - "step": 3210 - }, - { - "epoch": 0.11270819073693818, - "grad_norm": 0.6282117962837219, - "learning_rate": 4.961111111111111e-05, - "loss": 0.5543, - "step": 3211 - }, - { - "epoch": 0.11274329138805525, - "grad_norm": 1.1405786275863647, - "learning_rate": 4.960925925925926e-05, - "loss": 0.5689, - "step": 3212 - }, - { - "epoch": 0.11277839203917232, - "grad_norm": 0.7508547902107239, - "learning_rate": 4.9607407407407406e-05, - "loss": 0.4537, - "step": 3213 - }, - { - "epoch": 0.11281349269028941, - "grad_norm": 0.6120636463165283, - "learning_rate": 4.960555555555556e-05, - "loss": 0.5984, - "step": 3214 - }, - { - "epoch": 0.11284859334140648, - "grad_norm": 0.6313924193382263, - "learning_rate": 4.9603703703703706e-05, - "loss": 0.7119, - "step": 3215 - }, - { - "epoch": 0.11288369399252356, - "grad_norm": 0.5684128403663635, - "learning_rate": 4.9601851851851856e-05, - "loss": 0.5564, - "step": 3216 - }, - { - "epoch": 0.11291879464364064, - "grad_norm": 0.6705465912818909, - "learning_rate": 4.96e-05, - "loss": 0.5753, - "step": 3217 - }, - { - "epoch": 0.11295389529475772, - "grad_norm": 0.5789027214050293, - "learning_rate": 4.959814814814815e-05, - "loss": 0.5692, - "step": 3218 - }, - { - "epoch": 0.11298899594587479, - "grad_norm": 0.6719627976417542, - "learning_rate": 4.959629629629629e-05, - "loss": 0.5524, - "step": 3219 - }, - { - "epoch": 0.11302409659699188, - "grad_norm": 0.7399791479110718, - "learning_rate": 4.959444444444445e-05, - "loss": 0.6085, - "step": 3220 - }, - { - "epoch": 0.11305919724810895, - "grad_norm": 0.5984674096107483, - "learning_rate": 4.959259259259259e-05, - "loss": 0.5441, - "step": 3221 - }, - { - "epoch": 0.11309429789922604, - "grad_norm": 0.7750998735427856, - "learning_rate": 4.959074074074074e-05, - "loss": 0.6245, - "step": 3222 - }, - { - "epoch": 0.11312939855034311, - "grad_norm": 0.6555527448654175, - "learning_rate": 4.958888888888889e-05, - "loss": 0.5928, - "step": 3223 - }, - { - "epoch": 0.11316449920146018, - "grad_norm": 0.6236708760261536, - "learning_rate": 4.958703703703704e-05, - "loss": 0.6913, - "step": 3224 - }, - { - "epoch": 0.11319959985257727, - "grad_norm": 0.6283673048019409, - "learning_rate": 4.958518518518519e-05, - "loss": 0.5259, - "step": 3225 - }, - { - "epoch": 0.11323470050369434, - "grad_norm": 0.6096956133842468, - "learning_rate": 4.958333333333334e-05, - "loss": 0.5302, - "step": 3226 - }, - { - "epoch": 0.11326980115481142, - "grad_norm": 0.6024258732795715, - "learning_rate": 4.958148148148149e-05, - "loss": 0.5341, - "step": 3227 - }, - { - "epoch": 0.1133049018059285, - "grad_norm": 0.602425217628479, - "learning_rate": 4.957962962962963e-05, - "loss": 0.4565, - "step": 3228 - }, - { - "epoch": 0.11334000245704558, - "grad_norm": 0.6349795460700989, - "learning_rate": 4.957777777777778e-05, - "loss": 0.4815, - "step": 3229 - }, - { - "epoch": 0.11337510310816265, - "grad_norm": 0.5897064208984375, - "learning_rate": 4.9575925925925924e-05, - "loss": 0.5436, - "step": 3230 - }, - { - "epoch": 0.11341020375927974, - "grad_norm": 1.0753543376922607, - "learning_rate": 4.957407407407408e-05, - "loss": 0.5632, - "step": 3231 - }, - { - "epoch": 0.11344530441039681, - "grad_norm": 0.8606966733932495, - "learning_rate": 4.9572222222222224e-05, - "loss": 0.5715, - "step": 3232 - }, - { - "epoch": 0.1134804050615139, - "grad_norm": 0.6467044353485107, - "learning_rate": 4.9570370370370374e-05, - "loss": 0.5032, - "step": 3233 - }, - { - "epoch": 0.11351550571263097, - "grad_norm": 0.6297491192817688, - "learning_rate": 4.956851851851852e-05, - "loss": 0.5232, - "step": 3234 - }, - { - "epoch": 0.11355060636374804, - "grad_norm": 1.1157346963882446, - "learning_rate": 4.956666666666667e-05, - "loss": 0.6126, - "step": 3235 - }, - { - "epoch": 0.11358570701486513, - "grad_norm": 1.5592472553253174, - "learning_rate": 4.956481481481482e-05, - "loss": 0.5563, - "step": 3236 - }, - { - "epoch": 0.1136208076659822, - "grad_norm": 0.6427077651023865, - "learning_rate": 4.956296296296297e-05, - "loss": 0.5308, - "step": 3237 - }, - { - "epoch": 0.11365590831709928, - "grad_norm": 0.5860710144042969, - "learning_rate": 4.956111111111111e-05, - "loss": 0.5315, - "step": 3238 - }, - { - "epoch": 0.11369100896821636, - "grad_norm": 0.5829582214355469, - "learning_rate": 4.955925925925926e-05, - "loss": 0.5505, - "step": 3239 - }, - { - "epoch": 0.11372610961933344, - "grad_norm": 0.5521115660667419, - "learning_rate": 4.9557407407407405e-05, - "loss": 0.5884, - "step": 3240 - }, - { - "epoch": 0.11376121027045051, - "grad_norm": 0.8749446868896484, - "learning_rate": 4.955555555555556e-05, - "loss": 0.5707, - "step": 3241 - }, - { - "epoch": 0.1137963109215676, - "grad_norm": 0.7713532447814941, - "learning_rate": 4.9553703703703705e-05, - "loss": 0.6039, - "step": 3242 - }, - { - "epoch": 0.11383141157268467, - "grad_norm": 0.6089738011360168, - "learning_rate": 4.9551851851851855e-05, - "loss": 0.5232, - "step": 3243 - }, - { - "epoch": 0.11386651222380176, - "grad_norm": 0.5862056612968445, - "learning_rate": 4.9550000000000005e-05, - "loss": 0.5785, - "step": 3244 - }, - { - "epoch": 0.11390161287491883, - "grad_norm": 0.5886847376823425, - "learning_rate": 4.954814814814815e-05, - "loss": 0.6426, - "step": 3245 - }, - { - "epoch": 0.1139367135260359, - "grad_norm": 0.6288779377937317, - "learning_rate": 4.95462962962963e-05, - "loss": 0.6422, - "step": 3246 - }, - { - "epoch": 0.11397181417715299, - "grad_norm": 0.6182766556739807, - "learning_rate": 4.954444444444445e-05, - "loss": 0.5371, - "step": 3247 - }, - { - "epoch": 0.11400691482827006, - "grad_norm": 0.5750756859779358, - "learning_rate": 4.95425925925926e-05, - "loss": 0.6368, - "step": 3248 - }, - { - "epoch": 0.11404201547938714, - "grad_norm": 0.8770506381988525, - "learning_rate": 4.954074074074074e-05, - "loss": 0.5652, - "step": 3249 - }, - { - "epoch": 0.11407711613050422, - "grad_norm": 0.7753925919532776, - "learning_rate": 4.953888888888889e-05, - "loss": 0.3938, - "step": 3250 - }, - { - "epoch": 0.1141122167816213, - "grad_norm": 0.6141456961631775, - "learning_rate": 4.9537037037037035e-05, - "loss": 0.5584, - "step": 3251 - }, - { - "epoch": 0.11414731743273837, - "grad_norm": 0.585298478603363, - "learning_rate": 4.953518518518519e-05, - "loss": 0.5756, - "step": 3252 - }, - { - "epoch": 0.11418241808385546, - "grad_norm": 0.6876172423362732, - "learning_rate": 4.9533333333333336e-05, - "loss": 0.5272, - "step": 3253 - }, - { - "epoch": 0.11421751873497253, - "grad_norm": 0.5862181782722473, - "learning_rate": 4.9531481481481486e-05, - "loss": 0.5921, - "step": 3254 - }, - { - "epoch": 0.11425261938608962, - "grad_norm": 0.6114069223403931, - "learning_rate": 4.952962962962963e-05, - "loss": 0.6018, - "step": 3255 - }, - { - "epoch": 0.11428772003720669, - "grad_norm": 0.5962209701538086, - "learning_rate": 4.952777777777778e-05, - "loss": 0.5348, - "step": 3256 - }, - { - "epoch": 0.11432282068832376, - "grad_norm": 0.5636523962020874, - "learning_rate": 4.952592592592592e-05, - "loss": 0.5141, - "step": 3257 - }, - { - "epoch": 0.11435792133944085, - "grad_norm": 0.6569848656654358, - "learning_rate": 4.952407407407408e-05, - "loss": 0.5213, - "step": 3258 - }, - { - "epoch": 0.11439302199055792, - "grad_norm": 0.6843352913856506, - "learning_rate": 4.952222222222222e-05, - "loss": 0.5164, - "step": 3259 - }, - { - "epoch": 0.114428122641675, - "grad_norm": 0.6291747689247131, - "learning_rate": 4.952037037037037e-05, - "loss": 0.5887, - "step": 3260 - }, - { - "epoch": 0.11446322329279209, - "grad_norm": 0.6117442846298218, - "learning_rate": 4.951851851851852e-05, - "loss": 0.5468, - "step": 3261 - }, - { - "epoch": 0.11449832394390916, - "grad_norm": 0.5406420230865479, - "learning_rate": 4.9516666666666666e-05, - "loss": 0.5074, - "step": 3262 - }, - { - "epoch": 0.11453342459502623, - "grad_norm": 0.6589360237121582, - "learning_rate": 4.9514814814814816e-05, - "loss": 0.5662, - "step": 3263 - }, - { - "epoch": 0.11456852524614332, - "grad_norm": 0.8711466789245605, - "learning_rate": 4.9512962962962967e-05, - "loss": 0.5026, - "step": 3264 - }, - { - "epoch": 0.11460362589726039, - "grad_norm": 0.6204253435134888, - "learning_rate": 4.951111111111112e-05, - "loss": 0.5015, - "step": 3265 - }, - { - "epoch": 0.11463872654837748, - "grad_norm": 0.6064289212226868, - "learning_rate": 4.950925925925926e-05, - "loss": 0.5617, - "step": 3266 - }, - { - "epoch": 0.11467382719949455, - "grad_norm": 0.554735004901886, - "learning_rate": 4.950740740740741e-05, - "loss": 0.4616, - "step": 3267 - }, - { - "epoch": 0.11470892785061162, - "grad_norm": 0.5533216595649719, - "learning_rate": 4.950555555555556e-05, - "loss": 0.6048, - "step": 3268 - }, - { - "epoch": 0.11474402850172871, - "grad_norm": 0.6211358308792114, - "learning_rate": 4.950370370370371e-05, - "loss": 0.4866, - "step": 3269 - }, - { - "epoch": 0.11477912915284579, - "grad_norm": 0.6224796175956726, - "learning_rate": 4.9501851851851854e-05, - "loss": 0.4865, - "step": 3270 - }, - { - "epoch": 0.11481422980396286, - "grad_norm": 0.7347021102905273, - "learning_rate": 4.9500000000000004e-05, - "loss": 0.5515, - "step": 3271 - }, - { - "epoch": 0.11484933045507995, - "grad_norm": 0.6768015623092651, - "learning_rate": 4.949814814814815e-05, - "loss": 0.4952, - "step": 3272 - }, - { - "epoch": 0.11488443110619702, - "grad_norm": 0.7160813212394714, - "learning_rate": 4.94962962962963e-05, - "loss": 0.5301, - "step": 3273 - }, - { - "epoch": 0.11491953175731409, - "grad_norm": 0.6133739948272705, - "learning_rate": 4.949444444444445e-05, - "loss": 0.6372, - "step": 3274 - }, - { - "epoch": 0.11495463240843118, - "grad_norm": 0.594247043132782, - "learning_rate": 4.94925925925926e-05, - "loss": 0.4692, - "step": 3275 - }, - { - "epoch": 0.11498973305954825, - "grad_norm": 0.6223771572113037, - "learning_rate": 4.949074074074074e-05, - "loss": 0.5899, - "step": 3276 - }, - { - "epoch": 0.11502483371066534, - "grad_norm": 0.5896468162536621, - "learning_rate": 4.948888888888889e-05, - "loss": 0.5245, - "step": 3277 - }, - { - "epoch": 0.11505993436178241, - "grad_norm": 0.5668961405754089, - "learning_rate": 4.9487037037037034e-05, - "loss": 0.5956, - "step": 3278 - }, - { - "epoch": 0.11509503501289949, - "grad_norm": 0.5487853288650513, - "learning_rate": 4.948518518518519e-05, - "loss": 0.4645, - "step": 3279 - }, - { - "epoch": 0.11513013566401657, - "grad_norm": 0.5698654651641846, - "learning_rate": 4.9483333333333334e-05, - "loss": 0.4809, - "step": 3280 - }, - { - "epoch": 0.11516523631513365, - "grad_norm": 0.8388647437095642, - "learning_rate": 4.9481481481481485e-05, - "loss": 0.4842, - "step": 3281 - }, - { - "epoch": 0.11520033696625072, - "grad_norm": 0.624266505241394, - "learning_rate": 4.9479629629629635e-05, - "loss": 0.538, - "step": 3282 - }, - { - "epoch": 0.1152354376173678, - "grad_norm": 0.5589842796325684, - "learning_rate": 4.947777777777778e-05, - "loss": 0.4987, - "step": 3283 - }, - { - "epoch": 0.11527053826848488, - "grad_norm": 0.6954354047775269, - "learning_rate": 4.947592592592593e-05, - "loss": 0.4924, - "step": 3284 - }, - { - "epoch": 0.11530563891960195, - "grad_norm": 0.6769924759864807, - "learning_rate": 4.947407407407408e-05, - "loss": 0.3801, - "step": 3285 - }, - { - "epoch": 0.11534073957071904, - "grad_norm": 0.6670584678649902, - "learning_rate": 4.947222222222223e-05, - "loss": 0.531, - "step": 3286 - }, - { - "epoch": 0.11537584022183611, - "grad_norm": 0.7050185799598694, - "learning_rate": 4.947037037037037e-05, - "loss": 0.4169, - "step": 3287 - }, - { - "epoch": 0.1154109408729532, - "grad_norm": 0.6978665590286255, - "learning_rate": 4.946851851851852e-05, - "loss": 0.4709, - "step": 3288 - }, - { - "epoch": 0.11544604152407027, - "grad_norm": 0.5943599343299866, - "learning_rate": 4.9466666666666665e-05, - "loss": 0.5596, - "step": 3289 - }, - { - "epoch": 0.11548114217518735, - "grad_norm": 0.6558557152748108, - "learning_rate": 4.946481481481482e-05, - "loss": 0.5596, - "step": 3290 - }, - { - "epoch": 0.11551624282630443, - "grad_norm": 0.6525485515594482, - "learning_rate": 4.9462962962962965e-05, - "loss": 0.6023, - "step": 3291 - }, - { - "epoch": 0.1155513434774215, - "grad_norm": 0.7232561111450195, - "learning_rate": 4.9461111111111115e-05, - "loss": 0.5732, - "step": 3292 - }, - { - "epoch": 0.11558644412853858, - "grad_norm": 0.7676307559013367, - "learning_rate": 4.945925925925926e-05, - "loss": 0.4203, - "step": 3293 - }, - { - "epoch": 0.11562154477965567, - "grad_norm": 0.5953564643859863, - "learning_rate": 4.945740740740741e-05, - "loss": 0.4063, - "step": 3294 - }, - { - "epoch": 0.11565664543077274, - "grad_norm": 0.7584761381149292, - "learning_rate": 4.945555555555556e-05, - "loss": 0.5725, - "step": 3295 - }, - { - "epoch": 0.11569174608188981, - "grad_norm": 0.6403425335884094, - "learning_rate": 4.945370370370371e-05, - "loss": 0.5665, - "step": 3296 - }, - { - "epoch": 0.1157268467330069, - "grad_norm": 0.6566243767738342, - "learning_rate": 4.945185185185185e-05, - "loss": 0.5372, - "step": 3297 - }, - { - "epoch": 0.11576194738412397, - "grad_norm": 0.5468577146530151, - "learning_rate": 4.945e-05, - "loss": 0.557, - "step": 3298 - }, - { - "epoch": 0.11579704803524106, - "grad_norm": 0.8573240041732788, - "learning_rate": 4.9448148148148146e-05, - "loss": 0.5243, - "step": 3299 - }, - { - "epoch": 0.11583214868635813, - "grad_norm": 0.5772416591644287, - "learning_rate": 4.9446296296296296e-05, - "loss": 0.5481, - "step": 3300 - }, - { - "epoch": 0.1158672493374752, - "grad_norm": 0.5129542350769043, - "learning_rate": 4.9444444444444446e-05, - "loss": 0.5226, - "step": 3301 - }, - { - "epoch": 0.1159023499885923, - "grad_norm": 0.6295337080955505, - "learning_rate": 4.9442592592592596e-05, - "loss": 0.6085, - "step": 3302 - }, - { - "epoch": 0.11593745063970937, - "grad_norm": 0.5660290122032166, - "learning_rate": 4.9440740740740746e-05, - "loss": 0.6011, - "step": 3303 - }, - { - "epoch": 0.11597255129082644, - "grad_norm": 0.5612079501152039, - "learning_rate": 4.943888888888889e-05, - "loss": 0.5765, - "step": 3304 - }, - { - "epoch": 0.11600765194194353, - "grad_norm": 0.5781408548355103, - "learning_rate": 4.943703703703704e-05, - "loss": 0.6715, - "step": 3305 - }, - { - "epoch": 0.1160427525930606, - "grad_norm": 0.5619946122169495, - "learning_rate": 4.943518518518519e-05, - "loss": 0.5453, - "step": 3306 - }, - { - "epoch": 0.11607785324417767, - "grad_norm": 0.5820042490959167, - "learning_rate": 4.943333333333334e-05, - "loss": 0.5191, - "step": 3307 - }, - { - "epoch": 0.11611295389529476, - "grad_norm": 0.5565657615661621, - "learning_rate": 4.943148148148148e-05, - "loss": 0.4603, - "step": 3308 - }, - { - "epoch": 0.11614805454641183, - "grad_norm": 0.5352852940559387, - "learning_rate": 4.9429629629629633e-05, - "loss": 0.5186, - "step": 3309 - }, - { - "epoch": 0.11618315519752892, - "grad_norm": 0.6934499144554138, - "learning_rate": 4.942777777777778e-05, - "loss": 0.5417, - "step": 3310 - }, - { - "epoch": 0.116218255848646, - "grad_norm": 0.6989749073982239, - "learning_rate": 4.942592592592593e-05, - "loss": 0.5252, - "step": 3311 - }, - { - "epoch": 0.11625335649976307, - "grad_norm": 0.7379990816116333, - "learning_rate": 4.942407407407408e-05, - "loss": 0.6236, - "step": 3312 - }, - { - "epoch": 0.11628845715088015, - "grad_norm": 0.631428062915802, - "learning_rate": 4.942222222222223e-05, - "loss": 0.5438, - "step": 3313 - }, - { - "epoch": 0.11632355780199723, - "grad_norm": 0.6066673994064331, - "learning_rate": 4.942037037037037e-05, - "loss": 0.5662, - "step": 3314 - }, - { - "epoch": 0.1163586584531143, - "grad_norm": 0.5504570007324219, - "learning_rate": 4.941851851851852e-05, - "loss": 0.4871, - "step": 3315 - }, - { - "epoch": 0.11639375910423139, - "grad_norm": 0.5355759263038635, - "learning_rate": 4.9416666666666664e-05, - "loss": 0.4706, - "step": 3316 - }, - { - "epoch": 0.11642885975534846, - "grad_norm": 0.5986247062683105, - "learning_rate": 4.941481481481482e-05, - "loss": 0.5552, - "step": 3317 - }, - { - "epoch": 0.11646396040646553, - "grad_norm": 0.6856994032859802, - "learning_rate": 4.9412962962962964e-05, - "loss": 0.5781, - "step": 3318 - }, - { - "epoch": 0.11649906105758262, - "grad_norm": 0.5235650539398193, - "learning_rate": 4.9411111111111114e-05, - "loss": 0.5523, - "step": 3319 - }, - { - "epoch": 0.1165341617086997, - "grad_norm": 0.7154974937438965, - "learning_rate": 4.940925925925926e-05, - "loss": 0.5992, - "step": 3320 - }, - { - "epoch": 0.11656926235981678, - "grad_norm": 0.6678881049156189, - "learning_rate": 4.940740740740741e-05, - "loss": 0.4939, - "step": 3321 - }, - { - "epoch": 0.11660436301093385, - "grad_norm": 0.6584571599960327, - "learning_rate": 4.940555555555556e-05, - "loss": 0.524, - "step": 3322 - }, - { - "epoch": 0.11663946366205093, - "grad_norm": 0.4922095239162445, - "learning_rate": 4.940370370370371e-05, - "loss": 0.5357, - "step": 3323 - }, - { - "epoch": 0.11667456431316801, - "grad_norm": 0.6991430521011353, - "learning_rate": 4.940185185185186e-05, - "loss": 0.5268, - "step": 3324 - }, - { - "epoch": 0.11670966496428509, - "grad_norm": 0.5986264944076538, - "learning_rate": 4.94e-05, - "loss": 0.604, - "step": 3325 - }, - { - "epoch": 0.11674476561540216, - "grad_norm": 0.5818352699279785, - "learning_rate": 4.939814814814815e-05, - "loss": 0.5717, - "step": 3326 - }, - { - "epoch": 0.11677986626651925, - "grad_norm": 0.7222238779067993, - "learning_rate": 4.9396296296296295e-05, - "loss": 0.646, - "step": 3327 - }, - { - "epoch": 0.11681496691763632, - "grad_norm": 0.5434146523475647, - "learning_rate": 4.939444444444445e-05, - "loss": 0.5249, - "step": 3328 - }, - { - "epoch": 0.1168500675687534, - "grad_norm": 0.5869682431221008, - "learning_rate": 4.9392592592592595e-05, - "loss": 0.525, - "step": 3329 - }, - { - "epoch": 0.11688516821987048, - "grad_norm": 0.591731607913971, - "learning_rate": 4.9390740740740745e-05, - "loss": 0.6124, - "step": 3330 - }, - { - "epoch": 0.11692026887098755, - "grad_norm": 0.5963661670684814, - "learning_rate": 4.938888888888889e-05, - "loss": 0.4179, - "step": 3331 - }, - { - "epoch": 0.11695536952210464, - "grad_norm": 0.5243632197380066, - "learning_rate": 4.938703703703704e-05, - "loss": 0.5008, - "step": 3332 - }, - { - "epoch": 0.11699047017322171, - "grad_norm": 0.753287672996521, - "learning_rate": 4.938518518518519e-05, - "loss": 0.5077, - "step": 3333 - }, - { - "epoch": 0.11702557082433879, - "grad_norm": 0.5799756050109863, - "learning_rate": 4.938333333333334e-05, - "loss": 0.4885, - "step": 3334 - }, - { - "epoch": 0.11706067147545587, - "grad_norm": 0.6663563251495361, - "learning_rate": 4.938148148148148e-05, - "loss": 0.6016, - "step": 3335 - }, - { - "epoch": 0.11709577212657295, - "grad_norm": 0.6024001240730286, - "learning_rate": 4.937962962962963e-05, - "loss": 0.5753, - "step": 3336 - }, - { - "epoch": 0.11713087277769002, - "grad_norm": 0.5410277247428894, - "learning_rate": 4.9377777777777776e-05, - "loss": 0.5819, - "step": 3337 - }, - { - "epoch": 0.11716597342880711, - "grad_norm": 0.7079979777336121, - "learning_rate": 4.937592592592593e-05, - "loss": 0.4813, - "step": 3338 - }, - { - "epoch": 0.11720107407992418, - "grad_norm": 0.6191153526306152, - "learning_rate": 4.9374074074074076e-05, - "loss": 0.5078, - "step": 3339 - }, - { - "epoch": 0.11723617473104127, - "grad_norm": 0.5509341955184937, - "learning_rate": 4.9372222222222226e-05, - "loss": 0.4468, - "step": 3340 - }, - { - "epoch": 0.11727127538215834, - "grad_norm": 0.5718255639076233, - "learning_rate": 4.937037037037037e-05, - "loss": 0.4637, - "step": 3341 - }, - { - "epoch": 0.11730637603327541, - "grad_norm": 0.6026448011398315, - "learning_rate": 4.936851851851852e-05, - "loss": 0.5035, - "step": 3342 - }, - { - "epoch": 0.1173414766843925, - "grad_norm": 0.6038998961448669, - "learning_rate": 4.936666666666667e-05, - "loss": 0.533, - "step": 3343 - }, - { - "epoch": 0.11737657733550957, - "grad_norm": 0.617021381855011, - "learning_rate": 4.936481481481482e-05, - "loss": 0.6161, - "step": 3344 - }, - { - "epoch": 0.11741167798662665, - "grad_norm": 0.5759481191635132, - "learning_rate": 4.936296296296297e-05, - "loss": 0.57, - "step": 3345 - }, - { - "epoch": 0.11744677863774373, - "grad_norm": 0.5772411227226257, - "learning_rate": 4.936111111111111e-05, - "loss": 0.4588, - "step": 3346 - }, - { - "epoch": 0.11748187928886081, - "grad_norm": 0.5738247632980347, - "learning_rate": 4.935925925925926e-05, - "loss": 0.5473, - "step": 3347 - }, - { - "epoch": 0.11751697993997788, - "grad_norm": 0.5737380385398865, - "learning_rate": 4.9357407407407406e-05, - "loss": 0.5728, - "step": 3348 - }, - { - "epoch": 0.11755208059109497, - "grad_norm": 0.572783350944519, - "learning_rate": 4.935555555555556e-05, - "loss": 0.5884, - "step": 3349 - }, - { - "epoch": 0.11758718124221204, - "grad_norm": 0.5781747698783875, - "learning_rate": 4.935370370370371e-05, - "loss": 0.5298, - "step": 3350 - }, - { - "epoch": 0.11762228189332913, - "grad_norm": 0.5660456418991089, - "learning_rate": 4.935185185185186e-05, - "loss": 0.4707, - "step": 3351 - }, - { - "epoch": 0.1176573825444462, - "grad_norm": 0.5792422294616699, - "learning_rate": 4.935e-05, - "loss": 0.4311, - "step": 3352 - }, - { - "epoch": 0.11769248319556327, - "grad_norm": 0.5400115251541138, - "learning_rate": 4.934814814814815e-05, - "loss": 0.6727, - "step": 3353 - }, - { - "epoch": 0.11772758384668036, - "grad_norm": 0.5227763056755066, - "learning_rate": 4.9346296296296294e-05, - "loss": 0.5212, - "step": 3354 - }, - { - "epoch": 0.11776268449779743, - "grad_norm": 0.6218478083610535, - "learning_rate": 4.934444444444445e-05, - "loss": 0.4955, - "step": 3355 - }, - { - "epoch": 0.11779778514891451, - "grad_norm": 0.49512627720832825, - "learning_rate": 4.9342592592592594e-05, - "loss": 0.5539, - "step": 3356 - }, - { - "epoch": 0.1178328858000316, - "grad_norm": 0.536597728729248, - "learning_rate": 4.9340740740740744e-05, - "loss": 0.5081, - "step": 3357 - }, - { - "epoch": 0.11786798645114867, - "grad_norm": 0.7845978736877441, - "learning_rate": 4.933888888888889e-05, - "loss": 0.5613, - "step": 3358 - }, - { - "epoch": 0.11790308710226574, - "grad_norm": 0.5916150808334351, - "learning_rate": 4.933703703703704e-05, - "loss": 0.5888, - "step": 3359 - }, - { - "epoch": 0.11793818775338283, - "grad_norm": 0.5625584125518799, - "learning_rate": 4.933518518518519e-05, - "loss": 0.4031, - "step": 3360 - }, - { - "epoch": 0.1179732884044999, - "grad_norm": 0.5873352289199829, - "learning_rate": 4.933333333333334e-05, - "loss": 0.5774, - "step": 3361 - }, - { - "epoch": 0.11800838905561699, - "grad_norm": 0.5544146299362183, - "learning_rate": 4.933148148148148e-05, - "loss": 0.4337, - "step": 3362 - }, - { - "epoch": 0.11804348970673406, - "grad_norm": 0.5778603553771973, - "learning_rate": 4.932962962962963e-05, - "loss": 0.6271, - "step": 3363 - }, - { - "epoch": 0.11807859035785113, - "grad_norm": 0.6241691708564758, - "learning_rate": 4.932777777777778e-05, - "loss": 0.5284, - "step": 3364 - }, - { - "epoch": 0.11811369100896822, - "grad_norm": 0.6230523586273193, - "learning_rate": 4.932592592592593e-05, - "loss": 0.4452, - "step": 3365 - }, - { - "epoch": 0.1181487916600853, - "grad_norm": 0.6616918444633484, - "learning_rate": 4.932407407407408e-05, - "loss": 0.5551, - "step": 3366 - }, - { - "epoch": 0.11818389231120237, - "grad_norm": 0.6268907785415649, - "learning_rate": 4.9322222222222225e-05, - "loss": 0.4818, - "step": 3367 - }, - { - "epoch": 0.11821899296231946, - "grad_norm": 0.6917697191238403, - "learning_rate": 4.9320370370370375e-05, - "loss": 0.429, - "step": 3368 - }, - { - "epoch": 0.11825409361343653, - "grad_norm": 0.7823694944381714, - "learning_rate": 4.931851851851852e-05, - "loss": 0.5364, - "step": 3369 - }, - { - "epoch": 0.1182891942645536, - "grad_norm": 0.7104827165603638, - "learning_rate": 4.931666666666667e-05, - "loss": 0.61, - "step": 3370 - }, - { - "epoch": 0.11832429491567069, - "grad_norm": 0.7574892640113831, - "learning_rate": 4.931481481481482e-05, - "loss": 0.63, - "step": 3371 - }, - { - "epoch": 0.11835939556678776, - "grad_norm": 0.6103653907775879, - "learning_rate": 4.931296296296297e-05, - "loss": 0.5409, - "step": 3372 - }, - { - "epoch": 0.11839449621790485, - "grad_norm": 0.4963729679584503, - "learning_rate": 4.931111111111111e-05, - "loss": 0.5122, - "step": 3373 - }, - { - "epoch": 0.11842959686902192, - "grad_norm": 0.6646038293838501, - "learning_rate": 4.930925925925926e-05, - "loss": 0.527, - "step": 3374 - }, - { - "epoch": 0.118464697520139, - "grad_norm": 0.6470727324485779, - "learning_rate": 4.9307407407407405e-05, - "loss": 0.5405, - "step": 3375 - }, - { - "epoch": 0.11849979817125608, - "grad_norm": 0.730037271976471, - "learning_rate": 4.930555555555556e-05, - "loss": 0.5616, - "step": 3376 - }, - { - "epoch": 0.11853489882237316, - "grad_norm": 0.6118882298469543, - "learning_rate": 4.9303703703703705e-05, - "loss": 0.5496, - "step": 3377 - }, - { - "epoch": 0.11856999947349023, - "grad_norm": 0.653281569480896, - "learning_rate": 4.9301851851851856e-05, - "loss": 0.5305, - "step": 3378 - }, - { - "epoch": 0.11860510012460732, - "grad_norm": 0.5917869210243225, - "learning_rate": 4.93e-05, - "loss": 0.5213, - "step": 3379 - }, - { - "epoch": 0.11864020077572439, - "grad_norm": 0.7048130035400391, - "learning_rate": 4.929814814814815e-05, - "loss": 0.6338, - "step": 3380 - }, - { - "epoch": 0.11867530142684146, - "grad_norm": 0.5355014204978943, - "learning_rate": 4.92962962962963e-05, - "loss": 0.5173, - "step": 3381 - }, - { - "epoch": 0.11871040207795855, - "grad_norm": 0.6650946140289307, - "learning_rate": 4.929444444444445e-05, - "loss": 0.5604, - "step": 3382 - }, - { - "epoch": 0.11874550272907562, - "grad_norm": 0.6302752494812012, - "learning_rate": 4.929259259259259e-05, - "loss": 0.338, - "step": 3383 - }, - { - "epoch": 0.11878060338019271, - "grad_norm": 0.5817702412605286, - "learning_rate": 4.929074074074074e-05, - "loss": 0.423, - "step": 3384 - }, - { - "epoch": 0.11881570403130978, - "grad_norm": 0.5695309638977051, - "learning_rate": 4.928888888888889e-05, - "loss": 0.5728, - "step": 3385 - }, - { - "epoch": 0.11885080468242686, - "grad_norm": 0.5920072197914124, - "learning_rate": 4.9287037037037036e-05, - "loss": 0.4634, - "step": 3386 - }, - { - "epoch": 0.11888590533354394, - "grad_norm": 0.6411303281784058, - "learning_rate": 4.928518518518519e-05, - "loss": 0.4898, - "step": 3387 - }, - { - "epoch": 0.11892100598466102, - "grad_norm": 0.6530652046203613, - "learning_rate": 4.9283333333333336e-05, - "loss": 0.5558, - "step": 3388 - }, - { - "epoch": 0.11895610663577809, - "grad_norm": 0.6782507300376892, - "learning_rate": 4.9281481481481486e-05, - "loss": 0.5935, - "step": 3389 - }, - { - "epoch": 0.11899120728689518, - "grad_norm": 0.7314573526382446, - "learning_rate": 4.927962962962963e-05, - "loss": 0.6618, - "step": 3390 - }, - { - "epoch": 0.11902630793801225, - "grad_norm": 0.5572981834411621, - "learning_rate": 4.927777777777778e-05, - "loss": 0.6148, - "step": 3391 - }, - { - "epoch": 0.11906140858912932, - "grad_norm": 0.5422652363777161, - "learning_rate": 4.927592592592593e-05, - "loss": 0.4893, - "step": 3392 - }, - { - "epoch": 0.11909650924024641, - "grad_norm": 0.5231618285179138, - "learning_rate": 4.927407407407408e-05, - "loss": 0.5768, - "step": 3393 - }, - { - "epoch": 0.11913160989136348, - "grad_norm": 0.6373382806777954, - "learning_rate": 4.9272222222222223e-05, - "loss": 0.5371, - "step": 3394 - }, - { - "epoch": 0.11916671054248057, - "grad_norm": 0.611038088798523, - "learning_rate": 4.9270370370370374e-05, - "loss": 0.5345, - "step": 3395 - }, - { - "epoch": 0.11920181119359764, - "grad_norm": 0.5543455481529236, - "learning_rate": 4.926851851851852e-05, - "loss": 0.5772, - "step": 3396 - }, - { - "epoch": 0.11923691184471472, - "grad_norm": 0.5755308270454407, - "learning_rate": 4.926666666666667e-05, - "loss": 0.5429, - "step": 3397 - }, - { - "epoch": 0.1192720124958318, - "grad_norm": 0.5589984655380249, - "learning_rate": 4.926481481481482e-05, - "loss": 0.5629, - "step": 3398 - }, - { - "epoch": 0.11930711314694888, - "grad_norm": 0.5220105648040771, - "learning_rate": 4.926296296296297e-05, - "loss": 0.4362, - "step": 3399 - }, - { - "epoch": 0.11934221379806595, - "grad_norm": 0.5194084048271179, - "learning_rate": 4.926111111111111e-05, - "loss": 0.5306, - "step": 3400 - }, - { - "epoch": 0.11937731444918304, - "grad_norm": 0.5826179385185242, - "learning_rate": 4.925925925925926e-05, - "loss": 0.4181, - "step": 3401 - }, - { - "epoch": 0.11941241510030011, - "grad_norm": 0.588756799697876, - "learning_rate": 4.925740740740741e-05, - "loss": 0.4452, - "step": 3402 - }, - { - "epoch": 0.11944751575141718, - "grad_norm": 0.831125795841217, - "learning_rate": 4.925555555555556e-05, - "loss": 0.6308, - "step": 3403 - }, - { - "epoch": 0.11948261640253427, - "grad_norm": 0.6154045462608337, - "learning_rate": 4.9253703703703704e-05, - "loss": 0.4781, - "step": 3404 - }, - { - "epoch": 0.11951771705365134, - "grad_norm": 0.6327564120292664, - "learning_rate": 4.9251851851851854e-05, - "loss": 0.5282, - "step": 3405 - }, - { - "epoch": 0.11955281770476843, - "grad_norm": 0.6018508076667786, - "learning_rate": 4.9250000000000004e-05, - "loss": 0.6293, - "step": 3406 - }, - { - "epoch": 0.1195879183558855, - "grad_norm": 0.49778562784194946, - "learning_rate": 4.924814814814815e-05, - "loss": 0.5835, - "step": 3407 - }, - { - "epoch": 0.11962301900700258, - "grad_norm": 0.6177685260772705, - "learning_rate": 4.92462962962963e-05, - "loss": 0.6268, - "step": 3408 - }, - { - "epoch": 0.11965811965811966, - "grad_norm": 0.6469677686691284, - "learning_rate": 4.924444444444445e-05, - "loss": 0.645, - "step": 3409 - }, - { - "epoch": 0.11969322030923674, - "grad_norm": 0.602812647819519, - "learning_rate": 4.92425925925926e-05, - "loss": 0.514, - "step": 3410 - }, - { - "epoch": 0.11972832096035381, - "grad_norm": 0.530297040939331, - "learning_rate": 4.924074074074074e-05, - "loss": 0.5783, - "step": 3411 - }, - { - "epoch": 0.1197634216114709, - "grad_norm": 0.584979772567749, - "learning_rate": 4.923888888888889e-05, - "loss": 0.4725, - "step": 3412 - }, - { - "epoch": 0.11979852226258797, - "grad_norm": 0.6478732824325562, - "learning_rate": 4.9237037037037035e-05, - "loss": 0.6678, - "step": 3413 - }, - { - "epoch": 0.11983362291370504, - "grad_norm": 0.5874982476234436, - "learning_rate": 4.923518518518519e-05, - "loss": 0.5962, - "step": 3414 - }, - { - "epoch": 0.11986872356482213, - "grad_norm": 0.5993367433547974, - "learning_rate": 4.9233333333333335e-05, - "loss": 0.5083, - "step": 3415 - }, - { - "epoch": 0.1199038242159392, - "grad_norm": 0.59568190574646, - "learning_rate": 4.9231481481481485e-05, - "loss": 0.579, - "step": 3416 - }, - { - "epoch": 0.11993892486705629, - "grad_norm": 0.6363686323165894, - "learning_rate": 4.922962962962963e-05, - "loss": 0.6328, - "step": 3417 - }, - { - "epoch": 0.11997402551817336, - "grad_norm": 0.642812967300415, - "learning_rate": 4.922777777777778e-05, - "loss": 0.4693, - "step": 3418 - }, - { - "epoch": 0.12000912616929044, - "grad_norm": 0.729076623916626, - "learning_rate": 4.922592592592593e-05, - "loss": 0.6001, - "step": 3419 - }, - { - "epoch": 0.12004422682040752, - "grad_norm": 0.6801328659057617, - "learning_rate": 4.922407407407408e-05, - "loss": 0.5819, - "step": 3420 - }, - { - "epoch": 0.1200793274715246, - "grad_norm": 0.5790257453918457, - "learning_rate": 4.922222222222222e-05, - "loss": 0.5824, - "step": 3421 - }, - { - "epoch": 0.12011442812264167, - "grad_norm": 0.5439820289611816, - "learning_rate": 4.922037037037037e-05, - "loss": 0.5117, - "step": 3422 - }, - { - "epoch": 0.12014952877375876, - "grad_norm": 0.6019900441169739, - "learning_rate": 4.921851851851852e-05, - "loss": 0.499, - "step": 3423 - }, - { - "epoch": 0.12018462942487583, - "grad_norm": 0.5517000555992126, - "learning_rate": 4.9216666666666666e-05, - "loss": 0.6016, - "step": 3424 - }, - { - "epoch": 0.1202197300759929, - "grad_norm": 0.6675683259963989, - "learning_rate": 4.9214814814814816e-05, - "loss": 0.576, - "step": 3425 - }, - { - "epoch": 0.12025483072710999, - "grad_norm": 1.140912652015686, - "learning_rate": 4.9212962962962966e-05, - "loss": 0.4437, - "step": 3426 - }, - { - "epoch": 0.12028993137822706, - "grad_norm": 0.7656604051589966, - "learning_rate": 4.9211111111111116e-05, - "loss": 0.4083, - "step": 3427 - }, - { - "epoch": 0.12032503202934415, - "grad_norm": 0.5971274971961975, - "learning_rate": 4.920925925925926e-05, - "loss": 0.583, - "step": 3428 - }, - { - "epoch": 0.12036013268046122, - "grad_norm": 0.6273960471153259, - "learning_rate": 4.920740740740741e-05, - "loss": 0.6317, - "step": 3429 - }, - { - "epoch": 0.1203952333315783, - "grad_norm": 0.5565261840820312, - "learning_rate": 4.920555555555556e-05, - "loss": 0.4893, - "step": 3430 - }, - { - "epoch": 0.12043033398269538, - "grad_norm": 0.9163693785667419, - "learning_rate": 4.920370370370371e-05, - "loss": 0.4046, - "step": 3431 - }, - { - "epoch": 0.12046543463381246, - "grad_norm": 0.878036618232727, - "learning_rate": 4.920185185185185e-05, - "loss": 0.5429, - "step": 3432 - }, - { - "epoch": 0.12050053528492953, - "grad_norm": 0.5364927053451538, - "learning_rate": 4.92e-05, - "loss": 0.4655, - "step": 3433 - }, - { - "epoch": 0.12053563593604662, - "grad_norm": 0.6229907274246216, - "learning_rate": 4.919814814814815e-05, - "loss": 0.5443, - "step": 3434 - }, - { - "epoch": 0.12057073658716369, - "grad_norm": 0.6556458473205566, - "learning_rate": 4.91962962962963e-05, - "loss": 0.5787, - "step": 3435 - }, - { - "epoch": 0.12060583723828076, - "grad_norm": 0.6338503956794739, - "learning_rate": 4.919444444444445e-05, - "loss": 0.5105, - "step": 3436 - }, - { - "epoch": 0.12064093788939785, - "grad_norm": 0.7679821252822876, - "learning_rate": 4.91925925925926e-05, - "loss": 0.5049, - "step": 3437 - }, - { - "epoch": 0.12067603854051492, - "grad_norm": 0.5975868105888367, - "learning_rate": 4.919074074074074e-05, - "loss": 0.4733, - "step": 3438 - }, - { - "epoch": 0.12071113919163201, - "grad_norm": 0.7541370987892151, - "learning_rate": 4.918888888888889e-05, - "loss": 0.5979, - "step": 3439 - }, - { - "epoch": 0.12074623984274908, - "grad_norm": 0.5493429899215698, - "learning_rate": 4.9187037037037034e-05, - "loss": 0.6102, - "step": 3440 - }, - { - "epoch": 0.12078134049386616, - "grad_norm": 0.524752676486969, - "learning_rate": 4.918518518518519e-05, - "loss": 0.4841, - "step": 3441 - }, - { - "epoch": 0.12081644114498324, - "grad_norm": 0.6459055542945862, - "learning_rate": 4.9183333333333334e-05, - "loss": 0.5734, - "step": 3442 - }, - { - "epoch": 0.12085154179610032, - "grad_norm": 0.5309064984321594, - "learning_rate": 4.9181481481481484e-05, - "loss": 0.5087, - "step": 3443 - }, - { - "epoch": 0.12088664244721739, - "grad_norm": 0.6716018319129944, - "learning_rate": 4.9179629629629634e-05, - "loss": 0.5021, - "step": 3444 - }, - { - "epoch": 0.12092174309833448, - "grad_norm": 0.7030388116836548, - "learning_rate": 4.917777777777778e-05, - "loss": 0.5894, - "step": 3445 - }, - { - "epoch": 0.12095684374945155, - "grad_norm": 0.612118661403656, - "learning_rate": 4.917592592592593e-05, - "loss": 0.5326, - "step": 3446 - }, - { - "epoch": 0.12099194440056862, - "grad_norm": 0.5464820861816406, - "learning_rate": 4.917407407407408e-05, - "loss": 0.404, - "step": 3447 - }, - { - "epoch": 0.12102704505168571, - "grad_norm": 0.5753924250602722, - "learning_rate": 4.917222222222223e-05, - "loss": 0.4948, - "step": 3448 - }, - { - "epoch": 0.12106214570280278, - "grad_norm": 0.7072426080703735, - "learning_rate": 4.917037037037037e-05, - "loss": 0.4627, - "step": 3449 - }, - { - "epoch": 0.12109724635391987, - "grad_norm": 0.5479128956794739, - "learning_rate": 4.916851851851852e-05, - "loss": 0.4329, - "step": 3450 - }, - { - "epoch": 0.12113234700503694, - "grad_norm": 0.5609884858131409, - "learning_rate": 4.9166666666666665e-05, - "loss": 0.5484, - "step": 3451 - }, - { - "epoch": 0.12116744765615402, - "grad_norm": 0.6516402959823608, - "learning_rate": 4.916481481481482e-05, - "loss": 0.4666, - "step": 3452 - }, - { - "epoch": 0.1212025483072711, - "grad_norm": 0.526574432849884, - "learning_rate": 4.9162962962962965e-05, - "loss": 0.5295, - "step": 3453 - }, - { - "epoch": 0.12123764895838818, - "grad_norm": 0.6460963487625122, - "learning_rate": 4.9161111111111115e-05, - "loss": 0.5663, - "step": 3454 - }, - { - "epoch": 0.12127274960950525, - "grad_norm": 0.6676881313323975, - "learning_rate": 4.915925925925926e-05, - "loss": 0.3141, - "step": 3455 - }, - { - "epoch": 0.12130785026062234, - "grad_norm": 0.7270172238349915, - "learning_rate": 4.915740740740741e-05, - "loss": 0.5949, - "step": 3456 - }, - { - "epoch": 0.12134295091173941, - "grad_norm": 0.8134821653366089, - "learning_rate": 4.915555555555556e-05, - "loss": 0.552, - "step": 3457 - }, - { - "epoch": 0.12137805156285648, - "grad_norm": 0.6300197839736938, - "learning_rate": 4.915370370370371e-05, - "loss": 0.5079, - "step": 3458 - }, - { - "epoch": 0.12141315221397357, - "grad_norm": 0.6343599557876587, - "learning_rate": 4.915185185185185e-05, - "loss": 0.6259, - "step": 3459 - }, - { - "epoch": 0.12144825286509064, - "grad_norm": 0.578429639339447, - "learning_rate": 4.915e-05, - "loss": 0.4909, - "step": 3460 - }, - { - "epoch": 0.12148335351620773, - "grad_norm": 0.6583652496337891, - "learning_rate": 4.9148148148148145e-05, - "loss": 0.6253, - "step": 3461 - }, - { - "epoch": 0.1215184541673248, - "grad_norm": 0.5550524592399597, - "learning_rate": 4.91462962962963e-05, - "loss": 0.481, - "step": 3462 - }, - { - "epoch": 0.12155355481844188, - "grad_norm": 0.7157047986984253, - "learning_rate": 4.9144444444444446e-05, - "loss": 0.5911, - "step": 3463 - }, - { - "epoch": 0.12158865546955896, - "grad_norm": 0.853480339050293, - "learning_rate": 4.9142592592592596e-05, - "loss": 0.5344, - "step": 3464 - }, - { - "epoch": 0.12162375612067604, - "grad_norm": 0.6270495653152466, - "learning_rate": 4.9140740740740746e-05, - "loss": 0.5073, - "step": 3465 - }, - { - "epoch": 0.12165885677179311, - "grad_norm": 0.7944740056991577, - "learning_rate": 4.913888888888889e-05, - "loss": 0.5616, - "step": 3466 - }, - { - "epoch": 0.1216939574229102, - "grad_norm": 0.701286792755127, - "learning_rate": 4.913703703703704e-05, - "loss": 0.5931, - "step": 3467 - }, - { - "epoch": 0.12172905807402727, - "grad_norm": 0.4955708682537079, - "learning_rate": 4.913518518518519e-05, - "loss": 0.5299, - "step": 3468 - }, - { - "epoch": 0.12176415872514434, - "grad_norm": 0.6155372262001038, - "learning_rate": 4.913333333333334e-05, - "loss": 0.5079, - "step": 3469 - }, - { - "epoch": 0.12179925937626143, - "grad_norm": 0.5697748064994812, - "learning_rate": 4.913148148148148e-05, - "loss": 0.5425, - "step": 3470 - }, - { - "epoch": 0.1218343600273785, - "grad_norm": 0.7843124270439148, - "learning_rate": 4.912962962962963e-05, - "loss": 0.6206, - "step": 3471 - }, - { - "epoch": 0.12186946067849559, - "grad_norm": 0.6990386843681335, - "learning_rate": 4.9127777777777776e-05, - "loss": 0.4684, - "step": 3472 - }, - { - "epoch": 0.12190456132961267, - "grad_norm": 0.5105015635490417, - "learning_rate": 4.912592592592593e-05, - "loss": 0.4011, - "step": 3473 - }, - { - "epoch": 0.12193966198072974, - "grad_norm": 0.6413056254386902, - "learning_rate": 4.9124074074074077e-05, - "loss": 0.5422, - "step": 3474 - }, - { - "epoch": 0.12197476263184683, - "grad_norm": 0.5734795928001404, - "learning_rate": 4.912222222222223e-05, - "loss": 0.4755, - "step": 3475 - }, - { - "epoch": 0.1220098632829639, - "grad_norm": 0.5067521333694458, - "learning_rate": 4.912037037037037e-05, - "loss": 0.3368, - "step": 3476 - }, - { - "epoch": 0.12204496393408097, - "grad_norm": 0.7569767236709595, - "learning_rate": 4.911851851851852e-05, - "loss": 0.5108, - "step": 3477 - }, - { - "epoch": 0.12208006458519806, - "grad_norm": 0.6246193051338196, - "learning_rate": 4.9116666666666663e-05, - "loss": 0.613, - "step": 3478 - }, - { - "epoch": 0.12211516523631513, - "grad_norm": 0.6443379521369934, - "learning_rate": 4.911481481481482e-05, - "loss": 0.6063, - "step": 3479 - }, - { - "epoch": 0.1221502658874322, - "grad_norm": 0.6877490282058716, - "learning_rate": 4.9112962962962964e-05, - "loss": 0.5955, - "step": 3480 - }, - { - "epoch": 0.12218536653854929, - "grad_norm": 0.6203529834747314, - "learning_rate": 4.9111111111111114e-05, - "loss": 0.5612, - "step": 3481 - }, - { - "epoch": 0.12222046718966637, - "grad_norm": 0.6548603177070618, - "learning_rate": 4.910925925925926e-05, - "loss": 0.4819, - "step": 3482 - }, - { - "epoch": 0.12225556784078345, - "grad_norm": 0.5954891443252563, - "learning_rate": 4.910740740740741e-05, - "loss": 0.5855, - "step": 3483 - }, - { - "epoch": 0.12229066849190053, - "grad_norm": 0.5571162700653076, - "learning_rate": 4.910555555555556e-05, - "loss": 0.5208, - "step": 3484 - }, - { - "epoch": 0.1223257691430176, - "grad_norm": 0.5437628030776978, - "learning_rate": 4.910370370370371e-05, - "loss": 0.4697, - "step": 3485 - }, - { - "epoch": 0.12236086979413469, - "grad_norm": 0.6161670088768005, - "learning_rate": 4.910185185185186e-05, - "loss": 0.4958, - "step": 3486 - }, - { - "epoch": 0.12239597044525176, - "grad_norm": 0.5650283098220825, - "learning_rate": 4.91e-05, - "loss": 0.6148, - "step": 3487 - }, - { - "epoch": 0.12243107109636883, - "grad_norm": 0.5702127814292908, - "learning_rate": 4.909814814814815e-05, - "loss": 0.4719, - "step": 3488 - }, - { - "epoch": 0.12246617174748592, - "grad_norm": 0.6318438649177551, - "learning_rate": 4.90962962962963e-05, - "loss": 0.4771, - "step": 3489 - }, - { - "epoch": 0.12250127239860299, - "grad_norm": 0.6326043605804443, - "learning_rate": 4.909444444444445e-05, - "loss": 0.4746, - "step": 3490 - }, - { - "epoch": 0.12253637304972007, - "grad_norm": 0.6715946793556213, - "learning_rate": 4.9092592592592595e-05, - "loss": 0.5692, - "step": 3491 - }, - { - "epoch": 0.12257147370083715, - "grad_norm": 0.529293954372406, - "learning_rate": 4.9090740740740745e-05, - "loss": 0.5632, - "step": 3492 - }, - { - "epoch": 0.12260657435195423, - "grad_norm": 0.6605871319770813, - "learning_rate": 4.908888888888889e-05, - "loss": 0.6018, - "step": 3493 - }, - { - "epoch": 0.12264167500307131, - "grad_norm": 0.6047478914260864, - "learning_rate": 4.908703703703704e-05, - "loss": 0.5018, - "step": 3494 - }, - { - "epoch": 0.12267677565418839, - "grad_norm": 0.5458720922470093, - "learning_rate": 4.908518518518519e-05, - "loss": 0.5366, - "step": 3495 - }, - { - "epoch": 0.12271187630530546, - "grad_norm": 0.558098316192627, - "learning_rate": 4.908333333333334e-05, - "loss": 0.5559, - "step": 3496 - }, - { - "epoch": 0.12274697695642255, - "grad_norm": 0.5034712553024292, - "learning_rate": 4.908148148148148e-05, - "loss": 0.575, - "step": 3497 - }, - { - "epoch": 0.12278207760753962, - "grad_norm": 0.5525332689285278, - "learning_rate": 4.907962962962963e-05, - "loss": 0.5827, - "step": 3498 - }, - { - "epoch": 0.12281717825865669, - "grad_norm": 0.5874247550964355, - "learning_rate": 4.9077777777777775e-05, - "loss": 0.5255, - "step": 3499 - }, - { - "epoch": 0.12285227890977378, - "grad_norm": 0.6577982306480408, - "learning_rate": 4.907592592592593e-05, - "loss": 0.5398, - "step": 3500 - }, - { - "epoch": 0.12288737956089085, - "grad_norm": 0.6019877195358276, - "learning_rate": 4.9074074074074075e-05, - "loss": 0.5618, - "step": 3501 - }, - { - "epoch": 0.12292248021200794, - "grad_norm": 0.6275189518928528, - "learning_rate": 4.9072222222222225e-05, - "loss": 0.592, - "step": 3502 - }, - { - "epoch": 0.12295758086312501, - "grad_norm": 0.6721670031547546, - "learning_rate": 4.907037037037037e-05, - "loss": 0.4599, - "step": 3503 - }, - { - "epoch": 0.12299268151424209, - "grad_norm": 0.6333510875701904, - "learning_rate": 4.906851851851852e-05, - "loss": 0.6057, - "step": 3504 - }, - { - "epoch": 0.12302778216535917, - "grad_norm": 0.5858910083770752, - "learning_rate": 4.906666666666667e-05, - "loss": 0.6104, - "step": 3505 - }, - { - "epoch": 0.12306288281647625, - "grad_norm": 0.8731744289398193, - "learning_rate": 4.906481481481482e-05, - "loss": 0.4109, - "step": 3506 - }, - { - "epoch": 0.12309798346759332, - "grad_norm": 0.6953579783439636, - "learning_rate": 4.906296296296297e-05, - "loss": 0.6928, - "step": 3507 - }, - { - "epoch": 0.1231330841187104, - "grad_norm": 0.5466492176055908, - "learning_rate": 4.906111111111111e-05, - "loss": 0.5459, - "step": 3508 - }, - { - "epoch": 0.12316818476982748, - "grad_norm": 0.4619092047214508, - "learning_rate": 4.905925925925926e-05, - "loss": 0.4391, - "step": 3509 - }, - { - "epoch": 0.12320328542094455, - "grad_norm": 0.6130390763282776, - "learning_rate": 4.9057407407407406e-05, - "loss": 0.541, - "step": 3510 - }, - { - "epoch": 0.12323838607206164, - "grad_norm": 0.5068723559379578, - "learning_rate": 4.905555555555556e-05, - "loss": 0.4334, - "step": 3511 - }, - { - "epoch": 0.12327348672317871, - "grad_norm": 0.5192058682441711, - "learning_rate": 4.9053703703703706e-05, - "loss": 0.6365, - "step": 3512 - }, - { - "epoch": 0.1233085873742958, - "grad_norm": 0.49554187059402466, - "learning_rate": 4.9051851851851856e-05, - "loss": 0.4721, - "step": 3513 - }, - { - "epoch": 0.12334368802541287, - "grad_norm": 0.5258930325508118, - "learning_rate": 4.905e-05, - "loss": 0.5523, - "step": 3514 - }, - { - "epoch": 0.12337878867652995, - "grad_norm": 0.5906677842140198, - "learning_rate": 4.904814814814815e-05, - "loss": 0.5396, - "step": 3515 - }, - { - "epoch": 0.12341388932764703, - "grad_norm": 0.7140839695930481, - "learning_rate": 4.90462962962963e-05, - "loss": 0.4803, - "step": 3516 - }, - { - "epoch": 0.1234489899787641, - "grad_norm": 0.5303902626037598, - "learning_rate": 4.904444444444445e-05, - "loss": 0.429, - "step": 3517 - }, - { - "epoch": 0.12348409062988118, - "grad_norm": 0.5710562467575073, - "learning_rate": 4.904259259259259e-05, - "loss": 0.5993, - "step": 3518 - }, - { - "epoch": 0.12351919128099827, - "grad_norm": 0.8331993222236633, - "learning_rate": 4.9040740740740743e-05, - "loss": 0.4964, - "step": 3519 - }, - { - "epoch": 0.12355429193211534, - "grad_norm": 0.6206798553466797, - "learning_rate": 4.903888888888889e-05, - "loss": 0.5061, - "step": 3520 - }, - { - "epoch": 0.12358939258323241, - "grad_norm": 0.6443823575973511, - "learning_rate": 4.903703703703704e-05, - "loss": 0.4979, - "step": 3521 - }, - { - "epoch": 0.1236244932343495, - "grad_norm": 0.6336797475814819, - "learning_rate": 4.903518518518519e-05, - "loss": 0.5312, - "step": 3522 - }, - { - "epoch": 0.12365959388546657, - "grad_norm": 0.5900976657867432, - "learning_rate": 4.903333333333334e-05, - "loss": 0.5068, - "step": 3523 - }, - { - "epoch": 0.12369469453658366, - "grad_norm": 0.5844592452049255, - "learning_rate": 4.903148148148148e-05, - "loss": 0.55, - "step": 3524 - }, - { - "epoch": 0.12372979518770073, - "grad_norm": 0.6574231386184692, - "learning_rate": 4.902962962962963e-05, - "loss": 0.5641, - "step": 3525 - }, - { - "epoch": 0.1237648958388178, - "grad_norm": 0.5541152954101562, - "learning_rate": 4.902777777777778e-05, - "loss": 0.4687, - "step": 3526 - }, - { - "epoch": 0.1237999964899349, - "grad_norm": 0.6861927509307861, - "learning_rate": 4.902592592592593e-05, - "loss": 0.638, - "step": 3527 - }, - { - "epoch": 0.12383509714105197, - "grad_norm": 0.5383525490760803, - "learning_rate": 4.902407407407408e-05, - "loss": 0.5122, - "step": 3528 - }, - { - "epoch": 0.12387019779216904, - "grad_norm": 0.5677382349967957, - "learning_rate": 4.9022222222222224e-05, - "loss": 0.5622, - "step": 3529 - }, - { - "epoch": 0.12390529844328613, - "grad_norm": 0.6337422132492065, - "learning_rate": 4.9020370370370374e-05, - "loss": 0.5712, - "step": 3530 - }, - { - "epoch": 0.1239403990944032, - "grad_norm": 0.6072642803192139, - "learning_rate": 4.901851851851852e-05, - "loss": 0.4706, - "step": 3531 - }, - { - "epoch": 0.12397549974552027, - "grad_norm": 0.7595537900924683, - "learning_rate": 4.901666666666667e-05, - "loss": 0.6212, - "step": 3532 - }, - { - "epoch": 0.12401060039663736, - "grad_norm": 0.6069207191467285, - "learning_rate": 4.901481481481482e-05, - "loss": 0.5214, - "step": 3533 - }, - { - "epoch": 0.12404570104775443, - "grad_norm": 0.7170518636703491, - "learning_rate": 4.901296296296297e-05, - "loss": 0.6189, - "step": 3534 - }, - { - "epoch": 0.12408080169887152, - "grad_norm": 0.6761568188667297, - "learning_rate": 4.901111111111111e-05, - "loss": 0.5884, - "step": 3535 - }, - { - "epoch": 0.1241159023499886, - "grad_norm": 0.588691771030426, - "learning_rate": 4.900925925925926e-05, - "loss": 0.5229, - "step": 3536 - }, - { - "epoch": 0.12415100300110567, - "grad_norm": 0.5762355327606201, - "learning_rate": 4.9007407407407405e-05, - "loss": 0.549, - "step": 3537 - }, - { - "epoch": 0.12418610365222275, - "grad_norm": 0.5291677713394165, - "learning_rate": 4.900555555555556e-05, - "loss": 0.5061, - "step": 3538 - }, - { - "epoch": 0.12422120430333983, - "grad_norm": 0.6087853312492371, - "learning_rate": 4.9003703703703705e-05, - "loss": 0.6293, - "step": 3539 - }, - { - "epoch": 0.1242563049544569, - "grad_norm": 0.5561395883560181, - "learning_rate": 4.9001851851851855e-05, - "loss": 0.556, - "step": 3540 - }, - { - "epoch": 0.12429140560557399, - "grad_norm": 0.6016924381256104, - "learning_rate": 4.9e-05, - "loss": 0.6212, - "step": 3541 - }, - { - "epoch": 0.12432650625669106, - "grad_norm": 0.535295307636261, - "learning_rate": 4.899814814814815e-05, - "loss": 0.5746, - "step": 3542 - }, - { - "epoch": 0.12436160690780813, - "grad_norm": 0.6499553322792053, - "learning_rate": 4.89962962962963e-05, - "loss": 0.4802, - "step": 3543 - }, - { - "epoch": 0.12439670755892522, - "grad_norm": 0.5828231573104858, - "learning_rate": 4.899444444444445e-05, - "loss": 0.5016, - "step": 3544 - }, - { - "epoch": 0.1244318082100423, - "grad_norm": 0.5614896416664124, - "learning_rate": 4.89925925925926e-05, - "loss": 0.5205, - "step": 3545 - }, - { - "epoch": 0.12446690886115938, - "grad_norm": 0.532425045967102, - "learning_rate": 4.899074074074074e-05, - "loss": 0.6243, - "step": 3546 - }, - { - "epoch": 0.12450200951227645, - "grad_norm": 0.6184325814247131, - "learning_rate": 4.898888888888889e-05, - "loss": 0.528, - "step": 3547 - }, - { - "epoch": 0.12453711016339353, - "grad_norm": 0.7226724624633789, - "learning_rate": 4.8987037037037036e-05, - "loss": 0.5926, - "step": 3548 - }, - { - "epoch": 0.12457221081451061, - "grad_norm": 0.525632917881012, - "learning_rate": 4.898518518518519e-05, - "loss": 0.5025, - "step": 3549 - }, - { - "epoch": 0.12460731146562769, - "grad_norm": 0.5504665374755859, - "learning_rate": 4.8983333333333336e-05, - "loss": 0.533, - "step": 3550 - }, - { - "epoch": 0.12464241211674476, - "grad_norm": 0.6314889192581177, - "learning_rate": 4.8981481481481486e-05, - "loss": 0.4462, - "step": 3551 - }, - { - "epoch": 0.12467751276786185, - "grad_norm": 0.6896164417266846, - "learning_rate": 4.897962962962963e-05, - "loss": 0.5173, - "step": 3552 - }, - { - "epoch": 0.12471261341897892, - "grad_norm": 0.6577224135398865, - "learning_rate": 4.897777777777778e-05, - "loss": 0.5983, - "step": 3553 - }, - { - "epoch": 0.124747714070096, - "grad_norm": 0.6318711042404175, - "learning_rate": 4.897592592592593e-05, - "loss": 0.4556, - "step": 3554 - }, - { - "epoch": 0.12478281472121308, - "grad_norm": 0.6306760907173157, - "learning_rate": 4.897407407407408e-05, - "loss": 0.6126, - "step": 3555 - }, - { - "epoch": 0.12481791537233015, - "grad_norm": 0.6367062926292419, - "learning_rate": 4.897222222222222e-05, - "loss": 0.6229, - "step": 3556 - }, - { - "epoch": 0.12485301602344724, - "grad_norm": 0.6656230092048645, - "learning_rate": 4.897037037037037e-05, - "loss": 0.5844, - "step": 3557 - }, - { - "epoch": 0.12488811667456431, - "grad_norm": 0.5569809675216675, - "learning_rate": 4.8968518518518516e-05, - "loss": 0.5202, - "step": 3558 - }, - { - "epoch": 0.12492321732568139, - "grad_norm": 0.8291566967964172, - "learning_rate": 4.8966666666666667e-05, - "loss": 0.5671, - "step": 3559 - }, - { - "epoch": 0.12495831797679847, - "grad_norm": 0.4848994314670563, - "learning_rate": 4.896481481481482e-05, - "loss": 0.4342, - "step": 3560 - }, - { - "epoch": 0.12499341862791555, - "grad_norm": 0.586779773235321, - "learning_rate": 4.896296296296297e-05, - "loss": 0.488, - "step": 3561 - }, - { - "epoch": 0.12502851927903263, - "grad_norm": 0.625033974647522, - "learning_rate": 4.896111111111111e-05, - "loss": 0.479, - "step": 3562 - }, - { - "epoch": 0.1250636199301497, - "grad_norm": 0.75477135181427, - "learning_rate": 4.895925925925926e-05, - "loss": 0.5317, - "step": 3563 - }, - { - "epoch": 0.12509872058126678, - "grad_norm": 0.5669741034507751, - "learning_rate": 4.895740740740741e-05, - "loss": 0.6141, - "step": 3564 - }, - { - "epoch": 0.12513382123238387, - "grad_norm": 0.6126890778541565, - "learning_rate": 4.895555555555556e-05, - "loss": 0.5458, - "step": 3565 - }, - { - "epoch": 0.12516892188350093, - "grad_norm": 0.8000637888908386, - "learning_rate": 4.895370370370371e-05, - "loss": 0.6536, - "step": 3566 - }, - { - "epoch": 0.12520402253461801, - "grad_norm": 0.5487722754478455, - "learning_rate": 4.8951851851851854e-05, - "loss": 0.4961, - "step": 3567 - }, - { - "epoch": 0.1252391231857351, - "grad_norm": 0.6991975903511047, - "learning_rate": 4.8950000000000004e-05, - "loss": 0.573, - "step": 3568 - }, - { - "epoch": 0.12527422383685216, - "grad_norm": 0.6792163848876953, - "learning_rate": 4.894814814814815e-05, - "loss": 0.5791, - "step": 3569 - }, - { - "epoch": 0.12530932448796925, - "grad_norm": 0.598217785358429, - "learning_rate": 4.8946296296296304e-05, - "loss": 0.5457, - "step": 3570 - }, - { - "epoch": 0.12534442513908634, - "grad_norm": 0.6015528440475464, - "learning_rate": 4.894444444444445e-05, - "loss": 0.5019, - "step": 3571 - }, - { - "epoch": 0.12537952579020342, - "grad_norm": 0.5086249113082886, - "learning_rate": 4.89425925925926e-05, - "loss": 0.4243, - "step": 3572 - }, - { - "epoch": 0.12541462644132048, - "grad_norm": 0.5883159637451172, - "learning_rate": 4.894074074074074e-05, - "loss": 0.5587, - "step": 3573 - }, - { - "epoch": 0.12544972709243757, - "grad_norm": 0.5602272152900696, - "learning_rate": 4.893888888888889e-05, - "loss": 0.506, - "step": 3574 - }, - { - "epoch": 0.12548482774355466, - "grad_norm": 0.6598020195960999, - "learning_rate": 4.8937037037037034e-05, - "loss": 0.4993, - "step": 3575 - }, - { - "epoch": 0.12551992839467171, - "grad_norm": 0.5945547223091125, - "learning_rate": 4.893518518518519e-05, - "loss": 0.4926, - "step": 3576 - }, - { - "epoch": 0.1255550290457888, - "grad_norm": 0.6035172343254089, - "learning_rate": 4.8933333333333335e-05, - "loss": 0.585, - "step": 3577 - }, - { - "epoch": 0.1255901296969059, - "grad_norm": 0.6694384217262268, - "learning_rate": 4.8931481481481485e-05, - "loss": 0.5498, - "step": 3578 - }, - { - "epoch": 0.12562523034802295, - "grad_norm": 0.9028328061103821, - "learning_rate": 4.892962962962963e-05, - "loss": 0.6126, - "step": 3579 - }, - { - "epoch": 0.12566033099914004, - "grad_norm": 0.6990640163421631, - "learning_rate": 4.892777777777778e-05, - "loss": 0.5169, - "step": 3580 - }, - { - "epoch": 0.12569543165025712, - "grad_norm": 0.61386638879776, - "learning_rate": 4.892592592592593e-05, - "loss": 0.5739, - "step": 3581 - }, - { - "epoch": 0.12573053230137418, - "grad_norm": 0.7000011801719666, - "learning_rate": 4.892407407407408e-05, - "loss": 0.5268, - "step": 3582 - }, - { - "epoch": 0.12576563295249127, - "grad_norm": 0.5060369968414307, - "learning_rate": 4.892222222222222e-05, - "loss": 0.4176, - "step": 3583 - }, - { - "epoch": 0.12580073360360836, - "grad_norm": 0.6500051617622375, - "learning_rate": 4.892037037037037e-05, - "loss": 0.6316, - "step": 3584 - }, - { - "epoch": 0.12583583425472541, - "grad_norm": 0.7548242807388306, - "learning_rate": 4.891851851851852e-05, - "loss": 0.5221, - "step": 3585 - }, - { - "epoch": 0.1258709349058425, - "grad_norm": 0.5024241805076599, - "learning_rate": 4.891666666666667e-05, - "loss": 0.5255, - "step": 3586 - }, - { - "epoch": 0.1259060355569596, - "grad_norm": 0.6444483995437622, - "learning_rate": 4.891481481481482e-05, - "loss": 0.4426, - "step": 3587 - }, - { - "epoch": 0.12594113620807665, - "grad_norm": 0.6784068942070007, - "learning_rate": 4.8912962962962966e-05, - "loss": 0.4686, - "step": 3588 - }, - { - "epoch": 0.12597623685919374, - "grad_norm": 0.7014618515968323, - "learning_rate": 4.8911111111111116e-05, - "loss": 0.5157, - "step": 3589 - }, - { - "epoch": 0.12601133751031082, - "grad_norm": 0.6554310917854309, - "learning_rate": 4.890925925925926e-05, - "loss": 0.3652, - "step": 3590 - }, - { - "epoch": 0.12604643816142788, - "grad_norm": 0.5300453305244446, - "learning_rate": 4.890740740740741e-05, - "loss": 0.4824, - "step": 3591 - }, - { - "epoch": 0.12608153881254497, - "grad_norm": 0.6419769525527954, - "learning_rate": 4.890555555555556e-05, - "loss": 0.6189, - "step": 3592 - }, - { - "epoch": 0.12611663946366206, - "grad_norm": 0.5220547318458557, - "learning_rate": 4.890370370370371e-05, - "loss": 0.5879, - "step": 3593 - }, - { - "epoch": 0.12615174011477914, - "grad_norm": 0.6048060059547424, - "learning_rate": 4.890185185185185e-05, - "loss": 0.4952, - "step": 3594 - }, - { - "epoch": 0.1261868407658962, - "grad_norm": 0.5456861853599548, - "learning_rate": 4.89e-05, - "loss": 0.584, - "step": 3595 - }, - { - "epoch": 0.1262219414170133, - "grad_norm": 0.6177746057510376, - "learning_rate": 4.8898148148148146e-05, - "loss": 0.6187, - "step": 3596 - }, - { - "epoch": 0.12625704206813038, - "grad_norm": 0.7614100575447083, - "learning_rate": 4.88962962962963e-05, - "loss": 0.542, - "step": 3597 - }, - { - "epoch": 0.12629214271924744, - "grad_norm": 0.6033332347869873, - "learning_rate": 4.8894444444444446e-05, - "loss": 0.4361, - "step": 3598 - }, - { - "epoch": 0.12632724337036452, - "grad_norm": 0.6727920174598694, - "learning_rate": 4.8892592592592596e-05, - "loss": 0.5522, - "step": 3599 - }, - { - "epoch": 0.1263623440214816, - "grad_norm": 0.5353013277053833, - "learning_rate": 4.889074074074074e-05, - "loss": 0.6339, - "step": 3600 - }, - { - "epoch": 0.12639744467259867, - "grad_norm": 0.5670080184936523, - "learning_rate": 4.888888888888889e-05, - "loss": 0.563, - "step": 3601 - }, - { - "epoch": 0.12643254532371576, - "grad_norm": 0.5802141427993774, - "learning_rate": 4.888703703703704e-05, - "loss": 0.649, - "step": 3602 - }, - { - "epoch": 0.12646764597483284, - "grad_norm": 0.609484076499939, - "learning_rate": 4.888518518518519e-05, - "loss": 0.6465, - "step": 3603 - }, - { - "epoch": 0.1265027466259499, - "grad_norm": 0.5529373288154602, - "learning_rate": 4.8883333333333333e-05, - "loss": 0.5633, - "step": 3604 - }, - { - "epoch": 0.126537847277067, - "grad_norm": 0.6209548711776733, - "learning_rate": 4.8881481481481484e-05, - "loss": 0.4528, - "step": 3605 - }, - { - "epoch": 0.12657294792818408, - "grad_norm": 0.7001374959945679, - "learning_rate": 4.8879629629629634e-05, - "loss": 0.5894, - "step": 3606 - }, - { - "epoch": 0.12660804857930114, - "grad_norm": 1.1265777349472046, - "learning_rate": 4.887777777777778e-05, - "loss": 0.6066, - "step": 3607 - }, - { - "epoch": 0.12664314923041822, - "grad_norm": 0.6087487936019897, - "learning_rate": 4.8875925925925934e-05, - "loss": 0.5032, - "step": 3608 - }, - { - "epoch": 0.1266782498815353, - "grad_norm": 0.5638437271118164, - "learning_rate": 4.887407407407408e-05, - "loss": 0.4562, - "step": 3609 - }, - { - "epoch": 0.12671335053265237, - "grad_norm": 0.5131291151046753, - "learning_rate": 4.887222222222223e-05, - "loss": 0.4779, - "step": 3610 - }, - { - "epoch": 0.12674845118376946, - "grad_norm": 0.6005612015724182, - "learning_rate": 4.887037037037037e-05, - "loss": 0.5579, - "step": 3611 - }, - { - "epoch": 0.12678355183488654, - "grad_norm": 0.6103861927986145, - "learning_rate": 4.886851851851852e-05, - "loss": 0.4738, - "step": 3612 - }, - { - "epoch": 0.1268186524860036, - "grad_norm": 0.7663881182670593, - "learning_rate": 4.886666666666667e-05, - "loss": 0.4999, - "step": 3613 - }, - { - "epoch": 0.1268537531371207, - "grad_norm": 0.6608569025993347, - "learning_rate": 4.886481481481482e-05, - "loss": 0.5501, - "step": 3614 - }, - { - "epoch": 0.12688885378823778, - "grad_norm": 0.5691586136817932, - "learning_rate": 4.8862962962962964e-05, - "loss": 0.5077, - "step": 3615 - }, - { - "epoch": 0.12692395443935486, - "grad_norm": 0.6124642491340637, - "learning_rate": 4.8861111111111114e-05, - "loss": 0.5653, - "step": 3616 - }, - { - "epoch": 0.12695905509047192, - "grad_norm": 0.5733052492141724, - "learning_rate": 4.885925925925926e-05, - "loss": 0.588, - "step": 3617 - }, - { - "epoch": 0.126994155741589, - "grad_norm": 0.5345761179924011, - "learning_rate": 4.885740740740741e-05, - "loss": 0.3621, - "step": 3618 - }, - { - "epoch": 0.1270292563927061, - "grad_norm": 0.603247344493866, - "learning_rate": 4.885555555555556e-05, - "loss": 0.5204, - "step": 3619 - }, - { - "epoch": 0.12706435704382316, - "grad_norm": 0.5644059181213379, - "learning_rate": 4.885370370370371e-05, - "loss": 0.5448, - "step": 3620 - }, - { - "epoch": 0.12709945769494024, - "grad_norm": 0.6008853316307068, - "learning_rate": 4.885185185185185e-05, - "loss": 0.5799, - "step": 3621 - }, - { - "epoch": 0.12713455834605733, - "grad_norm": 0.644427478313446, - "learning_rate": 4.885e-05, - "loss": 0.4146, - "step": 3622 - }, - { - "epoch": 0.1271696589971744, - "grad_norm": 0.6122147440910339, - "learning_rate": 4.884814814814815e-05, - "loss": 0.5799, - "step": 3623 - }, - { - "epoch": 0.12720475964829148, - "grad_norm": 0.5472849607467651, - "learning_rate": 4.88462962962963e-05, - "loss": 0.4755, - "step": 3624 - }, - { - "epoch": 0.12723986029940856, - "grad_norm": 0.5017921924591064, - "learning_rate": 4.8844444444444445e-05, - "loss": 0.6076, - "step": 3625 - }, - { - "epoch": 0.12727496095052562, - "grad_norm": 0.5672622919082642, - "learning_rate": 4.8842592592592595e-05, - "loss": 0.4276, - "step": 3626 - }, - { - "epoch": 0.1273100616016427, - "grad_norm": 0.5956513285636902, - "learning_rate": 4.8840740740740745e-05, - "loss": 0.6322, - "step": 3627 - }, - { - "epoch": 0.1273451622527598, - "grad_norm": 0.6334989666938782, - "learning_rate": 4.883888888888889e-05, - "loss": 0.5589, - "step": 3628 - }, - { - "epoch": 0.12738026290387686, - "grad_norm": 0.6114880442619324, - "learning_rate": 4.883703703703704e-05, - "loss": 0.5966, - "step": 3629 - }, - { - "epoch": 0.12741536355499394, - "grad_norm": 0.5177052021026611, - "learning_rate": 4.883518518518519e-05, - "loss": 0.5856, - "step": 3630 - }, - { - "epoch": 0.12745046420611103, - "grad_norm": 0.5355302691459656, - "learning_rate": 4.883333333333334e-05, - "loss": 0.5329, - "step": 3631 - }, - { - "epoch": 0.1274855648572281, - "grad_norm": 0.5938350558280945, - "learning_rate": 4.883148148148148e-05, - "loss": 0.536, - "step": 3632 - }, - { - "epoch": 0.12752066550834518, - "grad_norm": 0.5365053415298462, - "learning_rate": 4.882962962962963e-05, - "loss": 0.4634, - "step": 3633 - }, - { - "epoch": 0.12755576615946226, - "grad_norm": 0.6796265244483948, - "learning_rate": 4.8827777777777776e-05, - "loss": 0.6741, - "step": 3634 - }, - { - "epoch": 0.12759086681057932, - "grad_norm": 0.5115085244178772, - "learning_rate": 4.882592592592593e-05, - "loss": 0.3967, - "step": 3635 - }, - { - "epoch": 0.1276259674616964, - "grad_norm": 0.5656108260154724, - "learning_rate": 4.8824074074074076e-05, - "loss": 0.5614, - "step": 3636 - }, - { - "epoch": 0.1276610681128135, - "grad_norm": 0.5803112983703613, - "learning_rate": 4.8822222222222226e-05, - "loss": 0.5906, - "step": 3637 - }, - { - "epoch": 0.12769616876393058, - "grad_norm": 0.5494948029518127, - "learning_rate": 4.882037037037037e-05, - "loss": 0.5572, - "step": 3638 - }, - { - "epoch": 0.12773126941504764, - "grad_norm": 0.6151064038276672, - "learning_rate": 4.881851851851852e-05, - "loss": 0.5551, - "step": 3639 - }, - { - "epoch": 0.12776637006616473, - "grad_norm": 0.5415854454040527, - "learning_rate": 4.881666666666667e-05, - "loss": 0.4521, - "step": 3640 - }, - { - "epoch": 0.12780147071728182, - "grad_norm": 0.9026033282279968, - "learning_rate": 4.881481481481482e-05, - "loss": 0.5759, - "step": 3641 - }, - { - "epoch": 0.12783657136839888, - "grad_norm": 0.5725629329681396, - "learning_rate": 4.881296296296296e-05, - "loss": 0.4374, - "step": 3642 - }, - { - "epoch": 0.12787167201951596, - "grad_norm": 0.552539050579071, - "learning_rate": 4.881111111111111e-05, - "loss": 0.5492, - "step": 3643 - }, - { - "epoch": 0.12790677267063305, - "grad_norm": 0.6686174273490906, - "learning_rate": 4.880925925925926e-05, - "loss": 0.5116, - "step": 3644 - }, - { - "epoch": 0.1279418733217501, - "grad_norm": 0.6474117636680603, - "learning_rate": 4.880740740740741e-05, - "loss": 0.4911, - "step": 3645 - }, - { - "epoch": 0.1279769739728672, - "grad_norm": 0.5540754795074463, - "learning_rate": 4.880555555555556e-05, - "loss": 0.5066, - "step": 3646 - }, - { - "epoch": 0.12801207462398428, - "grad_norm": 0.6127769947052002, - "learning_rate": 4.880370370370371e-05, - "loss": 0.5471, - "step": 3647 - }, - { - "epoch": 0.12804717527510134, - "grad_norm": 0.5684733390808105, - "learning_rate": 4.880185185185186e-05, - "loss": 0.6191, - "step": 3648 - }, - { - "epoch": 0.12808227592621843, - "grad_norm": 0.563504159450531, - "learning_rate": 4.88e-05, - "loss": 0.546, - "step": 3649 - }, - { - "epoch": 0.12811737657733552, - "grad_norm": 0.6300188302993774, - "learning_rate": 4.879814814814815e-05, - "loss": 0.5894, - "step": 3650 - }, - { - "epoch": 0.12815247722845258, - "grad_norm": 0.5657989382743835, - "learning_rate": 4.87962962962963e-05, - "loss": 0.5606, - "step": 3651 - }, - { - "epoch": 0.12818757787956966, - "grad_norm": 0.4558708071708679, - "learning_rate": 4.879444444444445e-05, - "loss": 0.4353, - "step": 3652 - }, - { - "epoch": 0.12822267853068675, - "grad_norm": 0.7321639060974121, - "learning_rate": 4.8792592592592594e-05, - "loss": 0.5341, - "step": 3653 - }, - { - "epoch": 0.1282577791818038, - "grad_norm": 0.7387416362762451, - "learning_rate": 4.8790740740740744e-05, - "loss": 0.606, - "step": 3654 - }, - { - "epoch": 0.1282928798329209, - "grad_norm": 0.587890088558197, - "learning_rate": 4.878888888888889e-05, - "loss": 0.5224, - "step": 3655 - }, - { - "epoch": 0.12832798048403798, - "grad_norm": 0.5726508498191833, - "learning_rate": 4.878703703703704e-05, - "loss": 0.5366, - "step": 3656 - }, - { - "epoch": 0.12836308113515504, - "grad_norm": 0.5531563758850098, - "learning_rate": 4.878518518518519e-05, - "loss": 0.4119, - "step": 3657 - }, - { - "epoch": 0.12839818178627213, - "grad_norm": 0.564980149269104, - "learning_rate": 4.878333333333334e-05, - "loss": 0.4995, - "step": 3658 - }, - { - "epoch": 0.12843328243738922, - "grad_norm": 0.5680224299430847, - "learning_rate": 4.878148148148148e-05, - "loss": 0.4978, - "step": 3659 - }, - { - "epoch": 0.1284683830885063, - "grad_norm": 0.6280717253684998, - "learning_rate": 4.877962962962963e-05, - "loss": 0.6046, - "step": 3660 - }, - { - "epoch": 0.12850348373962336, - "grad_norm": 0.577785313129425, - "learning_rate": 4.8777777777777775e-05, - "loss": 0.573, - "step": 3661 - }, - { - "epoch": 0.12853858439074045, - "grad_norm": 0.5978326797485352, - "learning_rate": 4.877592592592593e-05, - "loss": 0.5398, - "step": 3662 - }, - { - "epoch": 0.12857368504185754, - "grad_norm": 0.6417734026908875, - "learning_rate": 4.8774074074074075e-05, - "loss": 0.5293, - "step": 3663 - }, - { - "epoch": 0.1286087856929746, - "grad_norm": 0.5550532937049866, - "learning_rate": 4.8772222222222225e-05, - "loss": 0.5301, - "step": 3664 - }, - { - "epoch": 0.12864388634409168, - "grad_norm": 0.6304478645324707, - "learning_rate": 4.8770370370370375e-05, - "loss": 0.5175, - "step": 3665 - }, - { - "epoch": 0.12867898699520877, - "grad_norm": 0.5759411454200745, - "learning_rate": 4.876851851851852e-05, - "loss": 0.6019, - "step": 3666 - }, - { - "epoch": 0.12871408764632583, - "grad_norm": 0.7269323468208313, - "learning_rate": 4.876666666666667e-05, - "loss": 0.6074, - "step": 3667 - }, - { - "epoch": 0.12874918829744292, - "grad_norm": 0.6728152632713318, - "learning_rate": 4.876481481481482e-05, - "loss": 0.5291, - "step": 3668 - }, - { - "epoch": 0.12878428894856, - "grad_norm": 0.591709315776825, - "learning_rate": 4.876296296296297e-05, - "loss": 0.5023, - "step": 3669 - }, - { - "epoch": 0.12881938959967706, - "grad_norm": 0.709843635559082, - "learning_rate": 4.876111111111111e-05, - "loss": 0.5515, - "step": 3670 - }, - { - "epoch": 0.12885449025079415, - "grad_norm": 0.6097984313964844, - "learning_rate": 4.875925925925926e-05, - "loss": 0.57, - "step": 3671 - }, - { - "epoch": 0.12888959090191124, - "grad_norm": 0.5517318844795227, - "learning_rate": 4.8757407407407405e-05, - "loss": 0.6258, - "step": 3672 - }, - { - "epoch": 0.1289246915530283, - "grad_norm": 0.6298445463180542, - "learning_rate": 4.875555555555556e-05, - "loss": 0.6119, - "step": 3673 - }, - { - "epoch": 0.12895979220414538, - "grad_norm": 0.7482870221138, - "learning_rate": 4.8753703703703706e-05, - "loss": 0.6159, - "step": 3674 - }, - { - "epoch": 0.12899489285526247, - "grad_norm": 0.622247040271759, - "learning_rate": 4.8751851851851856e-05, - "loss": 0.5966, - "step": 3675 - }, - { - "epoch": 0.12902999350637953, - "grad_norm": 0.48349276185035706, - "learning_rate": 4.875e-05, - "loss": 0.5403, - "step": 3676 - }, - { - "epoch": 0.12906509415749662, - "grad_norm": 0.5470307469367981, - "learning_rate": 4.874814814814815e-05, - "loss": 0.4245, - "step": 3677 - }, - { - "epoch": 0.1291001948086137, - "grad_norm": 0.5666052103042603, - "learning_rate": 4.87462962962963e-05, - "loss": 0.5605, - "step": 3678 - }, - { - "epoch": 0.12913529545973076, - "grad_norm": 0.4838297665119171, - "learning_rate": 4.874444444444445e-05, - "loss": 0.5151, - "step": 3679 - }, - { - "epoch": 0.12917039611084785, - "grad_norm": 0.6016704440116882, - "learning_rate": 4.874259259259259e-05, - "loss": 0.463, - "step": 3680 - }, - { - "epoch": 0.12920549676196494, - "grad_norm": 0.7685228586196899, - "learning_rate": 4.874074074074074e-05, - "loss": 0.6005, - "step": 3681 - }, - { - "epoch": 0.12924059741308203, - "grad_norm": 0.5552195906639099, - "learning_rate": 4.8738888888888886e-05, - "loss": 0.4664, - "step": 3682 - }, - { - "epoch": 0.12927569806419908, - "grad_norm": 0.6114494800567627, - "learning_rate": 4.8737037037037036e-05, - "loss": 0.5773, - "step": 3683 - }, - { - "epoch": 0.12931079871531617, - "grad_norm": 0.5559927821159363, - "learning_rate": 4.8735185185185186e-05, - "loss": 0.6068, - "step": 3684 - }, - { - "epoch": 0.12934589936643326, - "grad_norm": 0.5192628502845764, - "learning_rate": 4.8733333333333337e-05, - "loss": 0.4482, - "step": 3685 - }, - { - "epoch": 0.12938100001755032, - "grad_norm": 0.5853452682495117, - "learning_rate": 4.873148148148149e-05, - "loss": 0.4628, - "step": 3686 - }, - { - "epoch": 0.1294161006686674, - "grad_norm": 0.5544705390930176, - "learning_rate": 4.872962962962963e-05, - "loss": 0.5356, - "step": 3687 - }, - { - "epoch": 0.1294512013197845, - "grad_norm": 0.537954568862915, - "learning_rate": 4.872777777777778e-05, - "loss": 0.6288, - "step": 3688 - }, - { - "epoch": 0.12948630197090155, - "grad_norm": 0.9058097004890442, - "learning_rate": 4.872592592592593e-05, - "loss": 0.5432, - "step": 3689 - }, - { - "epoch": 0.12952140262201864, - "grad_norm": 0.6015720963478088, - "learning_rate": 4.872407407407408e-05, - "loss": 0.5756, - "step": 3690 - }, - { - "epoch": 0.12955650327313573, - "grad_norm": 0.5887885689735413, - "learning_rate": 4.8722222222222224e-05, - "loss": 0.4513, - "step": 3691 - }, - { - "epoch": 0.12959160392425279, - "grad_norm": 0.6323729753494263, - "learning_rate": 4.8720370370370374e-05, - "loss": 0.2788, - "step": 3692 - }, - { - "epoch": 0.12962670457536987, - "grad_norm": 0.59381502866745, - "learning_rate": 4.871851851851852e-05, - "loss": 0.5491, - "step": 3693 - }, - { - "epoch": 0.12966180522648696, - "grad_norm": 0.6816097497940063, - "learning_rate": 4.8716666666666674e-05, - "loss": 0.4933, - "step": 3694 - }, - { - "epoch": 0.12969690587760402, - "grad_norm": 0.7184210419654846, - "learning_rate": 4.871481481481482e-05, - "loss": 0.6282, - "step": 3695 - }, - { - "epoch": 0.1297320065287211, - "grad_norm": 0.5866740345954895, - "learning_rate": 4.871296296296297e-05, - "loss": 0.4651, - "step": 3696 - }, - { - "epoch": 0.1297671071798382, - "grad_norm": 0.6229953765869141, - "learning_rate": 4.871111111111111e-05, - "loss": 0.5845, - "step": 3697 - }, - { - "epoch": 0.12980220783095525, - "grad_norm": 0.5407376289367676, - "learning_rate": 4.870925925925926e-05, - "loss": 0.496, - "step": 3698 - }, - { - "epoch": 0.12983730848207234, - "grad_norm": 0.5651052594184875, - "learning_rate": 4.8707407407407404e-05, - "loss": 0.5373, - "step": 3699 - }, - { - "epoch": 0.12987240913318943, - "grad_norm": 0.5698108673095703, - "learning_rate": 4.870555555555556e-05, - "loss": 0.5356, - "step": 3700 - }, - { - "epoch": 0.12990750978430649, - "grad_norm": 0.842113733291626, - "learning_rate": 4.8703703703703704e-05, - "loss": 0.481, - "step": 3701 - }, - { - "epoch": 0.12994261043542357, - "grad_norm": 0.6580106019973755, - "learning_rate": 4.8701851851851855e-05, - "loss": 0.4063, - "step": 3702 - }, - { - "epoch": 0.12997771108654066, - "grad_norm": 0.579119086265564, - "learning_rate": 4.87e-05, - "loss": 0.5059, - "step": 3703 - }, - { - "epoch": 0.13001281173765775, - "grad_norm": 0.5460772514343262, - "learning_rate": 4.869814814814815e-05, - "loss": 0.5233, - "step": 3704 - }, - { - "epoch": 0.1300479123887748, - "grad_norm": 0.7004063129425049, - "learning_rate": 4.86962962962963e-05, - "loss": 0.5625, - "step": 3705 - }, - { - "epoch": 0.1300830130398919, - "grad_norm": 0.5625430345535278, - "learning_rate": 4.869444444444445e-05, - "loss": 0.4115, - "step": 3706 - }, - { - "epoch": 0.13011811369100898, - "grad_norm": 0.6537603735923767, - "learning_rate": 4.86925925925926e-05, - "loss": 0.5362, - "step": 3707 - }, - { - "epoch": 0.13015321434212604, - "grad_norm": 0.7991849184036255, - "learning_rate": 4.869074074074074e-05, - "loss": 0.6074, - "step": 3708 - }, - { - "epoch": 0.13018831499324313, - "grad_norm": 0.6505723595619202, - "learning_rate": 4.868888888888889e-05, - "loss": 0.4752, - "step": 3709 - }, - { - "epoch": 0.1302234156443602, - "grad_norm": 0.5818067193031311, - "learning_rate": 4.868703703703704e-05, - "loss": 0.5804, - "step": 3710 - }, - { - "epoch": 0.13025851629547727, - "grad_norm": 0.5417761206626892, - "learning_rate": 4.868518518518519e-05, - "loss": 0.5479, - "step": 3711 - }, - { - "epoch": 0.13029361694659436, - "grad_norm": 0.6349412202835083, - "learning_rate": 4.8683333333333335e-05, - "loss": 0.544, - "step": 3712 - }, - { - "epoch": 0.13032871759771145, - "grad_norm": 0.570671558380127, - "learning_rate": 4.8681481481481485e-05, - "loss": 0.5439, - "step": 3713 - }, - { - "epoch": 0.1303638182488285, - "grad_norm": 0.7517406940460205, - "learning_rate": 4.867962962962963e-05, - "loss": 0.5744, - "step": 3714 - }, - { - "epoch": 0.1303989188999456, - "grad_norm": 0.5319665670394897, - "learning_rate": 4.867777777777778e-05, - "loss": 0.5489, - "step": 3715 - }, - { - "epoch": 0.13043401955106268, - "grad_norm": 0.49833354353904724, - "learning_rate": 4.867592592592593e-05, - "loss": 0.5845, - "step": 3716 - }, - { - "epoch": 0.13046912020217974, - "grad_norm": 0.6102532148361206, - "learning_rate": 4.867407407407408e-05, - "loss": 0.5857, - "step": 3717 - }, - { - "epoch": 0.13050422085329683, - "grad_norm": 0.6305321455001831, - "learning_rate": 4.867222222222222e-05, - "loss": 0.5804, - "step": 3718 - }, - { - "epoch": 0.1305393215044139, - "grad_norm": 0.6609764695167542, - "learning_rate": 4.867037037037037e-05, - "loss": 0.5036, - "step": 3719 - }, - { - "epoch": 0.13057442215553097, - "grad_norm": 0.5376659631729126, - "learning_rate": 4.8668518518518516e-05, - "loss": 0.4914, - "step": 3720 - }, - { - "epoch": 0.13060952280664806, - "grad_norm": 0.53364098072052, - "learning_rate": 4.866666666666667e-05, - "loss": 0.5315, - "step": 3721 - }, - { - "epoch": 0.13064462345776515, - "grad_norm": 0.6250344514846802, - "learning_rate": 4.8664814814814816e-05, - "loss": 0.5592, - "step": 3722 - }, - { - "epoch": 0.1306797241088822, - "grad_norm": 0.6739871501922607, - "learning_rate": 4.8662962962962966e-05, - "loss": 0.6139, - "step": 3723 - }, - { - "epoch": 0.1307148247599993, - "grad_norm": 0.6571698784828186, - "learning_rate": 4.866111111111111e-05, - "loss": 0.488, - "step": 3724 - }, - { - "epoch": 0.13074992541111638, - "grad_norm": 0.5631604194641113, - "learning_rate": 4.865925925925926e-05, - "loss": 0.496, - "step": 3725 - }, - { - "epoch": 0.13078502606223347, - "grad_norm": 0.5835137963294983, - "learning_rate": 4.865740740740741e-05, - "loss": 0.5914, - "step": 3726 - }, - { - "epoch": 0.13082012671335053, - "grad_norm": 0.5780750513076782, - "learning_rate": 4.865555555555556e-05, - "loss": 0.4849, - "step": 3727 - }, - { - "epoch": 0.1308552273644676, - "grad_norm": 0.7854554057121277, - "learning_rate": 4.865370370370371e-05, - "loss": 0.486, - "step": 3728 - }, - { - "epoch": 0.1308903280155847, - "grad_norm": 0.5096298456192017, - "learning_rate": 4.865185185185185e-05, - "loss": 0.5257, - "step": 3729 - }, - { - "epoch": 0.13092542866670176, - "grad_norm": 0.6025013327598572, - "learning_rate": 4.8650000000000003e-05, - "loss": 0.5714, - "step": 3730 - }, - { - "epoch": 0.13096052931781885, - "grad_norm": 0.6930509209632874, - "learning_rate": 4.864814814814815e-05, - "loss": 0.5603, - "step": 3731 - }, - { - "epoch": 0.13099562996893593, - "grad_norm": 0.6220307350158691, - "learning_rate": 4.8646296296296304e-05, - "loss": 0.5553, - "step": 3732 - }, - { - "epoch": 0.131030730620053, - "grad_norm": 0.5818633437156677, - "learning_rate": 4.864444444444445e-05, - "loss": 0.587, - "step": 3733 - }, - { - "epoch": 0.13106583127117008, - "grad_norm": 0.6496999859809875, - "learning_rate": 4.86425925925926e-05, - "loss": 0.5257, - "step": 3734 - }, - { - "epoch": 0.13110093192228717, - "grad_norm": 0.5281426310539246, - "learning_rate": 4.864074074074074e-05, - "loss": 0.5243, - "step": 3735 - }, - { - "epoch": 0.13113603257340423, - "grad_norm": 0.5717955231666565, - "learning_rate": 4.863888888888889e-05, - "loss": 0.421, - "step": 3736 - }, - { - "epoch": 0.1311711332245213, - "grad_norm": 0.9715144038200378, - "learning_rate": 4.863703703703704e-05, - "loss": 0.7246, - "step": 3737 - }, - { - "epoch": 0.1312062338756384, - "grad_norm": 0.603962242603302, - "learning_rate": 4.863518518518519e-05, - "loss": 0.5841, - "step": 3738 - }, - { - "epoch": 0.13124133452675546, - "grad_norm": 0.6687046885490417, - "learning_rate": 4.8633333333333334e-05, - "loss": 0.6067, - "step": 3739 - }, - { - "epoch": 0.13127643517787255, - "grad_norm": 0.5420154929161072, - "learning_rate": 4.8631481481481484e-05, - "loss": 0.469, - "step": 3740 - }, - { - "epoch": 0.13131153582898963, - "grad_norm": 0.7060014605522156, - "learning_rate": 4.862962962962963e-05, - "loss": 0.6054, - "step": 3741 - }, - { - "epoch": 0.1313466364801067, - "grad_norm": 0.5763956308364868, - "learning_rate": 4.862777777777778e-05, - "loss": 0.5667, - "step": 3742 - }, - { - "epoch": 0.13138173713122378, - "grad_norm": 0.501232922077179, - "learning_rate": 4.862592592592593e-05, - "loss": 0.5207, - "step": 3743 - }, - { - "epoch": 0.13141683778234087, - "grad_norm": 0.6538906097412109, - "learning_rate": 4.862407407407408e-05, - "loss": 0.428, - "step": 3744 - }, - { - "epoch": 0.13145193843345795, - "grad_norm": 0.6474219560623169, - "learning_rate": 4.862222222222222e-05, - "loss": 0.5915, - "step": 3745 - }, - { - "epoch": 0.131487039084575, - "grad_norm": 0.7004745006561279, - "learning_rate": 4.862037037037037e-05, - "loss": 0.5543, - "step": 3746 - }, - { - "epoch": 0.1315221397356921, - "grad_norm": 0.6150645017623901, - "learning_rate": 4.861851851851852e-05, - "loss": 0.6322, - "step": 3747 - }, - { - "epoch": 0.1315572403868092, - "grad_norm": 0.6583216786384583, - "learning_rate": 4.861666666666667e-05, - "loss": 0.6067, - "step": 3748 - }, - { - "epoch": 0.13159234103792625, - "grad_norm": 0.5921520590782166, - "learning_rate": 4.861481481481482e-05, - "loss": 0.5096, - "step": 3749 - }, - { - "epoch": 0.13162744168904333, - "grad_norm": 0.8635561466217041, - "learning_rate": 4.8612962962962965e-05, - "loss": 0.5023, - "step": 3750 - }, - { - "epoch": 0.13166254234016042, - "grad_norm": 0.955264687538147, - "learning_rate": 4.8611111111111115e-05, - "loss": 0.6005, - "step": 3751 - }, - { - "epoch": 0.13169764299127748, - "grad_norm": 0.5340750813484192, - "learning_rate": 4.860925925925926e-05, - "loss": 0.5431, - "step": 3752 - }, - { - "epoch": 0.13173274364239457, - "grad_norm": 0.7173666954040527, - "learning_rate": 4.860740740740741e-05, - "loss": 0.5476, - "step": 3753 - }, - { - "epoch": 0.13176784429351165, - "grad_norm": 0.6291313171386719, - "learning_rate": 4.860555555555556e-05, - "loss": 0.5622, - "step": 3754 - }, - { - "epoch": 0.1318029449446287, - "grad_norm": 0.6980699300765991, - "learning_rate": 4.860370370370371e-05, - "loss": 0.5879, - "step": 3755 - }, - { - "epoch": 0.1318380455957458, - "grad_norm": 0.5696214437484741, - "learning_rate": 4.860185185185185e-05, - "loss": 0.5958, - "step": 3756 - }, - { - "epoch": 0.1318731462468629, - "grad_norm": 0.47608014941215515, - "learning_rate": 4.86e-05, - "loss": 0.4299, - "step": 3757 - }, - { - "epoch": 0.13190824689797995, - "grad_norm": 0.6221615076065063, - "learning_rate": 4.8598148148148146e-05, - "loss": 0.6113, - "step": 3758 - }, - { - "epoch": 0.13194334754909703, - "grad_norm": 0.5243296027183533, - "learning_rate": 4.85962962962963e-05, - "loss": 0.5682, - "step": 3759 - }, - { - "epoch": 0.13197844820021412, - "grad_norm": 0.516649067401886, - "learning_rate": 4.8594444444444446e-05, - "loss": 0.5873, - "step": 3760 - }, - { - "epoch": 0.13201354885133118, - "grad_norm": 0.5296412706375122, - "learning_rate": 4.8592592592592596e-05, - "loss": 0.4968, - "step": 3761 - }, - { - "epoch": 0.13204864950244827, - "grad_norm": 0.5448402762413025, - "learning_rate": 4.859074074074074e-05, - "loss": 0.6062, - "step": 3762 - }, - { - "epoch": 0.13208375015356535, - "grad_norm": 0.539901852607727, - "learning_rate": 4.858888888888889e-05, - "loss": 0.5385, - "step": 3763 - }, - { - "epoch": 0.13211885080468241, - "grad_norm": 0.5904602408409119, - "learning_rate": 4.858703703703704e-05, - "loss": 0.5459, - "step": 3764 - }, - { - "epoch": 0.1321539514557995, - "grad_norm": 0.47610604763031006, - "learning_rate": 4.858518518518519e-05, - "loss": 0.5672, - "step": 3765 - }, - { - "epoch": 0.1321890521069166, - "grad_norm": 0.5974931120872498, - "learning_rate": 4.858333333333333e-05, - "loss": 0.5489, - "step": 3766 - }, - { - "epoch": 0.13222415275803368, - "grad_norm": 0.47380951046943665, - "learning_rate": 4.858148148148148e-05, - "loss": 0.4822, - "step": 3767 - }, - { - "epoch": 0.13225925340915073, - "grad_norm": 0.5821616649627686, - "learning_rate": 4.857962962962963e-05, - "loss": 0.4178, - "step": 3768 - }, - { - "epoch": 0.13229435406026782, - "grad_norm": 0.5112655758857727, - "learning_rate": 4.8577777777777776e-05, - "loss": 0.5763, - "step": 3769 - }, - { - "epoch": 0.1323294547113849, - "grad_norm": 0.5519602298736572, - "learning_rate": 4.857592592592593e-05, - "loss": 0.415, - "step": 3770 - }, - { - "epoch": 0.13236455536250197, - "grad_norm": 0.5728248357772827, - "learning_rate": 4.857407407407408e-05, - "loss": 0.5071, - "step": 3771 - }, - { - "epoch": 0.13239965601361905, - "grad_norm": 0.6059746742248535, - "learning_rate": 4.857222222222223e-05, - "loss": 0.5456, - "step": 3772 - }, - { - "epoch": 0.13243475666473614, - "grad_norm": 0.6753012537956238, - "learning_rate": 4.857037037037037e-05, - "loss": 0.4919, - "step": 3773 - }, - { - "epoch": 0.1324698573158532, - "grad_norm": 0.6118047833442688, - "learning_rate": 4.856851851851852e-05, - "loss": 0.5997, - "step": 3774 - }, - { - "epoch": 0.1325049579669703, - "grad_norm": 0.5419495701789856, - "learning_rate": 4.856666666666667e-05, - "loss": 0.5495, - "step": 3775 - }, - { - "epoch": 0.13254005861808738, - "grad_norm": 0.6506741642951965, - "learning_rate": 4.856481481481482e-05, - "loss": 0.4867, - "step": 3776 - }, - { - "epoch": 0.13257515926920443, - "grad_norm": 0.5497499108314514, - "learning_rate": 4.8562962962962964e-05, - "loss": 0.5428, - "step": 3777 - }, - { - "epoch": 0.13261025992032152, - "grad_norm": 0.7052749991416931, - "learning_rate": 4.8561111111111114e-05, - "loss": 0.6254, - "step": 3778 - }, - { - "epoch": 0.1326453605714386, - "grad_norm": 0.7373431921005249, - "learning_rate": 4.855925925925926e-05, - "loss": 0.506, - "step": 3779 - }, - { - "epoch": 0.13268046122255567, - "grad_norm": 0.5692320466041565, - "learning_rate": 4.855740740740741e-05, - "loss": 0.473, - "step": 3780 - }, - { - "epoch": 0.13271556187367275, - "grad_norm": 0.509395956993103, - "learning_rate": 4.855555555555556e-05, - "loss": 0.5282, - "step": 3781 - }, - { - "epoch": 0.13275066252478984, - "grad_norm": 0.5452751517295837, - "learning_rate": 4.855370370370371e-05, - "loss": 0.3998, - "step": 3782 - }, - { - "epoch": 0.1327857631759069, - "grad_norm": 0.5843863487243652, - "learning_rate": 4.855185185185185e-05, - "loss": 0.5454, - "step": 3783 - }, - { - "epoch": 0.132820863827024, - "grad_norm": 0.5808597207069397, - "learning_rate": 4.855e-05, - "loss": 0.5925, - "step": 3784 - }, - { - "epoch": 0.13285596447814108, - "grad_norm": 0.5373961925506592, - "learning_rate": 4.854814814814815e-05, - "loss": 0.45, - "step": 3785 - }, - { - "epoch": 0.13289106512925813, - "grad_norm": 0.554998517036438, - "learning_rate": 4.85462962962963e-05, - "loss": 0.5288, - "step": 3786 - }, - { - "epoch": 0.13292616578037522, - "grad_norm": 0.6299754977226257, - "learning_rate": 4.8544444444444445e-05, - "loss": 0.4945, - "step": 3787 - }, - { - "epoch": 0.1329612664314923, - "grad_norm": 0.5400426983833313, - "learning_rate": 4.8542592592592595e-05, - "loss": 0.5822, - "step": 3788 - }, - { - "epoch": 0.1329963670826094, - "grad_norm": 0.5512785315513611, - "learning_rate": 4.8540740740740745e-05, - "loss": 0.611, - "step": 3789 - }, - { - "epoch": 0.13303146773372646, - "grad_norm": 0.5272223353385925, - "learning_rate": 4.853888888888889e-05, - "loss": 0.5342, - "step": 3790 - }, - { - "epoch": 0.13306656838484354, - "grad_norm": 0.4763917028903961, - "learning_rate": 4.8537037037037045e-05, - "loss": 0.5314, - "step": 3791 - }, - { - "epoch": 0.13310166903596063, - "grad_norm": 0.542507529258728, - "learning_rate": 4.853518518518519e-05, - "loss": 0.4858, - "step": 3792 - }, - { - "epoch": 0.1331367696870777, - "grad_norm": 0.6629763245582581, - "learning_rate": 4.853333333333334e-05, - "loss": 0.5601, - "step": 3793 - }, - { - "epoch": 0.13317187033819478, - "grad_norm": 0.5629575252532959, - "learning_rate": 4.853148148148148e-05, - "loss": 0.6303, - "step": 3794 - }, - { - "epoch": 0.13320697098931186, - "grad_norm": 0.5181505084037781, - "learning_rate": 4.852962962962963e-05, - "loss": 0.5309, - "step": 3795 - }, - { - "epoch": 0.13324207164042892, - "grad_norm": 0.6616451740264893, - "learning_rate": 4.8527777777777775e-05, - "loss": 0.5577, - "step": 3796 - }, - { - "epoch": 0.133277172291546, - "grad_norm": 0.6721810102462769, - "learning_rate": 4.852592592592593e-05, - "loss": 0.6398, - "step": 3797 - }, - { - "epoch": 0.1333122729426631, - "grad_norm": 0.599430501461029, - "learning_rate": 4.8524074074074075e-05, - "loss": 0.575, - "step": 3798 - }, - { - "epoch": 0.13334737359378016, - "grad_norm": 0.6144026517868042, - "learning_rate": 4.8522222222222226e-05, - "loss": 0.5832, - "step": 3799 - }, - { - "epoch": 0.13338247424489724, - "grad_norm": 0.5688257813453674, - "learning_rate": 4.852037037037037e-05, - "loss": 0.5519, - "step": 3800 - }, - { - "epoch": 0.13341757489601433, - "grad_norm": 0.6310216188430786, - "learning_rate": 4.851851851851852e-05, - "loss": 0.4481, - "step": 3801 - }, - { - "epoch": 0.1334526755471314, - "grad_norm": 0.7449027895927429, - "learning_rate": 4.851666666666667e-05, - "loss": 0.6012, - "step": 3802 - }, - { - "epoch": 0.13348777619824848, - "grad_norm": 0.6316204071044922, - "learning_rate": 4.851481481481482e-05, - "loss": 0.6046, - "step": 3803 - }, - { - "epoch": 0.13352287684936556, - "grad_norm": 0.5782293677330017, - "learning_rate": 4.851296296296296e-05, - "loss": 0.5267, - "step": 3804 - }, - { - "epoch": 0.13355797750048262, - "grad_norm": 0.5706642866134644, - "learning_rate": 4.851111111111111e-05, - "loss": 0.5456, - "step": 3805 - }, - { - "epoch": 0.1335930781515997, - "grad_norm": 0.5747556090354919, - "learning_rate": 4.850925925925926e-05, - "loss": 0.5648, - "step": 3806 - }, - { - "epoch": 0.1336281788027168, - "grad_norm": 0.6480491161346436, - "learning_rate": 4.8507407407407406e-05, - "loss": 0.6633, - "step": 3807 - }, - { - "epoch": 0.13366327945383386, - "grad_norm": 0.6256239414215088, - "learning_rate": 4.8505555555555556e-05, - "loss": 0.5505, - "step": 3808 - }, - { - "epoch": 0.13369838010495094, - "grad_norm": 0.5349667072296143, - "learning_rate": 4.8503703703703706e-05, - "loss": 0.5271, - "step": 3809 - }, - { - "epoch": 0.13373348075606803, - "grad_norm": 0.6378653049468994, - "learning_rate": 4.8501851851851856e-05, - "loss": 0.4925, - "step": 3810 - }, - { - "epoch": 0.13376858140718512, - "grad_norm": 0.6986293196678162, - "learning_rate": 4.85e-05, - "loss": 0.409, - "step": 3811 - }, - { - "epoch": 0.13380368205830218, - "grad_norm": 0.649084210395813, - "learning_rate": 4.849814814814815e-05, - "loss": 0.5037, - "step": 3812 - }, - { - "epoch": 0.13383878270941926, - "grad_norm": 0.6944848895072937, - "learning_rate": 4.84962962962963e-05, - "loss": 0.5632, - "step": 3813 - }, - { - "epoch": 0.13387388336053635, - "grad_norm": 0.5528199672698975, - "learning_rate": 4.849444444444445e-05, - "loss": 0.5339, - "step": 3814 - }, - { - "epoch": 0.1339089840116534, - "grad_norm": 0.5974128842353821, - "learning_rate": 4.8492592592592593e-05, - "loss": 0.6478, - "step": 3815 - }, - { - "epoch": 0.1339440846627705, - "grad_norm": 0.6058229207992554, - "learning_rate": 4.8490740740740744e-05, - "loss": 0.5694, - "step": 3816 - }, - { - "epoch": 0.13397918531388758, - "grad_norm": 0.5304580926895142, - "learning_rate": 4.848888888888889e-05, - "loss": 0.6677, - "step": 3817 - }, - { - "epoch": 0.13401428596500464, - "grad_norm": 0.583048939704895, - "learning_rate": 4.8487037037037044e-05, - "loss": 0.5724, - "step": 3818 - }, - { - "epoch": 0.13404938661612173, - "grad_norm": 0.6166320443153381, - "learning_rate": 4.848518518518519e-05, - "loss": 0.6051, - "step": 3819 - }, - { - "epoch": 0.13408448726723882, - "grad_norm": 0.5629501342773438, - "learning_rate": 4.848333333333334e-05, - "loss": 0.6365, - "step": 3820 - }, - { - "epoch": 0.13411958791835588, - "grad_norm": 0.6088352799415588, - "learning_rate": 4.848148148148148e-05, - "loss": 0.5945, - "step": 3821 - }, - { - "epoch": 0.13415468856947296, - "grad_norm": 0.5568549633026123, - "learning_rate": 4.847962962962963e-05, - "loss": 0.5402, - "step": 3822 - }, - { - "epoch": 0.13418978922059005, - "grad_norm": 0.5508817434310913, - "learning_rate": 4.847777777777778e-05, - "loss": 0.4624, - "step": 3823 - }, - { - "epoch": 0.1342248898717071, - "grad_norm": 0.6760295629501343, - "learning_rate": 4.847592592592593e-05, - "loss": 0.5302, - "step": 3824 - }, - { - "epoch": 0.1342599905228242, - "grad_norm": 0.6736124753952026, - "learning_rate": 4.8474074074074074e-05, - "loss": 0.6626, - "step": 3825 - }, - { - "epoch": 0.13429509117394128, - "grad_norm": 0.6564846634864807, - "learning_rate": 4.8472222222222224e-05, - "loss": 0.4845, - "step": 3826 - }, - { - "epoch": 0.13433019182505834, - "grad_norm": 0.5315808057785034, - "learning_rate": 4.8470370370370374e-05, - "loss": 0.4879, - "step": 3827 - }, - { - "epoch": 0.13436529247617543, - "grad_norm": 0.5561016201972961, - "learning_rate": 4.846851851851852e-05, - "loss": 0.4757, - "step": 3828 - }, - { - "epoch": 0.13440039312729252, - "grad_norm": 0.6134224534034729, - "learning_rate": 4.8466666666666675e-05, - "loss": 0.6198, - "step": 3829 - }, - { - "epoch": 0.13443549377840958, - "grad_norm": 0.551673948764801, - "learning_rate": 4.846481481481482e-05, - "loss": 0.5484, - "step": 3830 - }, - { - "epoch": 0.13447059442952666, - "grad_norm": 0.6265080571174622, - "learning_rate": 4.846296296296297e-05, - "loss": 0.539, - "step": 3831 - }, - { - "epoch": 0.13450569508064375, - "grad_norm": 0.58242267370224, - "learning_rate": 4.846111111111111e-05, - "loss": 0.4935, - "step": 3832 - }, - { - "epoch": 0.13454079573176084, - "grad_norm": 0.625715970993042, - "learning_rate": 4.845925925925926e-05, - "loss": 0.5731, - "step": 3833 - }, - { - "epoch": 0.1345758963828779, - "grad_norm": 0.6702615022659302, - "learning_rate": 4.845740740740741e-05, - "loss": 0.6744, - "step": 3834 - }, - { - "epoch": 0.13461099703399498, - "grad_norm": 0.5140739679336548, - "learning_rate": 4.845555555555556e-05, - "loss": 0.5696, - "step": 3835 - }, - { - "epoch": 0.13464609768511207, - "grad_norm": 0.5411032438278198, - "learning_rate": 4.8453703703703705e-05, - "loss": 0.5383, - "step": 3836 - }, - { - "epoch": 0.13468119833622913, - "grad_norm": 0.751777708530426, - "learning_rate": 4.8451851851851855e-05, - "loss": 0.556, - "step": 3837 - }, - { - "epoch": 0.13471629898734622, - "grad_norm": 0.5573914051055908, - "learning_rate": 4.845e-05, - "loss": 0.5423, - "step": 3838 - }, - { - "epoch": 0.1347513996384633, - "grad_norm": 0.5435053706169128, - "learning_rate": 4.844814814814815e-05, - "loss": 0.466, - "step": 3839 - }, - { - "epoch": 0.13478650028958036, - "grad_norm": 0.5076184868812561, - "learning_rate": 4.84462962962963e-05, - "loss": 0.5599, - "step": 3840 - }, - { - "epoch": 0.13482160094069745, - "grad_norm": 0.6308442950248718, - "learning_rate": 4.844444444444445e-05, - "loss": 0.5376, - "step": 3841 - }, - { - "epoch": 0.13485670159181454, - "grad_norm": 0.5354576706886292, - "learning_rate": 4.844259259259259e-05, - "loss": 0.6236, - "step": 3842 - }, - { - "epoch": 0.1348918022429316, - "grad_norm": 0.5895817875862122, - "learning_rate": 4.844074074074074e-05, - "loss": 0.5029, - "step": 3843 - }, - { - "epoch": 0.13492690289404868, - "grad_norm": 0.484792023897171, - "learning_rate": 4.843888888888889e-05, - "loss": 0.4742, - "step": 3844 - }, - { - "epoch": 0.13496200354516577, - "grad_norm": 0.5333589911460876, - "learning_rate": 4.843703703703704e-05, - "loss": 0.5489, - "step": 3845 - }, - { - "epoch": 0.13499710419628283, - "grad_norm": 0.5714924335479736, - "learning_rate": 4.8435185185185186e-05, - "loss": 0.6012, - "step": 3846 - }, - { - "epoch": 0.13503220484739992, - "grad_norm": 0.42023852467536926, - "learning_rate": 4.8433333333333336e-05, - "loss": 0.5816, - "step": 3847 - }, - { - "epoch": 0.135067305498517, - "grad_norm": 0.604292631149292, - "learning_rate": 4.8431481481481486e-05, - "loss": 0.5699, - "step": 3848 - }, - { - "epoch": 0.13510240614963406, - "grad_norm": 0.5800249576568604, - "learning_rate": 4.842962962962963e-05, - "loss": 0.3434, - "step": 3849 - }, - { - "epoch": 0.13513750680075115, - "grad_norm": 0.5618778467178345, - "learning_rate": 4.842777777777778e-05, - "loss": 0.508, - "step": 3850 - }, - { - "epoch": 0.13517260745186824, - "grad_norm": 0.5522737503051758, - "learning_rate": 4.842592592592593e-05, - "loss": 0.5576, - "step": 3851 - }, - { - "epoch": 0.1352077081029853, - "grad_norm": 0.596592128276825, - "learning_rate": 4.842407407407408e-05, - "loss": 0.5189, - "step": 3852 - }, - { - "epoch": 0.13524280875410238, - "grad_norm": 0.5057129263877869, - "learning_rate": 4.842222222222222e-05, - "loss": 0.4758, - "step": 3853 - }, - { - "epoch": 0.13527790940521947, - "grad_norm": 0.5109990835189819, - "learning_rate": 4.842037037037037e-05, - "loss": 0.5837, - "step": 3854 - }, - { - "epoch": 0.13531301005633656, - "grad_norm": 0.5824634432792664, - "learning_rate": 4.8418518518518517e-05, - "loss": 0.5802, - "step": 3855 - }, - { - "epoch": 0.13534811070745362, - "grad_norm": 0.5344333648681641, - "learning_rate": 4.8416666666666673e-05, - "loss": 0.5649, - "step": 3856 - }, - { - "epoch": 0.1353832113585707, - "grad_norm": 0.5171470046043396, - "learning_rate": 4.841481481481482e-05, - "loss": 0.3383, - "step": 3857 - }, - { - "epoch": 0.1354183120096878, - "grad_norm": 0.5938962697982788, - "learning_rate": 4.841296296296297e-05, - "loss": 0.5021, - "step": 3858 - }, - { - "epoch": 0.13545341266080485, - "grad_norm": 0.49201977252960205, - "learning_rate": 4.841111111111111e-05, - "loss": 0.4987, - "step": 3859 - }, - { - "epoch": 0.13548851331192194, - "grad_norm": 0.522861659526825, - "learning_rate": 4.840925925925926e-05, - "loss": 0.5466, - "step": 3860 - }, - { - "epoch": 0.13552361396303902, - "grad_norm": 0.6305896043777466, - "learning_rate": 4.840740740740741e-05, - "loss": 0.5853, - "step": 3861 - }, - { - "epoch": 0.13555871461415608, - "grad_norm": 0.5167578458786011, - "learning_rate": 4.840555555555556e-05, - "loss": 0.4901, - "step": 3862 - }, - { - "epoch": 0.13559381526527317, - "grad_norm": 0.5382444858551025, - "learning_rate": 4.8403703703703704e-05, - "loss": 0.5494, - "step": 3863 - }, - { - "epoch": 0.13562891591639026, - "grad_norm": 0.5206958055496216, - "learning_rate": 4.8401851851851854e-05, - "loss": 0.5549, - "step": 3864 - }, - { - "epoch": 0.13566401656750732, - "grad_norm": 0.7584376335144043, - "learning_rate": 4.8400000000000004e-05, - "loss": 0.5611, - "step": 3865 - }, - { - "epoch": 0.1356991172186244, - "grad_norm": 0.6720812320709229, - "learning_rate": 4.839814814814815e-05, - "loss": 0.4984, - "step": 3866 - }, - { - "epoch": 0.1357342178697415, - "grad_norm": 0.5891757607460022, - "learning_rate": 4.83962962962963e-05, - "loss": 0.4893, - "step": 3867 - }, - { - "epoch": 0.13576931852085855, - "grad_norm": 0.588127076625824, - "learning_rate": 4.839444444444445e-05, - "loss": 0.4661, - "step": 3868 - }, - { - "epoch": 0.13580441917197564, - "grad_norm": 0.5484990477561951, - "learning_rate": 4.83925925925926e-05, - "loss": 0.4642, - "step": 3869 - }, - { - "epoch": 0.13583951982309272, - "grad_norm": 0.5982415676116943, - "learning_rate": 4.839074074074074e-05, - "loss": 0.6018, - "step": 3870 - }, - { - "epoch": 0.13587462047420978, - "grad_norm": 0.5920597910881042, - "learning_rate": 4.838888888888889e-05, - "loss": 0.4491, - "step": 3871 - }, - { - "epoch": 0.13590972112532687, - "grad_norm": 0.528104305267334, - "learning_rate": 4.838703703703704e-05, - "loss": 0.4263, - "step": 3872 - }, - { - "epoch": 0.13594482177644396, - "grad_norm": 0.5818363428115845, - "learning_rate": 4.838518518518519e-05, - "loss": 0.5461, - "step": 3873 - }, - { - "epoch": 0.13597992242756102, - "grad_norm": 0.5776819586753845, - "learning_rate": 4.8383333333333335e-05, - "loss": 0.4539, - "step": 3874 - }, - { - "epoch": 0.1360150230786781, - "grad_norm": 0.9308384656906128, - "learning_rate": 4.8381481481481485e-05, - "loss": 0.6348, - "step": 3875 - }, - { - "epoch": 0.1360501237297952, - "grad_norm": 0.824488639831543, - "learning_rate": 4.837962962962963e-05, - "loss": 0.5311, - "step": 3876 - }, - { - "epoch": 0.13608522438091228, - "grad_norm": 0.5707147717475891, - "learning_rate": 4.837777777777778e-05, - "loss": 0.5197, - "step": 3877 - }, - { - "epoch": 0.13612032503202934, - "grad_norm": 0.5970208048820496, - "learning_rate": 4.837592592592593e-05, - "loss": 0.4611, - "step": 3878 - }, - { - "epoch": 0.13615542568314642, - "grad_norm": 0.5462239384651184, - "learning_rate": 4.837407407407408e-05, - "loss": 0.5367, - "step": 3879 - }, - { - "epoch": 0.1361905263342635, - "grad_norm": 0.5644932985305786, - "learning_rate": 4.837222222222222e-05, - "loss": 0.4124, - "step": 3880 - }, - { - "epoch": 0.13622562698538057, - "grad_norm": 0.5420364141464233, - "learning_rate": 4.837037037037037e-05, - "loss": 0.5145, - "step": 3881 - }, - { - "epoch": 0.13626072763649766, - "grad_norm": 0.6095282435417175, - "learning_rate": 4.8368518518518515e-05, - "loss": 0.4837, - "step": 3882 - }, - { - "epoch": 0.13629582828761475, - "grad_norm": 0.710106372833252, - "learning_rate": 4.836666666666667e-05, - "loss": 0.4883, - "step": 3883 - }, - { - "epoch": 0.1363309289387318, - "grad_norm": 0.5198734402656555, - "learning_rate": 4.8364814814814816e-05, - "loss": 0.4212, - "step": 3884 - }, - { - "epoch": 0.1363660295898489, - "grad_norm": 0.5952595472335815, - "learning_rate": 4.8362962962962966e-05, - "loss": 0.5375, - "step": 3885 - }, - { - "epoch": 0.13640113024096598, - "grad_norm": 0.7025357484817505, - "learning_rate": 4.8361111111111116e-05, - "loss": 0.5481, - "step": 3886 - }, - { - "epoch": 0.13643623089208304, - "grad_norm": 1.1821988821029663, - "learning_rate": 4.835925925925926e-05, - "loss": 0.5718, - "step": 3887 - }, - { - "epoch": 0.13647133154320013, - "grad_norm": 0.6471722722053528, - "learning_rate": 4.835740740740741e-05, - "loss": 0.5999, - "step": 3888 - }, - { - "epoch": 0.1365064321943172, - "grad_norm": 0.6461659073829651, - "learning_rate": 4.835555555555556e-05, - "loss": 0.6119, - "step": 3889 - }, - { - "epoch": 0.13654153284543427, - "grad_norm": 0.6025562882423401, - "learning_rate": 4.835370370370371e-05, - "loss": 0.5475, - "step": 3890 - }, - { - "epoch": 0.13657663349655136, - "grad_norm": 0.5622265338897705, - "learning_rate": 4.835185185185185e-05, - "loss": 0.5992, - "step": 3891 - }, - { - "epoch": 0.13661173414766845, - "grad_norm": 0.6445316672325134, - "learning_rate": 4.835e-05, - "loss": 0.566, - "step": 3892 - }, - { - "epoch": 0.1366468347987855, - "grad_norm": 0.584281861782074, - "learning_rate": 4.8348148148148146e-05, - "loss": 0.5495, - "step": 3893 - }, - { - "epoch": 0.1366819354499026, - "grad_norm": 0.7215580344200134, - "learning_rate": 4.83462962962963e-05, - "loss": 0.6149, - "step": 3894 - }, - { - "epoch": 0.13671703610101968, - "grad_norm": 0.681130588054657, - "learning_rate": 4.8344444444444447e-05, - "loss": 0.6044, - "step": 3895 - }, - { - "epoch": 0.13675213675213677, - "grad_norm": 0.613131582736969, - "learning_rate": 4.83425925925926e-05, - "loss": 0.5524, - "step": 3896 - }, - { - "epoch": 0.13678723740325383, - "grad_norm": 0.550893247127533, - "learning_rate": 4.834074074074074e-05, - "loss": 0.473, - "step": 3897 - }, - { - "epoch": 0.1368223380543709, - "grad_norm": 0.5010506510734558, - "learning_rate": 4.833888888888889e-05, - "loss": 0.5166, - "step": 3898 - }, - { - "epoch": 0.136857438705488, - "grad_norm": 0.5724324584007263, - "learning_rate": 4.833703703703704e-05, - "loss": 0.5587, - "step": 3899 - }, - { - "epoch": 0.13689253935660506, - "grad_norm": 0.6743648648262024, - "learning_rate": 4.833518518518519e-05, - "loss": 0.5588, - "step": 3900 - }, - { - "epoch": 0.13692764000772215, - "grad_norm": 0.45074740052223206, - "learning_rate": 4.8333333333333334e-05, - "loss": 0.5067, - "step": 3901 - }, - { - "epoch": 0.13696274065883923, - "grad_norm": 0.6492202877998352, - "learning_rate": 4.8331481481481484e-05, - "loss": 0.5555, - "step": 3902 - }, - { - "epoch": 0.1369978413099563, - "grad_norm": 0.5699076652526855, - "learning_rate": 4.832962962962963e-05, - "loss": 0.5623, - "step": 3903 - }, - { - "epoch": 0.13703294196107338, - "grad_norm": 0.8840084671974182, - "learning_rate": 4.832777777777778e-05, - "loss": 0.5691, - "step": 3904 - }, - { - "epoch": 0.13706804261219047, - "grad_norm": 0.6244642734527588, - "learning_rate": 4.832592592592593e-05, - "loss": 0.4111, - "step": 3905 - }, - { - "epoch": 0.13710314326330753, - "grad_norm": 0.6557940244674683, - "learning_rate": 4.832407407407408e-05, - "loss": 0.6511, - "step": 3906 - }, - { - "epoch": 0.1371382439144246, - "grad_norm": 0.6297355890274048, - "learning_rate": 4.832222222222223e-05, - "loss": 0.5075, - "step": 3907 - }, - { - "epoch": 0.1371733445655417, - "grad_norm": 0.6464394927024841, - "learning_rate": 4.832037037037037e-05, - "loss": 0.6774, - "step": 3908 - }, - { - "epoch": 0.13720844521665876, - "grad_norm": 0.6093420386314392, - "learning_rate": 4.831851851851852e-05, - "loss": 0.6047, - "step": 3909 - }, - { - "epoch": 0.13724354586777585, - "grad_norm": 0.584997832775116, - "learning_rate": 4.831666666666667e-05, - "loss": 0.5744, - "step": 3910 - }, - { - "epoch": 0.13727864651889293, - "grad_norm": 0.5776843428611755, - "learning_rate": 4.831481481481482e-05, - "loss": 0.6138, - "step": 3911 - }, - { - "epoch": 0.13731374717001, - "grad_norm": 0.6762183904647827, - "learning_rate": 4.8312962962962965e-05, - "loss": 0.2873, - "step": 3912 - }, - { - "epoch": 0.13734884782112708, - "grad_norm": 0.8203965425491333, - "learning_rate": 4.8311111111111115e-05, - "loss": 0.6078, - "step": 3913 - }, - { - "epoch": 0.13738394847224417, - "grad_norm": 0.6434317231178284, - "learning_rate": 4.830925925925926e-05, - "loss": 0.4726, - "step": 3914 - }, - { - "epoch": 0.13741904912336123, - "grad_norm": 0.6024802327156067, - "learning_rate": 4.8307407407407415e-05, - "loss": 0.4065, - "step": 3915 - }, - { - "epoch": 0.1374541497744783, - "grad_norm": 0.5800409317016602, - "learning_rate": 4.830555555555556e-05, - "loss": 0.433, - "step": 3916 - }, - { - "epoch": 0.1374892504255954, - "grad_norm": 0.7059097290039062, - "learning_rate": 4.830370370370371e-05, - "loss": 0.6795, - "step": 3917 - }, - { - "epoch": 0.1375243510767125, - "grad_norm": 0.5661981105804443, - "learning_rate": 4.830185185185185e-05, - "loss": 0.4395, - "step": 3918 - }, - { - "epoch": 0.13755945172782955, - "grad_norm": 0.6423340439796448, - "learning_rate": 4.83e-05, - "loss": 0.5565, - "step": 3919 - }, - { - "epoch": 0.13759455237894663, - "grad_norm": 0.5184969902038574, - "learning_rate": 4.8298148148148145e-05, - "loss": 0.4943, - "step": 3920 - }, - { - "epoch": 0.13762965303006372, - "grad_norm": 0.6599394679069519, - "learning_rate": 4.82962962962963e-05, - "loss": 0.4949, - "step": 3921 - }, - { - "epoch": 0.13766475368118078, - "grad_norm": 0.6416406035423279, - "learning_rate": 4.8294444444444445e-05, - "loss": 0.6126, - "step": 3922 - }, - { - "epoch": 0.13769985433229787, - "grad_norm": 0.6679257154464722, - "learning_rate": 4.8292592592592595e-05, - "loss": 0.5221, - "step": 3923 - }, - { - "epoch": 0.13773495498341495, - "grad_norm": 0.5380949974060059, - "learning_rate": 4.829074074074074e-05, - "loss": 0.5523, - "step": 3924 - }, - { - "epoch": 0.137770055634532, - "grad_norm": 0.5332492589950562, - "learning_rate": 4.828888888888889e-05, - "loss": 0.4013, - "step": 3925 - }, - { - "epoch": 0.1378051562856491, - "grad_norm": 0.5831320881843567, - "learning_rate": 4.828703703703704e-05, - "loss": 0.5222, - "step": 3926 - }, - { - "epoch": 0.1378402569367662, - "grad_norm": 0.5573626756668091, - "learning_rate": 4.828518518518519e-05, - "loss": 0.4989, - "step": 3927 - }, - { - "epoch": 0.13787535758788325, - "grad_norm": 0.5643361210823059, - "learning_rate": 4.828333333333334e-05, - "loss": 0.6153, - "step": 3928 - }, - { - "epoch": 0.13791045823900033, - "grad_norm": 0.5359368324279785, - "learning_rate": 4.828148148148148e-05, - "loss": 0.5401, - "step": 3929 - }, - { - "epoch": 0.13794555889011742, - "grad_norm": 0.5267593860626221, - "learning_rate": 4.827962962962963e-05, - "loss": 0.5687, - "step": 3930 - }, - { - "epoch": 0.13798065954123448, - "grad_norm": 0.6921526193618774, - "learning_rate": 4.8277777777777776e-05, - "loss": 0.5759, - "step": 3931 - }, - { - "epoch": 0.13801576019235157, - "grad_norm": 0.7275222539901733, - "learning_rate": 4.827592592592593e-05, - "loss": 0.4798, - "step": 3932 - }, - { - "epoch": 0.13805086084346865, - "grad_norm": 0.7397575974464417, - "learning_rate": 4.8274074074074076e-05, - "loss": 0.5132, - "step": 3933 - }, - { - "epoch": 0.1380859614945857, - "grad_norm": 0.5197786688804626, - "learning_rate": 4.8272222222222226e-05, - "loss": 0.4197, - "step": 3934 - }, - { - "epoch": 0.1381210621457028, - "grad_norm": 0.49240589141845703, - "learning_rate": 4.827037037037037e-05, - "loss": 0.544, - "step": 3935 - }, - { - "epoch": 0.1381561627968199, - "grad_norm": 0.5512675642967224, - "learning_rate": 4.826851851851852e-05, - "loss": 0.5161, - "step": 3936 - }, - { - "epoch": 0.13819126344793695, - "grad_norm": 0.5805296897888184, - "learning_rate": 4.826666666666667e-05, - "loss": 0.4178, - "step": 3937 - }, - { - "epoch": 0.13822636409905403, - "grad_norm": 0.6229544281959534, - "learning_rate": 4.826481481481482e-05, - "loss": 0.5709, - "step": 3938 - }, - { - "epoch": 0.13826146475017112, - "grad_norm": 0.5928258299827576, - "learning_rate": 4.826296296296296e-05, - "loss": 0.5984, - "step": 3939 - }, - { - "epoch": 0.1382965654012882, - "grad_norm": 0.574200451374054, - "learning_rate": 4.8261111111111113e-05, - "loss": 0.5786, - "step": 3940 - }, - { - "epoch": 0.13833166605240527, - "grad_norm": 0.4809154272079468, - "learning_rate": 4.825925925925926e-05, - "loss": 0.5679, - "step": 3941 - }, - { - "epoch": 0.13836676670352235, - "grad_norm": 0.5658331513404846, - "learning_rate": 4.8257407407407414e-05, - "loss": 0.5535, - "step": 3942 - }, - { - "epoch": 0.13840186735463944, - "grad_norm": 0.5416087508201599, - "learning_rate": 4.825555555555556e-05, - "loss": 0.5903, - "step": 3943 - }, - { - "epoch": 0.1384369680057565, - "grad_norm": 0.5822819471359253, - "learning_rate": 4.825370370370371e-05, - "loss": 0.5088, - "step": 3944 - }, - { - "epoch": 0.1384720686568736, - "grad_norm": 0.5323874354362488, - "learning_rate": 4.825185185185185e-05, - "loss": 0.5455, - "step": 3945 - }, - { - "epoch": 0.13850716930799067, - "grad_norm": 0.5660281181335449, - "learning_rate": 4.825e-05, - "loss": 0.5054, - "step": 3946 - }, - { - "epoch": 0.13854226995910773, - "grad_norm": 0.5130235552787781, - "learning_rate": 4.824814814814815e-05, - "loss": 0.6206, - "step": 3947 - }, - { - "epoch": 0.13857737061022482, - "grad_norm": 0.6105577945709229, - "learning_rate": 4.82462962962963e-05, - "loss": 0.5921, - "step": 3948 - }, - { - "epoch": 0.1386124712613419, - "grad_norm": 0.5567048788070679, - "learning_rate": 4.824444444444445e-05, - "loss": 0.5822, - "step": 3949 - }, - { - "epoch": 0.13864757191245897, - "grad_norm": 0.5244016647338867, - "learning_rate": 4.8242592592592594e-05, - "loss": 0.521, - "step": 3950 - }, - { - "epoch": 0.13868267256357605, - "grad_norm": 0.47906240820884705, - "learning_rate": 4.8240740740740744e-05, - "loss": 0.4059, - "step": 3951 - }, - { - "epoch": 0.13871777321469314, - "grad_norm": 0.5551429986953735, - "learning_rate": 4.823888888888889e-05, - "loss": 0.5647, - "step": 3952 - }, - { - "epoch": 0.1387528738658102, - "grad_norm": 0.5400863885879517, - "learning_rate": 4.8237037037037045e-05, - "loss": 0.6251, - "step": 3953 - }, - { - "epoch": 0.1387879745169273, - "grad_norm": 0.6187282800674438, - "learning_rate": 4.823518518518519e-05, - "loss": 0.5615, - "step": 3954 - }, - { - "epoch": 0.13882307516804437, - "grad_norm": 0.48996278643608093, - "learning_rate": 4.823333333333334e-05, - "loss": 0.5223, - "step": 3955 - }, - { - "epoch": 0.13885817581916143, - "grad_norm": 0.568915843963623, - "learning_rate": 4.823148148148148e-05, - "loss": 0.5377, - "step": 3956 - }, - { - "epoch": 0.13889327647027852, - "grad_norm": 0.5282769799232483, - "learning_rate": 4.822962962962963e-05, - "loss": 0.4219, - "step": 3957 - }, - { - "epoch": 0.1389283771213956, - "grad_norm": 0.6450901031494141, - "learning_rate": 4.822777777777778e-05, - "loss": 0.5739, - "step": 3958 - }, - { - "epoch": 0.13896347777251267, - "grad_norm": 0.6348466277122498, - "learning_rate": 4.822592592592593e-05, - "loss": 0.491, - "step": 3959 - }, - { - "epoch": 0.13899857842362975, - "grad_norm": 0.6046847701072693, - "learning_rate": 4.8224074074074075e-05, - "loss": 0.5276, - "step": 3960 - }, - { - "epoch": 0.13903367907474684, - "grad_norm": 0.5331156253814697, - "learning_rate": 4.8222222222222225e-05, - "loss": 0.5411, - "step": 3961 - }, - { - "epoch": 0.13906877972586393, - "grad_norm": 0.7433522939682007, - "learning_rate": 4.822037037037037e-05, - "loss": 0.5273, - "step": 3962 - }, - { - "epoch": 0.139103880376981, - "grad_norm": 0.5463560223579407, - "learning_rate": 4.821851851851852e-05, - "loss": 0.4531, - "step": 3963 - }, - { - "epoch": 0.13913898102809807, - "grad_norm": 0.49702349305152893, - "learning_rate": 4.821666666666667e-05, - "loss": 0.4367, - "step": 3964 - }, - { - "epoch": 0.13917408167921516, - "grad_norm": 0.5597353577613831, - "learning_rate": 4.821481481481482e-05, - "loss": 0.5696, - "step": 3965 - }, - { - "epoch": 0.13920918233033222, - "grad_norm": 0.7136774063110352, - "learning_rate": 4.821296296296296e-05, - "loss": 0.5775, - "step": 3966 - }, - { - "epoch": 0.1392442829814493, - "grad_norm": 0.5646262168884277, - "learning_rate": 4.821111111111111e-05, - "loss": 0.5789, - "step": 3967 - }, - { - "epoch": 0.1392793836325664, - "grad_norm": 0.5651338696479797, - "learning_rate": 4.820925925925926e-05, - "loss": 0.6082, - "step": 3968 - }, - { - "epoch": 0.13931448428368345, - "grad_norm": 0.5294429063796997, - "learning_rate": 4.820740740740741e-05, - "loss": 0.5736, - "step": 3969 - }, - { - "epoch": 0.13934958493480054, - "grad_norm": 0.6234112977981567, - "learning_rate": 4.820555555555556e-05, - "loss": 0.6459, - "step": 3970 - }, - { - "epoch": 0.13938468558591763, - "grad_norm": 0.6007513999938965, - "learning_rate": 4.8203703703703706e-05, - "loss": 0.5124, - "step": 3971 - }, - { - "epoch": 0.1394197862370347, - "grad_norm": 0.5353005528450012, - "learning_rate": 4.8201851851851856e-05, - "loss": 0.557, - "step": 3972 - }, - { - "epoch": 0.13945488688815177, - "grad_norm": 0.5883358120918274, - "learning_rate": 4.82e-05, - "loss": 0.4999, - "step": 3973 - }, - { - "epoch": 0.13948998753926886, - "grad_norm": 0.5390874147415161, - "learning_rate": 4.819814814814815e-05, - "loss": 0.6262, - "step": 3974 - }, - { - "epoch": 0.13952508819038592, - "grad_norm": 0.7254940867424011, - "learning_rate": 4.81962962962963e-05, - "loss": 0.442, - "step": 3975 - }, - { - "epoch": 0.139560188841503, - "grad_norm": 0.5257008671760559, - "learning_rate": 4.819444444444445e-05, - "loss": 0.4974, - "step": 3976 - }, - { - "epoch": 0.1395952894926201, - "grad_norm": 0.5382511019706726, - "learning_rate": 4.819259259259259e-05, - "loss": 0.5513, - "step": 3977 - }, - { - "epoch": 0.13963039014373715, - "grad_norm": 0.567147433757782, - "learning_rate": 4.819074074074074e-05, - "loss": 0.6417, - "step": 3978 - }, - { - "epoch": 0.13966549079485424, - "grad_norm": 0.5545315742492676, - "learning_rate": 4.8188888888888886e-05, - "loss": 0.5179, - "step": 3979 - }, - { - "epoch": 0.13970059144597133, - "grad_norm": 0.48590385913848877, - "learning_rate": 4.818703703703704e-05, - "loss": 0.5115, - "step": 3980 - }, - { - "epoch": 0.1397356920970884, - "grad_norm": 0.5126907229423523, - "learning_rate": 4.818518518518519e-05, - "loss": 0.6014, - "step": 3981 - }, - { - "epoch": 0.13977079274820547, - "grad_norm": 0.8177673816680908, - "learning_rate": 4.818333333333334e-05, - "loss": 0.6354, - "step": 3982 - }, - { - "epoch": 0.13980589339932256, - "grad_norm": 0.47938498854637146, - "learning_rate": 4.818148148148148e-05, - "loss": 0.4393, - "step": 3983 - }, - { - "epoch": 0.13984099405043965, - "grad_norm": 0.502892255783081, - "learning_rate": 4.817962962962963e-05, - "loss": 0.4982, - "step": 3984 - }, - { - "epoch": 0.1398760947015567, - "grad_norm": 0.547440767288208, - "learning_rate": 4.817777777777778e-05, - "loss": 0.6505, - "step": 3985 - }, - { - "epoch": 0.1399111953526738, - "grad_norm": 0.5892801284790039, - "learning_rate": 4.817592592592593e-05, - "loss": 0.531, - "step": 3986 - }, - { - "epoch": 0.13994629600379088, - "grad_norm": 0.5609815120697021, - "learning_rate": 4.8174074074074074e-05, - "loss": 0.4639, - "step": 3987 - }, - { - "epoch": 0.13998139665490794, - "grad_norm": 0.5379613637924194, - "learning_rate": 4.8172222222222224e-05, - "loss": 0.4984, - "step": 3988 - }, - { - "epoch": 0.14001649730602503, - "grad_norm": 0.5323002934455872, - "learning_rate": 4.8170370370370374e-05, - "loss": 0.5644, - "step": 3989 - }, - { - "epoch": 0.14005159795714212, - "grad_norm": 0.49583831429481506, - "learning_rate": 4.816851851851852e-05, - "loss": 0.4081, - "step": 3990 - }, - { - "epoch": 0.14008669860825917, - "grad_norm": 0.5517657399177551, - "learning_rate": 4.8166666666666674e-05, - "loss": 0.5574, - "step": 3991 - }, - { - "epoch": 0.14012179925937626, - "grad_norm": 0.521287202835083, - "learning_rate": 4.816481481481482e-05, - "loss": 0.5451, - "step": 3992 - }, - { - "epoch": 0.14015689991049335, - "grad_norm": 0.5801631212234497, - "learning_rate": 4.816296296296297e-05, - "loss": 0.5117, - "step": 3993 - }, - { - "epoch": 0.1401920005616104, - "grad_norm": 0.6105200052261353, - "learning_rate": 4.816111111111111e-05, - "loss": 0.5461, - "step": 3994 - }, - { - "epoch": 0.1402271012127275, - "grad_norm": 0.6318029165267944, - "learning_rate": 4.815925925925926e-05, - "loss": 0.5352, - "step": 3995 - }, - { - "epoch": 0.14026220186384458, - "grad_norm": 0.5805283784866333, - "learning_rate": 4.815740740740741e-05, - "loss": 0.5532, - "step": 3996 - }, - { - "epoch": 0.14029730251496164, - "grad_norm": 0.7593399286270142, - "learning_rate": 4.815555555555556e-05, - "loss": 0.6279, - "step": 3997 - }, - { - "epoch": 0.14033240316607873, - "grad_norm": 0.5569497346878052, - "learning_rate": 4.8153703703703705e-05, - "loss": 0.616, - "step": 3998 - }, - { - "epoch": 0.14036750381719582, - "grad_norm": 0.5464207530021667, - "learning_rate": 4.8151851851851855e-05, - "loss": 0.5237, - "step": 3999 - }, - { - "epoch": 0.14040260446831287, - "grad_norm": 0.5115465521812439, - "learning_rate": 4.815e-05, - "loss": 0.509, - "step": 4000 - }, - { - "epoch": 0.14043770511942996, - "grad_norm": 0.637097179889679, - "learning_rate": 4.814814814814815e-05, - "loss": 0.5531, - "step": 4001 - }, - { - "epoch": 0.14047280577054705, - "grad_norm": 0.5849770903587341, - "learning_rate": 4.81462962962963e-05, - "loss": 0.5408, - "step": 4002 - }, - { - "epoch": 0.1405079064216641, - "grad_norm": 0.4553907513618469, - "learning_rate": 4.814444444444445e-05, - "loss": 0.4728, - "step": 4003 - }, - { - "epoch": 0.1405430070727812, - "grad_norm": 0.621902585029602, - "learning_rate": 4.814259259259259e-05, - "loss": 0.5432, - "step": 4004 - }, - { - "epoch": 0.14057810772389828, - "grad_norm": 0.5364959836006165, - "learning_rate": 4.814074074074074e-05, - "loss": 0.5181, - "step": 4005 - }, - { - "epoch": 0.14061320837501537, - "grad_norm": 0.560142457485199, - "learning_rate": 4.813888888888889e-05, - "loss": 0.5651, - "step": 4006 - }, - { - "epoch": 0.14064830902613243, - "grad_norm": 0.5192075371742249, - "learning_rate": 4.813703703703704e-05, - "loss": 0.4709, - "step": 4007 - }, - { - "epoch": 0.14068340967724952, - "grad_norm": 0.5513694882392883, - "learning_rate": 4.8135185185185185e-05, - "loss": 0.5203, - "step": 4008 - }, - { - "epoch": 0.1407185103283666, - "grad_norm": 0.6380151510238647, - "learning_rate": 4.8133333333333336e-05, - "loss": 0.4799, - "step": 4009 - }, - { - "epoch": 0.14075361097948366, - "grad_norm": 0.6068713068962097, - "learning_rate": 4.8131481481481486e-05, - "loss": 0.5929, - "step": 4010 - }, - { - "epoch": 0.14078871163060075, - "grad_norm": 0.5497875809669495, - "learning_rate": 4.812962962962963e-05, - "loss": 0.5666, - "step": 4011 - }, - { - "epoch": 0.14082381228171784, - "grad_norm": 0.5538807511329651, - "learning_rate": 4.8127777777777786e-05, - "loss": 0.4998, - "step": 4012 - }, - { - "epoch": 0.1408589129328349, - "grad_norm": 0.5621858239173889, - "learning_rate": 4.812592592592593e-05, - "loss": 0.3555, - "step": 4013 - }, - { - "epoch": 0.14089401358395198, - "grad_norm": 0.54034823179245, - "learning_rate": 4.812407407407408e-05, - "loss": 0.5307, - "step": 4014 - }, - { - "epoch": 0.14092911423506907, - "grad_norm": 0.7844265103340149, - "learning_rate": 4.812222222222222e-05, - "loss": 0.6539, - "step": 4015 - }, - { - "epoch": 0.14096421488618613, - "grad_norm": 0.6245548725128174, - "learning_rate": 4.812037037037037e-05, - "loss": 0.5127, - "step": 4016 - }, - { - "epoch": 0.14099931553730322, - "grad_norm": 0.610365629196167, - "learning_rate": 4.8118518518518516e-05, - "loss": 0.4977, - "step": 4017 - }, - { - "epoch": 0.1410344161884203, - "grad_norm": 0.7717720866203308, - "learning_rate": 4.811666666666667e-05, - "loss": 0.456, - "step": 4018 - }, - { - "epoch": 0.14106951683953736, - "grad_norm": 0.5944433212280273, - "learning_rate": 4.8114814814814816e-05, - "loss": 0.6703, - "step": 4019 - }, - { - "epoch": 0.14110461749065445, - "grad_norm": 0.587419867515564, - "learning_rate": 4.8112962962962966e-05, - "loss": 0.5383, - "step": 4020 - }, - { - "epoch": 0.14113971814177154, - "grad_norm": 0.6870607733726501, - "learning_rate": 4.811111111111111e-05, - "loss": 0.6207, - "step": 4021 - }, - { - "epoch": 0.1411748187928886, - "grad_norm": 0.5863437056541443, - "learning_rate": 4.810925925925926e-05, - "loss": 0.4968, - "step": 4022 - }, - { - "epoch": 0.14120991944400568, - "grad_norm": 0.5520671606063843, - "learning_rate": 4.810740740740741e-05, - "loss": 0.4825, - "step": 4023 - }, - { - "epoch": 0.14124502009512277, - "grad_norm": 0.6236404180526733, - "learning_rate": 4.810555555555556e-05, - "loss": 0.6018, - "step": 4024 - }, - { - "epoch": 0.14128012074623983, - "grad_norm": 0.7147496938705444, - "learning_rate": 4.8103703703703703e-05, - "loss": 0.6212, - "step": 4025 - }, - { - "epoch": 0.14131522139735692, - "grad_norm": 0.5676137208938599, - "learning_rate": 4.8101851851851854e-05, - "loss": 0.6001, - "step": 4026 - }, - { - "epoch": 0.141350322048474, - "grad_norm": 0.5128964185714722, - "learning_rate": 4.8100000000000004e-05, - "loss": 0.5361, - "step": 4027 - }, - { - "epoch": 0.1413854226995911, - "grad_norm": 0.5721961855888367, - "learning_rate": 4.809814814814815e-05, - "loss": 0.4838, - "step": 4028 - }, - { - "epoch": 0.14142052335070815, - "grad_norm": 0.5213254690170288, - "learning_rate": 4.80962962962963e-05, - "loss": 0.5572, - "step": 4029 - }, - { - "epoch": 0.14145562400182524, - "grad_norm": 0.5279014706611633, - "learning_rate": 4.809444444444445e-05, - "loss": 0.4477, - "step": 4030 - }, - { - "epoch": 0.14149072465294232, - "grad_norm": 0.4610891342163086, - "learning_rate": 4.80925925925926e-05, - "loss": 0.4744, - "step": 4031 - }, - { - "epoch": 0.14152582530405938, - "grad_norm": 0.48378631472587585, - "learning_rate": 4.809074074074074e-05, - "loss": 0.5594, - "step": 4032 - }, - { - "epoch": 0.14156092595517647, - "grad_norm": 0.568932294845581, - "learning_rate": 4.808888888888889e-05, - "loss": 0.6059, - "step": 4033 - }, - { - "epoch": 0.14159602660629356, - "grad_norm": 0.6721162796020508, - "learning_rate": 4.808703703703704e-05, - "loss": 0.596, - "step": 4034 - }, - { - "epoch": 0.14163112725741062, - "grad_norm": 0.6883906722068787, - "learning_rate": 4.808518518518519e-05, - "loss": 0.5256, - "step": 4035 - }, - { - "epoch": 0.1416662279085277, - "grad_norm": 0.5412405133247375, - "learning_rate": 4.8083333333333334e-05, - "loss": 0.5539, - "step": 4036 - }, - { - "epoch": 0.1417013285596448, - "grad_norm": 0.5488754510879517, - "learning_rate": 4.8081481481481484e-05, - "loss": 0.5493, - "step": 4037 - }, - { - "epoch": 0.14173642921076185, - "grad_norm": 0.8019753694534302, - "learning_rate": 4.807962962962963e-05, - "loss": 0.487, - "step": 4038 - }, - { - "epoch": 0.14177152986187894, - "grad_norm": 0.6728987097740173, - "learning_rate": 4.8077777777777785e-05, - "loss": 0.5475, - "step": 4039 - }, - { - "epoch": 0.14180663051299602, - "grad_norm": 0.5851696133613586, - "learning_rate": 4.807592592592593e-05, - "loss": 0.5291, - "step": 4040 - }, - { - "epoch": 0.14184173116411308, - "grad_norm": 0.6050142645835876, - "learning_rate": 4.807407407407408e-05, - "loss": 0.6058, - "step": 4041 - }, - { - "epoch": 0.14187683181523017, - "grad_norm": 0.5347030758857727, - "learning_rate": 4.807222222222222e-05, - "loss": 0.422, - "step": 4042 - }, - { - "epoch": 0.14191193246634726, - "grad_norm": 0.5758333206176758, - "learning_rate": 4.807037037037037e-05, - "loss": 0.5404, - "step": 4043 - }, - { - "epoch": 0.14194703311746432, - "grad_norm": 0.8962892293930054, - "learning_rate": 4.806851851851852e-05, - "loss": 0.4591, - "step": 4044 - }, - { - "epoch": 0.1419821337685814, - "grad_norm": 1.0303406715393066, - "learning_rate": 4.806666666666667e-05, - "loss": 0.5334, - "step": 4045 - }, - { - "epoch": 0.1420172344196985, - "grad_norm": 0.5778171420097351, - "learning_rate": 4.8064814814814815e-05, - "loss": 0.5117, - "step": 4046 - }, - { - "epoch": 0.14205233507081558, - "grad_norm": 0.46680769324302673, - "learning_rate": 4.8062962962962965e-05, - "loss": 0.5227, - "step": 4047 - }, - { - "epoch": 0.14208743572193264, - "grad_norm": 0.5692826509475708, - "learning_rate": 4.8061111111111115e-05, - "loss": 0.4865, - "step": 4048 - }, - { - "epoch": 0.14212253637304972, - "grad_norm": 0.7801939249038696, - "learning_rate": 4.805925925925926e-05, - "loss": 0.5793, - "step": 4049 - }, - { - "epoch": 0.1421576370241668, - "grad_norm": 1.2100392580032349, - "learning_rate": 4.805740740740741e-05, - "loss": 0.5321, - "step": 4050 - }, - { - "epoch": 0.14219273767528387, - "grad_norm": 0.7642911672592163, - "learning_rate": 4.805555555555556e-05, - "loss": 0.5366, - "step": 4051 - }, - { - "epoch": 0.14222783832640096, - "grad_norm": 0.5375572443008423, - "learning_rate": 4.805370370370371e-05, - "loss": 0.5551, - "step": 4052 - }, - { - "epoch": 0.14226293897751804, - "grad_norm": 0.6682677268981934, - "learning_rate": 4.805185185185185e-05, - "loss": 0.5178, - "step": 4053 - }, - { - "epoch": 0.1422980396286351, - "grad_norm": 0.6166648864746094, - "learning_rate": 4.805e-05, - "loss": 0.6302, - "step": 4054 - }, - { - "epoch": 0.1423331402797522, - "grad_norm": 0.6181133389472961, - "learning_rate": 4.8048148148148146e-05, - "loss": 0.5897, - "step": 4055 - }, - { - "epoch": 0.14236824093086928, - "grad_norm": 0.8364346623420715, - "learning_rate": 4.80462962962963e-05, - "loss": 0.6137, - "step": 4056 - }, - { - "epoch": 0.14240334158198634, - "grad_norm": 0.6300941109657288, - "learning_rate": 4.8044444444444446e-05, - "loss": 0.5333, - "step": 4057 - }, - { - "epoch": 0.14243844223310342, - "grad_norm": 1.0773372650146484, - "learning_rate": 4.8042592592592596e-05, - "loss": 0.5832, - "step": 4058 - }, - { - "epoch": 0.1424735428842205, - "grad_norm": 1.265037178993225, - "learning_rate": 4.804074074074074e-05, - "loss": 0.5753, - "step": 4059 - }, - { - "epoch": 0.14250864353533757, - "grad_norm": 0.6266526579856873, - "learning_rate": 4.803888888888889e-05, - "loss": 0.5788, - "step": 4060 - }, - { - "epoch": 0.14254374418645466, - "grad_norm": 0.5693230628967285, - "learning_rate": 4.803703703703704e-05, - "loss": 0.6543, - "step": 4061 - }, - { - "epoch": 0.14257884483757174, - "grad_norm": 0.6349781155586243, - "learning_rate": 4.803518518518519e-05, - "loss": 0.5225, - "step": 4062 - }, - { - "epoch": 0.1426139454886888, - "grad_norm": 0.7192432284355164, - "learning_rate": 4.803333333333333e-05, - "loss": 0.6093, - "step": 4063 - }, - { - "epoch": 0.1426490461398059, - "grad_norm": 0.8545798659324646, - "learning_rate": 4.803148148148148e-05, - "loss": 0.6024, - "step": 4064 - }, - { - "epoch": 0.14268414679092298, - "grad_norm": 0.5898764133453369, - "learning_rate": 4.802962962962963e-05, - "loss": 0.5527, - "step": 4065 - }, - { - "epoch": 0.14271924744204004, - "grad_norm": 0.7091937065124512, - "learning_rate": 4.8027777777777783e-05, - "loss": 0.5063, - "step": 4066 - }, - { - "epoch": 0.14275434809315712, - "grad_norm": 0.5425930023193359, - "learning_rate": 4.802592592592593e-05, - "loss": 0.4692, - "step": 4067 - }, - { - "epoch": 0.1427894487442742, - "grad_norm": 0.4954110383987427, - "learning_rate": 4.802407407407408e-05, - "loss": 0.5158, - "step": 4068 - }, - { - "epoch": 0.1428245493953913, - "grad_norm": 0.6991184949874878, - "learning_rate": 4.802222222222223e-05, - "loss": 0.5299, - "step": 4069 - }, - { - "epoch": 0.14285965004650836, - "grad_norm": 0.654937744140625, - "learning_rate": 4.802037037037037e-05, - "loss": 0.5242, - "step": 4070 - }, - { - "epoch": 0.14289475069762544, - "grad_norm": 0.7164429426193237, - "learning_rate": 4.801851851851852e-05, - "loss": 0.5714, - "step": 4071 - }, - { - "epoch": 0.14292985134874253, - "grad_norm": 0.5684788227081299, - "learning_rate": 4.801666666666667e-05, - "loss": 0.6202, - "step": 4072 - }, - { - "epoch": 0.1429649519998596, - "grad_norm": 0.5229355096817017, - "learning_rate": 4.801481481481482e-05, - "loss": 0.5224, - "step": 4073 - }, - { - "epoch": 0.14300005265097668, - "grad_norm": 0.621245265007019, - "learning_rate": 4.8012962962962964e-05, - "loss": 0.5324, - "step": 4074 - }, - { - "epoch": 0.14303515330209376, - "grad_norm": 0.5820197463035583, - "learning_rate": 4.8011111111111114e-05, - "loss": 0.6277, - "step": 4075 - }, - { - "epoch": 0.14307025395321082, - "grad_norm": 0.6663697361946106, - "learning_rate": 4.800925925925926e-05, - "loss": 0.6041, - "step": 4076 - }, - { - "epoch": 0.1431053546043279, - "grad_norm": 0.5629537105560303, - "learning_rate": 4.8007407407407414e-05, - "loss": 0.5577, - "step": 4077 - }, - { - "epoch": 0.143140455255445, - "grad_norm": 0.5990055799484253, - "learning_rate": 4.800555555555556e-05, - "loss": 0.5022, - "step": 4078 - }, - { - "epoch": 0.14317555590656206, - "grad_norm": 0.6561012268066406, - "learning_rate": 4.800370370370371e-05, - "loss": 0.7056, - "step": 4079 - }, - { - "epoch": 0.14321065655767914, - "grad_norm": 0.5052238702774048, - "learning_rate": 4.800185185185185e-05, - "loss": 0.5142, - "step": 4080 - }, - { - "epoch": 0.14324575720879623, - "grad_norm": 0.5548496246337891, - "learning_rate": 4.8e-05, - "loss": 0.5715, - "step": 4081 - }, - { - "epoch": 0.1432808578599133, - "grad_norm": 0.4412032961845398, - "learning_rate": 4.799814814814815e-05, - "loss": 0.5306, - "step": 4082 - }, - { - "epoch": 0.14331595851103038, - "grad_norm": 0.5283514857292175, - "learning_rate": 4.79962962962963e-05, - "loss": 0.517, - "step": 4083 - }, - { - "epoch": 0.14335105916214747, - "grad_norm": 0.5526862740516663, - "learning_rate": 4.7994444444444445e-05, - "loss": 0.4804, - "step": 4084 - }, - { - "epoch": 0.14338615981326452, - "grad_norm": 0.5555918216705322, - "learning_rate": 4.7992592592592595e-05, - "loss": 0.5114, - "step": 4085 - }, - { - "epoch": 0.1434212604643816, - "grad_norm": 0.5188391804695129, - "learning_rate": 4.7990740740740745e-05, - "loss": 0.6295, - "step": 4086 - }, - { - "epoch": 0.1434563611154987, - "grad_norm": 0.5313025712966919, - "learning_rate": 4.798888888888889e-05, - "loss": 0.5637, - "step": 4087 - }, - { - "epoch": 0.14349146176661576, - "grad_norm": 0.5425400733947754, - "learning_rate": 4.798703703703704e-05, - "loss": 0.5257, - "step": 4088 - }, - { - "epoch": 0.14352656241773284, - "grad_norm": 0.533576488494873, - "learning_rate": 4.798518518518519e-05, - "loss": 0.5712, - "step": 4089 - }, - { - "epoch": 0.14356166306884993, - "grad_norm": 0.5036427974700928, - "learning_rate": 4.798333333333334e-05, - "loss": 0.5375, - "step": 4090 - }, - { - "epoch": 0.14359676371996702, - "grad_norm": 0.7342220544815063, - "learning_rate": 4.798148148148148e-05, - "loss": 0.6206, - "step": 4091 - }, - { - "epoch": 0.14363186437108408, - "grad_norm": 0.4429321587085724, - "learning_rate": 4.797962962962963e-05, - "loss": 0.5374, - "step": 4092 - }, - { - "epoch": 0.14366696502220117, - "grad_norm": 0.4980109632015228, - "learning_rate": 4.797777777777778e-05, - "loss": 0.575, - "step": 4093 - }, - { - "epoch": 0.14370206567331825, - "grad_norm": 0.5325419306755066, - "learning_rate": 4.797592592592593e-05, - "loss": 0.5347, - "step": 4094 - }, - { - "epoch": 0.1437371663244353, - "grad_norm": 0.6045690178871155, - "learning_rate": 4.7974074074074076e-05, - "loss": 0.5246, - "step": 4095 - }, - { - "epoch": 0.1437722669755524, - "grad_norm": 0.555463969707489, - "learning_rate": 4.7972222222222226e-05, - "loss": 0.5015, - "step": 4096 - }, - { - "epoch": 0.14380736762666949, - "grad_norm": 0.4846169352531433, - "learning_rate": 4.797037037037037e-05, - "loss": 0.5115, - "step": 4097 - }, - { - "epoch": 0.14384246827778654, - "grad_norm": 0.5904855728149414, - "learning_rate": 4.796851851851852e-05, - "loss": 0.5517, - "step": 4098 - }, - { - "epoch": 0.14387756892890363, - "grad_norm": 0.6937822103500366, - "learning_rate": 4.796666666666667e-05, - "loss": 0.607, - "step": 4099 - }, - { - "epoch": 0.14391266958002072, - "grad_norm": 0.6176857948303223, - "learning_rate": 4.796481481481482e-05, - "loss": 0.5309, - "step": 4100 - }, - { - "epoch": 0.14394777023113778, - "grad_norm": 0.5041227340698242, - "learning_rate": 4.796296296296296e-05, - "loss": 0.4937, - "step": 4101 - }, - { - "epoch": 0.14398287088225487, - "grad_norm": 0.70311039686203, - "learning_rate": 4.796111111111111e-05, - "loss": 0.5472, - "step": 4102 - }, - { - "epoch": 0.14401797153337195, - "grad_norm": 0.6876373291015625, - "learning_rate": 4.7959259259259256e-05, - "loss": 0.5593, - "step": 4103 - }, - { - "epoch": 0.144053072184489, - "grad_norm": 0.7079889178276062, - "learning_rate": 4.795740740740741e-05, - "loss": 0.642, - "step": 4104 - }, - { - "epoch": 0.1440881728356061, - "grad_norm": 0.6634764671325684, - "learning_rate": 4.7955555555555556e-05, - "loss": 0.3925, - "step": 4105 - }, - { - "epoch": 0.14412327348672319, - "grad_norm": 0.6690143942832947, - "learning_rate": 4.7953703703703707e-05, - "loss": 0.528, - "step": 4106 - }, - { - "epoch": 0.14415837413784025, - "grad_norm": 0.6375990509986877, - "learning_rate": 4.795185185185186e-05, - "loss": 0.5506, - "step": 4107 - }, - { - "epoch": 0.14419347478895733, - "grad_norm": 0.6735506653785706, - "learning_rate": 4.795e-05, - "loss": 0.3887, - "step": 4108 - }, - { - "epoch": 0.14422857544007442, - "grad_norm": 0.7815655469894409, - "learning_rate": 4.794814814814815e-05, - "loss": 0.4734, - "step": 4109 - }, - { - "epoch": 0.14426367609119148, - "grad_norm": 0.5587615966796875, - "learning_rate": 4.79462962962963e-05, - "loss": 0.5104, - "step": 4110 - }, - { - "epoch": 0.14429877674230857, - "grad_norm": 0.5479151010513306, - "learning_rate": 4.794444444444445e-05, - "loss": 0.4933, - "step": 4111 - }, - { - "epoch": 0.14433387739342565, - "grad_norm": 0.6943367123603821, - "learning_rate": 4.7942592592592594e-05, - "loss": 0.4227, - "step": 4112 - }, - { - "epoch": 0.14436897804454274, - "grad_norm": 0.5934616327285767, - "learning_rate": 4.7940740740740744e-05, - "loss": 0.4948, - "step": 4113 - }, - { - "epoch": 0.1444040786956598, - "grad_norm": 0.8651884198188782, - "learning_rate": 4.793888888888889e-05, - "loss": 0.454, - "step": 4114 - }, - { - "epoch": 0.14443917934677689, - "grad_norm": 0.5426145195960999, - "learning_rate": 4.7937037037037044e-05, - "loss": 0.5897, - "step": 4115 - }, - { - "epoch": 0.14447427999789397, - "grad_norm": 0.6783391237258911, - "learning_rate": 4.793518518518519e-05, - "loss": 0.4276, - "step": 4116 - }, - { - "epoch": 0.14450938064901103, - "grad_norm": 0.4976545572280884, - "learning_rate": 4.793333333333334e-05, - "loss": 0.4798, - "step": 4117 - }, - { - "epoch": 0.14454448130012812, - "grad_norm": 0.556978702545166, - "learning_rate": 4.793148148148148e-05, - "loss": 0.6254, - "step": 4118 - }, - { - "epoch": 0.1445795819512452, - "grad_norm": 0.6790574789047241, - "learning_rate": 4.792962962962963e-05, - "loss": 0.4067, - "step": 4119 - }, - { - "epoch": 0.14461468260236227, - "grad_norm": 0.6829715967178345, - "learning_rate": 4.792777777777778e-05, - "loss": 0.5642, - "step": 4120 - }, - { - "epoch": 0.14464978325347935, - "grad_norm": 0.5680938959121704, - "learning_rate": 4.792592592592593e-05, - "loss": 0.4421, - "step": 4121 - }, - { - "epoch": 0.14468488390459644, - "grad_norm": 0.5720838904380798, - "learning_rate": 4.7924074074074074e-05, - "loss": 0.4896, - "step": 4122 - }, - { - "epoch": 0.1447199845557135, - "grad_norm": 0.5445829033851624, - "learning_rate": 4.7922222222222225e-05, - "loss": 0.5911, - "step": 4123 - }, - { - "epoch": 0.14475508520683059, - "grad_norm": 0.5721622705459595, - "learning_rate": 4.792037037037037e-05, - "loss": 0.4768, - "step": 4124 - }, - { - "epoch": 0.14479018585794767, - "grad_norm": 0.6288172602653503, - "learning_rate": 4.791851851851852e-05, - "loss": 0.6138, - "step": 4125 - }, - { - "epoch": 0.14482528650906473, - "grad_norm": 0.530302882194519, - "learning_rate": 4.791666666666667e-05, - "loss": 0.4044, - "step": 4126 - }, - { - "epoch": 0.14486038716018182, - "grad_norm": 0.5889363884925842, - "learning_rate": 4.791481481481482e-05, - "loss": 0.5791, - "step": 4127 - }, - { - "epoch": 0.1448954878112989, - "grad_norm": 0.6393593549728394, - "learning_rate": 4.791296296296297e-05, - "loss": 0.401, - "step": 4128 - }, - { - "epoch": 0.14493058846241597, - "grad_norm": 0.5728458166122437, - "learning_rate": 4.791111111111111e-05, - "loss": 0.5548, - "step": 4129 - }, - { - "epoch": 0.14496568911353305, - "grad_norm": 0.5854986310005188, - "learning_rate": 4.790925925925926e-05, - "loss": 0.5192, - "step": 4130 - }, - { - "epoch": 0.14500078976465014, - "grad_norm": 0.9220300912857056, - "learning_rate": 4.790740740740741e-05, - "loss": 0.6147, - "step": 4131 - }, - { - "epoch": 0.1450358904157672, - "grad_norm": 0.74735027551651, - "learning_rate": 4.790555555555556e-05, - "loss": 0.4441, - "step": 4132 - }, - { - "epoch": 0.1450709910668843, - "grad_norm": 0.5423954129219055, - "learning_rate": 4.7903703703703705e-05, - "loss": 0.4539, - "step": 4133 - }, - { - "epoch": 0.14510609171800137, - "grad_norm": 0.6304336786270142, - "learning_rate": 4.7901851851851855e-05, - "loss": 0.5039, - "step": 4134 - }, - { - "epoch": 0.14514119236911846, - "grad_norm": 0.541031002998352, - "learning_rate": 4.79e-05, - "loss": 0.4081, - "step": 4135 - }, - { - "epoch": 0.14517629302023552, - "grad_norm": 0.5674508810043335, - "learning_rate": 4.7898148148148156e-05, - "loss": 0.45, - "step": 4136 - }, - { - "epoch": 0.1452113936713526, - "grad_norm": 0.6376379132270813, - "learning_rate": 4.78962962962963e-05, - "loss": 0.6566, - "step": 4137 - }, - { - "epoch": 0.1452464943224697, - "grad_norm": 0.7011367678642273, - "learning_rate": 4.789444444444445e-05, - "loss": 0.5183, - "step": 4138 - }, - { - "epoch": 0.14528159497358675, - "grad_norm": 0.61622554063797, - "learning_rate": 4.789259259259259e-05, - "loss": 0.5327, - "step": 4139 - }, - { - "epoch": 0.14531669562470384, - "grad_norm": 0.5766505002975464, - "learning_rate": 4.789074074074074e-05, - "loss": 0.5623, - "step": 4140 - }, - { - "epoch": 0.14535179627582093, - "grad_norm": 0.5818502306938171, - "learning_rate": 4.7888888888888886e-05, - "loss": 0.4902, - "step": 4141 - }, - { - "epoch": 0.145386896926938, - "grad_norm": 0.5887845754623413, - "learning_rate": 4.788703703703704e-05, - "loss": 0.5528, - "step": 4142 - }, - { - "epoch": 0.14542199757805507, - "grad_norm": 0.5349680781364441, - "learning_rate": 4.7885185185185186e-05, - "loss": 0.6081, - "step": 4143 - }, - { - "epoch": 0.14545709822917216, - "grad_norm": 0.5831820964813232, - "learning_rate": 4.7883333333333336e-05, - "loss": 0.5351, - "step": 4144 - }, - { - "epoch": 0.14549219888028922, - "grad_norm": 0.46607303619384766, - "learning_rate": 4.788148148148148e-05, - "loss": 0.489, - "step": 4145 - }, - { - "epoch": 0.1455272995314063, - "grad_norm": 0.6256576180458069, - "learning_rate": 4.787962962962963e-05, - "loss": 0.5472, - "step": 4146 - }, - { - "epoch": 0.1455624001825234, - "grad_norm": 0.5832725763320923, - "learning_rate": 4.787777777777778e-05, - "loss": 0.4946, - "step": 4147 - }, - { - "epoch": 0.14559750083364045, - "grad_norm": 0.5435469150543213, - "learning_rate": 4.787592592592593e-05, - "loss": 0.4819, - "step": 4148 - }, - { - "epoch": 0.14563260148475754, - "grad_norm": 0.5191269516944885, - "learning_rate": 4.787407407407408e-05, - "loss": 0.5441, - "step": 4149 - }, - { - "epoch": 0.14566770213587463, - "grad_norm": 0.5499022006988525, - "learning_rate": 4.787222222222222e-05, - "loss": 0.5059, - "step": 4150 - }, - { - "epoch": 0.1457028027869917, - "grad_norm": 0.5602805018424988, - "learning_rate": 4.7870370370370373e-05, - "loss": 0.4563, - "step": 4151 - }, - { - "epoch": 0.14573790343810877, - "grad_norm": 0.5752518773078918, - "learning_rate": 4.786851851851852e-05, - "loss": 0.5, - "step": 4152 - }, - { - "epoch": 0.14577300408922586, - "grad_norm": 0.5719858407974243, - "learning_rate": 4.7866666666666674e-05, - "loss": 0.5481, - "step": 4153 - }, - { - "epoch": 0.14580810474034292, - "grad_norm": 0.5027099251747131, - "learning_rate": 4.786481481481482e-05, - "loss": 0.4806, - "step": 4154 - }, - { - "epoch": 0.14584320539146, - "grad_norm": 0.5138076543807983, - "learning_rate": 4.786296296296297e-05, - "loss": 0.5307, - "step": 4155 - }, - { - "epoch": 0.1458783060425771, - "grad_norm": 0.4996546506881714, - "learning_rate": 4.786111111111111e-05, - "loss": 0.4885, - "step": 4156 - }, - { - "epoch": 0.14591340669369418, - "grad_norm": 0.5198639035224915, - "learning_rate": 4.785925925925926e-05, - "loss": 0.6155, - "step": 4157 - }, - { - "epoch": 0.14594850734481124, - "grad_norm": 0.5567254424095154, - "learning_rate": 4.785740740740741e-05, - "loss": 0.5945, - "step": 4158 - }, - { - "epoch": 0.14598360799592833, - "grad_norm": 0.5233835577964783, - "learning_rate": 4.785555555555556e-05, - "loss": 0.4959, - "step": 4159 - }, - { - "epoch": 0.14601870864704541, - "grad_norm": 0.5095246434211731, - "learning_rate": 4.7853703703703704e-05, - "loss": 0.4634, - "step": 4160 - }, - { - "epoch": 0.14605380929816247, - "grad_norm": 0.49866506457328796, - "learning_rate": 4.7851851851851854e-05, - "loss": 0.5434, - "step": 4161 - }, - { - "epoch": 0.14608890994927956, - "grad_norm": 0.5539047718048096, - "learning_rate": 4.785e-05, - "loss": 0.5007, - "step": 4162 - }, - { - "epoch": 0.14612401060039665, - "grad_norm": 0.4848668873310089, - "learning_rate": 4.7848148148148154e-05, - "loss": 0.5589, - "step": 4163 - }, - { - "epoch": 0.1461591112515137, - "grad_norm": 0.5992914438247681, - "learning_rate": 4.78462962962963e-05, - "loss": 0.5208, - "step": 4164 - }, - { - "epoch": 0.1461942119026308, - "grad_norm": 0.6023635268211365, - "learning_rate": 4.784444444444445e-05, - "loss": 0.6174, - "step": 4165 - }, - { - "epoch": 0.14622931255374788, - "grad_norm": 0.5887951850891113, - "learning_rate": 4.784259259259259e-05, - "loss": 0.5993, - "step": 4166 - }, - { - "epoch": 0.14626441320486494, - "grad_norm": 0.5555424094200134, - "learning_rate": 4.784074074074074e-05, - "loss": 0.4824, - "step": 4167 - }, - { - "epoch": 0.14629951385598203, - "grad_norm": 0.5109525322914124, - "learning_rate": 4.783888888888889e-05, - "loss": 0.581, - "step": 4168 - }, - { - "epoch": 0.14633461450709911, - "grad_norm": 0.5273104310035706, - "learning_rate": 4.783703703703704e-05, - "loss": 0.4908, - "step": 4169 - }, - { - "epoch": 0.14636971515821617, - "grad_norm": 0.5643690824508667, - "learning_rate": 4.783518518518519e-05, - "loss": 0.445, - "step": 4170 - }, - { - "epoch": 0.14640481580933326, - "grad_norm": 0.5988652110099792, - "learning_rate": 4.7833333333333335e-05, - "loss": 0.6182, - "step": 4171 - }, - { - "epoch": 0.14643991646045035, - "grad_norm": 0.5457183122634888, - "learning_rate": 4.7831481481481485e-05, - "loss": 0.5688, - "step": 4172 - }, - { - "epoch": 0.1464750171115674, - "grad_norm": 0.6261597275733948, - "learning_rate": 4.782962962962963e-05, - "loss": 0.4709, - "step": 4173 - }, - { - "epoch": 0.1465101177626845, - "grad_norm": 0.5617405772209167, - "learning_rate": 4.7827777777777785e-05, - "loss": 0.5783, - "step": 4174 - }, - { - "epoch": 0.14654521841380158, - "grad_norm": 0.5850933194160461, - "learning_rate": 4.782592592592593e-05, - "loss": 0.5075, - "step": 4175 - }, - { - "epoch": 0.14658031906491864, - "grad_norm": 0.5353249311447144, - "learning_rate": 4.782407407407408e-05, - "loss": 0.493, - "step": 4176 - }, - { - "epoch": 0.14661541971603573, - "grad_norm": 0.5877780914306641, - "learning_rate": 4.782222222222222e-05, - "loss": 0.5392, - "step": 4177 - }, - { - "epoch": 0.14665052036715281, - "grad_norm": 0.6304327249526978, - "learning_rate": 4.782037037037037e-05, - "loss": 0.5654, - "step": 4178 - }, - { - "epoch": 0.1466856210182699, - "grad_norm": 0.7305415272712708, - "learning_rate": 4.7818518518518516e-05, - "loss": 0.4407, - "step": 4179 - }, - { - "epoch": 0.14672072166938696, - "grad_norm": 0.6234126687049866, - "learning_rate": 4.781666666666667e-05, - "loss": 0.5749, - "step": 4180 - }, - { - "epoch": 0.14675582232050405, - "grad_norm": 0.699561357498169, - "learning_rate": 4.7814814814814816e-05, - "loss": 0.5208, - "step": 4181 - }, - { - "epoch": 0.14679092297162114, - "grad_norm": 0.7534358501434326, - "learning_rate": 4.7812962962962966e-05, - "loss": 0.5415, - "step": 4182 - }, - { - "epoch": 0.1468260236227382, - "grad_norm": 0.6465868949890137, - "learning_rate": 4.781111111111111e-05, - "loss": 0.4353, - "step": 4183 - }, - { - "epoch": 0.14686112427385528, - "grad_norm": 0.5497856140136719, - "learning_rate": 4.780925925925926e-05, - "loss": 0.5954, - "step": 4184 - }, - { - "epoch": 0.14689622492497237, - "grad_norm": 0.48822852969169617, - "learning_rate": 4.780740740740741e-05, - "loss": 0.5257, - "step": 4185 - }, - { - "epoch": 0.14693132557608943, - "grad_norm": 0.5993344187736511, - "learning_rate": 4.780555555555556e-05, - "loss": 0.45, - "step": 4186 - }, - { - "epoch": 0.14696642622720651, - "grad_norm": 0.6427016258239746, - "learning_rate": 4.78037037037037e-05, - "loss": 0.6504, - "step": 4187 - }, - { - "epoch": 0.1470015268783236, - "grad_norm": 0.6292029023170471, - "learning_rate": 4.780185185185185e-05, - "loss": 0.594, - "step": 4188 - }, - { - "epoch": 0.14703662752944066, - "grad_norm": 0.46044081449508667, - "learning_rate": 4.78e-05, - "loss": 0.5744, - "step": 4189 - }, - { - "epoch": 0.14707172818055775, - "grad_norm": 0.6222397685050964, - "learning_rate": 4.779814814814815e-05, - "loss": 0.4489, - "step": 4190 - }, - { - "epoch": 0.14710682883167484, - "grad_norm": 0.5028244256973267, - "learning_rate": 4.77962962962963e-05, - "loss": 0.6047, - "step": 4191 - }, - { - "epoch": 0.1471419294827919, - "grad_norm": 0.5392513275146484, - "learning_rate": 4.779444444444445e-05, - "loss": 0.487, - "step": 4192 - }, - { - "epoch": 0.14717703013390898, - "grad_norm": 0.5957140922546387, - "learning_rate": 4.77925925925926e-05, - "loss": 0.4353, - "step": 4193 - }, - { - "epoch": 0.14721213078502607, - "grad_norm": 0.4817187190055847, - "learning_rate": 4.779074074074074e-05, - "loss": 0.4209, - "step": 4194 - }, - { - "epoch": 0.14724723143614313, - "grad_norm": 0.6032044887542725, - "learning_rate": 4.778888888888889e-05, - "loss": 0.5264, - "step": 4195 - }, - { - "epoch": 0.14728233208726021, - "grad_norm": 0.5317597389221191, - "learning_rate": 4.778703703703704e-05, - "loss": 0.4234, - "step": 4196 - }, - { - "epoch": 0.1473174327383773, - "grad_norm": 0.6230401396751404, - "learning_rate": 4.778518518518519e-05, - "loss": 0.5719, - "step": 4197 - }, - { - "epoch": 0.1473525333894944, - "grad_norm": 0.6174208521842957, - "learning_rate": 4.7783333333333334e-05, - "loss": 0.5012, - "step": 4198 - }, - { - "epoch": 0.14738763404061145, - "grad_norm": 0.60752272605896, - "learning_rate": 4.7781481481481484e-05, - "loss": 0.4566, - "step": 4199 - }, - { - "epoch": 0.14742273469172854, - "grad_norm": 0.6075285077095032, - "learning_rate": 4.777962962962963e-05, - "loss": 0.5882, - "step": 4200 - }, - { - "epoch": 0.14745783534284562, - "grad_norm": 0.5333372950553894, - "learning_rate": 4.7777777777777784e-05, - "loss": 0.575, - "step": 4201 - }, - { - "epoch": 0.14749293599396268, - "grad_norm": 0.5506235957145691, - "learning_rate": 4.777592592592593e-05, - "loss": 0.4574, - "step": 4202 - }, - { - "epoch": 0.14752803664507977, - "grad_norm": 0.5149475932121277, - "learning_rate": 4.777407407407408e-05, - "loss": 0.4324, - "step": 4203 - }, - { - "epoch": 0.14756313729619686, - "grad_norm": 0.5493608117103577, - "learning_rate": 4.777222222222222e-05, - "loss": 0.4732, - "step": 4204 - }, - { - "epoch": 0.14759823794731391, - "grad_norm": 0.5196369886398315, - "learning_rate": 4.777037037037037e-05, - "loss": 0.5506, - "step": 4205 - }, - { - "epoch": 0.147633338598431, - "grad_norm": 0.7549602389335632, - "learning_rate": 4.776851851851852e-05, - "loss": 0.5243, - "step": 4206 - }, - { - "epoch": 0.1476684392495481, - "grad_norm": 0.6102510094642639, - "learning_rate": 4.776666666666667e-05, - "loss": 0.6758, - "step": 4207 - }, - { - "epoch": 0.14770353990066515, - "grad_norm": 0.6577208042144775, - "learning_rate": 4.7764814814814815e-05, - "loss": 0.5233, - "step": 4208 - }, - { - "epoch": 0.14773864055178224, - "grad_norm": 0.9343298673629761, - "learning_rate": 4.7762962962962965e-05, - "loss": 0.6102, - "step": 4209 - }, - { - "epoch": 0.14777374120289932, - "grad_norm": 0.5170096755027771, - "learning_rate": 4.7761111111111115e-05, - "loss": 0.4963, - "step": 4210 - }, - { - "epoch": 0.14780884185401638, - "grad_norm": 0.5199185609817505, - "learning_rate": 4.775925925925926e-05, - "loss": 0.67, - "step": 4211 - }, - { - "epoch": 0.14784394250513347, - "grad_norm": 0.5156704187393188, - "learning_rate": 4.7757407407407415e-05, - "loss": 0.3916, - "step": 4212 - }, - { - "epoch": 0.14787904315625056, - "grad_norm": 0.44110095500946045, - "learning_rate": 4.775555555555556e-05, - "loss": 0.435, - "step": 4213 - }, - { - "epoch": 0.14791414380736762, - "grad_norm": 0.5087369680404663, - "learning_rate": 4.775370370370371e-05, - "loss": 0.5922, - "step": 4214 - }, - { - "epoch": 0.1479492444584847, - "grad_norm": 0.5452232360839844, - "learning_rate": 4.775185185185185e-05, - "loss": 0.4284, - "step": 4215 - }, - { - "epoch": 0.1479843451096018, - "grad_norm": 0.6343801617622375, - "learning_rate": 4.775e-05, - "loss": 0.5865, - "step": 4216 - }, - { - "epoch": 0.14801944576071885, - "grad_norm": 0.6904962062835693, - "learning_rate": 4.774814814814815e-05, - "loss": 0.5682, - "step": 4217 - }, - { - "epoch": 0.14805454641183594, - "grad_norm": 0.7238724231719971, - "learning_rate": 4.77462962962963e-05, - "loss": 0.6153, - "step": 4218 - }, - { - "epoch": 0.14808964706295302, - "grad_norm": 0.4908079504966736, - "learning_rate": 4.7744444444444445e-05, - "loss": 0.4825, - "step": 4219 - }, - { - "epoch": 0.1481247477140701, - "grad_norm": 0.8570850491523743, - "learning_rate": 4.7742592592592596e-05, - "loss": 0.5172, - "step": 4220 - }, - { - "epoch": 0.14815984836518717, - "grad_norm": 0.671565055847168, - "learning_rate": 4.774074074074074e-05, - "loss": 0.4622, - "step": 4221 - }, - { - "epoch": 0.14819494901630426, - "grad_norm": 0.4949866235256195, - "learning_rate": 4.773888888888889e-05, - "loss": 0.4621, - "step": 4222 - }, - { - "epoch": 0.14823004966742134, - "grad_norm": 0.5010828375816345, - "learning_rate": 4.773703703703704e-05, - "loss": 0.5489, - "step": 4223 - }, - { - "epoch": 0.1482651503185384, - "grad_norm": 0.5476987957954407, - "learning_rate": 4.773518518518519e-05, - "loss": 0.5012, - "step": 4224 - }, - { - "epoch": 0.1483002509696555, - "grad_norm": 0.5424070358276367, - "learning_rate": 4.773333333333333e-05, - "loss": 0.6209, - "step": 4225 - }, - { - "epoch": 0.14833535162077258, - "grad_norm": 0.6527873873710632, - "learning_rate": 4.773148148148148e-05, - "loss": 0.5899, - "step": 4226 - }, - { - "epoch": 0.14837045227188964, - "grad_norm": 0.5647996068000793, - "learning_rate": 4.772962962962963e-05, - "loss": 0.409, - "step": 4227 - }, - { - "epoch": 0.14840555292300672, - "grad_norm": 0.6247770190238953, - "learning_rate": 4.772777777777778e-05, - "loss": 0.5564, - "step": 4228 - }, - { - "epoch": 0.1484406535741238, - "grad_norm": 0.5242825746536255, - "learning_rate": 4.7725925925925926e-05, - "loss": 0.4947, - "step": 4229 - }, - { - "epoch": 0.14847575422524087, - "grad_norm": 0.5363615155220032, - "learning_rate": 4.7724074074074076e-05, - "loss": 0.4633, - "step": 4230 - }, - { - "epoch": 0.14851085487635796, - "grad_norm": 0.5576969385147095, - "learning_rate": 4.7722222222222226e-05, - "loss": 0.505, - "step": 4231 - }, - { - "epoch": 0.14854595552747504, - "grad_norm": 0.6427892446517944, - "learning_rate": 4.772037037037037e-05, - "loss": 0.5496, - "step": 4232 - }, - { - "epoch": 0.1485810561785921, - "grad_norm": 0.5802056789398193, - "learning_rate": 4.771851851851853e-05, - "loss": 0.5212, - "step": 4233 - }, - { - "epoch": 0.1486161568297092, - "grad_norm": 0.6896734833717346, - "learning_rate": 4.771666666666667e-05, - "loss": 0.5386, - "step": 4234 - }, - { - "epoch": 0.14865125748082628, - "grad_norm": 0.5142319798469543, - "learning_rate": 4.771481481481482e-05, - "loss": 0.5771, - "step": 4235 - }, - { - "epoch": 0.14868635813194334, - "grad_norm": 0.5454700589179993, - "learning_rate": 4.7712962962962963e-05, - "loss": 0.5749, - "step": 4236 - }, - { - "epoch": 0.14872145878306042, - "grad_norm": 0.5649691224098206, - "learning_rate": 4.7711111111111114e-05, - "loss": 0.5284, - "step": 4237 - }, - { - "epoch": 0.1487565594341775, - "grad_norm": 0.5258809328079224, - "learning_rate": 4.770925925925926e-05, - "loss": 0.4902, - "step": 4238 - }, - { - "epoch": 0.14879166008529457, - "grad_norm": 0.6009002327919006, - "learning_rate": 4.7707407407407414e-05, - "loss": 0.5299, - "step": 4239 - }, - { - "epoch": 0.14882676073641166, - "grad_norm": 0.4949319660663605, - "learning_rate": 4.770555555555556e-05, - "loss": 0.5792, - "step": 4240 - }, - { - "epoch": 0.14886186138752874, - "grad_norm": 0.5057190656661987, - "learning_rate": 4.770370370370371e-05, - "loss": 0.47, - "step": 4241 - }, - { - "epoch": 0.14889696203864583, - "grad_norm": 0.6319621801376343, - "learning_rate": 4.770185185185185e-05, - "loss": 0.56, - "step": 4242 - }, - { - "epoch": 0.1489320626897629, - "grad_norm": 0.581890344619751, - "learning_rate": 4.77e-05, - "loss": 0.5768, - "step": 4243 - }, - { - "epoch": 0.14896716334087998, - "grad_norm": 0.5260502696037292, - "learning_rate": 4.769814814814815e-05, - "loss": 0.4053, - "step": 4244 - }, - { - "epoch": 0.14900226399199706, - "grad_norm": 0.5179502367973328, - "learning_rate": 4.76962962962963e-05, - "loss": 0.5088, - "step": 4245 - }, - { - "epoch": 0.14903736464311412, - "grad_norm": 0.5116079449653625, - "learning_rate": 4.7694444444444444e-05, - "loss": 0.5132, - "step": 4246 - }, - { - "epoch": 0.1490724652942312, - "grad_norm": 0.5283390879631042, - "learning_rate": 4.7692592592592594e-05, - "loss": 0.5945, - "step": 4247 - }, - { - "epoch": 0.1491075659453483, - "grad_norm": 0.5798718929290771, - "learning_rate": 4.7690740740740744e-05, - "loss": 0.483, - "step": 4248 - }, - { - "epoch": 0.14914266659646536, - "grad_norm": 0.6317808628082275, - "learning_rate": 4.768888888888889e-05, - "loss": 0.5531, - "step": 4249 - }, - { - "epoch": 0.14917776724758244, - "grad_norm": 0.499236524105072, - "learning_rate": 4.768703703703704e-05, - "loss": 0.5634, - "step": 4250 - }, - { - "epoch": 0.14921286789869953, - "grad_norm": 0.5364636778831482, - "learning_rate": 4.768518518518519e-05, - "loss": 0.4105, - "step": 4251 - }, - { - "epoch": 0.1492479685498166, - "grad_norm": 0.487353652715683, - "learning_rate": 4.768333333333334e-05, - "loss": 0.5156, - "step": 4252 - }, - { - "epoch": 0.14928306920093368, - "grad_norm": 0.520988941192627, - "learning_rate": 4.768148148148148e-05, - "loss": 0.5599, - "step": 4253 - }, - { - "epoch": 0.14931816985205076, - "grad_norm": 0.7954883575439453, - "learning_rate": 4.767962962962963e-05, - "loss": 0.6138, - "step": 4254 - }, - { - "epoch": 0.14935327050316782, - "grad_norm": 0.562747061252594, - "learning_rate": 4.767777777777778e-05, - "loss": 0.5038, - "step": 4255 - }, - { - "epoch": 0.1493883711542849, - "grad_norm": 0.5549457669258118, - "learning_rate": 4.767592592592593e-05, - "loss": 0.4252, - "step": 4256 - }, - { - "epoch": 0.149423471805402, - "grad_norm": 0.5382993817329407, - "learning_rate": 4.7674074074074075e-05, - "loss": 0.394, - "step": 4257 - }, - { - "epoch": 0.14945857245651906, - "grad_norm": 0.5350244641304016, - "learning_rate": 4.7672222222222225e-05, - "loss": 0.6122, - "step": 4258 - }, - { - "epoch": 0.14949367310763614, - "grad_norm": 0.545103907585144, - "learning_rate": 4.767037037037037e-05, - "loss": 0.4292, - "step": 4259 - }, - { - "epoch": 0.14952877375875323, - "grad_norm": 0.5937192440032959, - "learning_rate": 4.7668518518518525e-05, - "loss": 0.5481, - "step": 4260 - }, - { - "epoch": 0.1495638744098703, - "grad_norm": 0.567855715751648, - "learning_rate": 4.766666666666667e-05, - "loss": 0.5537, - "step": 4261 - }, - { - "epoch": 0.14959897506098738, - "grad_norm": 0.5346759557723999, - "learning_rate": 4.766481481481482e-05, - "loss": 0.5923, - "step": 4262 - }, - { - "epoch": 0.14963407571210446, - "grad_norm": 0.5570108890533447, - "learning_rate": 4.766296296296296e-05, - "loss": 0.4797, - "step": 4263 - }, - { - "epoch": 0.14966917636322155, - "grad_norm": 0.41906189918518066, - "learning_rate": 4.766111111111111e-05, - "loss": 0.4378, - "step": 4264 - }, - { - "epoch": 0.1497042770143386, - "grad_norm": 0.4508787989616394, - "learning_rate": 4.7659259259259256e-05, - "loss": 0.4948, - "step": 4265 - }, - { - "epoch": 0.1497393776654557, - "grad_norm": 0.6118481159210205, - "learning_rate": 4.765740740740741e-05, - "loss": 0.5407, - "step": 4266 - }, - { - "epoch": 0.14977447831657278, - "grad_norm": 0.5683364868164062, - "learning_rate": 4.7655555555555556e-05, - "loss": 0.4847, - "step": 4267 - }, - { - "epoch": 0.14980957896768984, - "grad_norm": 0.5080904364585876, - "learning_rate": 4.7653703703703706e-05, - "loss": 0.4075, - "step": 4268 - }, - { - "epoch": 0.14984467961880693, - "grad_norm": 0.5526103973388672, - "learning_rate": 4.7651851851851856e-05, - "loss": 0.4754, - "step": 4269 - }, - { - "epoch": 0.14987978026992402, - "grad_norm": 0.6461148262023926, - "learning_rate": 4.765e-05, - "loss": 0.4549, - "step": 4270 - }, - { - "epoch": 0.14991488092104108, - "grad_norm": 0.5708556175231934, - "learning_rate": 4.764814814814815e-05, - "loss": 0.5918, - "step": 4271 - }, - { - "epoch": 0.14994998157215816, - "grad_norm": 0.5349725484848022, - "learning_rate": 4.76462962962963e-05, - "loss": 0.5823, - "step": 4272 - }, - { - "epoch": 0.14998508222327525, - "grad_norm": 0.5786852240562439, - "learning_rate": 4.764444444444445e-05, - "loss": 0.4953, - "step": 4273 - }, - { - "epoch": 0.1500201828743923, - "grad_norm": 0.5324084758758545, - "learning_rate": 4.764259259259259e-05, - "loss": 0.4964, - "step": 4274 - }, - { - "epoch": 0.1500552835255094, - "grad_norm": 0.610918402671814, - "learning_rate": 4.764074074074074e-05, - "loss": 0.5483, - "step": 4275 - }, - { - "epoch": 0.15009038417662648, - "grad_norm": 0.5298445224761963, - "learning_rate": 4.7638888888888887e-05, - "loss": 0.4798, - "step": 4276 - }, - { - "epoch": 0.15012548482774354, - "grad_norm": 0.5437801480293274, - "learning_rate": 4.7637037037037043e-05, - "loss": 0.5608, - "step": 4277 - }, - { - "epoch": 0.15016058547886063, - "grad_norm": 0.47082895040512085, - "learning_rate": 4.763518518518519e-05, - "loss": 0.4293, - "step": 4278 - }, - { - "epoch": 0.15019568612997772, - "grad_norm": 0.5155958533287048, - "learning_rate": 4.763333333333334e-05, - "loss": 0.5174, - "step": 4279 - }, - { - "epoch": 0.15023078678109478, - "grad_norm": 0.5823724865913391, - "learning_rate": 4.763148148148148e-05, - "loss": 0.523, - "step": 4280 - }, - { - "epoch": 0.15026588743221186, - "grad_norm": 0.6216751337051392, - "learning_rate": 4.762962962962963e-05, - "loss": 0.5957, - "step": 4281 - }, - { - "epoch": 0.15030098808332895, - "grad_norm": 0.5731251835823059, - "learning_rate": 4.762777777777778e-05, - "loss": 0.6681, - "step": 4282 - }, - { - "epoch": 0.150336088734446, - "grad_norm": 0.6210417151451111, - "learning_rate": 4.762592592592593e-05, - "loss": 0.5331, - "step": 4283 - }, - { - "epoch": 0.1503711893855631, - "grad_norm": 0.4891050457954407, - "learning_rate": 4.7624074074074074e-05, - "loss": 0.5082, - "step": 4284 - }, - { - "epoch": 0.15040629003668018, - "grad_norm": 0.514545738697052, - "learning_rate": 4.7622222222222224e-05, - "loss": 0.5388, - "step": 4285 - }, - { - "epoch": 0.15044139068779727, - "grad_norm": 0.5592148900032043, - "learning_rate": 4.762037037037037e-05, - "loss": 0.4829, - "step": 4286 - }, - { - "epoch": 0.15047649133891433, - "grad_norm": 0.5522032380104065, - "learning_rate": 4.7618518518518524e-05, - "loss": 0.5524, - "step": 4287 - }, - { - "epoch": 0.15051159199003142, - "grad_norm": 0.5606949925422668, - "learning_rate": 4.761666666666667e-05, - "loss": 0.5274, - "step": 4288 - }, - { - "epoch": 0.1505466926411485, - "grad_norm": 0.491632878780365, - "learning_rate": 4.761481481481482e-05, - "loss": 0.5603, - "step": 4289 - }, - { - "epoch": 0.15058179329226556, - "grad_norm": 0.49599534273147583, - "learning_rate": 4.761296296296297e-05, - "loss": 0.4232, - "step": 4290 - }, - { - "epoch": 0.15061689394338265, - "grad_norm": 0.5272189378738403, - "learning_rate": 4.761111111111111e-05, - "loss": 0.5593, - "step": 4291 - }, - { - "epoch": 0.15065199459449974, - "grad_norm": 0.6413161754608154, - "learning_rate": 4.760925925925926e-05, - "loss": 0.5312, - "step": 4292 - }, - { - "epoch": 0.1506870952456168, - "grad_norm": 0.5244085788726807, - "learning_rate": 4.760740740740741e-05, - "loss": 0.4937, - "step": 4293 - }, - { - "epoch": 0.15072219589673388, - "grad_norm": 0.5622462630271912, - "learning_rate": 4.760555555555556e-05, - "loss": 0.523, - "step": 4294 - }, - { - "epoch": 0.15075729654785097, - "grad_norm": 0.5382457971572876, - "learning_rate": 4.7603703703703705e-05, - "loss": 0.5743, - "step": 4295 - }, - { - "epoch": 0.15079239719896803, - "grad_norm": 0.5125522017478943, - "learning_rate": 4.7601851851851855e-05, - "loss": 0.4911, - "step": 4296 - }, - { - "epoch": 0.15082749785008512, - "grad_norm": 0.5675469040870667, - "learning_rate": 4.76e-05, - "loss": 0.5463, - "step": 4297 - }, - { - "epoch": 0.1508625985012022, - "grad_norm": 0.6164263486862183, - "learning_rate": 4.7598148148148155e-05, - "loss": 0.5511, - "step": 4298 - }, - { - "epoch": 0.15089769915231926, - "grad_norm": 0.5300889611244202, - "learning_rate": 4.75962962962963e-05, - "loss": 0.4681, - "step": 4299 - }, - { - "epoch": 0.15093279980343635, - "grad_norm": 0.6169356107711792, - "learning_rate": 4.759444444444445e-05, - "loss": 0.5914, - "step": 4300 - }, - { - "epoch": 0.15096790045455344, - "grad_norm": 0.5275663733482361, - "learning_rate": 4.759259259259259e-05, - "loss": 0.5077, - "step": 4301 - }, - { - "epoch": 0.1510030011056705, - "grad_norm": 0.6203058362007141, - "learning_rate": 4.759074074074074e-05, - "loss": 0.5813, - "step": 4302 - }, - { - "epoch": 0.15103810175678758, - "grad_norm": 0.5499642491340637, - "learning_rate": 4.7588888888888885e-05, - "loss": 0.4697, - "step": 4303 - }, - { - "epoch": 0.15107320240790467, - "grad_norm": 0.5465743541717529, - "learning_rate": 4.758703703703704e-05, - "loss": 0.4581, - "step": 4304 - }, - { - "epoch": 0.15110830305902173, - "grad_norm": 0.5471811294555664, - "learning_rate": 4.7585185185185186e-05, - "loss": 0.6151, - "step": 4305 - }, - { - "epoch": 0.15114340371013882, - "grad_norm": 0.5036072134971619, - "learning_rate": 4.7583333333333336e-05, - "loss": 0.497, - "step": 4306 - }, - { - "epoch": 0.1511785043612559, - "grad_norm": 0.6597816348075867, - "learning_rate": 4.758148148148148e-05, - "loss": 0.4935, - "step": 4307 - }, - { - "epoch": 0.151213605012373, - "grad_norm": 0.5923940539360046, - "learning_rate": 4.757962962962963e-05, - "loss": 0.6352, - "step": 4308 - }, - { - "epoch": 0.15124870566349005, - "grad_norm": 0.6168772578239441, - "learning_rate": 4.757777777777778e-05, - "loss": 0.5097, - "step": 4309 - }, - { - "epoch": 0.15128380631460714, - "grad_norm": 0.5804077386856079, - "learning_rate": 4.757592592592593e-05, - "loss": 0.5213, - "step": 4310 - }, - { - "epoch": 0.15131890696572423, - "grad_norm": 0.5188911557197571, - "learning_rate": 4.757407407407408e-05, - "loss": 0.4251, - "step": 4311 - }, - { - "epoch": 0.15135400761684129, - "grad_norm": 0.5256282091140747, - "learning_rate": 4.757222222222222e-05, - "loss": 0.4549, - "step": 4312 - }, - { - "epoch": 0.15138910826795837, - "grad_norm": 0.6465599536895752, - "learning_rate": 4.757037037037037e-05, - "loss": 0.5726, - "step": 4313 - }, - { - "epoch": 0.15142420891907546, - "grad_norm": 0.6010935306549072, - "learning_rate": 4.756851851851852e-05, - "loss": 0.6166, - "step": 4314 - }, - { - "epoch": 0.15145930957019252, - "grad_norm": 0.47138699889183044, - "learning_rate": 4.756666666666667e-05, - "loss": 0.5072, - "step": 4315 - }, - { - "epoch": 0.1514944102213096, - "grad_norm": 0.5258758068084717, - "learning_rate": 4.7564814814814816e-05, - "loss": 0.4456, - "step": 4316 - }, - { - "epoch": 0.1515295108724267, - "grad_norm": 0.587745189666748, - "learning_rate": 4.7562962962962967e-05, - "loss": 0.5547, - "step": 4317 - }, - { - "epoch": 0.15156461152354375, - "grad_norm": 0.5549163818359375, - "learning_rate": 4.756111111111111e-05, - "loss": 0.5666, - "step": 4318 - }, - { - "epoch": 0.15159971217466084, - "grad_norm": 0.6495235562324524, - "learning_rate": 4.755925925925926e-05, - "loss": 0.5281, - "step": 4319 - }, - { - "epoch": 0.15163481282577793, - "grad_norm": 0.5113397836685181, - "learning_rate": 4.755740740740741e-05, - "loss": 0.4587, - "step": 4320 - }, - { - "epoch": 0.15166991347689499, - "grad_norm": 0.5816278457641602, - "learning_rate": 4.755555555555556e-05, - "loss": 0.5429, - "step": 4321 - }, - { - "epoch": 0.15170501412801207, - "grad_norm": 0.5777736902236938, - "learning_rate": 4.7553703703703704e-05, - "loss": 0.5359, - "step": 4322 - }, - { - "epoch": 0.15174011477912916, - "grad_norm": 0.5644968152046204, - "learning_rate": 4.7551851851851854e-05, - "loss": 0.545, - "step": 4323 - }, - { - "epoch": 0.15177521543024622, - "grad_norm": 0.5671641826629639, - "learning_rate": 4.755e-05, - "loss": 0.5669, - "step": 4324 - }, - { - "epoch": 0.1518103160813633, - "grad_norm": 0.565897524356842, - "learning_rate": 4.7548148148148154e-05, - "loss": 0.5145, - "step": 4325 - }, - { - "epoch": 0.1518454167324804, - "grad_norm": 0.6530748009681702, - "learning_rate": 4.75462962962963e-05, - "loss": 0.4295, - "step": 4326 - }, - { - "epoch": 0.15188051738359745, - "grad_norm": 0.6302769780158997, - "learning_rate": 4.754444444444445e-05, - "loss": 0.5712, - "step": 4327 - }, - { - "epoch": 0.15191561803471454, - "grad_norm": 0.6368261575698853, - "learning_rate": 4.75425925925926e-05, - "loss": 0.4935, - "step": 4328 - }, - { - "epoch": 0.15195071868583163, - "grad_norm": 0.5869085192680359, - "learning_rate": 4.754074074074074e-05, - "loss": 0.578, - "step": 4329 - }, - { - "epoch": 0.1519858193369487, - "grad_norm": 0.5786399245262146, - "learning_rate": 4.753888888888889e-05, - "loss": 0.5387, - "step": 4330 - }, - { - "epoch": 0.15202091998806577, - "grad_norm": 0.6224579215049744, - "learning_rate": 4.753703703703704e-05, - "loss": 0.547, - "step": 4331 - }, - { - "epoch": 0.15205602063918286, - "grad_norm": 0.529902994632721, - "learning_rate": 4.753518518518519e-05, - "loss": 0.4628, - "step": 4332 - }, - { - "epoch": 0.15209112129029995, - "grad_norm": 0.5687528848648071, - "learning_rate": 4.7533333333333334e-05, - "loss": 0.5659, - "step": 4333 - }, - { - "epoch": 0.152126221941417, - "grad_norm": 0.5980611443519592, - "learning_rate": 4.7531481481481485e-05, - "loss": 0.5419, - "step": 4334 - }, - { - "epoch": 0.1521613225925341, - "grad_norm": 0.5186651945114136, - "learning_rate": 4.752962962962963e-05, - "loss": 0.4189, - "step": 4335 - }, - { - "epoch": 0.15219642324365118, - "grad_norm": 0.6227090358734131, - "learning_rate": 4.7527777777777785e-05, - "loss": 0.5358, - "step": 4336 - }, - { - "epoch": 0.15223152389476824, - "grad_norm": 0.5085787773132324, - "learning_rate": 4.752592592592593e-05, - "loss": 0.4336, - "step": 4337 - }, - { - "epoch": 0.15226662454588533, - "grad_norm": 0.7299818396568298, - "learning_rate": 4.752407407407408e-05, - "loss": 0.4882, - "step": 4338 - }, - { - "epoch": 0.1523017251970024, - "grad_norm": 0.5969195365905762, - "learning_rate": 4.752222222222222e-05, - "loss": 0.5167, - "step": 4339 - }, - { - "epoch": 0.15233682584811947, - "grad_norm": 0.6834296584129333, - "learning_rate": 4.752037037037037e-05, - "loss": 0.5791, - "step": 4340 - }, - { - "epoch": 0.15237192649923656, - "grad_norm": 0.566085159778595, - "learning_rate": 4.751851851851852e-05, - "loss": 0.4763, - "step": 4341 - }, - { - "epoch": 0.15240702715035365, - "grad_norm": 0.5602989196777344, - "learning_rate": 4.751666666666667e-05, - "loss": 0.5313, - "step": 4342 - }, - { - "epoch": 0.1524421278014707, - "grad_norm": 0.5410272479057312, - "learning_rate": 4.7514814814814815e-05, - "loss": 0.5118, - "step": 4343 - }, - { - "epoch": 0.1524772284525878, - "grad_norm": 0.5023764967918396, - "learning_rate": 4.7512962962962965e-05, - "loss": 0.5197, - "step": 4344 - }, - { - "epoch": 0.15251232910370488, - "grad_norm": 0.7301772832870483, - "learning_rate": 4.751111111111111e-05, - "loss": 0.5806, - "step": 4345 - }, - { - "epoch": 0.15254742975482194, - "grad_norm": 0.47711098194122314, - "learning_rate": 4.750925925925926e-05, - "loss": 0.5623, - "step": 4346 - }, - { - "epoch": 0.15258253040593903, - "grad_norm": 0.6636120080947876, - "learning_rate": 4.750740740740741e-05, - "loss": 0.5049, - "step": 4347 - }, - { - "epoch": 0.1526176310570561, - "grad_norm": 0.4879971146583557, - "learning_rate": 4.750555555555556e-05, - "loss": 0.4504, - "step": 4348 - }, - { - "epoch": 0.1526527317081732, - "grad_norm": 0.5433371663093567, - "learning_rate": 4.750370370370371e-05, - "loss": 0.5789, - "step": 4349 - }, - { - "epoch": 0.15268783235929026, - "grad_norm": 0.6426008939743042, - "learning_rate": 4.750185185185185e-05, - "loss": 0.5779, - "step": 4350 - }, - { - "epoch": 0.15272293301040735, - "grad_norm": 0.6078594923019409, - "learning_rate": 4.75e-05, - "loss": 0.4153, - "step": 4351 - }, - { - "epoch": 0.15275803366152443, - "grad_norm": 0.6219464540481567, - "learning_rate": 4.749814814814815e-05, - "loss": 0.6222, - "step": 4352 - }, - { - "epoch": 0.1527931343126415, - "grad_norm": 0.5122820138931274, - "learning_rate": 4.74962962962963e-05, - "loss": 0.5159, - "step": 4353 - }, - { - "epoch": 0.15282823496375858, - "grad_norm": 0.5022334456443787, - "learning_rate": 4.7494444444444446e-05, - "loss": 0.5728, - "step": 4354 - }, - { - "epoch": 0.15286333561487567, - "grad_norm": 0.4950287640094757, - "learning_rate": 4.7492592592592596e-05, - "loss": 0.4386, - "step": 4355 - }, - { - "epoch": 0.15289843626599273, - "grad_norm": 0.5435306429862976, - "learning_rate": 4.749074074074074e-05, - "loss": 0.4922, - "step": 4356 - }, - { - "epoch": 0.1529335369171098, - "grad_norm": 0.44357696175575256, - "learning_rate": 4.7488888888888897e-05, - "loss": 0.4785, - "step": 4357 - }, - { - "epoch": 0.1529686375682269, - "grad_norm": 0.644426167011261, - "learning_rate": 4.748703703703704e-05, - "loss": 0.6115, - "step": 4358 - }, - { - "epoch": 0.15300373821934396, - "grad_norm": 0.5146259069442749, - "learning_rate": 4.748518518518519e-05, - "loss": 0.4378, - "step": 4359 - }, - { - "epoch": 0.15303883887046105, - "grad_norm": 0.6929392218589783, - "learning_rate": 4.748333333333333e-05, - "loss": 0.4205, - "step": 4360 - }, - { - "epoch": 0.15307393952157813, - "grad_norm": 0.5309625864028931, - "learning_rate": 4.7481481481481483e-05, - "loss": 0.5661, - "step": 4361 - }, - { - "epoch": 0.1531090401726952, - "grad_norm": 0.5513799786567688, - "learning_rate": 4.747962962962963e-05, - "loss": 0.5628, - "step": 4362 - }, - { - "epoch": 0.15314414082381228, - "grad_norm": 1.0017426013946533, - "learning_rate": 4.7477777777777784e-05, - "loss": 0.3399, - "step": 4363 - }, - { - "epoch": 0.15317924147492937, - "grad_norm": 0.5196968913078308, - "learning_rate": 4.747592592592593e-05, - "loss": 0.6158, - "step": 4364 - }, - { - "epoch": 0.15321434212604643, - "grad_norm": 0.6828232407569885, - "learning_rate": 4.747407407407408e-05, - "loss": 0.5632, - "step": 4365 - }, - { - "epoch": 0.1532494427771635, - "grad_norm": 0.5636123418807983, - "learning_rate": 4.747222222222222e-05, - "loss": 0.5859, - "step": 4366 - }, - { - "epoch": 0.1532845434282806, - "grad_norm": 0.5773395895957947, - "learning_rate": 4.747037037037037e-05, - "loss": 0.4819, - "step": 4367 - }, - { - "epoch": 0.15331964407939766, - "grad_norm": 0.5954561829566956, - "learning_rate": 4.746851851851852e-05, - "loss": 0.511, - "step": 4368 - }, - { - "epoch": 0.15335474473051475, - "grad_norm": 0.5350987315177917, - "learning_rate": 4.746666666666667e-05, - "loss": 0.5614, - "step": 4369 - }, - { - "epoch": 0.15338984538163183, - "grad_norm": 0.5593942999839783, - "learning_rate": 4.746481481481482e-05, - "loss": 0.5358, - "step": 4370 - }, - { - "epoch": 0.15342494603274892, - "grad_norm": 0.518413782119751, - "learning_rate": 4.7462962962962964e-05, - "loss": 0.5205, - "step": 4371 - }, - { - "epoch": 0.15346004668386598, - "grad_norm": 0.63504958152771, - "learning_rate": 4.7461111111111114e-05, - "loss": 0.6281, - "step": 4372 - }, - { - "epoch": 0.15349514733498307, - "grad_norm": 0.6380411982536316, - "learning_rate": 4.745925925925926e-05, - "loss": 0.5477, - "step": 4373 - }, - { - "epoch": 0.15353024798610015, - "grad_norm": 0.8126311898231506, - "learning_rate": 4.7457407407407415e-05, - "loss": 0.4386, - "step": 4374 - }, - { - "epoch": 0.1535653486372172, - "grad_norm": 0.5983181595802307, - "learning_rate": 4.745555555555556e-05, - "loss": 0.4562, - "step": 4375 - }, - { - "epoch": 0.1536004492883343, - "grad_norm": 0.5940490961074829, - "learning_rate": 4.745370370370371e-05, - "loss": 0.578, - "step": 4376 - }, - { - "epoch": 0.1536355499394514, - "grad_norm": 0.5229434967041016, - "learning_rate": 4.745185185185185e-05, - "loss": 0.5617, - "step": 4377 - }, - { - "epoch": 0.15367065059056845, - "grad_norm": 0.6245594620704651, - "learning_rate": 4.745e-05, - "loss": 0.4609, - "step": 4378 - }, - { - "epoch": 0.15370575124168553, - "grad_norm": 0.6799649596214294, - "learning_rate": 4.744814814814815e-05, - "loss": 0.5898, - "step": 4379 - }, - { - "epoch": 0.15374085189280262, - "grad_norm": 0.7345729470252991, - "learning_rate": 4.74462962962963e-05, - "loss": 0.5901, - "step": 4380 - }, - { - "epoch": 0.15377595254391968, - "grad_norm": 0.4729119539260864, - "learning_rate": 4.7444444444444445e-05, - "loss": 0.4988, - "step": 4381 - }, - { - "epoch": 0.15381105319503677, - "grad_norm": 0.5724261403083801, - "learning_rate": 4.7442592592592595e-05, - "loss": 0.6279, - "step": 4382 - }, - { - "epoch": 0.15384615384615385, - "grad_norm": 0.5350021123886108, - "learning_rate": 4.744074074074074e-05, - "loss": 0.5304, - "step": 4383 - }, - { - "epoch": 0.15388125449727091, - "grad_norm": 0.5740315318107605, - "learning_rate": 4.7438888888888895e-05, - "loss": 0.5242, - "step": 4384 - }, - { - "epoch": 0.153916355148388, - "grad_norm": 0.5368416905403137, - "learning_rate": 4.743703703703704e-05, - "loss": 0.5848, - "step": 4385 - }, - { - "epoch": 0.1539514557995051, - "grad_norm": 0.7009661197662354, - "learning_rate": 4.743518518518519e-05, - "loss": 0.387, - "step": 4386 - }, - { - "epoch": 0.15398655645062215, - "grad_norm": 0.510844349861145, - "learning_rate": 4.743333333333333e-05, - "loss": 0.5766, - "step": 4387 - }, - { - "epoch": 0.15402165710173923, - "grad_norm": 0.4770292043685913, - "learning_rate": 4.743148148148148e-05, - "loss": 0.476, - "step": 4388 - }, - { - "epoch": 0.15405675775285632, - "grad_norm": 0.5314796566963196, - "learning_rate": 4.742962962962963e-05, - "loss": 0.5406, - "step": 4389 - }, - { - "epoch": 0.15409185840397338, - "grad_norm": 0.4517589211463928, - "learning_rate": 4.742777777777778e-05, - "loss": 0.4996, - "step": 4390 - }, - { - "epoch": 0.15412695905509047, - "grad_norm": 0.5658106803894043, - "learning_rate": 4.742592592592593e-05, - "loss": 0.4986, - "step": 4391 - }, - { - "epoch": 0.15416205970620755, - "grad_norm": 0.5690212845802307, - "learning_rate": 4.7424074074074076e-05, - "loss": 0.4414, - "step": 4392 - }, - { - "epoch": 0.15419716035732464, - "grad_norm": 0.47650352120399475, - "learning_rate": 4.7422222222222226e-05, - "loss": 0.5597, - "step": 4393 - }, - { - "epoch": 0.1542322610084417, - "grad_norm": 0.535818338394165, - "learning_rate": 4.742037037037037e-05, - "loss": 0.5871, - "step": 4394 - }, - { - "epoch": 0.1542673616595588, - "grad_norm": 0.6243133544921875, - "learning_rate": 4.7418518518518526e-05, - "loss": 0.5748, - "step": 4395 - }, - { - "epoch": 0.15430246231067588, - "grad_norm": 0.4921432137489319, - "learning_rate": 4.741666666666667e-05, - "loss": 0.5154, - "step": 4396 - }, - { - "epoch": 0.15433756296179293, - "grad_norm": 0.46974241733551025, - "learning_rate": 4.741481481481482e-05, - "loss": 0.5203, - "step": 4397 - }, - { - "epoch": 0.15437266361291002, - "grad_norm": 0.5348747372627258, - "learning_rate": 4.741296296296296e-05, - "loss": 0.5156, - "step": 4398 - }, - { - "epoch": 0.1544077642640271, - "grad_norm": 0.5195144414901733, - "learning_rate": 4.741111111111111e-05, - "loss": 0.4971, - "step": 4399 - }, - { - "epoch": 0.15444286491514417, - "grad_norm": 0.5347146391868591, - "learning_rate": 4.7409259259259256e-05, - "loss": 0.5215, - "step": 4400 - }, - { - "epoch": 0.15447796556626125, - "grad_norm": 0.6293556094169617, - "learning_rate": 4.740740740740741e-05, - "loss": 0.6201, - "step": 4401 - }, - { - "epoch": 0.15451306621737834, - "grad_norm": 0.5295377373695374, - "learning_rate": 4.740555555555556e-05, - "loss": 0.5078, - "step": 4402 - }, - { - "epoch": 0.1545481668684954, - "grad_norm": 0.5502183437347412, - "learning_rate": 4.740370370370371e-05, - "loss": 0.5242, - "step": 4403 - }, - { - "epoch": 0.1545832675196125, - "grad_norm": 0.5779999494552612, - "learning_rate": 4.740185185185185e-05, - "loss": 0.5182, - "step": 4404 - }, - { - "epoch": 0.15461836817072958, - "grad_norm": 0.5390551686286926, - "learning_rate": 4.74e-05, - "loss": 0.4642, - "step": 4405 - }, - { - "epoch": 0.15465346882184663, - "grad_norm": 0.5048519372940063, - "learning_rate": 4.739814814814815e-05, - "loss": 0.4831, - "step": 4406 - }, - { - "epoch": 0.15468856947296372, - "grad_norm": 0.5512059926986694, - "learning_rate": 4.73962962962963e-05, - "loss": 0.5606, - "step": 4407 - }, - { - "epoch": 0.1547236701240808, - "grad_norm": 0.5170209407806396, - "learning_rate": 4.7394444444444444e-05, - "loss": 0.5502, - "step": 4408 - }, - { - "epoch": 0.15475877077519787, - "grad_norm": 0.539588451385498, - "learning_rate": 4.7392592592592594e-05, - "loss": 0.501, - "step": 4409 - }, - { - "epoch": 0.15479387142631496, - "grad_norm": 0.5127390027046204, - "learning_rate": 4.7390740740740744e-05, - "loss": 0.4671, - "step": 4410 - }, - { - "epoch": 0.15482897207743204, - "grad_norm": 0.543003261089325, - "learning_rate": 4.7388888888888894e-05, - "loss": 0.4807, - "step": 4411 - }, - { - "epoch": 0.1548640727285491, - "grad_norm": 0.5201461911201477, - "learning_rate": 4.7387037037037044e-05, - "loss": 0.5073, - "step": 4412 - }, - { - "epoch": 0.1548991733796662, - "grad_norm": 0.5910061597824097, - "learning_rate": 4.738518518518519e-05, - "loss": 0.6091, - "step": 4413 - }, - { - "epoch": 0.15493427403078328, - "grad_norm": 0.5367075204849243, - "learning_rate": 4.738333333333334e-05, - "loss": 0.4315, - "step": 4414 - }, - { - "epoch": 0.15496937468190036, - "grad_norm": 0.4735981822013855, - "learning_rate": 4.738148148148148e-05, - "loss": 0.4206, - "step": 4415 - }, - { - "epoch": 0.15500447533301742, - "grad_norm": 0.5284679532051086, - "learning_rate": 4.737962962962963e-05, - "loss": 0.4059, - "step": 4416 - }, - { - "epoch": 0.1550395759841345, - "grad_norm": 0.5215883851051331, - "learning_rate": 4.737777777777778e-05, - "loss": 0.5623, - "step": 4417 - }, - { - "epoch": 0.1550746766352516, - "grad_norm": 0.5673629641532898, - "learning_rate": 4.737592592592593e-05, - "loss": 0.4632, - "step": 4418 - }, - { - "epoch": 0.15510977728636866, - "grad_norm": 0.6347936391830444, - "learning_rate": 4.7374074074074075e-05, - "loss": 0.5026, - "step": 4419 - }, - { - "epoch": 0.15514487793748574, - "grad_norm": 0.5648009181022644, - "learning_rate": 4.7372222222222225e-05, - "loss": 0.4914, - "step": 4420 - }, - { - "epoch": 0.15517997858860283, - "grad_norm": 0.4965853989124298, - "learning_rate": 4.737037037037037e-05, - "loss": 0.4698, - "step": 4421 - }, - { - "epoch": 0.1552150792397199, - "grad_norm": 0.6170862913131714, - "learning_rate": 4.7368518518518525e-05, - "loss": 0.507, - "step": 4422 - }, - { - "epoch": 0.15525017989083698, - "grad_norm": 0.5762277841567993, - "learning_rate": 4.736666666666667e-05, - "loss": 0.4885, - "step": 4423 - }, - { - "epoch": 0.15528528054195406, - "grad_norm": 0.5722048878669739, - "learning_rate": 4.736481481481482e-05, - "loss": 0.5211, - "step": 4424 - }, - { - "epoch": 0.15532038119307112, - "grad_norm": 0.6147592663764954, - "learning_rate": 4.736296296296296e-05, - "loss": 0.4776, - "step": 4425 - }, - { - "epoch": 0.1553554818441882, - "grad_norm": 0.6002718210220337, - "learning_rate": 4.736111111111111e-05, - "loss": 0.5405, - "step": 4426 - }, - { - "epoch": 0.1553905824953053, - "grad_norm": 0.5599339008331299, - "learning_rate": 4.735925925925926e-05, - "loss": 0.5159, - "step": 4427 - }, - { - "epoch": 0.15542568314642236, - "grad_norm": 0.6243360638618469, - "learning_rate": 4.735740740740741e-05, - "loss": 0.5783, - "step": 4428 - }, - { - "epoch": 0.15546078379753944, - "grad_norm": 0.4642404317855835, - "learning_rate": 4.7355555555555555e-05, - "loss": 0.5611, - "step": 4429 - }, - { - "epoch": 0.15549588444865653, - "grad_norm": 0.49457666277885437, - "learning_rate": 4.7353703703703706e-05, - "loss": 0.5243, - "step": 4430 - }, - { - "epoch": 0.1555309850997736, - "grad_norm": 0.6473073959350586, - "learning_rate": 4.7351851851851856e-05, - "loss": 0.5513, - "step": 4431 - }, - { - "epoch": 0.15556608575089068, - "grad_norm": 0.5372092127799988, - "learning_rate": 4.735e-05, - "loss": 0.6065, - "step": 4432 - }, - { - "epoch": 0.15560118640200776, - "grad_norm": 0.574484646320343, - "learning_rate": 4.7348148148148156e-05, - "loss": 0.6436, - "step": 4433 - }, - { - "epoch": 0.15563628705312482, - "grad_norm": 0.5560833811759949, - "learning_rate": 4.73462962962963e-05, - "loss": 0.4953, - "step": 4434 - }, - { - "epoch": 0.1556713877042419, - "grad_norm": 0.5263583660125732, - "learning_rate": 4.734444444444445e-05, - "loss": 0.589, - "step": 4435 - }, - { - "epoch": 0.155706488355359, - "grad_norm": 0.8615256547927856, - "learning_rate": 4.734259259259259e-05, - "loss": 0.53, - "step": 4436 - }, - { - "epoch": 0.15574158900647608, - "grad_norm": 0.5419265031814575, - "learning_rate": 4.734074074074074e-05, - "loss": 0.3159, - "step": 4437 - }, - { - "epoch": 0.15577668965759314, - "grad_norm": 0.4475747346878052, - "learning_rate": 4.733888888888889e-05, - "loss": 0.443, - "step": 4438 - }, - { - "epoch": 0.15581179030871023, - "grad_norm": 0.5363615155220032, - "learning_rate": 4.733703703703704e-05, - "loss": 0.5669, - "step": 4439 - }, - { - "epoch": 0.15584689095982732, - "grad_norm": 0.5231989026069641, - "learning_rate": 4.7335185185185186e-05, - "loss": 0.5459, - "step": 4440 - }, - { - "epoch": 0.15588199161094438, - "grad_norm": 0.6308592557907104, - "learning_rate": 4.7333333333333336e-05, - "loss": 0.5534, - "step": 4441 - }, - { - "epoch": 0.15591709226206146, - "grad_norm": 0.5186487436294556, - "learning_rate": 4.733148148148148e-05, - "loss": 0.3333, - "step": 4442 - }, - { - "epoch": 0.15595219291317855, - "grad_norm": 0.5797480940818787, - "learning_rate": 4.732962962962963e-05, - "loss": 0.5991, - "step": 4443 - }, - { - "epoch": 0.1559872935642956, - "grad_norm": 0.7366283535957336, - "learning_rate": 4.732777777777778e-05, - "loss": 0.5427, - "step": 4444 - }, - { - "epoch": 0.1560223942154127, - "grad_norm": 0.4450322091579437, - "learning_rate": 4.732592592592593e-05, - "loss": 0.4933, - "step": 4445 - }, - { - "epoch": 0.15605749486652978, - "grad_norm": 0.4562070071697235, - "learning_rate": 4.7324074074074073e-05, - "loss": 0.5179, - "step": 4446 - }, - { - "epoch": 0.15609259551764684, - "grad_norm": 0.4788576066493988, - "learning_rate": 4.7322222222222224e-05, - "loss": 0.5081, - "step": 4447 - }, - { - "epoch": 0.15612769616876393, - "grad_norm": 0.48899441957473755, - "learning_rate": 4.7320370370370374e-05, - "loss": 0.4743, - "step": 4448 - }, - { - "epoch": 0.15616279681988102, - "grad_norm": 0.4853082001209259, - "learning_rate": 4.7318518518518524e-05, - "loss": 0.529, - "step": 4449 - }, - { - "epoch": 0.15619789747099808, - "grad_norm": 0.6252496242523193, - "learning_rate": 4.731666666666667e-05, - "loss": 0.4185, - "step": 4450 - }, - { - "epoch": 0.15623299812211516, - "grad_norm": 0.5369213223457336, - "learning_rate": 4.731481481481482e-05, - "loss": 0.5406, - "step": 4451 - }, - { - "epoch": 0.15626809877323225, - "grad_norm": 0.5598633885383606, - "learning_rate": 4.731296296296297e-05, - "loss": 0.516, - "step": 4452 - }, - { - "epoch": 0.1563031994243493, - "grad_norm": 0.5546417236328125, - "learning_rate": 4.731111111111111e-05, - "loss": 0.4625, - "step": 4453 - }, - { - "epoch": 0.1563383000754664, - "grad_norm": 0.6225610375404358, - "learning_rate": 4.730925925925927e-05, - "loss": 0.4495, - "step": 4454 - }, - { - "epoch": 0.15637340072658348, - "grad_norm": 0.6794822216033936, - "learning_rate": 4.730740740740741e-05, - "loss": 0.5572, - "step": 4455 - }, - { - "epoch": 0.15640850137770054, - "grad_norm": 0.70404452085495, - "learning_rate": 4.730555555555556e-05, - "loss": 0.6151, - "step": 4456 - }, - { - "epoch": 0.15644360202881763, - "grad_norm": 0.5667216181755066, - "learning_rate": 4.7303703703703704e-05, - "loss": 0.4812, - "step": 4457 - }, - { - "epoch": 0.15647870267993472, - "grad_norm": 0.5993177890777588, - "learning_rate": 4.7301851851851854e-05, - "loss": 0.6034, - "step": 4458 - }, - { - "epoch": 0.1565138033310518, - "grad_norm": 0.5643737316131592, - "learning_rate": 4.73e-05, - "loss": 0.5875, - "step": 4459 - }, - { - "epoch": 0.15654890398216886, - "grad_norm": 0.5947975516319275, - "learning_rate": 4.7298148148148155e-05, - "loss": 0.4704, - "step": 4460 - }, - { - "epoch": 0.15658400463328595, - "grad_norm": 0.5458968877792358, - "learning_rate": 4.72962962962963e-05, - "loss": 0.5024, - "step": 4461 - }, - { - "epoch": 0.15661910528440304, - "grad_norm": 0.5980122089385986, - "learning_rate": 4.729444444444445e-05, - "loss": 0.3946, - "step": 4462 - }, - { - "epoch": 0.1566542059355201, - "grad_norm": 0.454082727432251, - "learning_rate": 4.729259259259259e-05, - "loss": 0.3576, - "step": 4463 - }, - { - "epoch": 0.15668930658663718, - "grad_norm": 0.5157780647277832, - "learning_rate": 4.729074074074074e-05, - "loss": 0.5198, - "step": 4464 - }, - { - "epoch": 0.15672440723775427, - "grad_norm": 0.5853981971740723, - "learning_rate": 4.728888888888889e-05, - "loss": 0.5823, - "step": 4465 - }, - { - "epoch": 0.15675950788887133, - "grad_norm": 0.45070919394493103, - "learning_rate": 4.728703703703704e-05, - "loss": 0.353, - "step": 4466 - }, - { - "epoch": 0.15679460853998842, - "grad_norm": 0.6254708766937256, - "learning_rate": 4.7285185185185185e-05, - "loss": 0.4995, - "step": 4467 - }, - { - "epoch": 0.1568297091911055, - "grad_norm": 0.6201284527778625, - "learning_rate": 4.7283333333333335e-05, - "loss": 0.5881, - "step": 4468 - }, - { - "epoch": 0.15686480984222256, - "grad_norm": 0.5379266738891602, - "learning_rate": 4.7281481481481485e-05, - "loss": 0.6005, - "step": 4469 - }, - { - "epoch": 0.15689991049333965, - "grad_norm": 0.5981894135475159, - "learning_rate": 4.727962962962963e-05, - "loss": 0.5596, - "step": 4470 - }, - { - "epoch": 0.15693501114445674, - "grad_norm": 0.5731337070465088, - "learning_rate": 4.727777777777778e-05, - "loss": 0.5824, - "step": 4471 - }, - { - "epoch": 0.1569701117955738, - "grad_norm": 0.6713743805885315, - "learning_rate": 4.727592592592593e-05, - "loss": 0.6472, - "step": 4472 - }, - { - "epoch": 0.15700521244669088, - "grad_norm": 0.6150679588317871, - "learning_rate": 4.727407407407408e-05, - "loss": 0.5143, - "step": 4473 - }, - { - "epoch": 0.15704031309780797, - "grad_norm": 0.5720675587654114, - "learning_rate": 4.727222222222222e-05, - "loss": 0.5119, - "step": 4474 - }, - { - "epoch": 0.15707541374892503, - "grad_norm": 0.5120483040809631, - "learning_rate": 4.727037037037037e-05, - "loss": 0.4554, - "step": 4475 - }, - { - "epoch": 0.15711051440004212, - "grad_norm": 0.5799268484115601, - "learning_rate": 4.726851851851852e-05, - "loss": 0.508, - "step": 4476 - }, - { - "epoch": 0.1571456150511592, - "grad_norm": 0.5240592360496521, - "learning_rate": 4.726666666666667e-05, - "loss": 0.5222, - "step": 4477 - }, - { - "epoch": 0.15718071570227626, - "grad_norm": 0.5474269390106201, - "learning_rate": 4.7264814814814816e-05, - "loss": 0.544, - "step": 4478 - }, - { - "epoch": 0.15721581635339335, - "grad_norm": 0.5314311385154724, - "learning_rate": 4.7262962962962966e-05, - "loss": 0.6032, - "step": 4479 - }, - { - "epoch": 0.15725091700451044, - "grad_norm": 0.44014301896095276, - "learning_rate": 4.726111111111111e-05, - "loss": 0.4647, - "step": 4480 - }, - { - "epoch": 0.15728601765562752, - "grad_norm": 0.5617543458938599, - "learning_rate": 4.7259259259259266e-05, - "loss": 0.6527, - "step": 4481 - }, - { - "epoch": 0.15732111830674458, - "grad_norm": 0.5026513338088989, - "learning_rate": 4.725740740740741e-05, - "loss": 0.5946, - "step": 4482 - }, - { - "epoch": 0.15735621895786167, - "grad_norm": 0.48705101013183594, - "learning_rate": 4.725555555555556e-05, - "loss": 0.4466, - "step": 4483 - }, - { - "epoch": 0.15739131960897876, - "grad_norm": 0.6523146033287048, - "learning_rate": 4.72537037037037e-05, - "loss": 0.5388, - "step": 4484 - }, - { - "epoch": 0.15742642026009582, - "grad_norm": 0.5408729910850525, - "learning_rate": 4.725185185185185e-05, - "loss": 0.4495, - "step": 4485 - }, - { - "epoch": 0.1574615209112129, - "grad_norm": 0.5408951640129089, - "learning_rate": 4.7249999999999997e-05, - "loss": 0.6123, - "step": 4486 - }, - { - "epoch": 0.15749662156233, - "grad_norm": 0.48607414960861206, - "learning_rate": 4.7248148148148153e-05, - "loss": 0.4989, - "step": 4487 - }, - { - "epoch": 0.15753172221344705, - "grad_norm": 0.563148021697998, - "learning_rate": 4.72462962962963e-05, - "loss": 0.4562, - "step": 4488 - }, - { - "epoch": 0.15756682286456414, - "grad_norm": 0.6336622834205627, - "learning_rate": 4.724444444444445e-05, - "loss": 0.5156, - "step": 4489 - }, - { - "epoch": 0.15760192351568122, - "grad_norm": 0.5182003974914551, - "learning_rate": 4.72425925925926e-05, - "loss": 0.5424, - "step": 4490 - }, - { - "epoch": 0.15763702416679828, - "grad_norm": 0.5218414068222046, - "learning_rate": 4.724074074074074e-05, - "loss": 0.5646, - "step": 4491 - }, - { - "epoch": 0.15767212481791537, - "grad_norm": 0.4989297688007355, - "learning_rate": 4.723888888888889e-05, - "loss": 0.5441, - "step": 4492 - }, - { - "epoch": 0.15770722546903246, - "grad_norm": 0.49429023265838623, - "learning_rate": 4.723703703703704e-05, - "loss": 0.4498, - "step": 4493 - }, - { - "epoch": 0.15774232612014952, - "grad_norm": 0.5216228365898132, - "learning_rate": 4.723518518518519e-05, - "loss": 0.4479, - "step": 4494 - }, - { - "epoch": 0.1577774267712666, - "grad_norm": 0.5165086984634399, - "learning_rate": 4.7233333333333334e-05, - "loss": 0.5892, - "step": 4495 - }, - { - "epoch": 0.1578125274223837, - "grad_norm": 0.6280810236930847, - "learning_rate": 4.7231481481481484e-05, - "loss": 0.5416, - "step": 4496 - }, - { - "epoch": 0.15784762807350075, - "grad_norm": 0.7204790711402893, - "learning_rate": 4.722962962962963e-05, - "loss": 0.5133, - "step": 4497 - }, - { - "epoch": 0.15788272872461784, - "grad_norm": 0.5633249878883362, - "learning_rate": 4.7227777777777784e-05, - "loss": 0.6082, - "step": 4498 - }, - { - "epoch": 0.15791782937573492, - "grad_norm": 0.5352637767791748, - "learning_rate": 4.722592592592593e-05, - "loss": 0.5786, - "step": 4499 - }, - { - "epoch": 0.15795293002685198, - "grad_norm": 0.8016707301139832, - "learning_rate": 4.722407407407408e-05, - "loss": 0.5113, - "step": 4500 - }, - { - "epoch": 0.15798803067796907, - "grad_norm": 0.6326096057891846, - "learning_rate": 4.722222222222222e-05, - "loss": 0.5404, - "step": 4501 - }, - { - "epoch": 0.15802313132908616, - "grad_norm": 0.5842592716217041, - "learning_rate": 4.722037037037037e-05, - "loss": 0.5448, - "step": 4502 - }, - { - "epoch": 0.15805823198020325, - "grad_norm": 0.5471271872520447, - "learning_rate": 4.721851851851852e-05, - "loss": 0.5084, - "step": 4503 - }, - { - "epoch": 0.1580933326313203, - "grad_norm": 0.70560622215271, - "learning_rate": 4.721666666666667e-05, - "loss": 0.5619, - "step": 4504 - }, - { - "epoch": 0.1581284332824374, - "grad_norm": 0.6327870488166809, - "learning_rate": 4.7214814814814815e-05, - "loss": 0.5906, - "step": 4505 - }, - { - "epoch": 0.15816353393355448, - "grad_norm": 0.46337205171585083, - "learning_rate": 4.7212962962962965e-05, - "loss": 0.4333, - "step": 4506 - }, - { - "epoch": 0.15819863458467154, - "grad_norm": 0.6480876207351685, - "learning_rate": 4.721111111111111e-05, - "loss": 0.5876, - "step": 4507 - }, - { - "epoch": 0.15823373523578863, - "grad_norm": 0.45570138096809387, - "learning_rate": 4.7209259259259265e-05, - "loss": 0.4428, - "step": 4508 - }, - { - "epoch": 0.1582688358869057, - "grad_norm": 0.562924861907959, - "learning_rate": 4.720740740740741e-05, - "loss": 0.5442, - "step": 4509 - }, - { - "epoch": 0.15830393653802277, - "grad_norm": 0.5267930030822754, - "learning_rate": 4.720555555555556e-05, - "loss": 0.4297, - "step": 4510 - }, - { - "epoch": 0.15833903718913986, - "grad_norm": 0.5205733776092529, - "learning_rate": 4.720370370370371e-05, - "loss": 0.3849, - "step": 4511 - }, - { - "epoch": 0.15837413784025695, - "grad_norm": 0.5018739104270935, - "learning_rate": 4.720185185185185e-05, - "loss": 0.4493, - "step": 4512 - }, - { - "epoch": 0.158409238491374, - "grad_norm": 0.4825412631034851, - "learning_rate": 4.72e-05, - "loss": 0.5123, - "step": 4513 - }, - { - "epoch": 0.1584443391424911, - "grad_norm": 0.5848619341850281, - "learning_rate": 4.719814814814815e-05, - "loss": 0.4673, - "step": 4514 - }, - { - "epoch": 0.15847943979360818, - "grad_norm": 0.5822762250900269, - "learning_rate": 4.71962962962963e-05, - "loss": 0.5268, - "step": 4515 - }, - { - "epoch": 0.15851454044472524, - "grad_norm": 0.5851758718490601, - "learning_rate": 4.7194444444444446e-05, - "loss": 0.6236, - "step": 4516 - }, - { - "epoch": 0.15854964109584233, - "grad_norm": 0.5254489183425903, - "learning_rate": 4.7192592592592596e-05, - "loss": 0.5667, - "step": 4517 - }, - { - "epoch": 0.1585847417469594, - "grad_norm": 0.4754047989845276, - "learning_rate": 4.719074074074074e-05, - "loss": 0.4862, - "step": 4518 - }, - { - "epoch": 0.15861984239807647, - "grad_norm": 0.45644810795783997, - "learning_rate": 4.7188888888888896e-05, - "loss": 0.4857, - "step": 4519 - }, - { - "epoch": 0.15865494304919356, - "grad_norm": 0.5200848579406738, - "learning_rate": 4.718703703703704e-05, - "loss": 0.6144, - "step": 4520 - }, - { - "epoch": 0.15869004370031065, - "grad_norm": 0.47439658641815186, - "learning_rate": 4.718518518518519e-05, - "loss": 0.4418, - "step": 4521 - }, - { - "epoch": 0.15872514435142773, - "grad_norm": 0.4950857162475586, - "learning_rate": 4.718333333333333e-05, - "loss": 0.4749, - "step": 4522 - }, - { - "epoch": 0.1587602450025448, - "grad_norm": 0.4972844123840332, - "learning_rate": 4.718148148148148e-05, - "loss": 0.6018, - "step": 4523 - }, - { - "epoch": 0.15879534565366188, - "grad_norm": 0.5409953594207764, - "learning_rate": 4.7179629629629626e-05, - "loss": 0.6204, - "step": 4524 - }, - { - "epoch": 0.15883044630477897, - "grad_norm": 0.46698063611984253, - "learning_rate": 4.717777777777778e-05, - "loss": 0.5945, - "step": 4525 - }, - { - "epoch": 0.15886554695589603, - "grad_norm": 0.48971161246299744, - "learning_rate": 4.7175925925925926e-05, - "loss": 0.4703, - "step": 4526 - }, - { - "epoch": 0.1589006476070131, - "grad_norm": 0.5666086673736572, - "learning_rate": 4.7174074074074077e-05, - "loss": 0.5839, - "step": 4527 - }, - { - "epoch": 0.1589357482581302, - "grad_norm": 0.5201736688613892, - "learning_rate": 4.717222222222222e-05, - "loss": 0.5334, - "step": 4528 - }, - { - "epoch": 0.15897084890924726, - "grad_norm": 0.5666569471359253, - "learning_rate": 4.717037037037037e-05, - "loss": 0.6759, - "step": 4529 - }, - { - "epoch": 0.15900594956036435, - "grad_norm": 0.5508410930633545, - "learning_rate": 4.716851851851852e-05, - "loss": 0.4507, - "step": 4530 - }, - { - "epoch": 0.15904105021148143, - "grad_norm": 0.5446406602859497, - "learning_rate": 4.716666666666667e-05, - "loss": 0.4732, - "step": 4531 - }, - { - "epoch": 0.1590761508625985, - "grad_norm": 0.47935229539871216, - "learning_rate": 4.716481481481482e-05, - "loss": 0.4497, - "step": 4532 - }, - { - "epoch": 0.15911125151371558, - "grad_norm": 0.5156164169311523, - "learning_rate": 4.7162962962962964e-05, - "loss": 0.6246, - "step": 4533 - }, - { - "epoch": 0.15914635216483267, - "grad_norm": 0.8051342368125916, - "learning_rate": 4.7161111111111114e-05, - "loss": 0.6866, - "step": 4534 - }, - { - "epoch": 0.15918145281594973, - "grad_norm": 0.6836540102958679, - "learning_rate": 4.7159259259259264e-05, - "loss": 0.5458, - "step": 4535 - }, - { - "epoch": 0.1592165534670668, - "grad_norm": 0.5823956727981567, - "learning_rate": 4.7157407407407414e-05, - "loss": 0.5657, - "step": 4536 - }, - { - "epoch": 0.1592516541181839, - "grad_norm": 0.5264971852302551, - "learning_rate": 4.715555555555556e-05, - "loss": 0.5491, - "step": 4537 - }, - { - "epoch": 0.15928675476930096, - "grad_norm": 0.5722473859786987, - "learning_rate": 4.715370370370371e-05, - "loss": 0.4416, - "step": 4538 - }, - { - "epoch": 0.15932185542041805, - "grad_norm": 0.5274788737297058, - "learning_rate": 4.715185185185185e-05, - "loss": 0.5716, - "step": 4539 - }, - { - "epoch": 0.15935695607153513, - "grad_norm": 0.6684869527816772, - "learning_rate": 4.715e-05, - "loss": 0.5526, - "step": 4540 - }, - { - "epoch": 0.1593920567226522, - "grad_norm": 0.4916113018989563, - "learning_rate": 4.714814814814815e-05, - "loss": 0.5926, - "step": 4541 - }, - { - "epoch": 0.15942715737376928, - "grad_norm": 0.5330376625061035, - "learning_rate": 4.71462962962963e-05, - "loss": 0.4607, - "step": 4542 - }, - { - "epoch": 0.15946225802488637, - "grad_norm": 0.49321600794792175, - "learning_rate": 4.7144444444444444e-05, - "loss": 0.4073, - "step": 4543 - }, - { - "epoch": 0.15949735867600345, - "grad_norm": 0.4821356534957886, - "learning_rate": 4.7142592592592595e-05, - "loss": 0.4549, - "step": 4544 - }, - { - "epoch": 0.1595324593271205, - "grad_norm": 0.5479131937026978, - "learning_rate": 4.714074074074074e-05, - "loss": 0.6233, - "step": 4545 - }, - { - "epoch": 0.1595675599782376, - "grad_norm": 0.527561604976654, - "learning_rate": 4.7138888888888895e-05, - "loss": 0.4751, - "step": 4546 - }, - { - "epoch": 0.1596026606293547, - "grad_norm": 0.6191648840904236, - "learning_rate": 4.713703703703704e-05, - "loss": 0.553, - "step": 4547 - }, - { - "epoch": 0.15963776128047175, - "grad_norm": 0.6936167478561401, - "learning_rate": 4.713518518518519e-05, - "loss": 0.4086, - "step": 4548 - }, - { - "epoch": 0.15967286193158883, - "grad_norm": 0.6497761607170105, - "learning_rate": 4.713333333333333e-05, - "loss": 0.5179, - "step": 4549 - }, - { - "epoch": 0.15970796258270592, - "grad_norm": 0.6016766428947449, - "learning_rate": 4.713148148148148e-05, - "loss": 0.6584, - "step": 4550 - }, - { - "epoch": 0.15974306323382298, - "grad_norm": 0.45767754316329956, - "learning_rate": 4.712962962962963e-05, - "loss": 0.533, - "step": 4551 - }, - { - "epoch": 0.15977816388494007, - "grad_norm": 0.6105126738548279, - "learning_rate": 4.712777777777778e-05, - "loss": 0.4469, - "step": 4552 - }, - { - "epoch": 0.15981326453605715, - "grad_norm": 0.5070940852165222, - "learning_rate": 4.712592592592593e-05, - "loss": 0.5832, - "step": 4553 - }, - { - "epoch": 0.1598483651871742, - "grad_norm": 0.5029154419898987, - "learning_rate": 4.7124074074074075e-05, - "loss": 0.613, - "step": 4554 - }, - { - "epoch": 0.1598834658382913, - "grad_norm": 0.5559334754943848, - "learning_rate": 4.7122222222222225e-05, - "loss": 0.4938, - "step": 4555 - }, - { - "epoch": 0.1599185664894084, - "grad_norm": 0.5888473987579346, - "learning_rate": 4.712037037037037e-05, - "loss": 0.5313, - "step": 4556 - }, - { - "epoch": 0.15995366714052545, - "grad_norm": 0.4599877595901489, - "learning_rate": 4.7118518518518526e-05, - "loss": 0.4779, - "step": 4557 - }, - { - "epoch": 0.15998876779164253, - "grad_norm": 0.550811231136322, - "learning_rate": 4.711666666666667e-05, - "loss": 0.5053, - "step": 4558 - }, - { - "epoch": 0.16002386844275962, - "grad_norm": 0.6071308255195618, - "learning_rate": 4.711481481481482e-05, - "loss": 0.4902, - "step": 4559 - }, - { - "epoch": 0.16005896909387668, - "grad_norm": 0.4531664550304413, - "learning_rate": 4.711296296296296e-05, - "loss": 0.4797, - "step": 4560 - }, - { - "epoch": 0.16009406974499377, - "grad_norm": 0.4926517903804779, - "learning_rate": 4.711111111111111e-05, - "loss": 0.4906, - "step": 4561 - }, - { - "epoch": 0.16012917039611085, - "grad_norm": 0.4893531799316406, - "learning_rate": 4.710925925925926e-05, - "loss": 0.5644, - "step": 4562 - }, - { - "epoch": 0.1601642710472279, - "grad_norm": 0.5344645977020264, - "learning_rate": 4.710740740740741e-05, - "loss": 0.6308, - "step": 4563 - }, - { - "epoch": 0.160199371698345, - "grad_norm": 0.5220844149589539, - "learning_rate": 4.7105555555555556e-05, - "loss": 0.4546, - "step": 4564 - }, - { - "epoch": 0.1602344723494621, - "grad_norm": 0.49759840965270996, - "learning_rate": 4.7103703703703706e-05, - "loss": 0.4231, - "step": 4565 - }, - { - "epoch": 0.16026957300057917, - "grad_norm": 0.671882688999176, - "learning_rate": 4.710185185185185e-05, - "loss": 0.5177, - "step": 4566 - }, - { - "epoch": 0.16030467365169623, - "grad_norm": 0.6001405119895935, - "learning_rate": 4.71e-05, - "loss": 0.5924, - "step": 4567 - }, - { - "epoch": 0.16033977430281332, - "grad_norm": 0.6187847852706909, - "learning_rate": 4.709814814814815e-05, - "loss": 0.5871, - "step": 4568 - }, - { - "epoch": 0.1603748749539304, - "grad_norm": 0.497732013463974, - "learning_rate": 4.70962962962963e-05, - "loss": 0.5201, - "step": 4569 - }, - { - "epoch": 0.16040997560504747, - "grad_norm": 0.5551539659500122, - "learning_rate": 4.709444444444444e-05, - "loss": 0.445, - "step": 4570 - }, - { - "epoch": 0.16044507625616455, - "grad_norm": 0.562332808971405, - "learning_rate": 4.709259259259259e-05, - "loss": 0.626, - "step": 4571 - }, - { - "epoch": 0.16048017690728164, - "grad_norm": 0.5870639681816101, - "learning_rate": 4.7090740740740743e-05, - "loss": 0.5393, - "step": 4572 - }, - { - "epoch": 0.1605152775583987, - "grad_norm": 0.523469865322113, - "learning_rate": 4.7088888888888894e-05, - "loss": 0.5436, - "step": 4573 - }, - { - "epoch": 0.1605503782095158, - "grad_norm": 0.621489942073822, - "learning_rate": 4.7087037037037044e-05, - "loss": 0.5449, - "step": 4574 - }, - { - "epoch": 0.16058547886063287, - "grad_norm": 0.6763370633125305, - "learning_rate": 4.708518518518519e-05, - "loss": 0.5745, - "step": 4575 - }, - { - "epoch": 0.16062057951174993, - "grad_norm": 0.6034652590751648, - "learning_rate": 4.708333333333334e-05, - "loss": 0.5769, - "step": 4576 - }, - { - "epoch": 0.16065568016286702, - "grad_norm": 0.5636066794395447, - "learning_rate": 4.708148148148148e-05, - "loss": 0.4862, - "step": 4577 - }, - { - "epoch": 0.1606907808139841, - "grad_norm": 0.486105740070343, - "learning_rate": 4.707962962962964e-05, - "loss": 0.4688, - "step": 4578 - }, - { - "epoch": 0.16072588146510117, - "grad_norm": 0.5911689400672913, - "learning_rate": 4.707777777777778e-05, - "loss": 0.54, - "step": 4579 - }, - { - "epoch": 0.16076098211621825, - "grad_norm": 0.6073501706123352, - "learning_rate": 4.707592592592593e-05, - "loss": 0.5417, - "step": 4580 - }, - { - "epoch": 0.16079608276733534, - "grad_norm": 0.43842047452926636, - "learning_rate": 4.7074074074074074e-05, - "loss": 0.4589, - "step": 4581 - }, - { - "epoch": 0.1608311834184524, - "grad_norm": 0.5627835392951965, - "learning_rate": 4.7072222222222224e-05, - "loss": 0.5966, - "step": 4582 - }, - { - "epoch": 0.1608662840695695, - "grad_norm": 0.4958846867084503, - "learning_rate": 4.707037037037037e-05, - "loss": 0.4453, - "step": 4583 - }, - { - "epoch": 0.16090138472068657, - "grad_norm": 0.5684654116630554, - "learning_rate": 4.7068518518518524e-05, - "loss": 0.6192, - "step": 4584 - }, - { - "epoch": 0.16093648537180363, - "grad_norm": 0.5155267715454102, - "learning_rate": 4.706666666666667e-05, - "loss": 0.5653, - "step": 4585 - }, - { - "epoch": 0.16097158602292072, - "grad_norm": 0.5057397484779358, - "learning_rate": 4.706481481481482e-05, - "loss": 0.413, - "step": 4586 - }, - { - "epoch": 0.1610066866740378, - "grad_norm": 0.4913228750228882, - "learning_rate": 4.706296296296296e-05, - "loss": 0.532, - "step": 4587 - }, - { - "epoch": 0.1610417873251549, - "grad_norm": 0.5169674158096313, - "learning_rate": 4.706111111111111e-05, - "loss": 0.5838, - "step": 4588 - }, - { - "epoch": 0.16107688797627195, - "grad_norm": 0.5197097063064575, - "learning_rate": 4.705925925925926e-05, - "loss": 0.4342, - "step": 4589 - }, - { - "epoch": 0.16111198862738904, - "grad_norm": 0.5551198124885559, - "learning_rate": 4.705740740740741e-05, - "loss": 0.6039, - "step": 4590 - }, - { - "epoch": 0.16114708927850613, - "grad_norm": 0.5452226996421814, - "learning_rate": 4.7055555555555555e-05, - "loss": 0.5275, - "step": 4591 - }, - { - "epoch": 0.1611821899296232, - "grad_norm": 0.5822575092315674, - "learning_rate": 4.7053703703703705e-05, - "loss": 0.5445, - "step": 4592 - }, - { - "epoch": 0.16121729058074027, - "grad_norm": 0.46882572770118713, - "learning_rate": 4.7051851851851855e-05, - "loss": 0.5119, - "step": 4593 - }, - { - "epoch": 0.16125239123185736, - "grad_norm": 0.5763568878173828, - "learning_rate": 4.705e-05, - "loss": 0.4121, - "step": 4594 - }, - { - "epoch": 0.16128749188297442, - "grad_norm": 0.5008208155632019, - "learning_rate": 4.7048148148148155e-05, - "loss": 0.4322, - "step": 4595 - }, - { - "epoch": 0.1613225925340915, - "grad_norm": 0.6739996671676636, - "learning_rate": 4.70462962962963e-05, - "loss": 0.4842, - "step": 4596 - }, - { - "epoch": 0.1613576931852086, - "grad_norm": 0.6374235153198242, - "learning_rate": 4.704444444444445e-05, - "loss": 0.5002, - "step": 4597 - }, - { - "epoch": 0.16139279383632565, - "grad_norm": 0.5835228562355042, - "learning_rate": 4.704259259259259e-05, - "loss": 0.535, - "step": 4598 - }, - { - "epoch": 0.16142789448744274, - "grad_norm": 0.6149680614471436, - "learning_rate": 4.704074074074074e-05, - "loss": 0.5719, - "step": 4599 - }, - { - "epoch": 0.16146299513855983, - "grad_norm": 0.5481575727462769, - "learning_rate": 4.703888888888889e-05, - "loss": 0.5595, - "step": 4600 - }, - { - "epoch": 0.1614980957896769, - "grad_norm": 0.6297295689582825, - "learning_rate": 4.703703703703704e-05, - "loss": 0.6141, - "step": 4601 - }, - { - "epoch": 0.16153319644079397, - "grad_norm": 0.6547426581382751, - "learning_rate": 4.7035185185185186e-05, - "loss": 0.5307, - "step": 4602 - }, - { - "epoch": 0.16156829709191106, - "grad_norm": 0.6628689765930176, - "learning_rate": 4.7033333333333336e-05, - "loss": 0.5397, - "step": 4603 - }, - { - "epoch": 0.16160339774302812, - "grad_norm": 0.5583134293556213, - "learning_rate": 4.703148148148148e-05, - "loss": 0.5675, - "step": 4604 - }, - { - "epoch": 0.1616384983941452, - "grad_norm": 0.6002651453018188, - "learning_rate": 4.7029629629629636e-05, - "loss": 0.4633, - "step": 4605 - }, - { - "epoch": 0.1616735990452623, - "grad_norm": 0.4845353066921234, - "learning_rate": 4.702777777777778e-05, - "loss": 0.4886, - "step": 4606 - }, - { - "epoch": 0.16170869969637935, - "grad_norm": 0.4881352186203003, - "learning_rate": 4.702592592592593e-05, - "loss": 0.5286, - "step": 4607 - }, - { - "epoch": 0.16174380034749644, - "grad_norm": 0.480567991733551, - "learning_rate": 4.702407407407407e-05, - "loss": 0.5063, - "step": 4608 - }, - { - "epoch": 0.16177890099861353, - "grad_norm": 0.5147241950035095, - "learning_rate": 4.702222222222222e-05, - "loss": 0.4384, - "step": 4609 - }, - { - "epoch": 0.16181400164973062, - "grad_norm": 0.4575687646865845, - "learning_rate": 4.702037037037037e-05, - "loss": 0.5119, - "step": 4610 - }, - { - "epoch": 0.16184910230084767, - "grad_norm": 0.5389901399612427, - "learning_rate": 4.701851851851852e-05, - "loss": 0.5391, - "step": 4611 - }, - { - "epoch": 0.16188420295196476, - "grad_norm": 0.5941452980041504, - "learning_rate": 4.701666666666667e-05, - "loss": 0.5714, - "step": 4612 - }, - { - "epoch": 0.16191930360308185, - "grad_norm": 0.5461995601654053, - "learning_rate": 4.701481481481482e-05, - "loss": 0.5059, - "step": 4613 - }, - { - "epoch": 0.1619544042541989, - "grad_norm": 0.47192662954330444, - "learning_rate": 4.701296296296297e-05, - "loss": 0.444, - "step": 4614 - }, - { - "epoch": 0.161989504905316, - "grad_norm": 0.5135536789894104, - "learning_rate": 4.701111111111111e-05, - "loss": 0.5379, - "step": 4615 - }, - { - "epoch": 0.16202460555643308, - "grad_norm": 0.4586641490459442, - "learning_rate": 4.700925925925927e-05, - "loss": 0.494, - "step": 4616 - }, - { - "epoch": 0.16205970620755014, - "grad_norm": 0.5261182188987732, - "learning_rate": 4.700740740740741e-05, - "loss": 0.5402, - "step": 4617 - }, - { - "epoch": 0.16209480685866723, - "grad_norm": 0.5108253955841064, - "learning_rate": 4.700555555555556e-05, - "loss": 0.5978, - "step": 4618 - }, - { - "epoch": 0.16212990750978432, - "grad_norm": 0.589952826499939, - "learning_rate": 4.7003703703703704e-05, - "loss": 0.5183, - "step": 4619 - }, - { - "epoch": 0.16216500816090137, - "grad_norm": 0.6264922618865967, - "learning_rate": 4.7001851851851854e-05, - "loss": 0.6097, - "step": 4620 - }, - { - "epoch": 0.16220010881201846, - "grad_norm": 0.6495074033737183, - "learning_rate": 4.7e-05, - "loss": 0.5155, - "step": 4621 - }, - { - "epoch": 0.16223520946313555, - "grad_norm": 0.5377683043479919, - "learning_rate": 4.6998148148148154e-05, - "loss": 0.5358, - "step": 4622 - }, - { - "epoch": 0.1622703101142526, - "grad_norm": 0.5211613774299622, - "learning_rate": 4.69962962962963e-05, - "loss": 0.5123, - "step": 4623 - }, - { - "epoch": 0.1623054107653697, - "grad_norm": 0.5677571296691895, - "learning_rate": 4.699444444444445e-05, - "loss": 0.5437, - "step": 4624 - }, - { - "epoch": 0.16234051141648678, - "grad_norm": 0.5779939293861389, - "learning_rate": 4.699259259259259e-05, - "loss": 0.5009, - "step": 4625 - }, - { - "epoch": 0.16237561206760384, - "grad_norm": 0.5656993985176086, - "learning_rate": 4.699074074074074e-05, - "loss": 0.501, - "step": 4626 - }, - { - "epoch": 0.16241071271872093, - "grad_norm": 0.6436411738395691, - "learning_rate": 4.698888888888889e-05, - "loss": 0.587, - "step": 4627 - }, - { - "epoch": 0.16244581336983802, - "grad_norm": 0.7053089141845703, - "learning_rate": 4.698703703703704e-05, - "loss": 0.5468, - "step": 4628 - }, - { - "epoch": 0.16248091402095508, - "grad_norm": 0.5573118925094604, - "learning_rate": 4.6985185185185185e-05, - "loss": 0.5341, - "step": 4629 - }, - { - "epoch": 0.16251601467207216, - "grad_norm": 0.6322102546691895, - "learning_rate": 4.6983333333333335e-05, - "loss": 0.593, - "step": 4630 - }, - { - "epoch": 0.16255111532318925, - "grad_norm": 0.5675036311149597, - "learning_rate": 4.6981481481481485e-05, - "loss": 0.5113, - "step": 4631 - }, - { - "epoch": 0.16258621597430634, - "grad_norm": 0.5841495990753174, - "learning_rate": 4.6979629629629635e-05, - "loss": 0.4432, - "step": 4632 - }, - { - "epoch": 0.1626213166254234, - "grad_norm": 0.5483308434486389, - "learning_rate": 4.6977777777777785e-05, - "loss": 0.4865, - "step": 4633 - }, - { - "epoch": 0.16265641727654048, - "grad_norm": 0.5686365365982056, - "learning_rate": 4.697592592592593e-05, - "loss": 0.5737, - "step": 4634 - }, - { - "epoch": 0.16269151792765757, - "grad_norm": 0.6350523829460144, - "learning_rate": 4.697407407407408e-05, - "loss": 0.6588, - "step": 4635 - }, - { - "epoch": 0.16272661857877463, - "grad_norm": 0.5094438791275024, - "learning_rate": 4.697222222222222e-05, - "loss": 0.5447, - "step": 4636 - }, - { - "epoch": 0.16276171922989172, - "grad_norm": 0.48273414373397827, - "learning_rate": 4.697037037037037e-05, - "loss": 0.3523, - "step": 4637 - }, - { - "epoch": 0.1627968198810088, - "grad_norm": 0.5452636480331421, - "learning_rate": 4.696851851851852e-05, - "loss": 0.5694, - "step": 4638 - }, - { - "epoch": 0.16283192053212586, - "grad_norm": 0.6136966943740845, - "learning_rate": 4.696666666666667e-05, - "loss": 0.523, - "step": 4639 - }, - { - "epoch": 0.16286702118324295, - "grad_norm": 0.5323180556297302, - "learning_rate": 4.6964814814814815e-05, - "loss": 0.4173, - "step": 4640 - }, - { - "epoch": 0.16290212183436004, - "grad_norm": 0.5111260414123535, - "learning_rate": 4.6962962962962966e-05, - "loss": 0.5574, - "step": 4641 - }, - { - "epoch": 0.1629372224854771, - "grad_norm": 0.7459666132926941, - "learning_rate": 4.696111111111111e-05, - "loss": 0.5622, - "step": 4642 - }, - { - "epoch": 0.16297232313659418, - "grad_norm": 0.6661816835403442, - "learning_rate": 4.6959259259259266e-05, - "loss": 0.5717, - "step": 4643 - }, - { - "epoch": 0.16300742378771127, - "grad_norm": 0.5236870646476746, - "learning_rate": 4.695740740740741e-05, - "loss": 0.5604, - "step": 4644 - }, - { - "epoch": 0.16304252443882833, - "grad_norm": 0.5494212508201599, - "learning_rate": 4.695555555555556e-05, - "loss": 0.6172, - "step": 4645 - }, - { - "epoch": 0.16307762508994542, - "grad_norm": 0.6384114027023315, - "learning_rate": 4.69537037037037e-05, - "loss": 0.6436, - "step": 4646 - }, - { - "epoch": 0.1631127257410625, - "grad_norm": 0.5485778450965881, - "learning_rate": 4.695185185185185e-05, - "loss": 0.421, - "step": 4647 - }, - { - "epoch": 0.16314782639217956, - "grad_norm": 0.47776615619659424, - "learning_rate": 4.695e-05, - "loss": 0.5232, - "step": 4648 - }, - { - "epoch": 0.16318292704329665, - "grad_norm": 0.5541751384735107, - "learning_rate": 4.694814814814815e-05, - "loss": 0.5481, - "step": 4649 - }, - { - "epoch": 0.16321802769441374, - "grad_norm": 0.4547575116157532, - "learning_rate": 4.6946296296296296e-05, - "loss": 0.4131, - "step": 4650 - }, - { - "epoch": 0.1632531283455308, - "grad_norm": 0.49398621916770935, - "learning_rate": 4.6944444444444446e-05, - "loss": 0.5674, - "step": 4651 - }, - { - "epoch": 0.16328822899664788, - "grad_norm": 0.5369136929512024, - "learning_rate": 4.6942592592592596e-05, - "loss": 0.5696, - "step": 4652 - }, - { - "epoch": 0.16332332964776497, - "grad_norm": 0.5612474083900452, - "learning_rate": 4.694074074074074e-05, - "loss": 0.5077, - "step": 4653 - }, - { - "epoch": 0.16335843029888206, - "grad_norm": 0.5509868264198303, - "learning_rate": 4.69388888888889e-05, - "loss": 0.3794, - "step": 4654 - }, - { - "epoch": 0.16339353094999912, - "grad_norm": 0.7419764995574951, - "learning_rate": 4.693703703703704e-05, - "loss": 0.6665, - "step": 4655 - }, - { - "epoch": 0.1634286316011162, - "grad_norm": 0.6899669170379639, - "learning_rate": 4.693518518518519e-05, - "loss": 0.5121, - "step": 4656 - }, - { - "epoch": 0.1634637322522333, - "grad_norm": 0.5482474565505981, - "learning_rate": 4.6933333333333333e-05, - "loss": 0.5618, - "step": 4657 - }, - { - "epoch": 0.16349883290335035, - "grad_norm": 0.5838524103164673, - "learning_rate": 4.6931481481481484e-05, - "loss": 0.5735, - "step": 4658 - }, - { - "epoch": 0.16353393355446744, - "grad_norm": 0.5041568279266357, - "learning_rate": 4.6929629629629634e-05, - "loss": 0.4306, - "step": 4659 - }, - { - "epoch": 0.16356903420558452, - "grad_norm": 0.6634576320648193, - "learning_rate": 4.6927777777777784e-05, - "loss": 0.6014, - "step": 4660 - }, - { - "epoch": 0.16360413485670158, - "grad_norm": 0.5717986226081848, - "learning_rate": 4.692592592592593e-05, - "loss": 0.5457, - "step": 4661 - }, - { - "epoch": 0.16363923550781867, - "grad_norm": 0.5959053635597229, - "learning_rate": 4.692407407407408e-05, - "loss": 0.4698, - "step": 4662 - }, - { - "epoch": 0.16367433615893576, - "grad_norm": 0.46559983491897583, - "learning_rate": 4.692222222222222e-05, - "loss": 0.3942, - "step": 4663 - }, - { - "epoch": 0.16370943681005282, - "grad_norm": 0.6200748682022095, - "learning_rate": 4.692037037037037e-05, - "loss": 0.4395, - "step": 4664 - }, - { - "epoch": 0.1637445374611699, - "grad_norm": 0.5644586086273193, - "learning_rate": 4.691851851851852e-05, - "loss": 0.4774, - "step": 4665 - }, - { - "epoch": 0.163779638112287, - "grad_norm": 0.4927223324775696, - "learning_rate": 4.691666666666667e-05, - "loss": 0.5217, - "step": 4666 - }, - { - "epoch": 0.16381473876340405, - "grad_norm": 0.620540976524353, - "learning_rate": 4.6914814814814814e-05, - "loss": 0.4907, - "step": 4667 - }, - { - "epoch": 0.16384983941452114, - "grad_norm": 0.5483893156051636, - "learning_rate": 4.6912962962962964e-05, - "loss": 0.5853, - "step": 4668 - }, - { - "epoch": 0.16388494006563822, - "grad_norm": 0.5629186630249023, - "learning_rate": 4.6911111111111114e-05, - "loss": 0.4978, - "step": 4669 - }, - { - "epoch": 0.16392004071675528, - "grad_norm": 0.6169286370277405, - "learning_rate": 4.6909259259259265e-05, - "loss": 0.5803, - "step": 4670 - }, - { - "epoch": 0.16395514136787237, - "grad_norm": 0.49422532320022583, - "learning_rate": 4.690740740740741e-05, - "loss": 0.5165, - "step": 4671 - }, - { - "epoch": 0.16399024201898946, - "grad_norm": 0.5627767443656921, - "learning_rate": 4.690555555555556e-05, - "loss": 0.5352, - "step": 4672 - }, - { - "epoch": 0.16402534267010654, - "grad_norm": 0.5238227844238281, - "learning_rate": 4.690370370370371e-05, - "loss": 0.5885, - "step": 4673 - }, - { - "epoch": 0.1640604433212236, - "grad_norm": 0.6723129153251648, - "learning_rate": 4.690185185185185e-05, - "loss": 0.5144, - "step": 4674 - }, - { - "epoch": 0.1640955439723407, - "grad_norm": 0.504691481590271, - "learning_rate": 4.69e-05, - "loss": 0.4126, - "step": 4675 - }, - { - "epoch": 0.16413064462345778, - "grad_norm": 0.5681845545768738, - "learning_rate": 4.689814814814815e-05, - "loss": 0.5469, - "step": 4676 - }, - { - "epoch": 0.16416574527457484, - "grad_norm": 0.5791237950325012, - "learning_rate": 4.68962962962963e-05, - "loss": 0.6765, - "step": 4677 - }, - { - "epoch": 0.16420084592569192, - "grad_norm": 0.6019071936607361, - "learning_rate": 4.6894444444444445e-05, - "loss": 0.6499, - "step": 4678 - }, - { - "epoch": 0.164235946576809, - "grad_norm": 0.5311499238014221, - "learning_rate": 4.6892592592592595e-05, - "loss": 0.5455, - "step": 4679 - }, - { - "epoch": 0.16427104722792607, - "grad_norm": 0.593117356300354, - "learning_rate": 4.689074074074074e-05, - "loss": 0.5774, - "step": 4680 - }, - { - "epoch": 0.16430614787904316, - "grad_norm": 0.5377423167228699, - "learning_rate": 4.6888888888888895e-05, - "loss": 0.5779, - "step": 4681 - }, - { - "epoch": 0.16434124853016024, - "grad_norm": 0.5004808306694031, - "learning_rate": 4.688703703703704e-05, - "loss": 0.5721, - "step": 4682 - }, - { - "epoch": 0.1643763491812773, - "grad_norm": 0.5212306976318359, - "learning_rate": 4.688518518518519e-05, - "loss": 0.4667, - "step": 4683 - }, - { - "epoch": 0.1644114498323944, - "grad_norm": 0.49803969264030457, - "learning_rate": 4.688333333333333e-05, - "loss": 0.4413, - "step": 4684 - }, - { - "epoch": 0.16444655048351148, - "grad_norm": 0.5048196911811829, - "learning_rate": 4.688148148148148e-05, - "loss": 0.4274, - "step": 4685 - }, - { - "epoch": 0.16448165113462854, - "grad_norm": 0.49943920969963074, - "learning_rate": 4.687962962962963e-05, - "loss": 0.6299, - "step": 4686 - }, - { - "epoch": 0.16451675178574562, - "grad_norm": 0.4727729260921478, - "learning_rate": 4.687777777777778e-05, - "loss": 0.5855, - "step": 4687 - }, - { - "epoch": 0.1645518524368627, - "grad_norm": 0.5503954291343689, - "learning_rate": 4.6875925925925926e-05, - "loss": 0.5099, - "step": 4688 - }, - { - "epoch": 0.16458695308797977, - "grad_norm": 0.5328043103218079, - "learning_rate": 4.6874074074074076e-05, - "loss": 0.5293, - "step": 4689 - }, - { - "epoch": 0.16462205373909686, - "grad_norm": 0.6505573391914368, - "learning_rate": 4.6872222222222226e-05, - "loss": 0.3524, - "step": 4690 - }, - { - "epoch": 0.16465715439021394, - "grad_norm": 0.6673058271408081, - "learning_rate": 4.687037037037037e-05, - "loss": 0.5526, - "step": 4691 - }, - { - "epoch": 0.164692255041331, - "grad_norm": 0.5668200850486755, - "learning_rate": 4.686851851851852e-05, - "loss": 0.5258, - "step": 4692 - }, - { - "epoch": 0.1647273556924481, - "grad_norm": 0.499667763710022, - "learning_rate": 4.686666666666667e-05, - "loss": 0.477, - "step": 4693 - }, - { - "epoch": 0.16476245634356518, - "grad_norm": 0.5355305671691895, - "learning_rate": 4.686481481481482e-05, - "loss": 0.3569, - "step": 4694 - }, - { - "epoch": 0.16479755699468226, - "grad_norm": 0.5203456878662109, - "learning_rate": 4.686296296296296e-05, - "loss": 0.5402, - "step": 4695 - }, - { - "epoch": 0.16483265764579932, - "grad_norm": 0.5615248680114746, - "learning_rate": 4.686111111111111e-05, - "loss": 0.6202, - "step": 4696 - }, - { - "epoch": 0.1648677582969164, - "grad_norm": 0.49769219756126404, - "learning_rate": 4.685925925925926e-05, - "loss": 0.5998, - "step": 4697 - }, - { - "epoch": 0.1649028589480335, - "grad_norm": 0.7026830315589905, - "learning_rate": 4.6857407407407413e-05, - "loss": 0.5395, - "step": 4698 - }, - { - "epoch": 0.16493795959915056, - "grad_norm": 0.5617247819900513, - "learning_rate": 4.685555555555556e-05, - "loss": 0.5252, - "step": 4699 - }, - { - "epoch": 0.16497306025026764, - "grad_norm": 0.548570454120636, - "learning_rate": 4.685370370370371e-05, - "loss": 0.5854, - "step": 4700 - }, - { - "epoch": 0.16500816090138473, - "grad_norm": 0.5999680757522583, - "learning_rate": 4.685185185185185e-05, - "loss": 0.6625, - "step": 4701 - }, - { - "epoch": 0.1650432615525018, - "grad_norm": 0.7140947580337524, - "learning_rate": 4.685000000000001e-05, - "loss": 0.372, - "step": 4702 - }, - { - "epoch": 0.16507836220361888, - "grad_norm": 0.5420430302619934, - "learning_rate": 4.684814814814815e-05, - "loss": 0.4997, - "step": 4703 - }, - { - "epoch": 0.16511346285473597, - "grad_norm": 0.5073724389076233, - "learning_rate": 4.68462962962963e-05, - "loss": 0.5507, - "step": 4704 - }, - { - "epoch": 0.16514856350585302, - "grad_norm": 0.5502849817276001, - "learning_rate": 4.6844444444444444e-05, - "loss": 0.5633, - "step": 4705 - }, - { - "epoch": 0.1651836641569701, - "grad_norm": 0.6692810654640198, - "learning_rate": 4.6842592592592594e-05, - "loss": 0.54, - "step": 4706 - }, - { - "epoch": 0.1652187648080872, - "grad_norm": 0.4889306128025055, - "learning_rate": 4.684074074074074e-05, - "loss": 0.463, - "step": 4707 - }, - { - "epoch": 0.16525386545920426, - "grad_norm": 0.5250934362411499, - "learning_rate": 4.6838888888888894e-05, - "loss": 0.5732, - "step": 4708 - }, - { - "epoch": 0.16528896611032134, - "grad_norm": 0.5301280617713928, - "learning_rate": 4.683703703703704e-05, - "loss": 0.5002, - "step": 4709 - }, - { - "epoch": 0.16532406676143843, - "grad_norm": 0.5823671817779541, - "learning_rate": 4.683518518518519e-05, - "loss": 0.537, - "step": 4710 - }, - { - "epoch": 0.1653591674125555, - "grad_norm": 0.49500033259391785, - "learning_rate": 4.683333333333334e-05, - "loss": 0.5697, - "step": 4711 - }, - { - "epoch": 0.16539426806367258, - "grad_norm": 0.5170122981071472, - "learning_rate": 4.683148148148148e-05, - "loss": 0.5199, - "step": 4712 - }, - { - "epoch": 0.16542936871478967, - "grad_norm": 0.612006664276123, - "learning_rate": 4.682962962962963e-05, - "loss": 0.5479, - "step": 4713 - }, - { - "epoch": 0.16546446936590672, - "grad_norm": 0.5686412453651428, - "learning_rate": 4.682777777777778e-05, - "loss": 0.4156, - "step": 4714 - }, - { - "epoch": 0.1654995700170238, - "grad_norm": 0.5627766251564026, - "learning_rate": 4.682592592592593e-05, - "loss": 0.4947, - "step": 4715 - }, - { - "epoch": 0.1655346706681409, - "grad_norm": 0.45982828736305237, - "learning_rate": 4.6824074074074075e-05, - "loss": 0.4271, - "step": 4716 - }, - { - "epoch": 0.16556977131925799, - "grad_norm": 0.5337097644805908, - "learning_rate": 4.6822222222222225e-05, - "loss": 0.5567, - "step": 4717 - }, - { - "epoch": 0.16560487197037504, - "grad_norm": 0.5462734699249268, - "learning_rate": 4.682037037037037e-05, - "loss": 0.5323, - "step": 4718 - }, - { - "epoch": 0.16563997262149213, - "grad_norm": 0.5337449908256531, - "learning_rate": 4.6818518518518525e-05, - "loss": 0.4867, - "step": 4719 - }, - { - "epoch": 0.16567507327260922, - "grad_norm": 0.5719510316848755, - "learning_rate": 4.681666666666667e-05, - "loss": 0.4964, - "step": 4720 - }, - { - "epoch": 0.16571017392372628, - "grad_norm": 0.6070922613143921, - "learning_rate": 4.681481481481482e-05, - "loss": 0.48, - "step": 4721 - }, - { - "epoch": 0.16574527457484337, - "grad_norm": 0.530805766582489, - "learning_rate": 4.681296296296296e-05, - "loss": 0.6624, - "step": 4722 - }, - { - "epoch": 0.16578037522596045, - "grad_norm": 0.48083704710006714, - "learning_rate": 4.681111111111111e-05, - "loss": 0.5529, - "step": 4723 - }, - { - "epoch": 0.1658154758770775, - "grad_norm": 0.5640201568603516, - "learning_rate": 4.680925925925926e-05, - "loss": 0.6392, - "step": 4724 - }, - { - "epoch": 0.1658505765281946, - "grad_norm": 0.621825635433197, - "learning_rate": 4.680740740740741e-05, - "loss": 0.4796, - "step": 4725 - }, - { - "epoch": 0.16588567717931169, - "grad_norm": 0.5366700887680054, - "learning_rate": 4.6805555555555556e-05, - "loss": 0.6248, - "step": 4726 - }, - { - "epoch": 0.16592077783042875, - "grad_norm": 0.5641233921051025, - "learning_rate": 4.6803703703703706e-05, - "loss": 0.5628, - "step": 4727 - }, - { - "epoch": 0.16595587848154583, - "grad_norm": 0.6137816905975342, - "learning_rate": 4.680185185185185e-05, - "loss": 0.4849, - "step": 4728 - }, - { - "epoch": 0.16599097913266292, - "grad_norm": 0.6638803482055664, - "learning_rate": 4.6800000000000006e-05, - "loss": 0.5275, - "step": 4729 - }, - { - "epoch": 0.16602607978377998, - "grad_norm": 0.7579865455627441, - "learning_rate": 4.679814814814815e-05, - "loss": 0.3817, - "step": 4730 - }, - { - "epoch": 0.16606118043489707, - "grad_norm": 0.49175789952278137, - "learning_rate": 4.67962962962963e-05, - "loss": 0.4401, - "step": 4731 - }, - { - "epoch": 0.16609628108601415, - "grad_norm": 0.5257351398468018, - "learning_rate": 4.679444444444445e-05, - "loss": 0.5185, - "step": 4732 - }, - { - "epoch": 0.1661313817371312, - "grad_norm": 0.5647621154785156, - "learning_rate": 4.679259259259259e-05, - "loss": 0.6101, - "step": 4733 - }, - { - "epoch": 0.1661664823882483, - "grad_norm": 0.4702816605567932, - "learning_rate": 4.679074074074074e-05, - "loss": 0.5463, - "step": 4734 - }, - { - "epoch": 0.16620158303936539, - "grad_norm": 0.5519846677780151, - "learning_rate": 4.678888888888889e-05, - "loss": 0.4871, - "step": 4735 - }, - { - "epoch": 0.16623668369048245, - "grad_norm": 0.571833610534668, - "learning_rate": 4.678703703703704e-05, - "loss": 0.6143, - "step": 4736 - }, - { - "epoch": 0.16627178434159953, - "grad_norm": 0.48263785243034363, - "learning_rate": 4.6785185185185186e-05, - "loss": 0.5118, - "step": 4737 - }, - { - "epoch": 0.16630688499271662, - "grad_norm": 0.5002440214157104, - "learning_rate": 4.6783333333333337e-05, - "loss": 0.5588, - "step": 4738 - }, - { - "epoch": 0.1663419856438337, - "grad_norm": 0.5336036682128906, - "learning_rate": 4.678148148148148e-05, - "loss": 0.5869, - "step": 4739 - }, - { - "epoch": 0.16637708629495077, - "grad_norm": 0.45904529094696045, - "learning_rate": 4.677962962962964e-05, - "loss": 0.4585, - "step": 4740 - }, - { - "epoch": 0.16641218694606785, - "grad_norm": 0.584610641002655, - "learning_rate": 4.677777777777778e-05, - "loss": 0.4576, - "step": 4741 - }, - { - "epoch": 0.16644728759718494, - "grad_norm": 0.5559007525444031, - "learning_rate": 4.677592592592593e-05, - "loss": 0.5222, - "step": 4742 - }, - { - "epoch": 0.166482388248302, - "grad_norm": 0.5614113211631775, - "learning_rate": 4.6774074074074074e-05, - "loss": 0.386, - "step": 4743 - }, - { - "epoch": 0.16651748889941909, - "grad_norm": 0.528356671333313, - "learning_rate": 4.6772222222222224e-05, - "loss": 0.621, - "step": 4744 - }, - { - "epoch": 0.16655258955053617, - "grad_norm": 0.5779397487640381, - "learning_rate": 4.677037037037037e-05, - "loss": 0.5553, - "step": 4745 - }, - { - "epoch": 0.16658769020165323, - "grad_norm": 0.4525652825832367, - "learning_rate": 4.6768518518518524e-05, - "loss": 0.5713, - "step": 4746 - }, - { - "epoch": 0.16662279085277032, - "grad_norm": 0.593007504940033, - "learning_rate": 4.676666666666667e-05, - "loss": 0.4769, - "step": 4747 - }, - { - "epoch": 0.1666578915038874, - "grad_norm": 0.5071230530738831, - "learning_rate": 4.676481481481482e-05, - "loss": 0.4109, - "step": 4748 - }, - { - "epoch": 0.16669299215500447, - "grad_norm": 0.48594382405281067, - "learning_rate": 4.676296296296296e-05, - "loss": 0.4532, - "step": 4749 - }, - { - "epoch": 0.16672809280612155, - "grad_norm": 0.5422409176826477, - "learning_rate": 4.676111111111111e-05, - "loss": 0.5702, - "step": 4750 - }, - { - "epoch": 0.16676319345723864, - "grad_norm": 0.5546905994415283, - "learning_rate": 4.675925925925926e-05, - "loss": 0.5884, - "step": 4751 - }, - { - "epoch": 0.1667982941083557, - "grad_norm": 0.49044281244277954, - "learning_rate": 4.675740740740741e-05, - "loss": 0.4485, - "step": 4752 - }, - { - "epoch": 0.1668333947594728, - "grad_norm": 0.5500389933586121, - "learning_rate": 4.675555555555556e-05, - "loss": 0.5778, - "step": 4753 - }, - { - "epoch": 0.16686849541058987, - "grad_norm": 0.4695369601249695, - "learning_rate": 4.6753703703703704e-05, - "loss": 0.5891, - "step": 4754 - }, - { - "epoch": 0.16690359606170693, - "grad_norm": 0.5464259386062622, - "learning_rate": 4.6751851851851855e-05, - "loss": 0.512, - "step": 4755 - }, - { - "epoch": 0.16693869671282402, - "grad_norm": 0.5045207142829895, - "learning_rate": 4.6750000000000005e-05, - "loss": 0.4371, - "step": 4756 - }, - { - "epoch": 0.1669737973639411, - "grad_norm": 0.5974268317222595, - "learning_rate": 4.6748148148148155e-05, - "loss": 0.5361, - "step": 4757 - }, - { - "epoch": 0.16700889801505817, - "grad_norm": 0.7026114463806152, - "learning_rate": 4.67462962962963e-05, - "loss": 0.6428, - "step": 4758 - }, - { - "epoch": 0.16704399866617525, - "grad_norm": 0.825789213180542, - "learning_rate": 4.674444444444445e-05, - "loss": 0.3664, - "step": 4759 - }, - { - "epoch": 0.16707909931729234, - "grad_norm": 0.5646992325782776, - "learning_rate": 4.674259259259259e-05, - "loss": 0.5786, - "step": 4760 - }, - { - "epoch": 0.16711419996840943, - "grad_norm": 0.7044955492019653, - "learning_rate": 4.674074074074074e-05, - "loss": 0.6067, - "step": 4761 - }, - { - "epoch": 0.1671493006195265, - "grad_norm": 0.5174485445022583, - "learning_rate": 4.673888888888889e-05, - "loss": 0.4749, - "step": 4762 - }, - { - "epoch": 0.16718440127064357, - "grad_norm": 0.5818511843681335, - "learning_rate": 4.673703703703704e-05, - "loss": 0.5161, - "step": 4763 - }, - { - "epoch": 0.16721950192176066, - "grad_norm": 0.5065397620201111, - "learning_rate": 4.6735185185185185e-05, - "loss": 0.5275, - "step": 4764 - }, - { - "epoch": 0.16725460257287772, - "grad_norm": 0.5362657904624939, - "learning_rate": 4.6733333333333335e-05, - "loss": 0.4874, - "step": 4765 - }, - { - "epoch": 0.1672897032239948, - "grad_norm": 0.5626720190048218, - "learning_rate": 4.673148148148148e-05, - "loss": 0.512, - "step": 4766 - }, - { - "epoch": 0.1673248038751119, - "grad_norm": 0.5682489275932312, - "learning_rate": 4.6729629629629636e-05, - "loss": 0.5143, - "step": 4767 - }, - { - "epoch": 0.16735990452622895, - "grad_norm": 0.5516443252563477, - "learning_rate": 4.672777777777778e-05, - "loss": 0.4232, - "step": 4768 - }, - { - "epoch": 0.16739500517734604, - "grad_norm": 0.9740980267524719, - "learning_rate": 4.672592592592593e-05, - "loss": 0.4995, - "step": 4769 - }, - { - "epoch": 0.16743010582846313, - "grad_norm": 0.5774300694465637, - "learning_rate": 4.672407407407407e-05, - "loss": 0.5571, - "step": 4770 - }, - { - "epoch": 0.1674652064795802, - "grad_norm": 0.4859222173690796, - "learning_rate": 4.672222222222222e-05, - "loss": 0.4165, - "step": 4771 - }, - { - "epoch": 0.16750030713069727, - "grad_norm": 0.48367923498153687, - "learning_rate": 4.672037037037037e-05, - "loss": 0.4537, - "step": 4772 - }, - { - "epoch": 0.16753540778181436, - "grad_norm": 0.5171767473220825, - "learning_rate": 4.671851851851852e-05, - "loss": 0.4397, - "step": 4773 - }, - { - "epoch": 0.16757050843293142, - "grad_norm": 0.6720994114875793, - "learning_rate": 4.671666666666667e-05, - "loss": 0.5254, - "step": 4774 - }, - { - "epoch": 0.1676056090840485, - "grad_norm": 0.6219324469566345, - "learning_rate": 4.6714814814814816e-05, - "loss": 0.4951, - "step": 4775 - }, - { - "epoch": 0.1676407097351656, - "grad_norm": 0.544092059135437, - "learning_rate": 4.6712962962962966e-05, - "loss": 0.447, - "step": 4776 - }, - { - "epoch": 0.16767581038628265, - "grad_norm": 0.7074709534645081, - "learning_rate": 4.671111111111111e-05, - "loss": 0.55, - "step": 4777 - }, - { - "epoch": 0.16771091103739974, - "grad_norm": 0.9448363780975342, - "learning_rate": 4.6709259259259266e-05, - "loss": 0.6, - "step": 4778 - }, - { - "epoch": 0.16774601168851683, - "grad_norm": 0.5409989953041077, - "learning_rate": 4.670740740740741e-05, - "loss": 0.5442, - "step": 4779 - }, - { - "epoch": 0.1677811123396339, - "grad_norm": 0.5290169715881348, - "learning_rate": 4.670555555555556e-05, - "loss": 0.4473, - "step": 4780 - }, - { - "epoch": 0.16781621299075097, - "grad_norm": 0.6510874032974243, - "learning_rate": 4.67037037037037e-05, - "loss": 0.5789, - "step": 4781 - }, - { - "epoch": 0.16785131364186806, - "grad_norm": 0.5435299873352051, - "learning_rate": 4.670185185185185e-05, - "loss": 0.4225, - "step": 4782 - }, - { - "epoch": 0.16788641429298515, - "grad_norm": 0.48611369729042053, - "learning_rate": 4.6700000000000003e-05, - "loss": 0.5853, - "step": 4783 - }, - { - "epoch": 0.1679215149441022, - "grad_norm": 0.5380290746688843, - "learning_rate": 4.6698148148148154e-05, - "loss": 0.5639, - "step": 4784 - }, - { - "epoch": 0.1679566155952193, - "grad_norm": 0.5618020296096802, - "learning_rate": 4.66962962962963e-05, - "loss": 0.5469, - "step": 4785 - }, - { - "epoch": 0.16799171624633638, - "grad_norm": 0.55440354347229, - "learning_rate": 4.669444444444445e-05, - "loss": 0.4381, - "step": 4786 - }, - { - "epoch": 0.16802681689745344, - "grad_norm": 0.5260424017906189, - "learning_rate": 4.669259259259259e-05, - "loss": 0.5519, - "step": 4787 - }, - { - "epoch": 0.16806191754857053, - "grad_norm": 0.5616161227226257, - "learning_rate": 4.669074074074074e-05, - "loss": 0.5234, - "step": 4788 - }, - { - "epoch": 0.16809701819968761, - "grad_norm": 0.6398287415504456, - "learning_rate": 4.668888888888889e-05, - "loss": 0.6111, - "step": 4789 - }, - { - "epoch": 0.16813211885080467, - "grad_norm": 0.5085543990135193, - "learning_rate": 4.668703703703704e-05, - "loss": 0.6139, - "step": 4790 - }, - { - "epoch": 0.16816721950192176, - "grad_norm": 0.6356562376022339, - "learning_rate": 4.6685185185185184e-05, - "loss": 0.5795, - "step": 4791 - }, - { - "epoch": 0.16820232015303885, - "grad_norm": 0.5776880383491516, - "learning_rate": 4.6683333333333334e-05, - "loss": 0.5077, - "step": 4792 - }, - { - "epoch": 0.1682374208041559, - "grad_norm": 0.5842464566230774, - "learning_rate": 4.6681481481481484e-05, - "loss": 0.6004, - "step": 4793 - }, - { - "epoch": 0.168272521455273, - "grad_norm": 0.5426191687583923, - "learning_rate": 4.6679629629629634e-05, - "loss": 0.5558, - "step": 4794 - }, - { - "epoch": 0.16830762210639008, - "grad_norm": 0.5978937149047852, - "learning_rate": 4.6677777777777785e-05, - "loss": 0.4206, - "step": 4795 - }, - { - "epoch": 0.16834272275750714, - "grad_norm": 0.48916009068489075, - "learning_rate": 4.667592592592593e-05, - "loss": 0.5335, - "step": 4796 - }, - { - "epoch": 0.16837782340862423, - "grad_norm": 0.4759925603866577, - "learning_rate": 4.667407407407408e-05, - "loss": 0.4581, - "step": 4797 - }, - { - "epoch": 0.16841292405974131, - "grad_norm": 0.519025981426239, - "learning_rate": 4.667222222222222e-05, - "loss": 0.5464, - "step": 4798 - }, - { - "epoch": 0.16844802471085837, - "grad_norm": 0.48020994663238525, - "learning_rate": 4.667037037037037e-05, - "loss": 0.6216, - "step": 4799 - }, - { - "epoch": 0.16848312536197546, - "grad_norm": 0.474224328994751, - "learning_rate": 4.666851851851852e-05, - "loss": 0.5348, - "step": 4800 - }, - { - "epoch": 0.16851822601309255, - "grad_norm": 0.6934823393821716, - "learning_rate": 4.666666666666667e-05, - "loss": 0.5585, - "step": 4801 - }, - { - "epoch": 0.1685533266642096, - "grad_norm": 0.6566288471221924, - "learning_rate": 4.6664814814814815e-05, - "loss": 0.5264, - "step": 4802 - }, - { - "epoch": 0.1685884273153267, - "grad_norm": 0.550635576248169, - "learning_rate": 4.6662962962962965e-05, - "loss": 0.628, - "step": 4803 - }, - { - "epoch": 0.16862352796644378, - "grad_norm": 0.5807223320007324, - "learning_rate": 4.666111111111111e-05, - "loss": 0.5911, - "step": 4804 - }, - { - "epoch": 0.16865862861756087, - "grad_norm": 0.42847058176994324, - "learning_rate": 4.6659259259259265e-05, - "loss": 0.437, - "step": 4805 - }, - { - "epoch": 0.16869372926867793, - "grad_norm": 0.4936668276786804, - "learning_rate": 4.665740740740741e-05, - "loss": 0.5934, - "step": 4806 - }, - { - "epoch": 0.16872882991979501, - "grad_norm": 0.6832250356674194, - "learning_rate": 4.665555555555556e-05, - "loss": 0.4993, - "step": 4807 - }, - { - "epoch": 0.1687639305709121, - "grad_norm": 0.5386900305747986, - "learning_rate": 4.66537037037037e-05, - "loss": 0.4447, - "step": 4808 - }, - { - "epoch": 0.16879903122202916, - "grad_norm": 0.5358811616897583, - "learning_rate": 4.665185185185185e-05, - "loss": 0.4555, - "step": 4809 - }, - { - "epoch": 0.16883413187314625, - "grad_norm": 0.5865157246589661, - "learning_rate": 4.665e-05, - "loss": 0.5974, - "step": 4810 - }, - { - "epoch": 0.16886923252426334, - "grad_norm": 0.5821195244789124, - "learning_rate": 4.664814814814815e-05, - "loss": 0.5971, - "step": 4811 - }, - { - "epoch": 0.1689043331753804, - "grad_norm": 0.5985644459724426, - "learning_rate": 4.6646296296296296e-05, - "loss": 0.5106, - "step": 4812 - }, - { - "epoch": 0.16893943382649748, - "grad_norm": 0.5913007855415344, - "learning_rate": 4.6644444444444446e-05, - "loss": 0.6123, - "step": 4813 - }, - { - "epoch": 0.16897453447761457, - "grad_norm": 0.4988388419151306, - "learning_rate": 4.6642592592592596e-05, - "loss": 0.4776, - "step": 4814 - }, - { - "epoch": 0.16900963512873163, - "grad_norm": 0.5513761043548584, - "learning_rate": 4.664074074074074e-05, - "loss": 0.6145, - "step": 4815 - }, - { - "epoch": 0.16904473577984871, - "grad_norm": 0.534234881401062, - "learning_rate": 4.6638888888888896e-05, - "loss": 0.5827, - "step": 4816 - }, - { - "epoch": 0.1690798364309658, - "grad_norm": 0.5099828839302063, - "learning_rate": 4.663703703703704e-05, - "loss": 0.5617, - "step": 4817 - }, - { - "epoch": 0.16911493708208286, - "grad_norm": 0.480173796415329, - "learning_rate": 4.663518518518519e-05, - "loss": 0.5665, - "step": 4818 - }, - { - "epoch": 0.16915003773319995, - "grad_norm": 0.5405309796333313, - "learning_rate": 4.663333333333333e-05, - "loss": 0.5449, - "step": 4819 - }, - { - "epoch": 0.16918513838431704, - "grad_norm": 0.5550108551979065, - "learning_rate": 4.663148148148148e-05, - "loss": 0.6152, - "step": 4820 - }, - { - "epoch": 0.1692202390354341, - "grad_norm": 0.5132237076759338, - "learning_rate": 4.662962962962963e-05, - "loss": 0.5914, - "step": 4821 - }, - { - "epoch": 0.16925533968655118, - "grad_norm": 0.46771040558815, - "learning_rate": 4.662777777777778e-05, - "loss": 0.5265, - "step": 4822 - }, - { - "epoch": 0.16929044033766827, - "grad_norm": 0.48641157150268555, - "learning_rate": 4.662592592592593e-05, - "loss": 0.5998, - "step": 4823 - }, - { - "epoch": 0.16932554098878536, - "grad_norm": 0.5127284526824951, - "learning_rate": 4.662407407407408e-05, - "loss": 0.5524, - "step": 4824 - }, - { - "epoch": 0.16936064163990242, - "grad_norm": 0.5459383130073547, - "learning_rate": 4.662222222222222e-05, - "loss": 0.5126, - "step": 4825 - }, - { - "epoch": 0.1693957422910195, - "grad_norm": 0.5956749320030212, - "learning_rate": 4.662037037037038e-05, - "loss": 0.6706, - "step": 4826 - }, - { - "epoch": 0.1694308429421366, - "grad_norm": 0.5186564922332764, - "learning_rate": 4.661851851851852e-05, - "loss": 0.4714, - "step": 4827 - }, - { - "epoch": 0.16946594359325365, - "grad_norm": 0.6314890384674072, - "learning_rate": 4.661666666666667e-05, - "loss": 0.6363, - "step": 4828 - }, - { - "epoch": 0.16950104424437074, - "grad_norm": 0.5530506372451782, - "learning_rate": 4.6614814814814814e-05, - "loss": 0.5317, - "step": 4829 - }, - { - "epoch": 0.16953614489548782, - "grad_norm": 0.54120934009552, - "learning_rate": 4.6612962962962964e-05, - "loss": 0.5024, - "step": 4830 - }, - { - "epoch": 0.16957124554660488, - "grad_norm": 0.5057161450386047, - "learning_rate": 4.6611111111111114e-05, - "loss": 0.4771, - "step": 4831 - }, - { - "epoch": 0.16960634619772197, - "grad_norm": 0.45853284001350403, - "learning_rate": 4.6609259259259264e-05, - "loss": 0.4635, - "step": 4832 - }, - { - "epoch": 0.16964144684883906, - "grad_norm": 0.541888415813446, - "learning_rate": 4.660740740740741e-05, - "loss": 0.5494, - "step": 4833 - }, - { - "epoch": 0.16967654749995612, - "grad_norm": 0.480841726064682, - "learning_rate": 4.660555555555556e-05, - "loss": 0.4881, - "step": 4834 - }, - { - "epoch": 0.1697116481510732, - "grad_norm": 0.548188328742981, - "learning_rate": 4.660370370370371e-05, - "loss": 0.5167, - "step": 4835 - }, - { - "epoch": 0.1697467488021903, - "grad_norm": 0.570173442363739, - "learning_rate": 4.660185185185185e-05, - "loss": 0.4501, - "step": 4836 - }, - { - "epoch": 0.16978184945330735, - "grad_norm": 0.5166512727737427, - "learning_rate": 4.660000000000001e-05, - "loss": 0.5343, - "step": 4837 - }, - { - "epoch": 0.16981695010442444, - "grad_norm": 0.5940234065055847, - "learning_rate": 4.659814814814815e-05, - "loss": 0.5852, - "step": 4838 - }, - { - "epoch": 0.16985205075554152, - "grad_norm": 0.473504900932312, - "learning_rate": 4.65962962962963e-05, - "loss": 0.5832, - "step": 4839 - }, - { - "epoch": 0.16988715140665858, - "grad_norm": 0.48672622442245483, - "learning_rate": 4.6594444444444445e-05, - "loss": 0.4953, - "step": 4840 - }, - { - "epoch": 0.16992225205777567, - "grad_norm": 0.6355122327804565, - "learning_rate": 4.6592592592592595e-05, - "loss": 0.5964, - "step": 4841 - }, - { - "epoch": 0.16995735270889276, - "grad_norm": 0.5395540595054626, - "learning_rate": 4.659074074074074e-05, - "loss": 0.4989, - "step": 4842 - }, - { - "epoch": 0.16999245336000982, - "grad_norm": 0.5585625767707825, - "learning_rate": 4.6588888888888895e-05, - "loss": 0.6399, - "step": 4843 - }, - { - "epoch": 0.1700275540111269, - "grad_norm": 0.4825995862483978, - "learning_rate": 4.658703703703704e-05, - "loss": 0.5016, - "step": 4844 - }, - { - "epoch": 0.170062654662244, - "grad_norm": 0.5703474879264832, - "learning_rate": 4.658518518518519e-05, - "loss": 0.4582, - "step": 4845 - }, - { - "epoch": 0.17009775531336108, - "grad_norm": 0.4751543402671814, - "learning_rate": 4.658333333333333e-05, - "loss": 0.5607, - "step": 4846 - }, - { - "epoch": 0.17013285596447814, - "grad_norm": 0.517616331577301, - "learning_rate": 4.658148148148148e-05, - "loss": 0.5755, - "step": 4847 - }, - { - "epoch": 0.17016795661559522, - "grad_norm": 0.4763631224632263, - "learning_rate": 4.657962962962963e-05, - "loss": 0.4446, - "step": 4848 - }, - { - "epoch": 0.1702030572667123, - "grad_norm": 0.492542564868927, - "learning_rate": 4.657777777777778e-05, - "loss": 0.552, - "step": 4849 - }, - { - "epoch": 0.17023815791782937, - "grad_norm": 0.5501912236213684, - "learning_rate": 4.6575925925925925e-05, - "loss": 0.594, - "step": 4850 - }, - { - "epoch": 0.17027325856894646, - "grad_norm": 0.8800676465034485, - "learning_rate": 4.6574074074074076e-05, - "loss": 0.5772, - "step": 4851 - }, - { - "epoch": 0.17030835922006354, - "grad_norm": 0.550853431224823, - "learning_rate": 4.6572222222222226e-05, - "loss": 0.6127, - "step": 4852 - }, - { - "epoch": 0.1703434598711806, - "grad_norm": 0.4482005536556244, - "learning_rate": 4.6570370370370376e-05, - "loss": 0.4649, - "step": 4853 - }, - { - "epoch": 0.1703785605222977, - "grad_norm": 0.5220835208892822, - "learning_rate": 4.656851851851852e-05, - "loss": 0.4648, - "step": 4854 - }, - { - "epoch": 0.17041366117341478, - "grad_norm": 0.6479297280311584, - "learning_rate": 4.656666666666667e-05, - "loss": 0.5651, - "step": 4855 - }, - { - "epoch": 0.17044876182453184, - "grad_norm": 0.522066056728363, - "learning_rate": 4.656481481481482e-05, - "loss": 0.5157, - "step": 4856 - }, - { - "epoch": 0.17048386247564892, - "grad_norm": 0.6036342978477478, - "learning_rate": 4.656296296296296e-05, - "loss": 0.507, - "step": 4857 - }, - { - "epoch": 0.170518963126766, - "grad_norm": 0.5418443083763123, - "learning_rate": 4.656111111111111e-05, - "loss": 0.5521, - "step": 4858 - }, - { - "epoch": 0.17055406377788307, - "grad_norm": 0.6635566353797913, - "learning_rate": 4.655925925925926e-05, - "loss": 0.4928, - "step": 4859 - }, - { - "epoch": 0.17058916442900016, - "grad_norm": 0.4809054434299469, - "learning_rate": 4.655740740740741e-05, - "loss": 0.501, - "step": 4860 - }, - { - "epoch": 0.17062426508011724, - "grad_norm": 0.5223904848098755, - "learning_rate": 4.6555555555555556e-05, - "loss": 0.6249, - "step": 4861 - }, - { - "epoch": 0.1706593657312343, - "grad_norm": 0.572012186050415, - "learning_rate": 4.6553703703703706e-05, - "loss": 0.592, - "step": 4862 - }, - { - "epoch": 0.1706944663823514, - "grad_norm": 0.538120687007904, - "learning_rate": 4.655185185185185e-05, - "loss": 0.4789, - "step": 4863 - }, - { - "epoch": 0.17072956703346848, - "grad_norm": 0.48341530561447144, - "learning_rate": 4.655000000000001e-05, - "loss": 0.5663, - "step": 4864 - }, - { - "epoch": 0.17076466768458554, - "grad_norm": 0.6038640141487122, - "learning_rate": 4.654814814814815e-05, - "loss": 0.4818, - "step": 4865 - }, - { - "epoch": 0.17079976833570262, - "grad_norm": 0.5040479302406311, - "learning_rate": 4.65462962962963e-05, - "loss": 0.5616, - "step": 4866 - }, - { - "epoch": 0.1708348689868197, - "grad_norm": 0.4378325641155243, - "learning_rate": 4.6544444444444443e-05, - "loss": 0.5539, - "step": 4867 - }, - { - "epoch": 0.1708699696379368, - "grad_norm": 0.5658340454101562, - "learning_rate": 4.6542592592592594e-05, - "loss": 0.4362, - "step": 4868 - }, - { - "epoch": 0.17090507028905386, - "grad_norm": 0.5805536508560181, - "learning_rate": 4.6540740740740744e-05, - "loss": 0.5425, - "step": 4869 - }, - { - "epoch": 0.17094017094017094, - "grad_norm": 0.5365264415740967, - "learning_rate": 4.6538888888888894e-05, - "loss": 0.5179, - "step": 4870 - }, - { - "epoch": 0.17097527159128803, - "grad_norm": 0.5326135754585266, - "learning_rate": 4.653703703703704e-05, - "loss": 0.5015, - "step": 4871 - }, - { - "epoch": 0.1710103722424051, - "grad_norm": 0.42415285110473633, - "learning_rate": 4.653518518518519e-05, - "loss": 0.4784, - "step": 4872 - }, - { - "epoch": 0.17104547289352218, - "grad_norm": 0.5571331977844238, - "learning_rate": 4.653333333333334e-05, - "loss": 0.5136, - "step": 4873 - }, - { - "epoch": 0.17108057354463926, - "grad_norm": 0.5394936203956604, - "learning_rate": 4.653148148148148e-05, - "loss": 0.5025, - "step": 4874 - }, - { - "epoch": 0.17111567419575632, - "grad_norm": 0.5177575349807739, - "learning_rate": 4.652962962962963e-05, - "loss": 0.4177, - "step": 4875 - }, - { - "epoch": 0.1711507748468734, - "grad_norm": 0.5064058899879456, - "learning_rate": 4.652777777777778e-05, - "loss": 0.4555, - "step": 4876 - }, - { - "epoch": 0.1711858754979905, - "grad_norm": 0.4692469835281372, - "learning_rate": 4.652592592592593e-05, - "loss": 0.5162, - "step": 4877 - }, - { - "epoch": 0.17122097614910756, - "grad_norm": 0.4402182102203369, - "learning_rate": 4.6524074074074074e-05, - "loss": 0.4528, - "step": 4878 - }, - { - "epoch": 0.17125607680022464, - "grad_norm": 0.4781073331832886, - "learning_rate": 4.6522222222222224e-05, - "loss": 0.4884, - "step": 4879 - }, - { - "epoch": 0.17129117745134173, - "grad_norm": 0.5326604843139648, - "learning_rate": 4.6520370370370375e-05, - "loss": 0.4941, - "step": 4880 - }, - { - "epoch": 0.1713262781024588, - "grad_norm": 0.5791831016540527, - "learning_rate": 4.6518518518518525e-05, - "loss": 0.3766, - "step": 4881 - }, - { - "epoch": 0.17136137875357588, - "grad_norm": 0.5248042345046997, - "learning_rate": 4.651666666666667e-05, - "loss": 0.5268, - "step": 4882 - }, - { - "epoch": 0.17139647940469296, - "grad_norm": 0.5403181910514832, - "learning_rate": 4.651481481481482e-05, - "loss": 0.5953, - "step": 4883 - }, - { - "epoch": 0.17143158005581002, - "grad_norm": 0.5542039275169373, - "learning_rate": 4.651296296296296e-05, - "loss": 0.512, - "step": 4884 - }, - { - "epoch": 0.1714666807069271, - "grad_norm": 0.5748840570449829, - "learning_rate": 4.651111111111111e-05, - "loss": 0.5523, - "step": 4885 - }, - { - "epoch": 0.1715017813580442, - "grad_norm": 0.603463888168335, - "learning_rate": 4.650925925925926e-05, - "loss": 0.5971, - "step": 4886 - }, - { - "epoch": 0.17153688200916126, - "grad_norm": 0.5240535736083984, - "learning_rate": 4.650740740740741e-05, - "loss": 0.5768, - "step": 4887 - }, - { - "epoch": 0.17157198266027834, - "grad_norm": 0.4917793273925781, - "learning_rate": 4.6505555555555555e-05, - "loss": 0.5262, - "step": 4888 - }, - { - "epoch": 0.17160708331139543, - "grad_norm": 0.5498776435852051, - "learning_rate": 4.6503703703703705e-05, - "loss": 0.5419, - "step": 4889 - }, - { - "epoch": 0.17164218396251252, - "grad_norm": 0.48102080821990967, - "learning_rate": 4.6501851851851855e-05, - "loss": 0.5376, - "step": 4890 - }, - { - "epoch": 0.17167728461362958, - "grad_norm": 0.5780403017997742, - "learning_rate": 4.6500000000000005e-05, - "loss": 0.496, - "step": 4891 - }, - { - "epoch": 0.17171238526474666, - "grad_norm": 0.5285931825637817, - "learning_rate": 4.649814814814815e-05, - "loss": 0.6018, - "step": 4892 - }, - { - "epoch": 0.17174748591586375, - "grad_norm": 0.5806334018707275, - "learning_rate": 4.64962962962963e-05, - "loss": 0.4777, - "step": 4893 - }, - { - "epoch": 0.1717825865669808, - "grad_norm": 0.5468681454658508, - "learning_rate": 4.649444444444445e-05, - "loss": 0.5186, - "step": 4894 - }, - { - "epoch": 0.1718176872180979, - "grad_norm": 0.5613112449645996, - "learning_rate": 4.649259259259259e-05, - "loss": 0.5705, - "step": 4895 - }, - { - "epoch": 0.17185278786921498, - "grad_norm": 0.47983673214912415, - "learning_rate": 4.649074074074074e-05, - "loss": 0.4618, - "step": 4896 - }, - { - "epoch": 0.17188788852033204, - "grad_norm": 0.47354385256767273, - "learning_rate": 4.648888888888889e-05, - "loss": 0.4619, - "step": 4897 - }, - { - "epoch": 0.17192298917144913, - "grad_norm": 0.511260986328125, - "learning_rate": 4.648703703703704e-05, - "loss": 0.4286, - "step": 4898 - }, - { - "epoch": 0.17195808982256622, - "grad_norm": 0.6059295535087585, - "learning_rate": 4.6485185185185186e-05, - "loss": 0.5675, - "step": 4899 - }, - { - "epoch": 0.17199319047368328, - "grad_norm": 0.5124836564064026, - "learning_rate": 4.6483333333333336e-05, - "loss": 0.4261, - "step": 4900 - }, - { - "epoch": 0.17202829112480036, - "grad_norm": 0.5206772089004517, - "learning_rate": 4.648148148148148e-05, - "loss": 0.5321, - "step": 4901 - }, - { - "epoch": 0.17206339177591745, - "grad_norm": 0.5355825424194336, - "learning_rate": 4.6479629629629636e-05, - "loss": 0.449, - "step": 4902 - }, - { - "epoch": 0.1720984924270345, - "grad_norm": 0.49907276034355164, - "learning_rate": 4.647777777777778e-05, - "loss": 0.4222, - "step": 4903 - }, - { - "epoch": 0.1721335930781516, - "grad_norm": 0.4414079189300537, - "learning_rate": 4.647592592592593e-05, - "loss": 0.463, - "step": 4904 - }, - { - "epoch": 0.17216869372926868, - "grad_norm": 0.46008288860321045, - "learning_rate": 4.647407407407407e-05, - "loss": 0.5777, - "step": 4905 - }, - { - "epoch": 0.17220379438038574, - "grad_norm": 0.4851212501525879, - "learning_rate": 4.647222222222222e-05, - "loss": 0.5844, - "step": 4906 - }, - { - "epoch": 0.17223889503150283, - "grad_norm": 0.5814198851585388, - "learning_rate": 4.647037037037037e-05, - "loss": 0.4373, - "step": 4907 - }, - { - "epoch": 0.17227399568261992, - "grad_norm": 0.5810589790344238, - "learning_rate": 4.6468518518518523e-05, - "loss": 0.5556, - "step": 4908 - }, - { - "epoch": 0.17230909633373698, - "grad_norm": 0.606072723865509, - "learning_rate": 4.646666666666667e-05, - "loss": 0.6081, - "step": 4909 - }, - { - "epoch": 0.17234419698485406, - "grad_norm": 0.42145127058029175, - "learning_rate": 4.646481481481482e-05, - "loss": 0.556, - "step": 4910 - }, - { - "epoch": 0.17237929763597115, - "grad_norm": 0.6252307295799255, - "learning_rate": 4.646296296296297e-05, - "loss": 0.4844, - "step": 4911 - }, - { - "epoch": 0.17241439828708824, - "grad_norm": 0.573260486125946, - "learning_rate": 4.646111111111111e-05, - "loss": 0.4712, - "step": 4912 - }, - { - "epoch": 0.1724494989382053, - "grad_norm": 0.46148672699928284, - "learning_rate": 4.645925925925926e-05, - "loss": 0.4541, - "step": 4913 - }, - { - "epoch": 0.17248459958932238, - "grad_norm": 0.615251898765564, - "learning_rate": 4.645740740740741e-05, - "loss": 0.4804, - "step": 4914 - }, - { - "epoch": 0.17251970024043947, - "grad_norm": 0.7108584642410278, - "learning_rate": 4.645555555555556e-05, - "loss": 0.5267, - "step": 4915 - }, - { - "epoch": 0.17255480089155653, - "grad_norm": 0.5607901215553284, - "learning_rate": 4.6453703703703704e-05, - "loss": 0.5254, - "step": 4916 - }, - { - "epoch": 0.17258990154267362, - "grad_norm": 0.49067968130111694, - "learning_rate": 4.6451851851851854e-05, - "loss": 0.5315, - "step": 4917 - }, - { - "epoch": 0.1726250021937907, - "grad_norm": 0.6451370120048523, - "learning_rate": 4.6450000000000004e-05, - "loss": 0.5135, - "step": 4918 - }, - { - "epoch": 0.17266010284490776, - "grad_norm": 0.4761759042739868, - "learning_rate": 4.6448148148148154e-05, - "loss": 0.5022, - "step": 4919 - }, - { - "epoch": 0.17269520349602485, - "grad_norm": 0.6804413795471191, - "learning_rate": 4.64462962962963e-05, - "loss": 0.4671, - "step": 4920 - }, - { - "epoch": 0.17273030414714194, - "grad_norm": 0.6273852586746216, - "learning_rate": 4.644444444444445e-05, - "loss": 0.4933, - "step": 4921 - }, - { - "epoch": 0.172765404798259, - "grad_norm": 0.5211905837059021, - "learning_rate": 4.644259259259259e-05, - "loss": 0.5402, - "step": 4922 - }, - { - "epoch": 0.17280050544937609, - "grad_norm": 0.4973209500312805, - "learning_rate": 4.644074074074074e-05, - "loss": 0.5038, - "step": 4923 - }, - { - "epoch": 0.17283560610049317, - "grad_norm": 0.5006121397018433, - "learning_rate": 4.643888888888889e-05, - "loss": 0.6315, - "step": 4924 - }, - { - "epoch": 0.17287070675161023, - "grad_norm": 0.5867618322372437, - "learning_rate": 4.643703703703704e-05, - "loss": 0.5775, - "step": 4925 - }, - { - "epoch": 0.17290580740272732, - "grad_norm": 0.5252653360366821, - "learning_rate": 4.6435185185185185e-05, - "loss": 0.4981, - "step": 4926 - }, - { - "epoch": 0.1729409080538444, - "grad_norm": 0.6104229092597961, - "learning_rate": 4.6433333333333335e-05, - "loss": 0.5122, - "step": 4927 - }, - { - "epoch": 0.17297600870496146, - "grad_norm": 0.5326687097549438, - "learning_rate": 4.643148148148148e-05, - "loss": 0.5018, - "step": 4928 - }, - { - "epoch": 0.17301110935607855, - "grad_norm": 0.42430242896080017, - "learning_rate": 4.6429629629629635e-05, - "loss": 0.3666, - "step": 4929 - }, - { - "epoch": 0.17304621000719564, - "grad_norm": 0.49524688720703125, - "learning_rate": 4.642777777777778e-05, - "loss": 0.4897, - "step": 4930 - }, - { - "epoch": 0.1730813106583127, - "grad_norm": 0.48289695382118225, - "learning_rate": 4.642592592592593e-05, - "loss": 0.5845, - "step": 4931 - }, - { - "epoch": 0.17311641130942979, - "grad_norm": 0.5003533363342285, - "learning_rate": 4.642407407407408e-05, - "loss": 0.4701, - "step": 4932 - }, - { - "epoch": 0.17315151196054687, - "grad_norm": 0.4762292802333832, - "learning_rate": 4.642222222222222e-05, - "loss": 0.4259, - "step": 4933 - }, - { - "epoch": 0.17318661261166396, - "grad_norm": 0.558777928352356, - "learning_rate": 4.642037037037037e-05, - "loss": 0.4805, - "step": 4934 - }, - { - "epoch": 0.17322171326278102, - "grad_norm": 0.603384256362915, - "learning_rate": 4.641851851851852e-05, - "loss": 0.4687, - "step": 4935 - }, - { - "epoch": 0.1732568139138981, - "grad_norm": 0.5070538520812988, - "learning_rate": 4.641666666666667e-05, - "loss": 0.4493, - "step": 4936 - }, - { - "epoch": 0.1732919145650152, - "grad_norm": 0.45642417669296265, - "learning_rate": 4.6414814814814816e-05, - "loss": 0.4237, - "step": 4937 - }, - { - "epoch": 0.17332701521613225, - "grad_norm": 0.6689788699150085, - "learning_rate": 4.6412962962962966e-05, - "loss": 0.527, - "step": 4938 - }, - { - "epoch": 0.17336211586724934, - "grad_norm": 0.42315101623535156, - "learning_rate": 4.641111111111111e-05, - "loss": 0.5457, - "step": 4939 - }, - { - "epoch": 0.17339721651836643, - "grad_norm": 0.6278716325759888, - "learning_rate": 4.6409259259259266e-05, - "loss": 0.5015, - "step": 4940 - }, - { - "epoch": 0.17343231716948349, - "grad_norm": 0.5149016380310059, - "learning_rate": 4.640740740740741e-05, - "loss": 0.5942, - "step": 4941 - }, - { - "epoch": 0.17346741782060057, - "grad_norm": 0.6889511942863464, - "learning_rate": 4.640555555555556e-05, - "loss": 0.5234, - "step": 4942 - }, - { - "epoch": 0.17350251847171766, - "grad_norm": 0.5988193154335022, - "learning_rate": 4.64037037037037e-05, - "loss": 0.4334, - "step": 4943 - }, - { - "epoch": 0.17353761912283472, - "grad_norm": 0.5169408917427063, - "learning_rate": 4.640185185185185e-05, - "loss": 0.4914, - "step": 4944 - }, - { - "epoch": 0.1735727197739518, - "grad_norm": 0.5170817971229553, - "learning_rate": 4.64e-05, - "loss": 0.5071, - "step": 4945 - }, - { - "epoch": 0.1736078204250689, - "grad_norm": 0.5379530787467957, - "learning_rate": 4.639814814814815e-05, - "loss": 0.5259, - "step": 4946 - }, - { - "epoch": 0.17364292107618595, - "grad_norm": 0.49546563625335693, - "learning_rate": 4.6396296296296296e-05, - "loss": 0.5806, - "step": 4947 - }, - { - "epoch": 0.17367802172730304, - "grad_norm": 0.5907285213470459, - "learning_rate": 4.6394444444444447e-05, - "loss": 0.5328, - "step": 4948 - }, - { - "epoch": 0.17371312237842013, - "grad_norm": 0.6208587884902954, - "learning_rate": 4.639259259259259e-05, - "loss": 0.5346, - "step": 4949 - }, - { - "epoch": 0.17374822302953719, - "grad_norm": 0.567801833152771, - "learning_rate": 4.639074074074075e-05, - "loss": 0.5311, - "step": 4950 - }, - { - "epoch": 0.17378332368065427, - "grad_norm": 0.5492358207702637, - "learning_rate": 4.638888888888889e-05, - "loss": 0.5878, - "step": 4951 - }, - { - "epoch": 0.17381842433177136, - "grad_norm": 0.4627256393432617, - "learning_rate": 4.638703703703704e-05, - "loss": 0.4913, - "step": 4952 - }, - { - "epoch": 0.17385352498288842, - "grad_norm": 0.47718265652656555, - "learning_rate": 4.638518518518519e-05, - "loss": 0.455, - "step": 4953 - }, - { - "epoch": 0.1738886256340055, - "grad_norm": 0.48630398511886597, - "learning_rate": 4.6383333333333334e-05, - "loss": 0.43, - "step": 4954 - }, - { - "epoch": 0.1739237262851226, - "grad_norm": 0.49374687671661377, - "learning_rate": 4.6381481481481484e-05, - "loss": 0.5775, - "step": 4955 - }, - { - "epoch": 0.17395882693623968, - "grad_norm": 0.5707430243492126, - "learning_rate": 4.6379629629629634e-05, - "loss": 0.4095, - "step": 4956 - }, - { - "epoch": 0.17399392758735674, - "grad_norm": 0.5617464184761047, - "learning_rate": 4.6377777777777784e-05, - "loss": 0.4239, - "step": 4957 - }, - { - "epoch": 0.17402902823847383, - "grad_norm": 0.5591813325881958, - "learning_rate": 4.637592592592593e-05, - "loss": 0.5929, - "step": 4958 - }, - { - "epoch": 0.1740641288895909, - "grad_norm": 0.903171181678772, - "learning_rate": 4.637407407407408e-05, - "loss": 0.6173, - "step": 4959 - }, - { - "epoch": 0.17409922954070797, - "grad_norm": 0.5214223265647888, - "learning_rate": 4.637222222222222e-05, - "loss": 0.5655, - "step": 4960 - }, - { - "epoch": 0.17413433019182506, - "grad_norm": 0.536648154258728, - "learning_rate": 4.637037037037038e-05, - "loss": 0.4616, - "step": 4961 - }, - { - "epoch": 0.17416943084294215, - "grad_norm": 0.4661349952220917, - "learning_rate": 4.636851851851852e-05, - "loss": 0.4812, - "step": 4962 - }, - { - "epoch": 0.1742045314940592, - "grad_norm": 0.5595285296440125, - "learning_rate": 4.636666666666667e-05, - "loss": 0.5424, - "step": 4963 - }, - { - "epoch": 0.1742396321451763, - "grad_norm": 0.7257912755012512, - "learning_rate": 4.6364814814814814e-05, - "loss": 0.5298, - "step": 4964 - }, - { - "epoch": 0.17427473279629338, - "grad_norm": 0.5008558034896851, - "learning_rate": 4.6362962962962965e-05, - "loss": 0.559, - "step": 4965 - }, - { - "epoch": 0.17430983344741044, - "grad_norm": 0.5690444111824036, - "learning_rate": 4.636111111111111e-05, - "loss": 0.5253, - "step": 4966 - }, - { - "epoch": 0.17434493409852753, - "grad_norm": 0.5217806696891785, - "learning_rate": 4.6359259259259265e-05, - "loss": 0.5303, - "step": 4967 - }, - { - "epoch": 0.1743800347496446, - "grad_norm": 0.6335033774375916, - "learning_rate": 4.635740740740741e-05, - "loss": 0.5902, - "step": 4968 - }, - { - "epoch": 0.17441513540076167, - "grad_norm": 0.517863392829895, - "learning_rate": 4.635555555555556e-05, - "loss": 0.5588, - "step": 4969 - }, - { - "epoch": 0.17445023605187876, - "grad_norm": 0.4663393199443817, - "learning_rate": 4.63537037037037e-05, - "loss": 0.5319, - "step": 4970 - }, - { - "epoch": 0.17448533670299585, - "grad_norm": 0.5522749423980713, - "learning_rate": 4.635185185185185e-05, - "loss": 0.4508, - "step": 4971 - }, - { - "epoch": 0.1745204373541129, - "grad_norm": 0.5499442219734192, - "learning_rate": 4.635e-05, - "loss": 0.4929, - "step": 4972 - }, - { - "epoch": 0.17455553800523, - "grad_norm": 0.5699408054351807, - "learning_rate": 4.634814814814815e-05, - "loss": 0.549, - "step": 4973 - }, - { - "epoch": 0.17459063865634708, - "grad_norm": 0.5127096176147461, - "learning_rate": 4.63462962962963e-05, - "loss": 0.5518, - "step": 4974 - }, - { - "epoch": 0.17462573930746417, - "grad_norm": 0.5440295934677124, - "learning_rate": 4.6344444444444445e-05, - "loss": 0.5497, - "step": 4975 - }, - { - "epoch": 0.17466083995858123, - "grad_norm": 0.5431733727455139, - "learning_rate": 4.6342592592592595e-05, - "loss": 0.5259, - "step": 4976 - }, - { - "epoch": 0.1746959406096983, - "grad_norm": 0.6517730355262756, - "learning_rate": 4.6340740740740746e-05, - "loss": 0.6665, - "step": 4977 - }, - { - "epoch": 0.1747310412608154, - "grad_norm": 0.5089887380599976, - "learning_rate": 4.6338888888888896e-05, - "loss": 0.453, - "step": 4978 - }, - { - "epoch": 0.17476614191193246, - "grad_norm": 0.5056629180908203, - "learning_rate": 4.633703703703704e-05, - "loss": 0.5366, - "step": 4979 - }, - { - "epoch": 0.17480124256304955, - "grad_norm": 0.590910792350769, - "learning_rate": 4.633518518518519e-05, - "loss": 0.5518, - "step": 4980 - }, - { - "epoch": 0.17483634321416663, - "grad_norm": 0.45852577686309814, - "learning_rate": 4.633333333333333e-05, - "loss": 0.5345, - "step": 4981 - }, - { - "epoch": 0.1748714438652837, - "grad_norm": 0.5658305287361145, - "learning_rate": 4.633148148148148e-05, - "loss": 0.5354, - "step": 4982 - }, - { - "epoch": 0.17490654451640078, - "grad_norm": 0.5417108535766602, - "learning_rate": 4.632962962962963e-05, - "loss": 0.5162, - "step": 4983 - }, - { - "epoch": 0.17494164516751787, - "grad_norm": 0.6208826303482056, - "learning_rate": 4.632777777777778e-05, - "loss": 0.6339, - "step": 4984 - }, - { - "epoch": 0.17497674581863493, - "grad_norm": 0.5019455552101135, - "learning_rate": 4.6325925925925926e-05, - "loss": 0.6191, - "step": 4985 - }, - { - "epoch": 0.175011846469752, - "grad_norm": 0.47862279415130615, - "learning_rate": 4.6324074074074076e-05, - "loss": 0.4769, - "step": 4986 - }, - { - "epoch": 0.1750469471208691, - "grad_norm": 0.5673124194145203, - "learning_rate": 4.632222222222222e-05, - "loss": 0.3962, - "step": 4987 - }, - { - "epoch": 0.17508204777198616, - "grad_norm": 0.5703136324882507, - "learning_rate": 4.6320370370370376e-05, - "loss": 0.5898, - "step": 4988 - }, - { - "epoch": 0.17511714842310325, - "grad_norm": 0.5770776867866516, - "learning_rate": 4.631851851851852e-05, - "loss": 0.5339, - "step": 4989 - }, - { - "epoch": 0.17515224907422033, - "grad_norm": 0.5448614358901978, - "learning_rate": 4.631666666666667e-05, - "loss": 0.5677, - "step": 4990 - }, - { - "epoch": 0.1751873497253374, - "grad_norm": 0.558420717716217, - "learning_rate": 4.631481481481481e-05, - "loss": 0.5254, - "step": 4991 - }, - { - "epoch": 0.17522245037645448, - "grad_norm": 0.5751626491546631, - "learning_rate": 4.631296296296296e-05, - "loss": 0.5288, - "step": 4992 - }, - { - "epoch": 0.17525755102757157, - "grad_norm": 0.5476251840591431, - "learning_rate": 4.6311111111111113e-05, - "loss": 0.5868, - "step": 4993 - }, - { - "epoch": 0.17529265167868863, - "grad_norm": 0.5651817321777344, - "learning_rate": 4.6309259259259264e-05, - "loss": 0.4552, - "step": 4994 - }, - { - "epoch": 0.17532775232980571, - "grad_norm": 0.4887714982032776, - "learning_rate": 4.6307407407407414e-05, - "loss": 0.4835, - "step": 4995 - }, - { - "epoch": 0.1753628529809228, - "grad_norm": 0.5733048915863037, - "learning_rate": 4.630555555555556e-05, - "loss": 0.4723, - "step": 4996 - }, - { - "epoch": 0.1753979536320399, - "grad_norm": 0.5754411816596985, - "learning_rate": 4.630370370370371e-05, - "loss": 0.5779, - "step": 4997 - }, - { - "epoch": 0.17543305428315695, - "grad_norm": 0.5572612881660461, - "learning_rate": 4.630185185185185e-05, - "loss": 0.528, - "step": 4998 - }, - { - "epoch": 0.17546815493427403, - "grad_norm": 0.5107763409614563, - "learning_rate": 4.630000000000001e-05, - "loss": 0.3253, - "step": 4999 - }, - { - "epoch": 0.17550325558539112, - "grad_norm": 0.43071284890174866, - "learning_rate": 4.629814814814815e-05, - "loss": 0.4075, - "step": 5000 - }, - { - "epoch": 0.17553835623650818, - "grad_norm": 0.5960519313812256, - "learning_rate": 4.62962962962963e-05, - "loss": 0.479, - "step": 5001 - }, - { - "epoch": 0.17557345688762527, - "grad_norm": 0.6142535209655762, - "learning_rate": 4.6294444444444444e-05, - "loss": 0.5951, - "step": 5002 - }, - { - "epoch": 0.17560855753874235, - "grad_norm": 0.517889678478241, - "learning_rate": 4.6292592592592594e-05, - "loss": 0.506, - "step": 5003 - }, - { - "epoch": 0.17564365818985941, - "grad_norm": 0.4664066731929779, - "learning_rate": 4.6290740740740744e-05, - "loss": 0.3444, - "step": 5004 - }, - { - "epoch": 0.1756787588409765, - "grad_norm": 0.4833444058895111, - "learning_rate": 4.6288888888888894e-05, - "loss": 0.4988, - "step": 5005 - }, - { - "epoch": 0.1757138594920936, - "grad_norm": 0.5331190824508667, - "learning_rate": 4.628703703703704e-05, - "loss": 0.535, - "step": 5006 - }, - { - "epoch": 0.17574896014321065, - "grad_norm": 0.6004534363746643, - "learning_rate": 4.628518518518519e-05, - "loss": 0.555, - "step": 5007 - }, - { - "epoch": 0.17578406079432773, - "grad_norm": 0.43990761041641235, - "learning_rate": 4.628333333333333e-05, - "loss": 0.5742, - "step": 5008 - }, - { - "epoch": 0.17581916144544482, - "grad_norm": 0.4480760991573334, - "learning_rate": 4.628148148148148e-05, - "loss": 0.3318, - "step": 5009 - }, - { - "epoch": 0.17585426209656188, - "grad_norm": 0.4714348316192627, - "learning_rate": 4.627962962962963e-05, - "loss": 0.4248, - "step": 5010 - }, - { - "epoch": 0.17588936274767897, - "grad_norm": 0.5859754681587219, - "learning_rate": 4.627777777777778e-05, - "loss": 0.4978, - "step": 5011 - }, - { - "epoch": 0.17592446339879605, - "grad_norm": 0.5068362355232239, - "learning_rate": 4.6275925925925925e-05, - "loss": 0.6048, - "step": 5012 - }, - { - "epoch": 0.17595956404991311, - "grad_norm": 0.49707263708114624, - "learning_rate": 4.6274074074074075e-05, - "loss": 0.5052, - "step": 5013 - }, - { - "epoch": 0.1759946647010302, - "grad_norm": 0.6078557968139648, - "learning_rate": 4.6272222222222225e-05, - "loss": 0.6029, - "step": 5014 - }, - { - "epoch": 0.1760297653521473, - "grad_norm": 0.5349193811416626, - "learning_rate": 4.6270370370370375e-05, - "loss": 0.6196, - "step": 5015 - }, - { - "epoch": 0.17606486600326435, - "grad_norm": 0.4744168817996979, - "learning_rate": 4.6268518518518525e-05, - "loss": 0.5104, - "step": 5016 - }, - { - "epoch": 0.17609996665438143, - "grad_norm": 0.5646181702613831, - "learning_rate": 4.626666666666667e-05, - "loss": 0.48, - "step": 5017 - }, - { - "epoch": 0.17613506730549852, - "grad_norm": 0.4752943217754364, - "learning_rate": 4.626481481481482e-05, - "loss": 0.5563, - "step": 5018 - }, - { - "epoch": 0.1761701679566156, - "grad_norm": 0.43894052505493164, - "learning_rate": 4.626296296296296e-05, - "loss": 0.6028, - "step": 5019 - }, - { - "epoch": 0.17620526860773267, - "grad_norm": 0.46509233117103577, - "learning_rate": 4.626111111111111e-05, - "loss": 0.3774, - "step": 5020 - }, - { - "epoch": 0.17624036925884976, - "grad_norm": 0.4926984906196594, - "learning_rate": 4.625925925925926e-05, - "loss": 0.5597, - "step": 5021 - }, - { - "epoch": 0.17627546990996684, - "grad_norm": 0.540610134601593, - "learning_rate": 4.625740740740741e-05, - "loss": 0.5467, - "step": 5022 - }, - { - "epoch": 0.1763105705610839, - "grad_norm": 0.5524587035179138, - "learning_rate": 4.6255555555555556e-05, - "loss": 0.3771, - "step": 5023 - }, - { - "epoch": 0.176345671212201, - "grad_norm": 0.7634289264678955, - "learning_rate": 4.6253703703703706e-05, - "loss": 0.5059, - "step": 5024 - }, - { - "epoch": 0.17638077186331808, - "grad_norm": 0.5927325487136841, - "learning_rate": 4.625185185185185e-05, - "loss": 0.5689, - "step": 5025 - }, - { - "epoch": 0.17641587251443513, - "grad_norm": 0.5995538234710693, - "learning_rate": 4.6250000000000006e-05, - "loss": 0.5082, - "step": 5026 - }, - { - "epoch": 0.17645097316555222, - "grad_norm": 0.5114510655403137, - "learning_rate": 4.624814814814815e-05, - "loss": 0.5446, - "step": 5027 - }, - { - "epoch": 0.1764860738166693, - "grad_norm": 0.5201802253723145, - "learning_rate": 4.62462962962963e-05, - "loss": 0.5107, - "step": 5028 - }, - { - "epoch": 0.17652117446778637, - "grad_norm": 0.547325849533081, - "learning_rate": 4.624444444444444e-05, - "loss": 0.5878, - "step": 5029 - }, - { - "epoch": 0.17655627511890346, - "grad_norm": 0.46678534150123596, - "learning_rate": 4.624259259259259e-05, - "loss": 0.5438, - "step": 5030 - }, - { - "epoch": 0.17659137577002054, - "grad_norm": 0.5463664531707764, - "learning_rate": 4.624074074074074e-05, - "loss": 0.6274, - "step": 5031 - }, - { - "epoch": 0.1766264764211376, - "grad_norm": 0.48916709423065186, - "learning_rate": 4.623888888888889e-05, - "loss": 0.4401, - "step": 5032 - }, - { - "epoch": 0.1766615770722547, - "grad_norm": 0.5222157835960388, - "learning_rate": 4.6237037037037037e-05, - "loss": 0.5786, - "step": 5033 - }, - { - "epoch": 0.17669667772337178, - "grad_norm": 0.5909053087234497, - "learning_rate": 4.623518518518519e-05, - "loss": 0.6494, - "step": 5034 - }, - { - "epoch": 0.17673177837448883, - "grad_norm": 0.5067504048347473, - "learning_rate": 4.623333333333334e-05, - "loss": 0.5338, - "step": 5035 - }, - { - "epoch": 0.17676687902560592, - "grad_norm": 0.4533880054950714, - "learning_rate": 4.623148148148148e-05, - "loss": 0.5371, - "step": 5036 - }, - { - "epoch": 0.176801979676723, - "grad_norm": 0.5137471556663513, - "learning_rate": 4.622962962962964e-05, - "loss": 0.4776, - "step": 5037 - }, - { - "epoch": 0.17683708032784007, - "grad_norm": 0.4615430235862732, - "learning_rate": 4.622777777777778e-05, - "loss": 0.4906, - "step": 5038 - }, - { - "epoch": 0.17687218097895716, - "grad_norm": 0.5193494558334351, - "learning_rate": 4.622592592592593e-05, - "loss": 0.5769, - "step": 5039 - }, - { - "epoch": 0.17690728163007424, - "grad_norm": 0.5696596503257751, - "learning_rate": 4.6224074074074074e-05, - "loss": 0.6145, - "step": 5040 - }, - { - "epoch": 0.17694238228119133, - "grad_norm": 0.515204668045044, - "learning_rate": 4.6222222222222224e-05, - "loss": 0.6122, - "step": 5041 - }, - { - "epoch": 0.1769774829323084, - "grad_norm": 0.5016699433326721, - "learning_rate": 4.6220370370370374e-05, - "loss": 0.5652, - "step": 5042 - }, - { - "epoch": 0.17701258358342548, - "grad_norm": 0.5247120261192322, - "learning_rate": 4.6218518518518524e-05, - "loss": 0.4948, - "step": 5043 - }, - { - "epoch": 0.17704768423454256, - "grad_norm": 0.7121196389198303, - "learning_rate": 4.621666666666667e-05, - "loss": 0.5609, - "step": 5044 - }, - { - "epoch": 0.17708278488565962, - "grad_norm": 0.5477049946784973, - "learning_rate": 4.621481481481482e-05, - "loss": 0.4538, - "step": 5045 - }, - { - "epoch": 0.1771178855367767, - "grad_norm": 0.5139524936676025, - "learning_rate": 4.621296296296296e-05, - "loss": 0.6075, - "step": 5046 - }, - { - "epoch": 0.1771529861878938, - "grad_norm": 0.45845434069633484, - "learning_rate": 4.621111111111111e-05, - "loss": 0.5194, - "step": 5047 - }, - { - "epoch": 0.17718808683901086, - "grad_norm": 0.5199230313301086, - "learning_rate": 4.620925925925926e-05, - "loss": 0.4745, - "step": 5048 - }, - { - "epoch": 0.17722318749012794, - "grad_norm": 0.5482810139656067, - "learning_rate": 4.620740740740741e-05, - "loss": 0.4921, - "step": 5049 - }, - { - "epoch": 0.17725828814124503, - "grad_norm": 0.6172077059745789, - "learning_rate": 4.6205555555555555e-05, - "loss": 0.5976, - "step": 5050 - }, - { - "epoch": 0.1772933887923621, - "grad_norm": 0.5662705898284912, - "learning_rate": 4.6203703703703705e-05, - "loss": 0.5018, - "step": 5051 - }, - { - "epoch": 0.17732848944347918, - "grad_norm": 0.518010675907135, - "learning_rate": 4.6201851851851855e-05, - "loss": 0.566, - "step": 5052 - }, - { - "epoch": 0.17736359009459626, - "grad_norm": 0.6093509793281555, - "learning_rate": 4.6200000000000005e-05, - "loss": 0.5707, - "step": 5053 - }, - { - "epoch": 0.17739869074571332, - "grad_norm": 0.45910724997520447, - "learning_rate": 4.619814814814815e-05, - "loss": 0.4738, - "step": 5054 - }, - { - "epoch": 0.1774337913968304, - "grad_norm": 0.5389665365219116, - "learning_rate": 4.61962962962963e-05, - "loss": 0.4344, - "step": 5055 - }, - { - "epoch": 0.1774688920479475, - "grad_norm": 0.49307724833488464, - "learning_rate": 4.619444444444445e-05, - "loss": 0.537, - "step": 5056 - }, - { - "epoch": 0.17750399269906456, - "grad_norm": 0.4618641138076782, - "learning_rate": 4.619259259259259e-05, - "loss": 0.3292, - "step": 5057 - }, - { - "epoch": 0.17753909335018164, - "grad_norm": 0.7453361749649048, - "learning_rate": 4.619074074074075e-05, - "loss": 0.5138, - "step": 5058 - }, - { - "epoch": 0.17757419400129873, - "grad_norm": 0.5738391280174255, - "learning_rate": 4.618888888888889e-05, - "loss": 0.4331, - "step": 5059 - }, - { - "epoch": 0.1776092946524158, - "grad_norm": 0.6329066157341003, - "learning_rate": 4.618703703703704e-05, - "loss": 0.5528, - "step": 5060 - }, - { - "epoch": 0.17764439530353288, - "grad_norm": 0.5176612138748169, - "learning_rate": 4.6185185185185185e-05, - "loss": 0.5128, - "step": 5061 - }, - { - "epoch": 0.17767949595464996, - "grad_norm": 0.5689432621002197, - "learning_rate": 4.6183333333333336e-05, - "loss": 0.5605, - "step": 5062 - }, - { - "epoch": 0.17771459660576705, - "grad_norm": 0.5647587180137634, - "learning_rate": 4.618148148148148e-05, - "loss": 0.5038, - "step": 5063 - }, - { - "epoch": 0.1777496972568841, - "grad_norm": 0.49844154715538025, - "learning_rate": 4.6179629629629636e-05, - "loss": 0.4429, - "step": 5064 - }, - { - "epoch": 0.1777847979080012, - "grad_norm": 0.5106529593467712, - "learning_rate": 4.617777777777778e-05, - "loss": 0.415, - "step": 5065 - }, - { - "epoch": 0.17781989855911828, - "grad_norm": 0.5743507146835327, - "learning_rate": 4.617592592592593e-05, - "loss": 0.4219, - "step": 5066 - }, - { - "epoch": 0.17785499921023534, - "grad_norm": 0.5399923324584961, - "learning_rate": 4.617407407407407e-05, - "loss": 0.472, - "step": 5067 - }, - { - "epoch": 0.17789009986135243, - "grad_norm": 0.47030365467071533, - "learning_rate": 4.617222222222222e-05, - "loss": 0.6162, - "step": 5068 - }, - { - "epoch": 0.17792520051246952, - "grad_norm": 0.566268801689148, - "learning_rate": 4.617037037037037e-05, - "loss": 0.5442, - "step": 5069 - }, - { - "epoch": 0.17796030116358658, - "grad_norm": 0.5081482529640198, - "learning_rate": 4.616851851851852e-05, - "loss": 0.5187, - "step": 5070 - }, - { - "epoch": 0.17799540181470366, - "grad_norm": 0.6274792551994324, - "learning_rate": 4.6166666666666666e-05, - "loss": 0.515, - "step": 5071 - }, - { - "epoch": 0.17803050246582075, - "grad_norm": 0.5411957502365112, - "learning_rate": 4.6164814814814816e-05, - "loss": 0.5498, - "step": 5072 - }, - { - "epoch": 0.1780656031169378, - "grad_norm": 0.5154404640197754, - "learning_rate": 4.6162962962962966e-05, - "loss": 0.4914, - "step": 5073 - }, - { - "epoch": 0.1781007037680549, - "grad_norm": 0.4861733019351959, - "learning_rate": 4.6161111111111117e-05, - "loss": 0.481, - "step": 5074 - }, - { - "epoch": 0.17813580441917198, - "grad_norm": 0.5162232518196106, - "learning_rate": 4.615925925925926e-05, - "loss": 0.5124, - "step": 5075 - }, - { - "epoch": 0.17817090507028904, - "grad_norm": 0.5101162195205688, - "learning_rate": 4.615740740740741e-05, - "loss": 0.5449, - "step": 5076 - }, - { - "epoch": 0.17820600572140613, - "grad_norm": 0.5583548545837402, - "learning_rate": 4.615555555555556e-05, - "loss": 0.552, - "step": 5077 - }, - { - "epoch": 0.17824110637252322, - "grad_norm": 0.523621678352356, - "learning_rate": 4.6153703703703703e-05, - "loss": 0.4044, - "step": 5078 - }, - { - "epoch": 0.17827620702364028, - "grad_norm": 0.47557708621025085, - "learning_rate": 4.6151851851851854e-05, - "loss": 0.4825, - "step": 5079 - }, - { - "epoch": 0.17831130767475736, - "grad_norm": 0.5699413418769836, - "learning_rate": 4.6150000000000004e-05, - "loss": 0.5073, - "step": 5080 - }, - { - "epoch": 0.17834640832587445, - "grad_norm": 0.510779857635498, - "learning_rate": 4.6148148148148154e-05, - "loss": 0.5347, - "step": 5081 - }, - { - "epoch": 0.1783815089769915, - "grad_norm": 0.5009073615074158, - "learning_rate": 4.61462962962963e-05, - "loss": 0.5407, - "step": 5082 - }, - { - "epoch": 0.1784166096281086, - "grad_norm": 0.5339158773422241, - "learning_rate": 4.614444444444445e-05, - "loss": 0.556, - "step": 5083 - }, - { - "epoch": 0.17845171027922568, - "grad_norm": 0.5191090106964111, - "learning_rate": 4.614259259259259e-05, - "loss": 0.5446, - "step": 5084 - }, - { - "epoch": 0.17848681093034277, - "grad_norm": 0.49278515577316284, - "learning_rate": 4.614074074074075e-05, - "loss": 0.4344, - "step": 5085 - }, - { - "epoch": 0.17852191158145983, - "grad_norm": 0.5030877590179443, - "learning_rate": 4.613888888888889e-05, - "loss": 0.5526, - "step": 5086 - }, - { - "epoch": 0.17855701223257692, - "grad_norm": 0.5078111886978149, - "learning_rate": 4.613703703703704e-05, - "loss": 0.5882, - "step": 5087 - }, - { - "epoch": 0.178592112883694, - "grad_norm": 0.5837526917457581, - "learning_rate": 4.6135185185185184e-05, - "loss": 0.5702, - "step": 5088 - }, - { - "epoch": 0.17862721353481106, - "grad_norm": 0.5623990297317505, - "learning_rate": 4.6133333333333334e-05, - "loss": 0.452, - "step": 5089 - }, - { - "epoch": 0.17866231418592815, - "grad_norm": 0.5463975071907043, - "learning_rate": 4.6131481481481484e-05, - "loss": 0.5939, - "step": 5090 - }, - { - "epoch": 0.17869741483704524, - "grad_norm": 0.501710832118988, - "learning_rate": 4.6129629629629635e-05, - "loss": 0.5144, - "step": 5091 - }, - { - "epoch": 0.1787325154881623, - "grad_norm": 0.4985105097293854, - "learning_rate": 4.612777777777778e-05, - "loss": 0.4694, - "step": 5092 - }, - { - "epoch": 0.17876761613927938, - "grad_norm": 0.5046775937080383, - "learning_rate": 4.612592592592593e-05, - "loss": 0.5464, - "step": 5093 - }, - { - "epoch": 0.17880271679039647, - "grad_norm": 0.5015954375267029, - "learning_rate": 4.612407407407408e-05, - "loss": 0.5841, - "step": 5094 - }, - { - "epoch": 0.17883781744151353, - "grad_norm": 0.5074183940887451, - "learning_rate": 4.612222222222222e-05, - "loss": 0.6324, - "step": 5095 - }, - { - "epoch": 0.17887291809263062, - "grad_norm": 0.557970404624939, - "learning_rate": 4.612037037037037e-05, - "loss": 0.5045, - "step": 5096 - }, - { - "epoch": 0.1789080187437477, - "grad_norm": 0.4791407585144043, - "learning_rate": 4.611851851851852e-05, - "loss": 0.5407, - "step": 5097 - }, - { - "epoch": 0.17894311939486476, - "grad_norm": 0.6306085586547852, - "learning_rate": 4.611666666666667e-05, - "loss": 0.5851, - "step": 5098 - }, - { - "epoch": 0.17897822004598185, - "grad_norm": 0.5343521237373352, - "learning_rate": 4.6114814814814815e-05, - "loss": 0.5208, - "step": 5099 - }, - { - "epoch": 0.17901332069709894, - "grad_norm": 0.5021390914916992, - "learning_rate": 4.6112962962962965e-05, - "loss": 0.5428, - "step": 5100 - }, - { - "epoch": 0.179048421348216, - "grad_norm": 0.4574606120586395, - "learning_rate": 4.6111111111111115e-05, - "loss": 0.5032, - "step": 5101 - }, - { - "epoch": 0.17908352199933308, - "grad_norm": 0.4340266287326813, - "learning_rate": 4.6109259259259265e-05, - "loss": 0.5791, - "step": 5102 - }, - { - "epoch": 0.17911862265045017, - "grad_norm": 0.4728217124938965, - "learning_rate": 4.610740740740741e-05, - "loss": 0.5434, - "step": 5103 - }, - { - "epoch": 0.17915372330156723, - "grad_norm": 0.5220842957496643, - "learning_rate": 4.610555555555556e-05, - "loss": 0.562, - "step": 5104 - }, - { - "epoch": 0.17918882395268432, - "grad_norm": 0.5532286763191223, - "learning_rate": 4.61037037037037e-05, - "loss": 0.5559, - "step": 5105 - }, - { - "epoch": 0.1792239246038014, - "grad_norm": 0.47835734486579895, - "learning_rate": 4.610185185185185e-05, - "loss": 0.5563, - "step": 5106 - }, - { - "epoch": 0.1792590252549185, - "grad_norm": 0.5613712668418884, - "learning_rate": 4.61e-05, - "loss": 0.5664, - "step": 5107 - }, - { - "epoch": 0.17929412590603555, - "grad_norm": 0.46346521377563477, - "learning_rate": 4.609814814814815e-05, - "loss": 0.4018, - "step": 5108 - }, - { - "epoch": 0.17932922655715264, - "grad_norm": 0.48188725113868713, - "learning_rate": 4.6096296296296296e-05, - "loss": 0.5418, - "step": 5109 - }, - { - "epoch": 0.17936432720826972, - "grad_norm": 0.5306025743484497, - "learning_rate": 4.6094444444444446e-05, - "loss": 0.4928, - "step": 5110 - }, - { - "epoch": 0.17939942785938678, - "grad_norm": 0.6118983030319214, - "learning_rate": 4.6092592592592596e-05, - "loss": 0.5406, - "step": 5111 - }, - { - "epoch": 0.17943452851050387, - "grad_norm": 0.5534156560897827, - "learning_rate": 4.6090740740740746e-05, - "loss": 0.4807, - "step": 5112 - }, - { - "epoch": 0.17946962916162096, - "grad_norm": 0.5205903053283691, - "learning_rate": 4.608888888888889e-05, - "loss": 0.4173, - "step": 5113 - }, - { - "epoch": 0.17950472981273802, - "grad_norm": 0.5694395303726196, - "learning_rate": 4.608703703703704e-05, - "loss": 0.5349, - "step": 5114 - }, - { - "epoch": 0.1795398304638551, - "grad_norm": 0.504877507686615, - "learning_rate": 4.608518518518519e-05, - "loss": 0.5517, - "step": 5115 - }, - { - "epoch": 0.1795749311149722, - "grad_norm": 0.5089018940925598, - "learning_rate": 4.608333333333333e-05, - "loss": 0.5437, - "step": 5116 - }, - { - "epoch": 0.17961003176608925, - "grad_norm": 0.6576333045959473, - "learning_rate": 4.608148148148148e-05, - "loss": 0.6266, - "step": 5117 - }, - { - "epoch": 0.17964513241720634, - "grad_norm": 0.5102044939994812, - "learning_rate": 4.607962962962963e-05, - "loss": 0.5348, - "step": 5118 - }, - { - "epoch": 0.17968023306832343, - "grad_norm": 0.5031288862228394, - "learning_rate": 4.6077777777777783e-05, - "loss": 0.4503, - "step": 5119 - }, - { - "epoch": 0.17971533371944048, - "grad_norm": 0.5442668199539185, - "learning_rate": 4.607592592592593e-05, - "loss": 0.5472, - "step": 5120 - }, - { - "epoch": 0.17975043437055757, - "grad_norm": 0.5592094659805298, - "learning_rate": 4.607407407407408e-05, - "loss": 0.5348, - "step": 5121 - }, - { - "epoch": 0.17978553502167466, - "grad_norm": 0.48998814821243286, - "learning_rate": 4.607222222222222e-05, - "loss": 0.5906, - "step": 5122 - }, - { - "epoch": 0.17982063567279172, - "grad_norm": 0.5347537994384766, - "learning_rate": 4.607037037037038e-05, - "loss": 0.4774, - "step": 5123 - }, - { - "epoch": 0.1798557363239088, - "grad_norm": 0.4493904113769531, - "learning_rate": 4.606851851851852e-05, - "loss": 0.4677, - "step": 5124 - }, - { - "epoch": 0.1798908369750259, - "grad_norm": 0.6911894083023071, - "learning_rate": 4.606666666666667e-05, - "loss": 0.6443, - "step": 5125 - }, - { - "epoch": 0.17992593762614298, - "grad_norm": 0.5471945405006409, - "learning_rate": 4.6064814814814814e-05, - "loss": 0.5346, - "step": 5126 - }, - { - "epoch": 0.17996103827726004, - "grad_norm": 0.5783860087394714, - "learning_rate": 4.6062962962962964e-05, - "loss": 0.4514, - "step": 5127 - }, - { - "epoch": 0.17999613892837713, - "grad_norm": 0.5359005928039551, - "learning_rate": 4.6061111111111114e-05, - "loss": 0.5575, - "step": 5128 - }, - { - "epoch": 0.1800312395794942, - "grad_norm": 0.6374132037162781, - "learning_rate": 4.6059259259259264e-05, - "loss": 0.6261, - "step": 5129 - }, - { - "epoch": 0.18006634023061127, - "grad_norm": 0.4841483235359192, - "learning_rate": 4.605740740740741e-05, - "loss": 0.6023, - "step": 5130 - }, - { - "epoch": 0.18010144088172836, - "grad_norm": 0.5447208285331726, - "learning_rate": 4.605555555555556e-05, - "loss": 0.5755, - "step": 5131 - }, - { - "epoch": 0.18013654153284545, - "grad_norm": 0.5120126605033875, - "learning_rate": 4.605370370370371e-05, - "loss": 0.5816, - "step": 5132 - }, - { - "epoch": 0.1801716421839625, - "grad_norm": 0.46661829948425293, - "learning_rate": 4.605185185185185e-05, - "loss": 0.3911, - "step": 5133 - }, - { - "epoch": 0.1802067428350796, - "grad_norm": 0.5136094093322754, - "learning_rate": 4.605e-05, - "loss": 0.5516, - "step": 5134 - }, - { - "epoch": 0.18024184348619668, - "grad_norm": 0.44012510776519775, - "learning_rate": 4.604814814814815e-05, - "loss": 0.6177, - "step": 5135 - }, - { - "epoch": 0.18027694413731374, - "grad_norm": 0.5142960548400879, - "learning_rate": 4.60462962962963e-05, - "loss": 0.5108, - "step": 5136 - }, - { - "epoch": 0.18031204478843083, - "grad_norm": 0.5474504232406616, - "learning_rate": 4.6044444444444445e-05, - "loss": 0.508, - "step": 5137 - }, - { - "epoch": 0.1803471454395479, - "grad_norm": 0.5399184823036194, - "learning_rate": 4.6042592592592595e-05, - "loss": 0.5076, - "step": 5138 - }, - { - "epoch": 0.18038224609066497, - "grad_norm": 0.5132362842559814, - "learning_rate": 4.6040740740740745e-05, - "loss": 0.3597, - "step": 5139 - }, - { - "epoch": 0.18041734674178206, - "grad_norm": 0.5220217108726501, - "learning_rate": 4.6038888888888895e-05, - "loss": 0.4562, - "step": 5140 - }, - { - "epoch": 0.18045244739289915, - "grad_norm": 0.5713862180709839, - "learning_rate": 4.603703703703704e-05, - "loss": 0.6036, - "step": 5141 - }, - { - "epoch": 0.1804875480440162, - "grad_norm": 0.3867602050304413, - "learning_rate": 4.603518518518519e-05, - "loss": 0.3722, - "step": 5142 - }, - { - "epoch": 0.1805226486951333, - "grad_norm": 0.4908551275730133, - "learning_rate": 4.603333333333333e-05, - "loss": 0.4586, - "step": 5143 - }, - { - "epoch": 0.18055774934625038, - "grad_norm": 0.5520111322402954, - "learning_rate": 4.603148148148148e-05, - "loss": 0.4541, - "step": 5144 - }, - { - "epoch": 0.18059284999736744, - "grad_norm": 0.4415791630744934, - "learning_rate": 4.602962962962963e-05, - "loss": 0.4869, - "step": 5145 - }, - { - "epoch": 0.18062795064848453, - "grad_norm": 0.6346554756164551, - "learning_rate": 4.602777777777778e-05, - "loss": 0.5476, - "step": 5146 - }, - { - "epoch": 0.1806630512996016, - "grad_norm": 0.5357697606086731, - "learning_rate": 4.6025925925925926e-05, - "loss": 0.5596, - "step": 5147 - }, - { - "epoch": 0.1806981519507187, - "grad_norm": 0.49315083026885986, - "learning_rate": 4.6024074074074076e-05, - "loss": 0.5563, - "step": 5148 - }, - { - "epoch": 0.18073325260183576, - "grad_norm": 0.5912092328071594, - "learning_rate": 4.602222222222222e-05, - "loss": 0.5676, - "step": 5149 - }, - { - "epoch": 0.18076835325295285, - "grad_norm": 0.49521371722221375, - "learning_rate": 4.6020370370370376e-05, - "loss": 0.4057, - "step": 5150 - }, - { - "epoch": 0.18080345390406993, - "grad_norm": 0.5098423957824707, - "learning_rate": 4.601851851851852e-05, - "loss": 0.4793, - "step": 5151 - }, - { - "epoch": 0.180838554555187, - "grad_norm": 0.6224954724311829, - "learning_rate": 4.601666666666667e-05, - "loss": 0.5446, - "step": 5152 - }, - { - "epoch": 0.18087365520630408, - "grad_norm": 0.5562158226966858, - "learning_rate": 4.601481481481482e-05, - "loss": 0.5269, - "step": 5153 - }, - { - "epoch": 0.18090875585742117, - "grad_norm": 0.5503278374671936, - "learning_rate": 4.601296296296296e-05, - "loss": 0.398, - "step": 5154 - }, - { - "epoch": 0.18094385650853823, - "grad_norm": 0.6221495866775513, - "learning_rate": 4.601111111111111e-05, - "loss": 0.6141, - "step": 5155 - }, - { - "epoch": 0.1809789571596553, - "grad_norm": 0.6576593518257141, - "learning_rate": 4.600925925925926e-05, - "loss": 0.6298, - "step": 5156 - }, - { - "epoch": 0.1810140578107724, - "grad_norm": 0.482134073972702, - "learning_rate": 4.600740740740741e-05, - "loss": 0.5035, - "step": 5157 - }, - { - "epoch": 0.18104915846188946, - "grad_norm": 0.4832969903945923, - "learning_rate": 4.6005555555555556e-05, - "loss": 0.4588, - "step": 5158 - }, - { - "epoch": 0.18108425911300655, - "grad_norm": 0.632616400718689, - "learning_rate": 4.6003703703703707e-05, - "loss": 0.6669, - "step": 5159 - }, - { - "epoch": 0.18111935976412363, - "grad_norm": 0.4960416853427887, - "learning_rate": 4.600185185185185e-05, - "loss": 0.5766, - "step": 5160 - }, - { - "epoch": 0.1811544604152407, - "grad_norm": 0.5242733359336853, - "learning_rate": 4.600000000000001e-05, - "loss": 0.5102, - "step": 5161 - }, - { - "epoch": 0.18118956106635778, - "grad_norm": 0.49044269323349, - "learning_rate": 4.599814814814815e-05, - "loss": 0.4914, - "step": 5162 - }, - { - "epoch": 0.18122466171747487, - "grad_norm": 0.45469316840171814, - "learning_rate": 4.59962962962963e-05, - "loss": 0.4473, - "step": 5163 - }, - { - "epoch": 0.18125976236859193, - "grad_norm": 0.436967670917511, - "learning_rate": 4.5994444444444444e-05, - "loss": 0.4365, - "step": 5164 - }, - { - "epoch": 0.181294863019709, - "grad_norm": 0.5346594452857971, - "learning_rate": 4.5992592592592594e-05, - "loss": 0.6637, - "step": 5165 - }, - { - "epoch": 0.1813299636708261, - "grad_norm": 0.48714783787727356, - "learning_rate": 4.5990740740740744e-05, - "loss": 0.4736, - "step": 5166 - }, - { - "epoch": 0.18136506432194316, - "grad_norm": 0.538822591304779, - "learning_rate": 4.5988888888888894e-05, - "loss": 0.4851, - "step": 5167 - }, - { - "epoch": 0.18140016497306025, - "grad_norm": 0.4984104335308075, - "learning_rate": 4.598703703703704e-05, - "loss": 0.4532, - "step": 5168 - }, - { - "epoch": 0.18143526562417733, - "grad_norm": 0.5589075684547424, - "learning_rate": 4.598518518518519e-05, - "loss": 0.4003, - "step": 5169 - }, - { - "epoch": 0.18147036627529442, - "grad_norm": 0.5746286511421204, - "learning_rate": 4.598333333333333e-05, - "loss": 0.4184, - "step": 5170 - }, - { - "epoch": 0.18150546692641148, - "grad_norm": 0.5074183940887451, - "learning_rate": 4.598148148148148e-05, - "loss": 0.6145, - "step": 5171 - }, - { - "epoch": 0.18154056757752857, - "grad_norm": 0.6810674071311951, - "learning_rate": 4.597962962962963e-05, - "loss": 0.5791, - "step": 5172 - }, - { - "epoch": 0.18157566822864565, - "grad_norm": 0.5161910057067871, - "learning_rate": 4.597777777777778e-05, - "loss": 0.5514, - "step": 5173 - }, - { - "epoch": 0.1816107688797627, - "grad_norm": 0.6651344895362854, - "learning_rate": 4.597592592592593e-05, - "loss": 0.6256, - "step": 5174 - }, - { - "epoch": 0.1816458695308798, - "grad_norm": 0.5160751342773438, - "learning_rate": 4.5974074074074074e-05, - "loss": 0.4242, - "step": 5175 - }, - { - "epoch": 0.1816809701819969, - "grad_norm": 0.6150898337364197, - "learning_rate": 4.5972222222222225e-05, - "loss": 0.4394, - "step": 5176 - }, - { - "epoch": 0.18171607083311395, - "grad_norm": 0.48181286454200745, - "learning_rate": 4.5970370370370375e-05, - "loss": 0.4937, - "step": 5177 - }, - { - "epoch": 0.18175117148423103, - "grad_norm": 0.5488799810409546, - "learning_rate": 4.5968518518518525e-05, - "loss": 0.5788, - "step": 5178 - }, - { - "epoch": 0.18178627213534812, - "grad_norm": 0.45054829120635986, - "learning_rate": 4.596666666666667e-05, - "loss": 0.4664, - "step": 5179 - }, - { - "epoch": 0.18182137278646518, - "grad_norm": 0.556641697883606, - "learning_rate": 4.596481481481482e-05, - "loss": 0.6293, - "step": 5180 - }, - { - "epoch": 0.18185647343758227, - "grad_norm": 0.4587582051753998, - "learning_rate": 4.596296296296296e-05, - "loss": 0.4583, - "step": 5181 - }, - { - "epoch": 0.18189157408869935, - "grad_norm": 0.5028219223022461, - "learning_rate": 4.596111111111112e-05, - "loss": 0.5794, - "step": 5182 - }, - { - "epoch": 0.1819266747398164, - "grad_norm": 0.5214095711708069, - "learning_rate": 4.595925925925926e-05, - "loss": 0.4811, - "step": 5183 - }, - { - "epoch": 0.1819617753909335, - "grad_norm": 0.6346743702888489, - "learning_rate": 4.595740740740741e-05, - "loss": 0.5912, - "step": 5184 - }, - { - "epoch": 0.1819968760420506, - "grad_norm": 0.5015432238578796, - "learning_rate": 4.5955555555555555e-05, - "loss": 0.4173, - "step": 5185 - }, - { - "epoch": 0.18203197669316765, - "grad_norm": 0.4276335835456848, - "learning_rate": 4.5953703703703705e-05, - "loss": 0.4682, - "step": 5186 - }, - { - "epoch": 0.18206707734428473, - "grad_norm": 0.471682071685791, - "learning_rate": 4.595185185185185e-05, - "loss": 0.6023, - "step": 5187 - }, - { - "epoch": 0.18210217799540182, - "grad_norm": 0.5936340689659119, - "learning_rate": 4.5950000000000006e-05, - "loss": 0.4983, - "step": 5188 - }, - { - "epoch": 0.18213727864651888, - "grad_norm": 0.617203950881958, - "learning_rate": 4.594814814814815e-05, - "loss": 0.5924, - "step": 5189 - }, - { - "epoch": 0.18217237929763597, - "grad_norm": 0.6075727343559265, - "learning_rate": 4.59462962962963e-05, - "loss": 0.6065, - "step": 5190 - }, - { - "epoch": 0.18220747994875305, - "grad_norm": 0.6297062635421753, - "learning_rate": 4.594444444444444e-05, - "loss": 0.4607, - "step": 5191 - }, - { - "epoch": 0.18224258059987014, - "grad_norm": 0.5877411365509033, - "learning_rate": 4.594259259259259e-05, - "loss": 0.5439, - "step": 5192 - }, - { - "epoch": 0.1822776812509872, - "grad_norm": 0.6075952649116516, - "learning_rate": 4.594074074074074e-05, - "loss": 0.4912, - "step": 5193 - }, - { - "epoch": 0.1823127819021043, - "grad_norm": 0.538508951663971, - "learning_rate": 4.593888888888889e-05, - "loss": 0.4494, - "step": 5194 - }, - { - "epoch": 0.18234788255322137, - "grad_norm": 0.5021909475326538, - "learning_rate": 4.593703703703704e-05, - "loss": 0.5308, - "step": 5195 - }, - { - "epoch": 0.18238298320433843, - "grad_norm": 0.5260108709335327, - "learning_rate": 4.5935185185185186e-05, - "loss": 0.5993, - "step": 5196 - }, - { - "epoch": 0.18241808385545552, - "grad_norm": 0.562450647354126, - "learning_rate": 4.5933333333333336e-05, - "loss": 0.4516, - "step": 5197 - }, - { - "epoch": 0.1824531845065726, - "grad_norm": 0.5112840533256531, - "learning_rate": 4.5931481481481486e-05, - "loss": 0.5491, - "step": 5198 - }, - { - "epoch": 0.18248828515768967, - "grad_norm": 0.5372498631477356, - "learning_rate": 4.5929629629629636e-05, - "loss": 0.5276, - "step": 5199 - }, - { - "epoch": 0.18252338580880675, - "grad_norm": 0.5702542066574097, - "learning_rate": 4.592777777777778e-05, - "loss": 0.6051, - "step": 5200 - }, - { - "epoch": 0.18255848645992384, - "grad_norm": 0.5240761041641235, - "learning_rate": 4.592592592592593e-05, - "loss": 0.4207, - "step": 5201 - }, - { - "epoch": 0.1825935871110409, - "grad_norm": 0.4967013895511627, - "learning_rate": 4.592407407407407e-05, - "loss": 0.5102, - "step": 5202 - }, - { - "epoch": 0.182628687762158, - "grad_norm": 0.5433261394500732, - "learning_rate": 4.592222222222222e-05, - "loss": 0.5885, - "step": 5203 - }, - { - "epoch": 0.18266378841327507, - "grad_norm": 0.5693183541297913, - "learning_rate": 4.5920370370370373e-05, - "loss": 0.5717, - "step": 5204 - }, - { - "epoch": 0.18269888906439213, - "grad_norm": 0.5645524859428406, - "learning_rate": 4.5918518518518524e-05, - "loss": 0.6228, - "step": 5205 - }, - { - "epoch": 0.18273398971550922, - "grad_norm": 0.4743250906467438, - "learning_rate": 4.591666666666667e-05, - "loss": 0.4846, - "step": 5206 - }, - { - "epoch": 0.1827690903666263, - "grad_norm": 0.5825350880622864, - "learning_rate": 4.591481481481482e-05, - "loss": 0.5921, - "step": 5207 - }, - { - "epoch": 0.18280419101774337, - "grad_norm": 0.46121230721473694, - "learning_rate": 4.591296296296296e-05, - "loss": 0.5113, - "step": 5208 - }, - { - "epoch": 0.18283929166886045, - "grad_norm": 0.46180805563926697, - "learning_rate": 4.591111111111112e-05, - "loss": 0.4414, - "step": 5209 - }, - { - "epoch": 0.18287439231997754, - "grad_norm": 0.5849237442016602, - "learning_rate": 4.590925925925926e-05, - "loss": 0.4688, - "step": 5210 - }, - { - "epoch": 0.1829094929710946, - "grad_norm": 0.5803527235984802, - "learning_rate": 4.590740740740741e-05, - "loss": 0.5209, - "step": 5211 - }, - { - "epoch": 0.1829445936222117, - "grad_norm": 0.46605032682418823, - "learning_rate": 4.5905555555555554e-05, - "loss": 0.5298, - "step": 5212 - }, - { - "epoch": 0.18297969427332877, - "grad_norm": 0.5168383121490479, - "learning_rate": 4.5903703703703704e-05, - "loss": 0.5233, - "step": 5213 - }, - { - "epoch": 0.18301479492444586, - "grad_norm": 0.4742446541786194, - "learning_rate": 4.5901851851851854e-05, - "loss": 0.4623, - "step": 5214 - }, - { - "epoch": 0.18304989557556292, - "grad_norm": 0.5306620597839355, - "learning_rate": 4.5900000000000004e-05, - "loss": 0.5361, - "step": 5215 - }, - { - "epoch": 0.18308499622668, - "grad_norm": 0.4566444456577301, - "learning_rate": 4.5898148148148154e-05, - "loss": 0.5315, - "step": 5216 - }, - { - "epoch": 0.1831200968777971, - "grad_norm": 0.6283822059631348, - "learning_rate": 4.58962962962963e-05, - "loss": 0.3896, - "step": 5217 - }, - { - "epoch": 0.18315519752891415, - "grad_norm": 0.5860961079597473, - "learning_rate": 4.589444444444445e-05, - "loss": 0.5047, - "step": 5218 - }, - { - "epoch": 0.18319029818003124, - "grad_norm": 0.5886569023132324, - "learning_rate": 4.589259259259259e-05, - "loss": 0.5645, - "step": 5219 - }, - { - "epoch": 0.18322539883114833, - "grad_norm": 0.7847156524658203, - "learning_rate": 4.589074074074075e-05, - "loss": 0.4499, - "step": 5220 - }, - { - "epoch": 0.1832604994822654, - "grad_norm": 0.6349309682846069, - "learning_rate": 4.588888888888889e-05, - "loss": 0.4469, - "step": 5221 - }, - { - "epoch": 0.18329560013338247, - "grad_norm": 0.5140974521636963, - "learning_rate": 4.588703703703704e-05, - "loss": 0.5448, - "step": 5222 - }, - { - "epoch": 0.18333070078449956, - "grad_norm": 0.4787903428077698, - "learning_rate": 4.5885185185185185e-05, - "loss": 0.5771, - "step": 5223 - }, - { - "epoch": 0.18336580143561662, - "grad_norm": 0.5916056036949158, - "learning_rate": 4.5883333333333335e-05, - "loss": 0.5012, - "step": 5224 - }, - { - "epoch": 0.1834009020867337, - "grad_norm": 0.4402168393135071, - "learning_rate": 4.5881481481481485e-05, - "loss": 0.4315, - "step": 5225 - }, - { - "epoch": 0.1834360027378508, - "grad_norm": 0.7791626453399658, - "learning_rate": 4.5879629629629635e-05, - "loss": 0.627, - "step": 5226 - }, - { - "epoch": 0.18347110338896785, - "grad_norm": 1.9828147888183594, - "learning_rate": 4.587777777777778e-05, - "loss": 0.6252, - "step": 5227 - }, - { - "epoch": 0.18350620404008494, - "grad_norm": 0.6646904349327087, - "learning_rate": 4.587592592592593e-05, - "loss": 0.6126, - "step": 5228 - }, - { - "epoch": 0.18354130469120203, - "grad_norm": 0.5254188776016235, - "learning_rate": 4.587407407407407e-05, - "loss": 0.4529, - "step": 5229 - }, - { - "epoch": 0.1835764053423191, - "grad_norm": 0.6316901445388794, - "learning_rate": 4.587222222222222e-05, - "loss": 0.467, - "step": 5230 - }, - { - "epoch": 0.18361150599343617, - "grad_norm": 0.6235019564628601, - "learning_rate": 4.587037037037037e-05, - "loss": 0.5265, - "step": 5231 - }, - { - "epoch": 0.18364660664455326, - "grad_norm": 0.560444712638855, - "learning_rate": 4.586851851851852e-05, - "loss": 0.3186, - "step": 5232 - }, - { - "epoch": 0.18368170729567032, - "grad_norm": 0.5589517951011658, - "learning_rate": 4.5866666666666666e-05, - "loss": 0.5002, - "step": 5233 - }, - { - "epoch": 0.1837168079467874, - "grad_norm": 0.5786103010177612, - "learning_rate": 4.5864814814814816e-05, - "loss": 0.408, - "step": 5234 - }, - { - "epoch": 0.1837519085979045, - "grad_norm": 1.096904993057251, - "learning_rate": 4.5862962962962966e-05, - "loss": 0.6208, - "step": 5235 - }, - { - "epoch": 0.18378700924902158, - "grad_norm": 0.5642610192298889, - "learning_rate": 4.5861111111111116e-05, - "loss": 0.5687, - "step": 5236 - }, - { - "epoch": 0.18382210990013864, - "grad_norm": 0.6044822931289673, - "learning_rate": 4.5859259259259266e-05, - "loss": 0.4713, - "step": 5237 - }, - { - "epoch": 0.18385721055125573, - "grad_norm": 0.6504522562026978, - "learning_rate": 4.585740740740741e-05, - "loss": 0.5274, - "step": 5238 - }, - { - "epoch": 0.18389231120237282, - "grad_norm": 1.0684798955917358, - "learning_rate": 4.585555555555556e-05, - "loss": 0.4546, - "step": 5239 - }, - { - "epoch": 0.18392741185348987, - "grad_norm": 0.6178469657897949, - "learning_rate": 4.58537037037037e-05, - "loss": 0.5283, - "step": 5240 - }, - { - "epoch": 0.18396251250460696, - "grad_norm": 0.491737425327301, - "learning_rate": 4.585185185185185e-05, - "loss": 0.4931, - "step": 5241 - }, - { - "epoch": 0.18399761315572405, - "grad_norm": 0.577655017375946, - "learning_rate": 4.585e-05, - "loss": 0.4762, - "step": 5242 - }, - { - "epoch": 0.1840327138068411, - "grad_norm": 0.7517504096031189, - "learning_rate": 4.584814814814815e-05, - "loss": 0.5667, - "step": 5243 - }, - { - "epoch": 0.1840678144579582, - "grad_norm": 0.6957834362983704, - "learning_rate": 4.5846296296296297e-05, - "loss": 0.4552, - "step": 5244 - }, - { - "epoch": 0.18410291510907528, - "grad_norm": 0.4572162628173828, - "learning_rate": 4.584444444444445e-05, - "loss": 0.3789, - "step": 5245 - }, - { - "epoch": 0.18413801576019234, - "grad_norm": 0.6921077370643616, - "learning_rate": 4.584259259259259e-05, - "loss": 0.4868, - "step": 5246 - }, - { - "epoch": 0.18417311641130943, - "grad_norm": 0.5146421790122986, - "learning_rate": 4.584074074074075e-05, - "loss": 0.3807, - "step": 5247 - }, - { - "epoch": 0.18420821706242652, - "grad_norm": 0.5364363193511963, - "learning_rate": 4.583888888888889e-05, - "loss": 0.492, - "step": 5248 - }, - { - "epoch": 0.18424331771354358, - "grad_norm": 0.5151453614234924, - "learning_rate": 4.583703703703704e-05, - "loss": 0.6044, - "step": 5249 - }, - { - "epoch": 0.18427841836466066, - "grad_norm": 0.4911752939224243, - "learning_rate": 4.5835185185185184e-05, - "loss": 0.5245, - "step": 5250 - }, - { - "epoch": 0.18431351901577775, - "grad_norm": 0.49709922075271606, - "learning_rate": 4.5833333333333334e-05, - "loss": 0.426, - "step": 5251 - }, - { - "epoch": 0.1843486196668948, - "grad_norm": 0.9557886123657227, - "learning_rate": 4.5831481481481484e-05, - "loss": 0.5995, - "step": 5252 - }, - { - "epoch": 0.1843837203180119, - "grad_norm": 0.590147078037262, - "learning_rate": 4.5829629629629634e-05, - "loss": 0.5666, - "step": 5253 - }, - { - "epoch": 0.18441882096912898, - "grad_norm": 0.5549790859222412, - "learning_rate": 4.582777777777778e-05, - "loss": 0.6321, - "step": 5254 - }, - { - "epoch": 0.18445392162024604, - "grad_norm": 0.5109682679176331, - "learning_rate": 4.582592592592593e-05, - "loss": 0.5725, - "step": 5255 - }, - { - "epoch": 0.18448902227136313, - "grad_norm": 0.5433817505836487, - "learning_rate": 4.582407407407408e-05, - "loss": 0.5815, - "step": 5256 - }, - { - "epoch": 0.18452412292248022, - "grad_norm": 0.649664044380188, - "learning_rate": 4.582222222222222e-05, - "loss": 0.5543, - "step": 5257 - }, - { - "epoch": 0.1845592235735973, - "grad_norm": 0.505793035030365, - "learning_rate": 4.582037037037038e-05, - "loss": 0.3908, - "step": 5258 - }, - { - "epoch": 0.18459432422471436, - "grad_norm": 0.53371661901474, - "learning_rate": 4.581851851851852e-05, - "loss": 0.4849, - "step": 5259 - }, - { - "epoch": 0.18462942487583145, - "grad_norm": 0.5261498689651489, - "learning_rate": 4.581666666666667e-05, - "loss": 0.4959, - "step": 5260 - }, - { - "epoch": 0.18466452552694854, - "grad_norm": 0.5623217821121216, - "learning_rate": 4.5814814814814815e-05, - "loss": 0.4894, - "step": 5261 - }, - { - "epoch": 0.1846996261780656, - "grad_norm": 0.550126850605011, - "learning_rate": 4.5812962962962965e-05, - "loss": 0.6344, - "step": 5262 - }, - { - "epoch": 0.18473472682918268, - "grad_norm": 0.6053337454795837, - "learning_rate": 4.5811111111111115e-05, - "loss": 0.584, - "step": 5263 - }, - { - "epoch": 0.18476982748029977, - "grad_norm": 0.5636712312698364, - "learning_rate": 4.5809259259259265e-05, - "loss": 0.5766, - "step": 5264 - }, - { - "epoch": 0.18480492813141683, - "grad_norm": 0.5781726241111755, - "learning_rate": 4.580740740740741e-05, - "loss": 0.5192, - "step": 5265 - }, - { - "epoch": 0.18484002878253392, - "grad_norm": 0.5004985928535461, - "learning_rate": 4.580555555555556e-05, - "loss": 0.4496, - "step": 5266 - }, - { - "epoch": 0.184875129433651, - "grad_norm": 0.5075162649154663, - "learning_rate": 4.58037037037037e-05, - "loss": 0.5428, - "step": 5267 - }, - { - "epoch": 0.18491023008476806, - "grad_norm": 0.6631655097007751, - "learning_rate": 4.580185185185185e-05, - "loss": 0.5581, - "step": 5268 - }, - { - "epoch": 0.18494533073588515, - "grad_norm": 0.5546512007713318, - "learning_rate": 4.58e-05, - "loss": 0.5739, - "step": 5269 - }, - { - "epoch": 0.18498043138700224, - "grad_norm": 0.5779672861099243, - "learning_rate": 4.579814814814815e-05, - "loss": 0.5107, - "step": 5270 - }, - { - "epoch": 0.1850155320381193, - "grad_norm": 0.509425163269043, - "learning_rate": 4.5796296296296295e-05, - "loss": 0.3936, - "step": 5271 - }, - { - "epoch": 0.18505063268923638, - "grad_norm": 0.6669484376907349, - "learning_rate": 4.5794444444444446e-05, - "loss": 0.4936, - "step": 5272 - }, - { - "epoch": 0.18508573334035347, - "grad_norm": 0.6227346658706665, - "learning_rate": 4.5792592592592596e-05, - "loss": 0.5996, - "step": 5273 - }, - { - "epoch": 0.18512083399147053, - "grad_norm": 0.5283015966415405, - "learning_rate": 4.5790740740740746e-05, - "loss": 0.455, - "step": 5274 - }, - { - "epoch": 0.18515593464258762, - "grad_norm": 0.5645913481712341, - "learning_rate": 4.578888888888889e-05, - "loss": 0.4681, - "step": 5275 - }, - { - "epoch": 0.1851910352937047, - "grad_norm": 0.5757498741149902, - "learning_rate": 4.578703703703704e-05, - "loss": 0.5025, - "step": 5276 - }, - { - "epoch": 0.18522613594482176, - "grad_norm": 0.5448203086853027, - "learning_rate": 4.578518518518519e-05, - "loss": 0.5261, - "step": 5277 - }, - { - "epoch": 0.18526123659593885, - "grad_norm": 0.476387619972229, - "learning_rate": 4.578333333333333e-05, - "loss": 0.4314, - "step": 5278 - }, - { - "epoch": 0.18529633724705594, - "grad_norm": 0.5617332458496094, - "learning_rate": 4.578148148148149e-05, - "loss": 0.5531, - "step": 5279 - }, - { - "epoch": 0.18533143789817302, - "grad_norm": 0.44838282465934753, - "learning_rate": 4.577962962962963e-05, - "loss": 0.3612, - "step": 5280 - }, - { - "epoch": 0.18536653854929008, - "grad_norm": 0.6188914179801941, - "learning_rate": 4.577777777777778e-05, - "loss": 0.6628, - "step": 5281 - }, - { - "epoch": 0.18540163920040717, - "grad_norm": 0.5324870944023132, - "learning_rate": 4.5775925925925926e-05, - "loss": 0.5588, - "step": 5282 - }, - { - "epoch": 0.18543673985152426, - "grad_norm": 0.5131122469902039, - "learning_rate": 4.5774074074074076e-05, - "loss": 0.6178, - "step": 5283 - }, - { - "epoch": 0.18547184050264132, - "grad_norm": 0.6225293278694153, - "learning_rate": 4.577222222222222e-05, - "loss": 0.6128, - "step": 5284 - }, - { - "epoch": 0.1855069411537584, - "grad_norm": 0.4824599027633667, - "learning_rate": 4.577037037037038e-05, - "loss": 0.4699, - "step": 5285 - }, - { - "epoch": 0.1855420418048755, - "grad_norm": 0.624876856803894, - "learning_rate": 4.576851851851852e-05, - "loss": 0.5714, - "step": 5286 - }, - { - "epoch": 0.18557714245599255, - "grad_norm": 0.4657555818557739, - "learning_rate": 4.576666666666667e-05, - "loss": 0.5087, - "step": 5287 - }, - { - "epoch": 0.18561224310710964, - "grad_norm": 0.458682656288147, - "learning_rate": 4.5764814814814813e-05, - "loss": 0.5364, - "step": 5288 - }, - { - "epoch": 0.18564734375822672, - "grad_norm": 0.4068421423435211, - "learning_rate": 4.5762962962962964e-05, - "loss": 0.4278, - "step": 5289 - }, - { - "epoch": 0.18568244440934378, - "grad_norm": 0.5435271859169006, - "learning_rate": 4.5761111111111114e-05, - "loss": 0.385, - "step": 5290 - }, - { - "epoch": 0.18571754506046087, - "grad_norm": 0.5916385054588318, - "learning_rate": 4.5759259259259264e-05, - "loss": 0.5114, - "step": 5291 - }, - { - "epoch": 0.18575264571157796, - "grad_norm": 0.8342806696891785, - "learning_rate": 4.575740740740741e-05, - "loss": 0.608, - "step": 5292 - }, - { - "epoch": 0.18578774636269502, - "grad_norm": 0.4878314435482025, - "learning_rate": 4.575555555555556e-05, - "loss": 0.4784, - "step": 5293 - }, - { - "epoch": 0.1858228470138121, - "grad_norm": 0.5681031346321106, - "learning_rate": 4.575370370370371e-05, - "loss": 0.6043, - "step": 5294 - }, - { - "epoch": 0.1858579476649292, - "grad_norm": 0.4856996238231659, - "learning_rate": 4.575185185185185e-05, - "loss": 0.4947, - "step": 5295 - }, - { - "epoch": 0.18589304831604625, - "grad_norm": 0.5008072853088379, - "learning_rate": 4.575e-05, - "loss": 0.5048, - "step": 5296 - }, - { - "epoch": 0.18592814896716334, - "grad_norm": 0.42581385374069214, - "learning_rate": 4.574814814814815e-05, - "loss": 0.5637, - "step": 5297 - }, - { - "epoch": 0.18596324961828042, - "grad_norm": 0.5159610509872437, - "learning_rate": 4.57462962962963e-05, - "loss": 0.4105, - "step": 5298 - }, - { - "epoch": 0.1859983502693975, - "grad_norm": 0.5770937204360962, - "learning_rate": 4.5744444444444444e-05, - "loss": 0.5128, - "step": 5299 - }, - { - "epoch": 0.18603345092051457, - "grad_norm": 0.4637194275856018, - "learning_rate": 4.5742592592592594e-05, - "loss": 0.4812, - "step": 5300 - }, - { - "epoch": 0.18606855157163166, - "grad_norm": 0.5558308959007263, - "learning_rate": 4.5740740740740745e-05, - "loss": 0.4791, - "step": 5301 - }, - { - "epoch": 0.18610365222274874, - "grad_norm": 0.4802439212799072, - "learning_rate": 4.5738888888888895e-05, - "loss": 0.4789, - "step": 5302 - }, - { - "epoch": 0.1861387528738658, - "grad_norm": 0.5245698690414429, - "learning_rate": 4.573703703703704e-05, - "loss": 0.4371, - "step": 5303 - }, - { - "epoch": 0.1861738535249829, - "grad_norm": 0.5331368446350098, - "learning_rate": 4.573518518518519e-05, - "loss": 0.446, - "step": 5304 - }, - { - "epoch": 0.18620895417609998, - "grad_norm": 0.569685697555542, - "learning_rate": 4.573333333333333e-05, - "loss": 0.625, - "step": 5305 - }, - { - "epoch": 0.18624405482721704, - "grad_norm": 0.6130583882331848, - "learning_rate": 4.573148148148149e-05, - "loss": 0.5024, - "step": 5306 - }, - { - "epoch": 0.18627915547833412, - "grad_norm": 0.5376291275024414, - "learning_rate": 4.572962962962963e-05, - "loss": 0.5871, - "step": 5307 - }, - { - "epoch": 0.1863142561294512, - "grad_norm": 0.6599137187004089, - "learning_rate": 4.572777777777778e-05, - "loss": 0.5052, - "step": 5308 - }, - { - "epoch": 0.18634935678056827, - "grad_norm": 0.5420133471488953, - "learning_rate": 4.5725925925925925e-05, - "loss": 0.3855, - "step": 5309 - }, - { - "epoch": 0.18638445743168536, - "grad_norm": 0.5093613266944885, - "learning_rate": 4.5724074074074075e-05, - "loss": 0.5154, - "step": 5310 - }, - { - "epoch": 0.18641955808280244, - "grad_norm": 0.6081554293632507, - "learning_rate": 4.572222222222222e-05, - "loss": 0.5052, - "step": 5311 - }, - { - "epoch": 0.1864546587339195, - "grad_norm": 0.5812437534332275, - "learning_rate": 4.5720370370370375e-05, - "loss": 0.4867, - "step": 5312 - }, - { - "epoch": 0.1864897593850366, - "grad_norm": 0.39270782470703125, - "learning_rate": 4.571851851851852e-05, - "loss": 0.4475, - "step": 5313 - }, - { - "epoch": 0.18652486003615368, - "grad_norm": 0.5836930871009827, - "learning_rate": 4.571666666666667e-05, - "loss": 0.5729, - "step": 5314 - }, - { - "epoch": 0.18655996068727074, - "grad_norm": 0.5214181542396545, - "learning_rate": 4.571481481481482e-05, - "loss": 0.3433, - "step": 5315 - }, - { - "epoch": 0.18659506133838782, - "grad_norm": 0.5009456872940063, - "learning_rate": 4.571296296296296e-05, - "loss": 0.5537, - "step": 5316 - }, - { - "epoch": 0.1866301619895049, - "grad_norm": 0.46775689721107483, - "learning_rate": 4.571111111111111e-05, - "loss": 0.5167, - "step": 5317 - }, - { - "epoch": 0.18666526264062197, - "grad_norm": 0.5992898344993591, - "learning_rate": 4.570925925925926e-05, - "loss": 0.5369, - "step": 5318 - }, - { - "epoch": 0.18670036329173906, - "grad_norm": 0.5419416427612305, - "learning_rate": 4.570740740740741e-05, - "loss": 0.6217, - "step": 5319 - }, - { - "epoch": 0.18673546394285614, - "grad_norm": 0.5391594171524048, - "learning_rate": 4.5705555555555556e-05, - "loss": 0.5331, - "step": 5320 - }, - { - "epoch": 0.18677056459397323, - "grad_norm": 0.5137917995452881, - "learning_rate": 4.5703703703703706e-05, - "loss": 0.4805, - "step": 5321 - }, - { - "epoch": 0.1868056652450903, - "grad_norm": 0.4972102642059326, - "learning_rate": 4.5701851851851856e-05, - "loss": 0.3937, - "step": 5322 - }, - { - "epoch": 0.18684076589620738, - "grad_norm": 0.6093249320983887, - "learning_rate": 4.5700000000000006e-05, - "loss": 0.5191, - "step": 5323 - }, - { - "epoch": 0.18687586654732447, - "grad_norm": 0.48395979404449463, - "learning_rate": 4.569814814814815e-05, - "loss": 0.5049, - "step": 5324 - }, - { - "epoch": 0.18691096719844152, - "grad_norm": 0.5034534335136414, - "learning_rate": 4.56962962962963e-05, - "loss": 0.4328, - "step": 5325 - }, - { - "epoch": 0.1869460678495586, - "grad_norm": 0.5354268550872803, - "learning_rate": 4.569444444444444e-05, - "loss": 0.4598, - "step": 5326 - }, - { - "epoch": 0.1869811685006757, - "grad_norm": 0.517798662185669, - "learning_rate": 4.569259259259259e-05, - "loss": 0.6046, - "step": 5327 - }, - { - "epoch": 0.18701626915179276, - "grad_norm": 0.5419031977653503, - "learning_rate": 4.569074074074074e-05, - "loss": 0.4816, - "step": 5328 - }, - { - "epoch": 0.18705136980290984, - "grad_norm": 0.4765770435333252, - "learning_rate": 4.5688888888888893e-05, - "loss": 0.411, - "step": 5329 - }, - { - "epoch": 0.18708647045402693, - "grad_norm": 0.49629443883895874, - "learning_rate": 4.568703703703704e-05, - "loss": 0.5193, - "step": 5330 - }, - { - "epoch": 0.187121571105144, - "grad_norm": 0.4769720733165741, - "learning_rate": 4.568518518518519e-05, - "loss": 0.5057, - "step": 5331 - }, - { - "epoch": 0.18715667175626108, - "grad_norm": 0.5970495343208313, - "learning_rate": 4.568333333333333e-05, - "loss": 0.4513, - "step": 5332 - }, - { - "epoch": 0.18719177240737817, - "grad_norm": 0.5708695650100708, - "learning_rate": 4.568148148148149e-05, - "loss": 0.508, - "step": 5333 - }, - { - "epoch": 0.18722687305849522, - "grad_norm": 0.5279495716094971, - "learning_rate": 4.567962962962963e-05, - "loss": 0.4275, - "step": 5334 - }, - { - "epoch": 0.1872619737096123, - "grad_norm": 0.5154440999031067, - "learning_rate": 4.567777777777778e-05, - "loss": 0.5185, - "step": 5335 - }, - { - "epoch": 0.1872970743607294, - "grad_norm": 0.4174746870994568, - "learning_rate": 4.567592592592593e-05, - "loss": 0.4972, - "step": 5336 - }, - { - "epoch": 0.18733217501184646, - "grad_norm": 0.5170007944107056, - "learning_rate": 4.5674074074074074e-05, - "loss": 0.4277, - "step": 5337 - }, - { - "epoch": 0.18736727566296354, - "grad_norm": 0.5268702507019043, - "learning_rate": 4.5672222222222224e-05, - "loss": 0.4816, - "step": 5338 - }, - { - "epoch": 0.18740237631408063, - "grad_norm": 0.5122681260108948, - "learning_rate": 4.5670370370370374e-05, - "loss": 0.584, - "step": 5339 - }, - { - "epoch": 0.1874374769651977, - "grad_norm": 0.5529067516326904, - "learning_rate": 4.5668518518518524e-05, - "loss": 0.5405, - "step": 5340 - }, - { - "epoch": 0.18747257761631478, - "grad_norm": 0.5973703265190125, - "learning_rate": 4.566666666666667e-05, - "loss": 0.5834, - "step": 5341 - }, - { - "epoch": 0.18750767826743187, - "grad_norm": 0.5253664255142212, - "learning_rate": 4.566481481481482e-05, - "loss": 0.5818, - "step": 5342 - }, - { - "epoch": 0.18754277891854895, - "grad_norm": 0.47205883264541626, - "learning_rate": 4.566296296296296e-05, - "loss": 0.4517, - "step": 5343 - }, - { - "epoch": 0.187577879569666, - "grad_norm": 0.6738991141319275, - "learning_rate": 4.566111111111112e-05, - "loss": 0.4624, - "step": 5344 - }, - { - "epoch": 0.1876129802207831, - "grad_norm": 0.48539066314697266, - "learning_rate": 4.565925925925926e-05, - "loss": 0.5008, - "step": 5345 - }, - { - "epoch": 0.18764808087190019, - "grad_norm": 0.57154381275177, - "learning_rate": 4.565740740740741e-05, - "loss": 0.4574, - "step": 5346 - }, - { - "epoch": 0.18768318152301725, - "grad_norm": 0.5443056225776672, - "learning_rate": 4.5655555555555555e-05, - "loss": 0.558, - "step": 5347 - }, - { - "epoch": 0.18771828217413433, - "grad_norm": 0.5228602886199951, - "learning_rate": 4.5653703703703705e-05, - "loss": 0.5126, - "step": 5348 - }, - { - "epoch": 0.18775338282525142, - "grad_norm": 0.4965222477912903, - "learning_rate": 4.5651851851851855e-05, - "loss": 0.4387, - "step": 5349 - }, - { - "epoch": 0.18778848347636848, - "grad_norm": 0.5881155133247375, - "learning_rate": 4.5650000000000005e-05, - "loss": 0.5707, - "step": 5350 - }, - { - "epoch": 0.18782358412748557, - "grad_norm": 0.44466298818588257, - "learning_rate": 4.564814814814815e-05, - "loss": 0.5292, - "step": 5351 - }, - { - "epoch": 0.18785868477860265, - "grad_norm": 0.550258457660675, - "learning_rate": 4.56462962962963e-05, - "loss": 0.5291, - "step": 5352 - }, - { - "epoch": 0.1878937854297197, - "grad_norm": 0.47177815437316895, - "learning_rate": 4.564444444444444e-05, - "loss": 0.3991, - "step": 5353 - }, - { - "epoch": 0.1879288860808368, - "grad_norm": 0.5540080666542053, - "learning_rate": 4.564259259259259e-05, - "loss": 0.4639, - "step": 5354 - }, - { - "epoch": 0.18796398673195389, - "grad_norm": 0.5236107707023621, - "learning_rate": 4.564074074074074e-05, - "loss": 0.4753, - "step": 5355 - }, - { - "epoch": 0.18799908738307095, - "grad_norm": 0.5344201922416687, - "learning_rate": 4.563888888888889e-05, - "loss": 0.5203, - "step": 5356 - }, - { - "epoch": 0.18803418803418803, - "grad_norm": 0.5945436358451843, - "learning_rate": 4.563703703703704e-05, - "loss": 0.4411, - "step": 5357 - }, - { - "epoch": 0.18806928868530512, - "grad_norm": 0.5002194046974182, - "learning_rate": 4.5635185185185186e-05, - "loss": 0.5523, - "step": 5358 - }, - { - "epoch": 0.18810438933642218, - "grad_norm": 0.5505216717720032, - "learning_rate": 4.5633333333333336e-05, - "loss": 0.591, - "step": 5359 - }, - { - "epoch": 0.18813948998753927, - "grad_norm": 0.4827972948551178, - "learning_rate": 4.5631481481481486e-05, - "loss": 0.3907, - "step": 5360 - }, - { - "epoch": 0.18817459063865635, - "grad_norm": 0.43821296095848083, - "learning_rate": 4.5629629629629636e-05, - "loss": 0.576, - "step": 5361 - }, - { - "epoch": 0.1882096912897734, - "grad_norm": 0.5051521062850952, - "learning_rate": 4.562777777777778e-05, - "loss": 0.53, - "step": 5362 - }, - { - "epoch": 0.1882447919408905, - "grad_norm": 0.5579739809036255, - "learning_rate": 4.562592592592593e-05, - "loss": 0.4598, - "step": 5363 - }, - { - "epoch": 0.18827989259200759, - "grad_norm": 0.48059868812561035, - "learning_rate": 4.562407407407407e-05, - "loss": 0.4223, - "step": 5364 - }, - { - "epoch": 0.18831499324312467, - "grad_norm": 0.6416800618171692, - "learning_rate": 4.562222222222222e-05, - "loss": 0.5523, - "step": 5365 - }, - { - "epoch": 0.18835009389424173, - "grad_norm": 0.5358476638793945, - "learning_rate": 4.562037037037037e-05, - "loss": 0.6177, - "step": 5366 - }, - { - "epoch": 0.18838519454535882, - "grad_norm": 0.4339027404785156, - "learning_rate": 4.561851851851852e-05, - "loss": 0.4554, - "step": 5367 - }, - { - "epoch": 0.1884202951964759, - "grad_norm": 0.5598481297492981, - "learning_rate": 4.5616666666666666e-05, - "loss": 0.5077, - "step": 5368 - }, - { - "epoch": 0.18845539584759297, - "grad_norm": 0.4315343201160431, - "learning_rate": 4.5614814814814817e-05, - "loss": 0.3718, - "step": 5369 - }, - { - "epoch": 0.18849049649871005, - "grad_norm": 0.6835110187530518, - "learning_rate": 4.561296296296296e-05, - "loss": 0.5949, - "step": 5370 - }, - { - "epoch": 0.18852559714982714, - "grad_norm": 0.567533016204834, - "learning_rate": 4.561111111111112e-05, - "loss": 0.5616, - "step": 5371 - }, - { - "epoch": 0.1885606978009442, - "grad_norm": 0.7079283595085144, - "learning_rate": 4.560925925925926e-05, - "loss": 0.402, - "step": 5372 - }, - { - "epoch": 0.1885957984520613, - "grad_norm": 0.4897613525390625, - "learning_rate": 4.560740740740741e-05, - "loss": 0.4462, - "step": 5373 - }, - { - "epoch": 0.18863089910317837, - "grad_norm": 0.47389039397239685, - "learning_rate": 4.560555555555556e-05, - "loss": 0.4922, - "step": 5374 - }, - { - "epoch": 0.18866599975429543, - "grad_norm": 0.5152509808540344, - "learning_rate": 4.5603703703703704e-05, - "loss": 0.5178, - "step": 5375 - }, - { - "epoch": 0.18870110040541252, - "grad_norm": 0.46309927105903625, - "learning_rate": 4.5601851851851854e-05, - "loss": 0.4337, - "step": 5376 - }, - { - "epoch": 0.1887362010565296, - "grad_norm": 0.5611766576766968, - "learning_rate": 4.5600000000000004e-05, - "loss": 0.4975, - "step": 5377 - }, - { - "epoch": 0.18877130170764667, - "grad_norm": 0.49470260739326477, - "learning_rate": 4.5598148148148154e-05, - "loss": 0.5372, - "step": 5378 - }, - { - "epoch": 0.18880640235876375, - "grad_norm": 0.5196701288223267, - "learning_rate": 4.55962962962963e-05, - "loss": 0.5318, - "step": 5379 - }, - { - "epoch": 0.18884150300988084, - "grad_norm": 0.4779994785785675, - "learning_rate": 4.559444444444445e-05, - "loss": 0.4477, - "step": 5380 - }, - { - "epoch": 0.1888766036609979, - "grad_norm": 0.5960492491722107, - "learning_rate": 4.559259259259259e-05, - "loss": 0.528, - "step": 5381 - }, - { - "epoch": 0.188911704312115, - "grad_norm": 0.6168244481086731, - "learning_rate": 4.559074074074075e-05, - "loss": 0.6252, - "step": 5382 - }, - { - "epoch": 0.18894680496323207, - "grad_norm": 0.48513340950012207, - "learning_rate": 4.558888888888889e-05, - "loss": 0.4164, - "step": 5383 - }, - { - "epoch": 0.18898190561434913, - "grad_norm": 0.5500674843788147, - "learning_rate": 4.558703703703704e-05, - "loss": 0.5547, - "step": 5384 - }, - { - "epoch": 0.18901700626546622, - "grad_norm": 0.5232444405555725, - "learning_rate": 4.5585185185185184e-05, - "loss": 0.5459, - "step": 5385 - }, - { - "epoch": 0.1890521069165833, - "grad_norm": 0.5736244320869446, - "learning_rate": 4.5583333333333335e-05, - "loss": 0.4983, - "step": 5386 - }, - { - "epoch": 0.1890872075677004, - "grad_norm": 0.5178675055503845, - "learning_rate": 4.5581481481481485e-05, - "loss": 0.514, - "step": 5387 - }, - { - "epoch": 0.18912230821881745, - "grad_norm": 0.5222482681274414, - "learning_rate": 4.5579629629629635e-05, - "loss": 0.5494, - "step": 5388 - }, - { - "epoch": 0.18915740886993454, - "grad_norm": 0.425202339887619, - "learning_rate": 4.557777777777778e-05, - "loss": 0.4414, - "step": 5389 - }, - { - "epoch": 0.18919250952105163, - "grad_norm": 0.6058228015899658, - "learning_rate": 4.557592592592593e-05, - "loss": 0.4867, - "step": 5390 - }, - { - "epoch": 0.1892276101721687, - "grad_norm": 0.48388561606407166, - "learning_rate": 4.557407407407407e-05, - "loss": 0.5085, - "step": 5391 - }, - { - "epoch": 0.18926271082328577, - "grad_norm": 0.5626838207244873, - "learning_rate": 4.557222222222222e-05, - "loss": 0.5929, - "step": 5392 - }, - { - "epoch": 0.18929781147440286, - "grad_norm": 0.5962284803390503, - "learning_rate": 4.557037037037037e-05, - "loss": 0.5348, - "step": 5393 - }, - { - "epoch": 0.18933291212551992, - "grad_norm": 0.5407038927078247, - "learning_rate": 4.556851851851852e-05, - "loss": 0.5755, - "step": 5394 - }, - { - "epoch": 0.189368012776637, - "grad_norm": 0.5200050473213196, - "learning_rate": 4.556666666666667e-05, - "loss": 0.6349, - "step": 5395 - }, - { - "epoch": 0.1894031134277541, - "grad_norm": 0.5622861385345459, - "learning_rate": 4.5564814814814815e-05, - "loss": 0.4659, - "step": 5396 - }, - { - "epoch": 0.18943821407887115, - "grad_norm": 0.47796866297721863, - "learning_rate": 4.5562962962962965e-05, - "loss": 0.6007, - "step": 5397 - }, - { - "epoch": 0.18947331472998824, - "grad_norm": 0.49553173780441284, - "learning_rate": 4.5561111111111116e-05, - "loss": 0.5007, - "step": 5398 - }, - { - "epoch": 0.18950841538110533, - "grad_norm": 0.5760098099708557, - "learning_rate": 4.5559259259259266e-05, - "loss": 0.4691, - "step": 5399 - }, - { - "epoch": 0.1895435160322224, - "grad_norm": 0.47940558195114136, - "learning_rate": 4.555740740740741e-05, - "loss": 0.5448, - "step": 5400 - }, - { - "epoch": 0.18957861668333947, - "grad_norm": 1.054490566253662, - "learning_rate": 4.555555555555556e-05, - "loss": 0.5947, - "step": 5401 - }, - { - "epoch": 0.18961371733445656, - "grad_norm": 0.46238765120506287, - "learning_rate": 4.55537037037037e-05, - "loss": 0.5425, - "step": 5402 - }, - { - "epoch": 0.18964881798557362, - "grad_norm": 0.4397163987159729, - "learning_rate": 4.555185185185186e-05, - "loss": 0.4601, - "step": 5403 - }, - { - "epoch": 0.1896839186366907, - "grad_norm": 0.6428682804107666, - "learning_rate": 4.555e-05, - "loss": 0.6176, - "step": 5404 - }, - { - "epoch": 0.1897190192878078, - "grad_norm": 0.58394855260849, - "learning_rate": 4.554814814814815e-05, - "loss": 0.5181, - "step": 5405 - }, - { - "epoch": 0.18975411993892485, - "grad_norm": 0.45547571778297424, - "learning_rate": 4.5546296296296296e-05, - "loss": 0.4764, - "step": 5406 - }, - { - "epoch": 0.18978922059004194, - "grad_norm": 0.5613611936569214, - "learning_rate": 4.5544444444444446e-05, - "loss": 0.4802, - "step": 5407 - }, - { - "epoch": 0.18982432124115903, - "grad_norm": 0.4939276874065399, - "learning_rate": 4.554259259259259e-05, - "loss": 0.4509, - "step": 5408 - }, - { - "epoch": 0.18985942189227611, - "grad_norm": 0.597129225730896, - "learning_rate": 4.5540740740740746e-05, - "loss": 0.4999, - "step": 5409 - }, - { - "epoch": 0.18989452254339317, - "grad_norm": 0.6096264123916626, - "learning_rate": 4.553888888888889e-05, - "loss": 0.5349, - "step": 5410 - }, - { - "epoch": 0.18992962319451026, - "grad_norm": 0.5052928328514099, - "learning_rate": 4.553703703703704e-05, - "loss": 0.6204, - "step": 5411 - }, - { - "epoch": 0.18996472384562735, - "grad_norm": 0.41020286083221436, - "learning_rate": 4.553518518518518e-05, - "loss": 0.5612, - "step": 5412 - }, - { - "epoch": 0.1899998244967444, - "grad_norm": 0.46397674083709717, - "learning_rate": 4.553333333333333e-05, - "loss": 0.541, - "step": 5413 - }, - { - "epoch": 0.1900349251478615, - "grad_norm": 0.537020742893219, - "learning_rate": 4.5531481481481483e-05, - "loss": 0.4627, - "step": 5414 - }, - { - "epoch": 0.19007002579897858, - "grad_norm": 0.5230777263641357, - "learning_rate": 4.5529629629629634e-05, - "loss": 0.5168, - "step": 5415 - }, - { - "epoch": 0.19010512645009564, - "grad_norm": 0.48098224401474, - "learning_rate": 4.5527777777777784e-05, - "loss": 0.4857, - "step": 5416 - }, - { - "epoch": 0.19014022710121273, - "grad_norm": 0.4631260633468628, - "learning_rate": 4.552592592592593e-05, - "loss": 0.3873, - "step": 5417 - }, - { - "epoch": 0.19017532775232981, - "grad_norm": 0.468856543302536, - "learning_rate": 4.552407407407408e-05, - "loss": 0.475, - "step": 5418 - }, - { - "epoch": 0.19021042840344687, - "grad_norm": 0.9285392165184021, - "learning_rate": 4.552222222222222e-05, - "loss": 0.5345, - "step": 5419 - }, - { - "epoch": 0.19024552905456396, - "grad_norm": 0.5208016037940979, - "learning_rate": 4.552037037037038e-05, - "loss": 0.574, - "step": 5420 - }, - { - "epoch": 0.19028062970568105, - "grad_norm": 0.5323110222816467, - "learning_rate": 4.551851851851852e-05, - "loss": 0.4954, - "step": 5421 - }, - { - "epoch": 0.1903157303567981, - "grad_norm": 0.533677339553833, - "learning_rate": 4.551666666666667e-05, - "loss": 0.5368, - "step": 5422 - }, - { - "epoch": 0.1903508310079152, - "grad_norm": 0.494351863861084, - "learning_rate": 4.5514814814814814e-05, - "loss": 0.5856, - "step": 5423 - }, - { - "epoch": 0.19038593165903228, - "grad_norm": 0.6099382638931274, - "learning_rate": 4.5512962962962964e-05, - "loss": 0.533, - "step": 5424 - }, - { - "epoch": 0.19042103231014934, - "grad_norm": 0.5880776643753052, - "learning_rate": 4.5511111111111114e-05, - "loss": 0.5, - "step": 5425 - }, - { - "epoch": 0.19045613296126643, - "grad_norm": 0.5785214900970459, - "learning_rate": 4.5509259259259264e-05, - "loss": 0.4707, - "step": 5426 - }, - { - "epoch": 0.19049123361238351, - "grad_norm": 0.595557451248169, - "learning_rate": 4.550740740740741e-05, - "loss": 0.4925, - "step": 5427 - }, - { - "epoch": 0.19052633426350057, - "grad_norm": 0.6936526298522949, - "learning_rate": 4.550555555555556e-05, - "loss": 0.5456, - "step": 5428 - }, - { - "epoch": 0.19056143491461766, - "grad_norm": 0.5814068913459778, - "learning_rate": 4.55037037037037e-05, - "loss": 0.4746, - "step": 5429 - }, - { - "epoch": 0.19059653556573475, - "grad_norm": 0.607967734336853, - "learning_rate": 4.550185185185186e-05, - "loss": 0.5189, - "step": 5430 - }, - { - "epoch": 0.19063163621685184, - "grad_norm": 0.5404658317565918, - "learning_rate": 4.55e-05, - "loss": 0.5197, - "step": 5431 - }, - { - "epoch": 0.1906667368679689, - "grad_norm": 0.6242331862449646, - "learning_rate": 4.549814814814815e-05, - "loss": 0.6077, - "step": 5432 - }, - { - "epoch": 0.19070183751908598, - "grad_norm": 0.544150710105896, - "learning_rate": 4.5496296296296295e-05, - "loss": 0.4581, - "step": 5433 - }, - { - "epoch": 0.19073693817020307, - "grad_norm": 0.5115355253219604, - "learning_rate": 4.5494444444444445e-05, - "loss": 0.3525, - "step": 5434 - }, - { - "epoch": 0.19077203882132013, - "grad_norm": 0.4671737253665924, - "learning_rate": 4.5492592592592595e-05, - "loss": 0.4652, - "step": 5435 - }, - { - "epoch": 0.19080713947243721, - "grad_norm": 0.5251744389533997, - "learning_rate": 4.5490740740740745e-05, - "loss": 0.6059, - "step": 5436 - }, - { - "epoch": 0.1908422401235543, - "grad_norm": 0.5597550272941589, - "learning_rate": 4.5488888888888895e-05, - "loss": 0.5292, - "step": 5437 - }, - { - "epoch": 0.19087734077467136, - "grad_norm": 0.5562894940376282, - "learning_rate": 4.548703703703704e-05, - "loss": 0.5889, - "step": 5438 - }, - { - "epoch": 0.19091244142578845, - "grad_norm": 0.5853288769721985, - "learning_rate": 4.548518518518519e-05, - "loss": 0.5992, - "step": 5439 - }, - { - "epoch": 0.19094754207690554, - "grad_norm": 0.5427247881889343, - "learning_rate": 4.548333333333333e-05, - "loss": 0.5029, - "step": 5440 - }, - { - "epoch": 0.1909826427280226, - "grad_norm": 0.5002157688140869, - "learning_rate": 4.548148148148149e-05, - "loss": 0.5941, - "step": 5441 - }, - { - "epoch": 0.19101774337913968, - "grad_norm": 0.5942167639732361, - "learning_rate": 4.547962962962963e-05, - "loss": 0.5327, - "step": 5442 - }, - { - "epoch": 0.19105284403025677, - "grad_norm": 0.546709418296814, - "learning_rate": 4.547777777777778e-05, - "loss": 0.4816, - "step": 5443 - }, - { - "epoch": 0.19108794468137383, - "grad_norm": 0.5008942484855652, - "learning_rate": 4.5475925925925926e-05, - "loss": 0.5067, - "step": 5444 - }, - { - "epoch": 0.19112304533249092, - "grad_norm": 0.4904356300830841, - "learning_rate": 4.5474074074074076e-05, - "loss": 0.5642, - "step": 5445 - }, - { - "epoch": 0.191158145983608, - "grad_norm": 0.6714710593223572, - "learning_rate": 4.5472222222222226e-05, - "loss": 0.5732, - "step": 5446 - }, - { - "epoch": 0.19119324663472506, - "grad_norm": 0.4673844575881958, - "learning_rate": 4.5470370370370376e-05, - "loss": 0.4631, - "step": 5447 - }, - { - "epoch": 0.19122834728584215, - "grad_norm": 0.4842609167098999, - "learning_rate": 4.546851851851852e-05, - "loss": 0.5501, - "step": 5448 - }, - { - "epoch": 0.19126344793695924, - "grad_norm": 0.44580215215682983, - "learning_rate": 4.546666666666667e-05, - "loss": 0.4676, - "step": 5449 - }, - { - "epoch": 0.19129854858807632, - "grad_norm": 0.4867132902145386, - "learning_rate": 4.546481481481481e-05, - "loss": 0.4234, - "step": 5450 - }, - { - "epoch": 0.19133364923919338, - "grad_norm": 0.5572347640991211, - "learning_rate": 4.546296296296296e-05, - "loss": 0.5422, - "step": 5451 - }, - { - "epoch": 0.19136874989031047, - "grad_norm": 0.5207096934318542, - "learning_rate": 4.546111111111111e-05, - "loss": 0.5702, - "step": 5452 - }, - { - "epoch": 0.19140385054142756, - "grad_norm": 0.5737498998641968, - "learning_rate": 4.545925925925926e-05, - "loss": 0.5103, - "step": 5453 - }, - { - "epoch": 0.19143895119254462, - "grad_norm": 0.5361436605453491, - "learning_rate": 4.5457407407407407e-05, - "loss": 0.5313, - "step": 5454 - }, - { - "epoch": 0.1914740518436617, - "grad_norm": 0.535755455493927, - "learning_rate": 4.545555555555556e-05, - "loss": 0.4506, - "step": 5455 - }, - { - "epoch": 0.1915091524947788, - "grad_norm": 0.6130189299583435, - "learning_rate": 4.545370370370371e-05, - "loss": 0.5417, - "step": 5456 - }, - { - "epoch": 0.19154425314589585, - "grad_norm": 0.5041875243186951, - "learning_rate": 4.545185185185186e-05, - "loss": 0.4499, - "step": 5457 - }, - { - "epoch": 0.19157935379701294, - "grad_norm": 0.6019284129142761, - "learning_rate": 4.545000000000001e-05, - "loss": 0.6389, - "step": 5458 - }, - { - "epoch": 0.19161445444813002, - "grad_norm": 0.5032199025154114, - "learning_rate": 4.544814814814815e-05, - "loss": 0.5018, - "step": 5459 - }, - { - "epoch": 0.19164955509924708, - "grad_norm": 0.5818716883659363, - "learning_rate": 4.54462962962963e-05, - "loss": 0.4825, - "step": 5460 - }, - { - "epoch": 0.19168465575036417, - "grad_norm": 0.46601030230522156, - "learning_rate": 4.5444444444444444e-05, - "loss": 0.6095, - "step": 5461 - }, - { - "epoch": 0.19171975640148126, - "grad_norm": 0.4593387246131897, - "learning_rate": 4.5442592592592594e-05, - "loss": 0.4858, - "step": 5462 - }, - { - "epoch": 0.19175485705259832, - "grad_norm": 0.49730464816093445, - "learning_rate": 4.5440740740740744e-05, - "loss": 0.4293, - "step": 5463 - }, - { - "epoch": 0.1917899577037154, - "grad_norm": 0.4357820153236389, - "learning_rate": 4.5438888888888894e-05, - "loss": 0.5009, - "step": 5464 - }, - { - "epoch": 0.1918250583548325, - "grad_norm": 0.5351235866546631, - "learning_rate": 4.543703703703704e-05, - "loss": 0.4629, - "step": 5465 - }, - { - "epoch": 0.19186015900594955, - "grad_norm": 0.4626166820526123, - "learning_rate": 4.543518518518519e-05, - "loss": 0.54, - "step": 5466 - }, - { - "epoch": 0.19189525965706664, - "grad_norm": 0.4707779586315155, - "learning_rate": 4.543333333333333e-05, - "loss": 0.5029, - "step": 5467 - }, - { - "epoch": 0.19193036030818372, - "grad_norm": 0.4444037675857544, - "learning_rate": 4.543148148148149e-05, - "loss": 0.426, - "step": 5468 - }, - { - "epoch": 0.19196546095930078, - "grad_norm": 0.5041916370391846, - "learning_rate": 4.542962962962963e-05, - "loss": 0.4829, - "step": 5469 - }, - { - "epoch": 0.19200056161041787, - "grad_norm": 0.5521201491355896, - "learning_rate": 4.542777777777778e-05, - "loss": 0.5172, - "step": 5470 - }, - { - "epoch": 0.19203566226153496, - "grad_norm": 0.5019133687019348, - "learning_rate": 4.5425925925925925e-05, - "loss": 0.5154, - "step": 5471 - }, - { - "epoch": 0.19207076291265204, - "grad_norm": 0.5280529856681824, - "learning_rate": 4.5424074074074075e-05, - "loss": 0.4754, - "step": 5472 - }, - { - "epoch": 0.1921058635637691, - "grad_norm": 0.6005613803863525, - "learning_rate": 4.5422222222222225e-05, - "loss": 0.529, - "step": 5473 - }, - { - "epoch": 0.1921409642148862, - "grad_norm": 0.5751649737358093, - "learning_rate": 4.5420370370370375e-05, - "loss": 0.5748, - "step": 5474 - }, - { - "epoch": 0.19217606486600328, - "grad_norm": 0.45584097504615784, - "learning_rate": 4.541851851851852e-05, - "loss": 0.5766, - "step": 5475 - }, - { - "epoch": 0.19221116551712034, - "grad_norm": 0.5148016810417175, - "learning_rate": 4.541666666666667e-05, - "loss": 0.5216, - "step": 5476 - }, - { - "epoch": 0.19224626616823742, - "grad_norm": 0.6262688040733337, - "learning_rate": 4.541481481481482e-05, - "loss": 0.4635, - "step": 5477 - }, - { - "epoch": 0.1922813668193545, - "grad_norm": 0.5447850823402405, - "learning_rate": 4.541296296296296e-05, - "loss": 0.4809, - "step": 5478 - }, - { - "epoch": 0.19231646747047157, - "grad_norm": 0.4444795846939087, - "learning_rate": 4.541111111111112e-05, - "loss": 0.4583, - "step": 5479 - }, - { - "epoch": 0.19235156812158866, - "grad_norm": 0.5038244128227234, - "learning_rate": 4.540925925925926e-05, - "loss": 0.64, - "step": 5480 - }, - { - "epoch": 0.19238666877270574, - "grad_norm": 0.527904212474823, - "learning_rate": 4.540740740740741e-05, - "loss": 0.5762, - "step": 5481 - }, - { - "epoch": 0.1924217694238228, - "grad_norm": 0.43067753314971924, - "learning_rate": 4.5405555555555555e-05, - "loss": 0.5186, - "step": 5482 - }, - { - "epoch": 0.1924568700749399, - "grad_norm": 0.4701381325721741, - "learning_rate": 4.5403703703703706e-05, - "loss": 0.5304, - "step": 5483 - }, - { - "epoch": 0.19249197072605698, - "grad_norm": 0.5008642673492432, - "learning_rate": 4.5401851851851856e-05, - "loss": 0.5735, - "step": 5484 - }, - { - "epoch": 0.19252707137717404, - "grad_norm": 0.5187587141990662, - "learning_rate": 4.5400000000000006e-05, - "loss": 0.3218, - "step": 5485 - }, - { - "epoch": 0.19256217202829112, - "grad_norm": 0.516755998134613, - "learning_rate": 4.539814814814815e-05, - "loss": 0.5269, - "step": 5486 - }, - { - "epoch": 0.1925972726794082, - "grad_norm": 0.5180404782295227, - "learning_rate": 4.53962962962963e-05, - "loss": 0.6187, - "step": 5487 - }, - { - "epoch": 0.19263237333052527, - "grad_norm": 0.5774307250976562, - "learning_rate": 4.539444444444444e-05, - "loss": 0.5804, - "step": 5488 - }, - { - "epoch": 0.19266747398164236, - "grad_norm": 0.47752663493156433, - "learning_rate": 4.539259259259259e-05, - "loss": 0.5918, - "step": 5489 - }, - { - "epoch": 0.19270257463275944, - "grad_norm": 0.49492961168289185, - "learning_rate": 4.539074074074074e-05, - "loss": 0.5328, - "step": 5490 - }, - { - "epoch": 0.1927376752838765, - "grad_norm": 0.5400394201278687, - "learning_rate": 4.538888888888889e-05, - "loss": 0.5255, - "step": 5491 - }, - { - "epoch": 0.1927727759349936, - "grad_norm": 0.5463753938674927, - "learning_rate": 4.5387037037037036e-05, - "loss": 0.5872, - "step": 5492 - }, - { - "epoch": 0.19280787658611068, - "grad_norm": 0.5258082747459412, - "learning_rate": 4.5385185185185186e-05, - "loss": 0.5071, - "step": 5493 - }, - { - "epoch": 0.19284297723722776, - "grad_norm": 0.7186855673789978, - "learning_rate": 4.5383333333333336e-05, - "loss": 0.5455, - "step": 5494 - }, - { - "epoch": 0.19287807788834482, - "grad_norm": 0.491842120885849, - "learning_rate": 4.5381481481481487e-05, - "loss": 0.5819, - "step": 5495 - }, - { - "epoch": 0.1929131785394619, - "grad_norm": 0.44726258516311646, - "learning_rate": 4.537962962962963e-05, - "loss": 0.3873, - "step": 5496 - }, - { - "epoch": 0.192948279190579, - "grad_norm": 0.5905647277832031, - "learning_rate": 4.537777777777778e-05, - "loss": 0.5756, - "step": 5497 - }, - { - "epoch": 0.19298337984169606, - "grad_norm": 0.44503411650657654, - "learning_rate": 4.537592592592593e-05, - "loss": 0.5721, - "step": 5498 - }, - { - "epoch": 0.19301848049281314, - "grad_norm": 0.538502037525177, - "learning_rate": 4.5374074074074073e-05, - "loss": 0.5492, - "step": 5499 - }, - { - "epoch": 0.19305358114393023, - "grad_norm": 0.5516677498817444, - "learning_rate": 4.537222222222223e-05, - "loss": 0.5621, - "step": 5500 - }, - { - "epoch": 0.1930886817950473, - "grad_norm": 0.5226258635520935, - "learning_rate": 4.5370370370370374e-05, - "loss": 0.4416, - "step": 5501 - }, - { - "epoch": 0.19312378244616438, - "grad_norm": 0.527091920375824, - "learning_rate": 4.5368518518518524e-05, - "loss": 0.4094, - "step": 5502 - }, - { - "epoch": 0.19315888309728146, - "grad_norm": 0.508950412273407, - "learning_rate": 4.536666666666667e-05, - "loss": 0.5284, - "step": 5503 - }, - { - "epoch": 0.19319398374839852, - "grad_norm": 0.51045823097229, - "learning_rate": 4.536481481481482e-05, - "loss": 0.4913, - "step": 5504 - }, - { - "epoch": 0.1932290843995156, - "grad_norm": 0.5588054060935974, - "learning_rate": 4.536296296296296e-05, - "loss": 0.5092, - "step": 5505 - }, - { - "epoch": 0.1932641850506327, - "grad_norm": 0.49264562129974365, - "learning_rate": 4.536111111111112e-05, - "loss": 0.4287, - "step": 5506 - }, - { - "epoch": 0.19329928570174976, - "grad_norm": 0.5305018424987793, - "learning_rate": 4.535925925925926e-05, - "loss": 0.5928, - "step": 5507 - }, - { - "epoch": 0.19333438635286684, - "grad_norm": 0.49585863947868347, - "learning_rate": 4.535740740740741e-05, - "loss": 0.5082, - "step": 5508 - }, - { - "epoch": 0.19336948700398393, - "grad_norm": 0.5030539631843567, - "learning_rate": 4.5355555555555554e-05, - "loss": 0.491, - "step": 5509 - }, - { - "epoch": 0.193404587655101, - "grad_norm": 0.6027122735977173, - "learning_rate": 4.5353703703703704e-05, - "loss": 0.5323, - "step": 5510 - }, - { - "epoch": 0.19343968830621808, - "grad_norm": 0.5423810482025146, - "learning_rate": 4.5351851851851854e-05, - "loss": 0.5535, - "step": 5511 - }, - { - "epoch": 0.19347478895733516, - "grad_norm": 0.5416441559791565, - "learning_rate": 4.5350000000000005e-05, - "loss": 0.6075, - "step": 5512 - }, - { - "epoch": 0.19350988960845222, - "grad_norm": 0.5701658129692078, - "learning_rate": 4.534814814814815e-05, - "loss": 0.4994, - "step": 5513 - }, - { - "epoch": 0.1935449902595693, - "grad_norm": 0.5056540966033936, - "learning_rate": 4.53462962962963e-05, - "loss": 0.5571, - "step": 5514 - }, - { - "epoch": 0.1935800909106864, - "grad_norm": 0.6864378452301025, - "learning_rate": 4.534444444444445e-05, - "loss": 0.451, - "step": 5515 - }, - { - "epoch": 0.19361519156180348, - "grad_norm": 0.4501231908798218, - "learning_rate": 4.534259259259259e-05, - "loss": 0.4699, - "step": 5516 - }, - { - "epoch": 0.19365029221292054, - "grad_norm": 0.4643787145614624, - "learning_rate": 4.534074074074074e-05, - "loss": 0.5409, - "step": 5517 - }, - { - "epoch": 0.19368539286403763, - "grad_norm": 0.5310850739479065, - "learning_rate": 4.533888888888889e-05, - "loss": 0.4446, - "step": 5518 - }, - { - "epoch": 0.19372049351515472, - "grad_norm": 0.5516529083251953, - "learning_rate": 4.533703703703704e-05, - "loss": 0.5799, - "step": 5519 - }, - { - "epoch": 0.19375559416627178, - "grad_norm": 0.5085040926933289, - "learning_rate": 4.5335185185185185e-05, - "loss": 0.5125, - "step": 5520 - }, - { - "epoch": 0.19379069481738886, - "grad_norm": 0.5274051427841187, - "learning_rate": 4.5333333333333335e-05, - "loss": 0.5694, - "step": 5521 - }, - { - "epoch": 0.19382579546850595, - "grad_norm": 0.4795979857444763, - "learning_rate": 4.5331481481481485e-05, - "loss": 0.5117, - "step": 5522 - }, - { - "epoch": 0.193860896119623, - "grad_norm": 0.691875159740448, - "learning_rate": 4.5329629629629635e-05, - "loss": 0.3685, - "step": 5523 - }, - { - "epoch": 0.1938959967707401, - "grad_norm": 0.5388244390487671, - "learning_rate": 4.532777777777778e-05, - "loss": 0.6213, - "step": 5524 - }, - { - "epoch": 0.19393109742185718, - "grad_norm": 0.544539749622345, - "learning_rate": 4.532592592592593e-05, - "loss": 0.541, - "step": 5525 - }, - { - "epoch": 0.19396619807297424, - "grad_norm": 0.6190483570098877, - "learning_rate": 4.532407407407407e-05, - "loss": 0.5058, - "step": 5526 - }, - { - "epoch": 0.19400129872409133, - "grad_norm": 0.6012407541275024, - "learning_rate": 4.532222222222223e-05, - "loss": 0.5209, - "step": 5527 - }, - { - "epoch": 0.19403639937520842, - "grad_norm": 0.4341399371623993, - "learning_rate": 4.532037037037037e-05, - "loss": 0.4939, - "step": 5528 - }, - { - "epoch": 0.19407150002632548, - "grad_norm": 0.651709794998169, - "learning_rate": 4.531851851851852e-05, - "loss": 0.4944, - "step": 5529 - }, - { - "epoch": 0.19410660067744256, - "grad_norm": 0.5423116087913513, - "learning_rate": 4.5316666666666666e-05, - "loss": 0.6072, - "step": 5530 - }, - { - "epoch": 0.19414170132855965, - "grad_norm": 0.6007576584815979, - "learning_rate": 4.5314814814814816e-05, - "loss": 0.609, - "step": 5531 - }, - { - "epoch": 0.1941768019796767, - "grad_norm": 0.5070838332176208, - "learning_rate": 4.531296296296296e-05, - "loss": 0.4613, - "step": 5532 - }, - { - "epoch": 0.1942119026307938, - "grad_norm": 0.4265163540840149, - "learning_rate": 4.5311111111111116e-05, - "loss": 0.5397, - "step": 5533 - }, - { - "epoch": 0.19424700328191088, - "grad_norm": 0.4852292537689209, - "learning_rate": 4.530925925925926e-05, - "loss": 0.4087, - "step": 5534 - }, - { - "epoch": 0.19428210393302794, - "grad_norm": 0.520072340965271, - "learning_rate": 4.530740740740741e-05, - "loss": 0.5234, - "step": 5535 - }, - { - "epoch": 0.19431720458414503, - "grad_norm": 0.902130126953125, - "learning_rate": 4.530555555555556e-05, - "loss": 0.5367, - "step": 5536 - }, - { - "epoch": 0.19435230523526212, - "grad_norm": 0.4380682706832886, - "learning_rate": 4.53037037037037e-05, - "loss": 0.417, - "step": 5537 - }, - { - "epoch": 0.1943874058863792, - "grad_norm": 0.5625304579734802, - "learning_rate": 4.530185185185185e-05, - "loss": 0.5987, - "step": 5538 - }, - { - "epoch": 0.19442250653749626, - "grad_norm": 0.52956622838974, - "learning_rate": 4.53e-05, - "loss": 0.4841, - "step": 5539 - }, - { - "epoch": 0.19445760718861335, - "grad_norm": 0.6210545897483826, - "learning_rate": 4.5298148148148153e-05, - "loss": 0.6402, - "step": 5540 - }, - { - "epoch": 0.19449270783973044, - "grad_norm": 0.4914090931415558, - "learning_rate": 4.52962962962963e-05, - "loss": 0.4231, - "step": 5541 - }, - { - "epoch": 0.1945278084908475, - "grad_norm": 0.5733522176742554, - "learning_rate": 4.529444444444445e-05, - "loss": 0.5678, - "step": 5542 - }, - { - "epoch": 0.19456290914196459, - "grad_norm": 0.45255139470100403, - "learning_rate": 4.529259259259259e-05, - "loss": 0.5483, - "step": 5543 - }, - { - "epoch": 0.19459800979308167, - "grad_norm": 0.5085036754608154, - "learning_rate": 4.529074074074075e-05, - "loss": 0.5157, - "step": 5544 - }, - { - "epoch": 0.19463311044419873, - "grad_norm": 0.46902695298194885, - "learning_rate": 4.528888888888889e-05, - "loss": 0.5221, - "step": 5545 - }, - { - "epoch": 0.19466821109531582, - "grad_norm": 0.42178767919540405, - "learning_rate": 4.528703703703704e-05, - "loss": 0.3881, - "step": 5546 - }, - { - "epoch": 0.1947033117464329, - "grad_norm": 0.5146874785423279, - "learning_rate": 4.5285185185185184e-05, - "loss": 0.4129, - "step": 5547 - }, - { - "epoch": 0.19473841239754996, - "grad_norm": 0.7643745541572571, - "learning_rate": 4.5283333333333334e-05, - "loss": 0.4894, - "step": 5548 - }, - { - "epoch": 0.19477351304866705, - "grad_norm": 0.5517265200614929, - "learning_rate": 4.5281481481481484e-05, - "loss": 0.5309, - "step": 5549 - }, - { - "epoch": 0.19480861369978414, - "grad_norm": 0.4708397388458252, - "learning_rate": 4.5279629629629634e-05, - "loss": 0.4993, - "step": 5550 - }, - { - "epoch": 0.1948437143509012, - "grad_norm": 0.5290924310684204, - "learning_rate": 4.527777777777778e-05, - "loss": 0.6315, - "step": 5551 - }, - { - "epoch": 0.19487881500201829, - "grad_norm": 0.5150974988937378, - "learning_rate": 4.527592592592593e-05, - "loss": 0.4998, - "step": 5552 - }, - { - "epoch": 0.19491391565313537, - "grad_norm": 0.4410683810710907, - "learning_rate": 4.527407407407407e-05, - "loss": 0.4959, - "step": 5553 - }, - { - "epoch": 0.19494901630425243, - "grad_norm": 0.6930930614471436, - "learning_rate": 4.527222222222223e-05, - "loss": 0.559, - "step": 5554 - }, - { - "epoch": 0.19498411695536952, - "grad_norm": 0.5190345644950867, - "learning_rate": 4.527037037037037e-05, - "loss": 0.5109, - "step": 5555 - }, - { - "epoch": 0.1950192176064866, - "grad_norm": 0.5029524564743042, - "learning_rate": 4.526851851851852e-05, - "loss": 0.6291, - "step": 5556 - }, - { - "epoch": 0.19505431825760366, - "grad_norm": 0.5378061532974243, - "learning_rate": 4.526666666666667e-05, - "loss": 0.4666, - "step": 5557 - }, - { - "epoch": 0.19508941890872075, - "grad_norm": 0.46374568343162537, - "learning_rate": 4.5264814814814815e-05, - "loss": 0.557, - "step": 5558 - }, - { - "epoch": 0.19512451955983784, - "grad_norm": 0.48965010046958923, - "learning_rate": 4.5262962962962965e-05, - "loss": 0.5441, - "step": 5559 - }, - { - "epoch": 0.19515962021095493, - "grad_norm": 0.5115394592285156, - "learning_rate": 4.5261111111111115e-05, - "loss": 0.5726, - "step": 5560 - }, - { - "epoch": 0.19519472086207199, - "grad_norm": 0.5496609807014465, - "learning_rate": 4.5259259259259265e-05, - "loss": 0.4614, - "step": 5561 - }, - { - "epoch": 0.19522982151318907, - "grad_norm": 0.5173202753067017, - "learning_rate": 4.525740740740741e-05, - "loss": 0.5465, - "step": 5562 - }, - { - "epoch": 0.19526492216430616, - "grad_norm": 0.5365588068962097, - "learning_rate": 4.525555555555556e-05, - "loss": 0.4549, - "step": 5563 - }, - { - "epoch": 0.19530002281542322, - "grad_norm": 0.5905737280845642, - "learning_rate": 4.52537037037037e-05, - "loss": 0.5215, - "step": 5564 - }, - { - "epoch": 0.1953351234665403, - "grad_norm": 0.47929811477661133, - "learning_rate": 4.525185185185186e-05, - "loss": 0.5116, - "step": 5565 - }, - { - "epoch": 0.1953702241176574, - "grad_norm": 0.4613277316093445, - "learning_rate": 4.525e-05, - "loss": 0.4911, - "step": 5566 - }, - { - "epoch": 0.19540532476877445, - "grad_norm": 0.4699991047382355, - "learning_rate": 4.524814814814815e-05, - "loss": 0.5181, - "step": 5567 - }, - { - "epoch": 0.19544042541989154, - "grad_norm": 0.49282822012901306, - "learning_rate": 4.5246296296296296e-05, - "loss": 0.5138, - "step": 5568 - }, - { - "epoch": 0.19547552607100863, - "grad_norm": 0.5157098174095154, - "learning_rate": 4.5244444444444446e-05, - "loss": 0.5407, - "step": 5569 - }, - { - "epoch": 0.19551062672212569, - "grad_norm": 0.5029996633529663, - "learning_rate": 4.5242592592592596e-05, - "loss": 0.5135, - "step": 5570 - }, - { - "epoch": 0.19554572737324277, - "grad_norm": 0.7398983836174011, - "learning_rate": 4.5240740740740746e-05, - "loss": 0.639, - "step": 5571 - }, - { - "epoch": 0.19558082802435986, - "grad_norm": 0.46513649821281433, - "learning_rate": 4.523888888888889e-05, - "loss": 0.4516, - "step": 5572 - }, - { - "epoch": 0.19561592867547692, - "grad_norm": 0.5925256013870239, - "learning_rate": 4.523703703703704e-05, - "loss": 0.5377, - "step": 5573 - }, - { - "epoch": 0.195651029326594, - "grad_norm": 0.5161088109016418, - "learning_rate": 4.523518518518518e-05, - "loss": 0.5747, - "step": 5574 - }, - { - "epoch": 0.1956861299777111, - "grad_norm": 0.533450722694397, - "learning_rate": 4.523333333333333e-05, - "loss": 0.6272, - "step": 5575 - }, - { - "epoch": 0.19572123062882815, - "grad_norm": 0.5256359577178955, - "learning_rate": 4.523148148148148e-05, - "loss": 0.4327, - "step": 5576 - }, - { - "epoch": 0.19575633127994524, - "grad_norm": 0.5750529766082764, - "learning_rate": 4.522962962962963e-05, - "loss": 0.4416, - "step": 5577 - }, - { - "epoch": 0.19579143193106233, - "grad_norm": 0.4768681228160858, - "learning_rate": 4.522777777777778e-05, - "loss": 0.6022, - "step": 5578 - }, - { - "epoch": 0.19582653258217939, - "grad_norm": 0.6302982568740845, - "learning_rate": 4.5225925925925926e-05, - "loss": 0.6076, - "step": 5579 - }, - { - "epoch": 0.19586163323329647, - "grad_norm": 0.6626862287521362, - "learning_rate": 4.5224074074074077e-05, - "loss": 0.4938, - "step": 5580 - }, - { - "epoch": 0.19589673388441356, - "grad_norm": 0.5566423535346985, - "learning_rate": 4.522222222222223e-05, - "loss": 0.4465, - "step": 5581 - }, - { - "epoch": 0.19593183453553065, - "grad_norm": 0.586716890335083, - "learning_rate": 4.522037037037038e-05, - "loss": 0.5812, - "step": 5582 - }, - { - "epoch": 0.1959669351866477, - "grad_norm": 0.558603048324585, - "learning_rate": 4.521851851851852e-05, - "loss": 0.4476, - "step": 5583 - }, - { - "epoch": 0.1960020358377648, - "grad_norm": 0.46133509278297424, - "learning_rate": 4.521666666666667e-05, - "loss": 0.4116, - "step": 5584 - }, - { - "epoch": 0.19603713648888188, - "grad_norm": 0.46896591782569885, - "learning_rate": 4.5214814814814814e-05, - "loss": 0.5105, - "step": 5585 - }, - { - "epoch": 0.19607223713999894, - "grad_norm": 0.5593125820159912, - "learning_rate": 4.5212962962962964e-05, - "loss": 0.498, - "step": 5586 - }, - { - "epoch": 0.19610733779111603, - "grad_norm": 0.573035478591919, - "learning_rate": 4.5211111111111114e-05, - "loss": 0.5876, - "step": 5587 - }, - { - "epoch": 0.1961424384422331, - "grad_norm": 0.44513383507728577, - "learning_rate": 4.5209259259259264e-05, - "loss": 0.4691, - "step": 5588 - }, - { - "epoch": 0.19617753909335017, - "grad_norm": 0.45753708481788635, - "learning_rate": 4.520740740740741e-05, - "loss": 0.5621, - "step": 5589 - }, - { - "epoch": 0.19621263974446726, - "grad_norm": 0.5041671395301819, - "learning_rate": 4.520555555555556e-05, - "loss": 0.6258, - "step": 5590 - }, - { - "epoch": 0.19624774039558435, - "grad_norm": 0.5965087413787842, - "learning_rate": 4.52037037037037e-05, - "loss": 0.5586, - "step": 5591 - }, - { - "epoch": 0.1962828410467014, - "grad_norm": 0.5950222015380859, - "learning_rate": 4.520185185185186e-05, - "loss": 0.5514, - "step": 5592 - }, - { - "epoch": 0.1963179416978185, - "grad_norm": 0.48921287059783936, - "learning_rate": 4.52e-05, - "loss": 0.4954, - "step": 5593 - }, - { - "epoch": 0.19635304234893558, - "grad_norm": 0.47066348791122437, - "learning_rate": 4.519814814814815e-05, - "loss": 0.4847, - "step": 5594 - }, - { - "epoch": 0.19638814300005264, - "grad_norm": 0.41980451345443726, - "learning_rate": 4.5196296296296294e-05, - "loss": 0.4541, - "step": 5595 - }, - { - "epoch": 0.19642324365116973, - "grad_norm": 0.5108770132064819, - "learning_rate": 4.5194444444444444e-05, - "loss": 0.5421, - "step": 5596 - }, - { - "epoch": 0.1964583443022868, - "grad_norm": 0.4678329825401306, - "learning_rate": 4.5192592592592595e-05, - "loss": 0.4801, - "step": 5597 - }, - { - "epoch": 0.19649344495340387, - "grad_norm": 0.6086834073066711, - "learning_rate": 4.5190740740740745e-05, - "loss": 0.4864, - "step": 5598 - }, - { - "epoch": 0.19652854560452096, - "grad_norm": 0.4948514997959137, - "learning_rate": 4.5188888888888895e-05, - "loss": 0.5028, - "step": 5599 - }, - { - "epoch": 0.19656364625563805, - "grad_norm": 0.44018203020095825, - "learning_rate": 4.518703703703704e-05, - "loss": 0.556, - "step": 5600 - }, - { - "epoch": 0.19659874690675513, - "grad_norm": 0.5207661390304565, - "learning_rate": 4.518518518518519e-05, - "loss": 0.4991, - "step": 5601 - }, - { - "epoch": 0.1966338475578722, - "grad_norm": 0.4668740928173065, - "learning_rate": 4.518333333333333e-05, - "loss": 0.4938, - "step": 5602 - }, - { - "epoch": 0.19666894820898928, - "grad_norm": 0.47333821654319763, - "learning_rate": 4.518148148148149e-05, - "loss": 0.5137, - "step": 5603 - }, - { - "epoch": 0.19670404886010637, - "grad_norm": 0.535340428352356, - "learning_rate": 4.517962962962963e-05, - "loss": 0.5449, - "step": 5604 - }, - { - "epoch": 0.19673914951122343, - "grad_norm": 0.47791531682014465, - "learning_rate": 4.517777777777778e-05, - "loss": 0.4648, - "step": 5605 - }, - { - "epoch": 0.1967742501623405, - "grad_norm": 0.4933421313762665, - "learning_rate": 4.5175925925925925e-05, - "loss": 0.5558, - "step": 5606 - }, - { - "epoch": 0.1968093508134576, - "grad_norm": 0.43534672260284424, - "learning_rate": 4.5174074074074075e-05, - "loss": 0.4458, - "step": 5607 - }, - { - "epoch": 0.19684445146457466, - "grad_norm": 0.4677780866622925, - "learning_rate": 4.5172222222222225e-05, - "loss": 0.4553, - "step": 5608 - }, - { - "epoch": 0.19687955211569175, - "grad_norm": 0.4576495885848999, - "learning_rate": 4.5170370370370376e-05, - "loss": 0.4749, - "step": 5609 - }, - { - "epoch": 0.19691465276680883, - "grad_norm": 0.6522932648658752, - "learning_rate": 4.516851851851852e-05, - "loss": 0.5232, - "step": 5610 - }, - { - "epoch": 0.1969497534179259, - "grad_norm": 0.5479851365089417, - "learning_rate": 4.516666666666667e-05, - "loss": 0.5943, - "step": 5611 - }, - { - "epoch": 0.19698485406904298, - "grad_norm": 0.5271630883216858, - "learning_rate": 4.516481481481481e-05, - "loss": 0.5035, - "step": 5612 - }, - { - "epoch": 0.19701995472016007, - "grad_norm": 0.48658016324043274, - "learning_rate": 4.516296296296296e-05, - "loss": 0.5464, - "step": 5613 - }, - { - "epoch": 0.19705505537127713, - "grad_norm": 0.5439552068710327, - "learning_rate": 4.516111111111111e-05, - "loss": 0.5276, - "step": 5614 - }, - { - "epoch": 0.19709015602239421, - "grad_norm": 0.4708850681781769, - "learning_rate": 4.515925925925926e-05, - "loss": 0.4413, - "step": 5615 - }, - { - "epoch": 0.1971252566735113, - "grad_norm": 0.49980688095092773, - "learning_rate": 4.5157407407407406e-05, - "loss": 0.5212, - "step": 5616 - }, - { - "epoch": 0.19716035732462836, - "grad_norm": 0.5519167184829712, - "learning_rate": 4.5155555555555556e-05, - "loss": 0.4569, - "step": 5617 - }, - { - "epoch": 0.19719545797574545, - "grad_norm": 0.4848690927028656, - "learning_rate": 4.5153703703703706e-05, - "loss": 0.4641, - "step": 5618 - }, - { - "epoch": 0.19723055862686253, - "grad_norm": 0.5899976491928101, - "learning_rate": 4.5151851851851856e-05, - "loss": 0.5205, - "step": 5619 - }, - { - "epoch": 0.1972656592779796, - "grad_norm": 0.5548856854438782, - "learning_rate": 4.5150000000000006e-05, - "loss": 0.4988, - "step": 5620 - }, - { - "epoch": 0.19730075992909668, - "grad_norm": 0.4710460305213928, - "learning_rate": 4.514814814814815e-05, - "loss": 0.3881, - "step": 5621 - }, - { - "epoch": 0.19733586058021377, - "grad_norm": 0.5284804701805115, - "learning_rate": 4.51462962962963e-05, - "loss": 0.4334, - "step": 5622 - }, - { - "epoch": 0.19737096123133085, - "grad_norm": 0.49411487579345703, - "learning_rate": 4.514444444444444e-05, - "loss": 0.545, - "step": 5623 - }, - { - "epoch": 0.19740606188244791, - "grad_norm": 0.5716983675956726, - "learning_rate": 4.51425925925926e-05, - "loss": 0.4948, - "step": 5624 - }, - { - "epoch": 0.197441162533565, - "grad_norm": 0.5054886341094971, - "learning_rate": 4.5140740740740743e-05, - "loss": 0.4581, - "step": 5625 - }, - { - "epoch": 0.1974762631846821, - "grad_norm": 0.43306469917297363, - "learning_rate": 4.5138888888888894e-05, - "loss": 0.417, - "step": 5626 - }, - { - "epoch": 0.19751136383579915, - "grad_norm": 0.5566884279251099, - "learning_rate": 4.513703703703704e-05, - "loss": 0.5344, - "step": 5627 - }, - { - "epoch": 0.19754646448691623, - "grad_norm": 0.5983619093894958, - "learning_rate": 4.513518518518519e-05, - "loss": 0.5072, - "step": 5628 - }, - { - "epoch": 0.19758156513803332, - "grad_norm": 0.5843722224235535, - "learning_rate": 4.513333333333333e-05, - "loss": 0.6109, - "step": 5629 - }, - { - "epoch": 0.19761666578915038, - "grad_norm": 0.873418927192688, - "learning_rate": 4.513148148148149e-05, - "loss": 0.5463, - "step": 5630 - }, - { - "epoch": 0.19765176644026747, - "grad_norm": 0.48982474207878113, - "learning_rate": 4.512962962962963e-05, - "loss": 0.4429, - "step": 5631 - }, - { - "epoch": 0.19768686709138455, - "grad_norm": 0.6574650406837463, - "learning_rate": 4.512777777777778e-05, - "loss": 0.4797, - "step": 5632 - }, - { - "epoch": 0.19772196774250161, - "grad_norm": 0.6067884564399719, - "learning_rate": 4.5125925925925924e-05, - "loss": 0.4833, - "step": 5633 - }, - { - "epoch": 0.1977570683936187, - "grad_norm": 0.5379170179367065, - "learning_rate": 4.5124074074074074e-05, - "loss": 0.5976, - "step": 5634 - }, - { - "epoch": 0.1977921690447358, - "grad_norm": 0.4572732746601105, - "learning_rate": 4.5122222222222224e-05, - "loss": 0.4918, - "step": 5635 - }, - { - "epoch": 0.19782726969585285, - "grad_norm": 0.40686357021331787, - "learning_rate": 4.5120370370370374e-05, - "loss": 0.4566, - "step": 5636 - }, - { - "epoch": 0.19786237034696993, - "grad_norm": 0.5321813225746155, - "learning_rate": 4.511851851851852e-05, - "loss": 0.4577, - "step": 5637 - }, - { - "epoch": 0.19789747099808702, - "grad_norm": 0.48718932271003723, - "learning_rate": 4.511666666666667e-05, - "loss": 0.4309, - "step": 5638 - }, - { - "epoch": 0.19793257164920408, - "grad_norm": 0.5920894145965576, - "learning_rate": 4.511481481481482e-05, - "loss": 0.5529, - "step": 5639 - }, - { - "epoch": 0.19796767230032117, - "grad_norm": 0.5407557487487793, - "learning_rate": 4.511296296296296e-05, - "loss": 0.5992, - "step": 5640 - }, - { - "epoch": 0.19800277295143826, - "grad_norm": 0.49270081520080566, - "learning_rate": 4.511111111111112e-05, - "loss": 0.4158, - "step": 5641 - }, - { - "epoch": 0.19803787360255531, - "grad_norm": 0.5774834752082825, - "learning_rate": 4.510925925925926e-05, - "loss": 0.5374, - "step": 5642 - }, - { - "epoch": 0.1980729742536724, - "grad_norm": 0.4696672558784485, - "learning_rate": 4.510740740740741e-05, - "loss": 0.4371, - "step": 5643 - }, - { - "epoch": 0.1981080749047895, - "grad_norm": 0.5512875318527222, - "learning_rate": 4.5105555555555555e-05, - "loss": 0.5798, - "step": 5644 - }, - { - "epoch": 0.19814317555590658, - "grad_norm": 0.5191388726234436, - "learning_rate": 4.5103703703703705e-05, - "loss": 0.5927, - "step": 5645 - }, - { - "epoch": 0.19817827620702363, - "grad_norm": 0.39892295002937317, - "learning_rate": 4.5101851851851855e-05, - "loss": 0.451, - "step": 5646 - }, - { - "epoch": 0.19821337685814072, - "grad_norm": 0.5264814496040344, - "learning_rate": 4.5100000000000005e-05, - "loss": 0.5876, - "step": 5647 - }, - { - "epoch": 0.1982484775092578, - "grad_norm": 0.5150988101959229, - "learning_rate": 4.509814814814815e-05, - "loss": 0.4897, - "step": 5648 - }, - { - "epoch": 0.19828357816037487, - "grad_norm": 0.5559651851654053, - "learning_rate": 4.50962962962963e-05, - "loss": 0.5056, - "step": 5649 - }, - { - "epoch": 0.19831867881149196, - "grad_norm": 0.5603728294372559, - "learning_rate": 4.509444444444444e-05, - "loss": 0.5551, - "step": 5650 - }, - { - "epoch": 0.19835377946260904, - "grad_norm": 0.5642095804214478, - "learning_rate": 4.50925925925926e-05, - "loss": 0.6175, - "step": 5651 - }, - { - "epoch": 0.1983888801137261, - "grad_norm": 0.5357491970062256, - "learning_rate": 4.509074074074074e-05, - "loss": 0.5244, - "step": 5652 - }, - { - "epoch": 0.1984239807648432, - "grad_norm": 0.6638109087944031, - "learning_rate": 4.508888888888889e-05, - "loss": 0.4671, - "step": 5653 - }, - { - "epoch": 0.19845908141596028, - "grad_norm": 0.518709123134613, - "learning_rate": 4.5087037037037036e-05, - "loss": 0.6032, - "step": 5654 - }, - { - "epoch": 0.19849418206707733, - "grad_norm": 0.5515729784965515, - "learning_rate": 4.5085185185185186e-05, - "loss": 0.4686, - "step": 5655 - }, - { - "epoch": 0.19852928271819442, - "grad_norm": 0.481202095746994, - "learning_rate": 4.5083333333333336e-05, - "loss": 0.4973, - "step": 5656 - }, - { - "epoch": 0.1985643833693115, - "grad_norm": 0.5767990946769714, - "learning_rate": 4.5081481481481486e-05, - "loss": 0.5229, - "step": 5657 - }, - { - "epoch": 0.19859948402042857, - "grad_norm": 0.7057272791862488, - "learning_rate": 4.507962962962963e-05, - "loss": 0.4529, - "step": 5658 - }, - { - "epoch": 0.19863458467154566, - "grad_norm": 0.555977463722229, - "learning_rate": 4.507777777777778e-05, - "loss": 0.5151, - "step": 5659 - }, - { - "epoch": 0.19866968532266274, - "grad_norm": 0.6495957374572754, - "learning_rate": 4.507592592592593e-05, - "loss": 0.4679, - "step": 5660 - }, - { - "epoch": 0.1987047859737798, - "grad_norm": 0.4925857186317444, - "learning_rate": 4.507407407407407e-05, - "loss": 0.5088, - "step": 5661 - }, - { - "epoch": 0.1987398866248969, - "grad_norm": 0.5276985168457031, - "learning_rate": 4.507222222222223e-05, - "loss": 0.4201, - "step": 5662 - }, - { - "epoch": 0.19877498727601398, - "grad_norm": 0.5983000993728638, - "learning_rate": 4.507037037037037e-05, - "loss": 0.5105, - "step": 5663 - }, - { - "epoch": 0.19881008792713104, - "grad_norm": 0.49382665753364563, - "learning_rate": 4.506851851851852e-05, - "loss": 0.5786, - "step": 5664 - }, - { - "epoch": 0.19884518857824812, - "grad_norm": 0.4235061705112457, - "learning_rate": 4.5066666666666667e-05, - "loss": 0.5678, - "step": 5665 - }, - { - "epoch": 0.1988802892293652, - "grad_norm": 0.5252145528793335, - "learning_rate": 4.506481481481482e-05, - "loss": 0.5888, - "step": 5666 - }, - { - "epoch": 0.1989153898804823, - "grad_norm": 0.5055017471313477, - "learning_rate": 4.506296296296296e-05, - "loss": 0.5819, - "step": 5667 - }, - { - "epoch": 0.19895049053159936, - "grad_norm": 0.4651198089122772, - "learning_rate": 4.506111111111112e-05, - "loss": 0.4645, - "step": 5668 - }, - { - "epoch": 0.19898559118271644, - "grad_norm": 0.4381822943687439, - "learning_rate": 4.505925925925926e-05, - "loss": 0.4959, - "step": 5669 - }, - { - "epoch": 0.19902069183383353, - "grad_norm": 0.6057116389274597, - "learning_rate": 4.505740740740741e-05, - "loss": 0.4522, - "step": 5670 - }, - { - "epoch": 0.1990557924849506, - "grad_norm": 0.4546982944011688, - "learning_rate": 4.5055555555555554e-05, - "loss": 0.479, - "step": 5671 - }, - { - "epoch": 0.19909089313606768, - "grad_norm": 0.5293663740158081, - "learning_rate": 4.5053703703703704e-05, - "loss": 0.5168, - "step": 5672 - }, - { - "epoch": 0.19912599378718476, - "grad_norm": 0.5184152126312256, - "learning_rate": 4.5051851851851854e-05, - "loss": 0.5087, - "step": 5673 - }, - { - "epoch": 0.19916109443830182, - "grad_norm": 0.5067746043205261, - "learning_rate": 4.5050000000000004e-05, - "loss": 0.575, - "step": 5674 - }, - { - "epoch": 0.1991961950894189, - "grad_norm": 0.4992557466030121, - "learning_rate": 4.504814814814815e-05, - "loss": 0.4901, - "step": 5675 - }, - { - "epoch": 0.199231295740536, - "grad_norm": 0.5159285068511963, - "learning_rate": 4.50462962962963e-05, - "loss": 0.4287, - "step": 5676 - }, - { - "epoch": 0.19926639639165306, - "grad_norm": 0.5182773470878601, - "learning_rate": 4.504444444444445e-05, - "loss": 0.5018, - "step": 5677 - }, - { - "epoch": 0.19930149704277014, - "grad_norm": 0.3853946626186371, - "learning_rate": 4.50425925925926e-05, - "loss": 0.5854, - "step": 5678 - }, - { - "epoch": 0.19933659769388723, - "grad_norm": 0.560916006565094, - "learning_rate": 4.504074074074075e-05, - "loss": 0.5708, - "step": 5679 - }, - { - "epoch": 0.1993716983450043, - "grad_norm": 0.4962828755378723, - "learning_rate": 4.503888888888889e-05, - "loss": 0.5643, - "step": 5680 - }, - { - "epoch": 0.19940679899612138, - "grad_norm": 0.49675068259239197, - "learning_rate": 4.503703703703704e-05, - "loss": 0.5376, - "step": 5681 - }, - { - "epoch": 0.19944189964723846, - "grad_norm": 0.49448299407958984, - "learning_rate": 4.5035185185185185e-05, - "loss": 0.5697, - "step": 5682 - }, - { - "epoch": 0.19947700029835552, - "grad_norm": 0.5360910892486572, - "learning_rate": 4.5033333333333335e-05, - "loss": 0.5282, - "step": 5683 - }, - { - "epoch": 0.1995121009494726, - "grad_norm": 0.46357038617134094, - "learning_rate": 4.5031481481481485e-05, - "loss": 0.4925, - "step": 5684 - }, - { - "epoch": 0.1995472016005897, - "grad_norm": 0.5392099022865295, - "learning_rate": 4.5029629629629635e-05, - "loss": 0.5341, - "step": 5685 - }, - { - "epoch": 0.19958230225170676, - "grad_norm": 0.46273332834243774, - "learning_rate": 4.502777777777778e-05, - "loss": 0.502, - "step": 5686 - }, - { - "epoch": 0.19961740290282384, - "grad_norm": 0.5860897898674011, - "learning_rate": 4.502592592592593e-05, - "loss": 0.5536, - "step": 5687 - }, - { - "epoch": 0.19965250355394093, - "grad_norm": 0.49821987748146057, - "learning_rate": 4.502407407407407e-05, - "loss": 0.4715, - "step": 5688 - }, - { - "epoch": 0.19968760420505802, - "grad_norm": 0.5448624491691589, - "learning_rate": 4.502222222222223e-05, - "loss": 0.4661, - "step": 5689 - }, - { - "epoch": 0.19972270485617508, - "grad_norm": 0.46940627694129944, - "learning_rate": 4.502037037037037e-05, - "loss": 0.4964, - "step": 5690 - }, - { - "epoch": 0.19975780550729216, - "grad_norm": 0.5091021656990051, - "learning_rate": 4.501851851851852e-05, - "loss": 0.6186, - "step": 5691 - }, - { - "epoch": 0.19979290615840925, - "grad_norm": 0.4818097949028015, - "learning_rate": 4.5016666666666665e-05, - "loss": 0.4331, - "step": 5692 - }, - { - "epoch": 0.1998280068095263, - "grad_norm": 0.46842196583747864, - "learning_rate": 4.5014814814814815e-05, - "loss": 0.6111, - "step": 5693 - }, - { - "epoch": 0.1998631074606434, - "grad_norm": 0.5308458209037781, - "learning_rate": 4.5012962962962966e-05, - "loss": 0.5728, - "step": 5694 - }, - { - "epoch": 0.19989820811176048, - "grad_norm": 0.4643735885620117, - "learning_rate": 4.5011111111111116e-05, - "loss": 0.5534, - "step": 5695 - }, - { - "epoch": 0.19993330876287754, - "grad_norm": 0.46687546372413635, - "learning_rate": 4.500925925925926e-05, - "loss": 0.5356, - "step": 5696 - }, - { - "epoch": 0.19996840941399463, - "grad_norm": 0.4913930594921112, - "learning_rate": 4.500740740740741e-05, - "loss": 0.3824, - "step": 5697 - }, - { - "epoch": 0.20000351006511172, - "grad_norm": 0.522620439529419, - "learning_rate": 4.500555555555556e-05, - "loss": 0.4147, - "step": 5698 - }, - { - "epoch": 0.20003861071622878, - "grad_norm": 0.597610592842102, - "learning_rate": 4.50037037037037e-05, - "loss": 0.4597, - "step": 5699 - }, - { - "epoch": 0.20007371136734586, - "grad_norm": 0.5786772966384888, - "learning_rate": 4.500185185185186e-05, - "loss": 0.4575, - "step": 5700 - }, - { - "epoch": 0.20010881201846295, - "grad_norm": 0.4518912434577942, - "learning_rate": 4.5e-05, - "loss": 0.4825, - "step": 5701 - }, - { - "epoch": 0.20014391266958, - "grad_norm": 0.5365821123123169, - "learning_rate": 4.499814814814815e-05, - "loss": 0.5014, - "step": 5702 - }, - { - "epoch": 0.2001790133206971, - "grad_norm": 0.577602744102478, - "learning_rate": 4.4996296296296296e-05, - "loss": 0.6364, - "step": 5703 - }, - { - "epoch": 0.20021411397181418, - "grad_norm": 0.6403786540031433, - "learning_rate": 4.4994444444444446e-05, - "loss": 0.6039, - "step": 5704 - }, - { - "epoch": 0.20024921462293124, - "grad_norm": 0.6184222102165222, - "learning_rate": 4.4992592592592597e-05, - "loss": 0.5443, - "step": 5705 - }, - { - "epoch": 0.20028431527404833, - "grad_norm": 0.47723740339279175, - "learning_rate": 4.499074074074075e-05, - "loss": 0.5122, - "step": 5706 - }, - { - "epoch": 0.20031941592516542, - "grad_norm": 0.4538409113883972, - "learning_rate": 4.498888888888889e-05, - "loss": 0.368, - "step": 5707 - }, - { - "epoch": 0.20035451657628248, - "grad_norm": 0.5129828453063965, - "learning_rate": 4.498703703703704e-05, - "loss": 0.4501, - "step": 5708 - }, - { - "epoch": 0.20038961722739956, - "grad_norm": 0.5186452269554138, - "learning_rate": 4.4985185185185183e-05, - "loss": 0.5485, - "step": 5709 - }, - { - "epoch": 0.20042471787851665, - "grad_norm": 0.49138930439949036, - "learning_rate": 4.4983333333333334e-05, - "loss": 0.541, - "step": 5710 - }, - { - "epoch": 0.20045981852963374, - "grad_norm": 0.47088414430618286, - "learning_rate": 4.4981481481481484e-05, - "loss": 0.5684, - "step": 5711 - }, - { - "epoch": 0.2004949191807508, - "grad_norm": 0.4817778766155243, - "learning_rate": 4.4979629629629634e-05, - "loss": 0.5137, - "step": 5712 - }, - { - "epoch": 0.20053001983186788, - "grad_norm": 0.5085949301719666, - "learning_rate": 4.497777777777778e-05, - "loss": 0.4947, - "step": 5713 - }, - { - "epoch": 0.20056512048298497, - "grad_norm": 0.5327000021934509, - "learning_rate": 4.497592592592593e-05, - "loss": 0.6245, - "step": 5714 - }, - { - "epoch": 0.20060022113410203, - "grad_norm": 0.46619704365730286, - "learning_rate": 4.497407407407408e-05, - "loss": 0.4763, - "step": 5715 - }, - { - "epoch": 0.20063532178521912, - "grad_norm": 0.5547291040420532, - "learning_rate": 4.497222222222223e-05, - "loss": 0.4839, - "step": 5716 - }, - { - "epoch": 0.2006704224363362, - "grad_norm": 0.6362207531929016, - "learning_rate": 4.497037037037037e-05, - "loss": 0.5034, - "step": 5717 - }, - { - "epoch": 0.20070552308745326, - "grad_norm": 0.5427435040473938, - "learning_rate": 4.496851851851852e-05, - "loss": 0.4305, - "step": 5718 - }, - { - "epoch": 0.20074062373857035, - "grad_norm": 0.5645906925201416, - "learning_rate": 4.496666666666667e-05, - "loss": 0.5401, - "step": 5719 - }, - { - "epoch": 0.20077572438968744, - "grad_norm": 0.7046865224838257, - "learning_rate": 4.4964814814814814e-05, - "loss": 0.502, - "step": 5720 - }, - { - "epoch": 0.2008108250408045, - "grad_norm": 0.4793148338794708, - "learning_rate": 4.496296296296297e-05, - "loss": 0.4489, - "step": 5721 - }, - { - "epoch": 0.20084592569192158, - "grad_norm": 0.4923561215400696, - "learning_rate": 4.4961111111111115e-05, - "loss": 0.4788, - "step": 5722 - }, - { - "epoch": 0.20088102634303867, - "grad_norm": 0.6008941531181335, - "learning_rate": 4.4959259259259265e-05, - "loss": 0.5569, - "step": 5723 - }, - { - "epoch": 0.20091612699415573, - "grad_norm": 0.6825819611549377, - "learning_rate": 4.495740740740741e-05, - "loss": 0.6057, - "step": 5724 - }, - { - "epoch": 0.20095122764527282, - "grad_norm": 0.5460118055343628, - "learning_rate": 4.495555555555556e-05, - "loss": 0.4748, - "step": 5725 - }, - { - "epoch": 0.2009863282963899, - "grad_norm": 0.48099491000175476, - "learning_rate": 4.49537037037037e-05, - "loss": 0.4939, - "step": 5726 - }, - { - "epoch": 0.20102142894750696, - "grad_norm": 0.7212415933609009, - "learning_rate": 4.495185185185186e-05, - "loss": 0.5771, - "step": 5727 - }, - { - "epoch": 0.20105652959862405, - "grad_norm": 0.4399692118167877, - "learning_rate": 4.495e-05, - "loss": 0.4884, - "step": 5728 - }, - { - "epoch": 0.20109163024974114, - "grad_norm": 0.5137689113616943, - "learning_rate": 4.494814814814815e-05, - "loss": 0.5282, - "step": 5729 - }, - { - "epoch": 0.2011267309008582, - "grad_norm": 0.5702013373374939, - "learning_rate": 4.4946296296296295e-05, - "loss": 0.5361, - "step": 5730 - }, - { - "epoch": 0.20116183155197528, - "grad_norm": 0.5149481296539307, - "learning_rate": 4.4944444444444445e-05, - "loss": 0.5217, - "step": 5731 - }, - { - "epoch": 0.20119693220309237, - "grad_norm": 0.4646179676055908, - "learning_rate": 4.4942592592592595e-05, - "loss": 0.4158, - "step": 5732 - }, - { - "epoch": 0.20123203285420946, - "grad_norm": 0.5168300867080688, - "learning_rate": 4.4940740740740745e-05, - "loss": 0.4625, - "step": 5733 - }, - { - "epoch": 0.20126713350532652, - "grad_norm": 0.552921712398529, - "learning_rate": 4.493888888888889e-05, - "loss": 0.5133, - "step": 5734 - }, - { - "epoch": 0.2013022341564436, - "grad_norm": 0.5427674055099487, - "learning_rate": 4.493703703703704e-05, - "loss": 0.4302, - "step": 5735 - }, - { - "epoch": 0.2013373348075607, - "grad_norm": 0.457428902387619, - "learning_rate": 4.493518518518519e-05, - "loss": 0.4402, - "step": 5736 - }, - { - "epoch": 0.20137243545867775, - "grad_norm": 0.4903985261917114, - "learning_rate": 4.493333333333333e-05, - "loss": 0.4694, - "step": 5737 - }, - { - "epoch": 0.20140753610979484, - "grad_norm": 0.5491151809692383, - "learning_rate": 4.493148148148148e-05, - "loss": 0.5736, - "step": 5738 - }, - { - "epoch": 0.20144263676091193, - "grad_norm": 0.4483930468559265, - "learning_rate": 4.492962962962963e-05, - "loss": 0.5245, - "step": 5739 - }, - { - "epoch": 0.20147773741202898, - "grad_norm": 0.5329801440238953, - "learning_rate": 4.492777777777778e-05, - "loss": 0.4416, - "step": 5740 - }, - { - "epoch": 0.20151283806314607, - "grad_norm": 0.46508464217185974, - "learning_rate": 4.4925925925925926e-05, - "loss": 0.4051, - "step": 5741 - }, - { - "epoch": 0.20154793871426316, - "grad_norm": 0.607858419418335, - "learning_rate": 4.4924074074074076e-05, - "loss": 0.4197, - "step": 5742 - }, - { - "epoch": 0.20158303936538022, - "grad_norm": 0.5847887396812439, - "learning_rate": 4.4922222222222226e-05, - "loss": 0.5352, - "step": 5743 - }, - { - "epoch": 0.2016181400164973, - "grad_norm": 0.549589216709137, - "learning_rate": 4.4920370370370376e-05, - "loss": 0.4704, - "step": 5744 - }, - { - "epoch": 0.2016532406676144, - "grad_norm": 0.48921632766723633, - "learning_rate": 4.491851851851852e-05, - "loss": 0.5379, - "step": 5745 - }, - { - "epoch": 0.20168834131873145, - "grad_norm": 0.47491252422332764, - "learning_rate": 4.491666666666667e-05, - "loss": 0.5607, - "step": 5746 - }, - { - "epoch": 0.20172344196984854, - "grad_norm": 0.47088509798049927, - "learning_rate": 4.491481481481481e-05, - "loss": 0.5541, - "step": 5747 - }, - { - "epoch": 0.20175854262096563, - "grad_norm": 0.4418226182460785, - "learning_rate": 4.491296296296297e-05, - "loss": 0.5634, - "step": 5748 - }, - { - "epoch": 0.20179364327208268, - "grad_norm": 0.5042619109153748, - "learning_rate": 4.491111111111111e-05, - "loss": 0.5112, - "step": 5749 - }, - { - "epoch": 0.20182874392319977, - "grad_norm": 0.4228004217147827, - "learning_rate": 4.4909259259259263e-05, - "loss": 0.4193, - "step": 5750 - }, - { - "epoch": 0.20186384457431686, - "grad_norm": 0.5152056217193604, - "learning_rate": 4.490740740740741e-05, - "loss": 0.4822, - "step": 5751 - }, - { - "epoch": 0.20189894522543395, - "grad_norm": 0.4958988428115845, - "learning_rate": 4.490555555555556e-05, - "loss": 0.4023, - "step": 5752 - }, - { - "epoch": 0.201934045876551, - "grad_norm": 0.5835666060447693, - "learning_rate": 4.49037037037037e-05, - "loss": 0.4709, - "step": 5753 - }, - { - "epoch": 0.2019691465276681, - "grad_norm": 0.5665714740753174, - "learning_rate": 4.490185185185186e-05, - "loss": 0.5656, - "step": 5754 - }, - { - "epoch": 0.20200424717878518, - "grad_norm": 0.4525555968284607, - "learning_rate": 4.49e-05, - "loss": 0.5105, - "step": 5755 - }, - { - "epoch": 0.20203934782990224, - "grad_norm": 0.5373449921607971, - "learning_rate": 4.489814814814815e-05, - "loss": 0.5354, - "step": 5756 - }, - { - "epoch": 0.20207444848101933, - "grad_norm": 0.3956085443496704, - "learning_rate": 4.48962962962963e-05, - "loss": 0.3978, - "step": 5757 - }, - { - "epoch": 0.2021095491321364, - "grad_norm": 0.5411094427108765, - "learning_rate": 4.4894444444444444e-05, - "loss": 0.5343, - "step": 5758 - }, - { - "epoch": 0.20214464978325347, - "grad_norm": 0.5036202073097229, - "learning_rate": 4.4892592592592594e-05, - "loss": 0.46, - "step": 5759 - }, - { - "epoch": 0.20217975043437056, - "grad_norm": 0.5522124171257019, - "learning_rate": 4.4890740740740744e-05, - "loss": 0.3987, - "step": 5760 - }, - { - "epoch": 0.20221485108548765, - "grad_norm": 0.6273509860038757, - "learning_rate": 4.4888888888888894e-05, - "loss": 0.5591, - "step": 5761 - }, - { - "epoch": 0.2022499517366047, - "grad_norm": 0.5254676342010498, - "learning_rate": 4.488703703703704e-05, - "loss": 0.5279, - "step": 5762 - }, - { - "epoch": 0.2022850523877218, - "grad_norm": 0.5258471965789795, - "learning_rate": 4.488518518518519e-05, - "loss": 0.5617, - "step": 5763 - }, - { - "epoch": 0.20232015303883888, - "grad_norm": 0.5649887323379517, - "learning_rate": 4.488333333333333e-05, - "loss": 0.5856, - "step": 5764 - }, - { - "epoch": 0.20235525368995594, - "grad_norm": 0.5781709551811218, - "learning_rate": 4.488148148148149e-05, - "loss": 0.4988, - "step": 5765 - }, - { - "epoch": 0.20239035434107303, - "grad_norm": 0.4743540287017822, - "learning_rate": 4.487962962962963e-05, - "loss": 0.5473, - "step": 5766 - }, - { - "epoch": 0.2024254549921901, - "grad_norm": 0.5683735609054565, - "learning_rate": 4.487777777777778e-05, - "loss": 0.4748, - "step": 5767 - }, - { - "epoch": 0.20246055564330717, - "grad_norm": 0.46197712421417236, - "learning_rate": 4.4875925925925925e-05, - "loss": 0.614, - "step": 5768 - }, - { - "epoch": 0.20249565629442426, - "grad_norm": 0.5580824017524719, - "learning_rate": 4.4874074074074075e-05, - "loss": 0.4709, - "step": 5769 - }, - { - "epoch": 0.20253075694554135, - "grad_norm": 0.4887133240699768, - "learning_rate": 4.4872222222222225e-05, - "loss": 0.4454, - "step": 5770 - }, - { - "epoch": 0.2025658575966584, - "grad_norm": 0.4813770353794098, - "learning_rate": 4.4870370370370375e-05, - "loss": 0.5055, - "step": 5771 - }, - { - "epoch": 0.2026009582477755, - "grad_norm": 0.6241201758384705, - "learning_rate": 4.486851851851852e-05, - "loss": 0.6029, - "step": 5772 - }, - { - "epoch": 0.20263605889889258, - "grad_norm": 0.6386154294013977, - "learning_rate": 4.486666666666667e-05, - "loss": 0.4563, - "step": 5773 - }, - { - "epoch": 0.20267115955000967, - "grad_norm": 0.44305092096328735, - "learning_rate": 4.486481481481481e-05, - "loss": 0.466, - "step": 5774 - }, - { - "epoch": 0.20270626020112673, - "grad_norm": 0.5321565270423889, - "learning_rate": 4.486296296296297e-05, - "loss": 0.4841, - "step": 5775 - }, - { - "epoch": 0.2027413608522438, - "grad_norm": 0.5543883442878723, - "learning_rate": 4.486111111111111e-05, - "loss": 0.6233, - "step": 5776 - }, - { - "epoch": 0.2027764615033609, - "grad_norm": 0.6109940409660339, - "learning_rate": 4.485925925925926e-05, - "loss": 0.5113, - "step": 5777 - }, - { - "epoch": 0.20281156215447796, - "grad_norm": 0.5434693098068237, - "learning_rate": 4.485740740740741e-05, - "loss": 0.4576, - "step": 5778 - }, - { - "epoch": 0.20284666280559505, - "grad_norm": 0.6000136733055115, - "learning_rate": 4.4855555555555556e-05, - "loss": 0.4506, - "step": 5779 - }, - { - "epoch": 0.20288176345671213, - "grad_norm": 0.7722585797309875, - "learning_rate": 4.4853703703703706e-05, - "loss": 0.4824, - "step": 5780 - }, - { - "epoch": 0.2029168641078292, - "grad_norm": 0.4981674551963806, - "learning_rate": 4.4851851851851856e-05, - "loss": 0.5818, - "step": 5781 - }, - { - "epoch": 0.20295196475894628, - "grad_norm": 0.6768054366111755, - "learning_rate": 4.4850000000000006e-05, - "loss": 0.5023, - "step": 5782 - }, - { - "epoch": 0.20298706541006337, - "grad_norm": 0.5657020211219788, - "learning_rate": 4.484814814814815e-05, - "loss": 0.5469, - "step": 5783 - }, - { - "epoch": 0.20302216606118043, - "grad_norm": 0.4177098870277405, - "learning_rate": 4.48462962962963e-05, - "loss": 0.4844, - "step": 5784 - }, - { - "epoch": 0.2030572667122975, - "grad_norm": 0.5317538976669312, - "learning_rate": 4.484444444444444e-05, - "loss": 0.5475, - "step": 5785 - }, - { - "epoch": 0.2030923673634146, - "grad_norm": 0.6414995193481445, - "learning_rate": 4.48425925925926e-05, - "loss": 0.4687, - "step": 5786 - }, - { - "epoch": 0.20312746801453166, - "grad_norm": 0.6169623136520386, - "learning_rate": 4.484074074074074e-05, - "loss": 0.4785, - "step": 5787 - }, - { - "epoch": 0.20316256866564875, - "grad_norm": 0.5355234146118164, - "learning_rate": 4.483888888888889e-05, - "loss": 0.629, - "step": 5788 - }, - { - "epoch": 0.20319766931676583, - "grad_norm": 0.5960537195205688, - "learning_rate": 4.4837037037037036e-05, - "loss": 0.4821, - "step": 5789 - }, - { - "epoch": 0.2032327699678829, - "grad_norm": 0.499510794878006, - "learning_rate": 4.4835185185185187e-05, - "loss": 0.4948, - "step": 5790 - }, - { - "epoch": 0.20326787061899998, - "grad_norm": 0.5418328046798706, - "learning_rate": 4.483333333333333e-05, - "loss": 0.6021, - "step": 5791 - }, - { - "epoch": 0.20330297127011707, - "grad_norm": 0.5490937232971191, - "learning_rate": 4.483148148148149e-05, - "loss": 0.6213, - "step": 5792 - }, - { - "epoch": 0.20333807192123413, - "grad_norm": 0.6346297860145569, - "learning_rate": 4.482962962962963e-05, - "loss": 0.6458, - "step": 5793 - }, - { - "epoch": 0.2033731725723512, - "grad_norm": 0.471227765083313, - "learning_rate": 4.482777777777778e-05, - "loss": 0.4796, - "step": 5794 - }, - { - "epoch": 0.2034082732234683, - "grad_norm": 0.45637160539627075, - "learning_rate": 4.4825925925925924e-05, - "loss": 0.4114, - "step": 5795 - }, - { - "epoch": 0.2034433738745854, - "grad_norm": 0.46964889764785767, - "learning_rate": 4.4824074074074074e-05, - "loss": 0.6379, - "step": 5796 - }, - { - "epoch": 0.20347847452570245, - "grad_norm": 0.467065691947937, - "learning_rate": 4.4822222222222224e-05, - "loss": 0.5655, - "step": 5797 - }, - { - "epoch": 0.20351357517681953, - "grad_norm": 0.5032250881195068, - "learning_rate": 4.4820370370370374e-05, - "loss": 0.4743, - "step": 5798 - }, - { - "epoch": 0.20354867582793662, - "grad_norm": 0.5517982244491577, - "learning_rate": 4.4818518518518524e-05, - "loss": 0.5228, - "step": 5799 - }, - { - "epoch": 0.20358377647905368, - "grad_norm": 0.5374252796173096, - "learning_rate": 4.481666666666667e-05, - "loss": 0.5257, - "step": 5800 - }, - { - "epoch": 0.20361887713017077, - "grad_norm": 0.5068433284759521, - "learning_rate": 4.481481481481482e-05, - "loss": 0.4476, - "step": 5801 - }, - { - "epoch": 0.20365397778128785, - "grad_norm": 0.5983489751815796, - "learning_rate": 4.481296296296297e-05, - "loss": 0.5276, - "step": 5802 - }, - { - "epoch": 0.2036890784324049, - "grad_norm": 0.47493699193000793, - "learning_rate": 4.481111111111112e-05, - "loss": 0.4903, - "step": 5803 - }, - { - "epoch": 0.203724179083522, - "grad_norm": 0.4810720682144165, - "learning_rate": 4.480925925925926e-05, - "loss": 0.4889, - "step": 5804 - }, - { - "epoch": 0.2037592797346391, - "grad_norm": 0.4858260750770569, - "learning_rate": 4.480740740740741e-05, - "loss": 0.5795, - "step": 5805 - }, - { - "epoch": 0.20379438038575615, - "grad_norm": 0.4771774709224701, - "learning_rate": 4.4805555555555554e-05, - "loss": 0.4873, - "step": 5806 - }, - { - "epoch": 0.20382948103687323, - "grad_norm": 0.5290054082870483, - "learning_rate": 4.4803703703703705e-05, - "loss": 0.485, - "step": 5807 - }, - { - "epoch": 0.20386458168799032, - "grad_norm": 0.5318608283996582, - "learning_rate": 4.4801851851851855e-05, - "loss": 0.5554, - "step": 5808 - }, - { - "epoch": 0.20389968233910738, - "grad_norm": 0.5791479349136353, - "learning_rate": 4.4800000000000005e-05, - "loss": 0.6022, - "step": 5809 - }, - { - "epoch": 0.20393478299022447, - "grad_norm": 0.5055559277534485, - "learning_rate": 4.479814814814815e-05, - "loss": 0.5516, - "step": 5810 - }, - { - "epoch": 0.20396988364134155, - "grad_norm": 0.4799962341785431, - "learning_rate": 4.47962962962963e-05, - "loss": 0.4898, - "step": 5811 - }, - { - "epoch": 0.2040049842924586, - "grad_norm": 0.5186617970466614, - "learning_rate": 4.479444444444444e-05, - "loss": 0.5685, - "step": 5812 - }, - { - "epoch": 0.2040400849435757, - "grad_norm": 0.4880874454975128, - "learning_rate": 4.47925925925926e-05, - "loss": 0.5248, - "step": 5813 - }, - { - "epoch": 0.2040751855946928, - "grad_norm": 0.6285914778709412, - "learning_rate": 4.479074074074074e-05, - "loss": 0.5572, - "step": 5814 - }, - { - "epoch": 0.20411028624580985, - "grad_norm": 0.5504235625267029, - "learning_rate": 4.478888888888889e-05, - "loss": 0.4725, - "step": 5815 - }, - { - "epoch": 0.20414538689692693, - "grad_norm": 0.49847114086151123, - "learning_rate": 4.4787037037037035e-05, - "loss": 0.481, - "step": 5816 - }, - { - "epoch": 0.20418048754804402, - "grad_norm": 0.4740634560585022, - "learning_rate": 4.4785185185185185e-05, - "loss": 0.5058, - "step": 5817 - }, - { - "epoch": 0.2042155881991611, - "grad_norm": 0.4323039650917053, - "learning_rate": 4.4783333333333335e-05, - "loss": 0.4821, - "step": 5818 - }, - { - "epoch": 0.20425068885027817, - "grad_norm": 0.5978840589523315, - "learning_rate": 4.4781481481481486e-05, - "loss": 0.4341, - "step": 5819 - }, - { - "epoch": 0.20428578950139525, - "grad_norm": 0.5132298469543457, - "learning_rate": 4.4779629629629636e-05, - "loss": 0.4906, - "step": 5820 - }, - { - "epoch": 0.20432089015251234, - "grad_norm": 0.5376278758049011, - "learning_rate": 4.477777777777778e-05, - "loss": 0.5719, - "step": 5821 - }, - { - "epoch": 0.2043559908036294, - "grad_norm": 0.4333142042160034, - "learning_rate": 4.477592592592593e-05, - "loss": 0.5591, - "step": 5822 - }, - { - "epoch": 0.2043910914547465, - "grad_norm": 0.45034727454185486, - "learning_rate": 4.477407407407407e-05, - "loss": 0.4691, - "step": 5823 - }, - { - "epoch": 0.20442619210586357, - "grad_norm": 0.5477269887924194, - "learning_rate": 4.477222222222223e-05, - "loss": 0.4812, - "step": 5824 - }, - { - "epoch": 0.20446129275698063, - "grad_norm": 0.49986886978149414, - "learning_rate": 4.477037037037037e-05, - "loss": 0.5137, - "step": 5825 - }, - { - "epoch": 0.20449639340809772, - "grad_norm": 0.5509771108627319, - "learning_rate": 4.476851851851852e-05, - "loss": 0.4994, - "step": 5826 - }, - { - "epoch": 0.2045314940592148, - "grad_norm": 0.4748988151550293, - "learning_rate": 4.4766666666666666e-05, - "loss": 0.4921, - "step": 5827 - }, - { - "epoch": 0.20456659471033187, - "grad_norm": 0.5487658977508545, - "learning_rate": 4.4764814814814816e-05, - "loss": 0.5419, - "step": 5828 - }, - { - "epoch": 0.20460169536144895, - "grad_norm": 0.49800607562065125, - "learning_rate": 4.4762962962962966e-05, - "loss": 0.5773, - "step": 5829 - }, - { - "epoch": 0.20463679601256604, - "grad_norm": 0.4342157542705536, - "learning_rate": 4.4761111111111116e-05, - "loss": 0.5372, - "step": 5830 - }, - { - "epoch": 0.2046718966636831, - "grad_norm": 0.49427559971809387, - "learning_rate": 4.475925925925926e-05, - "loss": 0.453, - "step": 5831 - }, - { - "epoch": 0.2047069973148002, - "grad_norm": 0.5426750183105469, - "learning_rate": 4.475740740740741e-05, - "loss": 0.5197, - "step": 5832 - }, - { - "epoch": 0.20474209796591727, - "grad_norm": 0.5391611456871033, - "learning_rate": 4.475555555555555e-05, - "loss": 0.5851, - "step": 5833 - }, - { - "epoch": 0.20477719861703433, - "grad_norm": 0.4330785870552063, - "learning_rate": 4.47537037037037e-05, - "loss": 0.555, - "step": 5834 - }, - { - "epoch": 0.20481229926815142, - "grad_norm": 0.5440877079963684, - "learning_rate": 4.4751851851851853e-05, - "loss": 0.4676, - "step": 5835 - }, - { - "epoch": 0.2048473999192685, - "grad_norm": 0.46629419922828674, - "learning_rate": 4.4750000000000004e-05, - "loss": 0.5941, - "step": 5836 - }, - { - "epoch": 0.20488250057038557, - "grad_norm": 0.49841415882110596, - "learning_rate": 4.474814814814815e-05, - "loss": 0.4682, - "step": 5837 - }, - { - "epoch": 0.20491760122150265, - "grad_norm": 0.6315843462944031, - "learning_rate": 4.47462962962963e-05, - "loss": 0.5616, - "step": 5838 - }, - { - "epoch": 0.20495270187261974, - "grad_norm": 0.5140177011489868, - "learning_rate": 4.474444444444445e-05, - "loss": 0.5732, - "step": 5839 - }, - { - "epoch": 0.20498780252373683, - "grad_norm": 0.5204194784164429, - "learning_rate": 4.47425925925926e-05, - "loss": 0.4185, - "step": 5840 - }, - { - "epoch": 0.2050229031748539, - "grad_norm": 0.5191023349761963, - "learning_rate": 4.474074074074075e-05, - "loss": 0.5897, - "step": 5841 - }, - { - "epoch": 0.20505800382597097, - "grad_norm": 0.4786989688873291, - "learning_rate": 4.473888888888889e-05, - "loss": 0.5586, - "step": 5842 - }, - { - "epoch": 0.20509310447708806, - "grad_norm": 0.44994300603866577, - "learning_rate": 4.473703703703704e-05, - "loss": 0.4641, - "step": 5843 - }, - { - "epoch": 0.20512820512820512, - "grad_norm": 0.5526472926139832, - "learning_rate": 4.4735185185185184e-05, - "loss": 0.519, - "step": 5844 - }, - { - "epoch": 0.2051633057793222, - "grad_norm": 0.477782279253006, - "learning_rate": 4.473333333333334e-05, - "loss": 0.4367, - "step": 5845 - }, - { - "epoch": 0.2051984064304393, - "grad_norm": 0.6442241072654724, - "learning_rate": 4.4731481481481484e-05, - "loss": 0.4785, - "step": 5846 - }, - { - "epoch": 0.20523350708155635, - "grad_norm": 0.4929077923297882, - "learning_rate": 4.4729629629629634e-05, - "loss": 0.5584, - "step": 5847 - }, - { - "epoch": 0.20526860773267344, - "grad_norm": 0.5226223468780518, - "learning_rate": 4.472777777777778e-05, - "loss": 0.6077, - "step": 5848 - }, - { - "epoch": 0.20530370838379053, - "grad_norm": 0.47846922278404236, - "learning_rate": 4.472592592592593e-05, - "loss": 0.3874, - "step": 5849 - }, - { - "epoch": 0.2053388090349076, - "grad_norm": 0.5038919448852539, - "learning_rate": 4.472407407407407e-05, - "loss": 0.4892, - "step": 5850 - }, - { - "epoch": 0.20537390968602467, - "grad_norm": 0.4821794927120209, - "learning_rate": 4.472222222222223e-05, - "loss": 0.5661, - "step": 5851 - }, - { - "epoch": 0.20540901033714176, - "grad_norm": 0.5058005452156067, - "learning_rate": 4.472037037037037e-05, - "loss": 0.5273, - "step": 5852 - }, - { - "epoch": 0.20544411098825882, - "grad_norm": 0.44246792793273926, - "learning_rate": 4.471851851851852e-05, - "loss": 0.5045, - "step": 5853 - }, - { - "epoch": 0.2054792116393759, - "grad_norm": 0.500372052192688, - "learning_rate": 4.4716666666666665e-05, - "loss": 0.472, - "step": 5854 - }, - { - "epoch": 0.205514312290493, - "grad_norm": 0.41964080929756165, - "learning_rate": 4.4714814814814815e-05, - "loss": 0.6176, - "step": 5855 - }, - { - "epoch": 0.20554941294161005, - "grad_norm": 0.5187985301017761, - "learning_rate": 4.4712962962962965e-05, - "loss": 0.4419, - "step": 5856 - }, - { - "epoch": 0.20558451359272714, - "grad_norm": 0.48888126015663147, - "learning_rate": 4.4711111111111115e-05, - "loss": 0.5227, - "step": 5857 - }, - { - "epoch": 0.20561961424384423, - "grad_norm": 0.5412232279777527, - "learning_rate": 4.470925925925926e-05, - "loss": 0.583, - "step": 5858 - }, - { - "epoch": 0.2056547148949613, - "grad_norm": 0.48792392015457153, - "learning_rate": 4.470740740740741e-05, - "loss": 0.505, - "step": 5859 - }, - { - "epoch": 0.20568981554607838, - "grad_norm": 0.4833071827888489, - "learning_rate": 4.470555555555556e-05, - "loss": 0.471, - "step": 5860 - }, - { - "epoch": 0.20572491619719546, - "grad_norm": 0.652236819267273, - "learning_rate": 4.47037037037037e-05, - "loss": 0.4875, - "step": 5861 - }, - { - "epoch": 0.20576001684831255, - "grad_norm": 0.4744953513145447, - "learning_rate": 4.470185185185186e-05, - "loss": 0.4706, - "step": 5862 - }, - { - "epoch": 0.2057951174994296, - "grad_norm": 0.48067864775657654, - "learning_rate": 4.47e-05, - "loss": 0.4873, - "step": 5863 - }, - { - "epoch": 0.2058302181505467, - "grad_norm": 0.5525722503662109, - "learning_rate": 4.469814814814815e-05, - "loss": 0.4417, - "step": 5864 - }, - { - "epoch": 0.20586531880166378, - "grad_norm": 0.5446246862411499, - "learning_rate": 4.4696296296296296e-05, - "loss": 0.5334, - "step": 5865 - }, - { - "epoch": 0.20590041945278084, - "grad_norm": 0.5319468975067139, - "learning_rate": 4.4694444444444446e-05, - "loss": 0.5004, - "step": 5866 - }, - { - "epoch": 0.20593552010389793, - "grad_norm": 0.5281288027763367, - "learning_rate": 4.4692592592592596e-05, - "loss": 0.503, - "step": 5867 - }, - { - "epoch": 0.20597062075501502, - "grad_norm": 0.5313505530357361, - "learning_rate": 4.4690740740740746e-05, - "loss": 0.5698, - "step": 5868 - }, - { - "epoch": 0.20600572140613208, - "grad_norm": 0.4423024356365204, - "learning_rate": 4.468888888888889e-05, - "loss": 0.3971, - "step": 5869 - }, - { - "epoch": 0.20604082205724916, - "grad_norm": 0.4552084505558014, - "learning_rate": 4.468703703703704e-05, - "loss": 0.5165, - "step": 5870 - }, - { - "epoch": 0.20607592270836625, - "grad_norm": 0.48547104001045227, - "learning_rate": 4.468518518518518e-05, - "loss": 0.6195, - "step": 5871 - }, - { - "epoch": 0.2061110233594833, - "grad_norm": 0.4751167297363281, - "learning_rate": 4.468333333333334e-05, - "loss": 0.4281, - "step": 5872 - }, - { - "epoch": 0.2061461240106004, - "grad_norm": 0.4609469473361969, - "learning_rate": 4.468148148148148e-05, - "loss": 0.4742, - "step": 5873 - }, - { - "epoch": 0.20618122466171748, - "grad_norm": 0.4952160120010376, - "learning_rate": 4.467962962962963e-05, - "loss": 0.5502, - "step": 5874 - }, - { - "epoch": 0.20621632531283454, - "grad_norm": 0.49921169877052307, - "learning_rate": 4.4677777777777777e-05, - "loss": 0.6656, - "step": 5875 - }, - { - "epoch": 0.20625142596395163, - "grad_norm": 0.589283287525177, - "learning_rate": 4.467592592592593e-05, - "loss": 0.5869, - "step": 5876 - }, - { - "epoch": 0.20628652661506872, - "grad_norm": 0.49586397409439087, - "learning_rate": 4.467407407407408e-05, - "loss": 0.4444, - "step": 5877 - }, - { - "epoch": 0.20632162726618578, - "grad_norm": 0.5026773810386658, - "learning_rate": 4.467222222222223e-05, - "loss": 0.5026, - "step": 5878 - }, - { - "epoch": 0.20635672791730286, - "grad_norm": 0.5159242153167725, - "learning_rate": 4.467037037037037e-05, - "loss": 0.4895, - "step": 5879 - }, - { - "epoch": 0.20639182856841995, - "grad_norm": 0.5293439626693726, - "learning_rate": 4.466851851851852e-05, - "loss": 0.4402, - "step": 5880 - }, - { - "epoch": 0.206426929219537, - "grad_norm": 0.4848102331161499, - "learning_rate": 4.466666666666667e-05, - "loss": 0.4816, - "step": 5881 - }, - { - "epoch": 0.2064620298706541, - "grad_norm": 0.4847506582736969, - "learning_rate": 4.4664814814814814e-05, - "loss": 0.5263, - "step": 5882 - }, - { - "epoch": 0.20649713052177118, - "grad_norm": 0.4784710705280304, - "learning_rate": 4.466296296296297e-05, - "loss": 0.4028, - "step": 5883 - }, - { - "epoch": 0.20653223117288827, - "grad_norm": 0.590761125087738, - "learning_rate": 4.4661111111111114e-05, - "loss": 0.5211, - "step": 5884 - }, - { - "epoch": 0.20656733182400533, - "grad_norm": 0.5376534461975098, - "learning_rate": 4.4659259259259264e-05, - "loss": 0.5852, - "step": 5885 - }, - { - "epoch": 0.20660243247512242, - "grad_norm": 0.5314291715621948, - "learning_rate": 4.465740740740741e-05, - "loss": 0.5003, - "step": 5886 - }, - { - "epoch": 0.2066375331262395, - "grad_norm": 0.542811930179596, - "learning_rate": 4.465555555555556e-05, - "loss": 0.5077, - "step": 5887 - }, - { - "epoch": 0.20667263377735656, - "grad_norm": 0.41105443239212036, - "learning_rate": 4.46537037037037e-05, - "loss": 0.4846, - "step": 5888 - }, - { - "epoch": 0.20670773442847365, - "grad_norm": 0.5055883526802063, - "learning_rate": 4.465185185185186e-05, - "loss": 0.5233, - "step": 5889 - }, - { - "epoch": 0.20674283507959074, - "grad_norm": 0.4597516655921936, - "learning_rate": 4.465e-05, - "loss": 0.5489, - "step": 5890 - }, - { - "epoch": 0.2067779357307078, - "grad_norm": 0.5783149003982544, - "learning_rate": 4.464814814814815e-05, - "loss": 0.4991, - "step": 5891 - }, - { - "epoch": 0.20681303638182488, - "grad_norm": 0.5101639032363892, - "learning_rate": 4.4646296296296295e-05, - "loss": 0.5348, - "step": 5892 - }, - { - "epoch": 0.20684813703294197, - "grad_norm": 0.4527219533920288, - "learning_rate": 4.4644444444444445e-05, - "loss": 0.4607, - "step": 5893 - }, - { - "epoch": 0.20688323768405903, - "grad_norm": 0.5196918249130249, - "learning_rate": 4.4642592592592595e-05, - "loss": 0.499, - "step": 5894 - }, - { - "epoch": 0.20691833833517612, - "grad_norm": 0.5393567085266113, - "learning_rate": 4.4640740740740745e-05, - "loss": 0.512, - "step": 5895 - }, - { - "epoch": 0.2069534389862932, - "grad_norm": 0.604194164276123, - "learning_rate": 4.463888888888889e-05, - "loss": 0.5143, - "step": 5896 - }, - { - "epoch": 0.20698853963741026, - "grad_norm": 0.5782538056373596, - "learning_rate": 4.463703703703704e-05, - "loss": 0.6004, - "step": 5897 - }, - { - "epoch": 0.20702364028852735, - "grad_norm": 0.4771423041820526, - "learning_rate": 4.463518518518519e-05, - "loss": 0.4933, - "step": 5898 - }, - { - "epoch": 0.20705874093964444, - "grad_norm": 0.4920268654823303, - "learning_rate": 4.463333333333334e-05, - "loss": 0.4979, - "step": 5899 - }, - { - "epoch": 0.2070938415907615, - "grad_norm": 0.5393552184104919, - "learning_rate": 4.463148148148148e-05, - "loss": 0.5062, - "step": 5900 - }, - { - "epoch": 0.20712894224187858, - "grad_norm": 0.4858897924423218, - "learning_rate": 4.462962962962963e-05, - "loss": 0.5495, - "step": 5901 - }, - { - "epoch": 0.20716404289299567, - "grad_norm": 0.4735182225704193, - "learning_rate": 4.462777777777778e-05, - "loss": 0.5746, - "step": 5902 - }, - { - "epoch": 0.20719914354411276, - "grad_norm": 0.5300837755203247, - "learning_rate": 4.4625925925925925e-05, - "loss": 0.5191, - "step": 5903 - }, - { - "epoch": 0.20723424419522982, - "grad_norm": 0.46170657873153687, - "learning_rate": 4.4624074074074076e-05, - "loss": 0.6061, - "step": 5904 - }, - { - "epoch": 0.2072693448463469, - "grad_norm": 0.46480339765548706, - "learning_rate": 4.4622222222222226e-05, - "loss": 0.615, - "step": 5905 - }, - { - "epoch": 0.207304445497464, - "grad_norm": 0.5015420317649841, - "learning_rate": 4.4620370370370376e-05, - "loss": 0.535, - "step": 5906 - }, - { - "epoch": 0.20733954614858105, - "grad_norm": 0.5044450163841248, - "learning_rate": 4.461851851851852e-05, - "loss": 0.4876, - "step": 5907 - }, - { - "epoch": 0.20737464679969814, - "grad_norm": 0.53767329454422, - "learning_rate": 4.461666666666667e-05, - "loss": 0.4784, - "step": 5908 - }, - { - "epoch": 0.20740974745081522, - "grad_norm": 0.42224588990211487, - "learning_rate": 4.461481481481481e-05, - "loss": 0.5666, - "step": 5909 - }, - { - "epoch": 0.20744484810193228, - "grad_norm": 0.5418182611465454, - "learning_rate": 4.461296296296297e-05, - "loss": 0.5397, - "step": 5910 - }, - { - "epoch": 0.20747994875304937, - "grad_norm": 0.5308401584625244, - "learning_rate": 4.461111111111111e-05, - "loss": 0.5181, - "step": 5911 - }, - { - "epoch": 0.20751504940416646, - "grad_norm": 0.5229100584983826, - "learning_rate": 4.460925925925926e-05, - "loss": 0.5196, - "step": 5912 - }, - { - "epoch": 0.20755015005528352, - "grad_norm": 0.5424684286117554, - "learning_rate": 4.4607407407407406e-05, - "loss": 0.543, - "step": 5913 - }, - { - "epoch": 0.2075852507064006, - "grad_norm": 0.46886366605758667, - "learning_rate": 4.4605555555555556e-05, - "loss": 0.5305, - "step": 5914 - }, - { - "epoch": 0.2076203513575177, - "grad_norm": 0.4874475300312042, - "learning_rate": 4.4603703703703706e-05, - "loss": 0.5672, - "step": 5915 - }, - { - "epoch": 0.20765545200863475, - "grad_norm": 0.4596846103668213, - "learning_rate": 4.4601851851851857e-05, - "loss": 0.5035, - "step": 5916 - }, - { - "epoch": 0.20769055265975184, - "grad_norm": 0.6224212646484375, - "learning_rate": 4.46e-05, - "loss": 0.6181, - "step": 5917 - }, - { - "epoch": 0.20772565331086892, - "grad_norm": 0.4688631594181061, - "learning_rate": 4.459814814814815e-05, - "loss": 0.4029, - "step": 5918 - }, - { - "epoch": 0.20776075396198598, - "grad_norm": 0.515358567237854, - "learning_rate": 4.45962962962963e-05, - "loss": 0.5628, - "step": 5919 - }, - { - "epoch": 0.20779585461310307, - "grad_norm": 0.547846257686615, - "learning_rate": 4.4594444444444443e-05, - "loss": 0.5322, - "step": 5920 - }, - { - "epoch": 0.20783095526422016, - "grad_norm": 0.42002740502357483, - "learning_rate": 4.4592592592592594e-05, - "loss": 0.4365, - "step": 5921 - }, - { - "epoch": 0.20786605591533722, - "grad_norm": 0.40918803215026855, - "learning_rate": 4.4590740740740744e-05, - "loss": 0.414, - "step": 5922 - }, - { - "epoch": 0.2079011565664543, - "grad_norm": 0.5757547616958618, - "learning_rate": 4.4588888888888894e-05, - "loss": 0.4918, - "step": 5923 - }, - { - "epoch": 0.2079362572175714, - "grad_norm": 0.5068598985671997, - "learning_rate": 4.458703703703704e-05, - "loss": 0.6078, - "step": 5924 - }, - { - "epoch": 0.20797135786868848, - "grad_norm": 0.5672745704650879, - "learning_rate": 4.458518518518519e-05, - "loss": 0.4706, - "step": 5925 - }, - { - "epoch": 0.20800645851980554, - "grad_norm": 0.49505892395973206, - "learning_rate": 4.458333333333334e-05, - "loss": 0.5505, - "step": 5926 - }, - { - "epoch": 0.20804155917092262, - "grad_norm": 0.49949851632118225, - "learning_rate": 4.458148148148149e-05, - "loss": 0.4957, - "step": 5927 - }, - { - "epoch": 0.2080766598220397, - "grad_norm": 0.5651071667671204, - "learning_rate": 4.457962962962963e-05, - "loss": 0.5415, - "step": 5928 - }, - { - "epoch": 0.20811176047315677, - "grad_norm": 0.5197621583938599, - "learning_rate": 4.457777777777778e-05, - "loss": 0.4965, - "step": 5929 - }, - { - "epoch": 0.20814686112427386, - "grad_norm": 0.47705981135368347, - "learning_rate": 4.4575925925925924e-05, - "loss": 0.4536, - "step": 5930 - }, - { - "epoch": 0.20818196177539094, - "grad_norm": 0.45580002665519714, - "learning_rate": 4.4574074074074074e-05, - "loss": 0.521, - "step": 5931 - }, - { - "epoch": 0.208217062426508, - "grad_norm": 0.4745537340641022, - "learning_rate": 4.4572222222222224e-05, - "loss": 0.5633, - "step": 5932 - }, - { - "epoch": 0.2082521630776251, - "grad_norm": 0.6220153570175171, - "learning_rate": 4.4570370370370375e-05, - "loss": 0.5467, - "step": 5933 - }, - { - "epoch": 0.20828726372874218, - "grad_norm": 0.6311488151550293, - "learning_rate": 4.456851851851852e-05, - "loss": 0.4189, - "step": 5934 - }, - { - "epoch": 0.20832236437985924, - "grad_norm": 0.4755440354347229, - "learning_rate": 4.456666666666667e-05, - "loss": 0.5412, - "step": 5935 - }, - { - "epoch": 0.20835746503097632, - "grad_norm": 0.49685215950012207, - "learning_rate": 4.456481481481482e-05, - "loss": 0.5448, - "step": 5936 - }, - { - "epoch": 0.2083925656820934, - "grad_norm": 0.5601183772087097, - "learning_rate": 4.456296296296297e-05, - "loss": 0.4846, - "step": 5937 - }, - { - "epoch": 0.20842766633321047, - "grad_norm": 0.4868001341819763, - "learning_rate": 4.456111111111111e-05, - "loss": 0.4638, - "step": 5938 - }, - { - "epoch": 0.20846276698432756, - "grad_norm": 0.5951562523841858, - "learning_rate": 4.455925925925926e-05, - "loss": 0.5315, - "step": 5939 - }, - { - "epoch": 0.20849786763544464, - "grad_norm": 0.397978812456131, - "learning_rate": 4.455740740740741e-05, - "loss": 0.4106, - "step": 5940 - }, - { - "epoch": 0.2085329682865617, - "grad_norm": 0.5449848771095276, - "learning_rate": 4.4555555555555555e-05, - "loss": 0.6015, - "step": 5941 - }, - { - "epoch": 0.2085680689376788, - "grad_norm": 0.5061251521110535, - "learning_rate": 4.4553703703703705e-05, - "loss": 0.451, - "step": 5942 - }, - { - "epoch": 0.20860316958879588, - "grad_norm": 0.607544481754303, - "learning_rate": 4.4551851851851855e-05, - "loss": 0.539, - "step": 5943 - }, - { - "epoch": 0.20863827023991294, - "grad_norm": 0.7138727903366089, - "learning_rate": 4.4550000000000005e-05, - "loss": 0.4313, - "step": 5944 - }, - { - "epoch": 0.20867337089103002, - "grad_norm": 0.5455693006515503, - "learning_rate": 4.454814814814815e-05, - "loss": 0.4095, - "step": 5945 - }, - { - "epoch": 0.2087084715421471, - "grad_norm": 0.48461124300956726, - "learning_rate": 4.45462962962963e-05, - "loss": 0.5386, - "step": 5946 - }, - { - "epoch": 0.2087435721932642, - "grad_norm": 0.5882943868637085, - "learning_rate": 4.454444444444444e-05, - "loss": 0.4359, - "step": 5947 - }, - { - "epoch": 0.20877867284438126, - "grad_norm": 0.5576409101486206, - "learning_rate": 4.45425925925926e-05, - "loss": 0.5256, - "step": 5948 - }, - { - "epoch": 0.20881377349549834, - "grad_norm": 0.4596506655216217, - "learning_rate": 4.454074074074074e-05, - "loss": 0.4248, - "step": 5949 - }, - { - "epoch": 0.20884887414661543, - "grad_norm": 0.5271729826927185, - "learning_rate": 4.453888888888889e-05, - "loss": 0.5553, - "step": 5950 - }, - { - "epoch": 0.2088839747977325, - "grad_norm": 0.44709253311157227, - "learning_rate": 4.4537037037037036e-05, - "loss": 0.5338, - "step": 5951 - }, - { - "epoch": 0.20891907544884958, - "grad_norm": 0.5452587008476257, - "learning_rate": 4.4535185185185186e-05, - "loss": 0.5658, - "step": 5952 - }, - { - "epoch": 0.20895417609996667, - "grad_norm": 0.49648839235305786, - "learning_rate": 4.4533333333333336e-05, - "loss": 0.5798, - "step": 5953 - }, - { - "epoch": 0.20898927675108372, - "grad_norm": 0.4983101785182953, - "learning_rate": 4.4531481481481486e-05, - "loss": 0.5545, - "step": 5954 - }, - { - "epoch": 0.2090243774022008, - "grad_norm": 0.5670720934867859, - "learning_rate": 4.452962962962963e-05, - "loss": 0.451, - "step": 5955 - }, - { - "epoch": 0.2090594780533179, - "grad_norm": 0.5530253648757935, - "learning_rate": 4.452777777777778e-05, - "loss": 0.561, - "step": 5956 - }, - { - "epoch": 0.20909457870443496, - "grad_norm": 0.44548970460891724, - "learning_rate": 4.452592592592593e-05, - "loss": 0.4735, - "step": 5957 - }, - { - "epoch": 0.20912967935555205, - "grad_norm": 0.47595322132110596, - "learning_rate": 4.452407407407407e-05, - "loss": 0.5147, - "step": 5958 - }, - { - "epoch": 0.20916478000666913, - "grad_norm": 0.5181584358215332, - "learning_rate": 4.452222222222222e-05, - "loss": 0.453, - "step": 5959 - }, - { - "epoch": 0.2091998806577862, - "grad_norm": 0.5187333226203918, - "learning_rate": 4.452037037037037e-05, - "loss": 0.5326, - "step": 5960 - }, - { - "epoch": 0.20923498130890328, - "grad_norm": 0.5018815994262695, - "learning_rate": 4.4518518518518523e-05, - "loss": 0.5646, - "step": 5961 - }, - { - "epoch": 0.20927008196002037, - "grad_norm": 0.4418351352214813, - "learning_rate": 4.451666666666667e-05, - "loss": 0.4938, - "step": 5962 - }, - { - "epoch": 0.20930518261113742, - "grad_norm": 0.5201433897018433, - "learning_rate": 4.451481481481482e-05, - "loss": 0.599, - "step": 5963 - }, - { - "epoch": 0.2093402832622545, - "grad_norm": 0.5641874670982361, - "learning_rate": 4.451296296296297e-05, - "loss": 0.5291, - "step": 5964 - }, - { - "epoch": 0.2093753839133716, - "grad_norm": 0.4673503339290619, - "learning_rate": 4.451111111111112e-05, - "loss": 0.4857, - "step": 5965 - }, - { - "epoch": 0.20941048456448866, - "grad_norm": 0.627461850643158, - "learning_rate": 4.450925925925926e-05, - "loss": 0.5806, - "step": 5966 - }, - { - "epoch": 0.20944558521560575, - "grad_norm": 0.46632644534111023, - "learning_rate": 4.450740740740741e-05, - "loss": 0.4416, - "step": 5967 - }, - { - "epoch": 0.20948068586672283, - "grad_norm": 0.5272588729858398, - "learning_rate": 4.4505555555555554e-05, - "loss": 0.5534, - "step": 5968 - }, - { - "epoch": 0.20951578651783992, - "grad_norm": 0.5655030608177185, - "learning_rate": 4.450370370370371e-05, - "loss": 0.5579, - "step": 5969 - }, - { - "epoch": 0.20955088716895698, - "grad_norm": 0.4512031674385071, - "learning_rate": 4.4501851851851854e-05, - "loss": 0.4708, - "step": 5970 - }, - { - "epoch": 0.20958598782007407, - "grad_norm": 0.4588440954685211, - "learning_rate": 4.4500000000000004e-05, - "loss": 0.5666, - "step": 5971 - }, - { - "epoch": 0.20962108847119115, - "grad_norm": 0.525061309337616, - "learning_rate": 4.449814814814815e-05, - "loss": 0.5157, - "step": 5972 - }, - { - "epoch": 0.2096561891223082, - "grad_norm": 0.4979327619075775, - "learning_rate": 4.44962962962963e-05, - "loss": 0.4858, - "step": 5973 - }, - { - "epoch": 0.2096912897734253, - "grad_norm": 0.37135642766952515, - "learning_rate": 4.449444444444444e-05, - "loss": 0.3789, - "step": 5974 - }, - { - "epoch": 0.20972639042454239, - "grad_norm": 0.4976522624492645, - "learning_rate": 4.44925925925926e-05, - "loss": 0.5073, - "step": 5975 - }, - { - "epoch": 0.20976149107565945, - "grad_norm": 0.6246135830879211, - "learning_rate": 4.449074074074074e-05, - "loss": 0.5243, - "step": 5976 - }, - { - "epoch": 0.20979659172677653, - "grad_norm": 0.5090498924255371, - "learning_rate": 4.448888888888889e-05, - "loss": 0.4676, - "step": 5977 - }, - { - "epoch": 0.20983169237789362, - "grad_norm": 0.5403245687484741, - "learning_rate": 4.448703703703704e-05, - "loss": 0.639, - "step": 5978 - }, - { - "epoch": 0.20986679302901068, - "grad_norm": 0.534047544002533, - "learning_rate": 4.4485185185185185e-05, - "loss": 0.5973, - "step": 5979 - }, - { - "epoch": 0.20990189368012777, - "grad_norm": 0.5311052203178406, - "learning_rate": 4.4483333333333335e-05, - "loss": 0.5721, - "step": 5980 - }, - { - "epoch": 0.20993699433124485, - "grad_norm": 0.5575326681137085, - "learning_rate": 4.4481481481481485e-05, - "loss": 0.4746, - "step": 5981 - }, - { - "epoch": 0.2099720949823619, - "grad_norm": 0.6073180437088013, - "learning_rate": 4.4479629629629635e-05, - "loss": 0.5103, - "step": 5982 - }, - { - "epoch": 0.210007195633479, - "grad_norm": 0.5394251942634583, - "learning_rate": 4.447777777777778e-05, - "loss": 0.3992, - "step": 5983 - }, - { - "epoch": 0.2100422962845961, - "grad_norm": 0.5031133890151978, - "learning_rate": 4.447592592592593e-05, - "loss": 0.5386, - "step": 5984 - }, - { - "epoch": 0.21007739693571315, - "grad_norm": 0.5003026723861694, - "learning_rate": 4.447407407407407e-05, - "loss": 0.4839, - "step": 5985 - }, - { - "epoch": 0.21011249758683023, - "grad_norm": 0.5661305785179138, - "learning_rate": 4.447222222222223e-05, - "loss": 0.4968, - "step": 5986 - }, - { - "epoch": 0.21014759823794732, - "grad_norm": 0.5375317931175232, - "learning_rate": 4.447037037037037e-05, - "loss": 0.5577, - "step": 5987 - }, - { - "epoch": 0.21018269888906438, - "grad_norm": 0.5748344659805298, - "learning_rate": 4.446851851851852e-05, - "loss": 0.5322, - "step": 5988 - }, - { - "epoch": 0.21021779954018147, - "grad_norm": 0.44723445177078247, - "learning_rate": 4.4466666666666666e-05, - "loss": 0.5692, - "step": 5989 - }, - { - "epoch": 0.21025290019129855, - "grad_norm": 0.5863723158836365, - "learning_rate": 4.4464814814814816e-05, - "loss": 0.5419, - "step": 5990 - }, - { - "epoch": 0.21028800084241564, - "grad_norm": 0.41722235083580017, - "learning_rate": 4.4462962962962966e-05, - "loss": 0.4559, - "step": 5991 - }, - { - "epoch": 0.2103231014935327, - "grad_norm": 0.4775645434856415, - "learning_rate": 4.4461111111111116e-05, - "loss": 0.5979, - "step": 5992 - }, - { - "epoch": 0.2103582021446498, - "grad_norm": 0.49060937762260437, - "learning_rate": 4.445925925925926e-05, - "loss": 0.5761, - "step": 5993 - }, - { - "epoch": 0.21039330279576687, - "grad_norm": 0.5328198075294495, - "learning_rate": 4.445740740740741e-05, - "loss": 0.5759, - "step": 5994 - }, - { - "epoch": 0.21042840344688393, - "grad_norm": 0.45686349272727966, - "learning_rate": 4.445555555555555e-05, - "loss": 0.4513, - "step": 5995 - }, - { - "epoch": 0.21046350409800102, - "grad_norm": 0.45779940485954285, - "learning_rate": 4.445370370370371e-05, - "loss": 0.4151, - "step": 5996 - }, - { - "epoch": 0.2104986047491181, - "grad_norm": 0.4669570028781891, - "learning_rate": 4.445185185185185e-05, - "loss": 0.4985, - "step": 5997 - }, - { - "epoch": 0.21053370540023517, - "grad_norm": 0.4833686053752899, - "learning_rate": 4.445e-05, - "loss": 0.4001, - "step": 5998 - }, - { - "epoch": 0.21056880605135225, - "grad_norm": 0.5383350253105164, - "learning_rate": 4.444814814814815e-05, - "loss": 0.5551, - "step": 5999 - }, - { - "epoch": 0.21060390670246934, - "grad_norm": 0.5365159511566162, - "learning_rate": 4.4446296296296296e-05, - "loss": 0.575, - "step": 6000 - }, - { - "epoch": 0.2106390073535864, - "grad_norm": 0.426778644323349, - "learning_rate": 4.4444444444444447e-05, - "loss": 0.4293, - "step": 6001 - }, - { - "epoch": 0.2106741080047035, - "grad_norm": 0.5405651926994324, - "learning_rate": 4.44425925925926e-05, - "loss": 0.5894, - "step": 6002 - }, - { - "epoch": 0.21070920865582057, - "grad_norm": 0.4929159879684448, - "learning_rate": 4.444074074074075e-05, - "loss": 0.528, - "step": 6003 - }, - { - "epoch": 0.21074430930693763, - "grad_norm": 0.5073087215423584, - "learning_rate": 4.443888888888889e-05, - "loss": 0.451, - "step": 6004 - }, - { - "epoch": 0.21077940995805472, - "grad_norm": 0.5128059983253479, - "learning_rate": 4.443703703703704e-05, - "loss": 0.5926, - "step": 6005 - }, - { - "epoch": 0.2108145106091718, - "grad_norm": 0.6390427947044373, - "learning_rate": 4.4435185185185184e-05, - "loss": 0.4619, - "step": 6006 - }, - { - "epoch": 0.21084961126028887, - "grad_norm": 0.5313932299613953, - "learning_rate": 4.443333333333334e-05, - "loss": 0.5678, - "step": 6007 - }, - { - "epoch": 0.21088471191140595, - "grad_norm": 0.48993805050849915, - "learning_rate": 4.4431481481481484e-05, - "loss": 0.5492, - "step": 6008 - }, - { - "epoch": 0.21091981256252304, - "grad_norm": 0.5603945255279541, - "learning_rate": 4.4429629629629634e-05, - "loss": 0.556, - "step": 6009 - }, - { - "epoch": 0.2109549132136401, - "grad_norm": 0.5631628036499023, - "learning_rate": 4.442777777777778e-05, - "loss": 0.6486, - "step": 6010 - }, - { - "epoch": 0.2109900138647572, - "grad_norm": 0.4619326889514923, - "learning_rate": 4.442592592592593e-05, - "loss": 0.4741, - "step": 6011 - }, - { - "epoch": 0.21102511451587427, - "grad_norm": 0.5371477007865906, - "learning_rate": 4.442407407407407e-05, - "loss": 0.4631, - "step": 6012 - }, - { - "epoch": 0.21106021516699136, - "grad_norm": 0.51808762550354, - "learning_rate": 4.442222222222223e-05, - "loss": 0.6016, - "step": 6013 - }, - { - "epoch": 0.21109531581810842, - "grad_norm": 0.4623587131500244, - "learning_rate": 4.442037037037037e-05, - "loss": 0.5331, - "step": 6014 - }, - { - "epoch": 0.2111304164692255, - "grad_norm": 0.5646682381629944, - "learning_rate": 4.441851851851852e-05, - "loss": 0.4694, - "step": 6015 - }, - { - "epoch": 0.2111655171203426, - "grad_norm": 0.48030054569244385, - "learning_rate": 4.4416666666666664e-05, - "loss": 0.4876, - "step": 6016 - }, - { - "epoch": 0.21120061777145965, - "grad_norm": 0.5435926914215088, - "learning_rate": 4.4414814814814814e-05, - "loss": 0.4567, - "step": 6017 - }, - { - "epoch": 0.21123571842257674, - "grad_norm": 0.5060495138168335, - "learning_rate": 4.4412962962962965e-05, - "loss": 0.5732, - "step": 6018 - }, - { - "epoch": 0.21127081907369383, - "grad_norm": 0.5039764642715454, - "learning_rate": 4.4411111111111115e-05, - "loss": 0.4114, - "step": 6019 - }, - { - "epoch": 0.2113059197248109, - "grad_norm": 0.47715118527412415, - "learning_rate": 4.4409259259259265e-05, - "loss": 0.5103, - "step": 6020 - }, - { - "epoch": 0.21134102037592797, - "grad_norm": 0.5522314310073853, - "learning_rate": 4.440740740740741e-05, - "loss": 0.5419, - "step": 6021 - }, - { - "epoch": 0.21137612102704506, - "grad_norm": 0.5191988348960876, - "learning_rate": 4.440555555555556e-05, - "loss": 0.5525, - "step": 6022 - }, - { - "epoch": 0.21141122167816212, - "grad_norm": 0.62196284532547, - "learning_rate": 4.440370370370371e-05, - "loss": 0.455, - "step": 6023 - }, - { - "epoch": 0.2114463223292792, - "grad_norm": 0.4798639118671417, - "learning_rate": 4.440185185185186e-05, - "loss": 0.504, - "step": 6024 - }, - { - "epoch": 0.2114814229803963, - "grad_norm": 0.5122519731521606, - "learning_rate": 4.44e-05, - "loss": 0.4007, - "step": 6025 - }, - { - "epoch": 0.21151652363151335, - "grad_norm": 0.45959141850471497, - "learning_rate": 4.439814814814815e-05, - "loss": 0.3941, - "step": 6026 - }, - { - "epoch": 0.21155162428263044, - "grad_norm": 0.736585259437561, - "learning_rate": 4.4396296296296295e-05, - "loss": 0.5615, - "step": 6027 - }, - { - "epoch": 0.21158672493374753, - "grad_norm": 0.45332470536231995, - "learning_rate": 4.4394444444444445e-05, - "loss": 0.4985, - "step": 6028 - }, - { - "epoch": 0.2116218255848646, - "grad_norm": 0.5678082704544067, - "learning_rate": 4.4392592592592595e-05, - "loss": 0.591, - "step": 6029 - }, - { - "epoch": 0.21165692623598167, - "grad_norm": 0.7280960083007812, - "learning_rate": 4.4390740740740746e-05, - "loss": 0.5496, - "step": 6030 - }, - { - "epoch": 0.21169202688709876, - "grad_norm": 0.5740956664085388, - "learning_rate": 4.438888888888889e-05, - "loss": 0.4875, - "step": 6031 - }, - { - "epoch": 0.21172712753821582, - "grad_norm": 0.5561991930007935, - "learning_rate": 4.438703703703704e-05, - "loss": 0.5598, - "step": 6032 - }, - { - "epoch": 0.2117622281893329, - "grad_norm": 0.45020535588264465, - "learning_rate": 4.438518518518518e-05, - "loss": 0.5079, - "step": 6033 - }, - { - "epoch": 0.21179732884045, - "grad_norm": 0.4173010587692261, - "learning_rate": 4.438333333333334e-05, - "loss": 0.4837, - "step": 6034 - }, - { - "epoch": 0.21183242949156708, - "grad_norm": 0.4674872159957886, - "learning_rate": 4.438148148148148e-05, - "loss": 0.4467, - "step": 6035 - }, - { - "epoch": 0.21186753014268414, - "grad_norm": 0.5036106109619141, - "learning_rate": 4.437962962962963e-05, - "loss": 0.4911, - "step": 6036 - }, - { - "epoch": 0.21190263079380123, - "grad_norm": 0.4758087396621704, - "learning_rate": 4.4377777777777776e-05, - "loss": 0.5417, - "step": 6037 - }, - { - "epoch": 0.21193773144491831, - "grad_norm": 0.495552122592926, - "learning_rate": 4.4375925925925926e-05, - "loss": 0.4773, - "step": 6038 - }, - { - "epoch": 0.21197283209603537, - "grad_norm": 0.5520939230918884, - "learning_rate": 4.4374074074074076e-05, - "loss": 0.5387, - "step": 6039 - }, - { - "epoch": 0.21200793274715246, - "grad_norm": 0.5217005014419556, - "learning_rate": 4.4372222222222226e-05, - "loss": 0.3988, - "step": 6040 - }, - { - "epoch": 0.21204303339826955, - "grad_norm": 0.901638925075531, - "learning_rate": 4.4370370370370376e-05, - "loss": 0.5594, - "step": 6041 - }, - { - "epoch": 0.2120781340493866, - "grad_norm": 0.4123663902282715, - "learning_rate": 4.436851851851852e-05, - "loss": 0.4473, - "step": 6042 - }, - { - "epoch": 0.2121132347005037, - "grad_norm": 0.4397287964820862, - "learning_rate": 4.436666666666667e-05, - "loss": 0.5018, - "step": 6043 - }, - { - "epoch": 0.21214833535162078, - "grad_norm": 0.5345658659934998, - "learning_rate": 4.436481481481481e-05, - "loss": 0.5511, - "step": 6044 - }, - { - "epoch": 0.21218343600273784, - "grad_norm": 0.4354703724384308, - "learning_rate": 4.436296296296297e-05, - "loss": 0.4265, - "step": 6045 - }, - { - "epoch": 0.21221853665385493, - "grad_norm": 0.5787032842636108, - "learning_rate": 4.4361111111111113e-05, - "loss": 0.5514, - "step": 6046 - }, - { - "epoch": 0.21225363730497201, - "grad_norm": 0.5345259308815002, - "learning_rate": 4.4359259259259264e-05, - "loss": 0.5866, - "step": 6047 - }, - { - "epoch": 0.21228873795608907, - "grad_norm": 0.47014641761779785, - "learning_rate": 4.435740740740741e-05, - "loss": 0.496, - "step": 6048 - }, - { - "epoch": 0.21232383860720616, - "grad_norm": 0.485490620136261, - "learning_rate": 4.435555555555556e-05, - "loss": 0.4155, - "step": 6049 - }, - { - "epoch": 0.21235893925832325, - "grad_norm": 0.4904814064502716, - "learning_rate": 4.435370370370371e-05, - "loss": 0.5461, - "step": 6050 - }, - { - "epoch": 0.2123940399094403, - "grad_norm": 0.6699715852737427, - "learning_rate": 4.435185185185186e-05, - "loss": 0.4626, - "step": 6051 - }, - { - "epoch": 0.2124291405605574, - "grad_norm": 0.6159021854400635, - "learning_rate": 4.435e-05, - "loss": 0.5256, - "step": 6052 - }, - { - "epoch": 0.21246424121167448, - "grad_norm": 0.49189263582229614, - "learning_rate": 4.434814814814815e-05, - "loss": 0.4734, - "step": 6053 - }, - { - "epoch": 0.21249934186279154, - "grad_norm": 0.45219889283180237, - "learning_rate": 4.4346296296296294e-05, - "loss": 0.5441, - "step": 6054 - }, - { - "epoch": 0.21253444251390863, - "grad_norm": 0.4517466127872467, - "learning_rate": 4.4344444444444444e-05, - "loss": 0.4744, - "step": 6055 - }, - { - "epoch": 0.21256954316502572, - "grad_norm": 0.5994243621826172, - "learning_rate": 4.4342592592592594e-05, - "loss": 0.5411, - "step": 6056 - }, - { - "epoch": 0.2126046438161428, - "grad_norm": 0.6465870141983032, - "learning_rate": 4.4340740740740744e-05, - "loss": 0.5787, - "step": 6057 - }, - { - "epoch": 0.21263974446725986, - "grad_norm": 0.439838171005249, - "learning_rate": 4.433888888888889e-05, - "loss": 0.5124, - "step": 6058 - }, - { - "epoch": 0.21267484511837695, - "grad_norm": 0.5000131130218506, - "learning_rate": 4.433703703703704e-05, - "loss": 0.5266, - "step": 6059 - }, - { - "epoch": 0.21270994576949404, - "grad_norm": 0.5112720727920532, - "learning_rate": 4.433518518518519e-05, - "loss": 0.6382, - "step": 6060 - }, - { - "epoch": 0.2127450464206111, - "grad_norm": 0.5351223349571228, - "learning_rate": 4.433333333333334e-05, - "loss": 0.5734, - "step": 6061 - }, - { - "epoch": 0.21278014707172818, - "grad_norm": 0.44734176993370056, - "learning_rate": 4.433148148148149e-05, - "loss": 0.5349, - "step": 6062 - }, - { - "epoch": 0.21281524772284527, - "grad_norm": 0.6794304847717285, - "learning_rate": 4.432962962962963e-05, - "loss": 0.5409, - "step": 6063 - }, - { - "epoch": 0.21285034837396233, - "grad_norm": 0.4971737563610077, - "learning_rate": 4.432777777777778e-05, - "loss": 0.4856, - "step": 6064 - }, - { - "epoch": 0.21288544902507942, - "grad_norm": 0.42203274369239807, - "learning_rate": 4.4325925925925925e-05, - "loss": 0.5156, - "step": 6065 - }, - { - "epoch": 0.2129205496761965, - "grad_norm": 0.5023411512374878, - "learning_rate": 4.432407407407408e-05, - "loss": 0.5373, - "step": 6066 - }, - { - "epoch": 0.21295565032731356, - "grad_norm": 0.6248885989189148, - "learning_rate": 4.4322222222222225e-05, - "loss": 0.5461, - "step": 6067 - }, - { - "epoch": 0.21299075097843065, - "grad_norm": 0.5446840524673462, - "learning_rate": 4.4320370370370375e-05, - "loss": 0.4757, - "step": 6068 - }, - { - "epoch": 0.21302585162954774, - "grad_norm": 0.6059736013412476, - "learning_rate": 4.431851851851852e-05, - "loss": 0.4874, - "step": 6069 - }, - { - "epoch": 0.2130609522806648, - "grad_norm": 0.5685401558876038, - "learning_rate": 4.431666666666667e-05, - "loss": 0.5262, - "step": 6070 - }, - { - "epoch": 0.21309605293178188, - "grad_norm": 0.48891329765319824, - "learning_rate": 4.431481481481481e-05, - "loss": 0.5027, - "step": 6071 - }, - { - "epoch": 0.21313115358289897, - "grad_norm": 0.5369744896888733, - "learning_rate": 4.431296296296297e-05, - "loss": 0.5971, - "step": 6072 - }, - { - "epoch": 0.21316625423401603, - "grad_norm": 0.48513633012771606, - "learning_rate": 4.431111111111111e-05, - "loss": 0.5596, - "step": 6073 - }, - { - "epoch": 0.21320135488513312, - "grad_norm": 0.6823819279670715, - "learning_rate": 4.430925925925926e-05, - "loss": 0.5072, - "step": 6074 - }, - { - "epoch": 0.2132364555362502, - "grad_norm": 0.5316343307495117, - "learning_rate": 4.4307407407407406e-05, - "loss": 0.5303, - "step": 6075 - }, - { - "epoch": 0.2132715561873673, - "grad_norm": 0.5532602071762085, - "learning_rate": 4.4305555555555556e-05, - "loss": 0.5375, - "step": 6076 - }, - { - "epoch": 0.21330665683848435, - "grad_norm": 0.46285155415534973, - "learning_rate": 4.4303703703703706e-05, - "loss": 0.5731, - "step": 6077 - }, - { - "epoch": 0.21334175748960144, - "grad_norm": 0.5347551107406616, - "learning_rate": 4.4301851851851856e-05, - "loss": 0.6041, - "step": 6078 - }, - { - "epoch": 0.21337685814071852, - "grad_norm": 0.4441242814064026, - "learning_rate": 4.43e-05, - "loss": 0.485, - "step": 6079 - }, - { - "epoch": 0.21341195879183558, - "grad_norm": 0.5281724333763123, - "learning_rate": 4.429814814814815e-05, - "loss": 0.4096, - "step": 6080 - }, - { - "epoch": 0.21344705944295267, - "grad_norm": 0.5757550597190857, - "learning_rate": 4.42962962962963e-05, - "loss": 0.4568, - "step": 6081 - }, - { - "epoch": 0.21348216009406976, - "grad_norm": 0.5327442288398743, - "learning_rate": 4.429444444444444e-05, - "loss": 0.6213, - "step": 6082 - }, - { - "epoch": 0.21351726074518682, - "grad_norm": 0.4807966947555542, - "learning_rate": 4.42925925925926e-05, - "loss": 0.5509, - "step": 6083 - }, - { - "epoch": 0.2135523613963039, - "grad_norm": 0.5004333853721619, - "learning_rate": 4.429074074074074e-05, - "loss": 0.4728, - "step": 6084 - }, - { - "epoch": 0.213587462047421, - "grad_norm": 0.4501829743385315, - "learning_rate": 4.428888888888889e-05, - "loss": 0.4678, - "step": 6085 - }, - { - "epoch": 0.21362256269853805, - "grad_norm": 0.7183891534805298, - "learning_rate": 4.4287037037037037e-05, - "loss": 0.5697, - "step": 6086 - }, - { - "epoch": 0.21365766334965514, - "grad_norm": 0.5225006937980652, - "learning_rate": 4.428518518518519e-05, - "loss": 0.3944, - "step": 6087 - }, - { - "epoch": 0.21369276400077222, - "grad_norm": 0.4825748801231384, - "learning_rate": 4.428333333333334e-05, - "loss": 0.6097, - "step": 6088 - }, - { - "epoch": 0.21372786465188928, - "grad_norm": 0.47574594616889954, - "learning_rate": 4.428148148148149e-05, - "loss": 0.5105, - "step": 6089 - }, - { - "epoch": 0.21376296530300637, - "grad_norm": 0.6171118021011353, - "learning_rate": 4.427962962962963e-05, - "loss": 0.4858, - "step": 6090 - }, - { - "epoch": 0.21379806595412346, - "grad_norm": 0.5539529323577881, - "learning_rate": 4.427777777777778e-05, - "loss": 0.5662, - "step": 6091 - }, - { - "epoch": 0.21383316660524052, - "grad_norm": 0.5459829568862915, - "learning_rate": 4.4275925925925924e-05, - "loss": 0.4995, - "step": 6092 - }, - { - "epoch": 0.2138682672563576, - "grad_norm": 0.5417593717575073, - "learning_rate": 4.427407407407408e-05, - "loss": 0.5666, - "step": 6093 - }, - { - "epoch": 0.2139033679074747, - "grad_norm": 0.47556740045547485, - "learning_rate": 4.4272222222222224e-05, - "loss": 0.5508, - "step": 6094 - }, - { - "epoch": 0.21393846855859175, - "grad_norm": 0.4601626694202423, - "learning_rate": 4.4270370370370374e-05, - "loss": 0.4454, - "step": 6095 - }, - { - "epoch": 0.21397356920970884, - "grad_norm": 0.48633861541748047, - "learning_rate": 4.426851851851852e-05, - "loss": 0.4906, - "step": 6096 - }, - { - "epoch": 0.21400866986082592, - "grad_norm": 0.3892229199409485, - "learning_rate": 4.426666666666667e-05, - "loss": 0.5303, - "step": 6097 - }, - { - "epoch": 0.214043770511943, - "grad_norm": 0.4744836091995239, - "learning_rate": 4.426481481481482e-05, - "loss": 0.455, - "step": 6098 - }, - { - "epoch": 0.21407887116306007, - "grad_norm": 0.5654664039611816, - "learning_rate": 4.426296296296297e-05, - "loss": 0.5652, - "step": 6099 - }, - { - "epoch": 0.21411397181417716, - "grad_norm": 0.5378389954566956, - "learning_rate": 4.426111111111111e-05, - "loss": 0.5071, - "step": 6100 - }, - { - "epoch": 0.21414907246529424, - "grad_norm": 0.5759448409080505, - "learning_rate": 4.425925925925926e-05, - "loss": 0.6093, - "step": 6101 - }, - { - "epoch": 0.2141841731164113, - "grad_norm": 0.4705880284309387, - "learning_rate": 4.425740740740741e-05, - "loss": 0.3744, - "step": 6102 - }, - { - "epoch": 0.2142192737675284, - "grad_norm": 0.509179413318634, - "learning_rate": 4.4255555555555555e-05, - "loss": 0.5888, - "step": 6103 - }, - { - "epoch": 0.21425437441864548, - "grad_norm": 0.6094291806221008, - "learning_rate": 4.425370370370371e-05, - "loss": 0.4835, - "step": 6104 - }, - { - "epoch": 0.21428947506976254, - "grad_norm": 0.4352051615715027, - "learning_rate": 4.4251851851851855e-05, - "loss": 0.4605, - "step": 6105 - }, - { - "epoch": 0.21432457572087962, - "grad_norm": 0.46013182401657104, - "learning_rate": 4.4250000000000005e-05, - "loss": 0.5129, - "step": 6106 - }, - { - "epoch": 0.2143596763719967, - "grad_norm": 0.49374842643737793, - "learning_rate": 4.424814814814815e-05, - "loss": 0.5752, - "step": 6107 - }, - { - "epoch": 0.21439477702311377, - "grad_norm": 0.4674174189567566, - "learning_rate": 4.42462962962963e-05, - "loss": 0.6294, - "step": 6108 - }, - { - "epoch": 0.21442987767423086, - "grad_norm": 0.44688504934310913, - "learning_rate": 4.424444444444444e-05, - "loss": 0.4692, - "step": 6109 - }, - { - "epoch": 0.21446497832534794, - "grad_norm": 0.4781249165534973, - "learning_rate": 4.42425925925926e-05, - "loss": 0.5916, - "step": 6110 - }, - { - "epoch": 0.214500078976465, - "grad_norm": 0.5548703074455261, - "learning_rate": 4.424074074074074e-05, - "loss": 0.6189, - "step": 6111 - }, - { - "epoch": 0.2145351796275821, - "grad_norm": 0.5551955699920654, - "learning_rate": 4.423888888888889e-05, - "loss": 0.4973, - "step": 6112 - }, - { - "epoch": 0.21457028027869918, - "grad_norm": 0.4513585567474365, - "learning_rate": 4.4237037037037035e-05, - "loss": 0.4669, - "step": 6113 - }, - { - "epoch": 0.21460538092981624, - "grad_norm": 0.4319193959236145, - "learning_rate": 4.4235185185185185e-05, - "loss": 0.4887, - "step": 6114 - }, - { - "epoch": 0.21464048158093332, - "grad_norm": 0.7109453082084656, - "learning_rate": 4.4233333333333336e-05, - "loss": 0.5696, - "step": 6115 - }, - { - "epoch": 0.2146755822320504, - "grad_norm": 0.4803845286369324, - "learning_rate": 4.4231481481481486e-05, - "loss": 0.4689, - "step": 6116 - }, - { - "epoch": 0.21471068288316747, - "grad_norm": 0.49306994676589966, - "learning_rate": 4.422962962962963e-05, - "loss": 0.5504, - "step": 6117 - }, - { - "epoch": 0.21474578353428456, - "grad_norm": 0.5703309178352356, - "learning_rate": 4.422777777777778e-05, - "loss": 0.5021, - "step": 6118 - }, - { - "epoch": 0.21478088418540164, - "grad_norm": 0.5258085131645203, - "learning_rate": 4.422592592592593e-05, - "loss": 0.5102, - "step": 6119 - }, - { - "epoch": 0.21481598483651873, - "grad_norm": 0.5677812099456787, - "learning_rate": 4.422407407407408e-05, - "loss": 0.6518, - "step": 6120 - }, - { - "epoch": 0.2148510854876358, - "grad_norm": 0.4745675325393677, - "learning_rate": 4.422222222222222e-05, - "loss": 0.4679, - "step": 6121 - }, - { - "epoch": 0.21488618613875288, - "grad_norm": 0.5123950839042664, - "learning_rate": 4.422037037037037e-05, - "loss": 0.5296, - "step": 6122 - }, - { - "epoch": 0.21492128678986996, - "grad_norm": 0.4188375473022461, - "learning_rate": 4.421851851851852e-05, - "loss": 0.3744, - "step": 6123 - }, - { - "epoch": 0.21495638744098702, - "grad_norm": 0.5526732802391052, - "learning_rate": 4.4216666666666666e-05, - "loss": 0.4805, - "step": 6124 - }, - { - "epoch": 0.2149914880921041, - "grad_norm": 0.46477118134498596, - "learning_rate": 4.4214814814814816e-05, - "loss": 0.5198, - "step": 6125 - }, - { - "epoch": 0.2150265887432212, - "grad_norm": 0.5004428625106812, - "learning_rate": 4.4212962962962966e-05, - "loss": 0.4962, - "step": 6126 - }, - { - "epoch": 0.21506168939433826, - "grad_norm": 0.4123381972312927, - "learning_rate": 4.4211111111111117e-05, - "loss": 0.4728, - "step": 6127 - }, - { - "epoch": 0.21509679004545534, - "grad_norm": 0.5593514442443848, - "learning_rate": 4.420925925925926e-05, - "loss": 0.5064, - "step": 6128 - }, - { - "epoch": 0.21513189069657243, - "grad_norm": 0.45040392875671387, - "learning_rate": 4.420740740740741e-05, - "loss": 0.4686, - "step": 6129 - }, - { - "epoch": 0.2151669913476895, - "grad_norm": 0.49280354380607605, - "learning_rate": 4.420555555555555e-05, - "loss": 0.5478, - "step": 6130 - }, - { - "epoch": 0.21520209199880658, - "grad_norm": 0.5247217416763306, - "learning_rate": 4.420370370370371e-05, - "loss": 0.4985, - "step": 6131 - }, - { - "epoch": 0.21523719264992366, - "grad_norm": 0.5219798684120178, - "learning_rate": 4.4201851851851854e-05, - "loss": 0.5555, - "step": 6132 - }, - { - "epoch": 0.21527229330104072, - "grad_norm": 0.49673449993133545, - "learning_rate": 4.4200000000000004e-05, - "loss": 0.476, - "step": 6133 - }, - { - "epoch": 0.2153073939521578, - "grad_norm": 0.5519738793373108, - "learning_rate": 4.419814814814815e-05, - "loss": 0.6106, - "step": 6134 - }, - { - "epoch": 0.2153424946032749, - "grad_norm": 0.5920052528381348, - "learning_rate": 4.41962962962963e-05, - "loss": 0.6403, - "step": 6135 - }, - { - "epoch": 0.21537759525439196, - "grad_norm": 0.4535396099090576, - "learning_rate": 4.419444444444444e-05, - "loss": 0.4727, - "step": 6136 - }, - { - "epoch": 0.21541269590550904, - "grad_norm": 0.4816664755344391, - "learning_rate": 4.41925925925926e-05, - "loss": 0.4241, - "step": 6137 - }, - { - "epoch": 0.21544779655662613, - "grad_norm": 0.4598335325717926, - "learning_rate": 4.419074074074074e-05, - "loss": 0.4119, - "step": 6138 - }, - { - "epoch": 0.2154828972077432, - "grad_norm": 0.5152636170387268, - "learning_rate": 4.418888888888889e-05, - "loss": 0.5339, - "step": 6139 - }, - { - "epoch": 0.21551799785886028, - "grad_norm": 0.4845847189426422, - "learning_rate": 4.418703703703704e-05, - "loss": 0.5209, - "step": 6140 - }, - { - "epoch": 0.21555309850997736, - "grad_norm": 0.5466188788414001, - "learning_rate": 4.4185185185185184e-05, - "loss": 0.521, - "step": 6141 - }, - { - "epoch": 0.21558819916109445, - "grad_norm": 0.45470675826072693, - "learning_rate": 4.4183333333333334e-05, - "loss": 0.4944, - "step": 6142 - }, - { - "epoch": 0.2156232998122115, - "grad_norm": 0.4794469177722931, - "learning_rate": 4.4181481481481484e-05, - "loss": 0.5135, - "step": 6143 - }, - { - "epoch": 0.2156584004633286, - "grad_norm": 0.4718024432659149, - "learning_rate": 4.4179629629629635e-05, - "loss": 0.5601, - "step": 6144 - }, - { - "epoch": 0.21569350111444568, - "grad_norm": 0.5525261163711548, - "learning_rate": 4.417777777777778e-05, - "loss": 0.5057, - "step": 6145 - }, - { - "epoch": 0.21572860176556274, - "grad_norm": 0.42808082699775696, - "learning_rate": 4.417592592592593e-05, - "loss": 0.5189, - "step": 6146 - }, - { - "epoch": 0.21576370241667983, - "grad_norm": 0.5026149153709412, - "learning_rate": 4.417407407407408e-05, - "loss": 0.6046, - "step": 6147 - }, - { - "epoch": 0.21579880306779692, - "grad_norm": 0.7856538891792297, - "learning_rate": 4.417222222222223e-05, - "loss": 0.618, - "step": 6148 - }, - { - "epoch": 0.21583390371891398, - "grad_norm": 0.4292316436767578, - "learning_rate": 4.417037037037037e-05, - "loss": 0.4544, - "step": 6149 - }, - { - "epoch": 0.21586900437003106, - "grad_norm": 0.46762070059776306, - "learning_rate": 4.416851851851852e-05, - "loss": 0.5069, - "step": 6150 - }, - { - "epoch": 0.21590410502114815, - "grad_norm": 0.5500401258468628, - "learning_rate": 4.4166666666666665e-05, - "loss": 0.56, - "step": 6151 - }, - { - "epoch": 0.2159392056722652, - "grad_norm": 0.4420247972011566, - "learning_rate": 4.4164814814814815e-05, - "loss": 0.4797, - "step": 6152 - }, - { - "epoch": 0.2159743063233823, - "grad_norm": 0.4166506230831146, - "learning_rate": 4.4162962962962965e-05, - "loss": 0.4811, - "step": 6153 - }, - { - "epoch": 0.21600940697449938, - "grad_norm": 0.4312019646167755, - "learning_rate": 4.4161111111111115e-05, - "loss": 0.5828, - "step": 6154 - }, - { - "epoch": 0.21604450762561644, - "grad_norm": 0.4454611837863922, - "learning_rate": 4.415925925925926e-05, - "loss": 0.6214, - "step": 6155 - }, - { - "epoch": 0.21607960827673353, - "grad_norm": 0.44852331280708313, - "learning_rate": 4.415740740740741e-05, - "loss": 0.5596, - "step": 6156 - }, - { - "epoch": 0.21611470892785062, - "grad_norm": 0.4885976016521454, - "learning_rate": 4.415555555555556e-05, - "loss": 0.4875, - "step": 6157 - }, - { - "epoch": 0.21614980957896768, - "grad_norm": 0.4780914783477783, - "learning_rate": 4.415370370370371e-05, - "loss": 0.5151, - "step": 6158 - }, - { - "epoch": 0.21618491023008476, - "grad_norm": 0.47705209255218506, - "learning_rate": 4.415185185185185e-05, - "loss": 0.4607, - "step": 6159 - }, - { - "epoch": 0.21622001088120185, - "grad_norm": 0.667580783367157, - "learning_rate": 4.415e-05, - "loss": 0.428, - "step": 6160 - }, - { - "epoch": 0.2162551115323189, - "grad_norm": 0.5083836913108826, - "learning_rate": 4.414814814814815e-05, - "loss": 0.5703, - "step": 6161 - }, - { - "epoch": 0.216290212183436, - "grad_norm": 0.47969430685043335, - "learning_rate": 4.4146296296296296e-05, - "loss": 0.5102, - "step": 6162 - }, - { - "epoch": 0.21632531283455309, - "grad_norm": 0.5559709668159485, - "learning_rate": 4.4144444444444446e-05, - "loss": 0.5171, - "step": 6163 - }, - { - "epoch": 0.21636041348567017, - "grad_norm": 0.49216997623443604, - "learning_rate": 4.4142592592592596e-05, - "loss": 0.5326, - "step": 6164 - }, - { - "epoch": 0.21639551413678723, - "grad_norm": 0.4978572428226471, - "learning_rate": 4.4140740740740746e-05, - "loss": 0.5465, - "step": 6165 - }, - { - "epoch": 0.21643061478790432, - "grad_norm": 0.4825570583343506, - "learning_rate": 4.413888888888889e-05, - "loss": 0.5733, - "step": 6166 - }, - { - "epoch": 0.2164657154390214, - "grad_norm": 0.6727797389030457, - "learning_rate": 4.413703703703704e-05, - "loss": 0.5149, - "step": 6167 - }, - { - "epoch": 0.21650081609013846, - "grad_norm": 0.4412880539894104, - "learning_rate": 4.413518518518518e-05, - "loss": 0.3995, - "step": 6168 - }, - { - "epoch": 0.21653591674125555, - "grad_norm": 0.5017647743225098, - "learning_rate": 4.413333333333334e-05, - "loss": 0.526, - "step": 6169 - }, - { - "epoch": 0.21657101739237264, - "grad_norm": 0.4535152018070221, - "learning_rate": 4.413148148148148e-05, - "loss": 0.4801, - "step": 6170 - }, - { - "epoch": 0.2166061180434897, - "grad_norm": 0.4709092080593109, - "learning_rate": 4.4129629629629633e-05, - "loss": 0.3764, - "step": 6171 - }, - { - "epoch": 0.21664121869460679, - "grad_norm": 0.5251800417900085, - "learning_rate": 4.412777777777778e-05, - "loss": 0.5235, - "step": 6172 - }, - { - "epoch": 0.21667631934572387, - "grad_norm": 0.5032487511634827, - "learning_rate": 4.412592592592593e-05, - "loss": 0.5696, - "step": 6173 - }, - { - "epoch": 0.21671141999684093, - "grad_norm": 0.558800995349884, - "learning_rate": 4.412407407407408e-05, - "loss": 0.4865, - "step": 6174 - }, - { - "epoch": 0.21674652064795802, - "grad_norm": 0.502369225025177, - "learning_rate": 4.412222222222223e-05, - "loss": 0.5808, - "step": 6175 - }, - { - "epoch": 0.2167816212990751, - "grad_norm": 0.48360109329223633, - "learning_rate": 4.412037037037037e-05, - "loss": 0.451, - "step": 6176 - }, - { - "epoch": 0.21681672195019216, - "grad_norm": 0.6444686055183411, - "learning_rate": 4.411851851851852e-05, - "loss": 0.6265, - "step": 6177 - }, - { - "epoch": 0.21685182260130925, - "grad_norm": 0.48043304681777954, - "learning_rate": 4.411666666666667e-05, - "loss": 0.538, - "step": 6178 - }, - { - "epoch": 0.21688692325242634, - "grad_norm": 0.45765721797943115, - "learning_rate": 4.4114814814814814e-05, - "loss": 0.4938, - "step": 6179 - }, - { - "epoch": 0.2169220239035434, - "grad_norm": 0.5212236642837524, - "learning_rate": 4.4112962962962964e-05, - "loss": 0.5418, - "step": 6180 - }, - { - "epoch": 0.21695712455466049, - "grad_norm": 0.499199777841568, - "learning_rate": 4.4111111111111114e-05, - "loss": 0.579, - "step": 6181 - }, - { - "epoch": 0.21699222520577757, - "grad_norm": 0.6439045071601868, - "learning_rate": 4.4109259259259264e-05, - "loss": 0.406, - "step": 6182 - }, - { - "epoch": 0.21702732585689463, - "grad_norm": 0.4519166052341461, - "learning_rate": 4.410740740740741e-05, - "loss": 0.4321, - "step": 6183 - }, - { - "epoch": 0.21706242650801172, - "grad_norm": 0.48357197642326355, - "learning_rate": 4.410555555555556e-05, - "loss": 0.4537, - "step": 6184 - }, - { - "epoch": 0.2170975271591288, - "grad_norm": 0.5234977602958679, - "learning_rate": 4.410370370370371e-05, - "loss": 0.4373, - "step": 6185 - }, - { - "epoch": 0.2171326278102459, - "grad_norm": 0.81775963306427, - "learning_rate": 4.410185185185186e-05, - "loss": 0.6302, - "step": 6186 - }, - { - "epoch": 0.21716772846136295, - "grad_norm": 0.4549204707145691, - "learning_rate": 4.41e-05, - "loss": 0.5221, - "step": 6187 - }, - { - "epoch": 0.21720282911248004, - "grad_norm": 0.5473336577415466, - "learning_rate": 4.409814814814815e-05, - "loss": 0.5637, - "step": 6188 - }, - { - "epoch": 0.21723792976359713, - "grad_norm": 0.5592092871665955, - "learning_rate": 4.4096296296296295e-05, - "loss": 0.4096, - "step": 6189 - }, - { - "epoch": 0.21727303041471419, - "grad_norm": 0.5923792719841003, - "learning_rate": 4.409444444444445e-05, - "loss": 0.42, - "step": 6190 - }, - { - "epoch": 0.21730813106583127, - "grad_norm": 0.4628503620624542, - "learning_rate": 4.4092592592592595e-05, - "loss": 0.428, - "step": 6191 - }, - { - "epoch": 0.21734323171694836, - "grad_norm": 0.5174522995948792, - "learning_rate": 4.4090740740740745e-05, - "loss": 0.5426, - "step": 6192 - }, - { - "epoch": 0.21737833236806542, - "grad_norm": 0.4536075294017792, - "learning_rate": 4.408888888888889e-05, - "loss": 0.4668, - "step": 6193 - }, - { - "epoch": 0.2174134330191825, - "grad_norm": 0.4784795045852661, - "learning_rate": 4.408703703703704e-05, - "loss": 0.443, - "step": 6194 - }, - { - "epoch": 0.2174485336702996, - "grad_norm": 0.4880618453025818, - "learning_rate": 4.408518518518518e-05, - "loss": 0.5604, - "step": 6195 - }, - { - "epoch": 0.21748363432141665, - "grad_norm": 0.6564543843269348, - "learning_rate": 4.408333333333334e-05, - "loss": 0.5812, - "step": 6196 - }, - { - "epoch": 0.21751873497253374, - "grad_norm": 0.5523297190666199, - "learning_rate": 4.408148148148148e-05, - "loss": 0.3698, - "step": 6197 - }, - { - "epoch": 0.21755383562365083, - "grad_norm": 0.5774946212768555, - "learning_rate": 4.407962962962963e-05, - "loss": 0.5668, - "step": 6198 - }, - { - "epoch": 0.21758893627476789, - "grad_norm": 0.5113601684570312, - "learning_rate": 4.407777777777778e-05, - "loss": 0.553, - "step": 6199 - }, - { - "epoch": 0.21762403692588497, - "grad_norm": 0.4996824264526367, - "learning_rate": 4.4075925925925926e-05, - "loss": 0.5704, - "step": 6200 - }, - { - "epoch": 0.21765913757700206, - "grad_norm": 0.5552753806114197, - "learning_rate": 4.4074074074074076e-05, - "loss": 0.388, - "step": 6201 - }, - { - "epoch": 0.21769423822811912, - "grad_norm": 0.49459055066108704, - "learning_rate": 4.4072222222222226e-05, - "loss": 0.5495, - "step": 6202 - }, - { - "epoch": 0.2177293388792362, - "grad_norm": 0.5291008949279785, - "learning_rate": 4.4070370370370376e-05, - "loss": 0.5576, - "step": 6203 - }, - { - "epoch": 0.2177644395303533, - "grad_norm": 0.5060317516326904, - "learning_rate": 4.406851851851852e-05, - "loss": 0.4644, - "step": 6204 - }, - { - "epoch": 0.21779954018147035, - "grad_norm": 0.5348131060600281, - "learning_rate": 4.406666666666667e-05, - "loss": 0.5017, - "step": 6205 - }, - { - "epoch": 0.21783464083258744, - "grad_norm": 0.47825518250465393, - "learning_rate": 4.406481481481481e-05, - "loss": 0.5004, - "step": 6206 - }, - { - "epoch": 0.21786974148370453, - "grad_norm": 0.6394668817520142, - "learning_rate": 4.406296296296297e-05, - "loss": 0.5108, - "step": 6207 - }, - { - "epoch": 0.2179048421348216, - "grad_norm": 0.42238709330558777, - "learning_rate": 4.406111111111111e-05, - "loss": 0.474, - "step": 6208 - }, - { - "epoch": 0.21793994278593867, - "grad_norm": 0.48176220059394836, - "learning_rate": 4.405925925925926e-05, - "loss": 0.5931, - "step": 6209 - }, - { - "epoch": 0.21797504343705576, - "grad_norm": 0.6040773987770081, - "learning_rate": 4.4057407407407406e-05, - "loss": 0.5385, - "step": 6210 - }, - { - "epoch": 0.21801014408817285, - "grad_norm": 0.5500298738479614, - "learning_rate": 4.4055555555555557e-05, - "loss": 0.4755, - "step": 6211 - }, - { - "epoch": 0.2180452447392899, - "grad_norm": 0.5082186460494995, - "learning_rate": 4.405370370370371e-05, - "loss": 0.555, - "step": 6212 - }, - { - "epoch": 0.218080345390407, - "grad_norm": 0.5330479145050049, - "learning_rate": 4.405185185185186e-05, - "loss": 0.5253, - "step": 6213 - }, - { - "epoch": 0.21811544604152408, - "grad_norm": 0.43874073028564453, - "learning_rate": 4.405e-05, - "loss": 0.5361, - "step": 6214 - }, - { - "epoch": 0.21815054669264114, - "grad_norm": 0.4732935130596161, - "learning_rate": 4.404814814814815e-05, - "loss": 0.4457, - "step": 6215 - }, - { - "epoch": 0.21818564734375823, - "grad_norm": 0.4852568507194519, - "learning_rate": 4.4046296296296294e-05, - "loss": 0.4344, - "step": 6216 - }, - { - "epoch": 0.2182207479948753, - "grad_norm": 0.4949985146522522, - "learning_rate": 4.404444444444445e-05, - "loss": 0.5396, - "step": 6217 - }, - { - "epoch": 0.21825584864599237, - "grad_norm": 0.6460181474685669, - "learning_rate": 4.4042592592592594e-05, - "loss": 0.4988, - "step": 6218 - }, - { - "epoch": 0.21829094929710946, - "grad_norm": 0.40645045042037964, - "learning_rate": 4.4040740740740744e-05, - "loss": 0.4157, - "step": 6219 - }, - { - "epoch": 0.21832604994822655, - "grad_norm": 0.47136959433555603, - "learning_rate": 4.4038888888888894e-05, - "loss": 0.4553, - "step": 6220 - }, - { - "epoch": 0.2183611505993436, - "grad_norm": 0.4780440628528595, - "learning_rate": 4.403703703703704e-05, - "loss": 0.442, - "step": 6221 - }, - { - "epoch": 0.2183962512504607, - "grad_norm": 0.4991077482700348, - "learning_rate": 4.403518518518519e-05, - "loss": 0.618, - "step": 6222 - }, - { - "epoch": 0.21843135190157778, - "grad_norm": 0.45429477095603943, - "learning_rate": 4.403333333333334e-05, - "loss": 0.48, - "step": 6223 - }, - { - "epoch": 0.21846645255269484, - "grad_norm": 0.5229246020317078, - "learning_rate": 4.403148148148149e-05, - "loss": 0.5258, - "step": 6224 - }, - { - "epoch": 0.21850155320381193, - "grad_norm": 0.5477080345153809, - "learning_rate": 4.402962962962963e-05, - "loss": 0.5267, - "step": 6225 - }, - { - "epoch": 0.218536653854929, - "grad_norm": 0.5636204481124878, - "learning_rate": 4.402777777777778e-05, - "loss": 0.5842, - "step": 6226 - }, - { - "epoch": 0.2185717545060461, - "grad_norm": 0.5092066526412964, - "learning_rate": 4.4025925925925924e-05, - "loss": 0.6, - "step": 6227 - }, - { - "epoch": 0.21860685515716316, - "grad_norm": 0.5608065128326416, - "learning_rate": 4.402407407407408e-05, - "loss": 0.5572, - "step": 6228 - }, - { - "epoch": 0.21864195580828025, - "grad_norm": 0.5133360028266907, - "learning_rate": 4.4022222222222225e-05, - "loss": 0.4939, - "step": 6229 - }, - { - "epoch": 0.21867705645939733, - "grad_norm": 0.5531045198440552, - "learning_rate": 4.4020370370370375e-05, - "loss": 0.4549, - "step": 6230 - }, - { - "epoch": 0.2187121571105144, - "grad_norm": 0.49333709478378296, - "learning_rate": 4.401851851851852e-05, - "loss": 0.5144, - "step": 6231 - }, - { - "epoch": 0.21874725776163148, - "grad_norm": 0.49464091658592224, - "learning_rate": 4.401666666666667e-05, - "loss": 0.5837, - "step": 6232 - }, - { - "epoch": 0.21878235841274857, - "grad_norm": 0.5191746354103088, - "learning_rate": 4.401481481481481e-05, - "loss": 0.5225, - "step": 6233 - }, - { - "epoch": 0.21881745906386563, - "grad_norm": 0.49753323197364807, - "learning_rate": 4.401296296296297e-05, - "loss": 0.4, - "step": 6234 - }, - { - "epoch": 0.21885255971498271, - "grad_norm": 0.48242196440696716, - "learning_rate": 4.401111111111111e-05, - "loss": 0.5443, - "step": 6235 - }, - { - "epoch": 0.2188876603660998, - "grad_norm": 0.48852917551994324, - "learning_rate": 4.400925925925926e-05, - "loss": 0.4706, - "step": 6236 - }, - { - "epoch": 0.21892276101721686, - "grad_norm": 0.4298532009124756, - "learning_rate": 4.4007407407407405e-05, - "loss": 0.4507, - "step": 6237 - }, - { - "epoch": 0.21895786166833395, - "grad_norm": 0.4760327637195587, - "learning_rate": 4.4005555555555555e-05, - "loss": 0.5228, - "step": 6238 - }, - { - "epoch": 0.21899296231945103, - "grad_norm": 0.47378507256507874, - "learning_rate": 4.4003703703703705e-05, - "loss": 0.4869, - "step": 6239 - }, - { - "epoch": 0.2190280629705681, - "grad_norm": 0.5016470551490784, - "learning_rate": 4.4001851851851856e-05, - "loss": 0.5603, - "step": 6240 - }, - { - "epoch": 0.21906316362168518, - "grad_norm": 0.5993140935897827, - "learning_rate": 4.4000000000000006e-05, - "loss": 0.5322, - "step": 6241 - }, - { - "epoch": 0.21909826427280227, - "grad_norm": 0.621923565864563, - "learning_rate": 4.399814814814815e-05, - "loss": 0.5874, - "step": 6242 - }, - { - "epoch": 0.21913336492391933, - "grad_norm": 0.6203579306602478, - "learning_rate": 4.39962962962963e-05, - "loss": 0.5015, - "step": 6243 - }, - { - "epoch": 0.21916846557503641, - "grad_norm": 0.5294671654701233, - "learning_rate": 4.399444444444445e-05, - "loss": 0.599, - "step": 6244 - }, - { - "epoch": 0.2192035662261535, - "grad_norm": 0.5151827931404114, - "learning_rate": 4.39925925925926e-05, - "loss": 0.5006, - "step": 6245 - }, - { - "epoch": 0.21923866687727056, - "grad_norm": 0.545649528503418, - "learning_rate": 4.399074074074074e-05, - "loss": 0.5307, - "step": 6246 - }, - { - "epoch": 0.21927376752838765, - "grad_norm": 0.527482271194458, - "learning_rate": 4.398888888888889e-05, - "loss": 0.6244, - "step": 6247 - }, - { - "epoch": 0.21930886817950473, - "grad_norm": 0.6200212240219116, - "learning_rate": 4.3987037037037036e-05, - "loss": 0.537, - "step": 6248 - }, - { - "epoch": 0.21934396883062182, - "grad_norm": 0.49316439032554626, - "learning_rate": 4.3985185185185186e-05, - "loss": 0.4853, - "step": 6249 - }, - { - "epoch": 0.21937906948173888, - "grad_norm": 0.5796030759811401, - "learning_rate": 4.3983333333333336e-05, - "loss": 0.4911, - "step": 6250 - }, - { - "epoch": 0.21941417013285597, - "grad_norm": 0.48111245036125183, - "learning_rate": 4.3981481481481486e-05, - "loss": 0.425, - "step": 6251 - }, - { - "epoch": 0.21944927078397305, - "grad_norm": 0.48063114285469055, - "learning_rate": 4.397962962962963e-05, - "loss": 0.4906, - "step": 6252 - }, - { - "epoch": 0.21948437143509011, - "grad_norm": 0.46665048599243164, - "learning_rate": 4.397777777777778e-05, - "loss": 0.5039, - "step": 6253 - }, - { - "epoch": 0.2195194720862072, - "grad_norm": 0.47782960534095764, - "learning_rate": 4.397592592592592e-05, - "loss": 0.5274, - "step": 6254 - }, - { - "epoch": 0.2195545727373243, - "grad_norm": 0.42020896077156067, - "learning_rate": 4.397407407407408e-05, - "loss": 0.3124, - "step": 6255 - }, - { - "epoch": 0.21958967338844135, - "grad_norm": 0.5933518409729004, - "learning_rate": 4.3972222222222223e-05, - "loss": 0.6185, - "step": 6256 - }, - { - "epoch": 0.21962477403955843, - "grad_norm": 0.4458532929420471, - "learning_rate": 4.3970370370370374e-05, - "loss": 0.4328, - "step": 6257 - }, - { - "epoch": 0.21965987469067552, - "grad_norm": 0.5102686882019043, - "learning_rate": 4.396851851851852e-05, - "loss": 0.5093, - "step": 6258 - }, - { - "epoch": 0.21969497534179258, - "grad_norm": 0.4446847438812256, - "learning_rate": 4.396666666666667e-05, - "loss": 0.4861, - "step": 6259 - }, - { - "epoch": 0.21973007599290967, - "grad_norm": 0.4327722191810608, - "learning_rate": 4.396481481481482e-05, - "loss": 0.4, - "step": 6260 - }, - { - "epoch": 0.21976517664402676, - "grad_norm": 0.5934036374092102, - "learning_rate": 4.396296296296297e-05, - "loss": 0.5182, - "step": 6261 - }, - { - "epoch": 0.21980027729514381, - "grad_norm": 0.4879116714000702, - "learning_rate": 4.396111111111112e-05, - "loss": 0.4493, - "step": 6262 - }, - { - "epoch": 0.2198353779462609, - "grad_norm": 0.49920472502708435, - "learning_rate": 4.395925925925926e-05, - "loss": 0.5366, - "step": 6263 - }, - { - "epoch": 0.219870478597378, - "grad_norm": 0.4619874954223633, - "learning_rate": 4.395740740740741e-05, - "loss": 0.4105, - "step": 6264 - }, - { - "epoch": 0.21990557924849505, - "grad_norm": 0.4887373447418213, - "learning_rate": 4.3955555555555554e-05, - "loss": 0.5617, - "step": 6265 - }, - { - "epoch": 0.21994067989961213, - "grad_norm": 0.44380685687065125, - "learning_rate": 4.395370370370371e-05, - "loss": 0.505, - "step": 6266 - }, - { - "epoch": 0.21997578055072922, - "grad_norm": 0.460760235786438, - "learning_rate": 4.3951851851851854e-05, - "loss": 0.5612, - "step": 6267 - }, - { - "epoch": 0.22001088120184628, - "grad_norm": 0.5573042631149292, - "learning_rate": 4.3950000000000004e-05, - "loss": 0.625, - "step": 6268 - }, - { - "epoch": 0.22004598185296337, - "grad_norm": 0.5551176071166992, - "learning_rate": 4.394814814814815e-05, - "loss": 0.4663, - "step": 6269 - }, - { - "epoch": 0.22008108250408046, - "grad_norm": 0.5451775193214417, - "learning_rate": 4.39462962962963e-05, - "loss": 0.5704, - "step": 6270 - }, - { - "epoch": 0.22011618315519754, - "grad_norm": 0.43985193967819214, - "learning_rate": 4.394444444444445e-05, - "loss": 0.4976, - "step": 6271 - }, - { - "epoch": 0.2201512838063146, - "grad_norm": 0.4822920262813568, - "learning_rate": 4.39425925925926e-05, - "loss": 0.3922, - "step": 6272 - }, - { - "epoch": 0.2201863844574317, - "grad_norm": 0.4822113513946533, - "learning_rate": 4.394074074074074e-05, - "loss": 0.358, - "step": 6273 - }, - { - "epoch": 0.22022148510854878, - "grad_norm": 0.4701046049594879, - "learning_rate": 4.393888888888889e-05, - "loss": 0.5447, - "step": 6274 - }, - { - "epoch": 0.22025658575966583, - "grad_norm": 0.5381385684013367, - "learning_rate": 4.3937037037037035e-05, - "loss": 0.4674, - "step": 6275 - }, - { - "epoch": 0.22029168641078292, - "grad_norm": 0.47739487886428833, - "learning_rate": 4.3935185185185185e-05, - "loss": 0.5544, - "step": 6276 - }, - { - "epoch": 0.2203267870619, - "grad_norm": 0.4981534481048584, - "learning_rate": 4.3933333333333335e-05, - "loss": 0.5563, - "step": 6277 - }, - { - "epoch": 0.22036188771301707, - "grad_norm": 0.4711988866329193, - "learning_rate": 4.3931481481481485e-05, - "loss": 0.4639, - "step": 6278 - }, - { - "epoch": 0.22039698836413416, - "grad_norm": 0.4111834466457367, - "learning_rate": 4.392962962962963e-05, - "loss": 0.4113, - "step": 6279 - }, - { - "epoch": 0.22043208901525124, - "grad_norm": 0.4988335072994232, - "learning_rate": 4.392777777777778e-05, - "loss": 0.5318, - "step": 6280 - }, - { - "epoch": 0.2204671896663683, - "grad_norm": 0.6456666588783264, - "learning_rate": 4.392592592592593e-05, - "loss": 0.4733, - "step": 6281 - }, - { - "epoch": 0.2205022903174854, - "grad_norm": 0.3949092626571655, - "learning_rate": 4.392407407407408e-05, - "loss": 0.5126, - "step": 6282 - }, - { - "epoch": 0.22053739096860248, - "grad_norm": 0.4569627344608307, - "learning_rate": 4.392222222222223e-05, - "loss": 0.4279, - "step": 6283 - }, - { - "epoch": 0.22057249161971954, - "grad_norm": 0.47069692611694336, - "learning_rate": 4.392037037037037e-05, - "loss": 0.5285, - "step": 6284 - }, - { - "epoch": 0.22060759227083662, - "grad_norm": 0.4919087290763855, - "learning_rate": 4.391851851851852e-05, - "loss": 0.5592, - "step": 6285 - }, - { - "epoch": 0.2206426929219537, - "grad_norm": 0.4143083095550537, - "learning_rate": 4.3916666666666666e-05, - "loss": 0.4336, - "step": 6286 - }, - { - "epoch": 0.22067779357307077, - "grad_norm": 0.42070692777633667, - "learning_rate": 4.3914814814814816e-05, - "loss": 0.3724, - "step": 6287 - }, - { - "epoch": 0.22071289422418786, - "grad_norm": 0.5692270398139954, - "learning_rate": 4.3912962962962966e-05, - "loss": 0.4575, - "step": 6288 - }, - { - "epoch": 0.22074799487530494, - "grad_norm": 0.5066229104995728, - "learning_rate": 4.3911111111111116e-05, - "loss": 0.5682, - "step": 6289 - }, - { - "epoch": 0.220783095526422, - "grad_norm": 0.49895620346069336, - "learning_rate": 4.390925925925926e-05, - "loss": 0.4846, - "step": 6290 - }, - { - "epoch": 0.2208181961775391, - "grad_norm": 0.4587940573692322, - "learning_rate": 4.390740740740741e-05, - "loss": 0.5535, - "step": 6291 - }, - { - "epoch": 0.22085329682865618, - "grad_norm": 0.6126022934913635, - "learning_rate": 4.390555555555555e-05, - "loss": 0.5858, - "step": 6292 - }, - { - "epoch": 0.22088839747977326, - "grad_norm": 0.485806941986084, - "learning_rate": 4.390370370370371e-05, - "loss": 0.4564, - "step": 6293 - }, - { - "epoch": 0.22092349813089032, - "grad_norm": 0.5795376896858215, - "learning_rate": 4.390185185185185e-05, - "loss": 0.6295, - "step": 6294 - }, - { - "epoch": 0.2209585987820074, - "grad_norm": 0.4784345328807831, - "learning_rate": 4.39e-05, - "loss": 0.5935, - "step": 6295 - }, - { - "epoch": 0.2209936994331245, - "grad_norm": 0.49620088934898376, - "learning_rate": 4.3898148148148147e-05, - "loss": 0.5792, - "step": 6296 - }, - { - "epoch": 0.22102880008424156, - "grad_norm": 0.4905327558517456, - "learning_rate": 4.38962962962963e-05, - "loss": 0.4052, - "step": 6297 - }, - { - "epoch": 0.22106390073535864, - "grad_norm": 0.4950527250766754, - "learning_rate": 4.389444444444445e-05, - "loss": 0.4158, - "step": 6298 - }, - { - "epoch": 0.22109900138647573, - "grad_norm": 0.42098551988601685, - "learning_rate": 4.38925925925926e-05, - "loss": 0.536, - "step": 6299 - }, - { - "epoch": 0.2211341020375928, - "grad_norm": 0.4664209187030792, - "learning_rate": 4.389074074074074e-05, - "loss": 0.4983, - "step": 6300 - }, - { - "epoch": 0.22116920268870988, - "grad_norm": 0.5666025280952454, - "learning_rate": 4.388888888888889e-05, - "loss": 0.5513, - "step": 6301 - }, - { - "epoch": 0.22120430333982696, - "grad_norm": 0.42147207260131836, - "learning_rate": 4.388703703703704e-05, - "loss": 0.555, - "step": 6302 - }, - { - "epoch": 0.22123940399094402, - "grad_norm": 0.4849197268486023, - "learning_rate": 4.3885185185185184e-05, - "loss": 0.4652, - "step": 6303 - }, - { - "epoch": 0.2212745046420611, - "grad_norm": 0.45593011379241943, - "learning_rate": 4.388333333333334e-05, - "loss": 0.3794, - "step": 6304 - }, - { - "epoch": 0.2213096052931782, - "grad_norm": 0.48303791880607605, - "learning_rate": 4.3881481481481484e-05, - "loss": 0.4724, - "step": 6305 - }, - { - "epoch": 0.22134470594429526, - "grad_norm": 0.4261491298675537, - "learning_rate": 4.3879629629629634e-05, - "loss": 0.4976, - "step": 6306 - }, - { - "epoch": 0.22137980659541234, - "grad_norm": 0.5318865776062012, - "learning_rate": 4.387777777777778e-05, - "loss": 0.5849, - "step": 6307 - }, - { - "epoch": 0.22141490724652943, - "grad_norm": 0.4782041609287262, - "learning_rate": 4.387592592592593e-05, - "loss": 0.5373, - "step": 6308 - }, - { - "epoch": 0.2214500078976465, - "grad_norm": 0.6002064943313599, - "learning_rate": 4.387407407407408e-05, - "loss": 0.5713, - "step": 6309 - }, - { - "epoch": 0.22148510854876358, - "grad_norm": 0.49458253383636475, - "learning_rate": 4.387222222222223e-05, - "loss": 0.525, - "step": 6310 - }, - { - "epoch": 0.22152020919988066, - "grad_norm": 0.5385607481002808, - "learning_rate": 4.387037037037037e-05, - "loss": 0.5602, - "step": 6311 - }, - { - "epoch": 0.22155530985099772, - "grad_norm": 0.37835758924484253, - "learning_rate": 4.386851851851852e-05, - "loss": 0.1978, - "step": 6312 - }, - { - "epoch": 0.2215904105021148, - "grad_norm": 0.6036472320556641, - "learning_rate": 4.3866666666666665e-05, - "loss": 0.5953, - "step": 6313 - }, - { - "epoch": 0.2216255111532319, - "grad_norm": 0.5411561131477356, - "learning_rate": 4.386481481481482e-05, - "loss": 0.4392, - "step": 6314 - }, - { - "epoch": 0.22166061180434898, - "grad_norm": 0.5601428747177124, - "learning_rate": 4.3862962962962965e-05, - "loss": 0.5358, - "step": 6315 - }, - { - "epoch": 0.22169571245546604, - "grad_norm": 0.5319662690162659, - "learning_rate": 4.3861111111111115e-05, - "loss": 0.4584, - "step": 6316 - }, - { - "epoch": 0.22173081310658313, - "grad_norm": 0.602544367313385, - "learning_rate": 4.385925925925926e-05, - "loss": 0.53, - "step": 6317 - }, - { - "epoch": 0.22176591375770022, - "grad_norm": 0.5584543943405151, - "learning_rate": 4.385740740740741e-05, - "loss": 0.4966, - "step": 6318 - }, - { - "epoch": 0.22180101440881728, - "grad_norm": 0.5586845874786377, - "learning_rate": 4.385555555555556e-05, - "loss": 0.5473, - "step": 6319 - }, - { - "epoch": 0.22183611505993436, - "grad_norm": 0.5040899515151978, - "learning_rate": 4.385370370370371e-05, - "loss": 0.4189, - "step": 6320 - }, - { - "epoch": 0.22187121571105145, - "grad_norm": 0.46500101685523987, - "learning_rate": 4.385185185185185e-05, - "loss": 0.5701, - "step": 6321 - }, - { - "epoch": 0.2219063163621685, - "grad_norm": 0.5196889638900757, - "learning_rate": 4.385e-05, - "loss": 0.5119, - "step": 6322 - }, - { - "epoch": 0.2219414170132856, - "grad_norm": 0.7791686654090881, - "learning_rate": 4.384814814814815e-05, - "loss": 0.5384, - "step": 6323 - }, - { - "epoch": 0.22197651766440268, - "grad_norm": 0.5553638339042664, - "learning_rate": 4.3846296296296295e-05, - "loss": 0.4304, - "step": 6324 - }, - { - "epoch": 0.22201161831551974, - "grad_norm": 0.41129744052886963, - "learning_rate": 4.384444444444445e-05, - "loss": 0.4539, - "step": 6325 - }, - { - "epoch": 0.22204671896663683, - "grad_norm": 0.5269284248352051, - "learning_rate": 4.3842592592592596e-05, - "loss": 0.6307, - "step": 6326 - }, - { - "epoch": 0.22208181961775392, - "grad_norm": 0.5169380307197571, - "learning_rate": 4.3840740740740746e-05, - "loss": 0.4621, - "step": 6327 - }, - { - "epoch": 0.22211692026887098, - "grad_norm": 0.4241899847984314, - "learning_rate": 4.383888888888889e-05, - "loss": 0.4323, - "step": 6328 - }, - { - "epoch": 0.22215202091998806, - "grad_norm": 0.48876363039016724, - "learning_rate": 4.383703703703704e-05, - "loss": 0.5156, - "step": 6329 - }, - { - "epoch": 0.22218712157110515, - "grad_norm": 0.5182754397392273, - "learning_rate": 4.383518518518518e-05, - "loss": 0.6269, - "step": 6330 - }, - { - "epoch": 0.2222222222222222, - "grad_norm": 0.6086937189102173, - "learning_rate": 4.383333333333334e-05, - "loss": 0.4861, - "step": 6331 - }, - { - "epoch": 0.2222573228733393, - "grad_norm": 0.44847777485847473, - "learning_rate": 4.383148148148148e-05, - "loss": 0.441, - "step": 6332 - }, - { - "epoch": 0.22229242352445638, - "grad_norm": 0.5399346351623535, - "learning_rate": 4.382962962962963e-05, - "loss": 0.575, - "step": 6333 - }, - { - "epoch": 0.22232752417557344, - "grad_norm": 0.49623337388038635, - "learning_rate": 4.3827777777777776e-05, - "loss": 0.6507, - "step": 6334 - }, - { - "epoch": 0.22236262482669053, - "grad_norm": 0.43005335330963135, - "learning_rate": 4.3825925925925926e-05, - "loss": 0.5562, - "step": 6335 - }, - { - "epoch": 0.22239772547780762, - "grad_norm": 0.5250207185745239, - "learning_rate": 4.3824074074074076e-05, - "loss": 0.6423, - "step": 6336 - }, - { - "epoch": 0.2224328261289247, - "grad_norm": 0.5866613388061523, - "learning_rate": 4.3822222222222227e-05, - "loss": 0.47, - "step": 6337 - }, - { - "epoch": 0.22246792678004176, - "grad_norm": 0.5988165140151978, - "learning_rate": 4.382037037037037e-05, - "loss": 0.5, - "step": 6338 - }, - { - "epoch": 0.22250302743115885, - "grad_norm": 0.5908451080322266, - "learning_rate": 4.381851851851852e-05, - "loss": 0.5823, - "step": 6339 - }, - { - "epoch": 0.22253812808227594, - "grad_norm": 0.4913384020328522, - "learning_rate": 4.381666666666667e-05, - "loss": 0.5338, - "step": 6340 - }, - { - "epoch": 0.222573228733393, - "grad_norm": 0.5916531085968018, - "learning_rate": 4.381481481481482e-05, - "loss": 0.538, - "step": 6341 - }, - { - "epoch": 0.22260832938451008, - "grad_norm": 0.5281890630722046, - "learning_rate": 4.3812962962962964e-05, - "loss": 0.538, - "step": 6342 - }, - { - "epoch": 0.22264343003562717, - "grad_norm": 0.5062279105186462, - "learning_rate": 4.3811111111111114e-05, - "loss": 0.5396, - "step": 6343 - }, - { - "epoch": 0.22267853068674423, - "grad_norm": 0.5359518527984619, - "learning_rate": 4.3809259259259264e-05, - "loss": 0.5838, - "step": 6344 - }, - { - "epoch": 0.22271363133786132, - "grad_norm": 0.630617618560791, - "learning_rate": 4.380740740740741e-05, - "loss": 0.4285, - "step": 6345 - }, - { - "epoch": 0.2227487319889784, - "grad_norm": 0.5920730829238892, - "learning_rate": 4.380555555555556e-05, - "loss": 0.6225, - "step": 6346 - }, - { - "epoch": 0.22278383264009546, - "grad_norm": 0.5332921743392944, - "learning_rate": 4.380370370370371e-05, - "loss": 0.5789, - "step": 6347 - }, - { - "epoch": 0.22281893329121255, - "grad_norm": 0.4978759288787842, - "learning_rate": 4.380185185185186e-05, - "loss": 0.553, - "step": 6348 - }, - { - "epoch": 0.22285403394232964, - "grad_norm": 0.5182501673698425, - "learning_rate": 4.38e-05, - "loss": 0.4091, - "step": 6349 - }, - { - "epoch": 0.2228891345934467, - "grad_norm": 0.5010809302330017, - "learning_rate": 4.379814814814815e-05, - "loss": 0.4256, - "step": 6350 - }, - { - "epoch": 0.22292423524456378, - "grad_norm": 0.43245425820350647, - "learning_rate": 4.3796296296296294e-05, - "loss": 0.4752, - "step": 6351 - }, - { - "epoch": 0.22295933589568087, - "grad_norm": 0.49831315875053406, - "learning_rate": 4.379444444444445e-05, - "loss": 0.4326, - "step": 6352 - }, - { - "epoch": 0.22299443654679793, - "grad_norm": 0.4928932189941406, - "learning_rate": 4.3792592592592594e-05, - "loss": 0.5424, - "step": 6353 - }, - { - "epoch": 0.22302953719791502, - "grad_norm": 0.4852054715156555, - "learning_rate": 4.3790740740740745e-05, - "loss": 0.5974, - "step": 6354 - }, - { - "epoch": 0.2230646378490321, - "grad_norm": 0.4707433879375458, - "learning_rate": 4.378888888888889e-05, - "loss": 0.6122, - "step": 6355 - }, - { - "epoch": 0.22309973850014916, - "grad_norm": 0.4311583638191223, - "learning_rate": 4.378703703703704e-05, - "loss": 0.4102, - "step": 6356 - }, - { - "epoch": 0.22313483915126625, - "grad_norm": 0.5351525545120239, - "learning_rate": 4.378518518518518e-05, - "loss": 0.4879, - "step": 6357 - }, - { - "epoch": 0.22316993980238334, - "grad_norm": 0.5374259948730469, - "learning_rate": 4.378333333333334e-05, - "loss": 0.5725, - "step": 6358 - }, - { - "epoch": 0.22320504045350043, - "grad_norm": 0.4551353454589844, - "learning_rate": 4.378148148148148e-05, - "loss": 0.5055, - "step": 6359 - }, - { - "epoch": 0.22324014110461748, - "grad_norm": 0.42647719383239746, - "learning_rate": 4.377962962962963e-05, - "loss": 0.4437, - "step": 6360 - }, - { - "epoch": 0.22327524175573457, - "grad_norm": 0.5566410422325134, - "learning_rate": 4.377777777777778e-05, - "loss": 0.5593, - "step": 6361 - }, - { - "epoch": 0.22331034240685166, - "grad_norm": 0.45186179876327515, - "learning_rate": 4.3775925925925925e-05, - "loss": 0.5061, - "step": 6362 - }, - { - "epoch": 0.22334544305796872, - "grad_norm": 0.5327913761138916, - "learning_rate": 4.3774074074074075e-05, - "loss": 0.4284, - "step": 6363 - }, - { - "epoch": 0.2233805437090858, - "grad_norm": 0.4913274347782135, - "learning_rate": 4.3772222222222225e-05, - "loss": 0.3438, - "step": 6364 - }, - { - "epoch": 0.2234156443602029, - "grad_norm": 0.5243493914604187, - "learning_rate": 4.3770370370370375e-05, - "loss": 0.5759, - "step": 6365 - }, - { - "epoch": 0.22345074501131995, - "grad_norm": 0.5729230046272278, - "learning_rate": 4.376851851851852e-05, - "loss": 0.5032, - "step": 6366 - }, - { - "epoch": 0.22348584566243704, - "grad_norm": 0.504094123840332, - "learning_rate": 4.376666666666667e-05, - "loss": 0.5342, - "step": 6367 - }, - { - "epoch": 0.22352094631355413, - "grad_norm": 0.47615161538124084, - "learning_rate": 4.376481481481482e-05, - "loss": 0.5243, - "step": 6368 - }, - { - "epoch": 0.22355604696467118, - "grad_norm": 0.49670130014419556, - "learning_rate": 4.376296296296297e-05, - "loss": 0.5211, - "step": 6369 - }, - { - "epoch": 0.22359114761578827, - "grad_norm": 0.41458097100257874, - "learning_rate": 4.376111111111111e-05, - "loss": 0.3578, - "step": 6370 - }, - { - "epoch": 0.22362624826690536, - "grad_norm": 0.6223925352096558, - "learning_rate": 4.375925925925926e-05, - "loss": 0.5831, - "step": 6371 - }, - { - "epoch": 0.22366134891802242, - "grad_norm": 0.5051090121269226, - "learning_rate": 4.3757407407407406e-05, - "loss": 0.508, - "step": 6372 - }, - { - "epoch": 0.2236964495691395, - "grad_norm": 0.5326719284057617, - "learning_rate": 4.3755555555555556e-05, - "loss": 0.5506, - "step": 6373 - }, - { - "epoch": 0.2237315502202566, - "grad_norm": 0.4949377775192261, - "learning_rate": 4.3753703703703706e-05, - "loss": 0.4685, - "step": 6374 - }, - { - "epoch": 0.22376665087137365, - "grad_norm": 0.4929465055465698, - "learning_rate": 4.3751851851851856e-05, - "loss": 0.5993, - "step": 6375 - }, - { - "epoch": 0.22380175152249074, - "grad_norm": 0.5197126269340515, - "learning_rate": 4.375e-05, - "loss": 0.4241, - "step": 6376 - }, - { - "epoch": 0.22383685217360783, - "grad_norm": 0.48047083616256714, - "learning_rate": 4.374814814814815e-05, - "loss": 0.5813, - "step": 6377 - }, - { - "epoch": 0.2238719528247249, - "grad_norm": 0.45329195261001587, - "learning_rate": 4.374629629629629e-05, - "loss": 0.5228, - "step": 6378 - }, - { - "epoch": 0.22390705347584197, - "grad_norm": 0.45928841829299927, - "learning_rate": 4.374444444444445e-05, - "loss": 0.5264, - "step": 6379 - }, - { - "epoch": 0.22394215412695906, - "grad_norm": 0.5235545635223389, - "learning_rate": 4.374259259259259e-05, - "loss": 0.575, - "step": 6380 - }, - { - "epoch": 0.22397725477807615, - "grad_norm": 0.47029605507850647, - "learning_rate": 4.374074074074074e-05, - "loss": 0.3708, - "step": 6381 - }, - { - "epoch": 0.2240123554291932, - "grad_norm": 0.5033150911331177, - "learning_rate": 4.3738888888888893e-05, - "loss": 0.5404, - "step": 6382 - }, - { - "epoch": 0.2240474560803103, - "grad_norm": 0.4402678608894348, - "learning_rate": 4.373703703703704e-05, - "loss": 0.409, - "step": 6383 - }, - { - "epoch": 0.22408255673142738, - "grad_norm": 0.46338319778442383, - "learning_rate": 4.373518518518519e-05, - "loss": 0.4742, - "step": 6384 - }, - { - "epoch": 0.22411765738254444, - "grad_norm": 0.44654345512390137, - "learning_rate": 4.373333333333334e-05, - "loss": 0.5292, - "step": 6385 - }, - { - "epoch": 0.22415275803366153, - "grad_norm": 0.45585715770721436, - "learning_rate": 4.373148148148149e-05, - "loss": 0.6081, - "step": 6386 - }, - { - "epoch": 0.2241878586847786, - "grad_norm": 0.4940966069698334, - "learning_rate": 4.372962962962963e-05, - "loss": 0.3982, - "step": 6387 - }, - { - "epoch": 0.22422295933589567, - "grad_norm": 0.5520093441009521, - "learning_rate": 4.372777777777778e-05, - "loss": 0.5021, - "step": 6388 - }, - { - "epoch": 0.22425805998701276, - "grad_norm": 0.41265133023262024, - "learning_rate": 4.3725925925925924e-05, - "loss": 0.4878, - "step": 6389 - }, - { - "epoch": 0.22429316063812985, - "grad_norm": 0.40033531188964844, - "learning_rate": 4.372407407407408e-05, - "loss": 0.3878, - "step": 6390 - }, - { - "epoch": 0.2243282612892469, - "grad_norm": 0.5253092646598816, - "learning_rate": 4.3722222222222224e-05, - "loss": 0.3231, - "step": 6391 - }, - { - "epoch": 0.224363361940364, - "grad_norm": 0.5124841928482056, - "learning_rate": 4.3720370370370374e-05, - "loss": 0.5591, - "step": 6392 - }, - { - "epoch": 0.22439846259148108, - "grad_norm": 0.5267912149429321, - "learning_rate": 4.371851851851852e-05, - "loss": 0.4353, - "step": 6393 - }, - { - "epoch": 0.22443356324259814, - "grad_norm": 0.4775521457195282, - "learning_rate": 4.371666666666667e-05, - "loss": 0.5138, - "step": 6394 - }, - { - "epoch": 0.22446866389371523, - "grad_norm": 0.5234816670417786, - "learning_rate": 4.371481481481482e-05, - "loss": 0.4813, - "step": 6395 - }, - { - "epoch": 0.2245037645448323, - "grad_norm": 0.5209967494010925, - "learning_rate": 4.371296296296297e-05, - "loss": 0.469, - "step": 6396 - }, - { - "epoch": 0.22453886519594937, - "grad_norm": 0.6170598864555359, - "learning_rate": 4.371111111111111e-05, - "loss": 0.5802, - "step": 6397 - }, - { - "epoch": 0.22457396584706646, - "grad_norm": 0.5126031637191772, - "learning_rate": 4.370925925925926e-05, - "loss": 0.4782, - "step": 6398 - }, - { - "epoch": 0.22460906649818355, - "grad_norm": 0.5021947026252747, - "learning_rate": 4.3707407407407405e-05, - "loss": 0.6129, - "step": 6399 - }, - { - "epoch": 0.22464416714930063, - "grad_norm": 0.5636342763900757, - "learning_rate": 4.3705555555555555e-05, - "loss": 0.6528, - "step": 6400 - }, - { - "epoch": 0.2246792678004177, - "grad_norm": 0.48784753680229187, - "learning_rate": 4.3703703703703705e-05, - "loss": 0.5126, - "step": 6401 - }, - { - "epoch": 0.22471436845153478, - "grad_norm": 0.4697686433792114, - "learning_rate": 4.3701851851851855e-05, - "loss": 0.4238, - "step": 6402 - }, - { - "epoch": 0.22474946910265187, - "grad_norm": 0.6189428567886353, - "learning_rate": 4.3700000000000005e-05, - "loss": 0.51, - "step": 6403 - }, - { - "epoch": 0.22478456975376893, - "grad_norm": 0.6001622676849365, - "learning_rate": 4.369814814814815e-05, - "loss": 0.4855, - "step": 6404 - }, - { - "epoch": 0.224819670404886, - "grad_norm": 0.49806827306747437, - "learning_rate": 4.36962962962963e-05, - "loss": 0.4862, - "step": 6405 - }, - { - "epoch": 0.2248547710560031, - "grad_norm": 0.4477236568927765, - "learning_rate": 4.369444444444445e-05, - "loss": 0.4688, - "step": 6406 - }, - { - "epoch": 0.22488987170712016, - "grad_norm": 0.5235539674758911, - "learning_rate": 4.36925925925926e-05, - "loss": 0.5043, - "step": 6407 - }, - { - "epoch": 0.22492497235823725, - "grad_norm": 0.5073449611663818, - "learning_rate": 4.369074074074074e-05, - "loss": 0.4464, - "step": 6408 - }, - { - "epoch": 0.22496007300935433, - "grad_norm": 0.5201469659805298, - "learning_rate": 4.368888888888889e-05, - "loss": 0.5749, - "step": 6409 - }, - { - "epoch": 0.2249951736604714, - "grad_norm": 0.4583926200866699, - "learning_rate": 4.3687037037037036e-05, - "loss": 0.5219, - "step": 6410 - }, - { - "epoch": 0.22503027431158848, - "grad_norm": 0.4190112054347992, - "learning_rate": 4.3685185185185186e-05, - "loss": 0.5549, - "step": 6411 - }, - { - "epoch": 0.22506537496270557, - "grad_norm": 0.4623728096485138, - "learning_rate": 4.3683333333333336e-05, - "loss": 0.5265, - "step": 6412 - }, - { - "epoch": 0.22510047561382263, - "grad_norm": 0.5416220426559448, - "learning_rate": 4.3681481481481486e-05, - "loss": 0.5658, - "step": 6413 - }, - { - "epoch": 0.2251355762649397, - "grad_norm": 0.5263484120368958, - "learning_rate": 4.367962962962963e-05, - "loss": 0.4968, - "step": 6414 - }, - { - "epoch": 0.2251706769160568, - "grad_norm": 0.5262486338615417, - "learning_rate": 4.367777777777778e-05, - "loss": 0.4776, - "step": 6415 - }, - { - "epoch": 0.22520577756717386, - "grad_norm": 0.5108215808868408, - "learning_rate": 4.367592592592592e-05, - "loss": 0.5745, - "step": 6416 - }, - { - "epoch": 0.22524087821829095, - "grad_norm": 0.7710796594619751, - "learning_rate": 4.367407407407408e-05, - "loss": 0.4055, - "step": 6417 - }, - { - "epoch": 0.22527597886940803, - "grad_norm": 0.5525117516517639, - "learning_rate": 4.367222222222222e-05, - "loss": 0.4652, - "step": 6418 - }, - { - "epoch": 0.2253110795205251, - "grad_norm": 0.54571533203125, - "learning_rate": 4.367037037037037e-05, - "loss": 0.5022, - "step": 6419 - }, - { - "epoch": 0.22534618017164218, - "grad_norm": 0.6450904011726379, - "learning_rate": 4.3668518518518516e-05, - "loss": 0.4664, - "step": 6420 - }, - { - "epoch": 0.22538128082275927, - "grad_norm": 0.5334365963935852, - "learning_rate": 4.3666666666666666e-05, - "loss": 0.5474, - "step": 6421 - }, - { - "epoch": 0.22541638147387635, - "grad_norm": 0.5070394277572632, - "learning_rate": 4.3664814814814817e-05, - "loss": 0.5321, - "step": 6422 - }, - { - "epoch": 0.2254514821249934, - "grad_norm": 0.5233258605003357, - "learning_rate": 4.366296296296297e-05, - "loss": 0.5667, - "step": 6423 - }, - { - "epoch": 0.2254865827761105, - "grad_norm": 0.5006825923919678, - "learning_rate": 4.366111111111112e-05, - "loss": 0.5935, - "step": 6424 - }, - { - "epoch": 0.2255216834272276, - "grad_norm": 0.5741090774536133, - "learning_rate": 4.365925925925926e-05, - "loss": 0.5172, - "step": 6425 - }, - { - "epoch": 0.22555678407834465, - "grad_norm": 0.5074683427810669, - "learning_rate": 4.365740740740741e-05, - "loss": 0.4049, - "step": 6426 - }, - { - "epoch": 0.22559188472946173, - "grad_norm": 0.5376343727111816, - "learning_rate": 4.3655555555555554e-05, - "loss": 0.503, - "step": 6427 - }, - { - "epoch": 0.22562698538057882, - "grad_norm": 0.5347578525543213, - "learning_rate": 4.365370370370371e-05, - "loss": 0.4308, - "step": 6428 - }, - { - "epoch": 0.22566208603169588, - "grad_norm": 0.49460718035697937, - "learning_rate": 4.3651851851851854e-05, - "loss": 0.5429, - "step": 6429 - }, - { - "epoch": 0.22569718668281297, - "grad_norm": 0.46171531081199646, - "learning_rate": 4.3650000000000004e-05, - "loss": 0.463, - "step": 6430 - }, - { - "epoch": 0.22573228733393005, - "grad_norm": 0.5926260948181152, - "learning_rate": 4.364814814814815e-05, - "loss": 0.5574, - "step": 6431 - }, - { - "epoch": 0.2257673879850471, - "grad_norm": 0.47889775037765503, - "learning_rate": 4.36462962962963e-05, - "loss": 0.4469, - "step": 6432 - }, - { - "epoch": 0.2258024886361642, - "grad_norm": 0.5574856400489807, - "learning_rate": 4.364444444444445e-05, - "loss": 0.4086, - "step": 6433 - }, - { - "epoch": 0.2258375892872813, - "grad_norm": 0.43133142590522766, - "learning_rate": 4.36425925925926e-05, - "loss": 0.4616, - "step": 6434 - }, - { - "epoch": 0.22587268993839835, - "grad_norm": 0.4218815267086029, - "learning_rate": 4.364074074074074e-05, - "loss": 0.5822, - "step": 6435 - }, - { - "epoch": 0.22590779058951543, - "grad_norm": 0.49302026629447937, - "learning_rate": 4.363888888888889e-05, - "loss": 0.501, - "step": 6436 - }, - { - "epoch": 0.22594289124063252, - "grad_norm": 0.5369558334350586, - "learning_rate": 4.3637037037037034e-05, - "loss": 0.5984, - "step": 6437 - }, - { - "epoch": 0.22597799189174958, - "grad_norm": 0.4583546221256256, - "learning_rate": 4.3635185185185184e-05, - "loss": 0.5787, - "step": 6438 - }, - { - "epoch": 0.22601309254286667, - "grad_norm": 0.5227403044700623, - "learning_rate": 4.3633333333333335e-05, - "loss": 0.6102, - "step": 6439 - }, - { - "epoch": 0.22604819319398375, - "grad_norm": 0.5180759429931641, - "learning_rate": 4.3631481481481485e-05, - "loss": 0.5605, - "step": 6440 - }, - { - "epoch": 0.2260832938451008, - "grad_norm": 0.4747259318828583, - "learning_rate": 4.3629629629629635e-05, - "loss": 0.6199, - "step": 6441 - }, - { - "epoch": 0.2261183944962179, - "grad_norm": 0.4560260772705078, - "learning_rate": 4.362777777777778e-05, - "loss": 0.4846, - "step": 6442 - }, - { - "epoch": 0.226153495147335, - "grad_norm": 0.44645658135414124, - "learning_rate": 4.362592592592593e-05, - "loss": 0.5487, - "step": 6443 - }, - { - "epoch": 0.22618859579845207, - "grad_norm": 0.5632745623588562, - "learning_rate": 4.362407407407408e-05, - "loss": 0.5119, - "step": 6444 - }, - { - "epoch": 0.22622369644956913, - "grad_norm": 0.4418579339981079, - "learning_rate": 4.362222222222223e-05, - "loss": 0.5143, - "step": 6445 - }, - { - "epoch": 0.22625879710068622, - "grad_norm": 0.4915745258331299, - "learning_rate": 4.362037037037037e-05, - "loss": 0.4794, - "step": 6446 - }, - { - "epoch": 0.2262938977518033, - "grad_norm": 0.5365709066390991, - "learning_rate": 4.361851851851852e-05, - "loss": 0.5892, - "step": 6447 - }, - { - "epoch": 0.22632899840292037, - "grad_norm": 0.45590758323669434, - "learning_rate": 4.3616666666666665e-05, - "loss": 0.4482, - "step": 6448 - }, - { - "epoch": 0.22636409905403745, - "grad_norm": 0.5273575782775879, - "learning_rate": 4.361481481481482e-05, - "loss": 0.5344, - "step": 6449 - }, - { - "epoch": 0.22639919970515454, - "grad_norm": 0.5155147314071655, - "learning_rate": 4.3612962962962965e-05, - "loss": 0.5436, - "step": 6450 - }, - { - "epoch": 0.2264343003562716, - "grad_norm": 0.584534764289856, - "learning_rate": 4.3611111111111116e-05, - "loss": 0.569, - "step": 6451 - }, - { - "epoch": 0.2264694010073887, - "grad_norm": 0.5023669004440308, - "learning_rate": 4.360925925925926e-05, - "loss": 0.4198, - "step": 6452 - }, - { - "epoch": 0.22650450165850577, - "grad_norm": 0.5039702653884888, - "learning_rate": 4.360740740740741e-05, - "loss": 0.6276, - "step": 6453 - }, - { - "epoch": 0.22653960230962283, - "grad_norm": 0.46199023723602295, - "learning_rate": 4.360555555555555e-05, - "loss": 0.5377, - "step": 6454 - }, - { - "epoch": 0.22657470296073992, - "grad_norm": 0.5114182829856873, - "learning_rate": 4.360370370370371e-05, - "loss": 0.5612, - "step": 6455 - }, - { - "epoch": 0.226609803611857, - "grad_norm": 0.43905559182167053, - "learning_rate": 4.360185185185185e-05, - "loss": 0.4489, - "step": 6456 - }, - { - "epoch": 0.22664490426297407, - "grad_norm": 0.5383948683738708, - "learning_rate": 4.36e-05, - "loss": 0.4868, - "step": 6457 - }, - { - "epoch": 0.22668000491409115, - "grad_norm": 0.5072084069252014, - "learning_rate": 4.3598148148148146e-05, - "loss": 0.5251, - "step": 6458 - }, - { - "epoch": 0.22671510556520824, - "grad_norm": 0.5563737750053406, - "learning_rate": 4.3596296296296296e-05, - "loss": 0.5031, - "step": 6459 - }, - { - "epoch": 0.2267502062163253, - "grad_norm": 0.4790303409099579, - "learning_rate": 4.3594444444444446e-05, - "loss": 0.5415, - "step": 6460 - }, - { - "epoch": 0.2267853068674424, - "grad_norm": 0.4857827425003052, - "learning_rate": 4.3592592592592596e-05, - "loss": 0.528, - "step": 6461 - }, - { - "epoch": 0.22682040751855947, - "grad_norm": 0.5446473360061646, - "learning_rate": 4.3590740740740746e-05, - "loss": 0.6067, - "step": 6462 - }, - { - "epoch": 0.22685550816967653, - "grad_norm": 0.5748478174209595, - "learning_rate": 4.358888888888889e-05, - "loss": 0.5694, - "step": 6463 - }, - { - "epoch": 0.22689060882079362, - "grad_norm": 0.44190818071365356, - "learning_rate": 4.358703703703704e-05, - "loss": 0.5019, - "step": 6464 - }, - { - "epoch": 0.2269257094719107, - "grad_norm": 0.44954201579093933, - "learning_rate": 4.358518518518519e-05, - "loss": 0.4679, - "step": 6465 - }, - { - "epoch": 0.2269608101230278, - "grad_norm": 0.4864159822463989, - "learning_rate": 4.358333333333334e-05, - "loss": 0.451, - "step": 6466 - }, - { - "epoch": 0.22699591077414485, - "grad_norm": 0.4750102758407593, - "learning_rate": 4.3581481481481483e-05, - "loss": 0.444, - "step": 6467 - }, - { - "epoch": 0.22703101142526194, - "grad_norm": 0.55596524477005, - "learning_rate": 4.3579629629629634e-05, - "loss": 0.3071, - "step": 6468 - }, - { - "epoch": 0.22706611207637903, - "grad_norm": 0.5919297337532043, - "learning_rate": 4.357777777777778e-05, - "loss": 0.5084, - "step": 6469 - }, - { - "epoch": 0.2271012127274961, - "grad_norm": 0.45866307616233826, - "learning_rate": 4.357592592592593e-05, - "loss": 0.5342, - "step": 6470 - }, - { - "epoch": 0.22713631337861317, - "grad_norm": 0.5274932980537415, - "learning_rate": 4.357407407407408e-05, - "loss": 0.5406, - "step": 6471 - }, - { - "epoch": 0.22717141402973026, - "grad_norm": 0.5020320415496826, - "learning_rate": 4.357222222222223e-05, - "loss": 0.561, - "step": 6472 - }, - { - "epoch": 0.22720651468084732, - "grad_norm": 0.4542123079299927, - "learning_rate": 4.357037037037037e-05, - "loss": 0.5705, - "step": 6473 - }, - { - "epoch": 0.2272416153319644, - "grad_norm": 0.542776346206665, - "learning_rate": 4.356851851851852e-05, - "loss": 0.5625, - "step": 6474 - }, - { - "epoch": 0.2272767159830815, - "grad_norm": 0.4929980933666229, - "learning_rate": 4.3566666666666664e-05, - "loss": 0.5178, - "step": 6475 - }, - { - "epoch": 0.22731181663419855, - "grad_norm": 0.40973222255706787, - "learning_rate": 4.356481481481482e-05, - "loss": 0.4884, - "step": 6476 - }, - { - "epoch": 0.22734691728531564, - "grad_norm": 0.5119888782501221, - "learning_rate": 4.3562962962962964e-05, - "loss": 0.5063, - "step": 6477 - }, - { - "epoch": 0.22738201793643273, - "grad_norm": 0.4591399133205414, - "learning_rate": 4.3561111111111114e-05, - "loss": 0.3338, - "step": 6478 - }, - { - "epoch": 0.2274171185875498, - "grad_norm": 0.43432432413101196, - "learning_rate": 4.355925925925926e-05, - "loss": 0.3525, - "step": 6479 - }, - { - "epoch": 0.22745221923866688, - "grad_norm": 0.5999286770820618, - "learning_rate": 4.355740740740741e-05, - "loss": 0.6298, - "step": 6480 - }, - { - "epoch": 0.22748731988978396, - "grad_norm": 0.4583941698074341, - "learning_rate": 4.355555555555556e-05, - "loss": 0.4784, - "step": 6481 - }, - { - "epoch": 0.22752242054090102, - "grad_norm": 0.5821190476417542, - "learning_rate": 4.355370370370371e-05, - "loss": 0.6048, - "step": 6482 - }, - { - "epoch": 0.2275575211920181, - "grad_norm": 0.5417906641960144, - "learning_rate": 4.355185185185186e-05, - "loss": 0.5667, - "step": 6483 - }, - { - "epoch": 0.2275926218431352, - "grad_norm": 0.4712770879268646, - "learning_rate": 4.355e-05, - "loss": 0.4032, - "step": 6484 - }, - { - "epoch": 0.22762772249425225, - "grad_norm": 0.47986921668052673, - "learning_rate": 4.354814814814815e-05, - "loss": 0.4356, - "step": 6485 - }, - { - "epoch": 0.22766282314536934, - "grad_norm": 0.5206005573272705, - "learning_rate": 4.3546296296296295e-05, - "loss": 0.5664, - "step": 6486 - }, - { - "epoch": 0.22769792379648643, - "grad_norm": 0.59547358751297, - "learning_rate": 4.354444444444445e-05, - "loss": 0.4792, - "step": 6487 - }, - { - "epoch": 0.22773302444760352, - "grad_norm": 0.41378718614578247, - "learning_rate": 4.3542592592592595e-05, - "loss": 0.331, - "step": 6488 - }, - { - "epoch": 0.22776812509872058, - "grad_norm": 0.42036542296409607, - "learning_rate": 4.3540740740740745e-05, - "loss": 0.4497, - "step": 6489 - }, - { - "epoch": 0.22780322574983766, - "grad_norm": 0.5021644234657288, - "learning_rate": 4.353888888888889e-05, - "loss": 0.4694, - "step": 6490 - }, - { - "epoch": 0.22783832640095475, - "grad_norm": 0.5951006412506104, - "learning_rate": 4.353703703703704e-05, - "loss": 0.5107, - "step": 6491 - }, - { - "epoch": 0.2278734270520718, - "grad_norm": 0.6644195914268494, - "learning_rate": 4.353518518518519e-05, - "loss": 0.5018, - "step": 6492 - }, - { - "epoch": 0.2279085277031889, - "grad_norm": 0.6420860886573792, - "learning_rate": 4.353333333333334e-05, - "loss": 0.5136, - "step": 6493 - }, - { - "epoch": 0.22794362835430598, - "grad_norm": 0.5341999530792236, - "learning_rate": 4.353148148148148e-05, - "loss": 0.4753, - "step": 6494 - }, - { - "epoch": 0.22797872900542304, - "grad_norm": 0.5328982472419739, - "learning_rate": 4.352962962962963e-05, - "loss": 0.5961, - "step": 6495 - }, - { - "epoch": 0.22801382965654013, - "grad_norm": 0.546619176864624, - "learning_rate": 4.3527777777777776e-05, - "loss": 0.4826, - "step": 6496 - }, - { - "epoch": 0.22804893030765722, - "grad_norm": 0.5432867407798767, - "learning_rate": 4.3525925925925926e-05, - "loss": 0.5596, - "step": 6497 - }, - { - "epoch": 0.22808403095877428, - "grad_norm": 0.6234954595565796, - "learning_rate": 4.3524074074074076e-05, - "loss": 0.458, - "step": 6498 - }, - { - "epoch": 0.22811913160989136, - "grad_norm": 0.49551311135292053, - "learning_rate": 4.3522222222222226e-05, - "loss": 0.5424, - "step": 6499 - }, - { - "epoch": 0.22815423226100845, - "grad_norm": 0.5389822721481323, - "learning_rate": 4.352037037037037e-05, - "loss": 0.5217, - "step": 6500 - }, - { - "epoch": 0.2281893329121255, - "grad_norm": 0.620499312877655, - "learning_rate": 4.351851851851852e-05, - "loss": 0.4759, - "step": 6501 - }, - { - "epoch": 0.2282244335632426, - "grad_norm": 0.43247538805007935, - "learning_rate": 4.351666666666667e-05, - "loss": 0.5365, - "step": 6502 - }, - { - "epoch": 0.22825953421435968, - "grad_norm": 0.4959976375102997, - "learning_rate": 4.351481481481482e-05, - "loss": 0.5173, - "step": 6503 - }, - { - "epoch": 0.22829463486547674, - "grad_norm": 0.463068425655365, - "learning_rate": 4.351296296296297e-05, - "loss": 0.5268, - "step": 6504 - }, - { - "epoch": 0.22832973551659383, - "grad_norm": 0.41453033685684204, - "learning_rate": 4.351111111111111e-05, - "loss": 0.3157, - "step": 6505 - }, - { - "epoch": 0.22836483616771092, - "grad_norm": 0.5430294871330261, - "learning_rate": 4.350925925925926e-05, - "loss": 0.5224, - "step": 6506 - }, - { - "epoch": 0.22839993681882798, - "grad_norm": 0.4441576600074768, - "learning_rate": 4.3507407407407407e-05, - "loss": 0.4662, - "step": 6507 - }, - { - "epoch": 0.22843503746994506, - "grad_norm": 0.5166773796081543, - "learning_rate": 4.350555555555556e-05, - "loss": 0.5064, - "step": 6508 - }, - { - "epoch": 0.22847013812106215, - "grad_norm": 0.42738303542137146, - "learning_rate": 4.350370370370371e-05, - "loss": 0.486, - "step": 6509 - }, - { - "epoch": 0.22850523877217924, - "grad_norm": 0.4954247772693634, - "learning_rate": 4.350185185185186e-05, - "loss": 0.568, - "step": 6510 - }, - { - "epoch": 0.2285403394232963, - "grad_norm": 0.4995649755001068, - "learning_rate": 4.35e-05, - "loss": 0.4947, - "step": 6511 - }, - { - "epoch": 0.22857544007441338, - "grad_norm": 0.49454590678215027, - "learning_rate": 4.349814814814815e-05, - "loss": 0.5311, - "step": 6512 - }, - { - "epoch": 0.22861054072553047, - "grad_norm": 0.524101734161377, - "learning_rate": 4.3496296296296294e-05, - "loss": 0.5422, - "step": 6513 - }, - { - "epoch": 0.22864564137664753, - "grad_norm": 0.4672798216342926, - "learning_rate": 4.349444444444445e-05, - "loss": 0.5261, - "step": 6514 - }, - { - "epoch": 0.22868074202776462, - "grad_norm": 0.4865294396877289, - "learning_rate": 4.3492592592592594e-05, - "loss": 0.432, - "step": 6515 - }, - { - "epoch": 0.2287158426788817, - "grad_norm": 0.49646878242492676, - "learning_rate": 4.3490740740740744e-05, - "loss": 0.3887, - "step": 6516 - }, - { - "epoch": 0.22875094332999876, - "grad_norm": 0.6340665221214294, - "learning_rate": 4.348888888888889e-05, - "loss": 0.6341, - "step": 6517 - }, - { - "epoch": 0.22878604398111585, - "grad_norm": 0.49298709630966187, - "learning_rate": 4.348703703703704e-05, - "loss": 0.4763, - "step": 6518 - }, - { - "epoch": 0.22882114463223294, - "grad_norm": 0.5729209780693054, - "learning_rate": 4.348518518518519e-05, - "loss": 0.413, - "step": 6519 - }, - { - "epoch": 0.22885624528335, - "grad_norm": 0.4801141321659088, - "learning_rate": 4.348333333333334e-05, - "loss": 0.5096, - "step": 6520 - }, - { - "epoch": 0.22889134593446708, - "grad_norm": 0.45161551237106323, - "learning_rate": 4.348148148148148e-05, - "loss": 0.4472, - "step": 6521 - }, - { - "epoch": 0.22892644658558417, - "grad_norm": 0.4399338960647583, - "learning_rate": 4.347962962962963e-05, - "loss": 0.388, - "step": 6522 - }, - { - "epoch": 0.22896154723670123, - "grad_norm": 0.5400804877281189, - "learning_rate": 4.347777777777778e-05, - "loss": 0.3747, - "step": 6523 - }, - { - "epoch": 0.22899664788781832, - "grad_norm": 0.547734260559082, - "learning_rate": 4.3475925925925925e-05, - "loss": 0.5581, - "step": 6524 - }, - { - "epoch": 0.2290317485389354, - "grad_norm": 0.609146237373352, - "learning_rate": 4.347407407407408e-05, - "loss": 0.5025, - "step": 6525 - }, - { - "epoch": 0.22906684919005246, - "grad_norm": 0.48688867688179016, - "learning_rate": 4.3472222222222225e-05, - "loss": 0.5077, - "step": 6526 - }, - { - "epoch": 0.22910194984116955, - "grad_norm": 0.530373215675354, - "learning_rate": 4.3470370370370375e-05, - "loss": 0.4872, - "step": 6527 - }, - { - "epoch": 0.22913705049228664, - "grad_norm": 0.5377573370933533, - "learning_rate": 4.346851851851852e-05, - "loss": 0.4267, - "step": 6528 - }, - { - "epoch": 0.22917215114340372, - "grad_norm": 0.4558071494102478, - "learning_rate": 4.346666666666667e-05, - "loss": 0.5067, - "step": 6529 - }, - { - "epoch": 0.22920725179452078, - "grad_norm": 0.4154146611690521, - "learning_rate": 4.346481481481482e-05, - "loss": 0.4454, - "step": 6530 - }, - { - "epoch": 0.22924235244563787, - "grad_norm": 0.49592745304107666, - "learning_rate": 4.346296296296297e-05, - "loss": 0.541, - "step": 6531 - }, - { - "epoch": 0.22927745309675496, - "grad_norm": 0.49673178791999817, - "learning_rate": 4.346111111111111e-05, - "loss": 0.4782, - "step": 6532 - }, - { - "epoch": 0.22931255374787202, - "grad_norm": 0.5097607970237732, - "learning_rate": 4.345925925925926e-05, - "loss": 0.5179, - "step": 6533 - }, - { - "epoch": 0.2293476543989891, - "grad_norm": 0.49112969636917114, - "learning_rate": 4.3457407407407405e-05, - "loss": 0.5477, - "step": 6534 - }, - { - "epoch": 0.2293827550501062, - "grad_norm": 0.4596553444862366, - "learning_rate": 4.3455555555555555e-05, - "loss": 0.5311, - "step": 6535 - }, - { - "epoch": 0.22941785570122325, - "grad_norm": 0.49725252389907837, - "learning_rate": 4.3453703703703706e-05, - "loss": 0.5292, - "step": 6536 - }, - { - "epoch": 0.22945295635234034, - "grad_norm": 0.4246586859226227, - "learning_rate": 4.3451851851851856e-05, - "loss": 0.4249, - "step": 6537 - }, - { - "epoch": 0.22948805700345742, - "grad_norm": 0.538539707660675, - "learning_rate": 4.345e-05, - "loss": 0.5117, - "step": 6538 - }, - { - "epoch": 0.22952315765457448, - "grad_norm": 0.613235354423523, - "learning_rate": 4.344814814814815e-05, - "loss": 0.5808, - "step": 6539 - }, - { - "epoch": 0.22955825830569157, - "grad_norm": 0.5336416363716125, - "learning_rate": 4.34462962962963e-05, - "loss": 0.6073, - "step": 6540 - }, - { - "epoch": 0.22959335895680866, - "grad_norm": 0.3954595923423767, - "learning_rate": 4.344444444444445e-05, - "loss": 0.4596, - "step": 6541 - }, - { - "epoch": 0.22962845960792572, - "grad_norm": 0.4074261486530304, - "learning_rate": 4.344259259259259e-05, - "loss": 0.419, - "step": 6542 - }, - { - "epoch": 0.2296635602590428, - "grad_norm": 0.3953022062778473, - "learning_rate": 4.344074074074074e-05, - "loss": 0.3708, - "step": 6543 - }, - { - "epoch": 0.2296986609101599, - "grad_norm": 0.4385570287704468, - "learning_rate": 4.343888888888889e-05, - "loss": 0.5353, - "step": 6544 - }, - { - "epoch": 0.22973376156127695, - "grad_norm": 0.5609588027000427, - "learning_rate": 4.3437037037037036e-05, - "loss": 0.5619, - "step": 6545 - }, - { - "epoch": 0.22976886221239404, - "grad_norm": 0.5150418877601624, - "learning_rate": 4.343518518518519e-05, - "loss": 0.5684, - "step": 6546 - }, - { - "epoch": 0.22980396286351112, - "grad_norm": 0.8034471869468689, - "learning_rate": 4.3433333333333336e-05, - "loss": 0.4928, - "step": 6547 - }, - { - "epoch": 0.22983906351462818, - "grad_norm": 0.5059917569160461, - "learning_rate": 4.3431481481481487e-05, - "loss": 0.4887, - "step": 6548 - }, - { - "epoch": 0.22987416416574527, - "grad_norm": 0.5059003233909607, - "learning_rate": 4.342962962962963e-05, - "loss": 0.5237, - "step": 6549 - }, - { - "epoch": 0.22990926481686236, - "grad_norm": 0.5246483683586121, - "learning_rate": 4.342777777777778e-05, - "loss": 0.5059, - "step": 6550 - }, - { - "epoch": 0.22994436546797944, - "grad_norm": 0.44474807381629944, - "learning_rate": 4.342592592592592e-05, - "loss": 0.5155, - "step": 6551 - }, - { - "epoch": 0.2299794661190965, - "grad_norm": 0.462446004152298, - "learning_rate": 4.342407407407408e-05, - "loss": 0.4668, - "step": 6552 - }, - { - "epoch": 0.2300145667702136, - "grad_norm": 0.5163705348968506, - "learning_rate": 4.3422222222222224e-05, - "loss": 0.505, - "step": 6553 - }, - { - "epoch": 0.23004966742133068, - "grad_norm": 0.603691041469574, - "learning_rate": 4.3420370370370374e-05, - "loss": 0.5689, - "step": 6554 - }, - { - "epoch": 0.23008476807244774, - "grad_norm": 1.2145179510116577, - "learning_rate": 4.341851851851852e-05, - "loss": 0.6212, - "step": 6555 - }, - { - "epoch": 0.23011986872356482, - "grad_norm": 0.5608810186386108, - "learning_rate": 4.341666666666667e-05, - "loss": 0.4692, - "step": 6556 - }, - { - "epoch": 0.2301549693746819, - "grad_norm": 0.6114109754562378, - "learning_rate": 4.341481481481482e-05, - "loss": 0.4664, - "step": 6557 - }, - { - "epoch": 0.23019007002579897, - "grad_norm": 0.3499481976032257, - "learning_rate": 4.341296296296297e-05, - "loss": 0.3671, - "step": 6558 - }, - { - "epoch": 0.23022517067691606, - "grad_norm": 0.5201455950737, - "learning_rate": 4.341111111111111e-05, - "loss": 0.5685, - "step": 6559 - }, - { - "epoch": 0.23026027132803314, - "grad_norm": 0.6998898386955261, - "learning_rate": 4.340925925925926e-05, - "loss": 0.4372, - "step": 6560 - }, - { - "epoch": 0.2302953719791502, - "grad_norm": 0.6526890397071838, - "learning_rate": 4.340740740740741e-05, - "loss": 0.5073, - "step": 6561 - }, - { - "epoch": 0.2303304726302673, - "grad_norm": 0.51859050989151, - "learning_rate": 4.3405555555555554e-05, - "loss": 0.5187, - "step": 6562 - }, - { - "epoch": 0.23036557328138438, - "grad_norm": 0.5948597192764282, - "learning_rate": 4.3403703703703704e-05, - "loss": 0.5433, - "step": 6563 - }, - { - "epoch": 0.23040067393250144, - "grad_norm": 0.4706569314002991, - "learning_rate": 4.3401851851851854e-05, - "loss": 0.4732, - "step": 6564 - }, - { - "epoch": 0.23043577458361852, - "grad_norm": 0.6275587677955627, - "learning_rate": 4.3400000000000005e-05, - "loss": 0.4468, - "step": 6565 - }, - { - "epoch": 0.2304708752347356, - "grad_norm": 0.5597063302993774, - "learning_rate": 4.339814814814815e-05, - "loss": 0.5174, - "step": 6566 - }, - { - "epoch": 0.23050597588585267, - "grad_norm": 0.5124808549880981, - "learning_rate": 4.33962962962963e-05, - "loss": 0.6121, - "step": 6567 - }, - { - "epoch": 0.23054107653696976, - "grad_norm": 0.45150670409202576, - "learning_rate": 4.339444444444445e-05, - "loss": 0.3303, - "step": 6568 - }, - { - "epoch": 0.23057617718808684, - "grad_norm": 0.4977615773677826, - "learning_rate": 4.33925925925926e-05, - "loss": 0.4953, - "step": 6569 - }, - { - "epoch": 0.2306112778392039, - "grad_norm": 0.46765220165252686, - "learning_rate": 4.339074074074074e-05, - "loss": 0.5996, - "step": 6570 - }, - { - "epoch": 0.230646378490321, - "grad_norm": 0.5611410140991211, - "learning_rate": 4.338888888888889e-05, - "loss": 0.5108, - "step": 6571 - }, - { - "epoch": 0.23068147914143808, - "grad_norm": 0.5550028681755066, - "learning_rate": 4.3387037037037035e-05, - "loss": 0.3718, - "step": 6572 - }, - { - "epoch": 0.23071657979255517, - "grad_norm": 0.4774802029132843, - "learning_rate": 4.338518518518519e-05, - "loss": 0.5462, - "step": 6573 - }, - { - "epoch": 0.23075168044367222, - "grad_norm": 0.5618312358856201, - "learning_rate": 4.3383333333333335e-05, - "loss": 0.588, - "step": 6574 - }, - { - "epoch": 0.2307867810947893, - "grad_norm": 0.4888848066329956, - "learning_rate": 4.3381481481481485e-05, - "loss": 0.5382, - "step": 6575 - }, - { - "epoch": 0.2308218817459064, - "grad_norm": 0.42637285590171814, - "learning_rate": 4.337962962962963e-05, - "loss": 0.4293, - "step": 6576 - }, - { - "epoch": 0.23085698239702346, - "grad_norm": 0.5992258787155151, - "learning_rate": 4.337777777777778e-05, - "loss": 0.6047, - "step": 6577 - }, - { - "epoch": 0.23089208304814055, - "grad_norm": 0.5030609369277954, - "learning_rate": 4.337592592592592e-05, - "loss": 0.4967, - "step": 6578 - }, - { - "epoch": 0.23092718369925763, - "grad_norm": 0.44559890031814575, - "learning_rate": 4.337407407407408e-05, - "loss": 0.4992, - "step": 6579 - }, - { - "epoch": 0.2309622843503747, - "grad_norm": 0.42546346783638, - "learning_rate": 4.337222222222222e-05, - "loss": 0.4678, - "step": 6580 - }, - { - "epoch": 0.23099738500149178, - "grad_norm": 0.4889748990535736, - "learning_rate": 4.337037037037037e-05, - "loss": 0.5689, - "step": 6581 - }, - { - "epoch": 0.23103248565260887, - "grad_norm": 0.41689014434814453, - "learning_rate": 4.336851851851852e-05, - "loss": 0.4123, - "step": 6582 - }, - { - "epoch": 0.23106758630372592, - "grad_norm": 0.47393593192100525, - "learning_rate": 4.3366666666666666e-05, - "loss": 0.4325, - "step": 6583 - }, - { - "epoch": 0.231102686954843, - "grad_norm": 0.44799453020095825, - "learning_rate": 4.3364814814814816e-05, - "loss": 0.5025, - "step": 6584 - }, - { - "epoch": 0.2311377876059601, - "grad_norm": 0.47822943329811096, - "learning_rate": 4.3362962962962966e-05, - "loss": 0.5348, - "step": 6585 - }, - { - "epoch": 0.23117288825707716, - "grad_norm": 0.5278691649436951, - "learning_rate": 4.3361111111111116e-05, - "loss": 0.5662, - "step": 6586 - }, - { - "epoch": 0.23120798890819425, - "grad_norm": 0.4908180832862854, - "learning_rate": 4.335925925925926e-05, - "loss": 0.6198, - "step": 6587 - }, - { - "epoch": 0.23124308955931133, - "grad_norm": 0.5002852082252502, - "learning_rate": 4.335740740740741e-05, - "loss": 0.5737, - "step": 6588 - }, - { - "epoch": 0.2312781902104284, - "grad_norm": 0.3797661066055298, - "learning_rate": 4.335555555555556e-05, - "loss": 0.4271, - "step": 6589 - }, - { - "epoch": 0.23131329086154548, - "grad_norm": 0.5859987139701843, - "learning_rate": 4.335370370370371e-05, - "loss": 0.4861, - "step": 6590 - }, - { - "epoch": 0.23134839151266257, - "grad_norm": 0.4328489303588867, - "learning_rate": 4.335185185185185e-05, - "loss": 0.6014, - "step": 6591 - }, - { - "epoch": 0.23138349216377962, - "grad_norm": 0.49856850504875183, - "learning_rate": 4.335e-05, - "loss": 0.5501, - "step": 6592 - }, - { - "epoch": 0.2314185928148967, - "grad_norm": 0.43425390124320984, - "learning_rate": 4.334814814814815e-05, - "loss": 0.54, - "step": 6593 - }, - { - "epoch": 0.2314536934660138, - "grad_norm": 0.4579017758369446, - "learning_rate": 4.33462962962963e-05, - "loss": 0.5326, - "step": 6594 - }, - { - "epoch": 0.23148879411713089, - "grad_norm": 0.4529159963130951, - "learning_rate": 4.334444444444445e-05, - "loss": 0.3992, - "step": 6595 - }, - { - "epoch": 0.23152389476824795, - "grad_norm": 0.4574715793132782, - "learning_rate": 4.33425925925926e-05, - "loss": 0.3989, - "step": 6596 - }, - { - "epoch": 0.23155899541936503, - "grad_norm": 0.4357282221317291, - "learning_rate": 4.334074074074074e-05, - "loss": 0.573, - "step": 6597 - }, - { - "epoch": 0.23159409607048212, - "grad_norm": 0.4687122702598572, - "learning_rate": 4.333888888888889e-05, - "loss": 0.5341, - "step": 6598 - }, - { - "epoch": 0.23162919672159918, - "grad_norm": 0.379609614610672, - "learning_rate": 4.3337037037037034e-05, - "loss": 0.4285, - "step": 6599 - }, - { - "epoch": 0.23166429737271627, - "grad_norm": 0.5473927855491638, - "learning_rate": 4.333518518518519e-05, - "loss": 0.5399, - "step": 6600 - }, - { - "epoch": 0.23169939802383335, - "grad_norm": 0.5807520151138306, - "learning_rate": 4.3333333333333334e-05, - "loss": 0.5309, - "step": 6601 - }, - { - "epoch": 0.2317344986749504, - "grad_norm": 0.44863638281822205, - "learning_rate": 4.3331481481481484e-05, - "loss": 0.4004, - "step": 6602 - }, - { - "epoch": 0.2317695993260675, - "grad_norm": 0.5121362805366516, - "learning_rate": 4.3329629629629634e-05, - "loss": 0.5551, - "step": 6603 - }, - { - "epoch": 0.2318046999771846, - "grad_norm": 0.47937843203544617, - "learning_rate": 4.332777777777778e-05, - "loss": 0.4837, - "step": 6604 - }, - { - "epoch": 0.23183980062830165, - "grad_norm": 0.5101025700569153, - "learning_rate": 4.332592592592593e-05, - "loss": 0.5494, - "step": 6605 - }, - { - "epoch": 0.23187490127941873, - "grad_norm": 0.51252681016922, - "learning_rate": 4.332407407407408e-05, - "loss": 0.583, - "step": 6606 - }, - { - "epoch": 0.23191000193053582, - "grad_norm": 0.5102974772453308, - "learning_rate": 4.332222222222223e-05, - "loss": 0.5431, - "step": 6607 - }, - { - "epoch": 0.23194510258165288, - "grad_norm": 0.4310067296028137, - "learning_rate": 4.332037037037037e-05, - "loss": 0.3622, - "step": 6608 - }, - { - "epoch": 0.23198020323276997, - "grad_norm": 0.467338889837265, - "learning_rate": 4.331851851851852e-05, - "loss": 0.5331, - "step": 6609 - }, - { - "epoch": 0.23201530388388705, - "grad_norm": 0.4939861297607422, - "learning_rate": 4.3316666666666665e-05, - "loss": 0.4756, - "step": 6610 - }, - { - "epoch": 0.2320504045350041, - "grad_norm": 0.5206016302108765, - "learning_rate": 4.331481481481482e-05, - "loss": 0.5577, - "step": 6611 - }, - { - "epoch": 0.2320855051861212, - "grad_norm": 0.6127987504005432, - "learning_rate": 4.3312962962962965e-05, - "loss": 0.4739, - "step": 6612 - }, - { - "epoch": 0.2321206058372383, - "grad_norm": 0.5141534805297852, - "learning_rate": 4.3311111111111115e-05, - "loss": 0.5338, - "step": 6613 - }, - { - "epoch": 0.23215570648835535, - "grad_norm": 0.6376914381980896, - "learning_rate": 4.330925925925926e-05, - "loss": 0.6416, - "step": 6614 - }, - { - "epoch": 0.23219080713947243, - "grad_norm": 0.48155730962753296, - "learning_rate": 4.330740740740741e-05, - "loss": 0.5376, - "step": 6615 - }, - { - "epoch": 0.23222590779058952, - "grad_norm": 0.5239263772964478, - "learning_rate": 4.330555555555556e-05, - "loss": 0.4084, - "step": 6616 - }, - { - "epoch": 0.2322610084417066, - "grad_norm": 0.5923076868057251, - "learning_rate": 4.330370370370371e-05, - "loss": 0.5099, - "step": 6617 - }, - { - "epoch": 0.23229610909282367, - "grad_norm": 0.541031539440155, - "learning_rate": 4.330185185185185e-05, - "loss": 0.4346, - "step": 6618 - }, - { - "epoch": 0.23233120974394075, - "grad_norm": 0.4503752291202545, - "learning_rate": 4.33e-05, - "loss": 0.4293, - "step": 6619 - }, - { - "epoch": 0.23236631039505784, - "grad_norm": 0.49952659010887146, - "learning_rate": 4.3298148148148146e-05, - "loss": 0.5058, - "step": 6620 - }, - { - "epoch": 0.2324014110461749, - "grad_norm": 0.4772626459598541, - "learning_rate": 4.3296296296296296e-05, - "loss": 0.5047, - "step": 6621 - }, - { - "epoch": 0.232436511697292, - "grad_norm": 0.481475830078125, - "learning_rate": 4.3294444444444446e-05, - "loss": 0.4813, - "step": 6622 - }, - { - "epoch": 0.23247161234840907, - "grad_norm": 0.5237546563148499, - "learning_rate": 4.3292592592592596e-05, - "loss": 0.5492, - "step": 6623 - }, - { - "epoch": 0.23250671299952613, - "grad_norm": 0.5832902193069458, - "learning_rate": 4.3290740740740746e-05, - "loss": 0.4279, - "step": 6624 - }, - { - "epoch": 0.23254181365064322, - "grad_norm": 0.5332133173942566, - "learning_rate": 4.328888888888889e-05, - "loss": 0.5798, - "step": 6625 - }, - { - "epoch": 0.2325769143017603, - "grad_norm": 0.5572698712348938, - "learning_rate": 4.328703703703704e-05, - "loss": 0.6291, - "step": 6626 - }, - { - "epoch": 0.23261201495287737, - "grad_norm": 0.45270025730133057, - "learning_rate": 4.328518518518519e-05, - "loss": 0.4213, - "step": 6627 - }, - { - "epoch": 0.23264711560399445, - "grad_norm": 0.49674704670906067, - "learning_rate": 4.328333333333334e-05, - "loss": 0.4893, - "step": 6628 - }, - { - "epoch": 0.23268221625511154, - "grad_norm": 0.4636182188987732, - "learning_rate": 4.328148148148148e-05, - "loss": 0.4927, - "step": 6629 - }, - { - "epoch": 0.2327173169062286, - "grad_norm": 0.5239245891571045, - "learning_rate": 4.327962962962963e-05, - "loss": 0.5775, - "step": 6630 - }, - { - "epoch": 0.2327524175573457, - "grad_norm": 0.4654431939125061, - "learning_rate": 4.3277777777777776e-05, - "loss": 0.5728, - "step": 6631 - }, - { - "epoch": 0.23278751820846277, - "grad_norm": 0.5081096887588501, - "learning_rate": 4.3275925925925927e-05, - "loss": 0.5784, - "step": 6632 - }, - { - "epoch": 0.23282261885957983, - "grad_norm": 0.48051291704177856, - "learning_rate": 4.327407407407408e-05, - "loss": 0.5688, - "step": 6633 - }, - { - "epoch": 0.23285771951069692, - "grad_norm": 0.4510363042354584, - "learning_rate": 4.327222222222223e-05, - "loss": 0.5097, - "step": 6634 - }, - { - "epoch": 0.232892820161814, - "grad_norm": 0.6056020855903625, - "learning_rate": 4.327037037037037e-05, - "loss": 0.5981, - "step": 6635 - }, - { - "epoch": 0.23292792081293107, - "grad_norm": 0.4615592658519745, - "learning_rate": 4.326851851851852e-05, - "loss": 0.5699, - "step": 6636 - }, - { - "epoch": 0.23296302146404815, - "grad_norm": 0.4245399534702301, - "learning_rate": 4.3266666666666664e-05, - "loss": 0.5568, - "step": 6637 - }, - { - "epoch": 0.23299812211516524, - "grad_norm": 0.46379002928733826, - "learning_rate": 4.326481481481482e-05, - "loss": 0.4834, - "step": 6638 - }, - { - "epoch": 0.23303322276628233, - "grad_norm": 0.46925684809684753, - "learning_rate": 4.3262962962962964e-05, - "loss": 0.6177, - "step": 6639 - }, - { - "epoch": 0.2330683234173994, - "grad_norm": 0.48836326599121094, - "learning_rate": 4.3261111111111114e-05, - "loss": 0.5553, - "step": 6640 - }, - { - "epoch": 0.23310342406851647, - "grad_norm": 0.5253264904022217, - "learning_rate": 4.325925925925926e-05, - "loss": 0.5792, - "step": 6641 - }, - { - "epoch": 0.23313852471963356, - "grad_norm": 0.418148010969162, - "learning_rate": 4.325740740740741e-05, - "loss": 0.5296, - "step": 6642 - }, - { - "epoch": 0.23317362537075062, - "grad_norm": 0.4271306097507477, - "learning_rate": 4.325555555555556e-05, - "loss": 0.386, - "step": 6643 - }, - { - "epoch": 0.2332087260218677, - "grad_norm": 0.51885986328125, - "learning_rate": 4.325370370370371e-05, - "loss": 0.6325, - "step": 6644 - }, - { - "epoch": 0.2332438266729848, - "grad_norm": 0.47479045391082764, - "learning_rate": 4.325185185185186e-05, - "loss": 0.3587, - "step": 6645 - }, - { - "epoch": 0.23327892732410185, - "grad_norm": 0.5101522207260132, - "learning_rate": 4.325e-05, - "loss": 0.5151, - "step": 6646 - }, - { - "epoch": 0.23331402797521894, - "grad_norm": 0.47398191690444946, - "learning_rate": 4.324814814814815e-05, - "loss": 0.4529, - "step": 6647 - }, - { - "epoch": 0.23334912862633603, - "grad_norm": 0.4510735869407654, - "learning_rate": 4.3246296296296294e-05, - "loss": 0.5743, - "step": 6648 - }, - { - "epoch": 0.2333842292774531, - "grad_norm": 0.5578336119651794, - "learning_rate": 4.324444444444445e-05, - "loss": 0.5672, - "step": 6649 - }, - { - "epoch": 0.23341932992857017, - "grad_norm": 0.4545589089393616, - "learning_rate": 4.3242592592592595e-05, - "loss": 0.393, - "step": 6650 - }, - { - "epoch": 0.23345443057968726, - "grad_norm": 0.5408409833908081, - "learning_rate": 4.3240740740740745e-05, - "loss": 0.5519, - "step": 6651 - }, - { - "epoch": 0.23348953123080432, - "grad_norm": 0.45105981826782227, - "learning_rate": 4.323888888888889e-05, - "loss": 0.4825, - "step": 6652 - }, - { - "epoch": 0.2335246318819214, - "grad_norm": 0.5668960213661194, - "learning_rate": 4.323703703703704e-05, - "loss": 0.4695, - "step": 6653 - }, - { - "epoch": 0.2335597325330385, - "grad_norm": 0.46844708919525146, - "learning_rate": 4.323518518518519e-05, - "loss": 0.5765, - "step": 6654 - }, - { - "epoch": 0.23359483318415555, - "grad_norm": 0.48489412665367126, - "learning_rate": 4.323333333333334e-05, - "loss": 0.3631, - "step": 6655 - }, - { - "epoch": 0.23362993383527264, - "grad_norm": 0.4088861048221588, - "learning_rate": 4.323148148148148e-05, - "loss": 0.553, - "step": 6656 - }, - { - "epoch": 0.23366503448638973, - "grad_norm": 0.45302581787109375, - "learning_rate": 4.322962962962963e-05, - "loss": 0.4222, - "step": 6657 - }, - { - "epoch": 0.2337001351375068, - "grad_norm": 0.5680326223373413, - "learning_rate": 4.3227777777777775e-05, - "loss": 0.51, - "step": 6658 - }, - { - "epoch": 0.23373523578862387, - "grad_norm": 0.5264381170272827, - "learning_rate": 4.3225925925925925e-05, - "loss": 0.3588, - "step": 6659 - }, - { - "epoch": 0.23377033643974096, - "grad_norm": 0.46261200308799744, - "learning_rate": 4.3224074074074075e-05, - "loss": 0.5122, - "step": 6660 - }, - { - "epoch": 0.23380543709085805, - "grad_norm": 0.5465238094329834, - "learning_rate": 4.3222222222222226e-05, - "loss": 0.6276, - "step": 6661 - }, - { - "epoch": 0.2338405377419751, - "grad_norm": 0.5023322105407715, - "learning_rate": 4.322037037037037e-05, - "loss": 0.5285, - "step": 6662 - }, - { - "epoch": 0.2338756383930922, - "grad_norm": 0.528066098690033, - "learning_rate": 4.321851851851852e-05, - "loss": 0.4902, - "step": 6663 - }, - { - "epoch": 0.23391073904420928, - "grad_norm": 0.5941058397293091, - "learning_rate": 4.321666666666667e-05, - "loss": 0.5718, - "step": 6664 - }, - { - "epoch": 0.23394583969532634, - "grad_norm": 0.6068622469902039, - "learning_rate": 4.321481481481482e-05, - "loss": 0.4259, - "step": 6665 - }, - { - "epoch": 0.23398094034644343, - "grad_norm": 0.5318852663040161, - "learning_rate": 4.321296296296297e-05, - "loss": 0.4664, - "step": 6666 - }, - { - "epoch": 0.23401604099756051, - "grad_norm": 0.5803890228271484, - "learning_rate": 4.321111111111111e-05, - "loss": 0.4905, - "step": 6667 - }, - { - "epoch": 0.23405114164867757, - "grad_norm": 0.4538317918777466, - "learning_rate": 4.320925925925926e-05, - "loss": 0.5147, - "step": 6668 - }, - { - "epoch": 0.23408624229979466, - "grad_norm": 0.4449949562549591, - "learning_rate": 4.3207407407407406e-05, - "loss": 0.517, - "step": 6669 - }, - { - "epoch": 0.23412134295091175, - "grad_norm": 0.5100392699241638, - "learning_rate": 4.320555555555556e-05, - "loss": 0.4254, - "step": 6670 - }, - { - "epoch": 0.2341564436020288, - "grad_norm": 0.5131198167800903, - "learning_rate": 4.3203703703703706e-05, - "loss": 0.4303, - "step": 6671 - }, - { - "epoch": 0.2341915442531459, - "grad_norm": 0.5950232744216919, - "learning_rate": 4.3201851851851856e-05, - "loss": 0.3985, - "step": 6672 - }, - { - "epoch": 0.23422664490426298, - "grad_norm": 0.5690239667892456, - "learning_rate": 4.32e-05, - "loss": 0.4896, - "step": 6673 - }, - { - "epoch": 0.23426174555538004, - "grad_norm": 0.4342878758907318, - "learning_rate": 4.319814814814815e-05, - "loss": 0.5323, - "step": 6674 - }, - { - "epoch": 0.23429684620649713, - "grad_norm": 0.5047413110733032, - "learning_rate": 4.319629629629629e-05, - "loss": 0.4995, - "step": 6675 - }, - { - "epoch": 0.23433194685761422, - "grad_norm": 0.5523215532302856, - "learning_rate": 4.319444444444445e-05, - "loss": 0.4775, - "step": 6676 - }, - { - "epoch": 0.23436704750873127, - "grad_norm": 0.47172263264656067, - "learning_rate": 4.3192592592592593e-05, - "loss": 0.4873, - "step": 6677 - }, - { - "epoch": 0.23440214815984836, - "grad_norm": 0.6847055554389954, - "learning_rate": 4.3190740740740744e-05, - "loss": 0.4803, - "step": 6678 - }, - { - "epoch": 0.23443724881096545, - "grad_norm": 0.5671179294586182, - "learning_rate": 4.318888888888889e-05, - "loss": 0.5413, - "step": 6679 - }, - { - "epoch": 0.23447234946208254, - "grad_norm": 0.5431975722312927, - "learning_rate": 4.318703703703704e-05, - "loss": 0.6135, - "step": 6680 - }, - { - "epoch": 0.2345074501131996, - "grad_norm": 0.4891482889652252, - "learning_rate": 4.318518518518519e-05, - "loss": 0.4656, - "step": 6681 - }, - { - "epoch": 0.23454255076431668, - "grad_norm": 0.50204998254776, - "learning_rate": 4.318333333333334e-05, - "loss": 0.6078, - "step": 6682 - }, - { - "epoch": 0.23457765141543377, - "grad_norm": 0.5278885960578918, - "learning_rate": 4.318148148148148e-05, - "loss": 0.5673, - "step": 6683 - }, - { - "epoch": 0.23461275206655083, - "grad_norm": 0.44614139199256897, - "learning_rate": 4.317962962962963e-05, - "loss": 0.4399, - "step": 6684 - }, - { - "epoch": 0.23464785271766792, - "grad_norm": 0.5534014701843262, - "learning_rate": 4.317777777777778e-05, - "loss": 0.5104, - "step": 6685 - }, - { - "epoch": 0.234682953368785, - "grad_norm": 0.5123436450958252, - "learning_rate": 4.3175925925925924e-05, - "loss": 0.4948, - "step": 6686 - }, - { - "epoch": 0.23471805401990206, - "grad_norm": 0.4728197455406189, - "learning_rate": 4.317407407407408e-05, - "loss": 0.4898, - "step": 6687 - }, - { - "epoch": 0.23475315467101915, - "grad_norm": 0.5907412171363831, - "learning_rate": 4.3172222222222224e-05, - "loss": 0.5859, - "step": 6688 - }, - { - "epoch": 0.23478825532213624, - "grad_norm": 0.5776879191398621, - "learning_rate": 4.3170370370370374e-05, - "loss": 0.3978, - "step": 6689 - }, - { - "epoch": 0.2348233559732533, - "grad_norm": 0.5633935928344727, - "learning_rate": 4.316851851851852e-05, - "loss": 0.6158, - "step": 6690 - }, - { - "epoch": 0.23485845662437038, - "grad_norm": 0.46319475769996643, - "learning_rate": 4.316666666666667e-05, - "loss": 0.5135, - "step": 6691 - }, - { - "epoch": 0.23489355727548747, - "grad_norm": 0.5380261540412903, - "learning_rate": 4.316481481481482e-05, - "loss": 0.5101, - "step": 6692 - }, - { - "epoch": 0.23492865792660453, - "grad_norm": 0.5673020482063293, - "learning_rate": 4.316296296296297e-05, - "loss": 0.5273, - "step": 6693 - }, - { - "epoch": 0.23496375857772162, - "grad_norm": 0.47544217109680176, - "learning_rate": 4.316111111111111e-05, - "loss": 0.4535, - "step": 6694 - }, - { - "epoch": 0.2349988592288387, - "grad_norm": 0.4596867263317108, - "learning_rate": 4.315925925925926e-05, - "loss": 0.4813, - "step": 6695 - }, - { - "epoch": 0.23503395987995576, - "grad_norm": 0.5386755466461182, - "learning_rate": 4.3157407407407405e-05, - "loss": 0.5628, - "step": 6696 - }, - { - "epoch": 0.23506906053107285, - "grad_norm": 0.4684572219848633, - "learning_rate": 4.315555555555556e-05, - "loss": 0.4914, - "step": 6697 - }, - { - "epoch": 0.23510416118218994, - "grad_norm": 0.5112612843513489, - "learning_rate": 4.3153703703703705e-05, - "loss": 0.5419, - "step": 6698 - }, - { - "epoch": 0.235139261833307, - "grad_norm": 0.4063224494457245, - "learning_rate": 4.3151851851851855e-05, - "loss": 0.4643, - "step": 6699 - }, - { - "epoch": 0.23517436248442408, - "grad_norm": 0.5200114846229553, - "learning_rate": 4.315e-05, - "loss": 0.5082, - "step": 6700 - }, - { - "epoch": 0.23520946313554117, - "grad_norm": 0.43057212233543396, - "learning_rate": 4.314814814814815e-05, - "loss": 0.4643, - "step": 6701 - }, - { - "epoch": 0.23524456378665826, - "grad_norm": 0.40169164538383484, - "learning_rate": 4.31462962962963e-05, - "loss": 0.3778, - "step": 6702 - }, - { - "epoch": 0.23527966443777532, - "grad_norm": 0.49058717489242554, - "learning_rate": 4.314444444444445e-05, - "loss": 0.6289, - "step": 6703 - }, - { - "epoch": 0.2353147650888924, - "grad_norm": 0.515273928642273, - "learning_rate": 4.314259259259259e-05, - "loss": 0.5856, - "step": 6704 - }, - { - "epoch": 0.2353498657400095, - "grad_norm": 0.4015655517578125, - "learning_rate": 4.314074074074074e-05, - "loss": 0.3639, - "step": 6705 - }, - { - "epoch": 0.23538496639112655, - "grad_norm": 0.5557641983032227, - "learning_rate": 4.313888888888889e-05, - "loss": 0.6684, - "step": 6706 - }, - { - "epoch": 0.23542006704224364, - "grad_norm": 0.43047770857810974, - "learning_rate": 4.3137037037037036e-05, - "loss": 0.6089, - "step": 6707 - }, - { - "epoch": 0.23545516769336072, - "grad_norm": 0.46970489621162415, - "learning_rate": 4.313518518518519e-05, - "loss": 0.559, - "step": 6708 - }, - { - "epoch": 0.23549026834447778, - "grad_norm": 0.4516023099422455, - "learning_rate": 4.3133333333333336e-05, - "loss": 0.5025, - "step": 6709 - }, - { - "epoch": 0.23552536899559487, - "grad_norm": 0.48077842593193054, - "learning_rate": 4.3131481481481486e-05, - "loss": 0.4951, - "step": 6710 - }, - { - "epoch": 0.23556046964671196, - "grad_norm": 0.5176411271095276, - "learning_rate": 4.312962962962963e-05, - "loss": 0.4641, - "step": 6711 - }, - { - "epoch": 0.23559557029782902, - "grad_norm": 0.5049853920936584, - "learning_rate": 4.312777777777778e-05, - "loss": 0.5457, - "step": 6712 - }, - { - "epoch": 0.2356306709489461, - "grad_norm": 0.496121883392334, - "learning_rate": 4.312592592592593e-05, - "loss": 0.4931, - "step": 6713 - }, - { - "epoch": 0.2356657716000632, - "grad_norm": 0.46844252943992615, - "learning_rate": 4.312407407407408e-05, - "loss": 0.4394, - "step": 6714 - }, - { - "epoch": 0.23570087225118025, - "grad_norm": 0.5493379235267639, - "learning_rate": 4.312222222222222e-05, - "loss": 0.5603, - "step": 6715 - }, - { - "epoch": 0.23573597290229734, - "grad_norm": 0.48049652576446533, - "learning_rate": 4.312037037037037e-05, - "loss": 0.5144, - "step": 6716 - }, - { - "epoch": 0.23577107355341442, - "grad_norm": 0.448836088180542, - "learning_rate": 4.3118518518518517e-05, - "loss": 0.5666, - "step": 6717 - }, - { - "epoch": 0.23580617420453148, - "grad_norm": 0.448716938495636, - "learning_rate": 4.311666666666667e-05, - "loss": 0.5631, - "step": 6718 - }, - { - "epoch": 0.23584127485564857, - "grad_norm": 0.4468401372432709, - "learning_rate": 4.311481481481482e-05, - "loss": 0.4931, - "step": 6719 - }, - { - "epoch": 0.23587637550676566, - "grad_norm": 0.5959274768829346, - "learning_rate": 4.311296296296297e-05, - "loss": 0.6259, - "step": 6720 - }, - { - "epoch": 0.23591147615788272, - "grad_norm": 0.48048892617225647, - "learning_rate": 4.311111111111111e-05, - "loss": 0.5718, - "step": 6721 - }, - { - "epoch": 0.2359465768089998, - "grad_norm": 0.5655130743980408, - "learning_rate": 4.310925925925926e-05, - "loss": 0.5994, - "step": 6722 - }, - { - "epoch": 0.2359816774601169, - "grad_norm": 0.47198280692100525, - "learning_rate": 4.310740740740741e-05, - "loss": 0.5717, - "step": 6723 - }, - { - "epoch": 0.23601677811123398, - "grad_norm": 0.45792466402053833, - "learning_rate": 4.310555555555556e-05, - "loss": 0.5383, - "step": 6724 - }, - { - "epoch": 0.23605187876235104, - "grad_norm": 0.43417564034461975, - "learning_rate": 4.310370370370371e-05, - "loss": 0.4992, - "step": 6725 - }, - { - "epoch": 0.23608697941346812, - "grad_norm": 0.487642765045166, - "learning_rate": 4.3101851851851854e-05, - "loss": 0.4972, - "step": 6726 - }, - { - "epoch": 0.2361220800645852, - "grad_norm": 0.5024652481079102, - "learning_rate": 4.3100000000000004e-05, - "loss": 0.6071, - "step": 6727 - }, - { - "epoch": 0.23615718071570227, - "grad_norm": 0.48942291736602783, - "learning_rate": 4.309814814814815e-05, - "loss": 0.5472, - "step": 6728 - }, - { - "epoch": 0.23619228136681936, - "grad_norm": 0.487271249294281, - "learning_rate": 4.30962962962963e-05, - "loss": 0.4997, - "step": 6729 - }, - { - "epoch": 0.23622738201793644, - "grad_norm": 0.45154523849487305, - "learning_rate": 4.309444444444445e-05, - "loss": 0.4502, - "step": 6730 - }, - { - "epoch": 0.2362624826690535, - "grad_norm": 0.5203060507774353, - "learning_rate": 4.30925925925926e-05, - "loss": 0.5618, - "step": 6731 - }, - { - "epoch": 0.2362975833201706, - "grad_norm": 0.48445260524749756, - "learning_rate": 4.309074074074074e-05, - "loss": 0.5582, - "step": 6732 - }, - { - "epoch": 0.23633268397128768, - "grad_norm": 0.5705397129058838, - "learning_rate": 4.308888888888889e-05, - "loss": 0.5092, - "step": 6733 - }, - { - "epoch": 0.23636778462240474, - "grad_norm": 0.6652381420135498, - "learning_rate": 4.3087037037037035e-05, - "loss": 0.5216, - "step": 6734 - }, - { - "epoch": 0.23640288527352182, - "grad_norm": 0.6233749389648438, - "learning_rate": 4.308518518518519e-05, - "loss": 0.4653, - "step": 6735 - }, - { - "epoch": 0.2364379859246389, - "grad_norm": 0.5394173264503479, - "learning_rate": 4.3083333333333335e-05, - "loss": 0.4706, - "step": 6736 - }, - { - "epoch": 0.23647308657575597, - "grad_norm": 0.4609326720237732, - "learning_rate": 4.3081481481481485e-05, - "loss": 0.4515, - "step": 6737 - }, - { - "epoch": 0.23650818722687306, - "grad_norm": 0.47585543990135193, - "learning_rate": 4.307962962962963e-05, - "loss": 0.4247, - "step": 6738 - }, - { - "epoch": 0.23654328787799014, - "grad_norm": 0.48601391911506653, - "learning_rate": 4.307777777777778e-05, - "loss": 0.5281, - "step": 6739 - }, - { - "epoch": 0.2365783885291072, - "grad_norm": 0.5786667466163635, - "learning_rate": 4.307592592592593e-05, - "loss": 0.6159, - "step": 6740 - }, - { - "epoch": 0.2366134891802243, - "grad_norm": 0.4210897982120514, - "learning_rate": 4.307407407407408e-05, - "loss": 0.4521, - "step": 6741 - }, - { - "epoch": 0.23664858983134138, - "grad_norm": 0.4659835398197174, - "learning_rate": 4.307222222222222e-05, - "loss": 0.4836, - "step": 6742 - }, - { - "epoch": 0.23668369048245844, - "grad_norm": 0.466610848903656, - "learning_rate": 4.307037037037037e-05, - "loss": 0.5317, - "step": 6743 - }, - { - "epoch": 0.23671879113357552, - "grad_norm": 0.5562341213226318, - "learning_rate": 4.306851851851852e-05, - "loss": 0.4914, - "step": 6744 - }, - { - "epoch": 0.2367538917846926, - "grad_norm": 0.4782911241054535, - "learning_rate": 4.3066666666666665e-05, - "loss": 0.4366, - "step": 6745 - }, - { - "epoch": 0.2367889924358097, - "grad_norm": 0.5017595887184143, - "learning_rate": 4.306481481481482e-05, - "loss": 0.5652, - "step": 6746 - }, - { - "epoch": 0.23682409308692676, - "grad_norm": 0.5456122756004333, - "learning_rate": 4.3062962962962966e-05, - "loss": 0.607, - "step": 6747 - }, - { - "epoch": 0.23685919373804384, - "grad_norm": 0.471823513507843, - "learning_rate": 4.3061111111111116e-05, - "loss": 0.458, - "step": 6748 - }, - { - "epoch": 0.23689429438916093, - "grad_norm": 0.5280041098594666, - "learning_rate": 4.305925925925926e-05, - "loss": 0.4856, - "step": 6749 - }, - { - "epoch": 0.236929395040278, - "grad_norm": 0.5301018953323364, - "learning_rate": 4.305740740740741e-05, - "loss": 0.4914, - "step": 6750 - }, - { - "epoch": 0.23696449569139508, - "grad_norm": 0.40802454948425293, - "learning_rate": 4.305555555555556e-05, - "loss": 0.4034, - "step": 6751 - }, - { - "epoch": 0.23699959634251216, - "grad_norm": 0.43260353803634644, - "learning_rate": 4.305370370370371e-05, - "loss": 0.41, - "step": 6752 - }, - { - "epoch": 0.23703469699362922, - "grad_norm": 0.7694167494773865, - "learning_rate": 4.305185185185185e-05, - "loss": 0.4139, - "step": 6753 - }, - { - "epoch": 0.2370697976447463, - "grad_norm": 0.5117589235305786, - "learning_rate": 4.305e-05, - "loss": 0.4547, - "step": 6754 - }, - { - "epoch": 0.2371048982958634, - "grad_norm": 0.45655912160873413, - "learning_rate": 4.3048148148148146e-05, - "loss": 0.4177, - "step": 6755 - }, - { - "epoch": 0.23713999894698046, - "grad_norm": 0.5519530177116394, - "learning_rate": 4.3046296296296296e-05, - "loss": 0.5506, - "step": 6756 - }, - { - "epoch": 0.23717509959809754, - "grad_norm": 0.4413387179374695, - "learning_rate": 4.3044444444444446e-05, - "loss": 0.5435, - "step": 6757 - }, - { - "epoch": 0.23721020024921463, - "grad_norm": 0.6083065867424011, - "learning_rate": 4.3042592592592597e-05, - "loss": 0.5927, - "step": 6758 - }, - { - "epoch": 0.2372453009003317, - "grad_norm": 0.4513631761074066, - "learning_rate": 4.304074074074074e-05, - "loss": 0.4965, - "step": 6759 - }, - { - "epoch": 0.23728040155144878, - "grad_norm": 0.424917072057724, - "learning_rate": 4.303888888888889e-05, - "loss": 0.5583, - "step": 6760 - }, - { - "epoch": 0.23731550220256586, - "grad_norm": 0.4577857553958893, - "learning_rate": 4.303703703703704e-05, - "loss": 0.5307, - "step": 6761 - }, - { - "epoch": 0.23735060285368292, - "grad_norm": 0.5302160978317261, - "learning_rate": 4.303518518518519e-05, - "loss": 0.4814, - "step": 6762 - }, - { - "epoch": 0.2373857035048, - "grad_norm": 0.45282018184661865, - "learning_rate": 4.3033333333333334e-05, - "loss": 0.5436, - "step": 6763 - }, - { - "epoch": 0.2374208041559171, - "grad_norm": 0.4940479099750519, - "learning_rate": 4.3031481481481484e-05, - "loss": 0.4478, - "step": 6764 - }, - { - "epoch": 0.23745590480703416, - "grad_norm": 0.45737791061401367, - "learning_rate": 4.3029629629629634e-05, - "loss": 0.4084, - "step": 6765 - }, - { - "epoch": 0.23749100545815124, - "grad_norm": 0.4196029305458069, - "learning_rate": 4.302777777777778e-05, - "loss": 0.4233, - "step": 6766 - }, - { - "epoch": 0.23752610610926833, - "grad_norm": 0.523734986782074, - "learning_rate": 4.3025925925925934e-05, - "loss": 0.585, - "step": 6767 - }, - { - "epoch": 0.23756120676038542, - "grad_norm": 0.39137017726898193, - "learning_rate": 4.302407407407408e-05, - "loss": 0.3156, - "step": 6768 - }, - { - "epoch": 0.23759630741150248, - "grad_norm": 0.3814889192581177, - "learning_rate": 4.302222222222223e-05, - "loss": 0.4488, - "step": 6769 - }, - { - "epoch": 0.23763140806261956, - "grad_norm": 0.5552565455436707, - "learning_rate": 4.302037037037037e-05, - "loss": 0.5483, - "step": 6770 - }, - { - "epoch": 0.23766650871373665, - "grad_norm": 0.500012218952179, - "learning_rate": 4.301851851851852e-05, - "loss": 0.4005, - "step": 6771 - }, - { - "epoch": 0.2377016093648537, - "grad_norm": 0.49910682439804077, - "learning_rate": 4.3016666666666664e-05, - "loss": 0.5427, - "step": 6772 - }, - { - "epoch": 0.2377367100159708, - "grad_norm": 0.5009340047836304, - "learning_rate": 4.301481481481482e-05, - "loss": 0.4347, - "step": 6773 - }, - { - "epoch": 0.23777181066708789, - "grad_norm": 0.5189939737319946, - "learning_rate": 4.3012962962962964e-05, - "loss": 0.6094, - "step": 6774 - }, - { - "epoch": 0.23780691131820494, - "grad_norm": 0.4619685411453247, - "learning_rate": 4.3011111111111115e-05, - "loss": 0.5528, - "step": 6775 - }, - { - "epoch": 0.23784201196932203, - "grad_norm": 0.45545172691345215, - "learning_rate": 4.300925925925926e-05, - "loss": 0.5697, - "step": 6776 - }, - { - "epoch": 0.23787711262043912, - "grad_norm": 0.4657208025455475, - "learning_rate": 4.300740740740741e-05, - "loss": 0.5438, - "step": 6777 - }, - { - "epoch": 0.23791221327155618, - "grad_norm": 0.6602526307106018, - "learning_rate": 4.300555555555556e-05, - "loss": 0.5214, - "step": 6778 - }, - { - "epoch": 0.23794731392267326, - "grad_norm": 0.4282982051372528, - "learning_rate": 4.300370370370371e-05, - "loss": 0.5873, - "step": 6779 - }, - { - "epoch": 0.23798241457379035, - "grad_norm": 0.48123759031295776, - "learning_rate": 4.300185185185185e-05, - "loss": 0.5146, - "step": 6780 - }, - { - "epoch": 0.2380175152249074, - "grad_norm": 0.4960746169090271, - "learning_rate": 4.3e-05, - "loss": 0.5391, - "step": 6781 - }, - { - "epoch": 0.2380526158760245, - "grad_norm": 0.4826563596725464, - "learning_rate": 4.299814814814815e-05, - "loss": 0.5057, - "step": 6782 - }, - { - "epoch": 0.23808771652714159, - "grad_norm": 0.4430752098560333, - "learning_rate": 4.2996296296296295e-05, - "loss": 0.4589, - "step": 6783 - }, - { - "epoch": 0.23812281717825864, - "grad_norm": 0.5112671256065369, - "learning_rate": 4.2994444444444445e-05, - "loss": 0.3964, - "step": 6784 - }, - { - "epoch": 0.23815791782937573, - "grad_norm": 0.48457473516464233, - "learning_rate": 4.2992592592592595e-05, - "loss": 0.4683, - "step": 6785 - }, - { - "epoch": 0.23819301848049282, - "grad_norm": 0.49401575326919556, - "learning_rate": 4.2990740740740745e-05, - "loss": 0.6006, - "step": 6786 - }, - { - "epoch": 0.23822811913160988, - "grad_norm": 0.5780882239341736, - "learning_rate": 4.298888888888889e-05, - "loss": 0.5454, - "step": 6787 - }, - { - "epoch": 0.23826321978272696, - "grad_norm": 0.4554601311683655, - "learning_rate": 4.298703703703704e-05, - "loss": 0.6134, - "step": 6788 - }, - { - "epoch": 0.23829832043384405, - "grad_norm": 0.4645678997039795, - "learning_rate": 4.298518518518519e-05, - "loss": 0.4547, - "step": 6789 - }, - { - "epoch": 0.23833342108496114, - "grad_norm": 0.5710635185241699, - "learning_rate": 4.298333333333334e-05, - "loss": 0.4672, - "step": 6790 - }, - { - "epoch": 0.2383685217360782, - "grad_norm": 0.5514808297157288, - "learning_rate": 4.298148148148148e-05, - "loss": 0.5548, - "step": 6791 - }, - { - "epoch": 0.23840362238719529, - "grad_norm": 0.5442914962768555, - "learning_rate": 4.297962962962963e-05, - "loss": 0.5003, - "step": 6792 - }, - { - "epoch": 0.23843872303831237, - "grad_norm": 0.46143603324890137, - "learning_rate": 4.2977777777777776e-05, - "loss": 0.4525, - "step": 6793 - }, - { - "epoch": 0.23847382368942943, - "grad_norm": 1.153793454170227, - "learning_rate": 4.297592592592593e-05, - "loss": 0.5153, - "step": 6794 - }, - { - "epoch": 0.23850892434054652, - "grad_norm": 0.4847612679004669, - "learning_rate": 4.2974074074074076e-05, - "loss": 0.4721, - "step": 6795 - }, - { - "epoch": 0.2385440249916636, - "grad_norm": 0.4661993980407715, - "learning_rate": 4.2972222222222226e-05, - "loss": 0.5518, - "step": 6796 - }, - { - "epoch": 0.23857912564278067, - "grad_norm": 0.5639839172363281, - "learning_rate": 4.297037037037037e-05, - "loss": 0.4714, - "step": 6797 - }, - { - "epoch": 0.23861422629389775, - "grad_norm": 0.48490530252456665, - "learning_rate": 4.296851851851852e-05, - "loss": 0.5179, - "step": 6798 - }, - { - "epoch": 0.23864932694501484, - "grad_norm": 0.5674753785133362, - "learning_rate": 4.296666666666666e-05, - "loss": 0.6014, - "step": 6799 - }, - { - "epoch": 0.2386844275961319, - "grad_norm": 0.4825301766395569, - "learning_rate": 4.296481481481482e-05, - "loss": 0.5169, - "step": 6800 - }, - { - "epoch": 0.23871952824724899, - "grad_norm": 0.44591856002807617, - "learning_rate": 4.296296296296296e-05, - "loss": 0.4455, - "step": 6801 - }, - { - "epoch": 0.23875462889836607, - "grad_norm": 0.4729813039302826, - "learning_rate": 4.296111111111111e-05, - "loss": 0.4535, - "step": 6802 - }, - { - "epoch": 0.23878972954948313, - "grad_norm": 0.49631085991859436, - "learning_rate": 4.2959259259259263e-05, - "loss": 0.4322, - "step": 6803 - }, - { - "epoch": 0.23882483020060022, - "grad_norm": 0.47209393978118896, - "learning_rate": 4.295740740740741e-05, - "loss": 0.5165, - "step": 6804 - }, - { - "epoch": 0.2388599308517173, - "grad_norm": 0.6730100512504578, - "learning_rate": 4.295555555555556e-05, - "loss": 0.6209, - "step": 6805 - }, - { - "epoch": 0.23889503150283437, - "grad_norm": 0.6173808574676514, - "learning_rate": 4.295370370370371e-05, - "loss": 0.5676, - "step": 6806 - }, - { - "epoch": 0.23893013215395145, - "grad_norm": 0.4968062937259674, - "learning_rate": 4.295185185185186e-05, - "loss": 0.5027, - "step": 6807 - }, - { - "epoch": 0.23896523280506854, - "grad_norm": 0.42534536123275757, - "learning_rate": 4.295e-05, - "loss": 0.4621, - "step": 6808 - }, - { - "epoch": 0.2390003334561856, - "grad_norm": 0.6419355869293213, - "learning_rate": 4.294814814814815e-05, - "loss": 0.4797, - "step": 6809 - }, - { - "epoch": 0.23903543410730269, - "grad_norm": 0.47189417481422424, - "learning_rate": 4.2946296296296294e-05, - "loss": 0.4653, - "step": 6810 - }, - { - "epoch": 0.23907053475841977, - "grad_norm": 0.5862225294113159, - "learning_rate": 4.294444444444445e-05, - "loss": 0.5705, - "step": 6811 - }, - { - "epoch": 0.23910563540953686, - "grad_norm": 0.5548362135887146, - "learning_rate": 4.2942592592592594e-05, - "loss": 0.3771, - "step": 6812 - }, - { - "epoch": 0.23914073606065392, - "grad_norm": 0.7196215987205505, - "learning_rate": 4.2940740740740744e-05, - "loss": 0.5571, - "step": 6813 - }, - { - "epoch": 0.239175836711771, - "grad_norm": 0.49359530210494995, - "learning_rate": 4.293888888888889e-05, - "loss": 0.5767, - "step": 6814 - }, - { - "epoch": 0.2392109373628881, - "grad_norm": 0.4531099498271942, - "learning_rate": 4.293703703703704e-05, - "loss": 0.4125, - "step": 6815 - }, - { - "epoch": 0.23924603801400515, - "grad_norm": 0.49836400151252747, - "learning_rate": 4.293518518518519e-05, - "loss": 0.4447, - "step": 6816 - }, - { - "epoch": 0.23928113866512224, - "grad_norm": 0.40573620796203613, - "learning_rate": 4.293333333333334e-05, - "loss": 0.5065, - "step": 6817 - }, - { - "epoch": 0.23931623931623933, - "grad_norm": 0.520187497138977, - "learning_rate": 4.293148148148148e-05, - "loss": 0.5081, - "step": 6818 - }, - { - "epoch": 0.23935133996735639, - "grad_norm": 0.498597115278244, - "learning_rate": 4.292962962962963e-05, - "loss": 0.6447, - "step": 6819 - }, - { - "epoch": 0.23938644061847347, - "grad_norm": 0.4927423596382141, - "learning_rate": 4.2927777777777775e-05, - "loss": 0.5903, - "step": 6820 - }, - { - "epoch": 0.23942154126959056, - "grad_norm": 0.4578908085823059, - "learning_rate": 4.292592592592593e-05, - "loss": 0.5438, - "step": 6821 - }, - { - "epoch": 0.23945664192070762, - "grad_norm": 0.5048690438270569, - "learning_rate": 4.2924074074074075e-05, - "loss": 0.5461, - "step": 6822 - }, - { - "epoch": 0.2394917425718247, - "grad_norm": 0.4735915958881378, - "learning_rate": 4.2922222222222225e-05, - "loss": 0.474, - "step": 6823 - }, - { - "epoch": 0.2395268432229418, - "grad_norm": 0.48927220702171326, - "learning_rate": 4.2920370370370375e-05, - "loss": 0.6016, - "step": 6824 - }, - { - "epoch": 0.23956194387405885, - "grad_norm": 0.4432360827922821, - "learning_rate": 4.291851851851852e-05, - "loss": 0.566, - "step": 6825 - }, - { - "epoch": 0.23959704452517594, - "grad_norm": 0.4189656674861908, - "learning_rate": 4.291666666666667e-05, - "loss": 0.5516, - "step": 6826 - }, - { - "epoch": 0.23963214517629303, - "grad_norm": 0.4428917467594147, - "learning_rate": 4.291481481481482e-05, - "loss": 0.4649, - "step": 6827 - }, - { - "epoch": 0.23966724582741009, - "grad_norm": 0.5070942640304565, - "learning_rate": 4.291296296296297e-05, - "loss": 0.5504, - "step": 6828 - }, - { - "epoch": 0.23970234647852717, - "grad_norm": 0.47320857644081116, - "learning_rate": 4.291111111111111e-05, - "loss": 0.5616, - "step": 6829 - }, - { - "epoch": 0.23973744712964426, - "grad_norm": 0.5829129219055176, - "learning_rate": 4.290925925925926e-05, - "loss": 0.5759, - "step": 6830 - }, - { - "epoch": 0.23977254778076132, - "grad_norm": 0.4064013361930847, - "learning_rate": 4.2907407407407406e-05, - "loss": 0.4594, - "step": 6831 - }, - { - "epoch": 0.2398076484318784, - "grad_norm": 0.4731931686401367, - "learning_rate": 4.290555555555556e-05, - "loss": 0.5464, - "step": 6832 - }, - { - "epoch": 0.2398427490829955, - "grad_norm": 0.5519393086433411, - "learning_rate": 4.2903703703703706e-05, - "loss": 0.5373, - "step": 6833 - }, - { - "epoch": 0.23987784973411258, - "grad_norm": 0.4961772561073303, - "learning_rate": 4.2901851851851856e-05, - "loss": 0.5285, - "step": 6834 - }, - { - "epoch": 0.23991295038522964, - "grad_norm": 0.5731585025787354, - "learning_rate": 4.29e-05, - "loss": 0.4838, - "step": 6835 - }, - { - "epoch": 0.23994805103634673, - "grad_norm": 0.6265867352485657, - "learning_rate": 4.289814814814815e-05, - "loss": 0.6379, - "step": 6836 - }, - { - "epoch": 0.2399831516874638, - "grad_norm": 0.49099740386009216, - "learning_rate": 4.28962962962963e-05, - "loss": 0.5149, - "step": 6837 - }, - { - "epoch": 0.24001825233858087, - "grad_norm": 0.48277759552001953, - "learning_rate": 4.289444444444445e-05, - "loss": 0.4256, - "step": 6838 - }, - { - "epoch": 0.24005335298969796, - "grad_norm": 0.5559314489364624, - "learning_rate": 4.289259259259259e-05, - "loss": 0.5568, - "step": 6839 - }, - { - "epoch": 0.24008845364081505, - "grad_norm": 0.665891170501709, - "learning_rate": 4.289074074074074e-05, - "loss": 0.4896, - "step": 6840 - }, - { - "epoch": 0.2401235542919321, - "grad_norm": 0.47765934467315674, - "learning_rate": 4.2888888888888886e-05, - "loss": 0.5689, - "step": 6841 - }, - { - "epoch": 0.2401586549430492, - "grad_norm": 0.5168316960334778, - "learning_rate": 4.2887037037037036e-05, - "loss": 0.3801, - "step": 6842 - }, - { - "epoch": 0.24019375559416628, - "grad_norm": 0.521306574344635, - "learning_rate": 4.2885185185185187e-05, - "loss": 0.6271, - "step": 6843 - }, - { - "epoch": 0.24022885624528334, - "grad_norm": 0.5529371500015259, - "learning_rate": 4.288333333333334e-05, - "loss": 0.5143, - "step": 6844 - }, - { - "epoch": 0.24026395689640043, - "grad_norm": 0.5028990507125854, - "learning_rate": 4.288148148148149e-05, - "loss": 0.4704, - "step": 6845 - }, - { - "epoch": 0.24029905754751751, - "grad_norm": 0.4369807243347168, - "learning_rate": 4.287962962962963e-05, - "loss": 0.3865, - "step": 6846 - }, - { - "epoch": 0.24033415819863457, - "grad_norm": 0.4859701693058014, - "learning_rate": 4.287777777777778e-05, - "loss": 0.441, - "step": 6847 - }, - { - "epoch": 0.24036925884975166, - "grad_norm": 0.5612921714782715, - "learning_rate": 4.287592592592593e-05, - "loss": 0.3777, - "step": 6848 - }, - { - "epoch": 0.24040435950086875, - "grad_norm": 0.6443774700164795, - "learning_rate": 4.287407407407408e-05, - "loss": 0.5654, - "step": 6849 - }, - { - "epoch": 0.2404394601519858, - "grad_norm": 0.4465562701225281, - "learning_rate": 4.2872222222222224e-05, - "loss": 0.5301, - "step": 6850 - }, - { - "epoch": 0.2404745608031029, - "grad_norm": 0.5241650342941284, - "learning_rate": 4.2870370370370374e-05, - "loss": 0.4076, - "step": 6851 - }, - { - "epoch": 0.24050966145421998, - "grad_norm": 0.5159212946891785, - "learning_rate": 4.286851851851852e-05, - "loss": 0.5623, - "step": 6852 - }, - { - "epoch": 0.24054476210533707, - "grad_norm": 0.5197125673294067, - "learning_rate": 4.286666666666667e-05, - "loss": 0.5005, - "step": 6853 - }, - { - "epoch": 0.24057986275645413, - "grad_norm": 0.4671052098274231, - "learning_rate": 4.286481481481482e-05, - "loss": 0.5216, - "step": 6854 - }, - { - "epoch": 0.24061496340757121, - "grad_norm": 0.505756139755249, - "learning_rate": 4.286296296296297e-05, - "loss": 0.4729, - "step": 6855 - }, - { - "epoch": 0.2406500640586883, - "grad_norm": 0.46130916476249695, - "learning_rate": 4.286111111111111e-05, - "loss": 0.5931, - "step": 6856 - }, - { - "epoch": 0.24068516470980536, - "grad_norm": 0.5007908344268799, - "learning_rate": 4.285925925925926e-05, - "loss": 0.5857, - "step": 6857 - }, - { - "epoch": 0.24072026536092245, - "grad_norm": 0.5525897741317749, - "learning_rate": 4.2857407407407404e-05, - "loss": 0.5405, - "step": 6858 - }, - { - "epoch": 0.24075536601203953, - "grad_norm": 0.46718037128448486, - "learning_rate": 4.285555555555556e-05, - "loss": 0.4733, - "step": 6859 - }, - { - "epoch": 0.2407904666631566, - "grad_norm": 0.40486153960227966, - "learning_rate": 4.2853703703703705e-05, - "loss": 0.5043, - "step": 6860 - }, - { - "epoch": 0.24082556731427368, - "grad_norm": 0.4606616795063019, - "learning_rate": 4.2851851851851855e-05, - "loss": 0.5374, - "step": 6861 - }, - { - "epoch": 0.24086066796539077, - "grad_norm": 0.4435731768608093, - "learning_rate": 4.285e-05, - "loss": 0.4787, - "step": 6862 - }, - { - "epoch": 0.24089576861650783, - "grad_norm": 0.5332759022712708, - "learning_rate": 4.284814814814815e-05, - "loss": 0.6113, - "step": 6863 - }, - { - "epoch": 0.24093086926762491, - "grad_norm": 0.5365182757377625, - "learning_rate": 4.28462962962963e-05, - "loss": 0.4306, - "step": 6864 - }, - { - "epoch": 0.240965969918742, - "grad_norm": 0.3874254822731018, - "learning_rate": 4.284444444444445e-05, - "loss": 0.5607, - "step": 6865 - }, - { - "epoch": 0.24100107056985906, - "grad_norm": 0.4962996244430542, - "learning_rate": 4.28425925925926e-05, - "loss": 0.537, - "step": 6866 - }, - { - "epoch": 0.24103617122097615, - "grad_norm": 0.4933573305606842, - "learning_rate": 4.284074074074074e-05, - "loss": 0.5705, - "step": 6867 - }, - { - "epoch": 0.24107127187209323, - "grad_norm": 0.5412040948867798, - "learning_rate": 4.283888888888889e-05, - "loss": 0.5231, - "step": 6868 - }, - { - "epoch": 0.2411063725232103, - "grad_norm": 0.3820686638355255, - "learning_rate": 4.2837037037037035e-05, - "loss": 0.4833, - "step": 6869 - }, - { - "epoch": 0.24114147317432738, - "grad_norm": 0.4198676347732544, - "learning_rate": 4.283518518518519e-05, - "loss": 0.4635, - "step": 6870 - }, - { - "epoch": 0.24117657382544447, - "grad_norm": 0.45805084705352783, - "learning_rate": 4.2833333333333335e-05, - "loss": 0.4083, - "step": 6871 - }, - { - "epoch": 0.24121167447656153, - "grad_norm": 0.4277750849723816, - "learning_rate": 4.2831481481481486e-05, - "loss": 0.384, - "step": 6872 - }, - { - "epoch": 0.24124677512767861, - "grad_norm": 0.45890551805496216, - "learning_rate": 4.282962962962963e-05, - "loss": 0.5133, - "step": 6873 - }, - { - "epoch": 0.2412818757787957, - "grad_norm": 0.48818230628967285, - "learning_rate": 4.282777777777778e-05, - "loss": 0.4863, - "step": 6874 - }, - { - "epoch": 0.2413169764299128, - "grad_norm": 0.4626519978046417, - "learning_rate": 4.282592592592593e-05, - "loss": 0.3799, - "step": 6875 - }, - { - "epoch": 0.24135207708102985, - "grad_norm": 0.5657615661621094, - "learning_rate": 4.282407407407408e-05, - "loss": 0.5614, - "step": 6876 - }, - { - "epoch": 0.24138717773214693, - "grad_norm": 0.46489086747169495, - "learning_rate": 4.282222222222222e-05, - "loss": 0.5222, - "step": 6877 - }, - { - "epoch": 0.24142227838326402, - "grad_norm": 0.48138314485549927, - "learning_rate": 4.282037037037037e-05, - "loss": 0.3877, - "step": 6878 - }, - { - "epoch": 0.24145737903438108, - "grad_norm": 0.5257938504219055, - "learning_rate": 4.2818518518518516e-05, - "loss": 0.5444, - "step": 6879 - }, - { - "epoch": 0.24149247968549817, - "grad_norm": 0.42373690009117126, - "learning_rate": 4.2816666666666666e-05, - "loss": 0.3416, - "step": 6880 - }, - { - "epoch": 0.24152758033661526, - "grad_norm": 0.4863598048686981, - "learning_rate": 4.2814814814814816e-05, - "loss": 0.5084, - "step": 6881 - }, - { - "epoch": 0.24156268098773231, - "grad_norm": 0.4701608419418335, - "learning_rate": 4.2812962962962966e-05, - "loss": 0.4646, - "step": 6882 - }, - { - "epoch": 0.2415977816388494, - "grad_norm": 0.43836092948913574, - "learning_rate": 4.281111111111111e-05, - "loss": 0.5299, - "step": 6883 - }, - { - "epoch": 0.2416328822899665, - "grad_norm": 0.47519561648368835, - "learning_rate": 4.280925925925926e-05, - "loss": 0.5364, - "step": 6884 - }, - { - "epoch": 0.24166798294108355, - "grad_norm": 0.46431511640548706, - "learning_rate": 4.280740740740741e-05, - "loss": 0.5706, - "step": 6885 - }, - { - "epoch": 0.24170308359220063, - "grad_norm": 0.47817695140838623, - "learning_rate": 4.280555555555556e-05, - "loss": 0.5231, - "step": 6886 - }, - { - "epoch": 0.24173818424331772, - "grad_norm": 0.48284730315208435, - "learning_rate": 4.280370370370371e-05, - "loss": 0.5572, - "step": 6887 - }, - { - "epoch": 0.24177328489443478, - "grad_norm": 0.4902344048023224, - "learning_rate": 4.2801851851851853e-05, - "loss": 0.5777, - "step": 6888 - }, - { - "epoch": 0.24180838554555187, - "grad_norm": 0.46503835916519165, - "learning_rate": 4.2800000000000004e-05, - "loss": 0.5211, - "step": 6889 - }, - { - "epoch": 0.24184348619666896, - "grad_norm": 0.5812570452690125, - "learning_rate": 4.279814814814815e-05, - "loss": 0.6502, - "step": 6890 - }, - { - "epoch": 0.24187858684778601, - "grad_norm": 0.5173459053039551, - "learning_rate": 4.2796296296296304e-05, - "loss": 0.5735, - "step": 6891 - }, - { - "epoch": 0.2419136874989031, - "grad_norm": 0.5405662655830383, - "learning_rate": 4.279444444444445e-05, - "loss": 0.4911, - "step": 6892 - }, - { - "epoch": 0.2419487881500202, - "grad_norm": 0.5368152260780334, - "learning_rate": 4.27925925925926e-05, - "loss": 0.5353, - "step": 6893 - }, - { - "epoch": 0.24198388880113725, - "grad_norm": 0.48466628789901733, - "learning_rate": 4.279074074074074e-05, - "loss": 0.5777, - "step": 6894 - }, - { - "epoch": 0.24201898945225434, - "grad_norm": 0.4489358365535736, - "learning_rate": 4.278888888888889e-05, - "loss": 0.4511, - "step": 6895 - }, - { - "epoch": 0.24205409010337142, - "grad_norm": 0.5408017635345459, - "learning_rate": 4.2787037037037034e-05, - "loss": 0.4693, - "step": 6896 - }, - { - "epoch": 0.2420891907544885, - "grad_norm": 0.4480147063732147, - "learning_rate": 4.278518518518519e-05, - "loss": 0.5665, - "step": 6897 - }, - { - "epoch": 0.24212429140560557, - "grad_norm": 0.43665117025375366, - "learning_rate": 4.2783333333333334e-05, - "loss": 0.4497, - "step": 6898 - }, - { - "epoch": 0.24215939205672266, - "grad_norm": 0.4918012022972107, - "learning_rate": 4.2781481481481484e-05, - "loss": 0.5735, - "step": 6899 - }, - { - "epoch": 0.24219449270783974, - "grad_norm": 0.45149651169776917, - "learning_rate": 4.277962962962963e-05, - "loss": 0.5434, - "step": 6900 - }, - { - "epoch": 0.2422295933589568, - "grad_norm": 0.48591184616088867, - "learning_rate": 4.277777777777778e-05, - "loss": 0.4816, - "step": 6901 - }, - { - "epoch": 0.2422646940100739, - "grad_norm": 0.5227186679840088, - "learning_rate": 4.277592592592593e-05, - "loss": 0.4556, - "step": 6902 - }, - { - "epoch": 0.24229979466119098, - "grad_norm": 0.5306392908096313, - "learning_rate": 4.277407407407408e-05, - "loss": 0.438, - "step": 6903 - }, - { - "epoch": 0.24233489531230804, - "grad_norm": 0.56976318359375, - "learning_rate": 4.277222222222222e-05, - "loss": 0.4288, - "step": 6904 - }, - { - "epoch": 0.24236999596342512, - "grad_norm": 0.4689086675643921, - "learning_rate": 4.277037037037037e-05, - "loss": 0.5648, - "step": 6905 - }, - { - "epoch": 0.2424050966145422, - "grad_norm": 0.4591197967529297, - "learning_rate": 4.276851851851852e-05, - "loss": 0.6002, - "step": 6906 - }, - { - "epoch": 0.24244019726565927, - "grad_norm": 0.4610452353954315, - "learning_rate": 4.2766666666666665e-05, - "loss": 0.5982, - "step": 6907 - }, - { - "epoch": 0.24247529791677636, - "grad_norm": 0.5606013536453247, - "learning_rate": 4.276481481481482e-05, - "loss": 0.5475, - "step": 6908 - }, - { - "epoch": 0.24251039856789344, - "grad_norm": 0.5199403166770935, - "learning_rate": 4.2762962962962965e-05, - "loss": 0.482, - "step": 6909 - }, - { - "epoch": 0.2425454992190105, - "grad_norm": 0.5039594769477844, - "learning_rate": 4.2761111111111115e-05, - "loss": 0.567, - "step": 6910 - }, - { - "epoch": 0.2425805998701276, - "grad_norm": 0.5241091847419739, - "learning_rate": 4.275925925925926e-05, - "loss": 0.5416, - "step": 6911 - }, - { - "epoch": 0.24261570052124468, - "grad_norm": 0.5115075707435608, - "learning_rate": 4.275740740740741e-05, - "loss": 0.5844, - "step": 6912 - }, - { - "epoch": 0.24265080117236174, - "grad_norm": 0.5039592981338501, - "learning_rate": 4.275555555555556e-05, - "loss": 0.46, - "step": 6913 - }, - { - "epoch": 0.24268590182347882, - "grad_norm": 0.49442118406295776, - "learning_rate": 4.275370370370371e-05, - "loss": 0.4805, - "step": 6914 - }, - { - "epoch": 0.2427210024745959, - "grad_norm": 0.5022268891334534, - "learning_rate": 4.275185185185185e-05, - "loss": 0.4349, - "step": 6915 - }, - { - "epoch": 0.24275610312571297, - "grad_norm": 0.45598286390304565, - "learning_rate": 4.275e-05, - "loss": 0.4362, - "step": 6916 - }, - { - "epoch": 0.24279120377683006, - "grad_norm": 0.48515215516090393, - "learning_rate": 4.2748148148148146e-05, - "loss": 0.6112, - "step": 6917 - }, - { - "epoch": 0.24282630442794714, - "grad_norm": 0.44267675280570984, - "learning_rate": 4.27462962962963e-05, - "loss": 0.4951, - "step": 6918 - }, - { - "epoch": 0.24286140507906423, - "grad_norm": 0.5002373456954956, - "learning_rate": 4.2744444444444446e-05, - "loss": 0.4345, - "step": 6919 - }, - { - "epoch": 0.2428965057301813, - "grad_norm": 0.5377094745635986, - "learning_rate": 4.2742592592592596e-05, - "loss": 0.5631, - "step": 6920 - }, - { - "epoch": 0.24293160638129838, - "grad_norm": 0.44305670261383057, - "learning_rate": 4.274074074074074e-05, - "loss": 0.4397, - "step": 6921 - }, - { - "epoch": 0.24296670703241546, - "grad_norm": 0.44687142968177795, - "learning_rate": 4.273888888888889e-05, - "loss": 0.3672, - "step": 6922 - }, - { - "epoch": 0.24300180768353252, - "grad_norm": 0.49420997500419617, - "learning_rate": 4.273703703703704e-05, - "loss": 0.4934, - "step": 6923 - }, - { - "epoch": 0.2430369083346496, - "grad_norm": 0.4345281422138214, - "learning_rate": 4.273518518518519e-05, - "loss": 0.4601, - "step": 6924 - }, - { - "epoch": 0.2430720089857667, - "grad_norm": 0.5711280107498169, - "learning_rate": 4.273333333333333e-05, - "loss": 0.5762, - "step": 6925 - }, - { - "epoch": 0.24310710963688376, - "grad_norm": 0.4781034588813782, - "learning_rate": 4.273148148148148e-05, - "loss": 0.5656, - "step": 6926 - }, - { - "epoch": 0.24314221028800084, - "grad_norm": 0.593438446521759, - "learning_rate": 4.272962962962963e-05, - "loss": 0.5438, - "step": 6927 - }, - { - "epoch": 0.24317731093911793, - "grad_norm": 0.47741642594337463, - "learning_rate": 4.2727777777777777e-05, - "loss": 0.3937, - "step": 6928 - }, - { - "epoch": 0.243212411590235, - "grad_norm": 0.4767247140407562, - "learning_rate": 4.2725925925925933e-05, - "loss": 0.5202, - "step": 6929 - }, - { - "epoch": 0.24324751224135208, - "grad_norm": 0.5556877851486206, - "learning_rate": 4.272407407407408e-05, - "loss": 0.6182, - "step": 6930 - }, - { - "epoch": 0.24328261289246916, - "grad_norm": 0.5069547891616821, - "learning_rate": 4.272222222222223e-05, - "loss": 0.4831, - "step": 6931 - }, - { - "epoch": 0.24331771354358622, - "grad_norm": 0.5550232529640198, - "learning_rate": 4.272037037037037e-05, - "loss": 0.4493, - "step": 6932 - }, - { - "epoch": 0.2433528141947033, - "grad_norm": 0.44091320037841797, - "learning_rate": 4.271851851851852e-05, - "loss": 0.476, - "step": 6933 - }, - { - "epoch": 0.2433879148458204, - "grad_norm": 0.4390350580215454, - "learning_rate": 4.2716666666666664e-05, - "loss": 0.4389, - "step": 6934 - }, - { - "epoch": 0.24342301549693746, - "grad_norm": 0.44711625576019287, - "learning_rate": 4.271481481481482e-05, - "loss": 0.4846, - "step": 6935 - }, - { - "epoch": 0.24345811614805454, - "grad_norm": 0.4560905396938324, - "learning_rate": 4.2712962962962964e-05, - "loss": 0.373, - "step": 6936 - }, - { - "epoch": 0.24349321679917163, - "grad_norm": 0.3802121877670288, - "learning_rate": 4.2711111111111114e-05, - "loss": 0.3421, - "step": 6937 - }, - { - "epoch": 0.2435283174502887, - "grad_norm": 0.5450112819671631, - "learning_rate": 4.270925925925926e-05, - "loss": 0.5158, - "step": 6938 - }, - { - "epoch": 0.24356341810140578, - "grad_norm": 0.518854558467865, - "learning_rate": 4.270740740740741e-05, - "loss": 0.4962, - "step": 6939 - }, - { - "epoch": 0.24359851875252286, - "grad_norm": 0.46498724818229675, - "learning_rate": 4.270555555555556e-05, - "loss": 0.5436, - "step": 6940 - }, - { - "epoch": 0.24363361940363995, - "grad_norm": 0.4961741268634796, - "learning_rate": 4.270370370370371e-05, - "loss": 0.5768, - "step": 6941 - }, - { - "epoch": 0.243668720054757, - "grad_norm": 0.5461754202842712, - "learning_rate": 4.270185185185185e-05, - "loss": 0.5749, - "step": 6942 - }, - { - "epoch": 0.2437038207058741, - "grad_norm": 0.5310773849487305, - "learning_rate": 4.27e-05, - "loss": 0.6311, - "step": 6943 - }, - { - "epoch": 0.24373892135699118, - "grad_norm": 0.5658936500549316, - "learning_rate": 4.269814814814815e-05, - "loss": 0.5011, - "step": 6944 - }, - { - "epoch": 0.24377402200810824, - "grad_norm": 0.581426203250885, - "learning_rate": 4.26962962962963e-05, - "loss": 0.5425, - "step": 6945 - }, - { - "epoch": 0.24380912265922533, - "grad_norm": 0.4668869376182556, - "learning_rate": 4.2694444444444445e-05, - "loss": 0.4952, - "step": 6946 - }, - { - "epoch": 0.24384422331034242, - "grad_norm": 0.4065190553665161, - "learning_rate": 4.2692592592592595e-05, - "loss": 0.5298, - "step": 6947 - }, - { - "epoch": 0.24387932396145948, - "grad_norm": 0.6186764240264893, - "learning_rate": 4.2690740740740745e-05, - "loss": 0.4289, - "step": 6948 - }, - { - "epoch": 0.24391442461257656, - "grad_norm": 0.4755892753601074, - "learning_rate": 4.268888888888889e-05, - "loss": 0.4255, - "step": 6949 - }, - { - "epoch": 0.24394952526369365, - "grad_norm": 0.5672796964645386, - "learning_rate": 4.268703703703704e-05, - "loss": 0.5469, - "step": 6950 - }, - { - "epoch": 0.2439846259148107, - "grad_norm": 0.49250680208206177, - "learning_rate": 4.268518518518519e-05, - "loss": 0.5521, - "step": 6951 - }, - { - "epoch": 0.2440197265659278, - "grad_norm": 0.4093945324420929, - "learning_rate": 4.268333333333334e-05, - "loss": 0.423, - "step": 6952 - }, - { - "epoch": 0.24405482721704488, - "grad_norm": 0.4717786908149719, - "learning_rate": 4.268148148148148e-05, - "loss": 0.4745, - "step": 6953 - }, - { - "epoch": 0.24408992786816194, - "grad_norm": 0.45713570713996887, - "learning_rate": 4.267962962962963e-05, - "loss": 0.4631, - "step": 6954 - }, - { - "epoch": 0.24412502851927903, - "grad_norm": 0.5422622561454773, - "learning_rate": 4.2677777777777775e-05, - "loss": 0.4487, - "step": 6955 - }, - { - "epoch": 0.24416012917039612, - "grad_norm": 0.49696022272109985, - "learning_rate": 4.267592592592593e-05, - "loss": 0.4343, - "step": 6956 - }, - { - "epoch": 0.24419522982151318, - "grad_norm": 0.4624587595462799, - "learning_rate": 4.2674074074074076e-05, - "loss": 0.6225, - "step": 6957 - }, - { - "epoch": 0.24423033047263026, - "grad_norm": 0.49349379539489746, - "learning_rate": 4.2672222222222226e-05, - "loss": 0.537, - "step": 6958 - }, - { - "epoch": 0.24426543112374735, - "grad_norm": 0.5288269519805908, - "learning_rate": 4.267037037037037e-05, - "loss": 0.5504, - "step": 6959 - }, - { - "epoch": 0.2443005317748644, - "grad_norm": 0.4193178415298462, - "learning_rate": 4.266851851851852e-05, - "loss": 0.5297, - "step": 6960 - }, - { - "epoch": 0.2443356324259815, - "grad_norm": 0.43375569581985474, - "learning_rate": 4.266666666666667e-05, - "loss": 0.4591, - "step": 6961 - }, - { - "epoch": 0.24437073307709858, - "grad_norm": 0.44433775544166565, - "learning_rate": 4.266481481481482e-05, - "loss": 0.4628, - "step": 6962 - }, - { - "epoch": 0.24440583372821567, - "grad_norm": 0.45618709921836853, - "learning_rate": 4.266296296296296e-05, - "loss": 0.5305, - "step": 6963 - }, - { - "epoch": 0.24444093437933273, - "grad_norm": 0.5348595976829529, - "learning_rate": 4.266111111111111e-05, - "loss": 0.4877, - "step": 6964 - }, - { - "epoch": 0.24447603503044982, - "grad_norm": 0.5348373651504517, - "learning_rate": 4.265925925925926e-05, - "loss": 0.5239, - "step": 6965 - }, - { - "epoch": 0.2445111356815669, - "grad_norm": 0.4484930634498596, - "learning_rate": 4.2657407407407406e-05, - "loss": 0.488, - "step": 6966 - }, - { - "epoch": 0.24454623633268396, - "grad_norm": 0.44501793384552, - "learning_rate": 4.2655555555555556e-05, - "loss": 0.4888, - "step": 6967 - }, - { - "epoch": 0.24458133698380105, - "grad_norm": 0.5121344327926636, - "learning_rate": 4.2653703703703706e-05, - "loss": 0.5273, - "step": 6968 - }, - { - "epoch": 0.24461643763491814, - "grad_norm": 0.49874916672706604, - "learning_rate": 4.2651851851851857e-05, - "loss": 0.5687, - "step": 6969 - }, - { - "epoch": 0.2446515382860352, - "grad_norm": 0.4484309256076813, - "learning_rate": 4.265e-05, - "loss": 0.5826, - "step": 6970 - }, - { - "epoch": 0.24468663893715228, - "grad_norm": 0.5018080472946167, - "learning_rate": 4.264814814814815e-05, - "loss": 0.5944, - "step": 6971 - }, - { - "epoch": 0.24472173958826937, - "grad_norm": 0.49855837225914, - "learning_rate": 4.26462962962963e-05, - "loss": 0.5789, - "step": 6972 - }, - { - "epoch": 0.24475684023938643, - "grad_norm": 0.5241482853889465, - "learning_rate": 4.264444444444445e-05, - "loss": 0.576, - "step": 6973 - }, - { - "epoch": 0.24479194089050352, - "grad_norm": 0.5238657593727112, - "learning_rate": 4.2642592592592594e-05, - "loss": 0.5573, - "step": 6974 - }, - { - "epoch": 0.2448270415416206, - "grad_norm": 0.5198936462402344, - "learning_rate": 4.2640740740740744e-05, - "loss": 0.5361, - "step": 6975 - }, - { - "epoch": 0.24486214219273766, - "grad_norm": 0.5167099237442017, - "learning_rate": 4.263888888888889e-05, - "loss": 0.5365, - "step": 6976 - }, - { - "epoch": 0.24489724284385475, - "grad_norm": 0.46279826760292053, - "learning_rate": 4.263703703703704e-05, - "loss": 0.4267, - "step": 6977 - }, - { - "epoch": 0.24493234349497184, - "grad_norm": 0.43143513798713684, - "learning_rate": 4.263518518518519e-05, - "loss": 0.4469, - "step": 6978 - }, - { - "epoch": 0.2449674441460889, - "grad_norm": 0.42553937435150146, - "learning_rate": 4.263333333333334e-05, - "loss": 0.5238, - "step": 6979 - }, - { - "epoch": 0.24500254479720598, - "grad_norm": 0.42662864923477173, - "learning_rate": 4.263148148148148e-05, - "loss": 0.5576, - "step": 6980 - }, - { - "epoch": 0.24503764544832307, - "grad_norm": 0.5468123555183411, - "learning_rate": 4.262962962962963e-05, - "loss": 0.3577, - "step": 6981 - }, - { - "epoch": 0.24507274609944013, - "grad_norm": 0.5001240968704224, - "learning_rate": 4.262777777777778e-05, - "loss": 0.5097, - "step": 6982 - }, - { - "epoch": 0.24510784675055722, - "grad_norm": 0.45074984431266785, - "learning_rate": 4.262592592592593e-05, - "loss": 0.4498, - "step": 6983 - }, - { - "epoch": 0.2451429474016743, - "grad_norm": 0.45820125937461853, - "learning_rate": 4.2624074074074074e-05, - "loss": 0.5091, - "step": 6984 - }, - { - "epoch": 0.2451780480527914, - "grad_norm": 0.42933180928230286, - "learning_rate": 4.2622222222222224e-05, - "loss": 0.496, - "step": 6985 - }, - { - "epoch": 0.24521314870390845, - "grad_norm": 0.5488976240158081, - "learning_rate": 4.2620370370370375e-05, - "loss": 0.6557, - "step": 6986 - }, - { - "epoch": 0.24524824935502554, - "grad_norm": 0.40260982513427734, - "learning_rate": 4.261851851851852e-05, - "loss": 0.392, - "step": 6987 - }, - { - "epoch": 0.24528335000614263, - "grad_norm": 0.523179829120636, - "learning_rate": 4.261666666666667e-05, - "loss": 0.591, - "step": 6988 - }, - { - "epoch": 0.24531845065725968, - "grad_norm": 0.5950659513473511, - "learning_rate": 4.261481481481482e-05, - "loss": 0.543, - "step": 6989 - }, - { - "epoch": 0.24535355130837677, - "grad_norm": 0.4581586718559265, - "learning_rate": 4.261296296296297e-05, - "loss": 0.5436, - "step": 6990 - }, - { - "epoch": 0.24538865195949386, - "grad_norm": 0.4107372462749481, - "learning_rate": 4.261111111111111e-05, - "loss": 0.5762, - "step": 6991 - }, - { - "epoch": 0.24542375261061092, - "grad_norm": 0.5515720844268799, - "learning_rate": 4.260925925925926e-05, - "loss": 0.6058, - "step": 6992 - }, - { - "epoch": 0.245458853261728, - "grad_norm": 0.451411634683609, - "learning_rate": 4.2607407407407405e-05, - "loss": 0.4552, - "step": 6993 - }, - { - "epoch": 0.2454939539128451, - "grad_norm": 0.4619219899177551, - "learning_rate": 4.260555555555556e-05, - "loss": 0.4779, - "step": 6994 - }, - { - "epoch": 0.24552905456396215, - "grad_norm": 0.47544780373573303, - "learning_rate": 4.2603703703703705e-05, - "loss": 0.4688, - "step": 6995 - }, - { - "epoch": 0.24556415521507924, - "grad_norm": 0.5455889105796814, - "learning_rate": 4.2601851851851855e-05, - "loss": 0.4723, - "step": 6996 - }, - { - "epoch": 0.24559925586619633, - "grad_norm": 0.458645761013031, - "learning_rate": 4.26e-05, - "loss": 0.4535, - "step": 6997 - }, - { - "epoch": 0.24563435651731338, - "grad_norm": 0.49079012870788574, - "learning_rate": 4.259814814814815e-05, - "loss": 0.5908, - "step": 6998 - }, - { - "epoch": 0.24566945716843047, - "grad_norm": 0.5490921139717102, - "learning_rate": 4.25962962962963e-05, - "loss": 0.4916, - "step": 6999 - }, - { - "epoch": 0.24570455781954756, - "grad_norm": 0.4475092589855194, - "learning_rate": 4.259444444444445e-05, - "loss": 0.5122, - "step": 7000 - }, - { - "epoch": 0.24573965847066462, - "grad_norm": 0.5068225860595703, - "learning_rate": 4.259259259259259e-05, - "loss": 0.4884, - "step": 7001 - }, - { - "epoch": 0.2457747591217817, - "grad_norm": 0.5613632202148438, - "learning_rate": 4.259074074074074e-05, - "loss": 0.5797, - "step": 7002 - }, - { - "epoch": 0.2458098597728988, - "grad_norm": 0.48224636912345886, - "learning_rate": 4.258888888888889e-05, - "loss": 0.4656, - "step": 7003 - }, - { - "epoch": 0.24584496042401588, - "grad_norm": 0.4878477156162262, - "learning_rate": 4.2587037037037036e-05, - "loss": 0.5443, - "step": 7004 - }, - { - "epoch": 0.24588006107513294, - "grad_norm": 0.45970049500465393, - "learning_rate": 4.2585185185185186e-05, - "loss": 0.5729, - "step": 7005 - }, - { - "epoch": 0.24591516172625003, - "grad_norm": 0.46748507022857666, - "learning_rate": 4.2583333333333336e-05, - "loss": 0.4124, - "step": 7006 - }, - { - "epoch": 0.2459502623773671, - "grad_norm": 0.48001840710639954, - "learning_rate": 4.2581481481481486e-05, - "loss": 0.5765, - "step": 7007 - }, - { - "epoch": 0.24598536302848417, - "grad_norm": 0.47823601961135864, - "learning_rate": 4.257962962962963e-05, - "loss": 0.5016, - "step": 7008 - }, - { - "epoch": 0.24602046367960126, - "grad_norm": 0.5867689251899719, - "learning_rate": 4.257777777777778e-05, - "loss": 0.5132, - "step": 7009 - }, - { - "epoch": 0.24605556433071835, - "grad_norm": 0.5977613925933838, - "learning_rate": 4.257592592592593e-05, - "loss": 0.517, - "step": 7010 - }, - { - "epoch": 0.2460906649818354, - "grad_norm": 0.5405681133270264, - "learning_rate": 4.257407407407408e-05, - "loss": 0.566, - "step": 7011 - }, - { - "epoch": 0.2461257656329525, - "grad_norm": 0.48706933856010437, - "learning_rate": 4.257222222222222e-05, - "loss": 0.5387, - "step": 7012 - }, - { - "epoch": 0.24616086628406958, - "grad_norm": 0.5258790254592896, - "learning_rate": 4.257037037037037e-05, - "loss": 0.5358, - "step": 7013 - }, - { - "epoch": 0.24619596693518664, - "grad_norm": 0.49113452434539795, - "learning_rate": 4.256851851851852e-05, - "loss": 0.4823, - "step": 7014 - }, - { - "epoch": 0.24623106758630373, - "grad_norm": 0.4506652355194092, - "learning_rate": 4.2566666666666674e-05, - "loss": 0.4511, - "step": 7015 - }, - { - "epoch": 0.2462661682374208, - "grad_norm": 0.5110761523246765, - "learning_rate": 4.256481481481482e-05, - "loss": 0.4381, - "step": 7016 - }, - { - "epoch": 0.24630126888853787, - "grad_norm": 0.47345781326293945, - "learning_rate": 4.256296296296297e-05, - "loss": 0.5417, - "step": 7017 - }, - { - "epoch": 0.24633636953965496, - "grad_norm": 0.45470237731933594, - "learning_rate": 4.256111111111111e-05, - "loss": 0.4883, - "step": 7018 - }, - { - "epoch": 0.24637147019077205, - "grad_norm": 0.6039714813232422, - "learning_rate": 4.255925925925926e-05, - "loss": 0.5842, - "step": 7019 - }, - { - "epoch": 0.2464065708418891, - "grad_norm": 0.5056969523429871, - "learning_rate": 4.2557407407407404e-05, - "loss": 0.4385, - "step": 7020 - }, - { - "epoch": 0.2464416714930062, - "grad_norm": 0.5261315107345581, - "learning_rate": 4.255555555555556e-05, - "loss": 0.4875, - "step": 7021 - }, - { - "epoch": 0.24647677214412328, - "grad_norm": 0.5112189054489136, - "learning_rate": 4.2553703703703704e-05, - "loss": 0.4859, - "step": 7022 - }, - { - "epoch": 0.24651187279524034, - "grad_norm": 0.49181243777275085, - "learning_rate": 4.2551851851851854e-05, - "loss": 0.4686, - "step": 7023 - }, - { - "epoch": 0.24654697344635743, - "grad_norm": 0.4199931025505066, - "learning_rate": 4.2550000000000004e-05, - "loss": 0.4753, - "step": 7024 - }, - { - "epoch": 0.2465820740974745, - "grad_norm": 0.43883615732192993, - "learning_rate": 4.254814814814815e-05, - "loss": 0.4208, - "step": 7025 - }, - { - "epoch": 0.2466171747485916, - "grad_norm": 0.4909782111644745, - "learning_rate": 4.25462962962963e-05, - "loss": 0.5374, - "step": 7026 - }, - { - "epoch": 0.24665227539970866, - "grad_norm": 0.42134609818458557, - "learning_rate": 4.254444444444445e-05, - "loss": 0.3836, - "step": 7027 - }, - { - "epoch": 0.24668737605082575, - "grad_norm": 0.43803849816322327, - "learning_rate": 4.25425925925926e-05, - "loss": 0.4897, - "step": 7028 - }, - { - "epoch": 0.24672247670194283, - "grad_norm": 0.43950870633125305, - "learning_rate": 4.254074074074074e-05, - "loss": 0.5325, - "step": 7029 - }, - { - "epoch": 0.2467575773530599, - "grad_norm": 0.45738285779953003, - "learning_rate": 4.253888888888889e-05, - "loss": 0.4679, - "step": 7030 - }, - { - "epoch": 0.24679267800417698, - "grad_norm": 0.49183133244514465, - "learning_rate": 4.2537037037037035e-05, - "loss": 0.5015, - "step": 7031 - }, - { - "epoch": 0.24682777865529407, - "grad_norm": 0.564083993434906, - "learning_rate": 4.253518518518519e-05, - "loss": 0.6433, - "step": 7032 - }, - { - "epoch": 0.24686287930641113, - "grad_norm": 0.4594324827194214, - "learning_rate": 4.2533333333333335e-05, - "loss": 0.5006, - "step": 7033 - }, - { - "epoch": 0.2468979799575282, - "grad_norm": 0.4871464967727661, - "learning_rate": 4.2531481481481485e-05, - "loss": 0.5334, - "step": 7034 - }, - { - "epoch": 0.2469330806086453, - "grad_norm": 0.4858265519142151, - "learning_rate": 4.252962962962963e-05, - "loss": 0.5849, - "step": 7035 - }, - { - "epoch": 0.24696818125976236, - "grad_norm": 0.40745511651039124, - "learning_rate": 4.252777777777778e-05, - "loss": 0.4368, - "step": 7036 - }, - { - "epoch": 0.24700328191087945, - "grad_norm": 0.4941307306289673, - "learning_rate": 4.252592592592593e-05, - "loss": 0.4849, - "step": 7037 - }, - { - "epoch": 0.24703838256199653, - "grad_norm": 0.4153818190097809, - "learning_rate": 4.252407407407408e-05, - "loss": 0.4623, - "step": 7038 - }, - { - "epoch": 0.2470734832131136, - "grad_norm": 0.4647553265094757, - "learning_rate": 4.252222222222222e-05, - "loss": 0.4802, - "step": 7039 - }, - { - "epoch": 0.24710858386423068, - "grad_norm": 0.45652320981025696, - "learning_rate": 4.252037037037037e-05, - "loss": 0.5362, - "step": 7040 - }, - { - "epoch": 0.24714368451534777, - "grad_norm": 0.517852246761322, - "learning_rate": 4.2518518518518515e-05, - "loss": 0.4955, - "step": 7041 - }, - { - "epoch": 0.24717878516646483, - "grad_norm": 0.4662238359451294, - "learning_rate": 4.251666666666667e-05, - "loss": 0.5028, - "step": 7042 - }, - { - "epoch": 0.2472138858175819, - "grad_norm": 0.5233339667320251, - "learning_rate": 4.2514814814814816e-05, - "loss": 0.5064, - "step": 7043 - }, - { - "epoch": 0.247248986468699, - "grad_norm": 0.4797881245613098, - "learning_rate": 4.2512962962962966e-05, - "loss": 0.5585, - "step": 7044 - }, - { - "epoch": 0.24728408711981606, - "grad_norm": 0.43118569254875183, - "learning_rate": 4.2511111111111116e-05, - "loss": 0.6005, - "step": 7045 - }, - { - "epoch": 0.24731918777093315, - "grad_norm": 0.550628662109375, - "learning_rate": 4.250925925925926e-05, - "loss": 0.5447, - "step": 7046 - }, - { - "epoch": 0.24735428842205023, - "grad_norm": 0.4486560821533203, - "learning_rate": 4.250740740740741e-05, - "loss": 0.401, - "step": 7047 - }, - { - "epoch": 0.24738938907316732, - "grad_norm": 0.5651117563247681, - "learning_rate": 4.250555555555556e-05, - "loss": 0.5831, - "step": 7048 - }, - { - "epoch": 0.24742448972428438, - "grad_norm": 0.4114733636379242, - "learning_rate": 4.250370370370371e-05, - "loss": 0.4119, - "step": 7049 - }, - { - "epoch": 0.24745959037540147, - "grad_norm": 0.507252037525177, - "learning_rate": 4.250185185185185e-05, - "loss": 0.5378, - "step": 7050 - }, - { - "epoch": 0.24749469102651855, - "grad_norm": 0.4670450687408447, - "learning_rate": 4.25e-05, - "loss": 0.5102, - "step": 7051 - }, - { - "epoch": 0.2475297916776356, - "grad_norm": 0.4902265667915344, - "learning_rate": 4.2498148148148146e-05, - "loss": 0.5089, - "step": 7052 - }, - { - "epoch": 0.2475648923287527, - "grad_norm": 0.45112982392311096, - "learning_rate": 4.24962962962963e-05, - "loss": 0.4686, - "step": 7053 - }, - { - "epoch": 0.2475999929798698, - "grad_norm": 0.5185917019844055, - "learning_rate": 4.2494444444444447e-05, - "loss": 0.4842, - "step": 7054 - }, - { - "epoch": 0.24763509363098685, - "grad_norm": 0.4477941393852234, - "learning_rate": 4.24925925925926e-05, - "loss": 0.4355, - "step": 7055 - }, - { - "epoch": 0.24767019428210393, - "grad_norm": 0.4774925112724304, - "learning_rate": 4.249074074074074e-05, - "loss": 0.4616, - "step": 7056 - }, - { - "epoch": 0.24770529493322102, - "grad_norm": 0.6041865944862366, - "learning_rate": 4.248888888888889e-05, - "loss": 0.5001, - "step": 7057 - }, - { - "epoch": 0.24774039558433808, - "grad_norm": 0.553979754447937, - "learning_rate": 4.2487037037037034e-05, - "loss": 0.4935, - "step": 7058 - }, - { - "epoch": 0.24777549623545517, - "grad_norm": 0.4230549931526184, - "learning_rate": 4.248518518518519e-05, - "loss": 0.5296, - "step": 7059 - }, - { - "epoch": 0.24781059688657225, - "grad_norm": 0.49311187863349915, - "learning_rate": 4.2483333333333334e-05, - "loss": 0.5015, - "step": 7060 - }, - { - "epoch": 0.2478456975376893, - "grad_norm": 0.49659547209739685, - "learning_rate": 4.2481481481481484e-05, - "loss": 0.4248, - "step": 7061 - }, - { - "epoch": 0.2478807981888064, - "grad_norm": 0.4562700688838959, - "learning_rate": 4.247962962962963e-05, - "loss": 0.5414, - "step": 7062 - }, - { - "epoch": 0.2479158988399235, - "grad_norm": 0.508107602596283, - "learning_rate": 4.247777777777778e-05, - "loss": 0.5515, - "step": 7063 - }, - { - "epoch": 0.24795099949104055, - "grad_norm": 0.5510320663452148, - "learning_rate": 4.247592592592593e-05, - "loss": 0.4767, - "step": 7064 - }, - { - "epoch": 0.24798610014215763, - "grad_norm": 0.47603747248649597, - "learning_rate": 4.247407407407408e-05, - "loss": 0.5175, - "step": 7065 - }, - { - "epoch": 0.24802120079327472, - "grad_norm": 0.501089334487915, - "learning_rate": 4.247222222222223e-05, - "loss": 0.532, - "step": 7066 - }, - { - "epoch": 0.24805630144439178, - "grad_norm": 0.5812703967094421, - "learning_rate": 4.247037037037037e-05, - "loss": 0.4472, - "step": 7067 - }, - { - "epoch": 0.24809140209550887, - "grad_norm": 0.6095289587974548, - "learning_rate": 4.246851851851852e-05, - "loss": 0.5531, - "step": 7068 - }, - { - "epoch": 0.24812650274662595, - "grad_norm": 0.566001832485199, - "learning_rate": 4.246666666666667e-05, - "loss": 0.5247, - "step": 7069 - }, - { - "epoch": 0.24816160339774304, - "grad_norm": 0.5864576697349548, - "learning_rate": 4.246481481481482e-05, - "loss": 0.6146, - "step": 7070 - }, - { - "epoch": 0.2481967040488601, - "grad_norm": 0.5482074618339539, - "learning_rate": 4.2462962962962965e-05, - "loss": 0.5093, - "step": 7071 - }, - { - "epoch": 0.2482318046999772, - "grad_norm": 0.4466283321380615, - "learning_rate": 4.2461111111111115e-05, - "loss": 0.3914, - "step": 7072 - }, - { - "epoch": 0.24826690535109427, - "grad_norm": 0.5148766040802002, - "learning_rate": 4.245925925925926e-05, - "loss": 0.6296, - "step": 7073 - }, - { - "epoch": 0.24830200600221133, - "grad_norm": 0.4346380829811096, - "learning_rate": 4.245740740740741e-05, - "loss": 0.3514, - "step": 7074 - }, - { - "epoch": 0.24833710665332842, - "grad_norm": 0.4417968690395355, - "learning_rate": 4.245555555555556e-05, - "loss": 0.4543, - "step": 7075 - }, - { - "epoch": 0.2483722073044455, - "grad_norm": 0.4102010428905487, - "learning_rate": 4.245370370370371e-05, - "loss": 0.4522, - "step": 7076 - }, - { - "epoch": 0.24840730795556257, - "grad_norm": 0.5148254632949829, - "learning_rate": 4.245185185185185e-05, - "loss": 0.5967, - "step": 7077 - }, - { - "epoch": 0.24844240860667965, - "grad_norm": 0.45159924030303955, - "learning_rate": 4.245e-05, - "loss": 0.5124, - "step": 7078 - }, - { - "epoch": 0.24847750925779674, - "grad_norm": 0.5885364413261414, - "learning_rate": 4.2448148148148145e-05, - "loss": 0.445, - "step": 7079 - }, - { - "epoch": 0.2485126099089138, - "grad_norm": 0.4593806862831116, - "learning_rate": 4.24462962962963e-05, - "loss": 0.5288, - "step": 7080 - }, - { - "epoch": 0.2485477105600309, - "grad_norm": 0.5034538507461548, - "learning_rate": 4.2444444444444445e-05, - "loss": 0.4754, - "step": 7081 - }, - { - "epoch": 0.24858281121114797, - "grad_norm": 0.43144387006759644, - "learning_rate": 4.2442592592592596e-05, - "loss": 0.4444, - "step": 7082 - }, - { - "epoch": 0.24861791186226503, - "grad_norm": 0.4506106376647949, - "learning_rate": 4.244074074074074e-05, - "loss": 0.4667, - "step": 7083 - }, - { - "epoch": 0.24865301251338212, - "grad_norm": 0.5632027983665466, - "learning_rate": 4.243888888888889e-05, - "loss": 0.5748, - "step": 7084 - }, - { - "epoch": 0.2486881131644992, - "grad_norm": 0.567257285118103, - "learning_rate": 4.243703703703704e-05, - "loss": 0.4976, - "step": 7085 - }, - { - "epoch": 0.24872321381561627, - "grad_norm": 0.5478390455245972, - "learning_rate": 4.243518518518519e-05, - "loss": 0.4991, - "step": 7086 - }, - { - "epoch": 0.24875831446673335, - "grad_norm": 0.5787428021430969, - "learning_rate": 4.243333333333334e-05, - "loss": 0.4389, - "step": 7087 - }, - { - "epoch": 0.24879341511785044, - "grad_norm": 0.505879819393158, - "learning_rate": 4.243148148148148e-05, - "loss": 0.5124, - "step": 7088 - }, - { - "epoch": 0.2488285157689675, - "grad_norm": 0.5444765090942383, - "learning_rate": 4.242962962962963e-05, - "loss": 0.3921, - "step": 7089 - }, - { - "epoch": 0.2488636164200846, - "grad_norm": 0.5358332395553589, - "learning_rate": 4.2427777777777776e-05, - "loss": 0.603, - "step": 7090 - }, - { - "epoch": 0.24889871707120167, - "grad_norm": 0.4800393879413605, - "learning_rate": 4.242592592592593e-05, - "loss": 0.5265, - "step": 7091 - }, - { - "epoch": 0.24893381772231876, - "grad_norm": 0.47645288705825806, - "learning_rate": 4.2424074074074076e-05, - "loss": 0.5214, - "step": 7092 - }, - { - "epoch": 0.24896891837343582, - "grad_norm": 0.557019054889679, - "learning_rate": 4.2422222222222226e-05, - "loss": 0.5599, - "step": 7093 - }, - { - "epoch": 0.2490040190245529, - "grad_norm": 0.4789045751094818, - "learning_rate": 4.242037037037037e-05, - "loss": 0.4068, - "step": 7094 - }, - { - "epoch": 0.24903911967567, - "grad_norm": 0.48255497217178345, - "learning_rate": 4.241851851851852e-05, - "loss": 0.4665, - "step": 7095 - }, - { - "epoch": 0.24907422032678705, - "grad_norm": 0.5077030062675476, - "learning_rate": 4.241666666666667e-05, - "loss": 0.4287, - "step": 7096 - }, - { - "epoch": 0.24910932097790414, - "grad_norm": 0.4806293249130249, - "learning_rate": 4.241481481481482e-05, - "loss": 0.4511, - "step": 7097 - }, - { - "epoch": 0.24914442162902123, - "grad_norm": 0.4229167401790619, - "learning_rate": 4.2412962962962963e-05, - "loss": 0.6002, - "step": 7098 - }, - { - "epoch": 0.2491795222801383, - "grad_norm": 0.4636472165584564, - "learning_rate": 4.2411111111111114e-05, - "loss": 0.5576, - "step": 7099 - }, - { - "epoch": 0.24921462293125538, - "grad_norm": 0.5567567944526672, - "learning_rate": 4.240925925925926e-05, - "loss": 0.481, - "step": 7100 - }, - { - "epoch": 0.24924972358237246, - "grad_norm": 0.4978519082069397, - "learning_rate": 4.240740740740741e-05, - "loss": 0.5479, - "step": 7101 - }, - { - "epoch": 0.24928482423348952, - "grad_norm": 0.4666975736618042, - "learning_rate": 4.240555555555556e-05, - "loss": 0.6045, - "step": 7102 - }, - { - "epoch": 0.2493199248846066, - "grad_norm": 0.4563223123550415, - "learning_rate": 4.240370370370371e-05, - "loss": 0.5314, - "step": 7103 - }, - { - "epoch": 0.2493550255357237, - "grad_norm": 0.438554584980011, - "learning_rate": 4.240185185185185e-05, - "loss": 0.538, - "step": 7104 - }, - { - "epoch": 0.24939012618684075, - "grad_norm": 0.8616973757743835, - "learning_rate": 4.24e-05, - "loss": 0.5372, - "step": 7105 - }, - { - "epoch": 0.24942522683795784, - "grad_norm": 0.5321096777915955, - "learning_rate": 4.239814814814815e-05, - "loss": 0.5338, - "step": 7106 - }, - { - "epoch": 0.24946032748907493, - "grad_norm": 0.4426969885826111, - "learning_rate": 4.23962962962963e-05, - "loss": 0.543, - "step": 7107 - }, - { - "epoch": 0.249495428140192, - "grad_norm": 0.5106090307235718, - "learning_rate": 4.239444444444445e-05, - "loss": 0.6711, - "step": 7108 - }, - { - "epoch": 0.24953052879130908, - "grad_norm": 0.46464648842811584, - "learning_rate": 4.2392592592592594e-05, - "loss": 0.5391, - "step": 7109 - }, - { - "epoch": 0.24956562944242616, - "grad_norm": 0.5068814754486084, - "learning_rate": 4.2390740740740744e-05, - "loss": 0.6058, - "step": 7110 - }, - { - "epoch": 0.24960073009354322, - "grad_norm": 0.42722052335739136, - "learning_rate": 4.238888888888889e-05, - "loss": 0.4632, - "step": 7111 - }, - { - "epoch": 0.2496358307446603, - "grad_norm": 0.5194745659828186, - "learning_rate": 4.2387037037037045e-05, - "loss": 0.5376, - "step": 7112 - }, - { - "epoch": 0.2496709313957774, - "grad_norm": 0.4482170641422272, - "learning_rate": 4.238518518518519e-05, - "loss": 0.5376, - "step": 7113 - }, - { - "epoch": 0.24970603204689448, - "grad_norm": 0.5873549580574036, - "learning_rate": 4.238333333333334e-05, - "loss": 0.515, - "step": 7114 - }, - { - "epoch": 0.24974113269801154, - "grad_norm": 0.9540478587150574, - "learning_rate": 4.238148148148148e-05, - "loss": 0.5489, - "step": 7115 - }, - { - "epoch": 0.24977623334912863, - "grad_norm": 0.4486808180809021, - "learning_rate": 4.237962962962963e-05, - "loss": 0.5527, - "step": 7116 - }, - { - "epoch": 0.24981133400024572, - "grad_norm": 0.4738709330558777, - "learning_rate": 4.2377777777777775e-05, - "loss": 0.4845, - "step": 7117 - }, - { - "epoch": 0.24984643465136278, - "grad_norm": 0.5459502339363098, - "learning_rate": 4.237592592592593e-05, - "loss": 0.6284, - "step": 7118 - }, - { - "epoch": 0.24988153530247986, - "grad_norm": 0.49728554487228394, - "learning_rate": 4.2374074074074075e-05, - "loss": 0.5814, - "step": 7119 - }, - { - "epoch": 0.24991663595359695, - "grad_norm": 0.536780595779419, - "learning_rate": 4.2372222222222225e-05, - "loss": 0.5544, - "step": 7120 - }, - { - "epoch": 0.249951736604714, - "grad_norm": 0.5337573289871216, - "learning_rate": 4.237037037037037e-05, - "loss": 0.509, - "step": 7121 - }, - { - "epoch": 0.2499868372558311, - "grad_norm": 0.5701779127120972, - "learning_rate": 4.236851851851852e-05, - "loss": 0.4915, - "step": 7122 - }, - { - "epoch": 0.25002193790694816, - "grad_norm": 0.4941242039203644, - "learning_rate": 4.236666666666667e-05, - "loss": 0.4817, - "step": 7123 - }, - { - "epoch": 0.25005703855806527, - "grad_norm": 0.49657541513442993, - "learning_rate": 4.236481481481482e-05, - "loss": 0.5368, - "step": 7124 - }, - { - "epoch": 0.25009213920918233, - "grad_norm": 0.4352029263973236, - "learning_rate": 4.236296296296296e-05, - "loss": 0.4846, - "step": 7125 - }, - { - "epoch": 0.2501272398602994, - "grad_norm": 0.5268885493278503, - "learning_rate": 4.236111111111111e-05, - "loss": 0.5812, - "step": 7126 - }, - { - "epoch": 0.2501623405114165, - "grad_norm": 0.45606619119644165, - "learning_rate": 4.235925925925926e-05, - "loss": 0.4731, - "step": 7127 - }, - { - "epoch": 0.25019744116253356, - "grad_norm": 0.41942670941352844, - "learning_rate": 4.2357407407407406e-05, - "loss": 0.4807, - "step": 7128 - }, - { - "epoch": 0.2502325418136506, - "grad_norm": 0.4141677916049957, - "learning_rate": 4.235555555555556e-05, - "loss": 0.3767, - "step": 7129 - }, - { - "epoch": 0.25026764246476774, - "grad_norm": 0.5422846078872681, - "learning_rate": 4.2353703703703706e-05, - "loss": 0.3926, - "step": 7130 - }, - { - "epoch": 0.2503027431158848, - "grad_norm": 0.5508262515068054, - "learning_rate": 4.2351851851851856e-05, - "loss": 0.5655, - "step": 7131 - }, - { - "epoch": 0.25033784376700186, - "grad_norm": 0.4559263288974762, - "learning_rate": 4.235e-05, - "loss": 0.526, - "step": 7132 - }, - { - "epoch": 0.25037294441811897, - "grad_norm": 0.4793725609779358, - "learning_rate": 4.234814814814815e-05, - "loss": 0.5419, - "step": 7133 - }, - { - "epoch": 0.25040804506923603, - "grad_norm": 0.49002400040626526, - "learning_rate": 4.23462962962963e-05, - "loss": 0.4994, - "step": 7134 - }, - { - "epoch": 0.2504431457203531, - "grad_norm": 0.46071499586105347, - "learning_rate": 4.234444444444445e-05, - "loss": 0.4359, - "step": 7135 - }, - { - "epoch": 0.2504782463714702, - "grad_norm": 0.520607054233551, - "learning_rate": 4.234259259259259e-05, - "loss": 0.4376, - "step": 7136 - }, - { - "epoch": 0.25051334702258726, - "grad_norm": 0.4493090808391571, - "learning_rate": 4.234074074074074e-05, - "loss": 0.5021, - "step": 7137 - }, - { - "epoch": 0.2505484476737043, - "grad_norm": 0.48063984513282776, - "learning_rate": 4.2338888888888887e-05, - "loss": 0.5306, - "step": 7138 - }, - { - "epoch": 0.25058354832482144, - "grad_norm": 0.4940265715122223, - "learning_rate": 4.2337037037037043e-05, - "loss": 0.472, - "step": 7139 - }, - { - "epoch": 0.2506186489759385, - "grad_norm": 0.5240289568901062, - "learning_rate": 4.233518518518519e-05, - "loss": 0.5482, - "step": 7140 - }, - { - "epoch": 0.25065374962705556, - "grad_norm": 0.5813514590263367, - "learning_rate": 4.233333333333334e-05, - "loss": 0.554, - "step": 7141 - }, - { - "epoch": 0.25068885027817267, - "grad_norm": 0.4468008875846863, - "learning_rate": 4.233148148148148e-05, - "loss": 0.4944, - "step": 7142 - }, - { - "epoch": 0.25072395092928973, - "grad_norm": 0.43575358390808105, - "learning_rate": 4.232962962962963e-05, - "loss": 0.4125, - "step": 7143 - }, - { - "epoch": 0.25075905158040684, - "grad_norm": 0.5256523489952087, - "learning_rate": 4.232777777777778e-05, - "loss": 0.5499, - "step": 7144 - }, - { - "epoch": 0.2507941522315239, - "grad_norm": 0.4577883780002594, - "learning_rate": 4.232592592592593e-05, - "loss": 0.4412, - "step": 7145 - }, - { - "epoch": 0.25082925288264096, - "grad_norm": 0.4863947033882141, - "learning_rate": 4.2324074074074074e-05, - "loss": 0.5468, - "step": 7146 - }, - { - "epoch": 0.2508643535337581, - "grad_norm": 0.511332631111145, - "learning_rate": 4.2322222222222224e-05, - "loss": 0.5163, - "step": 7147 - }, - { - "epoch": 0.25089945418487514, - "grad_norm": 0.5263325572013855, - "learning_rate": 4.2320370370370374e-05, - "loss": 0.6151, - "step": 7148 - }, - { - "epoch": 0.2509345548359922, - "grad_norm": 0.4978317320346832, - "learning_rate": 4.231851851851852e-05, - "loss": 0.5876, - "step": 7149 - }, - { - "epoch": 0.2509696554871093, - "grad_norm": 0.5254675149917603, - "learning_rate": 4.2316666666666674e-05, - "loss": 0.4583, - "step": 7150 - }, - { - "epoch": 0.25100475613822637, - "grad_norm": 0.5110726952552795, - "learning_rate": 4.231481481481482e-05, - "loss": 0.5279, - "step": 7151 - }, - { - "epoch": 0.25103985678934343, - "grad_norm": 0.5115559101104736, - "learning_rate": 4.231296296296297e-05, - "loss": 0.5373, - "step": 7152 - }, - { - "epoch": 0.25107495744046054, - "grad_norm": 0.48705095052719116, - "learning_rate": 4.231111111111111e-05, - "loss": 0.5626, - "step": 7153 - }, - { - "epoch": 0.2511100580915776, - "grad_norm": 0.46055570244789124, - "learning_rate": 4.230925925925926e-05, - "loss": 0.488, - "step": 7154 - }, - { - "epoch": 0.25114515874269466, - "grad_norm": 0.48841431736946106, - "learning_rate": 4.2307407407407405e-05, - "loss": 0.4853, - "step": 7155 - }, - { - "epoch": 0.2511802593938118, - "grad_norm": 0.46210959553718567, - "learning_rate": 4.230555555555556e-05, - "loss": 0.5343, - "step": 7156 - }, - { - "epoch": 0.25121536004492884, - "grad_norm": 0.4730744957923889, - "learning_rate": 4.2303703703703705e-05, - "loss": 0.5263, - "step": 7157 - }, - { - "epoch": 0.2512504606960459, - "grad_norm": 0.5047268867492676, - "learning_rate": 4.2301851851851855e-05, - "loss": 0.5864, - "step": 7158 - }, - { - "epoch": 0.251285561347163, - "grad_norm": 0.5080798268318176, - "learning_rate": 4.23e-05, - "loss": 0.6052, - "step": 7159 - }, - { - "epoch": 0.25132066199828007, - "grad_norm": 0.5213130712509155, - "learning_rate": 4.229814814814815e-05, - "loss": 0.4248, - "step": 7160 - }, - { - "epoch": 0.25135576264939713, - "grad_norm": 0.45383206009864807, - "learning_rate": 4.22962962962963e-05, - "loss": 0.588, - "step": 7161 - }, - { - "epoch": 0.25139086330051424, - "grad_norm": 0.5103148221969604, - "learning_rate": 4.229444444444445e-05, - "loss": 0.4779, - "step": 7162 - }, - { - "epoch": 0.2514259639516313, - "grad_norm": 0.42050713300704956, - "learning_rate": 4.229259259259259e-05, - "loss": 0.476, - "step": 7163 - }, - { - "epoch": 0.25146106460274836, - "grad_norm": 0.4576275646686554, - "learning_rate": 4.229074074074074e-05, - "loss": 0.4818, - "step": 7164 - }, - { - "epoch": 0.2514961652538655, - "grad_norm": 0.43666312098503113, - "learning_rate": 4.228888888888889e-05, - "loss": 0.4578, - "step": 7165 - }, - { - "epoch": 0.25153126590498254, - "grad_norm": 0.4320516884326935, - "learning_rate": 4.228703703703704e-05, - "loss": 0.4957, - "step": 7166 - }, - { - "epoch": 0.2515663665560996, - "grad_norm": 0.502587616443634, - "learning_rate": 4.2285185185185186e-05, - "loss": 0.4689, - "step": 7167 - }, - { - "epoch": 0.2516014672072167, - "grad_norm": 0.4766458570957184, - "learning_rate": 4.2283333333333336e-05, - "loss": 0.4655, - "step": 7168 - }, - { - "epoch": 0.25163656785833377, - "grad_norm": 0.4580024778842926, - "learning_rate": 4.2281481481481486e-05, - "loss": 0.4987, - "step": 7169 - }, - { - "epoch": 0.25167166850945083, - "grad_norm": 0.43670234084129333, - "learning_rate": 4.227962962962963e-05, - "loss": 0.4498, - "step": 7170 - }, - { - "epoch": 0.25170676916056794, - "grad_norm": 0.5042842030525208, - "learning_rate": 4.227777777777778e-05, - "loss": 0.4906, - "step": 7171 - }, - { - "epoch": 0.251741869811685, - "grad_norm": 0.44522666931152344, - "learning_rate": 4.227592592592593e-05, - "loss": 0.4227, - "step": 7172 - }, - { - "epoch": 0.25177697046280206, - "grad_norm": 0.5216597318649292, - "learning_rate": 4.227407407407408e-05, - "loss": 0.5298, - "step": 7173 - }, - { - "epoch": 0.2518120711139192, - "grad_norm": 0.5246523022651672, - "learning_rate": 4.227222222222222e-05, - "loss": 0.4895, - "step": 7174 - }, - { - "epoch": 0.25184717176503624, - "grad_norm": 0.5134465098381042, - "learning_rate": 4.227037037037037e-05, - "loss": 0.4537, - "step": 7175 - }, - { - "epoch": 0.2518822724161533, - "grad_norm": 0.4799898862838745, - "learning_rate": 4.2268518518518516e-05, - "loss": 0.4993, - "step": 7176 - }, - { - "epoch": 0.2519173730672704, - "grad_norm": 0.45143231749534607, - "learning_rate": 4.226666666666667e-05, - "loss": 0.573, - "step": 7177 - }, - { - "epoch": 0.25195247371838747, - "grad_norm": 0.5017532706260681, - "learning_rate": 4.2264814814814816e-05, - "loss": 0.5668, - "step": 7178 - }, - { - "epoch": 0.25198757436950453, - "grad_norm": 0.41822001338005066, - "learning_rate": 4.2262962962962967e-05, - "loss": 0.4791, - "step": 7179 - }, - { - "epoch": 0.25202267502062164, - "grad_norm": 0.4662967622280121, - "learning_rate": 4.226111111111111e-05, - "loss": 0.4226, - "step": 7180 - }, - { - "epoch": 0.2520577756717387, - "grad_norm": 0.5435676574707031, - "learning_rate": 4.225925925925926e-05, - "loss": 0.474, - "step": 7181 - }, - { - "epoch": 0.25209287632285576, - "grad_norm": 0.39938491582870483, - "learning_rate": 4.22574074074074e-05, - "loss": 0.5329, - "step": 7182 - }, - { - "epoch": 0.2521279769739729, - "grad_norm": 0.6076977252960205, - "learning_rate": 4.225555555555556e-05, - "loss": 0.4312, - "step": 7183 - }, - { - "epoch": 0.25216307762508994, - "grad_norm": 0.4398164451122284, - "learning_rate": 4.2253703703703704e-05, - "loss": 0.5322, - "step": 7184 - }, - { - "epoch": 0.252198178276207, - "grad_norm": 0.48989683389663696, - "learning_rate": 4.2251851851851854e-05, - "loss": 0.4934, - "step": 7185 - }, - { - "epoch": 0.2522332789273241, - "grad_norm": 0.47717469930648804, - "learning_rate": 4.2250000000000004e-05, - "loss": 0.4946, - "step": 7186 - }, - { - "epoch": 0.25226837957844117, - "grad_norm": 0.5001689791679382, - "learning_rate": 4.224814814814815e-05, - "loss": 0.5198, - "step": 7187 - }, - { - "epoch": 0.2523034802295583, - "grad_norm": 0.48980453610420227, - "learning_rate": 4.22462962962963e-05, - "loss": 0.5292, - "step": 7188 - }, - { - "epoch": 0.25233858088067534, - "grad_norm": 0.47572481632232666, - "learning_rate": 4.224444444444445e-05, - "loss": 0.4653, - "step": 7189 - }, - { - "epoch": 0.2523736815317924, - "grad_norm": 0.49630239605903625, - "learning_rate": 4.22425925925926e-05, - "loss": 0.5691, - "step": 7190 - }, - { - "epoch": 0.2524087821829095, - "grad_norm": 0.4800170361995697, - "learning_rate": 4.224074074074074e-05, - "loss": 0.4682, - "step": 7191 - }, - { - "epoch": 0.2524438828340266, - "grad_norm": 0.4358861446380615, - "learning_rate": 4.223888888888889e-05, - "loss": 0.4377, - "step": 7192 - }, - { - "epoch": 0.25247898348514364, - "grad_norm": 0.5190211534500122, - "learning_rate": 4.223703703703704e-05, - "loss": 0.6117, - "step": 7193 - }, - { - "epoch": 0.25251408413626075, - "grad_norm": 0.4476037323474884, - "learning_rate": 4.223518518518519e-05, - "loss": 0.5219, - "step": 7194 - }, - { - "epoch": 0.2525491847873778, - "grad_norm": 0.4465997517108917, - "learning_rate": 4.2233333333333334e-05, - "loss": 0.3733, - "step": 7195 - }, - { - "epoch": 0.25258428543849487, - "grad_norm": 0.4388718008995056, - "learning_rate": 4.2231481481481485e-05, - "loss": 0.4166, - "step": 7196 - }, - { - "epoch": 0.252619386089612, - "grad_norm": 0.57804936170578, - "learning_rate": 4.222962962962963e-05, - "loss": 0.5231, - "step": 7197 - }, - { - "epoch": 0.25265448674072905, - "grad_norm": 0.45001232624053955, - "learning_rate": 4.222777777777778e-05, - "loss": 0.4764, - "step": 7198 - }, - { - "epoch": 0.2526895873918461, - "grad_norm": 0.5067341327667236, - "learning_rate": 4.222592592592593e-05, - "loss": 0.5161, - "step": 7199 - }, - { - "epoch": 0.2527246880429632, - "grad_norm": 0.40584176778793335, - "learning_rate": 4.222407407407408e-05, - "loss": 0.4418, - "step": 7200 - }, - { - "epoch": 0.2527597886940803, - "grad_norm": 0.5361566543579102, - "learning_rate": 4.222222222222222e-05, - "loss": 0.5211, - "step": 7201 - }, - { - "epoch": 0.25279488934519734, - "grad_norm": 0.46383240818977356, - "learning_rate": 4.222037037037037e-05, - "loss": 0.5291, - "step": 7202 - }, - { - "epoch": 0.25282998999631445, - "grad_norm": 0.4625844955444336, - "learning_rate": 4.2218518518518515e-05, - "loss": 0.5117, - "step": 7203 - }, - { - "epoch": 0.2528650906474315, - "grad_norm": 0.45587238669395447, - "learning_rate": 4.221666666666667e-05, - "loss": 0.5028, - "step": 7204 - }, - { - "epoch": 0.25290019129854857, - "grad_norm": 0.4219668507575989, - "learning_rate": 4.2214814814814815e-05, - "loss": 0.4705, - "step": 7205 - }, - { - "epoch": 0.2529352919496657, - "grad_norm": 0.4796769917011261, - "learning_rate": 4.2212962962962965e-05, - "loss": 0.5812, - "step": 7206 - }, - { - "epoch": 0.25297039260078275, - "grad_norm": 0.4766983687877655, - "learning_rate": 4.2211111111111115e-05, - "loss": 0.5079, - "step": 7207 - }, - { - "epoch": 0.2530054932518998, - "grad_norm": 0.5090658068656921, - "learning_rate": 4.220925925925926e-05, - "loss": 0.5236, - "step": 7208 - }, - { - "epoch": 0.2530405939030169, - "grad_norm": 0.4738699197769165, - "learning_rate": 4.220740740740741e-05, - "loss": 0.5559, - "step": 7209 - }, - { - "epoch": 0.253075694554134, - "grad_norm": 0.5639762878417969, - "learning_rate": 4.220555555555556e-05, - "loss": 0.5529, - "step": 7210 - }, - { - "epoch": 0.25311079520525104, - "grad_norm": 0.5094590783119202, - "learning_rate": 4.220370370370371e-05, - "loss": 0.5605, - "step": 7211 - }, - { - "epoch": 0.25314589585636815, - "grad_norm": 0.5372223854064941, - "learning_rate": 4.220185185185185e-05, - "loss": 0.4524, - "step": 7212 - }, - { - "epoch": 0.2531809965074852, - "grad_norm": 0.5531845688819885, - "learning_rate": 4.22e-05, - "loss": 0.567, - "step": 7213 - }, - { - "epoch": 0.25321609715860227, - "grad_norm": 0.5442966818809509, - "learning_rate": 4.2198148148148146e-05, - "loss": 0.5504, - "step": 7214 - }, - { - "epoch": 0.2532511978097194, - "grad_norm": 0.3845403790473938, - "learning_rate": 4.21962962962963e-05, - "loss": 0.3751, - "step": 7215 - }, - { - "epoch": 0.25328629846083645, - "grad_norm": 0.44003257155418396, - "learning_rate": 4.2194444444444446e-05, - "loss": 0.5486, - "step": 7216 - }, - { - "epoch": 0.2533213991119535, - "grad_norm": 0.5102463364601135, - "learning_rate": 4.2192592592592596e-05, - "loss": 0.488, - "step": 7217 - }, - { - "epoch": 0.2533564997630706, - "grad_norm": 0.5022891759872437, - "learning_rate": 4.219074074074074e-05, - "loss": 0.45, - "step": 7218 - }, - { - "epoch": 0.2533916004141877, - "grad_norm": 0.48071688413619995, - "learning_rate": 4.218888888888889e-05, - "loss": 0.5168, - "step": 7219 - }, - { - "epoch": 0.25342670106530474, - "grad_norm": 0.4366489350795746, - "learning_rate": 4.218703703703704e-05, - "loss": 0.4759, - "step": 7220 - }, - { - "epoch": 0.25346180171642185, - "grad_norm": 0.4416934549808502, - "learning_rate": 4.218518518518519e-05, - "loss": 0.5115, - "step": 7221 - }, - { - "epoch": 0.2534969023675389, - "grad_norm": 0.40962207317352295, - "learning_rate": 4.218333333333333e-05, - "loss": 0.4771, - "step": 7222 - }, - { - "epoch": 0.25353200301865597, - "grad_norm": 0.6604583263397217, - "learning_rate": 4.218148148148148e-05, - "loss": 0.5177, - "step": 7223 - }, - { - "epoch": 0.2535671036697731, - "grad_norm": 0.5265796184539795, - "learning_rate": 4.2179629629629633e-05, - "loss": 0.6032, - "step": 7224 - }, - { - "epoch": 0.25360220432089015, - "grad_norm": 0.533423662185669, - "learning_rate": 4.217777777777778e-05, - "loss": 0.5933, - "step": 7225 - }, - { - "epoch": 0.2536373049720072, - "grad_norm": 0.4441365599632263, - "learning_rate": 4.217592592592593e-05, - "loss": 0.4022, - "step": 7226 - }, - { - "epoch": 0.2536724056231243, - "grad_norm": 0.4233340322971344, - "learning_rate": 4.217407407407408e-05, - "loss": 0.469, - "step": 7227 - }, - { - "epoch": 0.2537075062742414, - "grad_norm": 0.5024195313453674, - "learning_rate": 4.217222222222223e-05, - "loss": 0.6096, - "step": 7228 - }, - { - "epoch": 0.25374260692535844, - "grad_norm": 0.4642227590084076, - "learning_rate": 4.217037037037037e-05, - "loss": 0.3964, - "step": 7229 - }, - { - "epoch": 0.25377770757647555, - "grad_norm": 0.4870080053806305, - "learning_rate": 4.216851851851852e-05, - "loss": 0.5531, - "step": 7230 - }, - { - "epoch": 0.2538128082275926, - "grad_norm": 0.4196164608001709, - "learning_rate": 4.216666666666667e-05, - "loss": 0.3396, - "step": 7231 - }, - { - "epoch": 0.2538479088787097, - "grad_norm": 0.5160650610923767, - "learning_rate": 4.216481481481482e-05, - "loss": 0.4285, - "step": 7232 - }, - { - "epoch": 0.2538830095298268, - "grad_norm": 0.4538552761077881, - "learning_rate": 4.2162962962962964e-05, - "loss": 0.5547, - "step": 7233 - }, - { - "epoch": 0.25391811018094385, - "grad_norm": 0.6733474731445312, - "learning_rate": 4.2161111111111114e-05, - "loss": 0.5301, - "step": 7234 - }, - { - "epoch": 0.25395321083206096, - "grad_norm": 0.5429283380508423, - "learning_rate": 4.215925925925926e-05, - "loss": 0.5425, - "step": 7235 - }, - { - "epoch": 0.253988311483178, - "grad_norm": 0.47486427426338196, - "learning_rate": 4.2157407407407414e-05, - "loss": 0.5278, - "step": 7236 - }, - { - "epoch": 0.2540234121342951, - "grad_norm": 0.5084711313247681, - "learning_rate": 4.215555555555556e-05, - "loss": 0.4166, - "step": 7237 - }, - { - "epoch": 0.2540585127854122, - "grad_norm": 0.48397254943847656, - "learning_rate": 4.215370370370371e-05, - "loss": 0.5237, - "step": 7238 - }, - { - "epoch": 0.25409361343652925, - "grad_norm": 0.48963841795921326, - "learning_rate": 4.215185185185185e-05, - "loss": 0.6085, - "step": 7239 - }, - { - "epoch": 0.2541287140876463, - "grad_norm": 0.7051585912704468, - "learning_rate": 4.215e-05, - "loss": 0.4263, - "step": 7240 - }, - { - "epoch": 0.2541638147387634, - "grad_norm": 0.4862191379070282, - "learning_rate": 4.2148148148148145e-05, - "loss": 0.5446, - "step": 7241 - }, - { - "epoch": 0.2541989153898805, - "grad_norm": 0.4738067090511322, - "learning_rate": 4.21462962962963e-05, - "loss": 0.4664, - "step": 7242 - }, - { - "epoch": 0.25423401604099755, - "grad_norm": 0.5185078382492065, - "learning_rate": 4.2144444444444445e-05, - "loss": 0.5559, - "step": 7243 - }, - { - "epoch": 0.25426911669211466, - "grad_norm": 0.6003031730651855, - "learning_rate": 4.2142592592592595e-05, - "loss": 0.3717, - "step": 7244 - }, - { - "epoch": 0.2543042173432317, - "grad_norm": 0.4816856384277344, - "learning_rate": 4.2140740740740745e-05, - "loss": 0.4824, - "step": 7245 - }, - { - "epoch": 0.2543393179943488, - "grad_norm": 0.9062126278877258, - "learning_rate": 4.213888888888889e-05, - "loss": 0.4912, - "step": 7246 - }, - { - "epoch": 0.2543744186454659, - "grad_norm": 0.4298720359802246, - "learning_rate": 4.213703703703704e-05, - "loss": 0.4805, - "step": 7247 - }, - { - "epoch": 0.25440951929658295, - "grad_norm": 0.5077428221702576, - "learning_rate": 4.213518518518519e-05, - "loss": 0.5053, - "step": 7248 - }, - { - "epoch": 0.2544446199477, - "grad_norm": 0.5380575060844421, - "learning_rate": 4.213333333333334e-05, - "loss": 0.5219, - "step": 7249 - }, - { - "epoch": 0.2544797205988171, - "grad_norm": 0.4728327989578247, - "learning_rate": 4.213148148148148e-05, - "loss": 0.3982, - "step": 7250 - }, - { - "epoch": 0.2545148212499342, - "grad_norm": 0.4390929937362671, - "learning_rate": 4.212962962962963e-05, - "loss": 0.4386, - "step": 7251 - }, - { - "epoch": 0.25454992190105125, - "grad_norm": 0.46288222074508667, - "learning_rate": 4.2127777777777776e-05, - "loss": 0.5108, - "step": 7252 - }, - { - "epoch": 0.25458502255216836, - "grad_norm": 0.6035040020942688, - "learning_rate": 4.212592592592593e-05, - "loss": 0.6011, - "step": 7253 - }, - { - "epoch": 0.2546201232032854, - "grad_norm": 0.4847758114337921, - "learning_rate": 4.2124074074074076e-05, - "loss": 0.5426, - "step": 7254 - }, - { - "epoch": 0.2546552238544025, - "grad_norm": 0.4641033411026001, - "learning_rate": 4.2122222222222226e-05, - "loss": 0.5961, - "step": 7255 - }, - { - "epoch": 0.2546903245055196, - "grad_norm": 0.514581561088562, - "learning_rate": 4.212037037037037e-05, - "loss": 0.5758, - "step": 7256 - }, - { - "epoch": 0.25472542515663665, - "grad_norm": 0.5201311111450195, - "learning_rate": 4.211851851851852e-05, - "loss": 0.6129, - "step": 7257 - }, - { - "epoch": 0.2547605258077537, - "grad_norm": 0.46459102630615234, - "learning_rate": 4.211666666666667e-05, - "loss": 0.5669, - "step": 7258 - }, - { - "epoch": 0.2547956264588708, - "grad_norm": 0.4249490201473236, - "learning_rate": 4.211481481481482e-05, - "loss": 0.5483, - "step": 7259 - }, - { - "epoch": 0.2548307271099879, - "grad_norm": 0.4946768879890442, - "learning_rate": 4.211296296296296e-05, - "loss": 0.4299, - "step": 7260 - }, - { - "epoch": 0.25486582776110495, - "grad_norm": 0.49172860383987427, - "learning_rate": 4.211111111111111e-05, - "loss": 0.4661, - "step": 7261 - }, - { - "epoch": 0.25490092841222206, - "grad_norm": 0.4564841687679291, - "learning_rate": 4.2109259259259256e-05, - "loss": 0.4446, - "step": 7262 - }, - { - "epoch": 0.2549360290633391, - "grad_norm": 0.5092611908912659, - "learning_rate": 4.210740740740741e-05, - "loss": 0.4735, - "step": 7263 - }, - { - "epoch": 0.2549711297144562, - "grad_norm": 0.5169810056686401, - "learning_rate": 4.2105555555555557e-05, - "loss": 0.5904, - "step": 7264 - }, - { - "epoch": 0.2550062303655733, - "grad_norm": 0.42178523540496826, - "learning_rate": 4.210370370370371e-05, - "loss": 0.4698, - "step": 7265 - }, - { - "epoch": 0.25504133101669035, - "grad_norm": 0.4637424349784851, - "learning_rate": 4.210185185185186e-05, - "loss": 0.485, - "step": 7266 - }, - { - "epoch": 0.2550764316678074, - "grad_norm": 0.543464720249176, - "learning_rate": 4.21e-05, - "loss": 0.5563, - "step": 7267 - }, - { - "epoch": 0.2551115323189245, - "grad_norm": 0.40259990096092224, - "learning_rate": 4.209814814814815e-05, - "loss": 0.489, - "step": 7268 - }, - { - "epoch": 0.2551466329700416, - "grad_norm": 0.4474457800388336, - "learning_rate": 4.20962962962963e-05, - "loss": 0.2839, - "step": 7269 - }, - { - "epoch": 0.25518173362115865, - "grad_norm": 0.5028210878372192, - "learning_rate": 4.209444444444445e-05, - "loss": 0.505, - "step": 7270 - }, - { - "epoch": 0.25521683427227576, - "grad_norm": 0.4161173105239868, - "learning_rate": 4.2092592592592594e-05, - "loss": 0.4339, - "step": 7271 - }, - { - "epoch": 0.2552519349233928, - "grad_norm": 0.5106870532035828, - "learning_rate": 4.2090740740740744e-05, - "loss": 0.6297, - "step": 7272 - }, - { - "epoch": 0.2552870355745099, - "grad_norm": 0.47720563411712646, - "learning_rate": 4.208888888888889e-05, - "loss": 0.4829, - "step": 7273 - }, - { - "epoch": 0.255322136225627, - "grad_norm": 0.5239089727401733, - "learning_rate": 4.2087037037037044e-05, - "loss": 0.4783, - "step": 7274 - }, - { - "epoch": 0.25535723687674405, - "grad_norm": 0.5472978949546814, - "learning_rate": 4.208518518518519e-05, - "loss": 0.4784, - "step": 7275 - }, - { - "epoch": 0.25539233752786117, - "grad_norm": 0.4205436408519745, - "learning_rate": 4.208333333333334e-05, - "loss": 0.5027, - "step": 7276 - }, - { - "epoch": 0.25542743817897823, - "grad_norm": 0.528825044631958, - "learning_rate": 4.208148148148148e-05, - "loss": 0.5626, - "step": 7277 - }, - { - "epoch": 0.2554625388300953, - "grad_norm": 0.5219619870185852, - "learning_rate": 4.207962962962963e-05, - "loss": 0.564, - "step": 7278 - }, - { - "epoch": 0.2554976394812124, - "grad_norm": 0.42431530356407166, - "learning_rate": 4.2077777777777774e-05, - "loss": 0.4346, - "step": 7279 - }, - { - "epoch": 0.25553274013232946, - "grad_norm": 0.43185552954673767, - "learning_rate": 4.207592592592593e-05, - "loss": 0.5639, - "step": 7280 - }, - { - "epoch": 0.2555678407834465, - "grad_norm": 0.4521058201789856, - "learning_rate": 4.2074074074074075e-05, - "loss": 0.5108, - "step": 7281 - }, - { - "epoch": 0.25560294143456364, - "grad_norm": 0.5315353870391846, - "learning_rate": 4.2072222222222225e-05, - "loss": 0.5516, - "step": 7282 - }, - { - "epoch": 0.2556380420856807, - "grad_norm": 0.4709279537200928, - "learning_rate": 4.207037037037037e-05, - "loss": 0.4871, - "step": 7283 - }, - { - "epoch": 0.25567314273679775, - "grad_norm": 0.48129013180732727, - "learning_rate": 4.206851851851852e-05, - "loss": 0.4663, - "step": 7284 - }, - { - "epoch": 0.25570824338791487, - "grad_norm": 0.49552449584007263, - "learning_rate": 4.206666666666667e-05, - "loss": 0.5727, - "step": 7285 - }, - { - "epoch": 0.25574334403903193, - "grad_norm": 0.47669580578804016, - "learning_rate": 4.206481481481482e-05, - "loss": 0.445, - "step": 7286 - }, - { - "epoch": 0.255778444690149, - "grad_norm": 0.579109787940979, - "learning_rate": 4.206296296296297e-05, - "loss": 0.4949, - "step": 7287 - }, - { - "epoch": 0.2558135453412661, - "grad_norm": 0.4716552793979645, - "learning_rate": 4.206111111111111e-05, - "loss": 0.4505, - "step": 7288 - }, - { - "epoch": 0.25584864599238316, - "grad_norm": 0.47613728046417236, - "learning_rate": 4.205925925925926e-05, - "loss": 0.518, - "step": 7289 - }, - { - "epoch": 0.2558837466435002, - "grad_norm": 0.5168890357017517, - "learning_rate": 4.205740740740741e-05, - "loss": 0.4996, - "step": 7290 - }, - { - "epoch": 0.25591884729461734, - "grad_norm": 0.5114465355873108, - "learning_rate": 4.205555555555556e-05, - "loss": 0.5481, - "step": 7291 - }, - { - "epoch": 0.2559539479457344, - "grad_norm": 0.4430375099182129, - "learning_rate": 4.2053703703703705e-05, - "loss": 0.5068, - "step": 7292 - }, - { - "epoch": 0.25598904859685145, - "grad_norm": 0.956813395023346, - "learning_rate": 4.2051851851851856e-05, - "loss": 0.5733, - "step": 7293 - }, - { - "epoch": 0.25602414924796857, - "grad_norm": 0.3920297622680664, - "learning_rate": 4.205e-05, - "loss": 0.4193, - "step": 7294 - }, - { - "epoch": 0.25605924989908563, - "grad_norm": 0.516620397567749, - "learning_rate": 4.204814814814815e-05, - "loss": 0.526, - "step": 7295 - }, - { - "epoch": 0.2560943505502027, - "grad_norm": 0.586981475353241, - "learning_rate": 4.20462962962963e-05, - "loss": 0.4867, - "step": 7296 - }, - { - "epoch": 0.2561294512013198, - "grad_norm": 0.5686987042427063, - "learning_rate": 4.204444444444445e-05, - "loss": 0.5763, - "step": 7297 - }, - { - "epoch": 0.25616455185243686, - "grad_norm": 0.4656369686126709, - "learning_rate": 4.204259259259259e-05, - "loss": 0.5822, - "step": 7298 - }, - { - "epoch": 0.2561996525035539, - "grad_norm": 0.5739111304283142, - "learning_rate": 4.204074074074074e-05, - "loss": 0.5092, - "step": 7299 - }, - { - "epoch": 0.25623475315467104, - "grad_norm": 0.6469743847846985, - "learning_rate": 4.2038888888888886e-05, - "loss": 0.5397, - "step": 7300 - }, - { - "epoch": 0.2562698538057881, - "grad_norm": 0.5646925568580627, - "learning_rate": 4.203703703703704e-05, - "loss": 0.4283, - "step": 7301 - }, - { - "epoch": 0.25630495445690515, - "grad_norm": 0.8488561511039734, - "learning_rate": 4.2035185185185186e-05, - "loss": 0.512, - "step": 7302 - }, - { - "epoch": 0.25634005510802227, - "grad_norm": 0.4638504683971405, - "learning_rate": 4.2033333333333336e-05, - "loss": 0.4445, - "step": 7303 - }, - { - "epoch": 0.25637515575913933, - "grad_norm": 0.5246667265892029, - "learning_rate": 4.203148148148148e-05, - "loss": 0.5108, - "step": 7304 - }, - { - "epoch": 0.2564102564102564, - "grad_norm": 0.6651013493537903, - "learning_rate": 4.202962962962963e-05, - "loss": 0.5786, - "step": 7305 - }, - { - "epoch": 0.2564453570613735, - "grad_norm": 0.4199935495853424, - "learning_rate": 4.202777777777778e-05, - "loss": 0.3929, - "step": 7306 - }, - { - "epoch": 0.25648045771249056, - "grad_norm": 0.518146276473999, - "learning_rate": 4.202592592592593e-05, - "loss": 0.5037, - "step": 7307 - }, - { - "epoch": 0.2565155583636076, - "grad_norm": 0.5022035241127014, - "learning_rate": 4.202407407407408e-05, - "loss": 0.558, - "step": 7308 - }, - { - "epoch": 0.25655065901472474, - "grad_norm": 0.9924136996269226, - "learning_rate": 4.2022222222222223e-05, - "loss": 0.5339, - "step": 7309 - }, - { - "epoch": 0.2565857596658418, - "grad_norm": 0.5183898210525513, - "learning_rate": 4.2020370370370374e-05, - "loss": 0.5301, - "step": 7310 - }, - { - "epoch": 0.25662086031695885, - "grad_norm": 0.5125548839569092, - "learning_rate": 4.201851851851852e-05, - "loss": 0.3973, - "step": 7311 - }, - { - "epoch": 0.25665596096807597, - "grad_norm": 0.4906295835971832, - "learning_rate": 4.2016666666666674e-05, - "loss": 0.472, - "step": 7312 - }, - { - "epoch": 0.25669106161919303, - "grad_norm": 0.6661359071731567, - "learning_rate": 4.201481481481482e-05, - "loss": 0.4876, - "step": 7313 - }, - { - "epoch": 0.2567261622703101, - "grad_norm": 0.454145222902298, - "learning_rate": 4.201296296296297e-05, - "loss": 0.4818, - "step": 7314 - }, - { - "epoch": 0.2567612629214272, - "grad_norm": 0.7717024683952332, - "learning_rate": 4.201111111111111e-05, - "loss": 0.5164, - "step": 7315 - }, - { - "epoch": 0.25679636357254426, - "grad_norm": 0.5126301050186157, - "learning_rate": 4.200925925925926e-05, - "loss": 0.5361, - "step": 7316 - }, - { - "epoch": 0.2568314642236614, - "grad_norm": 0.5945926904678345, - "learning_rate": 4.200740740740741e-05, - "loss": 0.5704, - "step": 7317 - }, - { - "epoch": 0.25686656487477844, - "grad_norm": 0.5659402012825012, - "learning_rate": 4.200555555555556e-05, - "loss": 0.5157, - "step": 7318 - }, - { - "epoch": 0.2569016655258955, - "grad_norm": 0.48224717378616333, - "learning_rate": 4.2003703703703704e-05, - "loss": 0.4471, - "step": 7319 - }, - { - "epoch": 0.2569367661770126, - "grad_norm": 0.6798071265220642, - "learning_rate": 4.2001851851851854e-05, - "loss": 0.5707, - "step": 7320 - }, - { - "epoch": 0.25697186682812967, - "grad_norm": 0.4597180485725403, - "learning_rate": 4.2e-05, - "loss": 0.5413, - "step": 7321 - }, - { - "epoch": 0.25700696747924673, - "grad_norm": 0.42877402901649475, - "learning_rate": 4.199814814814815e-05, - "loss": 0.4113, - "step": 7322 - }, - { - "epoch": 0.25704206813036384, - "grad_norm": 0.43367597460746765, - "learning_rate": 4.19962962962963e-05, - "loss": 0.4059, - "step": 7323 - }, - { - "epoch": 0.2570771687814809, - "grad_norm": 0.47548356652259827, - "learning_rate": 4.199444444444445e-05, - "loss": 0.5023, - "step": 7324 - }, - { - "epoch": 0.25711226943259796, - "grad_norm": 0.6271303296089172, - "learning_rate": 4.199259259259259e-05, - "loss": 0.4436, - "step": 7325 - }, - { - "epoch": 0.2571473700837151, - "grad_norm": 0.5693740844726562, - "learning_rate": 4.199074074074074e-05, - "loss": 0.5504, - "step": 7326 - }, - { - "epoch": 0.25718247073483214, - "grad_norm": 0.46606841683387756, - "learning_rate": 4.198888888888889e-05, - "loss": 0.5151, - "step": 7327 - }, - { - "epoch": 0.2572175713859492, - "grad_norm": 0.8157958984375, - "learning_rate": 4.198703703703704e-05, - "loss": 0.5476, - "step": 7328 - }, - { - "epoch": 0.2572526720370663, - "grad_norm": 0.5145971775054932, - "learning_rate": 4.198518518518519e-05, - "loss": 0.3976, - "step": 7329 - }, - { - "epoch": 0.25728777268818337, - "grad_norm": 0.5041426420211792, - "learning_rate": 4.1983333333333335e-05, - "loss": 0.5724, - "step": 7330 - }, - { - "epoch": 0.25732287333930043, - "grad_norm": 0.43705055117607117, - "learning_rate": 4.1981481481481485e-05, - "loss": 0.3699, - "step": 7331 - }, - { - "epoch": 0.25735797399041754, - "grad_norm": 0.5559743642807007, - "learning_rate": 4.197962962962963e-05, - "loss": 0.4752, - "step": 7332 - }, - { - "epoch": 0.2573930746415346, - "grad_norm": 0.5119414329528809, - "learning_rate": 4.1977777777777785e-05, - "loss": 0.5489, - "step": 7333 - }, - { - "epoch": 0.25742817529265166, - "grad_norm": 0.5100566744804382, - "learning_rate": 4.197592592592593e-05, - "loss": 0.578, - "step": 7334 - }, - { - "epoch": 0.2574632759437688, - "grad_norm": 0.5886195302009583, - "learning_rate": 4.197407407407408e-05, - "loss": 0.4613, - "step": 7335 - }, - { - "epoch": 0.25749837659488584, - "grad_norm": 0.501534640789032, - "learning_rate": 4.197222222222222e-05, - "loss": 0.5419, - "step": 7336 - }, - { - "epoch": 0.2575334772460029, - "grad_norm": 0.5310518145561218, - "learning_rate": 4.197037037037037e-05, - "loss": 0.5565, - "step": 7337 - }, - { - "epoch": 0.25756857789712, - "grad_norm": 0.4639138877391815, - "learning_rate": 4.1968518518518516e-05, - "loss": 0.6009, - "step": 7338 - }, - { - "epoch": 0.25760367854823707, - "grad_norm": 0.4928628206253052, - "learning_rate": 4.196666666666667e-05, - "loss": 0.4444, - "step": 7339 - }, - { - "epoch": 0.25763877919935413, - "grad_norm": 0.388913631439209, - "learning_rate": 4.1964814814814816e-05, - "loss": 0.495, - "step": 7340 - }, - { - "epoch": 0.25767387985047124, - "grad_norm": 0.5038385987281799, - "learning_rate": 4.1962962962962966e-05, - "loss": 0.4753, - "step": 7341 - }, - { - "epoch": 0.2577089805015883, - "grad_norm": 0.4853421747684479, - "learning_rate": 4.196111111111111e-05, - "loss": 0.4911, - "step": 7342 - }, - { - "epoch": 0.25774408115270536, - "grad_norm": 0.6169817447662354, - "learning_rate": 4.195925925925926e-05, - "loss": 0.5959, - "step": 7343 - }, - { - "epoch": 0.2577791818038225, - "grad_norm": 0.4559246301651001, - "learning_rate": 4.195740740740741e-05, - "loss": 0.5277, - "step": 7344 - }, - { - "epoch": 0.25781428245493954, - "grad_norm": 0.5165972709655762, - "learning_rate": 4.195555555555556e-05, - "loss": 0.5253, - "step": 7345 - }, - { - "epoch": 0.2578493831060566, - "grad_norm": 0.47321510314941406, - "learning_rate": 4.19537037037037e-05, - "loss": 0.4113, - "step": 7346 - }, - { - "epoch": 0.2578844837571737, - "grad_norm": 0.5026248693466187, - "learning_rate": 4.195185185185185e-05, - "loss": 0.4987, - "step": 7347 - }, - { - "epoch": 0.25791958440829077, - "grad_norm": 0.4907618761062622, - "learning_rate": 4.195e-05, - "loss": 0.5182, - "step": 7348 - }, - { - "epoch": 0.25795468505940783, - "grad_norm": 0.44677311182022095, - "learning_rate": 4.1948148148148147e-05, - "loss": 0.5858, - "step": 7349 - }, - { - "epoch": 0.25798978571052494, - "grad_norm": 0.46329206228256226, - "learning_rate": 4.1946296296296303e-05, - "loss": 0.4838, - "step": 7350 - }, - { - "epoch": 0.258024886361642, - "grad_norm": 0.40858545899391174, - "learning_rate": 4.194444444444445e-05, - "loss": 0.4998, - "step": 7351 - }, - { - "epoch": 0.25805998701275906, - "grad_norm": 0.5192550420761108, - "learning_rate": 4.19425925925926e-05, - "loss": 0.4704, - "step": 7352 - }, - { - "epoch": 0.2580950876638762, - "grad_norm": 0.44343090057373047, - "learning_rate": 4.194074074074074e-05, - "loss": 0.5256, - "step": 7353 - }, - { - "epoch": 0.25813018831499324, - "grad_norm": 0.39475345611572266, - "learning_rate": 4.193888888888889e-05, - "loss": 0.4682, - "step": 7354 - }, - { - "epoch": 0.2581652889661103, - "grad_norm": 0.5723071694374084, - "learning_rate": 4.193703703703704e-05, - "loss": 0.5168, - "step": 7355 - }, - { - "epoch": 0.2582003896172274, - "grad_norm": 0.5138171911239624, - "learning_rate": 4.193518518518519e-05, - "loss": 0.5904, - "step": 7356 - }, - { - "epoch": 0.25823549026834447, - "grad_norm": 0.6305220127105713, - "learning_rate": 4.1933333333333334e-05, - "loss": 0.5011, - "step": 7357 - }, - { - "epoch": 0.25827059091946153, - "grad_norm": 0.6279390454292297, - "learning_rate": 4.1931481481481484e-05, - "loss": 0.4228, - "step": 7358 - }, - { - "epoch": 0.25830569157057864, - "grad_norm": 0.4278504550457001, - "learning_rate": 4.192962962962963e-05, - "loss": 0.5569, - "step": 7359 - }, - { - "epoch": 0.2583407922216957, - "grad_norm": 0.5575003623962402, - "learning_rate": 4.1927777777777784e-05, - "loss": 0.5586, - "step": 7360 - }, - { - "epoch": 0.2583758928728128, - "grad_norm": 0.7130787372589111, - "learning_rate": 4.192592592592593e-05, - "loss": 0.4753, - "step": 7361 - }, - { - "epoch": 0.2584109935239299, - "grad_norm": 0.4733482897281647, - "learning_rate": 4.192407407407408e-05, - "loss": 0.5837, - "step": 7362 - }, - { - "epoch": 0.25844609417504694, - "grad_norm": 0.49107271432876587, - "learning_rate": 4.192222222222222e-05, - "loss": 0.4318, - "step": 7363 - }, - { - "epoch": 0.25848119482616405, - "grad_norm": 0.4167989492416382, - "learning_rate": 4.192037037037037e-05, - "loss": 0.4684, - "step": 7364 - }, - { - "epoch": 0.2585162954772811, - "grad_norm": 0.6195090413093567, - "learning_rate": 4.191851851851852e-05, - "loss": 0.4902, - "step": 7365 - }, - { - "epoch": 0.25855139612839817, - "grad_norm": 0.5235631465911865, - "learning_rate": 4.191666666666667e-05, - "loss": 0.5062, - "step": 7366 - }, - { - "epoch": 0.2585864967795153, - "grad_norm": 0.49367785453796387, - "learning_rate": 4.1914814814814815e-05, - "loss": 0.5284, - "step": 7367 - }, - { - "epoch": 0.25862159743063234, - "grad_norm": 0.6742584109306335, - "learning_rate": 4.1912962962962965e-05, - "loss": 0.5864, - "step": 7368 - }, - { - "epoch": 0.2586566980817494, - "grad_norm": 0.4951701760292053, - "learning_rate": 4.1911111111111115e-05, - "loss": 0.3496, - "step": 7369 - }, - { - "epoch": 0.2586917987328665, - "grad_norm": 0.36913901567459106, - "learning_rate": 4.190925925925926e-05, - "loss": 0.486, - "step": 7370 - }, - { - "epoch": 0.2587268993839836, - "grad_norm": 0.5145993828773499, - "learning_rate": 4.1907407407407415e-05, - "loss": 0.4359, - "step": 7371 - }, - { - "epoch": 0.25876200003510064, - "grad_norm": 0.45123085379600525, - "learning_rate": 4.190555555555556e-05, - "loss": 0.5126, - "step": 7372 - }, - { - "epoch": 0.25879710068621775, - "grad_norm": 0.5058767795562744, - "learning_rate": 4.190370370370371e-05, - "loss": 0.4724, - "step": 7373 - }, - { - "epoch": 0.2588322013373348, - "grad_norm": 0.46140068769454956, - "learning_rate": 4.190185185185185e-05, - "loss": 0.4602, - "step": 7374 - }, - { - "epoch": 0.25886730198845187, - "grad_norm": 0.4586861729621887, - "learning_rate": 4.19e-05, - "loss": 0.585, - "step": 7375 - }, - { - "epoch": 0.258902402639569, - "grad_norm": 0.5218576192855835, - "learning_rate": 4.1898148148148145e-05, - "loss": 0.5732, - "step": 7376 - }, - { - "epoch": 0.25893750329068604, - "grad_norm": 0.6651543974876404, - "learning_rate": 4.18962962962963e-05, - "loss": 0.4613, - "step": 7377 - }, - { - "epoch": 0.2589726039418031, - "grad_norm": 0.705409586429596, - "learning_rate": 4.1894444444444446e-05, - "loss": 0.532, - "step": 7378 - }, - { - "epoch": 0.2590077045929202, - "grad_norm": 0.45828378200531006, - "learning_rate": 4.1892592592592596e-05, - "loss": 0.4893, - "step": 7379 - }, - { - "epoch": 0.2590428052440373, - "grad_norm": 0.4971631169319153, - "learning_rate": 4.189074074074074e-05, - "loss": 0.4747, - "step": 7380 - }, - { - "epoch": 0.25907790589515434, - "grad_norm": 0.42262232303619385, - "learning_rate": 4.188888888888889e-05, - "loss": 0.4929, - "step": 7381 - }, - { - "epoch": 0.25911300654627145, - "grad_norm": 0.4683390259742737, - "learning_rate": 4.188703703703704e-05, - "loss": 0.4896, - "step": 7382 - }, - { - "epoch": 0.2591481071973885, - "grad_norm": 0.48211681842803955, - "learning_rate": 4.188518518518519e-05, - "loss": 0.574, - "step": 7383 - }, - { - "epoch": 0.25918320784850557, - "grad_norm": 0.5317733883857727, - "learning_rate": 4.188333333333333e-05, - "loss": 0.4465, - "step": 7384 - }, - { - "epoch": 0.2592183084996227, - "grad_norm": 0.4918562173843384, - "learning_rate": 4.188148148148148e-05, - "loss": 0.5943, - "step": 7385 - }, - { - "epoch": 0.25925340915073974, - "grad_norm": 0.5103895664215088, - "learning_rate": 4.187962962962963e-05, - "loss": 0.5387, - "step": 7386 - }, - { - "epoch": 0.2592885098018568, - "grad_norm": 0.48720884323120117, - "learning_rate": 4.187777777777778e-05, - "loss": 0.5211, - "step": 7387 - }, - { - "epoch": 0.2593236104529739, - "grad_norm": 0.5660043954849243, - "learning_rate": 4.1875925925925926e-05, - "loss": 0.561, - "step": 7388 - }, - { - "epoch": 0.259358711104091, - "grad_norm": 0.4560876786708832, - "learning_rate": 4.1874074074074076e-05, - "loss": 0.5123, - "step": 7389 - }, - { - "epoch": 0.25939381175520804, - "grad_norm": 0.4558614492416382, - "learning_rate": 4.1872222222222227e-05, - "loss": 0.5666, - "step": 7390 - }, - { - "epoch": 0.25942891240632515, - "grad_norm": 0.5053890943527222, - "learning_rate": 4.187037037037037e-05, - "loss": 0.5224, - "step": 7391 - }, - { - "epoch": 0.2594640130574422, - "grad_norm": 0.4502836763858795, - "learning_rate": 4.186851851851852e-05, - "loss": 0.4993, - "step": 7392 - }, - { - "epoch": 0.25949911370855927, - "grad_norm": 0.37803131341934204, - "learning_rate": 4.186666666666667e-05, - "loss": 0.504, - "step": 7393 - }, - { - "epoch": 0.2595342143596764, - "grad_norm": 0.48719513416290283, - "learning_rate": 4.186481481481482e-05, - "loss": 0.4071, - "step": 7394 - }, - { - "epoch": 0.25956931501079344, - "grad_norm": 0.388485848903656, - "learning_rate": 4.1862962962962964e-05, - "loss": 0.3941, - "step": 7395 - }, - { - "epoch": 0.2596044156619105, - "grad_norm": 0.46995797753334045, - "learning_rate": 4.1861111111111114e-05, - "loss": 0.5142, - "step": 7396 - }, - { - "epoch": 0.2596395163130276, - "grad_norm": 0.537367582321167, - "learning_rate": 4.185925925925926e-05, - "loss": 0.5481, - "step": 7397 - }, - { - "epoch": 0.2596746169641447, - "grad_norm": 0.48830485343933105, - "learning_rate": 4.1857407407407414e-05, - "loss": 0.5606, - "step": 7398 - }, - { - "epoch": 0.25970971761526174, - "grad_norm": 0.5678164958953857, - "learning_rate": 4.185555555555556e-05, - "loss": 0.4667, - "step": 7399 - }, - { - "epoch": 0.25974481826637885, - "grad_norm": 0.52555251121521, - "learning_rate": 4.185370370370371e-05, - "loss": 0.4488, - "step": 7400 - }, - { - "epoch": 0.2597799189174959, - "grad_norm": 0.5102935433387756, - "learning_rate": 4.185185185185185e-05, - "loss": 0.4903, - "step": 7401 - }, - { - "epoch": 0.25981501956861297, - "grad_norm": 0.4080307185649872, - "learning_rate": 4.185e-05, - "loss": 0.439, - "step": 7402 - }, - { - "epoch": 0.2598501202197301, - "grad_norm": 0.5001645088195801, - "learning_rate": 4.1848148148148144e-05, - "loss": 0.5589, - "step": 7403 - }, - { - "epoch": 0.25988522087084714, - "grad_norm": 0.4820529520511627, - "learning_rate": 4.18462962962963e-05, - "loss": 0.4757, - "step": 7404 - }, - { - "epoch": 0.25992032152196426, - "grad_norm": 0.5840334296226501, - "learning_rate": 4.1844444444444444e-05, - "loss": 0.361, - "step": 7405 - }, - { - "epoch": 0.2599554221730813, - "grad_norm": 0.49340465664863586, - "learning_rate": 4.1842592592592594e-05, - "loss": 0.5821, - "step": 7406 - }, - { - "epoch": 0.2599905228241984, - "grad_norm": 0.4284987151622772, - "learning_rate": 4.1840740740740745e-05, - "loss": 0.4596, - "step": 7407 - }, - { - "epoch": 0.2600256234753155, - "grad_norm": 0.44986820220947266, - "learning_rate": 4.183888888888889e-05, - "loss": 0.43, - "step": 7408 - }, - { - "epoch": 0.26006072412643255, - "grad_norm": 0.5190653204917908, - "learning_rate": 4.183703703703704e-05, - "loss": 0.5152, - "step": 7409 - }, - { - "epoch": 0.2600958247775496, - "grad_norm": 0.4689301550388336, - "learning_rate": 4.183518518518519e-05, - "loss": 0.4528, - "step": 7410 - }, - { - "epoch": 0.2601309254286667, - "grad_norm": 0.3964582681655884, - "learning_rate": 4.183333333333334e-05, - "loss": 0.3217, - "step": 7411 - }, - { - "epoch": 0.2601660260797838, - "grad_norm": 0.49346423149108887, - "learning_rate": 4.183148148148148e-05, - "loss": 0.4547, - "step": 7412 - }, - { - "epoch": 0.26020112673090084, - "grad_norm": 0.4195173382759094, - "learning_rate": 4.182962962962963e-05, - "loss": 0.3748, - "step": 7413 - }, - { - "epoch": 0.26023622738201796, - "grad_norm": 0.5301098823547363, - "learning_rate": 4.182777777777778e-05, - "loss": 0.4612, - "step": 7414 - }, - { - "epoch": 0.260271328033135, - "grad_norm": 0.5032715797424316, - "learning_rate": 4.182592592592593e-05, - "loss": 0.4476, - "step": 7415 - }, - { - "epoch": 0.2603064286842521, - "grad_norm": 0.5444456934928894, - "learning_rate": 4.1824074074074075e-05, - "loss": 0.4929, - "step": 7416 - }, - { - "epoch": 0.2603415293353692, - "grad_norm": 0.5169932246208191, - "learning_rate": 4.1822222222222225e-05, - "loss": 0.6203, - "step": 7417 - }, - { - "epoch": 0.26037662998648625, - "grad_norm": 0.4211421608924866, - "learning_rate": 4.182037037037037e-05, - "loss": 0.3511, - "step": 7418 - }, - { - "epoch": 0.2604117306376033, - "grad_norm": 0.4979994595050812, - "learning_rate": 4.181851851851852e-05, - "loss": 0.5311, - "step": 7419 - }, - { - "epoch": 0.2604468312887204, - "grad_norm": 0.5950201153755188, - "learning_rate": 4.181666666666667e-05, - "loss": 0.601, - "step": 7420 - }, - { - "epoch": 0.2604819319398375, - "grad_norm": 0.519418478012085, - "learning_rate": 4.181481481481482e-05, - "loss": 0.5467, - "step": 7421 - }, - { - "epoch": 0.26051703259095454, - "grad_norm": 0.42675551772117615, - "learning_rate": 4.181296296296296e-05, - "loss": 0.4435, - "step": 7422 - }, - { - "epoch": 0.26055213324207166, - "grad_norm": 0.5102945566177368, - "learning_rate": 4.181111111111111e-05, - "loss": 0.4803, - "step": 7423 - }, - { - "epoch": 0.2605872338931887, - "grad_norm": 0.5212748050689697, - "learning_rate": 4.1809259259259256e-05, - "loss": 0.4507, - "step": 7424 - }, - { - "epoch": 0.2606223345443058, - "grad_norm": 0.4872710704803467, - "learning_rate": 4.180740740740741e-05, - "loss": 0.5258, - "step": 7425 - }, - { - "epoch": 0.2606574351954229, - "grad_norm": 0.5434755086898804, - "learning_rate": 4.1805555555555556e-05, - "loss": 0.5751, - "step": 7426 - }, - { - "epoch": 0.26069253584653995, - "grad_norm": 0.4649171829223633, - "learning_rate": 4.1803703703703706e-05, - "loss": 0.5293, - "step": 7427 - }, - { - "epoch": 0.260727636497657, - "grad_norm": 0.4740045368671417, - "learning_rate": 4.1801851851851856e-05, - "loss": 0.4071, - "step": 7428 - }, - { - "epoch": 0.2607627371487741, - "grad_norm": 0.5470907092094421, - "learning_rate": 4.18e-05, - "loss": 0.6017, - "step": 7429 - }, - { - "epoch": 0.2607978377998912, - "grad_norm": 0.4416157007217407, - "learning_rate": 4.179814814814815e-05, - "loss": 0.5181, - "step": 7430 - }, - { - "epoch": 0.26083293845100824, - "grad_norm": 0.4866563379764557, - "learning_rate": 4.17962962962963e-05, - "loss": 0.4772, - "step": 7431 - }, - { - "epoch": 0.26086803910212536, - "grad_norm": 0.47569435834884644, - "learning_rate": 4.179444444444445e-05, - "loss": 0.497, - "step": 7432 - }, - { - "epoch": 0.2609031397532424, - "grad_norm": 0.4323291480541229, - "learning_rate": 4.179259259259259e-05, - "loss": 0.3876, - "step": 7433 - }, - { - "epoch": 0.2609382404043595, - "grad_norm": 0.5364957451820374, - "learning_rate": 4.179074074074074e-05, - "loss": 0.6301, - "step": 7434 - }, - { - "epoch": 0.2609733410554766, - "grad_norm": 0.5239258408546448, - "learning_rate": 4.178888888888889e-05, - "loss": 0.3719, - "step": 7435 - }, - { - "epoch": 0.26100844170659365, - "grad_norm": 0.5468759536743164, - "learning_rate": 4.1787037037037044e-05, - "loss": 0.5534, - "step": 7436 - }, - { - "epoch": 0.2610435423577107, - "grad_norm": 0.3995780646800995, - "learning_rate": 4.178518518518519e-05, - "loss": 0.3297, - "step": 7437 - }, - { - "epoch": 0.2610786430088278, - "grad_norm": 0.47892600297927856, - "learning_rate": 4.178333333333334e-05, - "loss": 0.5321, - "step": 7438 - }, - { - "epoch": 0.2611137436599449, - "grad_norm": 0.5011418461799622, - "learning_rate": 4.178148148148148e-05, - "loss": 0.5275, - "step": 7439 - }, - { - "epoch": 0.26114884431106195, - "grad_norm": 0.5101132392883301, - "learning_rate": 4.177962962962963e-05, - "loss": 0.6095, - "step": 7440 - }, - { - "epoch": 0.26118394496217906, - "grad_norm": 0.44898703694343567, - "learning_rate": 4.177777777777778e-05, - "loss": 0.5206, - "step": 7441 - }, - { - "epoch": 0.2612190456132961, - "grad_norm": 0.4473021328449249, - "learning_rate": 4.177592592592593e-05, - "loss": 0.5506, - "step": 7442 - }, - { - "epoch": 0.2612541462644132, - "grad_norm": 0.4472084641456604, - "learning_rate": 4.1774074074074074e-05, - "loss": 0.4517, - "step": 7443 - }, - { - "epoch": 0.2612892469155303, - "grad_norm": 0.449386864900589, - "learning_rate": 4.1772222222222224e-05, - "loss": 0.4364, - "step": 7444 - }, - { - "epoch": 0.26132434756664735, - "grad_norm": 0.4879491329193115, - "learning_rate": 4.177037037037037e-05, - "loss": 0.5201, - "step": 7445 - }, - { - "epoch": 0.2613594482177644, - "grad_norm": 0.49749302864074707, - "learning_rate": 4.176851851851852e-05, - "loss": 0.5055, - "step": 7446 - }, - { - "epoch": 0.2613945488688815, - "grad_norm": 0.45995163917541504, - "learning_rate": 4.176666666666667e-05, - "loss": 0.3992, - "step": 7447 - }, - { - "epoch": 0.2614296495199986, - "grad_norm": 0.46854981780052185, - "learning_rate": 4.176481481481482e-05, - "loss": 0.4986, - "step": 7448 - }, - { - "epoch": 0.2614647501711157, - "grad_norm": 0.4612634479999542, - "learning_rate": 4.176296296296297e-05, - "loss": 0.5087, - "step": 7449 - }, - { - "epoch": 0.26149985082223276, - "grad_norm": 0.49598753452301025, - "learning_rate": 4.176111111111111e-05, - "loss": 0.5472, - "step": 7450 - }, - { - "epoch": 0.2615349514733498, - "grad_norm": 0.43106546998023987, - "learning_rate": 4.175925925925926e-05, - "loss": 0.4071, - "step": 7451 - }, - { - "epoch": 0.26157005212446693, - "grad_norm": 0.47847557067871094, - "learning_rate": 4.175740740740741e-05, - "loss": 0.4951, - "step": 7452 - }, - { - "epoch": 0.261605152775584, - "grad_norm": 0.39987191557884216, - "learning_rate": 4.175555555555556e-05, - "loss": 0.4956, - "step": 7453 - }, - { - "epoch": 0.26164025342670105, - "grad_norm": 0.5235379338264465, - "learning_rate": 4.1753703703703705e-05, - "loss": 0.4251, - "step": 7454 - }, - { - "epoch": 0.26167535407781817, - "grad_norm": 0.46956416964530945, - "learning_rate": 4.1751851851851855e-05, - "loss": 0.4961, - "step": 7455 - }, - { - "epoch": 0.2617104547289352, - "grad_norm": 0.47476646304130554, - "learning_rate": 4.175e-05, - "loss": 0.5454, - "step": 7456 - }, - { - "epoch": 0.2617455553800523, - "grad_norm": 0.4880174696445465, - "learning_rate": 4.1748148148148155e-05, - "loss": 0.6133, - "step": 7457 - }, - { - "epoch": 0.2617806560311694, - "grad_norm": 0.49372997879981995, - "learning_rate": 4.17462962962963e-05, - "loss": 0.5702, - "step": 7458 - }, - { - "epoch": 0.26181575668228646, - "grad_norm": 0.6003292202949524, - "learning_rate": 4.174444444444445e-05, - "loss": 0.4468, - "step": 7459 - }, - { - "epoch": 0.2618508573334035, - "grad_norm": 0.48091575503349304, - "learning_rate": 4.174259259259259e-05, - "loss": 0.5446, - "step": 7460 - }, - { - "epoch": 0.26188595798452063, - "grad_norm": 0.5244951844215393, - "learning_rate": 4.174074074074074e-05, - "loss": 0.5743, - "step": 7461 - }, - { - "epoch": 0.2619210586356377, - "grad_norm": 0.5076891183853149, - "learning_rate": 4.1738888888888885e-05, - "loss": 0.5113, - "step": 7462 - }, - { - "epoch": 0.26195615928675475, - "grad_norm": 0.5431874394416809, - "learning_rate": 4.173703703703704e-05, - "loss": 0.494, - "step": 7463 - }, - { - "epoch": 0.26199125993787187, - "grad_norm": 0.46018287539482117, - "learning_rate": 4.1735185185185186e-05, - "loss": 0.5455, - "step": 7464 - }, - { - "epoch": 0.2620263605889889, - "grad_norm": 0.45314982533454895, - "learning_rate": 4.1733333333333336e-05, - "loss": 0.485, - "step": 7465 - }, - { - "epoch": 0.262061461240106, - "grad_norm": 0.4500822126865387, - "learning_rate": 4.173148148148148e-05, - "loss": 0.5951, - "step": 7466 - }, - { - "epoch": 0.2620965618912231, - "grad_norm": 0.5024887919425964, - "learning_rate": 4.172962962962963e-05, - "loss": 0.4714, - "step": 7467 - }, - { - "epoch": 0.26213166254234016, - "grad_norm": 0.4971187114715576, - "learning_rate": 4.172777777777778e-05, - "loss": 0.572, - "step": 7468 - }, - { - "epoch": 0.2621667631934572, - "grad_norm": 0.4980955719947815, - "learning_rate": 4.172592592592593e-05, - "loss": 0.5311, - "step": 7469 - }, - { - "epoch": 0.26220186384457433, - "grad_norm": 0.4564894735813141, - "learning_rate": 4.172407407407408e-05, - "loss": 0.4933, - "step": 7470 - }, - { - "epoch": 0.2622369644956914, - "grad_norm": 0.4290551543235779, - "learning_rate": 4.172222222222222e-05, - "loss": 0.4376, - "step": 7471 - }, - { - "epoch": 0.26227206514680845, - "grad_norm": 0.4168330729007721, - "learning_rate": 4.172037037037037e-05, - "loss": 0.4971, - "step": 7472 - }, - { - "epoch": 0.26230716579792557, - "grad_norm": 0.4790206849575043, - "learning_rate": 4.1718518518518516e-05, - "loss": 0.4667, - "step": 7473 - }, - { - "epoch": 0.2623422664490426, - "grad_norm": 0.42361652851104736, - "learning_rate": 4.171666666666667e-05, - "loss": 0.5056, - "step": 7474 - }, - { - "epoch": 0.2623773671001597, - "grad_norm": 0.4169853627681732, - "learning_rate": 4.1714814814814817e-05, - "loss": 0.4491, - "step": 7475 - }, - { - "epoch": 0.2624124677512768, - "grad_norm": 0.5924558043479919, - "learning_rate": 4.171296296296297e-05, - "loss": 0.5192, - "step": 7476 - }, - { - "epoch": 0.26244756840239386, - "grad_norm": 0.450252890586853, - "learning_rate": 4.171111111111111e-05, - "loss": 0.5594, - "step": 7477 - }, - { - "epoch": 0.2624826690535109, - "grad_norm": 0.49309948086738586, - "learning_rate": 4.170925925925926e-05, - "loss": 0.5034, - "step": 7478 - }, - { - "epoch": 0.26251776970462803, - "grad_norm": 0.4867746829986572, - "learning_rate": 4.170740740740741e-05, - "loss": 0.5366, - "step": 7479 - }, - { - "epoch": 0.2625528703557451, - "grad_norm": 0.4652055501937866, - "learning_rate": 4.170555555555556e-05, - "loss": 0.5711, - "step": 7480 - }, - { - "epoch": 0.26258797100686215, - "grad_norm": 0.48105311393737793, - "learning_rate": 4.1703703703703704e-05, - "loss": 0.4017, - "step": 7481 - }, - { - "epoch": 0.26262307165797927, - "grad_norm": 0.5056514143943787, - "learning_rate": 4.1701851851851854e-05, - "loss": 0.5235, - "step": 7482 - }, - { - "epoch": 0.2626581723090963, - "grad_norm": 0.5500014424324036, - "learning_rate": 4.17e-05, - "loss": 0.535, - "step": 7483 - }, - { - "epoch": 0.2626932729602134, - "grad_norm": 0.5417173504829407, - "learning_rate": 4.1698148148148154e-05, - "loss": 0.5049, - "step": 7484 - }, - { - "epoch": 0.2627283736113305, - "grad_norm": 0.5263307690620422, - "learning_rate": 4.16962962962963e-05, - "loss": 0.5136, - "step": 7485 - }, - { - "epoch": 0.26276347426244756, - "grad_norm": 0.4724295735359192, - "learning_rate": 4.169444444444445e-05, - "loss": 0.4848, - "step": 7486 - }, - { - "epoch": 0.2627985749135646, - "grad_norm": 0.45848241448402405, - "learning_rate": 4.169259259259259e-05, - "loss": 0.517, - "step": 7487 - }, - { - "epoch": 0.26283367556468173, - "grad_norm": 0.6878055334091187, - "learning_rate": 4.169074074074074e-05, - "loss": 0.6029, - "step": 7488 - }, - { - "epoch": 0.2628687762157988, - "grad_norm": 0.4732016921043396, - "learning_rate": 4.168888888888889e-05, - "loss": 0.53, - "step": 7489 - }, - { - "epoch": 0.2629038768669159, - "grad_norm": 0.5676099061965942, - "learning_rate": 4.168703703703704e-05, - "loss": 0.6169, - "step": 7490 - }, - { - "epoch": 0.26293897751803297, - "grad_norm": 0.4439152479171753, - "learning_rate": 4.168518518518519e-05, - "loss": 0.4601, - "step": 7491 - }, - { - "epoch": 0.26297407816915, - "grad_norm": 0.4983838200569153, - "learning_rate": 4.1683333333333335e-05, - "loss": 0.5194, - "step": 7492 - }, - { - "epoch": 0.26300917882026714, - "grad_norm": 0.5122106671333313, - "learning_rate": 4.1681481481481485e-05, - "loss": 0.4639, - "step": 7493 - }, - { - "epoch": 0.2630442794713842, - "grad_norm": 0.6291254162788391, - "learning_rate": 4.167962962962963e-05, - "loss": 0.5558, - "step": 7494 - }, - { - "epoch": 0.26307938012250126, - "grad_norm": 0.43281126022338867, - "learning_rate": 4.1677777777777785e-05, - "loss": 0.4919, - "step": 7495 - }, - { - "epoch": 0.2631144807736184, - "grad_norm": 0.4489114582538605, - "learning_rate": 4.167592592592593e-05, - "loss": 0.5129, - "step": 7496 - }, - { - "epoch": 0.26314958142473543, - "grad_norm": 0.46465814113616943, - "learning_rate": 4.167407407407408e-05, - "loss": 0.4895, - "step": 7497 - }, - { - "epoch": 0.2631846820758525, - "grad_norm": 0.4674144983291626, - "learning_rate": 4.167222222222222e-05, - "loss": 0.5116, - "step": 7498 - }, - { - "epoch": 0.2632197827269696, - "grad_norm": 0.5660070180892944, - "learning_rate": 4.167037037037037e-05, - "loss": 0.4791, - "step": 7499 - }, - { - "epoch": 0.26325488337808667, - "grad_norm": 0.4360343813896179, - "learning_rate": 4.1668518518518515e-05, - "loss": 0.4724, - "step": 7500 - }, - { - "epoch": 0.2632899840292037, - "grad_norm": 0.5401700735092163, - "learning_rate": 4.166666666666667e-05, - "loss": 0.3542, - "step": 7501 - }, - { - "epoch": 0.26332508468032084, - "grad_norm": 0.4427063763141632, - "learning_rate": 4.1664814814814815e-05, - "loss": 0.4624, - "step": 7502 - }, - { - "epoch": 0.2633601853314379, - "grad_norm": 0.4332069456577301, - "learning_rate": 4.1662962962962965e-05, - "loss": 0.5538, - "step": 7503 - }, - { - "epoch": 0.26339528598255496, - "grad_norm": 0.5247150659561157, - "learning_rate": 4.166111111111111e-05, - "loss": 0.4114, - "step": 7504 - }, - { - "epoch": 0.2634303866336721, - "grad_norm": 0.5314955711364746, - "learning_rate": 4.165925925925926e-05, - "loss": 0.4575, - "step": 7505 - }, - { - "epoch": 0.26346548728478913, - "grad_norm": 0.5053930282592773, - "learning_rate": 4.165740740740741e-05, - "loss": 0.429, - "step": 7506 - }, - { - "epoch": 0.2635005879359062, - "grad_norm": 0.6226051449775696, - "learning_rate": 4.165555555555556e-05, - "loss": 0.5408, - "step": 7507 - }, - { - "epoch": 0.2635356885870233, - "grad_norm": 0.5206559300422668, - "learning_rate": 4.165370370370371e-05, - "loss": 0.4585, - "step": 7508 - }, - { - "epoch": 0.26357078923814037, - "grad_norm": 0.5202680826187134, - "learning_rate": 4.165185185185185e-05, - "loss": 0.5184, - "step": 7509 - }, - { - "epoch": 0.2636058898892574, - "grad_norm": 0.46492689847946167, - "learning_rate": 4.165e-05, - "loss": 0.5156, - "step": 7510 - }, - { - "epoch": 0.26364099054037454, - "grad_norm": 0.49750906229019165, - "learning_rate": 4.164814814814815e-05, - "loss": 0.4992, - "step": 7511 - }, - { - "epoch": 0.2636760911914916, - "grad_norm": 0.3927859663963318, - "learning_rate": 4.16462962962963e-05, - "loss": 0.4478, - "step": 7512 - }, - { - "epoch": 0.26371119184260866, - "grad_norm": 0.49090784788131714, - "learning_rate": 4.1644444444444446e-05, - "loss": 0.5698, - "step": 7513 - }, - { - "epoch": 0.2637462924937258, - "grad_norm": 0.4016816020011902, - "learning_rate": 4.1642592592592596e-05, - "loss": 0.4429, - "step": 7514 - }, - { - "epoch": 0.26378139314484284, - "grad_norm": 0.5184400081634521, - "learning_rate": 4.164074074074074e-05, - "loss": 0.6416, - "step": 7515 - }, - { - "epoch": 0.2638164937959599, - "grad_norm": 0.4344260096549988, - "learning_rate": 4.163888888888889e-05, - "loss": 0.5394, - "step": 7516 - }, - { - "epoch": 0.263851594447077, - "grad_norm": 0.42921000719070435, - "learning_rate": 4.163703703703704e-05, - "loss": 0.4754, - "step": 7517 - }, - { - "epoch": 0.26388669509819407, - "grad_norm": 0.4028371572494507, - "learning_rate": 4.163518518518519e-05, - "loss": 0.4382, - "step": 7518 - }, - { - "epoch": 0.2639217957493111, - "grad_norm": 0.5183272361755371, - "learning_rate": 4.1633333333333333e-05, - "loss": 0.5324, - "step": 7519 - }, - { - "epoch": 0.26395689640042824, - "grad_norm": 0.565790057182312, - "learning_rate": 4.1631481481481484e-05, - "loss": 0.4695, - "step": 7520 - }, - { - "epoch": 0.2639919970515453, - "grad_norm": 0.4437619149684906, - "learning_rate": 4.162962962962963e-05, - "loss": 0.5243, - "step": 7521 - }, - { - "epoch": 0.26402709770266236, - "grad_norm": 0.4973020851612091, - "learning_rate": 4.1627777777777784e-05, - "loss": 0.5713, - "step": 7522 - }, - { - "epoch": 0.2640621983537795, - "grad_norm": 0.45927754044532776, - "learning_rate": 4.162592592592593e-05, - "loss": 0.4725, - "step": 7523 - }, - { - "epoch": 0.26409729900489654, - "grad_norm": 0.4858807325363159, - "learning_rate": 4.162407407407408e-05, - "loss": 0.556, - "step": 7524 - }, - { - "epoch": 0.2641323996560136, - "grad_norm": 0.4079865515232086, - "learning_rate": 4.162222222222222e-05, - "loss": 0.5943, - "step": 7525 - }, - { - "epoch": 0.2641675003071307, - "grad_norm": 0.3650174140930176, - "learning_rate": 4.162037037037037e-05, - "loss": 0.4841, - "step": 7526 - }, - { - "epoch": 0.26420260095824777, - "grad_norm": 0.500958263874054, - "learning_rate": 4.161851851851852e-05, - "loss": 0.5672, - "step": 7527 - }, - { - "epoch": 0.26423770160936483, - "grad_norm": 0.46089860796928406, - "learning_rate": 4.161666666666667e-05, - "loss": 0.5251, - "step": 7528 - }, - { - "epoch": 0.26427280226048194, - "grad_norm": 0.45179665088653564, - "learning_rate": 4.161481481481482e-05, - "loss": 0.534, - "step": 7529 - }, - { - "epoch": 0.264307902911599, - "grad_norm": 0.4621388912200928, - "learning_rate": 4.1612962962962964e-05, - "loss": 0.5884, - "step": 7530 - }, - { - "epoch": 0.26434300356271606, - "grad_norm": 0.40189042687416077, - "learning_rate": 4.1611111111111114e-05, - "loss": 0.4143, - "step": 7531 - }, - { - "epoch": 0.2643781042138332, - "grad_norm": 0.5874378681182861, - "learning_rate": 4.160925925925926e-05, - "loss": 0.4932, - "step": 7532 - }, - { - "epoch": 0.26441320486495024, - "grad_norm": 0.4455501139163971, - "learning_rate": 4.1607407407407415e-05, - "loss": 0.5138, - "step": 7533 - }, - { - "epoch": 0.26444830551606735, - "grad_norm": 0.5251665115356445, - "learning_rate": 4.160555555555556e-05, - "loss": 0.5457, - "step": 7534 - }, - { - "epoch": 0.2644834061671844, - "grad_norm": 0.4612125754356384, - "learning_rate": 4.160370370370371e-05, - "loss": 0.586, - "step": 7535 - }, - { - "epoch": 0.26451850681830147, - "grad_norm": 0.5327960252761841, - "learning_rate": 4.160185185185185e-05, - "loss": 0.4874, - "step": 7536 - }, - { - "epoch": 0.2645536074694186, - "grad_norm": 0.5167223215103149, - "learning_rate": 4.16e-05, - "loss": 0.5736, - "step": 7537 - }, - { - "epoch": 0.26458870812053564, - "grad_norm": 0.5393542051315308, - "learning_rate": 4.159814814814815e-05, - "loss": 0.6327, - "step": 7538 - }, - { - "epoch": 0.2646238087716527, - "grad_norm": 0.45289623737335205, - "learning_rate": 4.15962962962963e-05, - "loss": 0.5078, - "step": 7539 - }, - { - "epoch": 0.2646589094227698, - "grad_norm": 0.6524006128311157, - "learning_rate": 4.1594444444444445e-05, - "loss": 0.5294, - "step": 7540 - }, - { - "epoch": 0.2646940100738869, - "grad_norm": 0.4647675156593323, - "learning_rate": 4.1592592592592595e-05, - "loss": 0.602, - "step": 7541 - }, - { - "epoch": 0.26472911072500394, - "grad_norm": 0.4688878357410431, - "learning_rate": 4.159074074074074e-05, - "loss": 0.4946, - "step": 7542 - }, - { - "epoch": 0.26476421137612105, - "grad_norm": 0.46095725893974304, - "learning_rate": 4.158888888888889e-05, - "loss": 0.551, - "step": 7543 - }, - { - "epoch": 0.2647993120272381, - "grad_norm": 0.48833325505256653, - "learning_rate": 4.158703703703704e-05, - "loss": 0.5381, - "step": 7544 - }, - { - "epoch": 0.26483441267835517, - "grad_norm": 0.4082001745700836, - "learning_rate": 4.158518518518519e-05, - "loss": 0.4363, - "step": 7545 - }, - { - "epoch": 0.2648695133294723, - "grad_norm": 0.5213140845298767, - "learning_rate": 4.158333333333333e-05, - "loss": 0.4712, - "step": 7546 - }, - { - "epoch": 0.26490461398058934, - "grad_norm": 0.49885624647140503, - "learning_rate": 4.158148148148148e-05, - "loss": 0.4841, - "step": 7547 - }, - { - "epoch": 0.2649397146317064, - "grad_norm": 0.5354849100112915, - "learning_rate": 4.157962962962963e-05, - "loss": 0.4384, - "step": 7548 - }, - { - "epoch": 0.2649748152828235, - "grad_norm": 0.4692821204662323, - "learning_rate": 4.157777777777778e-05, - "loss": 0.5323, - "step": 7549 - }, - { - "epoch": 0.2650099159339406, - "grad_norm": 0.41787219047546387, - "learning_rate": 4.157592592592593e-05, - "loss": 0.456, - "step": 7550 - }, - { - "epoch": 0.26504501658505764, - "grad_norm": 0.46144047379493713, - "learning_rate": 4.1574074074074076e-05, - "loss": 0.4887, - "step": 7551 - }, - { - "epoch": 0.26508011723617475, - "grad_norm": 0.5334460139274597, - "learning_rate": 4.1572222222222226e-05, - "loss": 0.5765, - "step": 7552 - }, - { - "epoch": 0.2651152178872918, - "grad_norm": 0.47361454367637634, - "learning_rate": 4.157037037037037e-05, - "loss": 0.4695, - "step": 7553 - }, - { - "epoch": 0.26515031853840887, - "grad_norm": 0.5260133743286133, - "learning_rate": 4.156851851851852e-05, - "loss": 0.567, - "step": 7554 - }, - { - "epoch": 0.265185419189526, - "grad_norm": 0.3953756093978882, - "learning_rate": 4.156666666666667e-05, - "loss": 0.4452, - "step": 7555 - }, - { - "epoch": 0.26522051984064304, - "grad_norm": 0.3937123119831085, - "learning_rate": 4.156481481481482e-05, - "loss": 0.4885, - "step": 7556 - }, - { - "epoch": 0.2652556204917601, - "grad_norm": 0.5127334594726562, - "learning_rate": 4.156296296296296e-05, - "loss": 0.4778, - "step": 7557 - }, - { - "epoch": 0.2652907211428772, - "grad_norm": 0.5080131888389587, - "learning_rate": 4.156111111111111e-05, - "loss": 0.475, - "step": 7558 - }, - { - "epoch": 0.2653258217939943, - "grad_norm": 0.46610090136528015, - "learning_rate": 4.1559259259259257e-05, - "loss": 0.4962, - "step": 7559 - }, - { - "epoch": 0.26536092244511134, - "grad_norm": 0.4696860611438751, - "learning_rate": 4.1557407407407413e-05, - "loss": 0.4814, - "step": 7560 - }, - { - "epoch": 0.26539602309622845, - "grad_norm": 0.45451176166534424, - "learning_rate": 4.155555555555556e-05, - "loss": 0.4729, - "step": 7561 - }, - { - "epoch": 0.2654311237473455, - "grad_norm": 0.451628178358078, - "learning_rate": 4.155370370370371e-05, - "loss": 0.6523, - "step": 7562 - }, - { - "epoch": 0.26546622439846257, - "grad_norm": 0.40766385197639465, - "learning_rate": 4.155185185185185e-05, - "loss": 0.5166, - "step": 7563 - }, - { - "epoch": 0.2655013250495797, - "grad_norm": 0.5376324653625488, - "learning_rate": 4.155e-05, - "loss": 0.5602, - "step": 7564 - }, - { - "epoch": 0.26553642570069674, - "grad_norm": 0.5878347754478455, - "learning_rate": 4.154814814814815e-05, - "loss": 0.513, - "step": 7565 - }, - { - "epoch": 0.2655715263518138, - "grad_norm": 0.516880214214325, - "learning_rate": 4.15462962962963e-05, - "loss": 0.5492, - "step": 7566 - }, - { - "epoch": 0.2656066270029309, - "grad_norm": 0.499950647354126, - "learning_rate": 4.1544444444444444e-05, - "loss": 0.5577, - "step": 7567 - }, - { - "epoch": 0.265641727654048, - "grad_norm": 0.4637182652950287, - "learning_rate": 4.1542592592592594e-05, - "loss": 0.5273, - "step": 7568 - }, - { - "epoch": 0.26567682830516504, - "grad_norm": 0.5746326446533203, - "learning_rate": 4.1540740740740744e-05, - "loss": 0.4668, - "step": 7569 - }, - { - "epoch": 0.26571192895628215, - "grad_norm": 0.4833957254886627, - "learning_rate": 4.153888888888889e-05, - "loss": 0.5426, - "step": 7570 - }, - { - "epoch": 0.2657470296073992, - "grad_norm": 0.5001996159553528, - "learning_rate": 4.1537037037037044e-05, - "loss": 0.4197, - "step": 7571 - }, - { - "epoch": 0.26578213025851627, - "grad_norm": 0.4693335294723511, - "learning_rate": 4.153518518518519e-05, - "loss": 0.4941, - "step": 7572 - }, - { - "epoch": 0.2658172309096334, - "grad_norm": 0.45346182584762573, - "learning_rate": 4.153333333333334e-05, - "loss": 0.4193, - "step": 7573 - }, - { - "epoch": 0.26585233156075044, - "grad_norm": 0.40271127223968506, - "learning_rate": 4.153148148148148e-05, - "loss": 0.4657, - "step": 7574 - }, - { - "epoch": 0.2658874322118675, - "grad_norm": 0.44330936670303345, - "learning_rate": 4.152962962962963e-05, - "loss": 0.383, - "step": 7575 - }, - { - "epoch": 0.2659225328629846, - "grad_norm": 0.5057411193847656, - "learning_rate": 4.152777777777778e-05, - "loss": 0.4942, - "step": 7576 - }, - { - "epoch": 0.2659576335141017, - "grad_norm": 0.4910387098789215, - "learning_rate": 4.152592592592593e-05, - "loss": 0.5239, - "step": 7577 - }, - { - "epoch": 0.2659927341652188, - "grad_norm": 0.4694006145000458, - "learning_rate": 4.1524074074074075e-05, - "loss": 0.4601, - "step": 7578 - }, - { - "epoch": 0.26602783481633585, - "grad_norm": 0.5469198822975159, - "learning_rate": 4.1522222222222225e-05, - "loss": 0.6436, - "step": 7579 - }, - { - "epoch": 0.2660629354674529, - "grad_norm": 0.4276631474494934, - "learning_rate": 4.152037037037037e-05, - "loss": 0.5719, - "step": 7580 - }, - { - "epoch": 0.26609803611857, - "grad_norm": 0.45176711678504944, - "learning_rate": 4.1518518518518525e-05, - "loss": 0.5465, - "step": 7581 - }, - { - "epoch": 0.2661331367696871, - "grad_norm": 0.5191638469696045, - "learning_rate": 4.151666666666667e-05, - "loss": 0.5122, - "step": 7582 - }, - { - "epoch": 0.26616823742080414, - "grad_norm": 0.4580972492694855, - "learning_rate": 4.151481481481482e-05, - "loss": 0.5442, - "step": 7583 - }, - { - "epoch": 0.26620333807192126, - "grad_norm": 0.44986477494239807, - "learning_rate": 4.151296296296296e-05, - "loss": 0.4699, - "step": 7584 - }, - { - "epoch": 0.2662384387230383, - "grad_norm": 0.4422611892223358, - "learning_rate": 4.151111111111111e-05, - "loss": 0.573, - "step": 7585 - }, - { - "epoch": 0.2662735393741554, - "grad_norm": 0.47531193494796753, - "learning_rate": 4.150925925925926e-05, - "loss": 0.5572, - "step": 7586 - }, - { - "epoch": 0.2663086400252725, - "grad_norm": 0.48457324504852295, - "learning_rate": 4.150740740740741e-05, - "loss": 0.5368, - "step": 7587 - }, - { - "epoch": 0.26634374067638955, - "grad_norm": 0.46214455366134644, - "learning_rate": 4.1505555555555556e-05, - "loss": 0.4991, - "step": 7588 - }, - { - "epoch": 0.2663788413275066, - "grad_norm": 0.8377016186714172, - "learning_rate": 4.1503703703703706e-05, - "loss": 0.4666, - "step": 7589 - }, - { - "epoch": 0.2664139419786237, - "grad_norm": 0.6739977598190308, - "learning_rate": 4.1501851851851856e-05, - "loss": 0.5812, - "step": 7590 - }, - { - "epoch": 0.2664490426297408, - "grad_norm": 0.5208523869514465, - "learning_rate": 4.15e-05, - "loss": 0.455, - "step": 7591 - }, - { - "epoch": 0.26648414328085784, - "grad_norm": 0.4598886966705322, - "learning_rate": 4.1498148148148156e-05, - "loss": 0.5122, - "step": 7592 - }, - { - "epoch": 0.26651924393197496, - "grad_norm": 0.5153777003288269, - "learning_rate": 4.14962962962963e-05, - "loss": 0.647, - "step": 7593 - }, - { - "epoch": 0.266554344583092, - "grad_norm": 0.5190661549568176, - "learning_rate": 4.149444444444445e-05, - "loss": 0.5202, - "step": 7594 - }, - { - "epoch": 0.2665894452342091, - "grad_norm": 0.4826984405517578, - "learning_rate": 4.149259259259259e-05, - "loss": 0.5727, - "step": 7595 - }, - { - "epoch": 0.2666245458853262, - "grad_norm": 0.44541239738464355, - "learning_rate": 4.149074074074074e-05, - "loss": 0.4305, - "step": 7596 - }, - { - "epoch": 0.26665964653644325, - "grad_norm": 0.4681861102581024, - "learning_rate": 4.1488888888888886e-05, - "loss": 0.4967, - "step": 7597 - }, - { - "epoch": 0.2666947471875603, - "grad_norm": 0.5140261650085449, - "learning_rate": 4.148703703703704e-05, - "loss": 0.536, - "step": 7598 - }, - { - "epoch": 0.2667298478386774, - "grad_norm": 0.6457520127296448, - "learning_rate": 4.1485185185185186e-05, - "loss": 0.4956, - "step": 7599 - }, - { - "epoch": 0.2667649484897945, - "grad_norm": 0.5041233897209167, - "learning_rate": 4.1483333333333337e-05, - "loss": 0.5223, - "step": 7600 - }, - { - "epoch": 0.26680004914091154, - "grad_norm": 0.8785339593887329, - "learning_rate": 4.148148148148148e-05, - "loss": 0.518, - "step": 7601 - }, - { - "epoch": 0.26683514979202866, - "grad_norm": 0.5706547498703003, - "learning_rate": 4.147962962962963e-05, - "loss": 0.569, - "step": 7602 - }, - { - "epoch": 0.2668702504431457, - "grad_norm": 0.5532270073890686, - "learning_rate": 4.147777777777778e-05, - "loss": 0.5151, - "step": 7603 - }, - { - "epoch": 0.2669053510942628, - "grad_norm": 0.44636422395706177, - "learning_rate": 4.147592592592593e-05, - "loss": 0.4326, - "step": 7604 - }, - { - "epoch": 0.2669404517453799, - "grad_norm": 0.44416308403015137, - "learning_rate": 4.1474074074074074e-05, - "loss": 0.485, - "step": 7605 - }, - { - "epoch": 0.26697555239649695, - "grad_norm": 0.44582459330558777, - "learning_rate": 4.1472222222222224e-05, - "loss": 0.5014, - "step": 7606 - }, - { - "epoch": 0.267010653047614, - "grad_norm": 0.7909802198410034, - "learning_rate": 4.1470370370370374e-05, - "loss": 0.5056, - "step": 7607 - }, - { - "epoch": 0.2670457536987311, - "grad_norm": 0.4788704812526703, - "learning_rate": 4.1468518518518524e-05, - "loss": 0.5245, - "step": 7608 - }, - { - "epoch": 0.2670808543498482, - "grad_norm": 0.39164409041404724, - "learning_rate": 4.146666666666667e-05, - "loss": 0.4326, - "step": 7609 - }, - { - "epoch": 0.26711595500096524, - "grad_norm": 0.5046530961990356, - "learning_rate": 4.146481481481482e-05, - "loss": 0.4282, - "step": 7610 - }, - { - "epoch": 0.26715105565208236, - "grad_norm": 0.5588620901107788, - "learning_rate": 4.146296296296297e-05, - "loss": 0.5656, - "step": 7611 - }, - { - "epoch": 0.2671861563031994, - "grad_norm": 0.5078041553497314, - "learning_rate": 4.146111111111111e-05, - "loss": 0.5452, - "step": 7612 - }, - { - "epoch": 0.2672212569543165, - "grad_norm": 0.4782278835773468, - "learning_rate": 4.145925925925926e-05, - "loss": 0.4703, - "step": 7613 - }, - { - "epoch": 0.2672563576054336, - "grad_norm": 0.547173798084259, - "learning_rate": 4.145740740740741e-05, - "loss": 0.559, - "step": 7614 - }, - { - "epoch": 0.26729145825655065, - "grad_norm": 0.47405076026916504, - "learning_rate": 4.145555555555556e-05, - "loss": 0.2886, - "step": 7615 - }, - { - "epoch": 0.2673265589076677, - "grad_norm": 0.4494037330150604, - "learning_rate": 4.1453703703703704e-05, - "loss": 0.4618, - "step": 7616 - }, - { - "epoch": 0.2673616595587848, - "grad_norm": 0.4211485981941223, - "learning_rate": 4.1451851851851855e-05, - "loss": 0.4637, - "step": 7617 - }, - { - "epoch": 0.2673967602099019, - "grad_norm": 0.45500192046165466, - "learning_rate": 4.145e-05, - "loss": 0.406, - "step": 7618 - }, - { - "epoch": 0.267431860861019, - "grad_norm": 0.5125668048858643, - "learning_rate": 4.1448148148148155e-05, - "loss": 0.5321, - "step": 7619 - }, - { - "epoch": 0.26746696151213606, - "grad_norm": 0.5277680158615112, - "learning_rate": 4.14462962962963e-05, - "loss": 0.4492, - "step": 7620 - }, - { - "epoch": 0.2675020621632531, - "grad_norm": 0.5325971245765686, - "learning_rate": 4.144444444444445e-05, - "loss": 0.5671, - "step": 7621 - }, - { - "epoch": 0.26753716281437023, - "grad_norm": 0.7690802216529846, - "learning_rate": 4.144259259259259e-05, - "loss": 0.504, - "step": 7622 - }, - { - "epoch": 0.2675722634654873, - "grad_norm": 0.4562428891658783, - "learning_rate": 4.144074074074074e-05, - "loss": 0.4736, - "step": 7623 - }, - { - "epoch": 0.26760736411660435, - "grad_norm": 0.42155665159225464, - "learning_rate": 4.1438888888888885e-05, - "loss": 0.4216, - "step": 7624 - }, - { - "epoch": 0.26764246476772147, - "grad_norm": 0.45598453283309937, - "learning_rate": 4.143703703703704e-05, - "loss": 0.493, - "step": 7625 - }, - { - "epoch": 0.2676775654188385, - "grad_norm": 0.5497918128967285, - "learning_rate": 4.1435185185185185e-05, - "loss": 0.5464, - "step": 7626 - }, - { - "epoch": 0.2677126660699556, - "grad_norm": 0.5007471442222595, - "learning_rate": 4.1433333333333335e-05, - "loss": 0.4966, - "step": 7627 - }, - { - "epoch": 0.2677477667210727, - "grad_norm": 0.4644007384777069, - "learning_rate": 4.1431481481481485e-05, - "loss": 0.4675, - "step": 7628 - }, - { - "epoch": 0.26778286737218976, - "grad_norm": 0.4535650312900543, - "learning_rate": 4.142962962962963e-05, - "loss": 0.5203, - "step": 7629 - }, - { - "epoch": 0.2678179680233068, - "grad_norm": 0.5667701959609985, - "learning_rate": 4.142777777777778e-05, - "loss": 0.5199, - "step": 7630 - }, - { - "epoch": 0.26785306867442393, - "grad_norm": 0.485664039850235, - "learning_rate": 4.142592592592593e-05, - "loss": 0.5729, - "step": 7631 - }, - { - "epoch": 0.267888169325541, - "grad_norm": 0.4969543218612671, - "learning_rate": 4.142407407407408e-05, - "loss": 0.5521, - "step": 7632 - }, - { - "epoch": 0.26792326997665805, - "grad_norm": 0.5677583813667297, - "learning_rate": 4.142222222222222e-05, - "loss": 0.5113, - "step": 7633 - }, - { - "epoch": 0.26795837062777517, - "grad_norm": 0.5586143136024475, - "learning_rate": 4.142037037037037e-05, - "loss": 0.5752, - "step": 7634 - }, - { - "epoch": 0.2679934712788922, - "grad_norm": 0.45297497510910034, - "learning_rate": 4.141851851851852e-05, - "loss": 0.4606, - "step": 7635 - }, - { - "epoch": 0.2680285719300093, - "grad_norm": 0.509156346321106, - "learning_rate": 4.141666666666667e-05, - "loss": 0.5636, - "step": 7636 - }, - { - "epoch": 0.2680636725811264, - "grad_norm": 0.5677899718284607, - "learning_rate": 4.1414814814814816e-05, - "loss": 0.5241, - "step": 7637 - }, - { - "epoch": 0.26809877323224346, - "grad_norm": 0.4411572813987732, - "learning_rate": 4.1412962962962966e-05, - "loss": 0.4471, - "step": 7638 - }, - { - "epoch": 0.2681338738833605, - "grad_norm": 0.46847671270370483, - "learning_rate": 4.141111111111111e-05, - "loss": 0.4499, - "step": 7639 - }, - { - "epoch": 0.26816897453447763, - "grad_norm": 0.5007560849189758, - "learning_rate": 4.140925925925926e-05, - "loss": 0.508, - "step": 7640 - }, - { - "epoch": 0.2682040751855947, - "grad_norm": 0.7013872265815735, - "learning_rate": 4.140740740740741e-05, - "loss": 0.5634, - "step": 7641 - }, - { - "epoch": 0.26823917583671175, - "grad_norm": 0.5115219354629517, - "learning_rate": 4.140555555555556e-05, - "loss": 0.5703, - "step": 7642 - }, - { - "epoch": 0.26827427648782887, - "grad_norm": 0.46697232127189636, - "learning_rate": 4.14037037037037e-05, - "loss": 0.5071, - "step": 7643 - }, - { - "epoch": 0.2683093771389459, - "grad_norm": 0.5136799812316895, - "learning_rate": 4.140185185185185e-05, - "loss": 0.5832, - "step": 7644 - }, - { - "epoch": 0.268344477790063, - "grad_norm": 0.4975418150424957, - "learning_rate": 4.14e-05, - "loss": 0.5333, - "step": 7645 - }, - { - "epoch": 0.2683795784411801, - "grad_norm": 0.483143150806427, - "learning_rate": 4.1398148148148154e-05, - "loss": 0.5193, - "step": 7646 - }, - { - "epoch": 0.26841467909229716, - "grad_norm": 0.4501406252384186, - "learning_rate": 4.13962962962963e-05, - "loss": 0.5642, - "step": 7647 - }, - { - "epoch": 0.2684497797434142, - "grad_norm": 0.4924381673336029, - "learning_rate": 4.139444444444445e-05, - "loss": 0.4952, - "step": 7648 - }, - { - "epoch": 0.26848488039453133, - "grad_norm": 0.507396399974823, - "learning_rate": 4.13925925925926e-05, - "loss": 0.5876, - "step": 7649 - }, - { - "epoch": 0.2685199810456484, - "grad_norm": 0.46194109320640564, - "learning_rate": 4.139074074074074e-05, - "loss": 0.534, - "step": 7650 - }, - { - "epoch": 0.26855508169676545, - "grad_norm": 0.6872106194496155, - "learning_rate": 4.138888888888889e-05, - "loss": 0.5002, - "step": 7651 - }, - { - "epoch": 0.26859018234788257, - "grad_norm": 0.44998133182525635, - "learning_rate": 4.138703703703704e-05, - "loss": 0.5482, - "step": 7652 - }, - { - "epoch": 0.2686252829989996, - "grad_norm": 0.44032078981399536, - "learning_rate": 4.138518518518519e-05, - "loss": 0.4665, - "step": 7653 - }, - { - "epoch": 0.2686603836501167, - "grad_norm": 0.5034680962562561, - "learning_rate": 4.1383333333333334e-05, - "loss": 0.5922, - "step": 7654 - }, - { - "epoch": 0.2686954843012338, - "grad_norm": 0.46300679445266724, - "learning_rate": 4.1381481481481484e-05, - "loss": 0.5377, - "step": 7655 - }, - { - "epoch": 0.26873058495235086, - "grad_norm": 0.5636725425720215, - "learning_rate": 4.137962962962963e-05, - "loss": 0.4275, - "step": 7656 - }, - { - "epoch": 0.2687656856034679, - "grad_norm": 0.5766387581825256, - "learning_rate": 4.1377777777777784e-05, - "loss": 0.4459, - "step": 7657 - }, - { - "epoch": 0.26880078625458503, - "grad_norm": 0.4917103946208954, - "learning_rate": 4.137592592592593e-05, - "loss": 0.5446, - "step": 7658 - }, - { - "epoch": 0.2688358869057021, - "grad_norm": 0.45924898982048035, - "learning_rate": 4.137407407407408e-05, - "loss": 0.4915, - "step": 7659 - }, - { - "epoch": 0.26887098755681915, - "grad_norm": 0.4237707853317261, - "learning_rate": 4.137222222222222e-05, - "loss": 0.5079, - "step": 7660 - }, - { - "epoch": 0.26890608820793627, - "grad_norm": 0.4936401844024658, - "learning_rate": 4.137037037037037e-05, - "loss": 0.5858, - "step": 7661 - }, - { - "epoch": 0.2689411888590533, - "grad_norm": 0.3986894190311432, - "learning_rate": 4.136851851851852e-05, - "loss": 0.4861, - "step": 7662 - }, - { - "epoch": 0.26897628951017044, - "grad_norm": 0.5534679293632507, - "learning_rate": 4.136666666666667e-05, - "loss": 0.5541, - "step": 7663 - }, - { - "epoch": 0.2690113901612875, - "grad_norm": 0.4836670160293579, - "learning_rate": 4.1364814814814815e-05, - "loss": 0.4657, - "step": 7664 - }, - { - "epoch": 0.26904649081240456, - "grad_norm": 0.46123191714286804, - "learning_rate": 4.1362962962962965e-05, - "loss": 0.4798, - "step": 7665 - }, - { - "epoch": 0.2690815914635217, - "grad_norm": 0.4545932710170746, - "learning_rate": 4.136111111111111e-05, - "loss": 0.4511, - "step": 7666 - }, - { - "epoch": 0.26911669211463873, - "grad_norm": 0.35216057300567627, - "learning_rate": 4.135925925925926e-05, - "loss": 0.3657, - "step": 7667 - }, - { - "epoch": 0.2691517927657558, - "grad_norm": 0.4517417252063751, - "learning_rate": 4.135740740740741e-05, - "loss": 0.5284, - "step": 7668 - }, - { - "epoch": 0.2691868934168729, - "grad_norm": 0.46967631578445435, - "learning_rate": 4.135555555555556e-05, - "loss": 0.5486, - "step": 7669 - }, - { - "epoch": 0.26922199406798997, - "grad_norm": 0.35666370391845703, - "learning_rate": 4.135370370370371e-05, - "loss": 0.4519, - "step": 7670 - }, - { - "epoch": 0.269257094719107, - "grad_norm": 0.5271508097648621, - "learning_rate": 4.135185185185185e-05, - "loss": 0.5621, - "step": 7671 - }, - { - "epoch": 0.26929219537022414, - "grad_norm": 0.5227915048599243, - "learning_rate": 4.135e-05, - "loss": 0.51, - "step": 7672 - }, - { - "epoch": 0.2693272960213412, - "grad_norm": 0.48941370844841003, - "learning_rate": 4.134814814814815e-05, - "loss": 0.4392, - "step": 7673 - }, - { - "epoch": 0.26936239667245826, - "grad_norm": 0.4837800860404968, - "learning_rate": 4.13462962962963e-05, - "loss": 0.4893, - "step": 7674 - }, - { - "epoch": 0.2693974973235754, - "grad_norm": 0.5671278238296509, - "learning_rate": 4.1344444444444446e-05, - "loss": 0.646, - "step": 7675 - }, - { - "epoch": 0.26943259797469243, - "grad_norm": 0.4431532621383667, - "learning_rate": 4.1342592592592596e-05, - "loss": 0.3947, - "step": 7676 - }, - { - "epoch": 0.2694676986258095, - "grad_norm": 0.43284133076667786, - "learning_rate": 4.134074074074074e-05, - "loss": 0.4103, - "step": 7677 - }, - { - "epoch": 0.2695027992769266, - "grad_norm": 0.4335485100746155, - "learning_rate": 4.133888888888889e-05, - "loss": 0.4637, - "step": 7678 - }, - { - "epoch": 0.26953789992804367, - "grad_norm": 0.5402945280075073, - "learning_rate": 4.133703703703704e-05, - "loss": 0.3895, - "step": 7679 - }, - { - "epoch": 0.2695730005791607, - "grad_norm": 0.564543604850769, - "learning_rate": 4.133518518518519e-05, - "loss": 0.4155, - "step": 7680 - }, - { - "epoch": 0.26960810123027784, - "grad_norm": 0.45690223574638367, - "learning_rate": 4.133333333333333e-05, - "loss": 0.3733, - "step": 7681 - }, - { - "epoch": 0.2696432018813949, - "grad_norm": 0.4640994369983673, - "learning_rate": 4.133148148148148e-05, - "loss": 0.5061, - "step": 7682 - }, - { - "epoch": 0.26967830253251196, - "grad_norm": 0.49598708748817444, - "learning_rate": 4.1329629629629626e-05, - "loss": 0.5429, - "step": 7683 - }, - { - "epoch": 0.2697134031836291, - "grad_norm": 0.5502113103866577, - "learning_rate": 4.132777777777778e-05, - "loss": 0.4121, - "step": 7684 - }, - { - "epoch": 0.26974850383474613, - "grad_norm": 0.4273088276386261, - "learning_rate": 4.1325925925925927e-05, - "loss": 0.5562, - "step": 7685 - }, - { - "epoch": 0.2697836044858632, - "grad_norm": 0.4834423065185547, - "learning_rate": 4.132407407407408e-05, - "loss": 0.5205, - "step": 7686 - }, - { - "epoch": 0.2698187051369803, - "grad_norm": 0.5390392541885376, - "learning_rate": 4.132222222222222e-05, - "loss": 0.5665, - "step": 7687 - }, - { - "epoch": 0.26985380578809737, - "grad_norm": 0.4790933430194855, - "learning_rate": 4.132037037037037e-05, - "loss": 0.4652, - "step": 7688 - }, - { - "epoch": 0.2698889064392144, - "grad_norm": 0.459612101316452, - "learning_rate": 4.131851851851852e-05, - "loss": 0.4569, - "step": 7689 - }, - { - "epoch": 0.26992400709033154, - "grad_norm": 0.49122801423072815, - "learning_rate": 4.131666666666667e-05, - "loss": 0.5182, - "step": 7690 - }, - { - "epoch": 0.2699591077414486, - "grad_norm": 0.43151938915252686, - "learning_rate": 4.131481481481482e-05, - "loss": 0.4377, - "step": 7691 - }, - { - "epoch": 0.26999420839256566, - "grad_norm": 0.4690621495246887, - "learning_rate": 4.1312962962962964e-05, - "loss": 0.4044, - "step": 7692 - }, - { - "epoch": 0.2700293090436828, - "grad_norm": 0.5127304792404175, - "learning_rate": 4.1311111111111114e-05, - "loss": 0.496, - "step": 7693 - }, - { - "epoch": 0.27006440969479983, - "grad_norm": 0.4965999722480774, - "learning_rate": 4.130925925925926e-05, - "loss": 0.5157, - "step": 7694 - }, - { - "epoch": 0.2700995103459169, - "grad_norm": 0.4662296772003174, - "learning_rate": 4.1307407407407414e-05, - "loss": 0.5091, - "step": 7695 - }, - { - "epoch": 0.270134610997034, - "grad_norm": 0.4549185633659363, - "learning_rate": 4.130555555555556e-05, - "loss": 0.4465, - "step": 7696 - }, - { - "epoch": 0.27016971164815107, - "grad_norm": 0.4576559066772461, - "learning_rate": 4.130370370370371e-05, - "loss": 0.4036, - "step": 7697 - }, - { - "epoch": 0.2702048122992681, - "grad_norm": 0.5476909875869751, - "learning_rate": 4.130185185185185e-05, - "loss": 0.5749, - "step": 7698 - }, - { - "epoch": 0.27023991295038524, - "grad_norm": 0.5369701385498047, - "learning_rate": 4.13e-05, - "loss": 0.6156, - "step": 7699 - }, - { - "epoch": 0.2702750136015023, - "grad_norm": 0.4317467212677002, - "learning_rate": 4.129814814814815e-05, - "loss": 0.4478, - "step": 7700 - }, - { - "epoch": 0.27031011425261936, - "grad_norm": 0.4502733647823334, - "learning_rate": 4.12962962962963e-05, - "loss": 0.4992, - "step": 7701 - }, - { - "epoch": 0.2703452149037365, - "grad_norm": 0.5669975280761719, - "learning_rate": 4.1294444444444445e-05, - "loss": 0.6014, - "step": 7702 - }, - { - "epoch": 0.27038031555485353, - "grad_norm": 0.5140628218650818, - "learning_rate": 4.1292592592592595e-05, - "loss": 0.5969, - "step": 7703 - }, - { - "epoch": 0.2704154162059706, - "grad_norm": 0.5227614641189575, - "learning_rate": 4.129074074074074e-05, - "loss": 0.558, - "step": 7704 - }, - { - "epoch": 0.2704505168570877, - "grad_norm": 0.46294933557510376, - "learning_rate": 4.1288888888888895e-05, - "loss": 0.4567, - "step": 7705 - }, - { - "epoch": 0.27048561750820477, - "grad_norm": 0.46965816617012024, - "learning_rate": 4.128703703703704e-05, - "loss": 0.5502, - "step": 7706 - }, - { - "epoch": 0.2705207181593219, - "grad_norm": 0.5077442526817322, - "learning_rate": 4.128518518518519e-05, - "loss": 0.6159, - "step": 7707 - }, - { - "epoch": 0.27055581881043894, - "grad_norm": 0.45264458656311035, - "learning_rate": 4.128333333333333e-05, - "loss": 0.525, - "step": 7708 - }, - { - "epoch": 0.270590919461556, - "grad_norm": 0.43857094645500183, - "learning_rate": 4.128148148148148e-05, - "loss": 0.4909, - "step": 7709 - }, - { - "epoch": 0.2706260201126731, - "grad_norm": 0.5003405213356018, - "learning_rate": 4.127962962962963e-05, - "loss": 0.434, - "step": 7710 - }, - { - "epoch": 0.2706611207637902, - "grad_norm": 0.6247125864028931, - "learning_rate": 4.127777777777778e-05, - "loss": 0.5116, - "step": 7711 - }, - { - "epoch": 0.27069622141490723, - "grad_norm": 0.6077256202697754, - "learning_rate": 4.127592592592593e-05, - "loss": 0.4589, - "step": 7712 - }, - { - "epoch": 0.27073132206602435, - "grad_norm": 0.4525587260723114, - "learning_rate": 4.1274074074074075e-05, - "loss": 0.4521, - "step": 7713 - }, - { - "epoch": 0.2707664227171414, - "grad_norm": 0.41736602783203125, - "learning_rate": 4.1272222222222226e-05, - "loss": 0.4676, - "step": 7714 - }, - { - "epoch": 0.27080152336825847, - "grad_norm": 0.446231871843338, - "learning_rate": 4.127037037037037e-05, - "loss": 0.4216, - "step": 7715 - }, - { - "epoch": 0.2708366240193756, - "grad_norm": 0.4683670401573181, - "learning_rate": 4.1268518518518526e-05, - "loss": 0.58, - "step": 7716 - }, - { - "epoch": 0.27087172467049264, - "grad_norm": 0.4113157093524933, - "learning_rate": 4.126666666666667e-05, - "loss": 0.5022, - "step": 7717 - }, - { - "epoch": 0.2709068253216097, - "grad_norm": 0.5298221707344055, - "learning_rate": 4.126481481481482e-05, - "loss": 0.4586, - "step": 7718 - }, - { - "epoch": 0.2709419259727268, - "grad_norm": 0.5101876854896545, - "learning_rate": 4.126296296296296e-05, - "loss": 0.4975, - "step": 7719 - }, - { - "epoch": 0.2709770266238439, - "grad_norm": 0.40430304408073425, - "learning_rate": 4.126111111111111e-05, - "loss": 0.433, - "step": 7720 - }, - { - "epoch": 0.27101212727496093, - "grad_norm": 0.607788622379303, - "learning_rate": 4.1259259259259256e-05, - "loss": 0.4343, - "step": 7721 - }, - { - "epoch": 0.27104722792607805, - "grad_norm": 0.4741303622722626, - "learning_rate": 4.125740740740741e-05, - "loss": 0.6197, - "step": 7722 - }, - { - "epoch": 0.2710823285771951, - "grad_norm": 0.4905547797679901, - "learning_rate": 4.1255555555555556e-05, - "loss": 0.4941, - "step": 7723 - }, - { - "epoch": 0.27111742922831217, - "grad_norm": 0.40869927406311035, - "learning_rate": 4.1253703703703706e-05, - "loss": 0.5153, - "step": 7724 - }, - { - "epoch": 0.2711525298794293, - "grad_norm": 0.5946902632713318, - "learning_rate": 4.125185185185185e-05, - "loss": 0.5648, - "step": 7725 - }, - { - "epoch": 0.27118763053054634, - "grad_norm": 0.49512597918510437, - "learning_rate": 4.125e-05, - "loss": 0.5798, - "step": 7726 - }, - { - "epoch": 0.2712227311816634, - "grad_norm": 0.5298744440078735, - "learning_rate": 4.124814814814815e-05, - "loss": 0.4823, - "step": 7727 - }, - { - "epoch": 0.2712578318327805, - "grad_norm": 0.4396688640117645, - "learning_rate": 4.12462962962963e-05, - "loss": 0.4974, - "step": 7728 - }, - { - "epoch": 0.2712929324838976, - "grad_norm": 0.4796774983406067, - "learning_rate": 4.124444444444444e-05, - "loss": 0.5625, - "step": 7729 - }, - { - "epoch": 0.27132803313501463, - "grad_norm": 0.42306798696517944, - "learning_rate": 4.1242592592592593e-05, - "loss": 0.4939, - "step": 7730 - }, - { - "epoch": 0.27136313378613175, - "grad_norm": 0.3968740701675415, - "learning_rate": 4.1240740740740744e-05, - "loss": 0.3754, - "step": 7731 - }, - { - "epoch": 0.2713982344372488, - "grad_norm": 0.45809993147850037, - "learning_rate": 4.1238888888888894e-05, - "loss": 0.5488, - "step": 7732 - }, - { - "epoch": 0.27143333508836587, - "grad_norm": 0.5045732259750366, - "learning_rate": 4.1237037037037044e-05, - "loss": 0.4354, - "step": 7733 - }, - { - "epoch": 0.271468435739483, - "grad_norm": 0.5158512592315674, - "learning_rate": 4.123518518518519e-05, - "loss": 0.4186, - "step": 7734 - }, - { - "epoch": 0.27150353639060004, - "grad_norm": 0.45087677240371704, - "learning_rate": 4.123333333333334e-05, - "loss": 0.4245, - "step": 7735 - }, - { - "epoch": 0.2715386370417171, - "grad_norm": 0.4660809338092804, - "learning_rate": 4.123148148148148e-05, - "loss": 0.5405, - "step": 7736 - }, - { - "epoch": 0.2715737376928342, - "grad_norm": 0.609434962272644, - "learning_rate": 4.122962962962963e-05, - "loss": 0.431, - "step": 7737 - }, - { - "epoch": 0.2716088383439513, - "grad_norm": 0.5032385587692261, - "learning_rate": 4.122777777777778e-05, - "loss": 0.5892, - "step": 7738 - }, - { - "epoch": 0.27164393899506833, - "grad_norm": 0.48950958251953125, - "learning_rate": 4.122592592592593e-05, - "loss": 0.4544, - "step": 7739 - }, - { - "epoch": 0.27167903964618545, - "grad_norm": 0.5500296354293823, - "learning_rate": 4.1224074074074074e-05, - "loss": 0.5794, - "step": 7740 - }, - { - "epoch": 0.2717141402973025, - "grad_norm": 0.38775956630706787, - "learning_rate": 4.1222222222222224e-05, - "loss": 0.3224, - "step": 7741 - }, - { - "epoch": 0.27174924094841957, - "grad_norm": 0.44709858298301697, - "learning_rate": 4.122037037037037e-05, - "loss": 0.5421, - "step": 7742 - }, - { - "epoch": 0.2717843415995367, - "grad_norm": 0.4604882299900055, - "learning_rate": 4.1218518518518525e-05, - "loss": 0.3306, - "step": 7743 - }, - { - "epoch": 0.27181944225065374, - "grad_norm": 0.5004599094390869, - "learning_rate": 4.121666666666667e-05, - "loss": 0.4657, - "step": 7744 - }, - { - "epoch": 0.2718545429017708, - "grad_norm": 0.48430588841438293, - "learning_rate": 4.121481481481482e-05, - "loss": 0.5546, - "step": 7745 - }, - { - "epoch": 0.2718896435528879, - "grad_norm": 0.5021043419837952, - "learning_rate": 4.121296296296296e-05, - "loss": 0.5829, - "step": 7746 - }, - { - "epoch": 0.271924744204005, - "grad_norm": 0.48793265223503113, - "learning_rate": 4.121111111111111e-05, - "loss": 0.6176, - "step": 7747 - }, - { - "epoch": 0.27195984485512203, - "grad_norm": 0.39498522877693176, - "learning_rate": 4.120925925925926e-05, - "loss": 0.544, - "step": 7748 - }, - { - "epoch": 0.27199494550623915, - "grad_norm": 0.7011139988899231, - "learning_rate": 4.120740740740741e-05, - "loss": 0.5332, - "step": 7749 - }, - { - "epoch": 0.2720300461573562, - "grad_norm": 0.432559072971344, - "learning_rate": 4.1205555555555555e-05, - "loss": 0.3683, - "step": 7750 - }, - { - "epoch": 0.2720651468084733, - "grad_norm": 0.5495989322662354, - "learning_rate": 4.1203703703703705e-05, - "loss": 0.5839, - "step": 7751 - }, - { - "epoch": 0.2721002474595904, - "grad_norm": 0.49930086731910706, - "learning_rate": 4.1201851851851855e-05, - "loss": 0.4756, - "step": 7752 - }, - { - "epoch": 0.27213534811070744, - "grad_norm": 0.3899151384830475, - "learning_rate": 4.12e-05, - "loss": 0.4081, - "step": 7753 - }, - { - "epoch": 0.27217044876182456, - "grad_norm": 0.47197073698043823, - "learning_rate": 4.1198148148148155e-05, - "loss": 0.4393, - "step": 7754 - }, - { - "epoch": 0.2722055494129416, - "grad_norm": 0.5355337262153625, - "learning_rate": 4.11962962962963e-05, - "loss": 0.5664, - "step": 7755 - }, - { - "epoch": 0.2722406500640587, - "grad_norm": 0.43312501907348633, - "learning_rate": 4.119444444444445e-05, - "loss": 0.4326, - "step": 7756 - }, - { - "epoch": 0.2722757507151758, - "grad_norm": 0.4404217004776001, - "learning_rate": 4.119259259259259e-05, - "loss": 0.4409, - "step": 7757 - }, - { - "epoch": 0.27231085136629285, - "grad_norm": 0.4807840883731842, - "learning_rate": 4.119074074074074e-05, - "loss": 0.4309, - "step": 7758 - }, - { - "epoch": 0.2723459520174099, - "grad_norm": 0.4789086878299713, - "learning_rate": 4.118888888888889e-05, - "loss": 0.5276, - "step": 7759 - }, - { - "epoch": 0.272381052668527, - "grad_norm": 1.0247880220413208, - "learning_rate": 4.118703703703704e-05, - "loss": 0.5463, - "step": 7760 - }, - { - "epoch": 0.2724161533196441, - "grad_norm": 0.42696234583854675, - "learning_rate": 4.1185185185185186e-05, - "loss": 0.5111, - "step": 7761 - }, - { - "epoch": 0.27245125397076114, - "grad_norm": 0.522151529788971, - "learning_rate": 4.1183333333333336e-05, - "loss": 0.4973, - "step": 7762 - }, - { - "epoch": 0.27248635462187826, - "grad_norm": 0.6194097399711609, - "learning_rate": 4.118148148148148e-05, - "loss": 0.601, - "step": 7763 - }, - { - "epoch": 0.2725214552729953, - "grad_norm": 0.5165808200836182, - "learning_rate": 4.117962962962963e-05, - "loss": 0.4823, - "step": 7764 - }, - { - "epoch": 0.2725565559241124, - "grad_norm": 0.5294314622879028, - "learning_rate": 4.117777777777778e-05, - "loss": 0.6212, - "step": 7765 - }, - { - "epoch": 0.2725916565752295, - "grad_norm": 0.4214794337749481, - "learning_rate": 4.117592592592593e-05, - "loss": 0.4318, - "step": 7766 - }, - { - "epoch": 0.27262675722634655, - "grad_norm": 0.5146554708480835, - "learning_rate": 4.117407407407407e-05, - "loss": 0.5237, - "step": 7767 - }, - { - "epoch": 0.2726618578774636, - "grad_norm": 0.49801895022392273, - "learning_rate": 4.117222222222222e-05, - "loss": 0.4844, - "step": 7768 - }, - { - "epoch": 0.2726969585285807, - "grad_norm": 0.5376622676849365, - "learning_rate": 4.117037037037037e-05, - "loss": 0.5236, - "step": 7769 - }, - { - "epoch": 0.2727320591796978, - "grad_norm": 0.48356348276138306, - "learning_rate": 4.116851851851852e-05, - "loss": 0.5097, - "step": 7770 - }, - { - "epoch": 0.27276715983081484, - "grad_norm": 0.37729984521865845, - "learning_rate": 4.116666666666667e-05, - "loss": 0.5635, - "step": 7771 - }, - { - "epoch": 0.27280226048193196, - "grad_norm": 0.4721139967441559, - "learning_rate": 4.116481481481482e-05, - "loss": 0.6495, - "step": 7772 - }, - { - "epoch": 0.272837361133049, - "grad_norm": 0.49566537141799927, - "learning_rate": 4.116296296296297e-05, - "loss": 0.5005, - "step": 7773 - }, - { - "epoch": 0.2728724617841661, - "grad_norm": 0.43496233224868774, - "learning_rate": 4.116111111111111e-05, - "loss": 0.5553, - "step": 7774 - }, - { - "epoch": 0.2729075624352832, - "grad_norm": 0.4672735631465912, - "learning_rate": 4.115925925925926e-05, - "loss": 0.481, - "step": 7775 - }, - { - "epoch": 0.27294266308640025, - "grad_norm": 0.45018839836120605, - "learning_rate": 4.115740740740741e-05, - "loss": 0.5734, - "step": 7776 - }, - { - "epoch": 0.2729777637375173, - "grad_norm": 0.4860087037086487, - "learning_rate": 4.115555555555556e-05, - "loss": 0.5172, - "step": 7777 - }, - { - "epoch": 0.2730128643886344, - "grad_norm": 0.4898648262023926, - "learning_rate": 4.1153703703703704e-05, - "loss": 0.5258, - "step": 7778 - }, - { - "epoch": 0.2730479650397515, - "grad_norm": 0.4818161129951477, - "learning_rate": 4.1151851851851854e-05, - "loss": 0.5589, - "step": 7779 - }, - { - "epoch": 0.27308306569086854, - "grad_norm": 0.4567377269268036, - "learning_rate": 4.115e-05, - "loss": 0.4564, - "step": 7780 - }, - { - "epoch": 0.27311816634198566, - "grad_norm": 0.42567044496536255, - "learning_rate": 4.1148148148148154e-05, - "loss": 0.4662, - "step": 7781 - }, - { - "epoch": 0.2731532669931027, - "grad_norm": 0.510845422744751, - "learning_rate": 4.11462962962963e-05, - "loss": 0.4522, - "step": 7782 - }, - { - "epoch": 0.2731883676442198, - "grad_norm": 0.4227745532989502, - "learning_rate": 4.114444444444445e-05, - "loss": 0.4796, - "step": 7783 - }, - { - "epoch": 0.2732234682953369, - "grad_norm": 0.5109333395957947, - "learning_rate": 4.114259259259259e-05, - "loss": 0.5748, - "step": 7784 - }, - { - "epoch": 0.27325856894645395, - "grad_norm": 0.5108780264854431, - "learning_rate": 4.114074074074074e-05, - "loss": 0.5505, - "step": 7785 - }, - { - "epoch": 0.273293669597571, - "grad_norm": 0.49346938729286194, - "learning_rate": 4.113888888888889e-05, - "loss": 0.5843, - "step": 7786 - }, - { - "epoch": 0.2733287702486881, - "grad_norm": 0.4314527213573456, - "learning_rate": 4.113703703703704e-05, - "loss": 0.6009, - "step": 7787 - }, - { - "epoch": 0.2733638708998052, - "grad_norm": 0.5262062549591064, - "learning_rate": 4.1135185185185185e-05, - "loss": 0.5159, - "step": 7788 - }, - { - "epoch": 0.27339897155092224, - "grad_norm": 0.4940626621246338, - "learning_rate": 4.1133333333333335e-05, - "loss": 0.4881, - "step": 7789 - }, - { - "epoch": 0.27343407220203936, - "grad_norm": 0.4182960093021393, - "learning_rate": 4.1131481481481485e-05, - "loss": 0.5031, - "step": 7790 - }, - { - "epoch": 0.2734691728531564, - "grad_norm": 0.5245519280433655, - "learning_rate": 4.112962962962963e-05, - "loss": 0.5805, - "step": 7791 - }, - { - "epoch": 0.27350427350427353, - "grad_norm": 0.45374444127082825, - "learning_rate": 4.1127777777777785e-05, - "loss": 0.4834, - "step": 7792 - }, - { - "epoch": 0.2735393741553906, - "grad_norm": 0.5043225884437561, - "learning_rate": 4.112592592592593e-05, - "loss": 0.5904, - "step": 7793 - }, - { - "epoch": 0.27357447480650765, - "grad_norm": 0.465851753950119, - "learning_rate": 4.112407407407408e-05, - "loss": 0.5223, - "step": 7794 - }, - { - "epoch": 0.27360957545762477, - "grad_norm": 0.43449297547340393, - "learning_rate": 4.112222222222222e-05, - "loss": 0.505, - "step": 7795 - }, - { - "epoch": 0.2736446761087418, - "grad_norm": 0.48560020327568054, - "learning_rate": 4.112037037037037e-05, - "loss": 0.446, - "step": 7796 - }, - { - "epoch": 0.2736797767598589, - "grad_norm": 0.46282708644866943, - "learning_rate": 4.111851851851852e-05, - "loss": 0.4123, - "step": 7797 - }, - { - "epoch": 0.273714877410976, - "grad_norm": 0.4530523419380188, - "learning_rate": 4.111666666666667e-05, - "loss": 0.4875, - "step": 7798 - }, - { - "epoch": 0.27374997806209306, - "grad_norm": 0.44399458169937134, - "learning_rate": 4.1114814814814816e-05, - "loss": 0.3916, - "step": 7799 - }, - { - "epoch": 0.2737850787132101, - "grad_norm": 0.4795461893081665, - "learning_rate": 4.1112962962962966e-05, - "loss": 0.4821, - "step": 7800 - }, - { - "epoch": 0.27382017936432723, - "grad_norm": 0.5405919551849365, - "learning_rate": 4.111111111111111e-05, - "loss": 0.3614, - "step": 7801 - }, - { - "epoch": 0.2738552800154443, - "grad_norm": 0.4708065688610077, - "learning_rate": 4.110925925925926e-05, - "loss": 0.5854, - "step": 7802 - }, - { - "epoch": 0.27389038066656135, - "grad_norm": 0.3676167130470276, - "learning_rate": 4.110740740740741e-05, - "loss": 0.3225, - "step": 7803 - }, - { - "epoch": 0.27392548131767847, - "grad_norm": 0.43827205896377563, - "learning_rate": 4.110555555555556e-05, - "loss": 0.4755, - "step": 7804 - }, - { - "epoch": 0.2739605819687955, - "grad_norm": 0.43459635972976685, - "learning_rate": 4.11037037037037e-05, - "loss": 0.4225, - "step": 7805 - }, - { - "epoch": 0.2739956826199126, - "grad_norm": 0.47987109422683716, - "learning_rate": 4.110185185185185e-05, - "loss": 0.5456, - "step": 7806 - }, - { - "epoch": 0.2740307832710297, - "grad_norm": 0.4811306595802307, - "learning_rate": 4.11e-05, - "loss": 0.5505, - "step": 7807 - }, - { - "epoch": 0.27406588392214676, - "grad_norm": 0.517788290977478, - "learning_rate": 4.109814814814815e-05, - "loss": 0.3459, - "step": 7808 - }, - { - "epoch": 0.2741009845732638, - "grad_norm": 0.46567606925964355, - "learning_rate": 4.1096296296296296e-05, - "loss": 0.4988, - "step": 7809 - }, - { - "epoch": 0.27413608522438093, - "grad_norm": 0.6458544731140137, - "learning_rate": 4.1094444444444446e-05, - "loss": 0.5143, - "step": 7810 - }, - { - "epoch": 0.274171185875498, - "grad_norm": 0.5265978574752808, - "learning_rate": 4.1092592592592597e-05, - "loss": 0.6255, - "step": 7811 - }, - { - "epoch": 0.27420628652661505, - "grad_norm": 0.5369976758956909, - "learning_rate": 4.109074074074074e-05, - "loss": 0.4712, - "step": 7812 - }, - { - "epoch": 0.27424138717773217, - "grad_norm": 0.5601565837860107, - "learning_rate": 4.10888888888889e-05, - "loss": 0.4929, - "step": 7813 - }, - { - "epoch": 0.2742764878288492, - "grad_norm": 0.50289386510849, - "learning_rate": 4.108703703703704e-05, - "loss": 0.4677, - "step": 7814 - }, - { - "epoch": 0.2743115884799663, - "grad_norm": 0.4575662314891815, - "learning_rate": 4.108518518518519e-05, - "loss": 0.4541, - "step": 7815 - }, - { - "epoch": 0.2743466891310834, - "grad_norm": 0.5137734413146973, - "learning_rate": 4.1083333333333334e-05, - "loss": 0.5033, - "step": 7816 - }, - { - "epoch": 0.27438178978220046, - "grad_norm": 0.464229017496109, - "learning_rate": 4.1081481481481484e-05, - "loss": 0.5643, - "step": 7817 - }, - { - "epoch": 0.2744168904333175, - "grad_norm": 0.4344482123851776, - "learning_rate": 4.107962962962963e-05, - "loss": 0.4926, - "step": 7818 - }, - { - "epoch": 0.27445199108443463, - "grad_norm": 0.5127958655357361, - "learning_rate": 4.1077777777777784e-05, - "loss": 0.5492, - "step": 7819 - }, - { - "epoch": 0.2744870917355517, - "grad_norm": 0.43547531962394714, - "learning_rate": 4.107592592592593e-05, - "loss": 0.6341, - "step": 7820 - }, - { - "epoch": 0.27452219238666875, - "grad_norm": 0.4592842757701874, - "learning_rate": 4.107407407407408e-05, - "loss": 0.5278, - "step": 7821 - }, - { - "epoch": 0.27455729303778587, - "grad_norm": 0.5459216833114624, - "learning_rate": 4.107222222222222e-05, - "loss": 0.4546, - "step": 7822 - }, - { - "epoch": 0.2745923936889029, - "grad_norm": 0.4498363137245178, - "learning_rate": 4.107037037037037e-05, - "loss": 0.4078, - "step": 7823 - }, - { - "epoch": 0.27462749434002, - "grad_norm": 0.43574151396751404, - "learning_rate": 4.106851851851852e-05, - "loss": 0.4639, - "step": 7824 - }, - { - "epoch": 0.2746625949911371, - "grad_norm": 0.5410889983177185, - "learning_rate": 4.106666666666667e-05, - "loss": 0.5131, - "step": 7825 - }, - { - "epoch": 0.27469769564225416, - "grad_norm": 0.41596245765686035, - "learning_rate": 4.1064814814814814e-05, - "loss": 0.4544, - "step": 7826 - }, - { - "epoch": 0.2747327962933712, - "grad_norm": 0.43353360891342163, - "learning_rate": 4.1062962962962964e-05, - "loss": 0.4924, - "step": 7827 - }, - { - "epoch": 0.27476789694448833, - "grad_norm": 0.4979435205459595, - "learning_rate": 4.1061111111111115e-05, - "loss": 0.5504, - "step": 7828 - }, - { - "epoch": 0.2748029975956054, - "grad_norm": 0.4958559274673462, - "learning_rate": 4.1059259259259265e-05, - "loss": 0.4114, - "step": 7829 - }, - { - "epoch": 0.27483809824672245, - "grad_norm": 0.672920286655426, - "learning_rate": 4.105740740740741e-05, - "loss": 0.535, - "step": 7830 - }, - { - "epoch": 0.27487319889783957, - "grad_norm": 0.5170559883117676, - "learning_rate": 4.105555555555556e-05, - "loss": 0.5017, - "step": 7831 - }, - { - "epoch": 0.2749082995489566, - "grad_norm": 0.4703062176704407, - "learning_rate": 4.105370370370371e-05, - "loss": 0.5458, - "step": 7832 - }, - { - "epoch": 0.2749434002000737, - "grad_norm": 0.43580538034439087, - "learning_rate": 4.105185185185185e-05, - "loss": 0.4767, - "step": 7833 - }, - { - "epoch": 0.2749785008511908, - "grad_norm": 0.40965357422828674, - "learning_rate": 4.105e-05, - "loss": 0.4576, - "step": 7834 - }, - { - "epoch": 0.27501360150230786, - "grad_norm": 0.47708502411842346, - "learning_rate": 4.104814814814815e-05, - "loss": 0.5278, - "step": 7835 - }, - { - "epoch": 0.275048702153425, - "grad_norm": 0.5179563164710999, - "learning_rate": 4.10462962962963e-05, - "loss": 0.5337, - "step": 7836 - }, - { - "epoch": 0.27508380280454203, - "grad_norm": 0.3858884572982788, - "learning_rate": 4.1044444444444445e-05, - "loss": 0.5267, - "step": 7837 - }, - { - "epoch": 0.2751189034556591, - "grad_norm": 0.4927499294281006, - "learning_rate": 4.1042592592592595e-05, - "loss": 0.3933, - "step": 7838 - }, - { - "epoch": 0.2751540041067762, - "grad_norm": 0.5559276938438416, - "learning_rate": 4.104074074074074e-05, - "loss": 0.4434, - "step": 7839 - }, - { - "epoch": 0.27518910475789327, - "grad_norm": 0.5437283515930176, - "learning_rate": 4.1038888888888896e-05, - "loss": 0.5039, - "step": 7840 - }, - { - "epoch": 0.2752242054090103, - "grad_norm": 0.5759140849113464, - "learning_rate": 4.103703703703704e-05, - "loss": 0.531, - "step": 7841 - }, - { - "epoch": 0.27525930606012744, - "grad_norm": 0.4584856629371643, - "learning_rate": 4.103518518518519e-05, - "loss": 0.5264, - "step": 7842 - }, - { - "epoch": 0.2752944067112445, - "grad_norm": 0.43245044350624084, - "learning_rate": 4.103333333333333e-05, - "loss": 0.4608, - "step": 7843 - }, - { - "epoch": 0.27532950736236156, - "grad_norm": 0.5555004477500916, - "learning_rate": 4.103148148148148e-05, - "loss": 0.5494, - "step": 7844 - }, - { - "epoch": 0.2753646080134787, - "grad_norm": 0.481701135635376, - "learning_rate": 4.1029629629629626e-05, - "loss": 0.3863, - "step": 7845 - }, - { - "epoch": 0.27539970866459573, - "grad_norm": 0.49417224526405334, - "learning_rate": 4.102777777777778e-05, - "loss": 0.467, - "step": 7846 - }, - { - "epoch": 0.2754348093157128, - "grad_norm": 0.4032270610332489, - "learning_rate": 4.1025925925925926e-05, - "loss": 0.3781, - "step": 7847 - }, - { - "epoch": 0.2754699099668299, - "grad_norm": 0.6592342853546143, - "learning_rate": 4.1024074074074076e-05, - "loss": 0.5657, - "step": 7848 - }, - { - "epoch": 0.27550501061794697, - "grad_norm": 0.5312392115592957, - "learning_rate": 4.1022222222222226e-05, - "loss": 0.5793, - "step": 7849 - }, - { - "epoch": 0.275540111269064, - "grad_norm": 0.443953275680542, - "learning_rate": 4.102037037037037e-05, - "loss": 0.4822, - "step": 7850 - }, - { - "epoch": 0.27557521192018114, - "grad_norm": 0.47307008504867554, - "learning_rate": 4.101851851851852e-05, - "loss": 0.5277, - "step": 7851 - }, - { - "epoch": 0.2756103125712982, - "grad_norm": 0.4914732873439789, - "learning_rate": 4.101666666666667e-05, - "loss": 0.5652, - "step": 7852 - }, - { - "epoch": 0.27564541322241526, - "grad_norm": 0.49751776456832886, - "learning_rate": 4.101481481481482e-05, - "loss": 0.4661, - "step": 7853 - }, - { - "epoch": 0.2756805138735324, - "grad_norm": 0.4852551519870758, - "learning_rate": 4.101296296296296e-05, - "loss": 0.5305, - "step": 7854 - }, - { - "epoch": 0.27571561452464943, - "grad_norm": 0.49527662992477417, - "learning_rate": 4.101111111111111e-05, - "loss": 0.527, - "step": 7855 - }, - { - "epoch": 0.2757507151757665, - "grad_norm": 0.47790756821632385, - "learning_rate": 4.1009259259259263e-05, - "loss": 0.4805, - "step": 7856 - }, - { - "epoch": 0.2757858158268836, - "grad_norm": 0.572037398815155, - "learning_rate": 4.1007407407407414e-05, - "loss": 0.4747, - "step": 7857 - }, - { - "epoch": 0.27582091647800067, - "grad_norm": 0.5022187232971191, - "learning_rate": 4.100555555555556e-05, - "loss": 0.5926, - "step": 7858 - }, - { - "epoch": 0.2758560171291177, - "grad_norm": 0.4085732102394104, - "learning_rate": 4.100370370370371e-05, - "loss": 0.4717, - "step": 7859 - }, - { - "epoch": 0.27589111778023484, - "grad_norm": 0.5451929569244385, - "learning_rate": 4.100185185185185e-05, - "loss": 0.4506, - "step": 7860 - }, - { - "epoch": 0.2759262184313519, - "grad_norm": 0.5754143595695496, - "learning_rate": 4.1e-05, - "loss": 0.5087, - "step": 7861 - }, - { - "epoch": 0.27596131908246896, - "grad_norm": 0.5542033910751343, - "learning_rate": 4.099814814814815e-05, - "loss": 0.5912, - "step": 7862 - }, - { - "epoch": 0.2759964197335861, - "grad_norm": 0.6500203013420105, - "learning_rate": 4.09962962962963e-05, - "loss": 0.5043, - "step": 7863 - }, - { - "epoch": 0.27603152038470313, - "grad_norm": 0.5760796070098877, - "learning_rate": 4.0994444444444444e-05, - "loss": 0.5164, - "step": 7864 - }, - { - "epoch": 0.2760666210358202, - "grad_norm": 0.5292539000511169, - "learning_rate": 4.0992592592592594e-05, - "loss": 0.5366, - "step": 7865 - }, - { - "epoch": 0.2761017216869373, - "grad_norm": 0.41747111082077026, - "learning_rate": 4.099074074074074e-05, - "loss": 0.4999, - "step": 7866 - }, - { - "epoch": 0.27613682233805437, - "grad_norm": 0.45500117540359497, - "learning_rate": 4.0988888888888894e-05, - "loss": 0.5324, - "step": 7867 - }, - { - "epoch": 0.2761719229891714, - "grad_norm": 0.5111092329025269, - "learning_rate": 4.098703703703704e-05, - "loss": 0.4091, - "step": 7868 - }, - { - "epoch": 0.27620702364028854, - "grad_norm": 0.5890510678291321, - "learning_rate": 4.098518518518519e-05, - "loss": 0.6444, - "step": 7869 - }, - { - "epoch": 0.2762421242914056, - "grad_norm": 0.556816816329956, - "learning_rate": 4.098333333333334e-05, - "loss": 0.4694, - "step": 7870 - }, - { - "epoch": 0.27627722494252266, - "grad_norm": 0.5874245166778564, - "learning_rate": 4.098148148148148e-05, - "loss": 0.6006, - "step": 7871 - }, - { - "epoch": 0.2763123255936398, - "grad_norm": 0.4477379024028778, - "learning_rate": 4.097962962962963e-05, - "loss": 0.4335, - "step": 7872 - }, - { - "epoch": 0.27634742624475683, - "grad_norm": 0.5111028552055359, - "learning_rate": 4.097777777777778e-05, - "loss": 0.5741, - "step": 7873 - }, - { - "epoch": 0.2763825268958739, - "grad_norm": 0.44865602254867554, - "learning_rate": 4.097592592592593e-05, - "loss": 0.5052, - "step": 7874 - }, - { - "epoch": 0.276417627546991, - "grad_norm": 0.4635795056819916, - "learning_rate": 4.0974074074074075e-05, - "loss": 0.5518, - "step": 7875 - }, - { - "epoch": 0.27645272819810807, - "grad_norm": 0.4994930624961853, - "learning_rate": 4.0972222222222225e-05, - "loss": 0.5332, - "step": 7876 - }, - { - "epoch": 0.2764878288492251, - "grad_norm": 0.4676767587661743, - "learning_rate": 4.097037037037037e-05, - "loss": 0.507, - "step": 7877 - }, - { - "epoch": 0.27652292950034224, - "grad_norm": 0.43237313628196716, - "learning_rate": 4.0968518518518525e-05, - "loss": 0.5868, - "step": 7878 - }, - { - "epoch": 0.2765580301514593, - "grad_norm": 0.41624507308006287, - "learning_rate": 4.096666666666667e-05, - "loss": 0.449, - "step": 7879 - }, - { - "epoch": 0.2765931308025764, - "grad_norm": 0.5069583654403687, - "learning_rate": 4.096481481481482e-05, - "loss": 0.5632, - "step": 7880 - }, - { - "epoch": 0.2766282314536935, - "grad_norm": 0.38259395956993103, - "learning_rate": 4.096296296296296e-05, - "loss": 0.4354, - "step": 7881 - }, - { - "epoch": 0.27666333210481053, - "grad_norm": 0.4509560465812683, - "learning_rate": 4.096111111111111e-05, - "loss": 0.5449, - "step": 7882 - }, - { - "epoch": 0.27669843275592765, - "grad_norm": 0.41755226254463196, - "learning_rate": 4.095925925925926e-05, - "loss": 0.5306, - "step": 7883 - }, - { - "epoch": 0.2767335334070447, - "grad_norm": 0.4419791102409363, - "learning_rate": 4.095740740740741e-05, - "loss": 0.4563, - "step": 7884 - }, - { - "epoch": 0.27676863405816177, - "grad_norm": 0.37255746126174927, - "learning_rate": 4.0955555555555556e-05, - "loss": 0.5627, - "step": 7885 - }, - { - "epoch": 0.2768037347092789, - "grad_norm": 0.41805362701416016, - "learning_rate": 4.0953703703703706e-05, - "loss": 0.5751, - "step": 7886 - }, - { - "epoch": 0.27683883536039594, - "grad_norm": 0.4114614427089691, - "learning_rate": 4.095185185185185e-05, - "loss": 0.485, - "step": 7887 - }, - { - "epoch": 0.276873936011513, - "grad_norm": 0.5162444114685059, - "learning_rate": 4.095e-05, - "loss": 0.594, - "step": 7888 - }, - { - "epoch": 0.2769090366626301, - "grad_norm": 0.46693867444992065, - "learning_rate": 4.094814814814815e-05, - "loss": 0.5221, - "step": 7889 - }, - { - "epoch": 0.2769441373137472, - "grad_norm": 0.47641506791114807, - "learning_rate": 4.09462962962963e-05, - "loss": 0.432, - "step": 7890 - }, - { - "epoch": 0.27697923796486423, - "grad_norm": 0.47800034284591675, - "learning_rate": 4.094444444444445e-05, - "loss": 0.4204, - "step": 7891 - }, - { - "epoch": 0.27701433861598135, - "grad_norm": 0.38821107149124146, - "learning_rate": 4.094259259259259e-05, - "loss": 0.452, - "step": 7892 - }, - { - "epoch": 0.2770494392670984, - "grad_norm": 0.4781849682331085, - "learning_rate": 4.094074074074074e-05, - "loss": 0.451, - "step": 7893 - }, - { - "epoch": 0.27708453991821547, - "grad_norm": 0.45554330945014954, - "learning_rate": 4.093888888888889e-05, - "loss": 0.6524, - "step": 7894 - }, - { - "epoch": 0.2771196405693326, - "grad_norm": 0.4381646513938904, - "learning_rate": 4.093703703703704e-05, - "loss": 0.4878, - "step": 7895 - }, - { - "epoch": 0.27715474122044964, - "grad_norm": 0.4923740327358246, - "learning_rate": 4.0935185185185187e-05, - "loss": 0.494, - "step": 7896 - }, - { - "epoch": 0.2771898418715667, - "grad_norm": 0.5060998797416687, - "learning_rate": 4.093333333333334e-05, - "loss": 0.5351, - "step": 7897 - }, - { - "epoch": 0.2772249425226838, - "grad_norm": 0.405555784702301, - "learning_rate": 4.093148148148148e-05, - "loss": 0.4399, - "step": 7898 - }, - { - "epoch": 0.2772600431738009, - "grad_norm": 0.4374408423900604, - "learning_rate": 4.092962962962963e-05, - "loss": 0.5685, - "step": 7899 - }, - { - "epoch": 0.27729514382491793, - "grad_norm": 0.4379095435142517, - "learning_rate": 4.092777777777778e-05, - "loss": 0.5621, - "step": 7900 - }, - { - "epoch": 0.27733024447603505, - "grad_norm": 0.5020437240600586, - "learning_rate": 4.092592592592593e-05, - "loss": 0.6421, - "step": 7901 - }, - { - "epoch": 0.2773653451271521, - "grad_norm": 0.5417577028274536, - "learning_rate": 4.0924074074074074e-05, - "loss": 0.5246, - "step": 7902 - }, - { - "epoch": 0.27740044577826917, - "grad_norm": 0.492760568857193, - "learning_rate": 4.0922222222222224e-05, - "loss": 0.5638, - "step": 7903 - }, - { - "epoch": 0.2774355464293863, - "grad_norm": 0.48257091641426086, - "learning_rate": 4.092037037037037e-05, - "loss": 0.417, - "step": 7904 - }, - { - "epoch": 0.27747064708050334, - "grad_norm": 0.6336067318916321, - "learning_rate": 4.0918518518518524e-05, - "loss": 0.5981, - "step": 7905 - }, - { - "epoch": 0.2775057477316204, - "grad_norm": 0.45369216799736023, - "learning_rate": 4.091666666666667e-05, - "loss": 0.4474, - "step": 7906 - }, - { - "epoch": 0.2775408483827375, - "grad_norm": 0.6345931887626648, - "learning_rate": 4.091481481481482e-05, - "loss": 0.5335, - "step": 7907 - }, - { - "epoch": 0.2775759490338546, - "grad_norm": 0.4886617064476013, - "learning_rate": 4.091296296296296e-05, - "loss": 0.533, - "step": 7908 - }, - { - "epoch": 0.27761104968497163, - "grad_norm": 0.6177127957344055, - "learning_rate": 4.091111111111111e-05, - "loss": 0.5632, - "step": 7909 - }, - { - "epoch": 0.27764615033608875, - "grad_norm": 0.5202939510345459, - "learning_rate": 4.090925925925926e-05, - "loss": 0.52, - "step": 7910 - }, - { - "epoch": 0.2776812509872058, - "grad_norm": 0.42625051736831665, - "learning_rate": 4.090740740740741e-05, - "loss": 0.4713, - "step": 7911 - }, - { - "epoch": 0.27771635163832287, - "grad_norm": 0.4472450315952301, - "learning_rate": 4.090555555555556e-05, - "loss": 0.5004, - "step": 7912 - }, - { - "epoch": 0.27775145228944, - "grad_norm": 0.45150497555732727, - "learning_rate": 4.0903703703703705e-05, - "loss": 0.5574, - "step": 7913 - }, - { - "epoch": 0.27778655294055704, - "grad_norm": 0.5306472778320312, - "learning_rate": 4.0901851851851855e-05, - "loss": 0.5752, - "step": 7914 - }, - { - "epoch": 0.2778216535916741, - "grad_norm": 0.5337181687355042, - "learning_rate": 4.09e-05, - "loss": 0.4861, - "step": 7915 - }, - { - "epoch": 0.2778567542427912, - "grad_norm": 0.53714519739151, - "learning_rate": 4.0898148148148155e-05, - "loss": 0.5155, - "step": 7916 - }, - { - "epoch": 0.2778918548939083, - "grad_norm": 0.4030104875564575, - "learning_rate": 4.08962962962963e-05, - "loss": 0.4551, - "step": 7917 - }, - { - "epoch": 0.27792695554502533, - "grad_norm": 0.4559984803199768, - "learning_rate": 4.089444444444445e-05, - "loss": 0.5538, - "step": 7918 - }, - { - "epoch": 0.27796205619614245, - "grad_norm": 0.49963101744651794, - "learning_rate": 4.089259259259259e-05, - "loss": 0.4786, - "step": 7919 - }, - { - "epoch": 0.2779971568472595, - "grad_norm": 0.4937446415424347, - "learning_rate": 4.089074074074074e-05, - "loss": 0.519, - "step": 7920 - }, - { - "epoch": 0.2780322574983766, - "grad_norm": 0.42274028062820435, - "learning_rate": 4.088888888888889e-05, - "loss": 0.4448, - "step": 7921 - }, - { - "epoch": 0.2780673581494937, - "grad_norm": 0.5461887717247009, - "learning_rate": 4.088703703703704e-05, - "loss": 0.4329, - "step": 7922 - }, - { - "epoch": 0.27810245880061074, - "grad_norm": 0.5357968211174011, - "learning_rate": 4.0885185185185185e-05, - "loss": 0.5464, - "step": 7923 - }, - { - "epoch": 0.27813755945172786, - "grad_norm": 0.511338472366333, - "learning_rate": 4.0883333333333335e-05, - "loss": 0.5462, - "step": 7924 - }, - { - "epoch": 0.2781726601028449, - "grad_norm": 0.4925311505794525, - "learning_rate": 4.088148148148148e-05, - "loss": 0.5156, - "step": 7925 - }, - { - "epoch": 0.278207760753962, - "grad_norm": 0.4790682792663574, - "learning_rate": 4.087962962962963e-05, - "loss": 0.4838, - "step": 7926 - }, - { - "epoch": 0.2782428614050791, - "grad_norm": 0.45375901460647583, - "learning_rate": 4.087777777777778e-05, - "loss": 0.5437, - "step": 7927 - }, - { - "epoch": 0.27827796205619615, - "grad_norm": 0.48572948575019836, - "learning_rate": 4.087592592592593e-05, - "loss": 0.5284, - "step": 7928 - }, - { - "epoch": 0.2783130627073132, - "grad_norm": 0.5587866902351379, - "learning_rate": 4.087407407407407e-05, - "loss": 0.5219, - "step": 7929 - }, - { - "epoch": 0.2783481633584303, - "grad_norm": 0.41554728150367737, - "learning_rate": 4.087222222222222e-05, - "loss": 0.3946, - "step": 7930 - }, - { - "epoch": 0.2783832640095474, - "grad_norm": 0.5373744368553162, - "learning_rate": 4.087037037037037e-05, - "loss": 0.5438, - "step": 7931 - }, - { - "epoch": 0.27841836466066444, - "grad_norm": 0.46152541041374207, - "learning_rate": 4.086851851851852e-05, - "loss": 0.407, - "step": 7932 - }, - { - "epoch": 0.27845346531178156, - "grad_norm": 0.5146769285202026, - "learning_rate": 4.086666666666667e-05, - "loss": 0.6258, - "step": 7933 - }, - { - "epoch": 0.2784885659628986, - "grad_norm": 0.42317959666252136, - "learning_rate": 4.0864814814814816e-05, - "loss": 0.3889, - "step": 7934 - }, - { - "epoch": 0.2785236666140157, - "grad_norm": 0.46248093247413635, - "learning_rate": 4.0862962962962966e-05, - "loss": 0.4521, - "step": 7935 - }, - { - "epoch": 0.2785587672651328, - "grad_norm": 0.4622972905635834, - "learning_rate": 4.086111111111111e-05, - "loss": 0.4562, - "step": 7936 - }, - { - "epoch": 0.27859386791624985, - "grad_norm": 0.508287787437439, - "learning_rate": 4.0859259259259267e-05, - "loss": 0.4473, - "step": 7937 - }, - { - "epoch": 0.2786289685673669, - "grad_norm": 0.4771938920021057, - "learning_rate": 4.085740740740741e-05, - "loss": 0.5023, - "step": 7938 - }, - { - "epoch": 0.278664069218484, - "grad_norm": 0.4425145387649536, - "learning_rate": 4.085555555555556e-05, - "loss": 0.5529, - "step": 7939 - }, - { - "epoch": 0.2786991698696011, - "grad_norm": 0.5179188251495361, - "learning_rate": 4.08537037037037e-05, - "loss": 0.4302, - "step": 7940 - }, - { - "epoch": 0.27873427052071814, - "grad_norm": 0.5344011783599854, - "learning_rate": 4.0851851851851853e-05, - "loss": 0.5583, - "step": 7941 - }, - { - "epoch": 0.27876937117183526, - "grad_norm": 0.42933565378189087, - "learning_rate": 4.085e-05, - "loss": 0.5764, - "step": 7942 - }, - { - "epoch": 0.2788044718229523, - "grad_norm": 0.46680948138237, - "learning_rate": 4.0848148148148154e-05, - "loss": 0.5453, - "step": 7943 - }, - { - "epoch": 0.2788395724740694, - "grad_norm": 0.5272208452224731, - "learning_rate": 4.08462962962963e-05, - "loss": 0.4019, - "step": 7944 - }, - { - "epoch": 0.2788746731251865, - "grad_norm": 0.5343120694160461, - "learning_rate": 4.084444444444445e-05, - "loss": 0.597, - "step": 7945 - }, - { - "epoch": 0.27890977377630355, - "grad_norm": 0.49073290824890137, - "learning_rate": 4.084259259259259e-05, - "loss": 0.5571, - "step": 7946 - }, - { - "epoch": 0.2789448744274206, - "grad_norm": 0.46020108461380005, - "learning_rate": 4.084074074074074e-05, - "loss": 0.5001, - "step": 7947 - }, - { - "epoch": 0.2789799750785377, - "grad_norm": 0.5948832035064697, - "learning_rate": 4.083888888888889e-05, - "loss": 0.5804, - "step": 7948 - }, - { - "epoch": 0.2790150757296548, - "grad_norm": 0.49232959747314453, - "learning_rate": 4.083703703703704e-05, - "loss": 0.6008, - "step": 7949 - }, - { - "epoch": 0.27905017638077184, - "grad_norm": 0.4900668263435364, - "learning_rate": 4.0835185185185184e-05, - "loss": 0.5187, - "step": 7950 - }, - { - "epoch": 0.27908527703188896, - "grad_norm": 0.4627559781074524, - "learning_rate": 4.0833333333333334e-05, - "loss": 0.4989, - "step": 7951 - }, - { - "epoch": 0.279120377683006, - "grad_norm": 0.45985740423202515, - "learning_rate": 4.0831481481481484e-05, - "loss": 0.5219, - "step": 7952 - }, - { - "epoch": 0.2791554783341231, - "grad_norm": 0.4675130844116211, - "learning_rate": 4.0829629629629634e-05, - "loss": 0.516, - "step": 7953 - }, - { - "epoch": 0.2791905789852402, - "grad_norm": 0.5487849116325378, - "learning_rate": 4.0827777777777785e-05, - "loss": 0.5233, - "step": 7954 - }, - { - "epoch": 0.27922567963635725, - "grad_norm": 0.4342455267906189, - "learning_rate": 4.082592592592593e-05, - "loss": 0.3948, - "step": 7955 - }, - { - "epoch": 0.2792607802874743, - "grad_norm": 0.5137965679168701, - "learning_rate": 4.082407407407408e-05, - "loss": 0.5882, - "step": 7956 - }, - { - "epoch": 0.2792958809385914, - "grad_norm": 0.4604355990886688, - "learning_rate": 4.082222222222222e-05, - "loss": 0.5468, - "step": 7957 - }, - { - "epoch": 0.2793309815897085, - "grad_norm": 0.49510952830314636, - "learning_rate": 4.082037037037037e-05, - "loss": 0.4481, - "step": 7958 - }, - { - "epoch": 0.27936608224082554, - "grad_norm": 0.4664628803730011, - "learning_rate": 4.081851851851852e-05, - "loss": 0.5269, - "step": 7959 - }, - { - "epoch": 0.27940118289194266, - "grad_norm": 0.5847564935684204, - "learning_rate": 4.081666666666667e-05, - "loss": 0.5598, - "step": 7960 - }, - { - "epoch": 0.2794362835430597, - "grad_norm": 0.5626074075698853, - "learning_rate": 4.0814814814814815e-05, - "loss": 0.394, - "step": 7961 - }, - { - "epoch": 0.2794713841941768, - "grad_norm": 0.47834110260009766, - "learning_rate": 4.0812962962962965e-05, - "loss": 0.4804, - "step": 7962 - }, - { - "epoch": 0.2795064848452939, - "grad_norm": 0.5216735005378723, - "learning_rate": 4.081111111111111e-05, - "loss": 0.5489, - "step": 7963 - }, - { - "epoch": 0.27954158549641095, - "grad_norm": 0.4369824230670929, - "learning_rate": 4.0809259259259265e-05, - "loss": 0.4951, - "step": 7964 - }, - { - "epoch": 0.27957668614752806, - "grad_norm": 0.4862610399723053, - "learning_rate": 4.080740740740741e-05, - "loss": 0.4687, - "step": 7965 - }, - { - "epoch": 0.2796117867986451, - "grad_norm": 0.4973202347755432, - "learning_rate": 4.080555555555556e-05, - "loss": 0.5022, - "step": 7966 - }, - { - "epoch": 0.2796468874497622, - "grad_norm": 0.5134009122848511, - "learning_rate": 4.08037037037037e-05, - "loss": 0.6105, - "step": 7967 - }, - { - "epoch": 0.2796819881008793, - "grad_norm": 0.514795184135437, - "learning_rate": 4.080185185185185e-05, - "loss": 0.6585, - "step": 7968 - }, - { - "epoch": 0.27971708875199636, - "grad_norm": 0.47297024726867676, - "learning_rate": 4.08e-05, - "loss": 0.5045, - "step": 7969 - }, - { - "epoch": 0.2797521894031134, - "grad_norm": 0.5119077563285828, - "learning_rate": 4.079814814814815e-05, - "loss": 0.3313, - "step": 7970 - }, - { - "epoch": 0.27978729005423053, - "grad_norm": 0.3787788450717926, - "learning_rate": 4.0796296296296296e-05, - "loss": 0.4038, - "step": 7971 - }, - { - "epoch": 0.2798223907053476, - "grad_norm": 0.4780018925666809, - "learning_rate": 4.0794444444444446e-05, - "loss": 0.4453, - "step": 7972 - }, - { - "epoch": 0.27985749135646465, - "grad_norm": 0.5017353892326355, - "learning_rate": 4.0792592592592596e-05, - "loss": 0.5291, - "step": 7973 - }, - { - "epoch": 0.27989259200758176, - "grad_norm": 0.45233187079429626, - "learning_rate": 4.079074074074074e-05, - "loss": 0.4692, - "step": 7974 - }, - { - "epoch": 0.2799276926586988, - "grad_norm": 0.5208582282066345, - "learning_rate": 4.0788888888888896e-05, - "loss": 0.5678, - "step": 7975 - }, - { - "epoch": 0.2799627933098159, - "grad_norm": 0.41176047921180725, - "learning_rate": 4.078703703703704e-05, - "loss": 0.4084, - "step": 7976 - }, - { - "epoch": 0.279997893960933, - "grad_norm": 0.42679262161254883, - "learning_rate": 4.078518518518519e-05, - "loss": 0.475, - "step": 7977 - }, - { - "epoch": 0.28003299461205006, - "grad_norm": 0.4897710680961609, - "learning_rate": 4.078333333333333e-05, - "loss": 0.4225, - "step": 7978 - }, - { - "epoch": 0.2800680952631671, - "grad_norm": 0.46619144082069397, - "learning_rate": 4.078148148148148e-05, - "loss": 0.6223, - "step": 7979 - }, - { - "epoch": 0.28010319591428423, - "grad_norm": 0.43579769134521484, - "learning_rate": 4.077962962962963e-05, - "loss": 0.5074, - "step": 7980 - }, - { - "epoch": 0.2801382965654013, - "grad_norm": 0.446338415145874, - "learning_rate": 4.0777777777777783e-05, - "loss": 0.4578, - "step": 7981 - }, - { - "epoch": 0.28017339721651835, - "grad_norm": 0.4331212639808655, - "learning_rate": 4.077592592592593e-05, - "loss": 0.3584, - "step": 7982 - }, - { - "epoch": 0.28020849786763546, - "grad_norm": 0.43930599093437195, - "learning_rate": 4.077407407407408e-05, - "loss": 0.3545, - "step": 7983 - }, - { - "epoch": 0.2802435985187525, - "grad_norm": 0.4407409727573395, - "learning_rate": 4.077222222222222e-05, - "loss": 0.4487, - "step": 7984 - }, - { - "epoch": 0.2802786991698696, - "grad_norm": 0.4880591034889221, - "learning_rate": 4.077037037037037e-05, - "loss": 0.3848, - "step": 7985 - }, - { - "epoch": 0.2803137998209867, - "grad_norm": 0.47966670989990234, - "learning_rate": 4.076851851851852e-05, - "loss": 0.5709, - "step": 7986 - }, - { - "epoch": 0.28034890047210376, - "grad_norm": 0.4759008288383484, - "learning_rate": 4.076666666666667e-05, - "loss": 0.497, - "step": 7987 - }, - { - "epoch": 0.2803840011232208, - "grad_norm": 0.494576632976532, - "learning_rate": 4.0764814814814814e-05, - "loss": 0.4843, - "step": 7988 - }, - { - "epoch": 0.28041910177433793, - "grad_norm": 0.3992688059806824, - "learning_rate": 4.0762962962962964e-05, - "loss": 0.455, - "step": 7989 - }, - { - "epoch": 0.280454202425455, - "grad_norm": 0.4157765507698059, - "learning_rate": 4.0761111111111114e-05, - "loss": 0.4771, - "step": 7990 - }, - { - "epoch": 0.28048930307657205, - "grad_norm": 0.4481564164161682, - "learning_rate": 4.0759259259259264e-05, - "loss": 0.3552, - "step": 7991 - }, - { - "epoch": 0.28052440372768916, - "grad_norm": 0.562509298324585, - "learning_rate": 4.075740740740741e-05, - "loss": 0.4985, - "step": 7992 - }, - { - "epoch": 0.2805595043788062, - "grad_norm": 0.5243647694587708, - "learning_rate": 4.075555555555556e-05, - "loss": 0.353, - "step": 7993 - }, - { - "epoch": 0.2805946050299233, - "grad_norm": 0.45379653573036194, - "learning_rate": 4.075370370370371e-05, - "loss": 0.5251, - "step": 7994 - }, - { - "epoch": 0.2806297056810404, - "grad_norm": 0.6141947507858276, - "learning_rate": 4.075185185185185e-05, - "loss": 0.4652, - "step": 7995 - }, - { - "epoch": 0.28066480633215746, - "grad_norm": 0.46891850233078003, - "learning_rate": 4.075e-05, - "loss": 0.4946, - "step": 7996 - }, - { - "epoch": 0.2806999069832745, - "grad_norm": 0.6355248093605042, - "learning_rate": 4.074814814814815e-05, - "loss": 0.6131, - "step": 7997 - }, - { - "epoch": 0.28073500763439163, - "grad_norm": 0.5389485359191895, - "learning_rate": 4.07462962962963e-05, - "loss": 0.5412, - "step": 7998 - }, - { - "epoch": 0.2807701082855087, - "grad_norm": 0.47179463505744934, - "learning_rate": 4.0744444444444445e-05, - "loss": 0.5024, - "step": 7999 - }, - { - "epoch": 0.28080520893662575, - "grad_norm": 0.4186546206474304, - "learning_rate": 4.0742592592592595e-05, - "loss": 0.4122, - "step": 8000 - }, - { - "epoch": 0.28084030958774286, - "grad_norm": 0.4676593542098999, - "learning_rate": 4.074074074074074e-05, - "loss": 0.5162, - "step": 8001 - }, - { - "epoch": 0.2808754102388599, - "grad_norm": 0.46146470308303833, - "learning_rate": 4.0738888888888895e-05, - "loss": 0.495, - "step": 8002 - }, - { - "epoch": 0.280910510889977, - "grad_norm": 0.46057799458503723, - "learning_rate": 4.073703703703704e-05, - "loss": 0.5403, - "step": 8003 - }, - { - "epoch": 0.2809456115410941, - "grad_norm": 0.5117209553718567, - "learning_rate": 4.073518518518519e-05, - "loss": 0.4988, - "step": 8004 - }, - { - "epoch": 0.28098071219221116, - "grad_norm": 0.595409095287323, - "learning_rate": 4.073333333333333e-05, - "loss": 0.4933, - "step": 8005 - }, - { - "epoch": 0.2810158128433282, - "grad_norm": 0.4410655200481415, - "learning_rate": 4.073148148148148e-05, - "loss": 0.5936, - "step": 8006 - }, - { - "epoch": 0.28105091349444533, - "grad_norm": 0.42402568459510803, - "learning_rate": 4.072962962962963e-05, - "loss": 0.4246, - "step": 8007 - }, - { - "epoch": 0.2810860141455624, - "grad_norm": 0.4704415500164032, - "learning_rate": 4.072777777777778e-05, - "loss": 0.46, - "step": 8008 - }, - { - "epoch": 0.2811211147966795, - "grad_norm": 0.4257764220237732, - "learning_rate": 4.0725925925925926e-05, - "loss": 0.5484, - "step": 8009 - }, - { - "epoch": 0.28115621544779656, - "grad_norm": 0.4923824965953827, - "learning_rate": 4.0724074074074076e-05, - "loss": 0.5489, - "step": 8010 - }, - { - "epoch": 0.2811913160989136, - "grad_norm": 0.4724316895008087, - "learning_rate": 4.0722222222222226e-05, - "loss": 0.5154, - "step": 8011 - }, - { - "epoch": 0.28122641675003074, - "grad_norm": 0.4177713692188263, - "learning_rate": 4.072037037037037e-05, - "loss": 0.493, - "step": 8012 - }, - { - "epoch": 0.2812615174011478, - "grad_norm": 0.44422951340675354, - "learning_rate": 4.071851851851852e-05, - "loss": 0.5196, - "step": 8013 - }, - { - "epoch": 0.28129661805226486, - "grad_norm": 0.46216443181037903, - "learning_rate": 4.071666666666667e-05, - "loss": 0.4385, - "step": 8014 - }, - { - "epoch": 0.28133171870338197, - "grad_norm": 0.5174856185913086, - "learning_rate": 4.071481481481482e-05, - "loss": 0.4544, - "step": 8015 - }, - { - "epoch": 0.28136681935449903, - "grad_norm": 0.4204725921154022, - "learning_rate": 4.071296296296296e-05, - "loss": 0.5047, - "step": 8016 - }, - { - "epoch": 0.2814019200056161, - "grad_norm": 0.35260385274887085, - "learning_rate": 4.071111111111111e-05, - "loss": 0.331, - "step": 8017 - }, - { - "epoch": 0.2814370206567332, - "grad_norm": 0.4789131283760071, - "learning_rate": 4.070925925925926e-05, - "loss": 0.4602, - "step": 8018 - }, - { - "epoch": 0.28147212130785026, - "grad_norm": 0.4223562479019165, - "learning_rate": 4.070740740740741e-05, - "loss": 0.4212, - "step": 8019 - }, - { - "epoch": 0.2815072219589673, - "grad_norm": 0.5064011216163635, - "learning_rate": 4.0705555555555556e-05, - "loss": 0.5748, - "step": 8020 - }, - { - "epoch": 0.28154232261008444, - "grad_norm": 0.6087396740913391, - "learning_rate": 4.0703703703703707e-05, - "loss": 0.5406, - "step": 8021 - }, - { - "epoch": 0.2815774232612015, - "grad_norm": 0.39791154861450195, - "learning_rate": 4.070185185185185e-05, - "loss": 0.4526, - "step": 8022 - }, - { - "epoch": 0.28161252391231856, - "grad_norm": 0.4267604649066925, - "learning_rate": 4.07e-05, - "loss": 0.6011, - "step": 8023 - }, - { - "epoch": 0.2816476245634357, - "grad_norm": 0.3917820155620575, - "learning_rate": 4.069814814814815e-05, - "loss": 0.3879, - "step": 8024 - }, - { - "epoch": 0.28168272521455273, - "grad_norm": 0.4462912976741791, - "learning_rate": 4.06962962962963e-05, - "loss": 0.4096, - "step": 8025 - }, - { - "epoch": 0.2817178258656698, - "grad_norm": 0.4905613660812378, - "learning_rate": 4.0694444444444444e-05, - "loss": 0.5112, - "step": 8026 - }, - { - "epoch": 0.2817529265167869, - "grad_norm": 0.4440365731716156, - "learning_rate": 4.0692592592592594e-05, - "loss": 0.4665, - "step": 8027 - }, - { - "epoch": 0.28178802716790396, - "grad_norm": 0.5830773711204529, - "learning_rate": 4.0690740740740744e-05, - "loss": 0.6231, - "step": 8028 - }, - { - "epoch": 0.281823127819021, - "grad_norm": 0.5967283844947815, - "learning_rate": 4.0688888888888894e-05, - "loss": 0.5234, - "step": 8029 - }, - { - "epoch": 0.28185822847013814, - "grad_norm": 0.4988676905632019, - "learning_rate": 4.068703703703704e-05, - "loss": 0.5125, - "step": 8030 - }, - { - "epoch": 0.2818933291212552, - "grad_norm": 0.4037335216999054, - "learning_rate": 4.068518518518519e-05, - "loss": 0.43, - "step": 8031 - }, - { - "epoch": 0.28192842977237226, - "grad_norm": 0.5608326196670532, - "learning_rate": 4.068333333333334e-05, - "loss": 0.5111, - "step": 8032 - }, - { - "epoch": 0.2819635304234894, - "grad_norm": 0.5061492919921875, - "learning_rate": 4.068148148148148e-05, - "loss": 0.4723, - "step": 8033 - }, - { - "epoch": 0.28199863107460643, - "grad_norm": 0.4636203646659851, - "learning_rate": 4.067962962962963e-05, - "loss": 0.5078, - "step": 8034 - }, - { - "epoch": 0.2820337317257235, - "grad_norm": 0.46644335985183716, - "learning_rate": 4.067777777777778e-05, - "loss": 0.4605, - "step": 8035 - }, - { - "epoch": 0.2820688323768406, - "grad_norm": 0.5107771754264832, - "learning_rate": 4.067592592592593e-05, - "loss": 0.4308, - "step": 8036 - }, - { - "epoch": 0.28210393302795767, - "grad_norm": 0.4554886817932129, - "learning_rate": 4.0674074074074074e-05, - "loss": 0.5021, - "step": 8037 - }, - { - "epoch": 0.2821390336790747, - "grad_norm": 0.48687800765037537, - "learning_rate": 4.0672222222222225e-05, - "loss": 0.3952, - "step": 8038 - }, - { - "epoch": 0.28217413433019184, - "grad_norm": 0.5896759629249573, - "learning_rate": 4.067037037037037e-05, - "loss": 0.438, - "step": 8039 - }, - { - "epoch": 0.2822092349813089, - "grad_norm": 0.465492844581604, - "learning_rate": 4.0668518518518525e-05, - "loss": 0.5378, - "step": 8040 - }, - { - "epoch": 0.28224433563242596, - "grad_norm": 0.48038434982299805, - "learning_rate": 4.066666666666667e-05, - "loss": 0.4777, - "step": 8041 - }, - { - "epoch": 0.2822794362835431, - "grad_norm": 0.5062415599822998, - "learning_rate": 4.066481481481482e-05, - "loss": 0.5859, - "step": 8042 - }, - { - "epoch": 0.28231453693466013, - "grad_norm": 0.9282384514808655, - "learning_rate": 4.066296296296296e-05, - "loss": 0.5592, - "step": 8043 - }, - { - "epoch": 0.2823496375857772, - "grad_norm": 0.43872007727622986, - "learning_rate": 4.066111111111111e-05, - "loss": 0.5052, - "step": 8044 - }, - { - "epoch": 0.2823847382368943, - "grad_norm": 0.3608463704586029, - "learning_rate": 4.065925925925926e-05, - "loss": 0.3569, - "step": 8045 - }, - { - "epoch": 0.28241983888801137, - "grad_norm": 0.4610837399959564, - "learning_rate": 4.065740740740741e-05, - "loss": 0.4644, - "step": 8046 - }, - { - "epoch": 0.2824549395391284, - "grad_norm": 0.436646044254303, - "learning_rate": 4.0655555555555555e-05, - "loss": 0.4998, - "step": 8047 - }, - { - "epoch": 0.28249004019024554, - "grad_norm": 0.4377426505088806, - "learning_rate": 4.0653703703703705e-05, - "loss": 0.5688, - "step": 8048 - }, - { - "epoch": 0.2825251408413626, - "grad_norm": 0.4132697284221649, - "learning_rate": 4.0651851851851855e-05, - "loss": 0.5845, - "step": 8049 - }, - { - "epoch": 0.28256024149247966, - "grad_norm": 0.45453760027885437, - "learning_rate": 4.065e-05, - "loss": 0.5072, - "step": 8050 - }, - { - "epoch": 0.2825953421435968, - "grad_norm": 0.7920612692832947, - "learning_rate": 4.064814814814815e-05, - "loss": 0.4969, - "step": 8051 - }, - { - "epoch": 0.28263044279471383, - "grad_norm": 0.6089039444923401, - "learning_rate": 4.06462962962963e-05, - "loss": 0.5496, - "step": 8052 - }, - { - "epoch": 0.28266554344583095, - "grad_norm": 0.5046930313110352, - "learning_rate": 4.064444444444445e-05, - "loss": 0.469, - "step": 8053 - }, - { - "epoch": 0.282700644096948, - "grad_norm": 0.5192446112632751, - "learning_rate": 4.064259259259259e-05, - "loss": 0.5587, - "step": 8054 - }, - { - "epoch": 0.28273574474806507, - "grad_norm": 0.4415225386619568, - "learning_rate": 4.064074074074074e-05, - "loss": 0.3954, - "step": 8055 - }, - { - "epoch": 0.2827708453991822, - "grad_norm": 0.4509764015674591, - "learning_rate": 4.063888888888889e-05, - "loss": 0.4224, - "step": 8056 - }, - { - "epoch": 0.28280594605029924, - "grad_norm": 0.48445382714271545, - "learning_rate": 4.063703703703704e-05, - "loss": 0.4848, - "step": 8057 - }, - { - "epoch": 0.2828410467014163, - "grad_norm": 0.45312193036079407, - "learning_rate": 4.0635185185185186e-05, - "loss": 0.3986, - "step": 8058 - }, - { - "epoch": 0.2828761473525334, - "grad_norm": 0.45773014426231384, - "learning_rate": 4.0633333333333336e-05, - "loss": 0.4151, - "step": 8059 - }, - { - "epoch": 0.2829112480036505, - "grad_norm": 0.518431544303894, - "learning_rate": 4.063148148148148e-05, - "loss": 0.5153, - "step": 8060 - }, - { - "epoch": 0.28294634865476753, - "grad_norm": 0.5256792902946472, - "learning_rate": 4.0629629629629636e-05, - "loss": 0.5557, - "step": 8061 - }, - { - "epoch": 0.28298144930588465, - "grad_norm": 0.49641355872154236, - "learning_rate": 4.062777777777778e-05, - "loss": 0.3805, - "step": 8062 - }, - { - "epoch": 0.2830165499570017, - "grad_norm": 0.5426306128501892, - "learning_rate": 4.062592592592593e-05, - "loss": 0.5244, - "step": 8063 - }, - { - "epoch": 0.28305165060811877, - "grad_norm": 0.4797968566417694, - "learning_rate": 4.062407407407407e-05, - "loss": 0.5051, - "step": 8064 - }, - { - "epoch": 0.2830867512592359, - "grad_norm": 0.7212132811546326, - "learning_rate": 4.062222222222222e-05, - "loss": 0.5789, - "step": 8065 - }, - { - "epoch": 0.28312185191035294, - "grad_norm": 0.5456981062889099, - "learning_rate": 4.062037037037037e-05, - "loss": 0.5371, - "step": 8066 - }, - { - "epoch": 0.28315695256147, - "grad_norm": 0.48504072427749634, - "learning_rate": 4.0618518518518524e-05, - "loss": 0.4481, - "step": 8067 - }, - { - "epoch": 0.2831920532125871, - "grad_norm": 0.5455562472343445, - "learning_rate": 4.061666666666667e-05, - "loss": 0.4833, - "step": 8068 - }, - { - "epoch": 0.2832271538637042, - "grad_norm": 0.46583792567253113, - "learning_rate": 4.061481481481482e-05, - "loss": 0.4994, - "step": 8069 - }, - { - "epoch": 0.28326225451482123, - "grad_norm": 0.5792588591575623, - "learning_rate": 4.061296296296297e-05, - "loss": 0.5077, - "step": 8070 - }, - { - "epoch": 0.28329735516593835, - "grad_norm": 0.5327225923538208, - "learning_rate": 4.061111111111111e-05, - "loss": 0.474, - "step": 8071 - }, - { - "epoch": 0.2833324558170554, - "grad_norm": 0.5065831542015076, - "learning_rate": 4.060925925925926e-05, - "loss": 0.505, - "step": 8072 - }, - { - "epoch": 0.28336755646817247, - "grad_norm": 0.5003475546836853, - "learning_rate": 4.060740740740741e-05, - "loss": 0.5524, - "step": 8073 - }, - { - "epoch": 0.2834026571192896, - "grad_norm": 0.5355148315429688, - "learning_rate": 4.060555555555556e-05, - "loss": 0.5022, - "step": 8074 - }, - { - "epoch": 0.28343775777040664, - "grad_norm": 0.41615086793899536, - "learning_rate": 4.0603703703703704e-05, - "loss": 0.4717, - "step": 8075 - }, - { - "epoch": 0.2834728584215237, - "grad_norm": 0.6635816097259521, - "learning_rate": 4.0601851851851854e-05, - "loss": 0.467, - "step": 8076 - }, - { - "epoch": 0.2835079590726408, - "grad_norm": 0.5651918649673462, - "learning_rate": 4.0600000000000004e-05, - "loss": 0.4857, - "step": 8077 - }, - { - "epoch": 0.2835430597237579, - "grad_norm": 0.5045571327209473, - "learning_rate": 4.0598148148148154e-05, - "loss": 0.3723, - "step": 8078 - }, - { - "epoch": 0.28357816037487493, - "grad_norm": 0.4455765187740326, - "learning_rate": 4.05962962962963e-05, - "loss": 0.4848, - "step": 8079 - }, - { - "epoch": 0.28361326102599205, - "grad_norm": 0.4644143581390381, - "learning_rate": 4.059444444444445e-05, - "loss": 0.3654, - "step": 8080 - }, - { - "epoch": 0.2836483616771091, - "grad_norm": 0.4728992283344269, - "learning_rate": 4.059259259259259e-05, - "loss": 0.4039, - "step": 8081 - }, - { - "epoch": 0.28368346232822617, - "grad_norm": 0.4642181098461151, - "learning_rate": 4.059074074074074e-05, - "loss": 0.4447, - "step": 8082 - }, - { - "epoch": 0.2837185629793433, - "grad_norm": 0.5512563586235046, - "learning_rate": 4.058888888888889e-05, - "loss": 0.5523, - "step": 8083 - }, - { - "epoch": 0.28375366363046034, - "grad_norm": 0.408136248588562, - "learning_rate": 4.058703703703704e-05, - "loss": 0.3774, - "step": 8084 - }, - { - "epoch": 0.2837887642815774, - "grad_norm": 0.49333131313323975, - "learning_rate": 4.0585185185185185e-05, - "loss": 0.4759, - "step": 8085 - }, - { - "epoch": 0.2838238649326945, - "grad_norm": 0.532110333442688, - "learning_rate": 4.0583333333333335e-05, - "loss": 0.5101, - "step": 8086 - }, - { - "epoch": 0.2838589655838116, - "grad_norm": 0.5234708786010742, - "learning_rate": 4.058148148148148e-05, - "loss": 0.5641, - "step": 8087 - }, - { - "epoch": 0.28389406623492863, - "grad_norm": 0.5088000893592834, - "learning_rate": 4.0579629629629635e-05, - "loss": 0.4922, - "step": 8088 - }, - { - "epoch": 0.28392916688604575, - "grad_norm": 0.5189211368560791, - "learning_rate": 4.057777777777778e-05, - "loss": 0.5166, - "step": 8089 - }, - { - "epoch": 0.2839642675371628, - "grad_norm": 0.5195832252502441, - "learning_rate": 4.057592592592593e-05, - "loss": 0.5321, - "step": 8090 - }, - { - "epoch": 0.28399936818827987, - "grad_norm": 0.5092904567718506, - "learning_rate": 4.057407407407408e-05, - "loss": 0.4898, - "step": 8091 - }, - { - "epoch": 0.284034468839397, - "grad_norm": 0.41007059812545776, - "learning_rate": 4.057222222222222e-05, - "loss": 0.4652, - "step": 8092 - }, - { - "epoch": 0.28406956949051404, - "grad_norm": 0.45726752281188965, - "learning_rate": 4.057037037037037e-05, - "loss": 0.441, - "step": 8093 - }, - { - "epoch": 0.28410467014163115, - "grad_norm": 0.5065674781799316, - "learning_rate": 4.056851851851852e-05, - "loss": 0.4811, - "step": 8094 - }, - { - "epoch": 0.2841397707927482, - "grad_norm": 0.4962552785873413, - "learning_rate": 4.056666666666667e-05, - "loss": 0.4795, - "step": 8095 - }, - { - "epoch": 0.2841748714438653, - "grad_norm": 0.5224127769470215, - "learning_rate": 4.0564814814814816e-05, - "loss": 0.5222, - "step": 8096 - }, - { - "epoch": 0.2842099720949824, - "grad_norm": 0.5880312919616699, - "learning_rate": 4.0562962962962966e-05, - "loss": 0.4176, - "step": 8097 - }, - { - "epoch": 0.28424507274609945, - "grad_norm": 0.5924611687660217, - "learning_rate": 4.056111111111111e-05, - "loss": 0.4369, - "step": 8098 - }, - { - "epoch": 0.2842801733972165, - "grad_norm": 0.49070483446121216, - "learning_rate": 4.0559259259259266e-05, - "loss": 0.5365, - "step": 8099 - }, - { - "epoch": 0.2843152740483336, - "grad_norm": 0.43305522203445435, - "learning_rate": 4.055740740740741e-05, - "loss": 0.4935, - "step": 8100 - }, - { - "epoch": 0.2843503746994507, - "grad_norm": 0.4474570155143738, - "learning_rate": 4.055555555555556e-05, - "loss": 0.5714, - "step": 8101 - }, - { - "epoch": 0.28438547535056774, - "grad_norm": 0.4542944133281708, - "learning_rate": 4.05537037037037e-05, - "loss": 0.3609, - "step": 8102 - }, - { - "epoch": 0.28442057600168485, - "grad_norm": 0.5114876627922058, - "learning_rate": 4.055185185185185e-05, - "loss": 0.6249, - "step": 8103 - }, - { - "epoch": 0.2844556766528019, - "grad_norm": 0.4601055979728699, - "learning_rate": 4.055e-05, - "loss": 0.4969, - "step": 8104 - }, - { - "epoch": 0.284490777303919, - "grad_norm": 0.4982481598854065, - "learning_rate": 4.054814814814815e-05, - "loss": 0.5227, - "step": 8105 - }, - { - "epoch": 0.2845258779550361, - "grad_norm": 0.48595577478408813, - "learning_rate": 4.0546296296296297e-05, - "loss": 0.4899, - "step": 8106 - }, - { - "epoch": 0.28456097860615315, - "grad_norm": 0.4868353605270386, - "learning_rate": 4.054444444444445e-05, - "loss": 0.5777, - "step": 8107 - }, - { - "epoch": 0.2845960792572702, - "grad_norm": 0.4889174997806549, - "learning_rate": 4.054259259259259e-05, - "loss": 0.5033, - "step": 8108 - }, - { - "epoch": 0.2846311799083873, - "grad_norm": 0.5406430959701538, - "learning_rate": 4.054074074074074e-05, - "loss": 0.5319, - "step": 8109 - }, - { - "epoch": 0.2846662805595044, - "grad_norm": 0.3945758640766144, - "learning_rate": 4.053888888888889e-05, - "loss": 0.4847, - "step": 8110 - }, - { - "epoch": 0.28470138121062144, - "grad_norm": 0.44126343727111816, - "learning_rate": 4.053703703703704e-05, - "loss": 0.5076, - "step": 8111 - }, - { - "epoch": 0.28473648186173856, - "grad_norm": 0.4627716541290283, - "learning_rate": 4.053518518518519e-05, - "loss": 0.5234, - "step": 8112 - }, - { - "epoch": 0.2847715825128556, - "grad_norm": 0.4966994822025299, - "learning_rate": 4.0533333333333334e-05, - "loss": 0.3964, - "step": 8113 - }, - { - "epoch": 0.2848066831639727, - "grad_norm": 0.49300989508628845, - "learning_rate": 4.0531481481481484e-05, - "loss": 0.6035, - "step": 8114 - }, - { - "epoch": 0.2848417838150898, - "grad_norm": 0.46468865871429443, - "learning_rate": 4.0529629629629634e-05, - "loss": 0.592, - "step": 8115 - }, - { - "epoch": 0.28487688446620685, - "grad_norm": 0.44414177536964417, - "learning_rate": 4.0527777777777784e-05, - "loss": 0.4592, - "step": 8116 - }, - { - "epoch": 0.2849119851173239, - "grad_norm": 0.496947705745697, - "learning_rate": 4.052592592592593e-05, - "loss": 0.5313, - "step": 8117 - }, - { - "epoch": 0.284947085768441, - "grad_norm": 0.47998765110969543, - "learning_rate": 4.052407407407408e-05, - "loss": 0.4527, - "step": 8118 - }, - { - "epoch": 0.2849821864195581, - "grad_norm": 0.49000442028045654, - "learning_rate": 4.052222222222222e-05, - "loss": 0.5268, - "step": 8119 - }, - { - "epoch": 0.28501728707067514, - "grad_norm": 0.4491293430328369, - "learning_rate": 4.052037037037037e-05, - "loss": 0.5292, - "step": 8120 - }, - { - "epoch": 0.28505238772179226, - "grad_norm": 0.4193068742752075, - "learning_rate": 4.051851851851852e-05, - "loss": 0.361, - "step": 8121 - }, - { - "epoch": 0.2850874883729093, - "grad_norm": 0.406738817691803, - "learning_rate": 4.051666666666667e-05, - "loss": 0.3829, - "step": 8122 - }, - { - "epoch": 0.2851225890240264, - "grad_norm": 0.4577412009239197, - "learning_rate": 4.0514814814814815e-05, - "loss": 0.4391, - "step": 8123 - }, - { - "epoch": 0.2851576896751435, - "grad_norm": 0.5002464056015015, - "learning_rate": 4.0512962962962965e-05, - "loss": 0.5298, - "step": 8124 - }, - { - "epoch": 0.28519279032626055, - "grad_norm": 0.4798411726951599, - "learning_rate": 4.051111111111111e-05, - "loss": 0.4555, - "step": 8125 - }, - { - "epoch": 0.2852278909773776, - "grad_norm": 0.5236383676528931, - "learning_rate": 4.0509259259259265e-05, - "loss": 0.5223, - "step": 8126 - }, - { - "epoch": 0.2852629916284947, - "grad_norm": 0.5016698837280273, - "learning_rate": 4.050740740740741e-05, - "loss": 0.5019, - "step": 8127 - }, - { - "epoch": 0.2852980922796118, - "grad_norm": 0.5081701874732971, - "learning_rate": 4.050555555555556e-05, - "loss": 0.5107, - "step": 8128 - }, - { - "epoch": 0.28533319293072884, - "grad_norm": 0.5519953966140747, - "learning_rate": 4.05037037037037e-05, - "loss": 0.5183, - "step": 8129 - }, - { - "epoch": 0.28536829358184596, - "grad_norm": 0.46333232522010803, - "learning_rate": 4.050185185185185e-05, - "loss": 0.4479, - "step": 8130 - }, - { - "epoch": 0.285403394232963, - "grad_norm": 0.584630012512207, - "learning_rate": 4.05e-05, - "loss": 0.5519, - "step": 8131 - }, - { - "epoch": 0.2854384948840801, - "grad_norm": 0.4944828748703003, - "learning_rate": 4.049814814814815e-05, - "loss": 0.5012, - "step": 8132 - }, - { - "epoch": 0.2854735955351972, - "grad_norm": 0.4979732036590576, - "learning_rate": 4.04962962962963e-05, - "loss": 0.5351, - "step": 8133 - }, - { - "epoch": 0.28550869618631425, - "grad_norm": 0.4783311188220978, - "learning_rate": 4.0494444444444445e-05, - "loss": 0.4916, - "step": 8134 - }, - { - "epoch": 0.2855437968374313, - "grad_norm": 0.5095236897468567, - "learning_rate": 4.0492592592592596e-05, - "loss": 0.4423, - "step": 8135 - }, - { - "epoch": 0.2855788974885484, - "grad_norm": 0.45643332600593567, - "learning_rate": 4.049074074074074e-05, - "loss": 0.5416, - "step": 8136 - }, - { - "epoch": 0.2856139981396655, - "grad_norm": 0.40063369274139404, - "learning_rate": 4.0488888888888896e-05, - "loss": 0.4092, - "step": 8137 - }, - { - "epoch": 0.2856490987907826, - "grad_norm": 0.485043466091156, - "learning_rate": 4.048703703703704e-05, - "loss": 0.5461, - "step": 8138 - }, - { - "epoch": 0.28568419944189966, - "grad_norm": 0.4991069734096527, - "learning_rate": 4.048518518518519e-05, - "loss": 0.612, - "step": 8139 - }, - { - "epoch": 0.2857193000930167, - "grad_norm": 0.4628628194332123, - "learning_rate": 4.048333333333333e-05, - "loss": 0.494, - "step": 8140 - }, - { - "epoch": 0.28575440074413383, - "grad_norm": 0.3982737064361572, - "learning_rate": 4.048148148148148e-05, - "loss": 0.413, - "step": 8141 - }, - { - "epoch": 0.2857895013952509, - "grad_norm": 0.393583208322525, - "learning_rate": 4.047962962962963e-05, - "loss": 0.3997, - "step": 8142 - }, - { - "epoch": 0.28582460204636795, - "grad_norm": 0.525490939617157, - "learning_rate": 4.047777777777778e-05, - "loss": 0.4752, - "step": 8143 - }, - { - "epoch": 0.28585970269748506, - "grad_norm": 0.4103042781352997, - "learning_rate": 4.0475925925925926e-05, - "loss": 0.4319, - "step": 8144 - }, - { - "epoch": 0.2858948033486021, - "grad_norm": 0.44447991251945496, - "learning_rate": 4.0474074074074076e-05, - "loss": 0.441, - "step": 8145 - }, - { - "epoch": 0.2859299039997192, - "grad_norm": 0.4380057454109192, - "learning_rate": 4.047222222222222e-05, - "loss": 0.5675, - "step": 8146 - }, - { - "epoch": 0.2859650046508363, - "grad_norm": 0.38948607444763184, - "learning_rate": 4.047037037037037e-05, - "loss": 0.4675, - "step": 8147 - }, - { - "epoch": 0.28600010530195336, - "grad_norm": 0.5261877179145813, - "learning_rate": 4.046851851851852e-05, - "loss": 0.5334, - "step": 8148 - }, - { - "epoch": 0.2860352059530704, - "grad_norm": 0.47408589720726013, - "learning_rate": 4.046666666666667e-05, - "loss": 0.5519, - "step": 8149 - }, - { - "epoch": 0.28607030660418753, - "grad_norm": 0.4929717183113098, - "learning_rate": 4.046481481481481e-05, - "loss": 0.5825, - "step": 8150 - }, - { - "epoch": 0.2861054072553046, - "grad_norm": 0.519781768321991, - "learning_rate": 4.0462962962962963e-05, - "loss": 0.5204, - "step": 8151 - }, - { - "epoch": 0.28614050790642165, - "grad_norm": 0.4942604899406433, - "learning_rate": 4.0461111111111114e-05, - "loss": 0.5644, - "step": 8152 - }, - { - "epoch": 0.28617560855753876, - "grad_norm": 0.6012133955955505, - "learning_rate": 4.0459259259259264e-05, - "loss": 0.5526, - "step": 8153 - }, - { - "epoch": 0.2862107092086558, - "grad_norm": 0.413562148809433, - "learning_rate": 4.0457407407407414e-05, - "loss": 0.4827, - "step": 8154 - }, - { - "epoch": 0.2862458098597729, - "grad_norm": 0.5048647522926331, - "learning_rate": 4.045555555555556e-05, - "loss": 0.4497, - "step": 8155 - }, - { - "epoch": 0.28628091051089, - "grad_norm": 0.5098483562469482, - "learning_rate": 4.045370370370371e-05, - "loss": 0.4243, - "step": 8156 - }, - { - "epoch": 0.28631601116200706, - "grad_norm": 0.5430288910865784, - "learning_rate": 4.045185185185185e-05, - "loss": 0.5886, - "step": 8157 - }, - { - "epoch": 0.2863511118131241, - "grad_norm": 0.5362138152122498, - "learning_rate": 4.045000000000001e-05, - "loss": 0.528, - "step": 8158 - }, - { - "epoch": 0.28638621246424123, - "grad_norm": 0.4301993250846863, - "learning_rate": 4.044814814814815e-05, - "loss": 0.3719, - "step": 8159 - }, - { - "epoch": 0.2864213131153583, - "grad_norm": 0.46853408217430115, - "learning_rate": 4.04462962962963e-05, - "loss": 0.5319, - "step": 8160 - }, - { - "epoch": 0.28645641376647535, - "grad_norm": 0.435899943113327, - "learning_rate": 4.0444444444444444e-05, - "loss": 0.4355, - "step": 8161 - }, - { - "epoch": 0.28649151441759246, - "grad_norm": 0.47153639793395996, - "learning_rate": 4.0442592592592594e-05, - "loss": 0.5392, - "step": 8162 - }, - { - "epoch": 0.2865266150687095, - "grad_norm": 0.5276529788970947, - "learning_rate": 4.044074074074074e-05, - "loss": 0.4339, - "step": 8163 - }, - { - "epoch": 0.2865617157198266, - "grad_norm": 0.665938675403595, - "learning_rate": 4.0438888888888895e-05, - "loss": 0.5431, - "step": 8164 - }, - { - "epoch": 0.2865968163709437, - "grad_norm": 0.5313217043876648, - "learning_rate": 4.043703703703704e-05, - "loss": 0.4714, - "step": 8165 - }, - { - "epoch": 0.28663191702206076, - "grad_norm": 0.48257043957710266, - "learning_rate": 4.043518518518519e-05, - "loss": 0.5076, - "step": 8166 - }, - { - "epoch": 0.2866670176731778, - "grad_norm": 0.5141524076461792, - "learning_rate": 4.043333333333333e-05, - "loss": 0.5342, - "step": 8167 - }, - { - "epoch": 0.28670211832429493, - "grad_norm": 0.5105558633804321, - "learning_rate": 4.043148148148148e-05, - "loss": 0.5246, - "step": 8168 - }, - { - "epoch": 0.286737218975412, - "grad_norm": 0.45840251445770264, - "learning_rate": 4.042962962962963e-05, - "loss": 0.5323, - "step": 8169 - }, - { - "epoch": 0.28677231962652905, - "grad_norm": 0.4983043670654297, - "learning_rate": 4.042777777777778e-05, - "loss": 0.4977, - "step": 8170 - }, - { - "epoch": 0.28680742027764616, - "grad_norm": 0.45156893134117126, - "learning_rate": 4.0425925925925925e-05, - "loss": 0.5274, - "step": 8171 - }, - { - "epoch": 0.2868425209287632, - "grad_norm": 0.45778200030326843, - "learning_rate": 4.0424074074074075e-05, - "loss": 0.5141, - "step": 8172 - }, - { - "epoch": 0.2868776215798803, - "grad_norm": 0.39041879773139954, - "learning_rate": 4.0422222222222225e-05, - "loss": 0.4847, - "step": 8173 - }, - { - "epoch": 0.2869127222309974, - "grad_norm": 0.4571963846683502, - "learning_rate": 4.042037037037037e-05, - "loss": 0.445, - "step": 8174 - }, - { - "epoch": 0.28694782288211446, - "grad_norm": 0.39101749658584595, - "learning_rate": 4.0418518518518525e-05, - "loss": 0.4433, - "step": 8175 - }, - { - "epoch": 0.2869829235332315, - "grad_norm": 0.5091982483863831, - "learning_rate": 4.041666666666667e-05, - "loss": 0.5468, - "step": 8176 - }, - { - "epoch": 0.28701802418434863, - "grad_norm": 0.4883688688278198, - "learning_rate": 4.041481481481482e-05, - "loss": 0.5317, - "step": 8177 - }, - { - "epoch": 0.2870531248354657, - "grad_norm": 0.48284032940864563, - "learning_rate": 4.041296296296296e-05, - "loss": 0.5346, - "step": 8178 - }, - { - "epoch": 0.28708822548658275, - "grad_norm": 0.4809252619743347, - "learning_rate": 4.041111111111111e-05, - "loss": 0.5564, - "step": 8179 - }, - { - "epoch": 0.28712332613769986, - "grad_norm": 0.4898936450481415, - "learning_rate": 4.040925925925926e-05, - "loss": 0.5571, - "step": 8180 - }, - { - "epoch": 0.2871584267888169, - "grad_norm": 0.406755268573761, - "learning_rate": 4.040740740740741e-05, - "loss": 0.5034, - "step": 8181 - }, - { - "epoch": 0.28719352743993404, - "grad_norm": 0.4602193832397461, - "learning_rate": 4.0405555555555556e-05, - "loss": 0.498, - "step": 8182 - }, - { - "epoch": 0.2872286280910511, - "grad_norm": 0.4930579960346222, - "learning_rate": 4.0403703703703706e-05, - "loss": 0.4737, - "step": 8183 - }, - { - "epoch": 0.28726372874216816, - "grad_norm": 0.42065542936325073, - "learning_rate": 4.040185185185185e-05, - "loss": 0.35, - "step": 8184 - }, - { - "epoch": 0.28729882939328527, - "grad_norm": 0.4506486654281616, - "learning_rate": 4.0400000000000006e-05, - "loss": 0.4739, - "step": 8185 - }, - { - "epoch": 0.28733393004440233, - "grad_norm": 0.46011191606521606, - "learning_rate": 4.039814814814815e-05, - "loss": 0.4606, - "step": 8186 - }, - { - "epoch": 0.2873690306955194, - "grad_norm": 0.42633652687072754, - "learning_rate": 4.03962962962963e-05, - "loss": 0.4324, - "step": 8187 - }, - { - "epoch": 0.2874041313466365, - "grad_norm": 0.438777357339859, - "learning_rate": 4.039444444444444e-05, - "loss": 0.5046, - "step": 8188 - }, - { - "epoch": 0.28743923199775356, - "grad_norm": 0.5430569648742676, - "learning_rate": 4.039259259259259e-05, - "loss": 0.5551, - "step": 8189 - }, - { - "epoch": 0.2874743326488706, - "grad_norm": 0.5307911038398743, - "learning_rate": 4.039074074074074e-05, - "loss": 0.5282, - "step": 8190 - }, - { - "epoch": 0.28750943329998774, - "grad_norm": 0.4823094606399536, - "learning_rate": 4.038888888888889e-05, - "loss": 0.5192, - "step": 8191 - }, - { - "epoch": 0.2875445339511048, - "grad_norm": 0.4672820270061493, - "learning_rate": 4.038703703703704e-05, - "loss": 0.4844, - "step": 8192 - }, - { - "epoch": 0.28757963460222186, - "grad_norm": 0.4899882674217224, - "learning_rate": 4.038518518518519e-05, - "loss": 0.4043, - "step": 8193 - }, - { - "epoch": 0.28761473525333897, - "grad_norm": 0.4138847887516022, - "learning_rate": 4.038333333333334e-05, - "loss": 0.3898, - "step": 8194 - }, - { - "epoch": 0.28764983590445603, - "grad_norm": 0.5617159008979797, - "learning_rate": 4.038148148148148e-05, - "loss": 0.5458, - "step": 8195 - }, - { - "epoch": 0.2876849365555731, - "grad_norm": 0.4798480272293091, - "learning_rate": 4.037962962962964e-05, - "loss": 0.5152, - "step": 8196 - }, - { - "epoch": 0.2877200372066902, - "grad_norm": 0.438936710357666, - "learning_rate": 4.037777777777778e-05, - "loss": 0.4957, - "step": 8197 - }, - { - "epoch": 0.28775513785780726, - "grad_norm": 0.35926133394241333, - "learning_rate": 4.037592592592593e-05, - "loss": 0.3754, - "step": 8198 - }, - { - "epoch": 0.2877902385089243, - "grad_norm": 0.47143498063087463, - "learning_rate": 4.0374074074074074e-05, - "loss": 0.4569, - "step": 8199 - }, - { - "epoch": 0.28782533916004144, - "grad_norm": 0.5453431606292725, - "learning_rate": 4.0372222222222224e-05, - "loss": 0.4195, - "step": 8200 - }, - { - "epoch": 0.2878604398111585, - "grad_norm": 0.5249070525169373, - "learning_rate": 4.0370370370370374e-05, - "loss": 0.4735, - "step": 8201 - }, - { - "epoch": 0.28789554046227556, - "grad_norm": 0.6424857378005981, - "learning_rate": 4.0368518518518524e-05, - "loss": 0.4651, - "step": 8202 - }, - { - "epoch": 0.28793064111339267, - "grad_norm": 0.4866859018802643, - "learning_rate": 4.036666666666667e-05, - "loss": 0.527, - "step": 8203 - }, - { - "epoch": 0.28796574176450973, - "grad_norm": 0.5302785634994507, - "learning_rate": 4.036481481481482e-05, - "loss": 0.4964, - "step": 8204 - }, - { - "epoch": 0.2880008424156268, - "grad_norm": 0.49172911047935486, - "learning_rate": 4.036296296296296e-05, - "loss": 0.4698, - "step": 8205 - }, - { - "epoch": 0.2880359430667439, - "grad_norm": 0.5235224366188049, - "learning_rate": 4.036111111111111e-05, - "loss": 0.4086, - "step": 8206 - }, - { - "epoch": 0.28807104371786096, - "grad_norm": 0.46156564354896545, - "learning_rate": 4.035925925925926e-05, - "loss": 0.4627, - "step": 8207 - }, - { - "epoch": 0.288106144368978, - "grad_norm": 0.4198870062828064, - "learning_rate": 4.035740740740741e-05, - "loss": 0.5038, - "step": 8208 - }, - { - "epoch": 0.28814124502009514, - "grad_norm": 0.5260627865791321, - "learning_rate": 4.0355555555555555e-05, - "loss": 0.5095, - "step": 8209 - }, - { - "epoch": 0.2881763456712122, - "grad_norm": 0.46729937195777893, - "learning_rate": 4.0353703703703705e-05, - "loss": 0.5616, - "step": 8210 - }, - { - "epoch": 0.28821144632232926, - "grad_norm": 0.461606502532959, - "learning_rate": 4.0351851851851855e-05, - "loss": 0.4104, - "step": 8211 - }, - { - "epoch": 0.28824654697344637, - "grad_norm": 0.49734342098236084, - "learning_rate": 4.0350000000000005e-05, - "loss": 0.5355, - "step": 8212 - }, - { - "epoch": 0.28828164762456343, - "grad_norm": 0.5569117665290833, - "learning_rate": 4.034814814814815e-05, - "loss": 0.5256, - "step": 8213 - }, - { - "epoch": 0.2883167482756805, - "grad_norm": 0.47766849398612976, - "learning_rate": 4.03462962962963e-05, - "loss": 0.5038, - "step": 8214 - }, - { - "epoch": 0.2883518489267976, - "grad_norm": 0.5460997819900513, - "learning_rate": 4.034444444444445e-05, - "loss": 0.4881, - "step": 8215 - }, - { - "epoch": 0.28838694957791466, - "grad_norm": 0.49688783288002014, - "learning_rate": 4.034259259259259e-05, - "loss": 0.5467, - "step": 8216 - }, - { - "epoch": 0.2884220502290317, - "grad_norm": 0.5633458495140076, - "learning_rate": 4.034074074074074e-05, - "loss": 0.5191, - "step": 8217 - }, - { - "epoch": 0.28845715088014884, - "grad_norm": 0.4483993649482727, - "learning_rate": 4.033888888888889e-05, - "loss": 0.4843, - "step": 8218 - }, - { - "epoch": 0.2884922515312659, - "grad_norm": 0.5097050070762634, - "learning_rate": 4.033703703703704e-05, - "loss": 0.4809, - "step": 8219 - }, - { - "epoch": 0.28852735218238296, - "grad_norm": 0.671750545501709, - "learning_rate": 4.0335185185185186e-05, - "loss": 0.4804, - "step": 8220 - }, - { - "epoch": 0.28856245283350007, - "grad_norm": 0.45227405428886414, - "learning_rate": 4.0333333333333336e-05, - "loss": 0.4157, - "step": 8221 - }, - { - "epoch": 0.28859755348461713, - "grad_norm": 0.4684091806411743, - "learning_rate": 4.033148148148148e-05, - "loss": 0.4996, - "step": 8222 - }, - { - "epoch": 0.2886326541357342, - "grad_norm": 0.5947206020355225, - "learning_rate": 4.0329629629629636e-05, - "loss": 0.5423, - "step": 8223 - }, - { - "epoch": 0.2886677547868513, - "grad_norm": 0.5050478577613831, - "learning_rate": 4.032777777777778e-05, - "loss": 0.4817, - "step": 8224 - }, - { - "epoch": 0.28870285543796836, - "grad_norm": 0.39983493089675903, - "learning_rate": 4.032592592592593e-05, - "loss": 0.4883, - "step": 8225 - }, - { - "epoch": 0.2887379560890855, - "grad_norm": 0.5140522122383118, - "learning_rate": 4.032407407407407e-05, - "loss": 0.5586, - "step": 8226 - }, - { - "epoch": 0.28877305674020254, - "grad_norm": 0.4915984570980072, - "learning_rate": 4.032222222222222e-05, - "loss": 0.5753, - "step": 8227 - }, - { - "epoch": 0.2888081573913196, - "grad_norm": 0.49818721413612366, - "learning_rate": 4.032037037037037e-05, - "loss": 0.4374, - "step": 8228 - }, - { - "epoch": 0.2888432580424367, - "grad_norm": 0.547380805015564, - "learning_rate": 4.031851851851852e-05, - "loss": 0.4802, - "step": 8229 - }, - { - "epoch": 0.28887835869355377, - "grad_norm": 0.4852157235145569, - "learning_rate": 4.0316666666666666e-05, - "loss": 0.4433, - "step": 8230 - }, - { - "epoch": 0.28891345934467083, - "grad_norm": 0.510252058506012, - "learning_rate": 4.0314814814814816e-05, - "loss": 0.5015, - "step": 8231 - }, - { - "epoch": 0.28894855999578795, - "grad_norm": 0.5820581316947937, - "learning_rate": 4.0312962962962967e-05, - "loss": 0.593, - "step": 8232 - }, - { - "epoch": 0.288983660646905, - "grad_norm": 0.571452260017395, - "learning_rate": 4.031111111111111e-05, - "loss": 0.5573, - "step": 8233 - }, - { - "epoch": 0.28901876129802206, - "grad_norm": 0.39781659841537476, - "learning_rate": 4.030925925925926e-05, - "loss": 0.3551, - "step": 8234 - }, - { - "epoch": 0.2890538619491392, - "grad_norm": 0.46615567803382874, - "learning_rate": 4.030740740740741e-05, - "loss": 0.4353, - "step": 8235 - }, - { - "epoch": 0.28908896260025624, - "grad_norm": 0.43723493814468384, - "learning_rate": 4.030555555555556e-05, - "loss": 0.444, - "step": 8236 - }, - { - "epoch": 0.2891240632513733, - "grad_norm": 0.3920901119709015, - "learning_rate": 4.0303703703703704e-05, - "loss": 0.3726, - "step": 8237 - }, - { - "epoch": 0.2891591639024904, - "grad_norm": 0.5791125893592834, - "learning_rate": 4.0301851851851854e-05, - "loss": 0.4668, - "step": 8238 - }, - { - "epoch": 0.28919426455360747, - "grad_norm": 0.5602439045906067, - "learning_rate": 4.0300000000000004e-05, - "loss": 0.5421, - "step": 8239 - }, - { - "epoch": 0.28922936520472453, - "grad_norm": 0.44971105456352234, - "learning_rate": 4.0298148148148154e-05, - "loss": 0.463, - "step": 8240 - }, - { - "epoch": 0.28926446585584165, - "grad_norm": 0.5058718323707581, - "learning_rate": 4.02962962962963e-05, - "loss": 0.4947, - "step": 8241 - }, - { - "epoch": 0.2892995665069587, - "grad_norm": 0.4540637731552124, - "learning_rate": 4.029444444444445e-05, - "loss": 0.5256, - "step": 8242 - }, - { - "epoch": 0.28933466715807576, - "grad_norm": 0.5239630937576294, - "learning_rate": 4.029259259259259e-05, - "loss": 0.5119, - "step": 8243 - }, - { - "epoch": 0.2893697678091929, - "grad_norm": 0.4087996482849121, - "learning_rate": 4.029074074074074e-05, - "loss": 0.4843, - "step": 8244 - }, - { - "epoch": 0.28940486846030994, - "grad_norm": 0.41863593459129333, - "learning_rate": 4.028888888888889e-05, - "loss": 0.3999, - "step": 8245 - }, - { - "epoch": 0.289439969111427, - "grad_norm": 0.4848571717739105, - "learning_rate": 4.028703703703704e-05, - "loss": 0.5904, - "step": 8246 - }, - { - "epoch": 0.2894750697625441, - "grad_norm": 0.42392534017562866, - "learning_rate": 4.0285185185185184e-05, - "loss": 0.3894, - "step": 8247 - }, - { - "epoch": 0.28951017041366117, - "grad_norm": 0.5444669127464294, - "learning_rate": 4.0283333333333334e-05, - "loss": 0.535, - "step": 8248 - }, - { - "epoch": 0.28954527106477823, - "grad_norm": 0.5028539299964905, - "learning_rate": 4.028148148148148e-05, - "loss": 0.4062, - "step": 8249 - }, - { - "epoch": 0.28958037171589535, - "grad_norm": 0.5731725692749023, - "learning_rate": 4.0279629629629635e-05, - "loss": 0.492, - "step": 8250 - }, - { - "epoch": 0.2896154723670124, - "grad_norm": 0.4938134253025055, - "learning_rate": 4.027777777777778e-05, - "loss": 0.5793, - "step": 8251 - }, - { - "epoch": 0.28965057301812946, - "grad_norm": 0.36381658911705017, - "learning_rate": 4.027592592592593e-05, - "loss": 0.3818, - "step": 8252 - }, - { - "epoch": 0.2896856736692466, - "grad_norm": 0.5522885918617249, - "learning_rate": 4.027407407407408e-05, - "loss": 0.4063, - "step": 8253 - }, - { - "epoch": 0.28972077432036364, - "grad_norm": 0.40425676107406616, - "learning_rate": 4.027222222222222e-05, - "loss": 0.4824, - "step": 8254 - }, - { - "epoch": 0.2897558749714807, - "grad_norm": 0.373562753200531, - "learning_rate": 4.027037037037037e-05, - "loss": 0.4423, - "step": 8255 - }, - { - "epoch": 0.2897909756225978, - "grad_norm": 0.5248168110847473, - "learning_rate": 4.026851851851852e-05, - "loss": 0.5291, - "step": 8256 - }, - { - "epoch": 0.28982607627371487, - "grad_norm": 0.43395283818244934, - "learning_rate": 4.026666666666667e-05, - "loss": 0.4495, - "step": 8257 - }, - { - "epoch": 0.28986117692483193, - "grad_norm": 0.5201600790023804, - "learning_rate": 4.0264814814814815e-05, - "loss": 0.5868, - "step": 8258 - }, - { - "epoch": 0.28989627757594905, - "grad_norm": 0.5023894309997559, - "learning_rate": 4.0262962962962965e-05, - "loss": 0.4792, - "step": 8259 - }, - { - "epoch": 0.2899313782270661, - "grad_norm": 0.44724002480506897, - "learning_rate": 4.026111111111111e-05, - "loss": 0.5577, - "step": 8260 - }, - { - "epoch": 0.28996647887818316, - "grad_norm": 0.4625886380672455, - "learning_rate": 4.0259259259259266e-05, - "loss": 0.3452, - "step": 8261 - }, - { - "epoch": 0.2900015795293003, - "grad_norm": 0.49551331996917725, - "learning_rate": 4.025740740740741e-05, - "loss": 0.4578, - "step": 8262 - }, - { - "epoch": 0.29003668018041734, - "grad_norm": 0.5226545333862305, - "learning_rate": 4.025555555555556e-05, - "loss": 0.4667, - "step": 8263 - }, - { - "epoch": 0.2900717808315344, - "grad_norm": 0.5068575739860535, - "learning_rate": 4.02537037037037e-05, - "loss": 0.5177, - "step": 8264 - }, - { - "epoch": 0.2901068814826515, - "grad_norm": 0.5347346067428589, - "learning_rate": 4.025185185185185e-05, - "loss": 0.5434, - "step": 8265 - }, - { - "epoch": 0.2901419821337686, - "grad_norm": 0.6074475646018982, - "learning_rate": 4.025e-05, - "loss": 0.4382, - "step": 8266 - }, - { - "epoch": 0.2901770827848857, - "grad_norm": 0.46770232915878296, - "learning_rate": 4.024814814814815e-05, - "loss": 0.4177, - "step": 8267 - }, - { - "epoch": 0.29021218343600275, - "grad_norm": 0.475931853055954, - "learning_rate": 4.0246296296296296e-05, - "loss": 0.516, - "step": 8268 - }, - { - "epoch": 0.2902472840871198, - "grad_norm": 0.5540890693664551, - "learning_rate": 4.0244444444444446e-05, - "loss": 0.6085, - "step": 8269 - }, - { - "epoch": 0.2902823847382369, - "grad_norm": 0.40686821937561035, - "learning_rate": 4.024259259259259e-05, - "loss": 0.4561, - "step": 8270 - }, - { - "epoch": 0.290317485389354, - "grad_norm": 0.5698222517967224, - "learning_rate": 4.024074074074074e-05, - "loss": 0.5076, - "step": 8271 - }, - { - "epoch": 0.29035258604047104, - "grad_norm": 0.43011149764060974, - "learning_rate": 4.023888888888889e-05, - "loss": 0.3911, - "step": 8272 - }, - { - "epoch": 0.29038768669158815, - "grad_norm": 0.4394207000732422, - "learning_rate": 4.023703703703704e-05, - "loss": 0.491, - "step": 8273 - }, - { - "epoch": 0.2904227873427052, - "grad_norm": 0.4442225694656372, - "learning_rate": 4.023518518518519e-05, - "loss": 0.4768, - "step": 8274 - }, - { - "epoch": 0.2904578879938223, - "grad_norm": 0.45522916316986084, - "learning_rate": 4.023333333333333e-05, - "loss": 0.6125, - "step": 8275 - }, - { - "epoch": 0.2904929886449394, - "grad_norm": 0.47991305589675903, - "learning_rate": 4.023148148148148e-05, - "loss": 0.5017, - "step": 8276 - }, - { - "epoch": 0.29052808929605645, - "grad_norm": 0.5009598731994629, - "learning_rate": 4.0229629629629633e-05, - "loss": 0.5749, - "step": 8277 - }, - { - "epoch": 0.2905631899471735, - "grad_norm": 0.5342342853546143, - "learning_rate": 4.0227777777777784e-05, - "loss": 0.6254, - "step": 8278 - }, - { - "epoch": 0.2905982905982906, - "grad_norm": 0.5017557740211487, - "learning_rate": 4.022592592592593e-05, - "loss": 0.5485, - "step": 8279 - }, - { - "epoch": 0.2906333912494077, - "grad_norm": 0.6068403720855713, - "learning_rate": 4.022407407407408e-05, - "loss": 0.5496, - "step": 8280 - }, - { - "epoch": 0.29066849190052474, - "grad_norm": 0.5969502925872803, - "learning_rate": 4.022222222222222e-05, - "loss": 0.5321, - "step": 8281 - }, - { - "epoch": 0.29070359255164185, - "grad_norm": 0.4781227111816406, - "learning_rate": 4.022037037037038e-05, - "loss": 0.4721, - "step": 8282 - }, - { - "epoch": 0.2907386932027589, - "grad_norm": 0.5797592997550964, - "learning_rate": 4.021851851851852e-05, - "loss": 0.4819, - "step": 8283 - }, - { - "epoch": 0.290773793853876, - "grad_norm": 0.4987160861492157, - "learning_rate": 4.021666666666667e-05, - "loss": 0.4859, - "step": 8284 - }, - { - "epoch": 0.2908088945049931, - "grad_norm": 0.43952804803848267, - "learning_rate": 4.0214814814814814e-05, - "loss": 0.4561, - "step": 8285 - }, - { - "epoch": 0.29084399515611015, - "grad_norm": 0.5113147497177124, - "learning_rate": 4.0212962962962964e-05, - "loss": 0.4161, - "step": 8286 - }, - { - "epoch": 0.2908790958072272, - "grad_norm": 0.45117974281311035, - "learning_rate": 4.021111111111111e-05, - "loss": 0.4182, - "step": 8287 - }, - { - "epoch": 0.2909141964583443, - "grad_norm": 0.4609515964984894, - "learning_rate": 4.0209259259259264e-05, - "loss": 0.4401, - "step": 8288 - }, - { - "epoch": 0.2909492971094614, - "grad_norm": 0.5255576372146606, - "learning_rate": 4.020740740740741e-05, - "loss": 0.5549, - "step": 8289 - }, - { - "epoch": 0.29098439776057844, - "grad_norm": 0.5325406789779663, - "learning_rate": 4.020555555555556e-05, - "loss": 0.549, - "step": 8290 - }, - { - "epoch": 0.29101949841169555, - "grad_norm": 0.5108119249343872, - "learning_rate": 4.020370370370371e-05, - "loss": 0.5335, - "step": 8291 - }, - { - "epoch": 0.2910545990628126, - "grad_norm": 0.6698627471923828, - "learning_rate": 4.020185185185185e-05, - "loss": 0.5682, - "step": 8292 - }, - { - "epoch": 0.2910896997139297, - "grad_norm": 0.4897412657737732, - "learning_rate": 4.02e-05, - "loss": 0.4944, - "step": 8293 - }, - { - "epoch": 0.2911248003650468, - "grad_norm": 0.519709050655365, - "learning_rate": 4.019814814814815e-05, - "loss": 0.5208, - "step": 8294 - }, - { - "epoch": 0.29115990101616385, - "grad_norm": 0.4544903635978699, - "learning_rate": 4.01962962962963e-05, - "loss": 0.3893, - "step": 8295 - }, - { - "epoch": 0.2911950016672809, - "grad_norm": 0.4341937303543091, - "learning_rate": 4.0194444444444445e-05, - "loss": 0.5835, - "step": 8296 - }, - { - "epoch": 0.291230102318398, - "grad_norm": 0.497541218996048, - "learning_rate": 4.0192592592592595e-05, - "loss": 0.455, - "step": 8297 - }, - { - "epoch": 0.2912652029695151, - "grad_norm": 0.5894740223884583, - "learning_rate": 4.019074074074074e-05, - "loss": 0.4665, - "step": 8298 - }, - { - "epoch": 0.29130030362063214, - "grad_norm": 0.5576427578926086, - "learning_rate": 4.0188888888888895e-05, - "loss": 0.4661, - "step": 8299 - }, - { - "epoch": 0.29133540427174925, - "grad_norm": 0.5810111165046692, - "learning_rate": 4.018703703703704e-05, - "loss": 0.5548, - "step": 8300 - }, - { - "epoch": 0.2913705049228663, - "grad_norm": 0.4866715967655182, - "learning_rate": 4.018518518518519e-05, - "loss": 0.3905, - "step": 8301 - }, - { - "epoch": 0.2914056055739834, - "grad_norm": 0.4030437767505646, - "learning_rate": 4.018333333333333e-05, - "loss": 0.5455, - "step": 8302 - }, - { - "epoch": 0.2914407062251005, - "grad_norm": 0.4321271777153015, - "learning_rate": 4.018148148148148e-05, - "loss": 0.4402, - "step": 8303 - }, - { - "epoch": 0.29147580687621755, - "grad_norm": 0.4838320016860962, - "learning_rate": 4.017962962962963e-05, - "loss": 0.5466, - "step": 8304 - }, - { - "epoch": 0.2915109075273346, - "grad_norm": 0.42875492572784424, - "learning_rate": 4.017777777777778e-05, - "loss": 0.5408, - "step": 8305 - }, - { - "epoch": 0.2915460081784517, - "grad_norm": 0.4729139506816864, - "learning_rate": 4.0175925925925926e-05, - "loss": 0.3928, - "step": 8306 - }, - { - "epoch": 0.2915811088295688, - "grad_norm": 0.4493867754936218, - "learning_rate": 4.0174074074074076e-05, - "loss": 0.4461, - "step": 8307 - }, - { - "epoch": 0.29161620948068584, - "grad_norm": 0.4088646173477173, - "learning_rate": 4.017222222222222e-05, - "loss": 0.4213, - "step": 8308 - }, - { - "epoch": 0.29165131013180295, - "grad_norm": 0.5107525587081909, - "learning_rate": 4.0170370370370376e-05, - "loss": 0.5453, - "step": 8309 - }, - { - "epoch": 0.29168641078292, - "grad_norm": 0.5025156140327454, - "learning_rate": 4.016851851851852e-05, - "loss": 0.52, - "step": 8310 - }, - { - "epoch": 0.29172151143403713, - "grad_norm": 0.47868984937667847, - "learning_rate": 4.016666666666667e-05, - "loss": 0.5733, - "step": 8311 - }, - { - "epoch": 0.2917566120851542, - "grad_norm": 0.42428916692733765, - "learning_rate": 4.016481481481482e-05, - "loss": 0.483, - "step": 8312 - }, - { - "epoch": 0.29179171273627125, - "grad_norm": 0.48054999113082886, - "learning_rate": 4.016296296296296e-05, - "loss": 0.4779, - "step": 8313 - }, - { - "epoch": 0.29182681338738836, - "grad_norm": 0.43156901001930237, - "learning_rate": 4.016111111111111e-05, - "loss": 0.5685, - "step": 8314 - }, - { - "epoch": 0.2918619140385054, - "grad_norm": 0.43460291624069214, - "learning_rate": 4.015925925925926e-05, - "loss": 0.5759, - "step": 8315 - }, - { - "epoch": 0.2918970146896225, - "grad_norm": 0.4912845194339752, - "learning_rate": 4.015740740740741e-05, - "loss": 0.4779, - "step": 8316 - }, - { - "epoch": 0.2919321153407396, - "grad_norm": 0.506504476070404, - "learning_rate": 4.0155555555555557e-05, - "loss": 0.4611, - "step": 8317 - }, - { - "epoch": 0.29196721599185665, - "grad_norm": 0.45487430691719055, - "learning_rate": 4.015370370370371e-05, - "loss": 0.4802, - "step": 8318 - }, - { - "epoch": 0.2920023166429737, - "grad_norm": 0.49680662155151367, - "learning_rate": 4.015185185185185e-05, - "loss": 0.6102, - "step": 8319 - }, - { - "epoch": 0.29203741729409083, - "grad_norm": 0.46258485317230225, - "learning_rate": 4.015000000000001e-05, - "loss": 0.4767, - "step": 8320 - }, - { - "epoch": 0.2920725179452079, - "grad_norm": 0.40085533261299133, - "learning_rate": 4.014814814814815e-05, - "loss": 0.366, - "step": 8321 - }, - { - "epoch": 0.29210761859632495, - "grad_norm": 0.4332641065120697, - "learning_rate": 4.01462962962963e-05, - "loss": 0.3685, - "step": 8322 - }, - { - "epoch": 0.29214271924744206, - "grad_norm": 0.48378732800483704, - "learning_rate": 4.0144444444444444e-05, - "loss": 0.6409, - "step": 8323 - }, - { - "epoch": 0.2921778198985591, - "grad_norm": 0.44449982047080994, - "learning_rate": 4.0142592592592594e-05, - "loss": 0.3988, - "step": 8324 - }, - { - "epoch": 0.2922129205496762, - "grad_norm": 0.420091450214386, - "learning_rate": 4.0140740740740744e-05, - "loss": 0.4852, - "step": 8325 - }, - { - "epoch": 0.2922480212007933, - "grad_norm": 0.47828248143196106, - "learning_rate": 4.0138888888888894e-05, - "loss": 0.4924, - "step": 8326 - }, - { - "epoch": 0.29228312185191035, - "grad_norm": 0.45401591062545776, - "learning_rate": 4.013703703703704e-05, - "loss": 0.4193, - "step": 8327 - }, - { - "epoch": 0.2923182225030274, - "grad_norm": 0.5447410345077515, - "learning_rate": 4.013518518518519e-05, - "loss": 0.4623, - "step": 8328 - }, - { - "epoch": 0.29235332315414453, - "grad_norm": 0.5555384755134583, - "learning_rate": 4.013333333333333e-05, - "loss": 0.6259, - "step": 8329 - }, - { - "epoch": 0.2923884238052616, - "grad_norm": 0.4884251356124878, - "learning_rate": 4.013148148148148e-05, - "loss": 0.5464, - "step": 8330 - }, - { - "epoch": 0.29242352445637865, - "grad_norm": 0.5038489103317261, - "learning_rate": 4.012962962962963e-05, - "loss": 0.4467, - "step": 8331 - }, - { - "epoch": 0.29245862510749576, - "grad_norm": 0.46819785237312317, - "learning_rate": 4.012777777777778e-05, - "loss": 0.4953, - "step": 8332 - }, - { - "epoch": 0.2924937257586128, - "grad_norm": 0.4568704068660736, - "learning_rate": 4.012592592592593e-05, - "loss": 0.4922, - "step": 8333 - }, - { - "epoch": 0.2925288264097299, - "grad_norm": 0.4828489124774933, - "learning_rate": 4.0124074074074075e-05, - "loss": 0.4723, - "step": 8334 - }, - { - "epoch": 0.292563927060847, - "grad_norm": 0.3853275775909424, - "learning_rate": 4.0122222222222225e-05, - "loss": 0.4115, - "step": 8335 - }, - { - "epoch": 0.29259902771196405, - "grad_norm": 0.5145053267478943, - "learning_rate": 4.0120370370370375e-05, - "loss": 0.5802, - "step": 8336 - }, - { - "epoch": 0.2926341283630811, - "grad_norm": 0.45453986525535583, - "learning_rate": 4.0118518518518525e-05, - "loss": 0.4108, - "step": 8337 - }, - { - "epoch": 0.29266922901419823, - "grad_norm": 0.48861443996429443, - "learning_rate": 4.011666666666667e-05, - "loss": 0.4714, - "step": 8338 - }, - { - "epoch": 0.2927043296653153, - "grad_norm": 0.506854772567749, - "learning_rate": 4.011481481481482e-05, - "loss": 0.5479, - "step": 8339 - }, - { - "epoch": 0.29273943031643235, - "grad_norm": 0.5700206160545349, - "learning_rate": 4.011296296296296e-05, - "loss": 0.4492, - "step": 8340 - }, - { - "epoch": 0.29277453096754946, - "grad_norm": 0.46890556812286377, - "learning_rate": 4.011111111111111e-05, - "loss": 0.5772, - "step": 8341 - }, - { - "epoch": 0.2928096316186665, - "grad_norm": 0.5579214096069336, - "learning_rate": 4.010925925925926e-05, - "loss": 0.5539, - "step": 8342 - }, - { - "epoch": 0.2928447322697836, - "grad_norm": 0.4459303617477417, - "learning_rate": 4.010740740740741e-05, - "loss": 0.3736, - "step": 8343 - }, - { - "epoch": 0.2928798329209007, - "grad_norm": 0.512940526008606, - "learning_rate": 4.0105555555555555e-05, - "loss": 0.4714, - "step": 8344 - }, - { - "epoch": 0.29291493357201775, - "grad_norm": 0.48265907168388367, - "learning_rate": 4.0103703703703705e-05, - "loss": 0.5214, - "step": 8345 - }, - { - "epoch": 0.2929500342231348, - "grad_norm": 0.45596253871917725, - "learning_rate": 4.010185185185185e-05, - "loss": 0.5458, - "step": 8346 - }, - { - "epoch": 0.29298513487425193, - "grad_norm": 0.4959734082221985, - "learning_rate": 4.0100000000000006e-05, - "loss": 0.5166, - "step": 8347 - }, - { - "epoch": 0.293020235525369, - "grad_norm": 0.46400168538093567, - "learning_rate": 4.009814814814815e-05, - "loss": 0.4485, - "step": 8348 - }, - { - "epoch": 0.29305533617648605, - "grad_norm": 0.4830968976020813, - "learning_rate": 4.00962962962963e-05, - "loss": 0.6219, - "step": 8349 - }, - { - "epoch": 0.29309043682760316, - "grad_norm": 0.5147757530212402, - "learning_rate": 4.009444444444444e-05, - "loss": 0.5625, - "step": 8350 - }, - { - "epoch": 0.2931255374787202, - "grad_norm": 0.4779815375804901, - "learning_rate": 4.009259259259259e-05, - "loss": 0.5258, - "step": 8351 - }, - { - "epoch": 0.2931606381298373, - "grad_norm": 0.468417227268219, - "learning_rate": 4.009074074074074e-05, - "loss": 0.5462, - "step": 8352 - }, - { - "epoch": 0.2931957387809544, - "grad_norm": 0.5936179757118225, - "learning_rate": 4.008888888888889e-05, - "loss": 0.5187, - "step": 8353 - }, - { - "epoch": 0.29323083943207146, - "grad_norm": 0.4871712028980255, - "learning_rate": 4.008703703703704e-05, - "loss": 0.5036, - "step": 8354 - }, - { - "epoch": 0.29326594008318857, - "grad_norm": 0.5313497185707092, - "learning_rate": 4.0085185185185186e-05, - "loss": 0.497, - "step": 8355 - }, - { - "epoch": 0.29330104073430563, - "grad_norm": 0.700410783290863, - "learning_rate": 4.0083333333333336e-05, - "loss": 0.638, - "step": 8356 - }, - { - "epoch": 0.2933361413854227, - "grad_norm": 0.46175098419189453, - "learning_rate": 4.008148148148148e-05, - "loss": 0.4274, - "step": 8357 - }, - { - "epoch": 0.2933712420365398, - "grad_norm": 0.4563196003437042, - "learning_rate": 4.0079629629629637e-05, - "loss": 0.5216, - "step": 8358 - }, - { - "epoch": 0.29340634268765686, - "grad_norm": 0.4713110625743866, - "learning_rate": 4.007777777777778e-05, - "loss": 0.5499, - "step": 8359 - }, - { - "epoch": 0.2934414433387739, - "grad_norm": 0.449725478887558, - "learning_rate": 4.007592592592593e-05, - "loss": 0.486, - "step": 8360 - }, - { - "epoch": 0.29347654398989104, - "grad_norm": 0.4558824896812439, - "learning_rate": 4.007407407407407e-05, - "loss": 0.5969, - "step": 8361 - }, - { - "epoch": 0.2935116446410081, - "grad_norm": 0.41928768157958984, - "learning_rate": 4.0072222222222223e-05, - "loss": 0.4352, - "step": 8362 - }, - { - "epoch": 0.29354674529212516, - "grad_norm": 0.4582603871822357, - "learning_rate": 4.0070370370370374e-05, - "loss": 0.4379, - "step": 8363 - }, - { - "epoch": 0.29358184594324227, - "grad_norm": 0.41327935457229614, - "learning_rate": 4.0068518518518524e-05, - "loss": 0.3924, - "step": 8364 - }, - { - "epoch": 0.29361694659435933, - "grad_norm": 0.38856446743011475, - "learning_rate": 4.006666666666667e-05, - "loss": 0.438, - "step": 8365 - }, - { - "epoch": 0.2936520472454764, - "grad_norm": 0.4402570426464081, - "learning_rate": 4.006481481481482e-05, - "loss": 0.5335, - "step": 8366 - }, - { - "epoch": 0.2936871478965935, - "grad_norm": 0.43702924251556396, - "learning_rate": 4.006296296296296e-05, - "loss": 0.5572, - "step": 8367 - }, - { - "epoch": 0.29372224854771056, - "grad_norm": 0.4991077780723572, - "learning_rate": 4.006111111111111e-05, - "loss": 0.3167, - "step": 8368 - }, - { - "epoch": 0.2937573491988276, - "grad_norm": 0.7812251448631287, - "learning_rate": 4.005925925925926e-05, - "loss": 0.5089, - "step": 8369 - }, - { - "epoch": 0.29379244984994474, - "grad_norm": 0.5238428115844727, - "learning_rate": 4.005740740740741e-05, - "loss": 0.5324, - "step": 8370 - }, - { - "epoch": 0.2938275505010618, - "grad_norm": 0.4551834166049957, - "learning_rate": 4.0055555555555554e-05, - "loss": 0.453, - "step": 8371 - }, - { - "epoch": 0.29386265115217886, - "grad_norm": 0.46025606989860535, - "learning_rate": 4.0053703703703704e-05, - "loss": 0.4192, - "step": 8372 - }, - { - "epoch": 0.29389775180329597, - "grad_norm": 0.45238110423088074, - "learning_rate": 4.0051851851851854e-05, - "loss": 0.5526, - "step": 8373 - }, - { - "epoch": 0.29393285245441303, - "grad_norm": 0.4611052870750427, - "learning_rate": 4.0050000000000004e-05, - "loss": 0.4731, - "step": 8374 - }, - { - "epoch": 0.2939679531055301, - "grad_norm": 0.4741457402706146, - "learning_rate": 4.0048148148148155e-05, - "loss": 0.4354, - "step": 8375 - }, - { - "epoch": 0.2940030537566472, - "grad_norm": 0.46236446499824524, - "learning_rate": 4.00462962962963e-05, - "loss": 0.4435, - "step": 8376 - }, - { - "epoch": 0.29403815440776426, - "grad_norm": 0.4376284182071686, - "learning_rate": 4.004444444444445e-05, - "loss": 0.5165, - "step": 8377 - }, - { - "epoch": 0.2940732550588813, - "grad_norm": 0.54815673828125, - "learning_rate": 4.004259259259259e-05, - "loss": 0.351, - "step": 8378 - }, - { - "epoch": 0.29410835570999844, - "grad_norm": 0.4729309678077698, - "learning_rate": 4.004074074074075e-05, - "loss": 0.5716, - "step": 8379 - }, - { - "epoch": 0.2941434563611155, - "grad_norm": 0.5624547004699707, - "learning_rate": 4.003888888888889e-05, - "loss": 0.5886, - "step": 8380 - }, - { - "epoch": 0.29417855701223256, - "grad_norm": 0.485294371843338, - "learning_rate": 4.003703703703704e-05, - "loss": 0.5116, - "step": 8381 - }, - { - "epoch": 0.29421365766334967, - "grad_norm": 0.4979398548603058, - "learning_rate": 4.0035185185185185e-05, - "loss": 0.5394, - "step": 8382 - }, - { - "epoch": 0.29424875831446673, - "grad_norm": 0.4641183912754059, - "learning_rate": 4.0033333333333335e-05, - "loss": 0.5717, - "step": 8383 - }, - { - "epoch": 0.2942838589655838, - "grad_norm": 0.4712856113910675, - "learning_rate": 4.003148148148148e-05, - "loss": 0.4403, - "step": 8384 - }, - { - "epoch": 0.2943189596167009, - "grad_norm": 0.49917012453079224, - "learning_rate": 4.0029629629629635e-05, - "loss": 0.5417, - "step": 8385 - }, - { - "epoch": 0.29435406026781796, - "grad_norm": 0.5377385020256042, - "learning_rate": 4.002777777777778e-05, - "loss": 0.5572, - "step": 8386 - }, - { - "epoch": 0.294389160918935, - "grad_norm": 0.6291019916534424, - "learning_rate": 4.002592592592593e-05, - "loss": 0.5277, - "step": 8387 - }, - { - "epoch": 0.29442426157005214, - "grad_norm": 0.45928069949150085, - "learning_rate": 4.002407407407407e-05, - "loss": 0.4144, - "step": 8388 - }, - { - "epoch": 0.2944593622211692, - "grad_norm": 0.45751404762268066, - "learning_rate": 4.002222222222222e-05, - "loss": 0.5104, - "step": 8389 - }, - { - "epoch": 0.29449446287228626, - "grad_norm": 0.5101454854011536, - "learning_rate": 4.002037037037037e-05, - "loss": 0.5196, - "step": 8390 - }, - { - "epoch": 0.29452956352340337, - "grad_norm": 0.5287780165672302, - "learning_rate": 4.001851851851852e-05, - "loss": 0.5118, - "step": 8391 - }, - { - "epoch": 0.29456466417452043, - "grad_norm": 0.4622137248516083, - "learning_rate": 4.0016666666666666e-05, - "loss": 0.451, - "step": 8392 - }, - { - "epoch": 0.2945997648256375, - "grad_norm": 0.747372031211853, - "learning_rate": 4.0014814814814816e-05, - "loss": 0.4513, - "step": 8393 - }, - { - "epoch": 0.2946348654767546, - "grad_norm": 0.44405800104141235, - "learning_rate": 4.0012962962962966e-05, - "loss": 0.4974, - "step": 8394 - }, - { - "epoch": 0.29466996612787166, - "grad_norm": 0.47139251232147217, - "learning_rate": 4.001111111111111e-05, - "loss": 0.4308, - "step": 8395 - }, - { - "epoch": 0.2947050667789888, - "grad_norm": 0.46186935901641846, - "learning_rate": 4.0009259259259266e-05, - "loss": 0.6025, - "step": 8396 - }, - { - "epoch": 0.29474016743010584, - "grad_norm": 0.4200986325740814, - "learning_rate": 4.000740740740741e-05, - "loss": 0.4528, - "step": 8397 - }, - { - "epoch": 0.2947752680812229, - "grad_norm": 0.4244002401828766, - "learning_rate": 4.000555555555556e-05, - "loss": 0.4618, - "step": 8398 - }, - { - "epoch": 0.29481036873234, - "grad_norm": 0.4860490560531616, - "learning_rate": 4.00037037037037e-05, - "loss": 0.5084, - "step": 8399 - }, - { - "epoch": 0.29484546938345707, - "grad_norm": 0.4785040318965912, - "learning_rate": 4.000185185185185e-05, - "loss": 0.6012, - "step": 8400 - }, - { - "epoch": 0.29488057003457413, - "grad_norm": 0.46504461765289307, - "learning_rate": 4e-05, - "loss": 0.5426, - "step": 8401 - }, - { - "epoch": 0.29491567068569124, - "grad_norm": 0.8175114989280701, - "learning_rate": 3.999814814814815e-05, - "loss": 0.4565, - "step": 8402 - }, - { - "epoch": 0.2949507713368083, - "grad_norm": 0.45966243743896484, - "learning_rate": 3.99962962962963e-05, - "loss": 0.4808, - "step": 8403 - }, - { - "epoch": 0.29498587198792536, - "grad_norm": 0.4677942991256714, - "learning_rate": 3.999444444444445e-05, - "loss": 0.5306, - "step": 8404 - }, - { - "epoch": 0.2950209726390425, - "grad_norm": 0.45696309208869934, - "learning_rate": 3.999259259259259e-05, - "loss": 0.4788, - "step": 8405 - }, - { - "epoch": 0.29505607329015954, - "grad_norm": 0.46556928753852844, - "learning_rate": 3.999074074074075e-05, - "loss": 0.5244, - "step": 8406 - }, - { - "epoch": 0.2950911739412766, - "grad_norm": 0.6084827184677124, - "learning_rate": 3.998888888888889e-05, - "loss": 0.5648, - "step": 8407 - }, - { - "epoch": 0.2951262745923937, - "grad_norm": 0.4985499978065491, - "learning_rate": 3.998703703703704e-05, - "loss": 0.5888, - "step": 8408 - }, - { - "epoch": 0.29516137524351077, - "grad_norm": 0.43743401765823364, - "learning_rate": 3.9985185185185184e-05, - "loss": 0.5228, - "step": 8409 - }, - { - "epoch": 0.29519647589462783, - "grad_norm": 0.4061650335788727, - "learning_rate": 3.9983333333333334e-05, - "loss": 0.3944, - "step": 8410 - }, - { - "epoch": 0.29523157654574494, - "grad_norm": 0.4770485460758209, - "learning_rate": 3.9981481481481484e-05, - "loss": 0.535, - "step": 8411 - }, - { - "epoch": 0.295266677196862, - "grad_norm": 0.481139600276947, - "learning_rate": 3.9979629629629634e-05, - "loss": 0.5538, - "step": 8412 - }, - { - "epoch": 0.29530177784797906, - "grad_norm": 0.4266698956489563, - "learning_rate": 3.997777777777778e-05, - "loss": 0.4906, - "step": 8413 - }, - { - "epoch": 0.2953368784990962, - "grad_norm": 0.5343571901321411, - "learning_rate": 3.997592592592593e-05, - "loss": 0.4644, - "step": 8414 - }, - { - "epoch": 0.29537197915021324, - "grad_norm": 0.423025518655777, - "learning_rate": 3.997407407407408e-05, - "loss": 0.5894, - "step": 8415 - }, - { - "epoch": 0.2954070798013303, - "grad_norm": 0.4860774576663971, - "learning_rate": 3.997222222222222e-05, - "loss": 0.5456, - "step": 8416 - }, - { - "epoch": 0.2954421804524474, - "grad_norm": 0.4707348942756653, - "learning_rate": 3.997037037037038e-05, - "loss": 0.4447, - "step": 8417 - }, - { - "epoch": 0.29547728110356447, - "grad_norm": 0.48760128021240234, - "learning_rate": 3.996851851851852e-05, - "loss": 0.5559, - "step": 8418 - }, - { - "epoch": 0.29551238175468153, - "grad_norm": 0.483665406703949, - "learning_rate": 3.996666666666667e-05, - "loss": 0.3644, - "step": 8419 - }, - { - "epoch": 0.29554748240579864, - "grad_norm": 0.464437335729599, - "learning_rate": 3.9964814814814815e-05, - "loss": 0.5101, - "step": 8420 - }, - { - "epoch": 0.2955825830569157, - "grad_norm": 0.5047559142112732, - "learning_rate": 3.9962962962962965e-05, - "loss": 0.4661, - "step": 8421 - }, - { - "epoch": 0.29561768370803276, - "grad_norm": 0.5377012491226196, - "learning_rate": 3.996111111111111e-05, - "loss": 0.4671, - "step": 8422 - }, - { - "epoch": 0.2956527843591499, - "grad_norm": 0.4312930107116699, - "learning_rate": 3.9959259259259265e-05, - "loss": 0.5587, - "step": 8423 - }, - { - "epoch": 0.29568788501026694, - "grad_norm": 0.4725513756275177, - "learning_rate": 3.995740740740741e-05, - "loss": 0.5257, - "step": 8424 - }, - { - "epoch": 0.295722985661384, - "grad_norm": 0.5235335230827332, - "learning_rate": 3.995555555555556e-05, - "loss": 0.5474, - "step": 8425 - }, - { - "epoch": 0.2957580863125011, - "grad_norm": 0.505176305770874, - "learning_rate": 3.99537037037037e-05, - "loss": 0.5187, - "step": 8426 - }, - { - "epoch": 0.29579318696361817, - "grad_norm": 0.5040125846862793, - "learning_rate": 3.995185185185185e-05, - "loss": 0.5579, - "step": 8427 - }, - { - "epoch": 0.29582828761473523, - "grad_norm": 0.49105727672576904, - "learning_rate": 3.995e-05, - "loss": 0.5931, - "step": 8428 - }, - { - "epoch": 0.29586338826585235, - "grad_norm": 0.4217044413089752, - "learning_rate": 3.994814814814815e-05, - "loss": 0.4431, - "step": 8429 - }, - { - "epoch": 0.2958984889169694, - "grad_norm": 0.5258684158325195, - "learning_rate": 3.9946296296296296e-05, - "loss": 0.5174, - "step": 8430 - }, - { - "epoch": 0.29593358956808646, - "grad_norm": 0.4864620566368103, - "learning_rate": 3.9944444444444446e-05, - "loss": 0.5178, - "step": 8431 - }, - { - "epoch": 0.2959686902192036, - "grad_norm": 0.41117894649505615, - "learning_rate": 3.9942592592592596e-05, - "loss": 0.3424, - "step": 8432 - }, - { - "epoch": 0.29600379087032064, - "grad_norm": 0.4786394536495209, - "learning_rate": 3.9940740740740746e-05, - "loss": 0.363, - "step": 8433 - }, - { - "epoch": 0.2960388915214377, - "grad_norm": 0.4425114095211029, - "learning_rate": 3.993888888888889e-05, - "loss": 0.4652, - "step": 8434 - }, - { - "epoch": 0.2960739921725548, - "grad_norm": 0.40914854407310486, - "learning_rate": 3.993703703703704e-05, - "loss": 0.5687, - "step": 8435 - }, - { - "epoch": 0.29610909282367187, - "grad_norm": 0.459251344203949, - "learning_rate": 3.993518518518519e-05, - "loss": 0.456, - "step": 8436 - }, - { - "epoch": 0.29614419347478893, - "grad_norm": 0.4209972620010376, - "learning_rate": 3.993333333333333e-05, - "loss": 0.4116, - "step": 8437 - }, - { - "epoch": 0.29617929412590605, - "grad_norm": 0.5048547983169556, - "learning_rate": 3.993148148148148e-05, - "loss": 0.4726, - "step": 8438 - }, - { - "epoch": 0.2962143947770231, - "grad_norm": 0.4596779942512512, - "learning_rate": 3.992962962962963e-05, - "loss": 0.5062, - "step": 8439 - }, - { - "epoch": 0.2962494954281402, - "grad_norm": 0.4394044876098633, - "learning_rate": 3.992777777777778e-05, - "loss": 0.5196, - "step": 8440 - }, - { - "epoch": 0.2962845960792573, - "grad_norm": 0.4732615649700165, - "learning_rate": 3.9925925925925926e-05, - "loss": 0.4209, - "step": 8441 - }, - { - "epoch": 0.29631969673037434, - "grad_norm": 0.5302631855010986, - "learning_rate": 3.9924074074074077e-05, - "loss": 0.5506, - "step": 8442 - }, - { - "epoch": 0.29635479738149145, - "grad_norm": 0.4367506206035614, - "learning_rate": 3.992222222222222e-05, - "loss": 0.4327, - "step": 8443 - }, - { - "epoch": 0.2963898980326085, - "grad_norm": 0.5772961974143982, - "learning_rate": 3.992037037037038e-05, - "loss": 0.5595, - "step": 8444 - }, - { - "epoch": 0.29642499868372557, - "grad_norm": 0.5253996849060059, - "learning_rate": 3.991851851851852e-05, - "loss": 0.5215, - "step": 8445 - }, - { - "epoch": 0.2964600993348427, - "grad_norm": 0.5569830536842346, - "learning_rate": 3.991666666666667e-05, - "loss": 0.4912, - "step": 8446 - }, - { - "epoch": 0.29649519998595975, - "grad_norm": 0.5528599619865417, - "learning_rate": 3.9914814814814814e-05, - "loss": 0.5596, - "step": 8447 - }, - { - "epoch": 0.2965303006370768, - "grad_norm": 0.5227194428443909, - "learning_rate": 3.9912962962962964e-05, - "loss": 0.593, - "step": 8448 - }, - { - "epoch": 0.2965654012881939, - "grad_norm": 0.4269760251045227, - "learning_rate": 3.9911111111111114e-05, - "loss": 0.5344, - "step": 8449 - }, - { - "epoch": 0.296600501939311, - "grad_norm": 0.5352786183357239, - "learning_rate": 3.9909259259259264e-05, - "loss": 0.5171, - "step": 8450 - }, - { - "epoch": 0.29663560259042804, - "grad_norm": 0.5129208564758301, - "learning_rate": 3.990740740740741e-05, - "loss": 0.5261, - "step": 8451 - }, - { - "epoch": 0.29667070324154515, - "grad_norm": 0.4723984897136688, - "learning_rate": 3.990555555555556e-05, - "loss": 0.5342, - "step": 8452 - }, - { - "epoch": 0.2967058038926622, - "grad_norm": 0.45431822538375854, - "learning_rate": 3.990370370370371e-05, - "loss": 0.5118, - "step": 8453 - }, - { - "epoch": 0.29674090454377927, - "grad_norm": 0.6284418702125549, - "learning_rate": 3.990185185185185e-05, - "loss": 0.4833, - "step": 8454 - }, - { - "epoch": 0.2967760051948964, - "grad_norm": 0.4358634352684021, - "learning_rate": 3.99e-05, - "loss": 0.5277, - "step": 8455 - }, - { - "epoch": 0.29681110584601345, - "grad_norm": 0.395084410905838, - "learning_rate": 3.989814814814815e-05, - "loss": 0.5383, - "step": 8456 - }, - { - "epoch": 0.2968462064971305, - "grad_norm": 0.40737125277519226, - "learning_rate": 3.98962962962963e-05, - "loss": 0.4126, - "step": 8457 - }, - { - "epoch": 0.2968813071482476, - "grad_norm": 0.5000039935112, - "learning_rate": 3.9894444444444444e-05, - "loss": 0.5358, - "step": 8458 - }, - { - "epoch": 0.2969164077993647, - "grad_norm": 0.46645355224609375, - "learning_rate": 3.9892592592592595e-05, - "loss": 0.3924, - "step": 8459 - }, - { - "epoch": 0.29695150845048174, - "grad_norm": 0.6011117696762085, - "learning_rate": 3.9890740740740745e-05, - "loss": 0.4916, - "step": 8460 - }, - { - "epoch": 0.29698660910159885, - "grad_norm": 0.517902135848999, - "learning_rate": 3.9888888888888895e-05, - "loss": 0.5179, - "step": 8461 - }, - { - "epoch": 0.2970217097527159, - "grad_norm": 0.4904567301273346, - "learning_rate": 3.988703703703704e-05, - "loss": 0.4217, - "step": 8462 - }, - { - "epoch": 0.29705681040383297, - "grad_norm": 0.48597902059555054, - "learning_rate": 3.988518518518519e-05, - "loss": 0.4927, - "step": 8463 - }, - { - "epoch": 0.2970919110549501, - "grad_norm": 0.4395125210285187, - "learning_rate": 3.988333333333333e-05, - "loss": 0.5702, - "step": 8464 - }, - { - "epoch": 0.29712701170606715, - "grad_norm": 0.5284866690635681, - "learning_rate": 3.988148148148148e-05, - "loss": 0.5067, - "step": 8465 - }, - { - "epoch": 0.2971621123571842, - "grad_norm": 0.513624370098114, - "learning_rate": 3.987962962962963e-05, - "loss": 0.5131, - "step": 8466 - }, - { - "epoch": 0.2971972130083013, - "grad_norm": 0.43924203515052795, - "learning_rate": 3.987777777777778e-05, - "loss": 0.5559, - "step": 8467 - }, - { - "epoch": 0.2972323136594184, - "grad_norm": 0.3995630741119385, - "learning_rate": 3.9875925925925925e-05, - "loss": 0.5068, - "step": 8468 - }, - { - "epoch": 0.29726741431053544, - "grad_norm": 0.47337886691093445, - "learning_rate": 3.9874074074074075e-05, - "loss": 0.6103, - "step": 8469 - }, - { - "epoch": 0.29730251496165255, - "grad_norm": 0.437176913022995, - "learning_rate": 3.987222222222222e-05, - "loss": 0.5251, - "step": 8470 - }, - { - "epoch": 0.2973376156127696, - "grad_norm": 0.4684687554836273, - "learning_rate": 3.9870370370370376e-05, - "loss": 0.5407, - "step": 8471 - }, - { - "epoch": 0.29737271626388667, - "grad_norm": 3.7013134956359863, - "learning_rate": 3.986851851851852e-05, - "loss": 0.6001, - "step": 8472 - }, - { - "epoch": 0.2974078169150038, - "grad_norm": 0.47488391399383545, - "learning_rate": 3.986666666666667e-05, - "loss": 0.5346, - "step": 8473 - }, - { - "epoch": 0.29744291756612085, - "grad_norm": 0.4974324703216553, - "learning_rate": 3.986481481481482e-05, - "loss": 0.5055, - "step": 8474 - }, - { - "epoch": 0.2974780182172379, - "grad_norm": 0.49147912859916687, - "learning_rate": 3.986296296296296e-05, - "loss": 0.4536, - "step": 8475 - }, - { - "epoch": 0.297513118868355, - "grad_norm": 0.6092701554298401, - "learning_rate": 3.986111111111111e-05, - "loss": 0.5496, - "step": 8476 - }, - { - "epoch": 0.2975482195194721, - "grad_norm": 0.424752801656723, - "learning_rate": 3.985925925925926e-05, - "loss": 0.4359, - "step": 8477 - }, - { - "epoch": 0.29758332017058914, - "grad_norm": 0.44956356287002563, - "learning_rate": 3.985740740740741e-05, - "loss": 0.4705, - "step": 8478 - }, - { - "epoch": 0.29761842082170625, - "grad_norm": 0.5643345713615417, - "learning_rate": 3.9855555555555556e-05, - "loss": 0.5749, - "step": 8479 - }, - { - "epoch": 0.2976535214728233, - "grad_norm": 0.42627575993537903, - "learning_rate": 3.9853703703703706e-05, - "loss": 0.4597, - "step": 8480 - }, - { - "epoch": 0.29768862212394037, - "grad_norm": 0.3624163866043091, - "learning_rate": 3.985185185185185e-05, - "loss": 0.4608, - "step": 8481 - }, - { - "epoch": 0.2977237227750575, - "grad_norm": 0.49053484201431274, - "learning_rate": 3.9850000000000006e-05, - "loss": 0.4996, - "step": 8482 - }, - { - "epoch": 0.29775882342617455, - "grad_norm": 0.42421475052833557, - "learning_rate": 3.984814814814815e-05, - "loss": 0.4975, - "step": 8483 - }, - { - "epoch": 0.29779392407729166, - "grad_norm": 0.46854203939437866, - "learning_rate": 3.98462962962963e-05, - "loss": 0.582, - "step": 8484 - }, - { - "epoch": 0.2978290247284087, - "grad_norm": 0.46449288725852966, - "learning_rate": 3.984444444444444e-05, - "loss": 0.4818, - "step": 8485 - }, - { - "epoch": 0.2978641253795258, - "grad_norm": 0.4258647859096527, - "learning_rate": 3.984259259259259e-05, - "loss": 0.3728, - "step": 8486 - }, - { - "epoch": 0.2978992260306429, - "grad_norm": 0.5217698216438293, - "learning_rate": 3.9840740740740743e-05, - "loss": 0.5446, - "step": 8487 - }, - { - "epoch": 0.29793432668175995, - "grad_norm": 0.5082271695137024, - "learning_rate": 3.9838888888888894e-05, - "loss": 0.5656, - "step": 8488 - }, - { - "epoch": 0.297969427332877, - "grad_norm": 0.48177576065063477, - "learning_rate": 3.983703703703704e-05, - "loss": 0.5836, - "step": 8489 - }, - { - "epoch": 0.2980045279839941, - "grad_norm": 0.4746452271938324, - "learning_rate": 3.983518518518519e-05, - "loss": 0.5379, - "step": 8490 - }, - { - "epoch": 0.2980396286351112, - "grad_norm": 0.4408978223800659, - "learning_rate": 3.983333333333333e-05, - "loss": 0.542, - "step": 8491 - }, - { - "epoch": 0.29807472928622825, - "grad_norm": 0.5578538775444031, - "learning_rate": 3.983148148148148e-05, - "loss": 0.5382, - "step": 8492 - }, - { - "epoch": 0.29810982993734536, - "grad_norm": 0.5460729598999023, - "learning_rate": 3.982962962962963e-05, - "loss": 0.4999, - "step": 8493 - }, - { - "epoch": 0.2981449305884624, - "grad_norm": 0.4483976662158966, - "learning_rate": 3.982777777777778e-05, - "loss": 0.464, - "step": 8494 - }, - { - "epoch": 0.2981800312395795, - "grad_norm": 0.4478009045124054, - "learning_rate": 3.982592592592593e-05, - "loss": 0.4349, - "step": 8495 - }, - { - "epoch": 0.2982151318906966, - "grad_norm": 0.5261837244033813, - "learning_rate": 3.9824074074074074e-05, - "loss": 0.4932, - "step": 8496 - }, - { - "epoch": 0.29825023254181365, - "grad_norm": 0.5317858457565308, - "learning_rate": 3.9822222222222224e-05, - "loss": 0.4631, - "step": 8497 - }, - { - "epoch": 0.2982853331929307, - "grad_norm": 0.48399674892425537, - "learning_rate": 3.9820370370370374e-05, - "loss": 0.4231, - "step": 8498 - }, - { - "epoch": 0.2983204338440478, - "grad_norm": 0.47850286960601807, - "learning_rate": 3.9818518518518524e-05, - "loss": 0.4587, - "step": 8499 - }, - { - "epoch": 0.2983555344951649, - "grad_norm": 0.44586920738220215, - "learning_rate": 3.981666666666667e-05, - "loss": 0.5167, - "step": 8500 - }, - { - "epoch": 0.29839063514628195, - "grad_norm": 0.5227728486061096, - "learning_rate": 3.981481481481482e-05, - "loss": 0.6135, - "step": 8501 - }, - { - "epoch": 0.29842573579739906, - "grad_norm": 0.5463497638702393, - "learning_rate": 3.981296296296296e-05, - "loss": 0.4867, - "step": 8502 - }, - { - "epoch": 0.2984608364485161, - "grad_norm": 0.4903933107852936, - "learning_rate": 3.981111111111112e-05, - "loss": 0.5182, - "step": 8503 - }, - { - "epoch": 0.2984959370996332, - "grad_norm": 0.4934491217136383, - "learning_rate": 3.980925925925926e-05, - "loss": 0.5442, - "step": 8504 - }, - { - "epoch": 0.2985310377507503, - "grad_norm": 0.5537309050559998, - "learning_rate": 3.980740740740741e-05, - "loss": 0.5132, - "step": 8505 - }, - { - "epoch": 0.29856613840186735, - "grad_norm": 0.48636507987976074, - "learning_rate": 3.9805555555555555e-05, - "loss": 0.4835, - "step": 8506 - }, - { - "epoch": 0.2986012390529844, - "grad_norm": 0.5085264444351196, - "learning_rate": 3.9803703703703705e-05, - "loss": 0.5845, - "step": 8507 - }, - { - "epoch": 0.2986363397041015, - "grad_norm": 0.45617470145225525, - "learning_rate": 3.980185185185185e-05, - "loss": 0.5214, - "step": 8508 - }, - { - "epoch": 0.2986714403552186, - "grad_norm": 0.4377318322658539, - "learning_rate": 3.9800000000000005e-05, - "loss": 0.4075, - "step": 8509 - }, - { - "epoch": 0.29870654100633565, - "grad_norm": 0.4624309837818146, - "learning_rate": 3.979814814814815e-05, - "loss": 0.5166, - "step": 8510 - }, - { - "epoch": 0.29874164165745276, - "grad_norm": 0.47823432087898254, - "learning_rate": 3.97962962962963e-05, - "loss": 0.5062, - "step": 8511 - }, - { - "epoch": 0.2987767423085698, - "grad_norm": 0.45971280336380005, - "learning_rate": 3.979444444444444e-05, - "loss": 0.4951, - "step": 8512 - }, - { - "epoch": 0.2988118429596869, - "grad_norm": 0.45041775703430176, - "learning_rate": 3.979259259259259e-05, - "loss": 0.5506, - "step": 8513 - }, - { - "epoch": 0.298846943610804, - "grad_norm": 0.5284222960472107, - "learning_rate": 3.979074074074074e-05, - "loss": 0.392, - "step": 8514 - }, - { - "epoch": 0.29888204426192105, - "grad_norm": 0.5277480483055115, - "learning_rate": 3.978888888888889e-05, - "loss": 0.6322, - "step": 8515 - }, - { - "epoch": 0.2989171449130381, - "grad_norm": 0.502187967300415, - "learning_rate": 3.978703703703704e-05, - "loss": 0.5443, - "step": 8516 - }, - { - "epoch": 0.29895224556415523, - "grad_norm": 0.4944749176502228, - "learning_rate": 3.9785185185185186e-05, - "loss": 0.417, - "step": 8517 - }, - { - "epoch": 0.2989873462152723, - "grad_norm": 0.39544370770454407, - "learning_rate": 3.9783333333333336e-05, - "loss": 0.4312, - "step": 8518 - }, - { - "epoch": 0.29902244686638935, - "grad_norm": 0.48058393597602844, - "learning_rate": 3.978148148148148e-05, - "loss": 0.4366, - "step": 8519 - }, - { - "epoch": 0.29905754751750646, - "grad_norm": 0.531200647354126, - "learning_rate": 3.9779629629629636e-05, - "loss": 0.6099, - "step": 8520 - }, - { - "epoch": 0.2990926481686235, - "grad_norm": 0.4466700255870819, - "learning_rate": 3.977777777777778e-05, - "loss": 0.5242, - "step": 8521 - }, - { - "epoch": 0.2991277488197406, - "grad_norm": 0.4756726026535034, - "learning_rate": 3.977592592592593e-05, - "loss": 0.5241, - "step": 8522 - }, - { - "epoch": 0.2991628494708577, - "grad_norm": 0.45591747760772705, - "learning_rate": 3.977407407407407e-05, - "loss": 0.5723, - "step": 8523 - }, - { - "epoch": 0.29919795012197475, - "grad_norm": 0.47692304849624634, - "learning_rate": 3.977222222222222e-05, - "loss": 0.517, - "step": 8524 - }, - { - "epoch": 0.2992330507730918, - "grad_norm": 0.4693818986415863, - "learning_rate": 3.977037037037037e-05, - "loss": 0.5587, - "step": 8525 - }, - { - "epoch": 0.29926815142420893, - "grad_norm": 0.5088730454444885, - "learning_rate": 3.976851851851852e-05, - "loss": 0.5386, - "step": 8526 - }, - { - "epoch": 0.299303252075326, - "grad_norm": 0.44273892045021057, - "learning_rate": 3.9766666666666667e-05, - "loss": 0.5511, - "step": 8527 - }, - { - "epoch": 0.2993383527264431, - "grad_norm": 0.44590646028518677, - "learning_rate": 3.976481481481482e-05, - "loss": 0.5724, - "step": 8528 - }, - { - "epoch": 0.29937345337756016, - "grad_norm": 0.4623454809188843, - "learning_rate": 3.976296296296296e-05, - "loss": 0.4463, - "step": 8529 - }, - { - "epoch": 0.2994085540286772, - "grad_norm": 0.4394809603691101, - "learning_rate": 3.976111111111112e-05, - "loss": 0.5231, - "step": 8530 - }, - { - "epoch": 0.29944365467979434, - "grad_norm": 0.4799475371837616, - "learning_rate": 3.975925925925926e-05, - "loss": 0.5745, - "step": 8531 - }, - { - "epoch": 0.2994787553309114, - "grad_norm": 0.5454737544059753, - "learning_rate": 3.975740740740741e-05, - "loss": 0.3969, - "step": 8532 - }, - { - "epoch": 0.29951385598202845, - "grad_norm": 0.4263201355934143, - "learning_rate": 3.9755555555555554e-05, - "loss": 0.489, - "step": 8533 - }, - { - "epoch": 0.29954895663314557, - "grad_norm": 0.4299584925174713, - "learning_rate": 3.9753703703703704e-05, - "loss": 0.5106, - "step": 8534 - }, - { - "epoch": 0.29958405728426263, - "grad_norm": 0.44373786449432373, - "learning_rate": 3.9751851851851854e-05, - "loss": 0.4784, - "step": 8535 - }, - { - "epoch": 0.2996191579353797, - "grad_norm": 0.5141566395759583, - "learning_rate": 3.9750000000000004e-05, - "loss": 0.5438, - "step": 8536 - }, - { - "epoch": 0.2996542585864968, - "grad_norm": 0.5677040219306946, - "learning_rate": 3.9748148148148154e-05, - "loss": 0.5006, - "step": 8537 - }, - { - "epoch": 0.29968935923761386, - "grad_norm": 0.4373248219490051, - "learning_rate": 3.97462962962963e-05, - "loss": 0.5079, - "step": 8538 - }, - { - "epoch": 0.2997244598887309, - "grad_norm": 0.4315747618675232, - "learning_rate": 3.974444444444445e-05, - "loss": 0.4401, - "step": 8539 - }, - { - "epoch": 0.29975956053984804, - "grad_norm": 1.3376966714859009, - "learning_rate": 3.974259259259259e-05, - "loss": 0.4331, - "step": 8540 - }, - { - "epoch": 0.2997946611909651, - "grad_norm": 0.45537427067756653, - "learning_rate": 3.974074074074075e-05, - "loss": 0.53, - "step": 8541 - }, - { - "epoch": 0.29982976184208215, - "grad_norm": 0.5173161625862122, - "learning_rate": 3.973888888888889e-05, - "loss": 0.5364, - "step": 8542 - }, - { - "epoch": 0.29986486249319927, - "grad_norm": 0.44929003715515137, - "learning_rate": 3.973703703703704e-05, - "loss": 0.6149, - "step": 8543 - }, - { - "epoch": 0.29989996314431633, - "grad_norm": 0.49709513783454895, - "learning_rate": 3.9735185185185185e-05, - "loss": 0.5683, - "step": 8544 - }, - { - "epoch": 0.2999350637954334, - "grad_norm": 0.5782919526100159, - "learning_rate": 3.9733333333333335e-05, - "loss": 0.4748, - "step": 8545 - }, - { - "epoch": 0.2999701644465505, - "grad_norm": 0.4076713025569916, - "learning_rate": 3.973148148148148e-05, - "loss": 0.5341, - "step": 8546 - }, - { - "epoch": 0.30000526509766756, - "grad_norm": 0.6174102425575256, - "learning_rate": 3.9729629629629635e-05, - "loss": 0.4051, - "step": 8547 - }, - { - "epoch": 0.3000403657487846, - "grad_norm": 0.5237469673156738, - "learning_rate": 3.972777777777778e-05, - "loss": 0.472, - "step": 8548 - }, - { - "epoch": 0.30007546639990174, - "grad_norm": 0.42920565605163574, - "learning_rate": 3.972592592592593e-05, - "loss": 0.4189, - "step": 8549 - }, - { - "epoch": 0.3001105670510188, - "grad_norm": 0.4502197802066803, - "learning_rate": 3.972407407407407e-05, - "loss": 0.4581, - "step": 8550 - }, - { - "epoch": 0.30014566770213585, - "grad_norm": 0.45912715792655945, - "learning_rate": 3.972222222222222e-05, - "loss": 0.4553, - "step": 8551 - }, - { - "epoch": 0.30018076835325297, - "grad_norm": 0.5219545960426331, - "learning_rate": 3.972037037037037e-05, - "loss": 0.463, - "step": 8552 - }, - { - "epoch": 0.30021586900437003, - "grad_norm": 0.5263922810554504, - "learning_rate": 3.971851851851852e-05, - "loss": 0.5897, - "step": 8553 - }, - { - "epoch": 0.3002509696554871, - "grad_norm": 0.45777207612991333, - "learning_rate": 3.9716666666666665e-05, - "loss": 0.4971, - "step": 8554 - }, - { - "epoch": 0.3002860703066042, - "grad_norm": 0.5575727820396423, - "learning_rate": 3.9714814814814815e-05, - "loss": 0.5973, - "step": 8555 - }, - { - "epoch": 0.30032117095772126, - "grad_norm": 0.4530044496059418, - "learning_rate": 3.9712962962962966e-05, - "loss": 0.4671, - "step": 8556 - }, - { - "epoch": 0.3003562716088383, - "grad_norm": 0.42350122332572937, - "learning_rate": 3.9711111111111116e-05, - "loss": 0.3417, - "step": 8557 - }, - { - "epoch": 0.30039137225995544, - "grad_norm": 0.49256643652915955, - "learning_rate": 3.9709259259259266e-05, - "loss": 0.5034, - "step": 8558 - }, - { - "epoch": 0.3004264729110725, - "grad_norm": 0.4348563551902771, - "learning_rate": 3.970740740740741e-05, - "loss": 0.4993, - "step": 8559 - }, - { - "epoch": 0.30046157356218955, - "grad_norm": 0.6047967672348022, - "learning_rate": 3.970555555555556e-05, - "loss": 0.5385, - "step": 8560 - }, - { - "epoch": 0.30049667421330667, - "grad_norm": 0.46107107400894165, - "learning_rate": 3.97037037037037e-05, - "loss": 0.5411, - "step": 8561 - }, - { - "epoch": 0.30053177486442373, - "grad_norm": 0.4787382185459137, - "learning_rate": 3.970185185185185e-05, - "loss": 0.4392, - "step": 8562 - }, - { - "epoch": 0.3005668755155408, - "grad_norm": 0.5629346370697021, - "learning_rate": 3.97e-05, - "loss": 0.5478, - "step": 8563 - }, - { - "epoch": 0.3006019761666579, - "grad_norm": 0.5090131759643555, - "learning_rate": 3.969814814814815e-05, - "loss": 0.6243, - "step": 8564 - }, - { - "epoch": 0.30063707681777496, - "grad_norm": 0.47027090191841125, - "learning_rate": 3.9696296296296296e-05, - "loss": 0.5058, - "step": 8565 - }, - { - "epoch": 0.300672177468892, - "grad_norm": 0.47195443511009216, - "learning_rate": 3.9694444444444446e-05, - "loss": 0.3597, - "step": 8566 - }, - { - "epoch": 0.30070727812000914, - "grad_norm": 0.4480992555618286, - "learning_rate": 3.969259259259259e-05, - "loss": 0.4873, - "step": 8567 - }, - { - "epoch": 0.3007423787711262, - "grad_norm": 0.410148561000824, - "learning_rate": 3.9690740740740747e-05, - "loss": 0.4186, - "step": 8568 - }, - { - "epoch": 0.3007774794222433, - "grad_norm": 0.5760303735733032, - "learning_rate": 3.968888888888889e-05, - "loss": 0.552, - "step": 8569 - }, - { - "epoch": 0.30081258007336037, - "grad_norm": 0.45093005895614624, - "learning_rate": 3.968703703703704e-05, - "loss": 0.5131, - "step": 8570 - }, - { - "epoch": 0.30084768072447743, - "grad_norm": 0.7975167036056519, - "learning_rate": 3.968518518518518e-05, - "loss": 0.4919, - "step": 8571 - }, - { - "epoch": 0.30088278137559454, - "grad_norm": 0.5533704161643982, - "learning_rate": 3.9683333333333333e-05, - "loss": 0.5828, - "step": 8572 - }, - { - "epoch": 0.3009178820267116, - "grad_norm": 0.4865869879722595, - "learning_rate": 3.9681481481481484e-05, - "loss": 0.5605, - "step": 8573 - }, - { - "epoch": 0.30095298267782866, - "grad_norm": 0.4951707124710083, - "learning_rate": 3.9679629629629634e-05, - "loss": 0.4642, - "step": 8574 - }, - { - "epoch": 0.3009880833289458, - "grad_norm": 0.5125311017036438, - "learning_rate": 3.9677777777777784e-05, - "loss": 0.4439, - "step": 8575 - }, - { - "epoch": 0.30102318398006284, - "grad_norm": 0.5496068596839905, - "learning_rate": 3.967592592592593e-05, - "loss": 0.5517, - "step": 8576 - }, - { - "epoch": 0.3010582846311799, - "grad_norm": 0.44633910059928894, - "learning_rate": 3.967407407407408e-05, - "loss": 0.5202, - "step": 8577 - }, - { - "epoch": 0.301093385282297, - "grad_norm": 0.47660133242607117, - "learning_rate": 3.967222222222222e-05, - "loss": 0.4182, - "step": 8578 - }, - { - "epoch": 0.30112848593341407, - "grad_norm": 0.45565617084503174, - "learning_rate": 3.967037037037038e-05, - "loss": 0.4859, - "step": 8579 - }, - { - "epoch": 0.30116358658453113, - "grad_norm": 0.5164135694503784, - "learning_rate": 3.966851851851852e-05, - "loss": 0.5779, - "step": 8580 - }, - { - "epoch": 0.30119868723564824, - "grad_norm": 0.4641110301017761, - "learning_rate": 3.966666666666667e-05, - "loss": 0.5489, - "step": 8581 - }, - { - "epoch": 0.3012337878867653, - "grad_norm": 0.5897434949874878, - "learning_rate": 3.9664814814814814e-05, - "loss": 0.3955, - "step": 8582 - }, - { - "epoch": 0.30126888853788236, - "grad_norm": 0.5363796353340149, - "learning_rate": 3.9662962962962964e-05, - "loss": 0.5886, - "step": 8583 - }, - { - "epoch": 0.3013039891889995, - "grad_norm": 0.49708858132362366, - "learning_rate": 3.9661111111111114e-05, - "loss": 0.4598, - "step": 8584 - }, - { - "epoch": 0.30133908984011654, - "grad_norm": 0.6007192730903625, - "learning_rate": 3.9659259259259265e-05, - "loss": 0.5611, - "step": 8585 - }, - { - "epoch": 0.3013741904912336, - "grad_norm": 0.4550457000732422, - "learning_rate": 3.965740740740741e-05, - "loss": 0.5176, - "step": 8586 - }, - { - "epoch": 0.3014092911423507, - "grad_norm": 0.5345965027809143, - "learning_rate": 3.965555555555556e-05, - "loss": 0.5209, - "step": 8587 - }, - { - "epoch": 0.30144439179346777, - "grad_norm": 0.503053605556488, - "learning_rate": 3.96537037037037e-05, - "loss": 0.5463, - "step": 8588 - }, - { - "epoch": 0.30147949244458483, - "grad_norm": 0.4306589365005493, - "learning_rate": 3.965185185185185e-05, - "loss": 0.4593, - "step": 8589 - }, - { - "epoch": 0.30151459309570194, - "grad_norm": 0.49499252438545227, - "learning_rate": 3.965e-05, - "loss": 0.5963, - "step": 8590 - }, - { - "epoch": 0.301549693746819, - "grad_norm": 0.5254275798797607, - "learning_rate": 3.964814814814815e-05, - "loss": 0.4205, - "step": 8591 - }, - { - "epoch": 0.30158479439793606, - "grad_norm": 0.4190976917743683, - "learning_rate": 3.9646296296296295e-05, - "loss": 0.4445, - "step": 8592 - }, - { - "epoch": 0.3016198950490532, - "grad_norm": 0.40497446060180664, - "learning_rate": 3.9644444444444445e-05, - "loss": 0.4009, - "step": 8593 - }, - { - "epoch": 0.30165499570017024, - "grad_norm": 0.46487781405448914, - "learning_rate": 3.9642592592592595e-05, - "loss": 0.4204, - "step": 8594 - }, - { - "epoch": 0.3016900963512873, - "grad_norm": 0.484646201133728, - "learning_rate": 3.9640740740740745e-05, - "loss": 0.4671, - "step": 8595 - }, - { - "epoch": 0.3017251970024044, - "grad_norm": 0.44616973400115967, - "learning_rate": 3.9638888888888895e-05, - "loss": 0.4069, - "step": 8596 - }, - { - "epoch": 0.30176029765352147, - "grad_norm": 0.41988322138786316, - "learning_rate": 3.963703703703704e-05, - "loss": 0.4408, - "step": 8597 - }, - { - "epoch": 0.30179539830463853, - "grad_norm": 0.4082513749599457, - "learning_rate": 3.963518518518519e-05, - "loss": 0.4598, - "step": 8598 - }, - { - "epoch": 0.30183049895575564, - "grad_norm": 0.4450177550315857, - "learning_rate": 3.963333333333333e-05, - "loss": 0.4837, - "step": 8599 - }, - { - "epoch": 0.3018655996068727, - "grad_norm": 0.4868752956390381, - "learning_rate": 3.963148148148149e-05, - "loss": 0.6259, - "step": 8600 - }, - { - "epoch": 0.30190070025798976, - "grad_norm": 0.38363781571388245, - "learning_rate": 3.962962962962963e-05, - "loss": 0.3948, - "step": 8601 - }, - { - "epoch": 0.3019358009091069, - "grad_norm": 0.48519250750541687, - "learning_rate": 3.962777777777778e-05, - "loss": 0.508, - "step": 8602 - }, - { - "epoch": 0.30197090156022394, - "grad_norm": 0.4312346279621124, - "learning_rate": 3.9625925925925926e-05, - "loss": 0.4681, - "step": 8603 - }, - { - "epoch": 0.302006002211341, - "grad_norm": 0.4362448453903198, - "learning_rate": 3.9624074074074076e-05, - "loss": 0.5049, - "step": 8604 - }, - { - "epoch": 0.3020411028624581, - "grad_norm": 0.5746029615402222, - "learning_rate": 3.962222222222222e-05, - "loss": 0.5918, - "step": 8605 - }, - { - "epoch": 0.30207620351357517, - "grad_norm": 0.42478224635124207, - "learning_rate": 3.9620370370370376e-05, - "loss": 0.4804, - "step": 8606 - }, - { - "epoch": 0.30211130416469223, - "grad_norm": 0.48531436920166016, - "learning_rate": 3.961851851851852e-05, - "loss": 0.4568, - "step": 8607 - }, - { - "epoch": 0.30214640481580934, - "grad_norm": 0.474275141954422, - "learning_rate": 3.961666666666667e-05, - "loss": 0.4539, - "step": 8608 - }, - { - "epoch": 0.3021815054669264, - "grad_norm": 0.4725187420845032, - "learning_rate": 3.961481481481481e-05, - "loss": 0.4587, - "step": 8609 - }, - { - "epoch": 0.30221660611804346, - "grad_norm": 0.4855188727378845, - "learning_rate": 3.961296296296296e-05, - "loss": 0.482, - "step": 8610 - }, - { - "epoch": 0.3022517067691606, - "grad_norm": 0.5246474742889404, - "learning_rate": 3.961111111111111e-05, - "loss": 0.5703, - "step": 8611 - }, - { - "epoch": 0.30228680742027764, - "grad_norm": 0.5002575516700745, - "learning_rate": 3.960925925925926e-05, - "loss": 0.48, - "step": 8612 - }, - { - "epoch": 0.30232190807139475, - "grad_norm": 0.4618802070617676, - "learning_rate": 3.960740740740741e-05, - "loss": 0.651, - "step": 8613 - }, - { - "epoch": 0.3023570087225118, - "grad_norm": 0.501482367515564, - "learning_rate": 3.960555555555556e-05, - "loss": 0.4371, - "step": 8614 - }, - { - "epoch": 0.30239210937362887, - "grad_norm": 0.4423310458660126, - "learning_rate": 3.960370370370371e-05, - "loss": 0.4485, - "step": 8615 - }, - { - "epoch": 0.302427210024746, - "grad_norm": 0.44264376163482666, - "learning_rate": 3.960185185185185e-05, - "loss": 0.4823, - "step": 8616 - }, - { - "epoch": 0.30246231067586304, - "grad_norm": 0.4516925811767578, - "learning_rate": 3.960000000000001e-05, - "loss": 0.4001, - "step": 8617 - }, - { - "epoch": 0.3024974113269801, - "grad_norm": 0.47280824184417725, - "learning_rate": 3.959814814814815e-05, - "loss": 0.5209, - "step": 8618 - }, - { - "epoch": 0.3025325119780972, - "grad_norm": 0.4162990152835846, - "learning_rate": 3.95962962962963e-05, - "loss": 0.5079, - "step": 8619 - }, - { - "epoch": 0.3025676126292143, - "grad_norm": 0.46725690364837646, - "learning_rate": 3.9594444444444444e-05, - "loss": 0.5028, - "step": 8620 - }, - { - "epoch": 0.30260271328033134, - "grad_norm": 0.4575485289096832, - "learning_rate": 3.9592592592592594e-05, - "loss": 0.4382, - "step": 8621 - }, - { - "epoch": 0.30263781393144845, - "grad_norm": 0.5805893540382385, - "learning_rate": 3.9590740740740744e-05, - "loss": 0.5332, - "step": 8622 - }, - { - "epoch": 0.3026729145825655, - "grad_norm": 0.44255533814430237, - "learning_rate": 3.9588888888888894e-05, - "loss": 0.4775, - "step": 8623 - }, - { - "epoch": 0.30270801523368257, - "grad_norm": 0.5653334259986877, - "learning_rate": 3.958703703703704e-05, - "loss": 0.5198, - "step": 8624 - }, - { - "epoch": 0.3027431158847997, - "grad_norm": 0.47981011867523193, - "learning_rate": 3.958518518518519e-05, - "loss": 0.5229, - "step": 8625 - }, - { - "epoch": 0.30277821653591674, - "grad_norm": 0.47273901104927063, - "learning_rate": 3.958333333333333e-05, - "loss": 0.5313, - "step": 8626 - }, - { - "epoch": 0.3028133171870338, - "grad_norm": 0.5630565285682678, - "learning_rate": 3.958148148148149e-05, - "loss": 0.5555, - "step": 8627 - }, - { - "epoch": 0.3028484178381509, - "grad_norm": 0.47700780630111694, - "learning_rate": 3.957962962962963e-05, - "loss": 0.4172, - "step": 8628 - }, - { - "epoch": 0.302883518489268, - "grad_norm": 0.512074887752533, - "learning_rate": 3.957777777777778e-05, - "loss": 0.4841, - "step": 8629 - }, - { - "epoch": 0.30291861914038504, - "grad_norm": 0.40305987000465393, - "learning_rate": 3.9575925925925925e-05, - "loss": 0.334, - "step": 8630 - }, - { - "epoch": 0.30295371979150215, - "grad_norm": 0.4933673143386841, - "learning_rate": 3.9574074074074075e-05, - "loss": 0.4659, - "step": 8631 - }, - { - "epoch": 0.3029888204426192, - "grad_norm": 0.4125552773475647, - "learning_rate": 3.9572222222222225e-05, - "loss": 0.4715, - "step": 8632 - }, - { - "epoch": 0.30302392109373627, - "grad_norm": 0.41876867413520813, - "learning_rate": 3.9570370370370375e-05, - "loss": 0.4588, - "step": 8633 - }, - { - "epoch": 0.3030590217448534, - "grad_norm": 0.5461416244506836, - "learning_rate": 3.956851851851852e-05, - "loss": 0.4479, - "step": 8634 - }, - { - "epoch": 0.30309412239597044, - "grad_norm": 0.5668278336524963, - "learning_rate": 3.956666666666667e-05, - "loss": 0.5416, - "step": 8635 - }, - { - "epoch": 0.3031292230470875, - "grad_norm": 0.4698724150657654, - "learning_rate": 3.956481481481482e-05, - "loss": 0.4664, - "step": 8636 - }, - { - "epoch": 0.3031643236982046, - "grad_norm": 0.5216076374053955, - "learning_rate": 3.956296296296296e-05, - "loss": 0.6393, - "step": 8637 - }, - { - "epoch": 0.3031994243493217, - "grad_norm": 0.5273298621177673, - "learning_rate": 3.956111111111112e-05, - "loss": 0.5443, - "step": 8638 - }, - { - "epoch": 0.30323452500043874, - "grad_norm": 0.4996747374534607, - "learning_rate": 3.955925925925926e-05, - "loss": 0.4685, - "step": 8639 - }, - { - "epoch": 0.30326962565155585, - "grad_norm": 0.5697492361068726, - "learning_rate": 3.955740740740741e-05, - "loss": 0.5614, - "step": 8640 - }, - { - "epoch": 0.3033047263026729, - "grad_norm": 0.6453805565834045, - "learning_rate": 3.9555555555555556e-05, - "loss": 0.5996, - "step": 8641 - }, - { - "epoch": 0.30333982695378997, - "grad_norm": 0.5149725079536438, - "learning_rate": 3.9553703703703706e-05, - "loss": 0.4991, - "step": 8642 - }, - { - "epoch": 0.3033749276049071, - "grad_norm": 0.4489347040653229, - "learning_rate": 3.955185185185185e-05, - "loss": 0.4772, - "step": 8643 - }, - { - "epoch": 0.30341002825602414, - "grad_norm": 0.40657010674476624, - "learning_rate": 3.9550000000000006e-05, - "loss": 0.4938, - "step": 8644 - }, - { - "epoch": 0.3034451289071412, - "grad_norm": 0.5124055743217468, - "learning_rate": 3.954814814814815e-05, - "loss": 0.5364, - "step": 8645 - }, - { - "epoch": 0.3034802295582583, - "grad_norm": 0.5459873080253601, - "learning_rate": 3.95462962962963e-05, - "loss": 0.5721, - "step": 8646 - }, - { - "epoch": 0.3035153302093754, - "grad_norm": 0.4917494058609009, - "learning_rate": 3.954444444444444e-05, - "loss": 0.4766, - "step": 8647 - }, - { - "epoch": 0.30355043086049244, - "grad_norm": 0.4147953391075134, - "learning_rate": 3.954259259259259e-05, - "loss": 0.4568, - "step": 8648 - }, - { - "epoch": 0.30358553151160955, - "grad_norm": 0.5782347917556763, - "learning_rate": 3.954074074074074e-05, - "loss": 0.4999, - "step": 8649 - }, - { - "epoch": 0.3036206321627266, - "grad_norm": 0.4771626889705658, - "learning_rate": 3.953888888888889e-05, - "loss": 0.4925, - "step": 8650 - }, - { - "epoch": 0.30365573281384367, - "grad_norm": 0.46953243017196655, - "learning_rate": 3.9537037037037036e-05, - "loss": 0.5262, - "step": 8651 - }, - { - "epoch": 0.3036908334649608, - "grad_norm": 0.41832906007766724, - "learning_rate": 3.9535185185185186e-05, - "loss": 0.579, - "step": 8652 - }, - { - "epoch": 0.30372593411607784, - "grad_norm": 0.4280802607536316, - "learning_rate": 3.9533333333333337e-05, - "loss": 0.533, - "step": 8653 - }, - { - "epoch": 0.3037610347671949, - "grad_norm": 0.49013233184814453, - "learning_rate": 3.953148148148149e-05, - "loss": 0.4738, - "step": 8654 - }, - { - "epoch": 0.303796135418312, - "grad_norm": 0.6333420276641846, - "learning_rate": 3.952962962962963e-05, - "loss": 0.6182, - "step": 8655 - }, - { - "epoch": 0.3038312360694291, - "grad_norm": 0.4919169545173645, - "learning_rate": 3.952777777777778e-05, - "loss": 0.5248, - "step": 8656 - }, - { - "epoch": 0.3038663367205462, - "grad_norm": 0.480339914560318, - "learning_rate": 3.952592592592593e-05, - "loss": 0.4303, - "step": 8657 - }, - { - "epoch": 0.30390143737166325, - "grad_norm": 0.43666979670524597, - "learning_rate": 3.9524074074074074e-05, - "loss": 0.5017, - "step": 8658 - }, - { - "epoch": 0.3039365380227803, - "grad_norm": 0.48938441276550293, - "learning_rate": 3.9522222222222224e-05, - "loss": 0.4907, - "step": 8659 - }, - { - "epoch": 0.3039716386738974, - "grad_norm": 0.5131134986877441, - "learning_rate": 3.9520370370370374e-05, - "loss": 0.5728, - "step": 8660 - }, - { - "epoch": 0.3040067393250145, - "grad_norm": 0.44735586643218994, - "learning_rate": 3.9518518518518524e-05, - "loss": 0.5421, - "step": 8661 - }, - { - "epoch": 0.30404183997613154, - "grad_norm": 0.47705820202827454, - "learning_rate": 3.951666666666667e-05, - "loss": 0.5766, - "step": 8662 - }, - { - "epoch": 0.30407694062724866, - "grad_norm": 0.47211042046546936, - "learning_rate": 3.951481481481482e-05, - "loss": 0.488, - "step": 8663 - }, - { - "epoch": 0.3041120412783657, - "grad_norm": 0.4874033033847809, - "learning_rate": 3.951296296296296e-05, - "loss": 0.5308, - "step": 8664 - }, - { - "epoch": 0.3041471419294828, - "grad_norm": 0.4301599860191345, - "learning_rate": 3.951111111111112e-05, - "loss": 0.4704, - "step": 8665 - }, - { - "epoch": 0.3041822425805999, - "grad_norm": 0.4500030279159546, - "learning_rate": 3.950925925925926e-05, - "loss": 0.4604, - "step": 8666 - }, - { - "epoch": 0.30421734323171695, - "grad_norm": 0.5225470662117004, - "learning_rate": 3.950740740740741e-05, - "loss": 0.4781, - "step": 8667 - }, - { - "epoch": 0.304252443882834, - "grad_norm": 0.4581815302371979, - "learning_rate": 3.9505555555555554e-05, - "loss": 0.49, - "step": 8668 - }, - { - "epoch": 0.3042875445339511, - "grad_norm": 0.4960554540157318, - "learning_rate": 3.9503703703703704e-05, - "loss": 0.5662, - "step": 8669 - }, - { - "epoch": 0.3043226451850682, - "grad_norm": 0.5128034949302673, - "learning_rate": 3.950185185185185e-05, - "loss": 0.4823, - "step": 8670 - }, - { - "epoch": 0.30435774583618525, - "grad_norm": 0.5427200794219971, - "learning_rate": 3.9500000000000005e-05, - "loss": 0.4616, - "step": 8671 - }, - { - "epoch": 0.30439284648730236, - "grad_norm": 0.4783497154712677, - "learning_rate": 3.949814814814815e-05, - "loss": 0.6179, - "step": 8672 - }, - { - "epoch": 0.3044279471384194, - "grad_norm": 0.39576756954193115, - "learning_rate": 3.94962962962963e-05, - "loss": 0.5041, - "step": 8673 - }, - { - "epoch": 0.3044630477895365, - "grad_norm": 0.5019932985305786, - "learning_rate": 3.949444444444445e-05, - "loss": 0.4583, - "step": 8674 - }, - { - "epoch": 0.3044981484406536, - "grad_norm": 0.46960949897766113, - "learning_rate": 3.949259259259259e-05, - "loss": 0.4753, - "step": 8675 - }, - { - "epoch": 0.30453324909177065, - "grad_norm": 0.48464056849479675, - "learning_rate": 3.949074074074074e-05, - "loss": 0.5345, - "step": 8676 - }, - { - "epoch": 0.3045683497428877, - "grad_norm": 0.48973768949508667, - "learning_rate": 3.948888888888889e-05, - "loss": 0.5507, - "step": 8677 - }, - { - "epoch": 0.3046034503940048, - "grad_norm": 0.5706129670143127, - "learning_rate": 3.948703703703704e-05, - "loss": 0.5722, - "step": 8678 - }, - { - "epoch": 0.3046385510451219, - "grad_norm": 0.5434141159057617, - "learning_rate": 3.9485185185185185e-05, - "loss": 0.5285, - "step": 8679 - }, - { - "epoch": 0.30467365169623895, - "grad_norm": 0.4947757422924042, - "learning_rate": 3.9483333333333335e-05, - "loss": 0.5642, - "step": 8680 - }, - { - "epoch": 0.30470875234735606, - "grad_norm": 0.4289102256298065, - "learning_rate": 3.9481481481481485e-05, - "loss": 0.4812, - "step": 8681 - }, - { - "epoch": 0.3047438529984731, - "grad_norm": 0.554417073726654, - "learning_rate": 3.9479629629629636e-05, - "loss": 0.4557, - "step": 8682 - }, - { - "epoch": 0.3047789536495902, - "grad_norm": 0.3802686333656311, - "learning_rate": 3.947777777777778e-05, - "loss": 0.4472, - "step": 8683 - }, - { - "epoch": 0.3048140543007073, - "grad_norm": 0.5090487003326416, - "learning_rate": 3.947592592592593e-05, - "loss": 0.5724, - "step": 8684 - }, - { - "epoch": 0.30484915495182435, - "grad_norm": 0.4884255826473236, - "learning_rate": 3.947407407407407e-05, - "loss": 0.5076, - "step": 8685 - }, - { - "epoch": 0.3048842556029414, - "grad_norm": 0.3774528503417969, - "learning_rate": 3.947222222222222e-05, - "loss": 0.4159, - "step": 8686 - }, - { - "epoch": 0.3049193562540585, - "grad_norm": 0.41077321767807007, - "learning_rate": 3.947037037037037e-05, - "loss": 0.5596, - "step": 8687 - }, - { - "epoch": 0.3049544569051756, - "grad_norm": 0.4244905114173889, - "learning_rate": 3.946851851851852e-05, - "loss": 0.5807, - "step": 8688 - }, - { - "epoch": 0.30498955755629265, - "grad_norm": 0.4664440155029297, - "learning_rate": 3.9466666666666666e-05, - "loss": 0.3969, - "step": 8689 - }, - { - "epoch": 0.30502465820740976, - "grad_norm": 0.4193909466266632, - "learning_rate": 3.9464814814814816e-05, - "loss": 0.425, - "step": 8690 - }, - { - "epoch": 0.3050597588585268, - "grad_norm": 0.4258134961128235, - "learning_rate": 3.946296296296296e-05, - "loss": 0.4764, - "step": 8691 - }, - { - "epoch": 0.3050948595096439, - "grad_norm": 0.5628892779350281, - "learning_rate": 3.9461111111111116e-05, - "loss": 0.566, - "step": 8692 - }, - { - "epoch": 0.305129960160761, - "grad_norm": 0.47160840034484863, - "learning_rate": 3.945925925925926e-05, - "loss": 0.5199, - "step": 8693 - }, - { - "epoch": 0.30516506081187805, - "grad_norm": 0.420484721660614, - "learning_rate": 3.945740740740741e-05, - "loss": 0.4212, - "step": 8694 - }, - { - "epoch": 0.3052001614629951, - "grad_norm": 0.4397449791431427, - "learning_rate": 3.945555555555556e-05, - "loss": 0.407, - "step": 8695 - }, - { - "epoch": 0.3052352621141122, - "grad_norm": 0.41507479548454285, - "learning_rate": 3.94537037037037e-05, - "loss": 0.5101, - "step": 8696 - }, - { - "epoch": 0.3052703627652293, - "grad_norm": 0.5965529084205627, - "learning_rate": 3.945185185185185e-05, - "loss": 0.602, - "step": 8697 - }, - { - "epoch": 0.3053054634163464, - "grad_norm": 0.49673742055892944, - "learning_rate": 3.9450000000000003e-05, - "loss": 0.5493, - "step": 8698 - }, - { - "epoch": 0.30534056406746346, - "grad_norm": 0.461818665266037, - "learning_rate": 3.9448148148148154e-05, - "loss": 0.5745, - "step": 8699 - }, - { - "epoch": 0.3053756647185805, - "grad_norm": 0.46759822964668274, - "learning_rate": 3.94462962962963e-05, - "loss": 0.489, - "step": 8700 - }, - { - "epoch": 0.30541076536969763, - "grad_norm": 0.5662795901298523, - "learning_rate": 3.944444444444445e-05, - "loss": 0.5578, - "step": 8701 - }, - { - "epoch": 0.3054458660208147, - "grad_norm": 0.49840685725212097, - "learning_rate": 3.944259259259259e-05, - "loss": 0.5802, - "step": 8702 - }, - { - "epoch": 0.30548096667193175, - "grad_norm": 0.452137291431427, - "learning_rate": 3.944074074074075e-05, - "loss": 0.5066, - "step": 8703 - }, - { - "epoch": 0.30551606732304887, - "grad_norm": 0.545537531375885, - "learning_rate": 3.943888888888889e-05, - "loss": 0.5393, - "step": 8704 - }, - { - "epoch": 0.3055511679741659, - "grad_norm": 0.5855019688606262, - "learning_rate": 3.943703703703704e-05, - "loss": 0.564, - "step": 8705 - }, - { - "epoch": 0.305586268625283, - "grad_norm": 0.5640453696250916, - "learning_rate": 3.9435185185185184e-05, - "loss": 0.466, - "step": 8706 - }, - { - "epoch": 0.3056213692764001, - "grad_norm": 0.4662555158138275, - "learning_rate": 3.9433333333333334e-05, - "loss": 0.4928, - "step": 8707 - }, - { - "epoch": 0.30565646992751716, - "grad_norm": 0.5162858963012695, - "learning_rate": 3.9431481481481484e-05, - "loss": 0.5166, - "step": 8708 - }, - { - "epoch": 0.3056915705786342, - "grad_norm": 0.4921945035457611, - "learning_rate": 3.9429629629629634e-05, - "loss": 0.6155, - "step": 8709 - }, - { - "epoch": 0.30572667122975133, - "grad_norm": 0.619929850101471, - "learning_rate": 3.942777777777778e-05, - "loss": 0.6201, - "step": 8710 - }, - { - "epoch": 0.3057617718808684, - "grad_norm": 0.5620907545089722, - "learning_rate": 3.942592592592593e-05, - "loss": 0.5323, - "step": 8711 - }, - { - "epoch": 0.30579687253198545, - "grad_norm": 0.4557121694087982, - "learning_rate": 3.942407407407407e-05, - "loss": 0.5116, - "step": 8712 - }, - { - "epoch": 0.30583197318310257, - "grad_norm": 0.385118305683136, - "learning_rate": 3.942222222222222e-05, - "loss": 0.4678, - "step": 8713 - }, - { - "epoch": 0.3058670738342196, - "grad_norm": 0.4236917197704315, - "learning_rate": 3.942037037037037e-05, - "loss": 0.5433, - "step": 8714 - }, - { - "epoch": 0.3059021744853367, - "grad_norm": 0.4401707053184509, - "learning_rate": 3.941851851851852e-05, - "loss": 0.5181, - "step": 8715 - }, - { - "epoch": 0.3059372751364538, - "grad_norm": 0.5016449689865112, - "learning_rate": 3.941666666666667e-05, - "loss": 0.5372, - "step": 8716 - }, - { - "epoch": 0.30597237578757086, - "grad_norm": 0.3915505111217499, - "learning_rate": 3.9414814814814815e-05, - "loss": 0.4752, - "step": 8717 - }, - { - "epoch": 0.3060074764386879, - "grad_norm": 0.47751185297966003, - "learning_rate": 3.9412962962962965e-05, - "loss": 0.4738, - "step": 8718 - }, - { - "epoch": 0.30604257708980503, - "grad_norm": 0.46168264746665955, - "learning_rate": 3.9411111111111115e-05, - "loss": 0.6007, - "step": 8719 - }, - { - "epoch": 0.3060776777409221, - "grad_norm": 0.6341105699539185, - "learning_rate": 3.9409259259259265e-05, - "loss": 0.4967, - "step": 8720 - }, - { - "epoch": 0.30611277839203915, - "grad_norm": 0.46257856488227844, - "learning_rate": 3.940740740740741e-05, - "loss": 0.4749, - "step": 8721 - }, - { - "epoch": 0.30614787904315627, - "grad_norm": 0.5143444538116455, - "learning_rate": 3.940555555555556e-05, - "loss": 0.4244, - "step": 8722 - }, - { - "epoch": 0.3061829796942733, - "grad_norm": 0.4296113848686218, - "learning_rate": 3.94037037037037e-05, - "loss": 0.4862, - "step": 8723 - }, - { - "epoch": 0.3062180803453904, - "grad_norm": 0.4205456078052521, - "learning_rate": 3.940185185185186e-05, - "loss": 0.5334, - "step": 8724 - }, - { - "epoch": 0.3062531809965075, - "grad_norm": 0.5338257551193237, - "learning_rate": 3.94e-05, - "loss": 0.4718, - "step": 8725 - }, - { - "epoch": 0.30628828164762456, - "grad_norm": 0.3950191140174866, - "learning_rate": 3.939814814814815e-05, - "loss": 0.5032, - "step": 8726 - }, - { - "epoch": 0.3063233822987416, - "grad_norm": 0.5441983938217163, - "learning_rate": 3.9396296296296296e-05, - "loss": 0.4687, - "step": 8727 - }, - { - "epoch": 0.30635848294985873, - "grad_norm": 0.4578394889831543, - "learning_rate": 3.9394444444444446e-05, - "loss": 0.4609, - "step": 8728 - }, - { - "epoch": 0.3063935836009758, - "grad_norm": 0.39350616931915283, - "learning_rate": 3.939259259259259e-05, - "loss": 0.4812, - "step": 8729 - }, - { - "epoch": 0.30642868425209285, - "grad_norm": 0.526911735534668, - "learning_rate": 3.9390740740740746e-05, - "loss": 0.58, - "step": 8730 - }, - { - "epoch": 0.30646378490320997, - "grad_norm": 0.5064572691917419, - "learning_rate": 3.938888888888889e-05, - "loss": 0.5212, - "step": 8731 - }, - { - "epoch": 0.306498885554327, - "grad_norm": 0.4668792188167572, - "learning_rate": 3.938703703703704e-05, - "loss": 0.6327, - "step": 8732 - }, - { - "epoch": 0.3065339862054441, - "grad_norm": 0.48029157519340515, - "learning_rate": 3.938518518518518e-05, - "loss": 0.507, - "step": 8733 - }, - { - "epoch": 0.3065690868565612, - "grad_norm": 0.4906366169452667, - "learning_rate": 3.938333333333333e-05, - "loss": 0.5551, - "step": 8734 - }, - { - "epoch": 0.30660418750767826, - "grad_norm": 0.5226960182189941, - "learning_rate": 3.938148148148148e-05, - "loss": 0.5298, - "step": 8735 - }, - { - "epoch": 0.3066392881587953, - "grad_norm": 0.5579103827476501, - "learning_rate": 3.937962962962963e-05, - "loss": 0.581, - "step": 8736 - }, - { - "epoch": 0.30667438880991243, - "grad_norm": 0.45884960889816284, - "learning_rate": 3.937777777777778e-05, - "loss": 0.5402, - "step": 8737 - }, - { - "epoch": 0.3067094894610295, - "grad_norm": 0.535212516784668, - "learning_rate": 3.9375925925925927e-05, - "loss": 0.426, - "step": 8738 - }, - { - "epoch": 0.30674459011214655, - "grad_norm": 0.41749534010887146, - "learning_rate": 3.937407407407408e-05, - "loss": 0.494, - "step": 8739 - }, - { - "epoch": 0.30677969076326367, - "grad_norm": 0.49958568811416626, - "learning_rate": 3.937222222222222e-05, - "loss": 0.5199, - "step": 8740 - }, - { - "epoch": 0.3068147914143807, - "grad_norm": 0.4662969708442688, - "learning_rate": 3.937037037037038e-05, - "loss": 0.3671, - "step": 8741 - }, - { - "epoch": 0.30684989206549784, - "grad_norm": 0.4694395065307617, - "learning_rate": 3.936851851851852e-05, - "loss": 0.5542, - "step": 8742 - }, - { - "epoch": 0.3068849927166149, - "grad_norm": 0.5641579031944275, - "learning_rate": 3.936666666666667e-05, - "loss": 0.5254, - "step": 8743 - }, - { - "epoch": 0.30692009336773196, - "grad_norm": 0.4714747369289398, - "learning_rate": 3.9364814814814814e-05, - "loss": 0.4816, - "step": 8744 - }, - { - "epoch": 0.3069551940188491, - "grad_norm": 0.6107221245765686, - "learning_rate": 3.9362962962962964e-05, - "loss": 0.4606, - "step": 8745 - }, - { - "epoch": 0.30699029466996614, - "grad_norm": 0.44907963275909424, - "learning_rate": 3.9361111111111114e-05, - "loss": 0.4403, - "step": 8746 - }, - { - "epoch": 0.3070253953210832, - "grad_norm": 0.5027180910110474, - "learning_rate": 3.9359259259259264e-05, - "loss": 0.5004, - "step": 8747 - }, - { - "epoch": 0.3070604959722003, - "grad_norm": 0.44461700320243835, - "learning_rate": 3.935740740740741e-05, - "loss": 0.4153, - "step": 8748 - }, - { - "epoch": 0.30709559662331737, - "grad_norm": 0.46302419900894165, - "learning_rate": 3.935555555555556e-05, - "loss": 0.4727, - "step": 8749 - }, - { - "epoch": 0.3071306972744344, - "grad_norm": 0.4714001417160034, - "learning_rate": 3.93537037037037e-05, - "loss": 0.5446, - "step": 8750 - }, - { - "epoch": 0.30716579792555154, - "grad_norm": 0.4725210964679718, - "learning_rate": 3.935185185185186e-05, - "loss": 0.4413, - "step": 8751 - }, - { - "epoch": 0.3072008985766686, - "grad_norm": 0.4595108926296234, - "learning_rate": 3.935e-05, - "loss": 0.5442, - "step": 8752 - }, - { - "epoch": 0.30723599922778566, - "grad_norm": 0.4774869382381439, - "learning_rate": 3.934814814814815e-05, - "loss": 0.5283, - "step": 8753 - }, - { - "epoch": 0.3072710998789028, - "grad_norm": 0.4862412214279175, - "learning_rate": 3.9346296296296294e-05, - "loss": 0.4814, - "step": 8754 - }, - { - "epoch": 0.30730620053001984, - "grad_norm": 0.38672032952308655, - "learning_rate": 3.9344444444444445e-05, - "loss": 0.5002, - "step": 8755 - }, - { - "epoch": 0.3073413011811369, - "grad_norm": 0.48458823561668396, - "learning_rate": 3.9342592592592595e-05, - "loss": 0.6119, - "step": 8756 - }, - { - "epoch": 0.307376401832254, - "grad_norm": 0.4352933466434479, - "learning_rate": 3.9340740740740745e-05, - "loss": 0.5363, - "step": 8757 - }, - { - "epoch": 0.30741150248337107, - "grad_norm": 0.49094241857528687, - "learning_rate": 3.9338888888888895e-05, - "loss": 0.413, - "step": 8758 - }, - { - "epoch": 0.30744660313448813, - "grad_norm": 0.48559215664863586, - "learning_rate": 3.933703703703704e-05, - "loss": 0.5016, - "step": 8759 - }, - { - "epoch": 0.30748170378560524, - "grad_norm": 0.4450388252735138, - "learning_rate": 3.933518518518519e-05, - "loss": 0.4511, - "step": 8760 - }, - { - "epoch": 0.3075168044367223, - "grad_norm": 0.4151293933391571, - "learning_rate": 3.933333333333333e-05, - "loss": 0.5015, - "step": 8761 - }, - { - "epoch": 0.30755190508783936, - "grad_norm": 0.4170636832714081, - "learning_rate": 3.933148148148149e-05, - "loss": 0.5784, - "step": 8762 - }, - { - "epoch": 0.3075870057389565, - "grad_norm": 0.5053126811981201, - "learning_rate": 3.932962962962963e-05, - "loss": 0.5033, - "step": 8763 - }, - { - "epoch": 0.30762210639007354, - "grad_norm": 0.49803057312965393, - "learning_rate": 3.932777777777778e-05, - "loss": 0.5082, - "step": 8764 - }, - { - "epoch": 0.3076572070411906, - "grad_norm": 0.44361767172813416, - "learning_rate": 3.9325925925925925e-05, - "loss": 0.5126, - "step": 8765 - }, - { - "epoch": 0.3076923076923077, - "grad_norm": 0.5190625190734863, - "learning_rate": 3.9324074074074075e-05, - "loss": 0.648, - "step": 8766 - }, - { - "epoch": 0.30772740834342477, - "grad_norm": 0.4127783179283142, - "learning_rate": 3.932222222222222e-05, - "loss": 0.41, - "step": 8767 - }, - { - "epoch": 0.30776250899454183, - "grad_norm": 0.48539865016937256, - "learning_rate": 3.9320370370370376e-05, - "loss": 0.5515, - "step": 8768 - }, - { - "epoch": 0.30779760964565894, - "grad_norm": 0.515684962272644, - "learning_rate": 3.931851851851852e-05, - "loss": 0.4564, - "step": 8769 - }, - { - "epoch": 0.307832710296776, - "grad_norm": 0.44570302963256836, - "learning_rate": 3.931666666666667e-05, - "loss": 0.4878, - "step": 8770 - }, - { - "epoch": 0.30786781094789306, - "grad_norm": 0.47116732597351074, - "learning_rate": 3.931481481481481e-05, - "loss": 0.5905, - "step": 8771 - }, - { - "epoch": 0.3079029115990102, - "grad_norm": 0.41465622186660767, - "learning_rate": 3.931296296296296e-05, - "loss": 0.4382, - "step": 8772 - }, - { - "epoch": 0.30793801225012724, - "grad_norm": 0.4784121513366699, - "learning_rate": 3.931111111111111e-05, - "loss": 0.4923, - "step": 8773 - }, - { - "epoch": 0.3079731129012443, - "grad_norm": 0.38677582144737244, - "learning_rate": 3.930925925925926e-05, - "loss": 0.4391, - "step": 8774 - }, - { - "epoch": 0.3080082135523614, - "grad_norm": 0.4381020963191986, - "learning_rate": 3.9307407407407406e-05, - "loss": 0.5738, - "step": 8775 - }, - { - "epoch": 0.30804331420347847, - "grad_norm": 0.47698602080345154, - "learning_rate": 3.9305555555555556e-05, - "loss": 0.6561, - "step": 8776 - }, - { - "epoch": 0.30807841485459553, - "grad_norm": 0.5099846720695496, - "learning_rate": 3.9303703703703706e-05, - "loss": 0.5063, - "step": 8777 - }, - { - "epoch": 0.30811351550571264, - "grad_norm": 0.5377997756004333, - "learning_rate": 3.9301851851851856e-05, - "loss": 0.4937, - "step": 8778 - }, - { - "epoch": 0.3081486161568297, - "grad_norm": 0.4510904550552368, - "learning_rate": 3.9300000000000007e-05, - "loss": 0.4108, - "step": 8779 - }, - { - "epoch": 0.30818371680794676, - "grad_norm": 0.4931807816028595, - "learning_rate": 3.929814814814815e-05, - "loss": 0.4162, - "step": 8780 - }, - { - "epoch": 0.3082188174590639, - "grad_norm": 0.48580771684646606, - "learning_rate": 3.92962962962963e-05, - "loss": 0.4746, - "step": 8781 - }, - { - "epoch": 0.30825391811018094, - "grad_norm": 0.46178099513053894, - "learning_rate": 3.929444444444444e-05, - "loss": 0.4725, - "step": 8782 - }, - { - "epoch": 0.308289018761298, - "grad_norm": 0.5141914486885071, - "learning_rate": 3.9292592592592593e-05, - "loss": 0.4495, - "step": 8783 - }, - { - "epoch": 0.3083241194124151, - "grad_norm": 0.5259555578231812, - "learning_rate": 3.9290740740740744e-05, - "loss": 0.4918, - "step": 8784 - }, - { - "epoch": 0.30835922006353217, - "grad_norm": 0.48467570543289185, - "learning_rate": 3.9288888888888894e-05, - "loss": 0.5656, - "step": 8785 - }, - { - "epoch": 0.3083943207146493, - "grad_norm": 0.4536272883415222, - "learning_rate": 3.928703703703704e-05, - "loss": 0.512, - "step": 8786 - }, - { - "epoch": 0.30842942136576634, - "grad_norm": 0.4893020689487457, - "learning_rate": 3.928518518518519e-05, - "loss": 0.4602, - "step": 8787 - }, - { - "epoch": 0.3084645220168834, - "grad_norm": 0.44596216082572937, - "learning_rate": 3.928333333333333e-05, - "loss": 0.524, - "step": 8788 - }, - { - "epoch": 0.3084996226680005, - "grad_norm": 0.49498552083969116, - "learning_rate": 3.928148148148149e-05, - "loss": 0.5067, - "step": 8789 - }, - { - "epoch": 0.3085347233191176, - "grad_norm": 0.45623937249183655, - "learning_rate": 3.927962962962963e-05, - "loss": 0.4555, - "step": 8790 - }, - { - "epoch": 0.30856982397023464, - "grad_norm": 0.43212226033210754, - "learning_rate": 3.927777777777778e-05, - "loss": 0.4962, - "step": 8791 - }, - { - "epoch": 0.30860492462135175, - "grad_norm": 0.46493348479270935, - "learning_rate": 3.9275925925925924e-05, - "loss": 0.4687, - "step": 8792 - }, - { - "epoch": 0.3086400252724688, - "grad_norm": 0.4725116789340973, - "learning_rate": 3.9274074074074074e-05, - "loss": 0.4568, - "step": 8793 - }, - { - "epoch": 0.30867512592358587, - "grad_norm": 0.5334082841873169, - "learning_rate": 3.9272222222222224e-05, - "loss": 0.4664, - "step": 8794 - }, - { - "epoch": 0.308710226574703, - "grad_norm": 0.48105213046073914, - "learning_rate": 3.9270370370370374e-05, - "loss": 0.5867, - "step": 8795 - }, - { - "epoch": 0.30874532722582004, - "grad_norm": 0.4026000201702118, - "learning_rate": 3.926851851851852e-05, - "loss": 0.4357, - "step": 8796 - }, - { - "epoch": 0.3087804278769371, - "grad_norm": 0.4023224115371704, - "learning_rate": 3.926666666666667e-05, - "loss": 0.501, - "step": 8797 - }, - { - "epoch": 0.3088155285280542, - "grad_norm": 0.5373108983039856, - "learning_rate": 3.926481481481482e-05, - "loss": 0.5511, - "step": 8798 - }, - { - "epoch": 0.3088506291791713, - "grad_norm": 0.45499032735824585, - "learning_rate": 3.926296296296296e-05, - "loss": 0.4928, - "step": 8799 - }, - { - "epoch": 0.30888572983028834, - "grad_norm": 0.4539770185947418, - "learning_rate": 3.926111111111112e-05, - "loss": 0.5426, - "step": 8800 - }, - { - "epoch": 0.30892083048140545, - "grad_norm": 0.45671460032463074, - "learning_rate": 3.925925925925926e-05, - "loss": 0.5494, - "step": 8801 - }, - { - "epoch": 0.3089559311325225, - "grad_norm": 0.4962742030620575, - "learning_rate": 3.925740740740741e-05, - "loss": 0.5457, - "step": 8802 - }, - { - "epoch": 0.30899103178363957, - "grad_norm": 0.43830347061157227, - "learning_rate": 3.9255555555555555e-05, - "loss": 0.4994, - "step": 8803 - }, - { - "epoch": 0.3090261324347567, - "grad_norm": 0.4228097200393677, - "learning_rate": 3.9253703703703705e-05, - "loss": 0.5557, - "step": 8804 - }, - { - "epoch": 0.30906123308587374, - "grad_norm": 0.57325279712677, - "learning_rate": 3.9251851851851855e-05, - "loss": 0.6374, - "step": 8805 - }, - { - "epoch": 0.3090963337369908, - "grad_norm": 0.5213414430618286, - "learning_rate": 3.9250000000000005e-05, - "loss": 0.4228, - "step": 8806 - }, - { - "epoch": 0.3091314343881079, - "grad_norm": 0.674948513507843, - "learning_rate": 3.924814814814815e-05, - "loss": 0.5753, - "step": 8807 - }, - { - "epoch": 0.309166535039225, - "grad_norm": 0.48095443844795227, - "learning_rate": 3.92462962962963e-05, - "loss": 0.5318, - "step": 8808 - }, - { - "epoch": 0.30920163569034204, - "grad_norm": 0.387461394071579, - "learning_rate": 3.924444444444444e-05, - "loss": 0.3995, - "step": 8809 - }, - { - "epoch": 0.30923673634145915, - "grad_norm": 0.506006121635437, - "learning_rate": 3.924259259259259e-05, - "loss": 0.4614, - "step": 8810 - }, - { - "epoch": 0.3092718369925762, - "grad_norm": 0.4645986258983612, - "learning_rate": 3.924074074074074e-05, - "loss": 0.5135, - "step": 8811 - }, - { - "epoch": 0.30930693764369327, - "grad_norm": 0.4918312728404999, - "learning_rate": 3.923888888888889e-05, - "loss": 0.5374, - "step": 8812 - }, - { - "epoch": 0.3093420382948104, - "grad_norm": 0.4714263379573822, - "learning_rate": 3.9237037037037036e-05, - "loss": 0.5322, - "step": 8813 - }, - { - "epoch": 0.30937713894592744, - "grad_norm": 0.4145309329032898, - "learning_rate": 3.9235185185185186e-05, - "loss": 0.5047, - "step": 8814 - }, - { - "epoch": 0.3094122395970445, - "grad_norm": 0.4974603056907654, - "learning_rate": 3.9233333333333336e-05, - "loss": 0.4082, - "step": 8815 - }, - { - "epoch": 0.3094473402481616, - "grad_norm": 0.41587987542152405, - "learning_rate": 3.9231481481481486e-05, - "loss": 0.4967, - "step": 8816 - }, - { - "epoch": 0.3094824408992787, - "grad_norm": 0.4979100525379181, - "learning_rate": 3.922962962962963e-05, - "loss": 0.4806, - "step": 8817 - }, - { - "epoch": 0.30951754155039574, - "grad_norm": 0.4637008607387543, - "learning_rate": 3.922777777777778e-05, - "loss": 0.3574, - "step": 8818 - }, - { - "epoch": 0.30955264220151285, - "grad_norm": 0.45295584201812744, - "learning_rate": 3.922592592592593e-05, - "loss": 0.5173, - "step": 8819 - }, - { - "epoch": 0.3095877428526299, - "grad_norm": 0.41619691252708435, - "learning_rate": 3.922407407407407e-05, - "loss": 0.4842, - "step": 8820 - }, - { - "epoch": 0.30962284350374697, - "grad_norm": 0.5464077591896057, - "learning_rate": 3.922222222222223e-05, - "loss": 0.4808, - "step": 8821 - }, - { - "epoch": 0.3096579441548641, - "grad_norm": 0.38551726937294006, - "learning_rate": 3.922037037037037e-05, - "loss": 0.4039, - "step": 8822 - }, - { - "epoch": 0.30969304480598114, - "grad_norm": 0.46730029582977295, - "learning_rate": 3.921851851851852e-05, - "loss": 0.4904, - "step": 8823 - }, - { - "epoch": 0.3097281454570982, - "grad_norm": 0.5198444128036499, - "learning_rate": 3.921666666666667e-05, - "loss": 0.5256, - "step": 8824 - }, - { - "epoch": 0.3097632461082153, - "grad_norm": 0.4479045271873474, - "learning_rate": 3.921481481481482e-05, - "loss": 0.522, - "step": 8825 - }, - { - "epoch": 0.3097983467593324, - "grad_norm": 0.5630019903182983, - "learning_rate": 3.921296296296296e-05, - "loss": 0.5016, - "step": 8826 - }, - { - "epoch": 0.30983344741044944, - "grad_norm": 0.5076788067817688, - "learning_rate": 3.921111111111112e-05, - "loss": 0.5403, - "step": 8827 - }, - { - "epoch": 0.30986854806156655, - "grad_norm": 0.46219485998153687, - "learning_rate": 3.920925925925926e-05, - "loss": 0.5457, - "step": 8828 - }, - { - "epoch": 0.3099036487126836, - "grad_norm": 0.48247432708740234, - "learning_rate": 3.920740740740741e-05, - "loss": 0.471, - "step": 8829 - }, - { - "epoch": 0.3099387493638007, - "grad_norm": 0.5240874886512756, - "learning_rate": 3.9205555555555554e-05, - "loss": 0.615, - "step": 8830 - }, - { - "epoch": 0.3099738500149178, - "grad_norm": 0.4381912052631378, - "learning_rate": 3.9203703703703704e-05, - "loss": 0.3954, - "step": 8831 - }, - { - "epoch": 0.31000895066603484, - "grad_norm": 0.5528464317321777, - "learning_rate": 3.9201851851851854e-05, - "loss": 0.5457, - "step": 8832 - }, - { - "epoch": 0.31004405131715196, - "grad_norm": 0.4594953954219818, - "learning_rate": 3.9200000000000004e-05, - "loss": 0.4913, - "step": 8833 - }, - { - "epoch": 0.310079151968269, - "grad_norm": 0.4231271743774414, - "learning_rate": 3.919814814814815e-05, - "loss": 0.4367, - "step": 8834 - }, - { - "epoch": 0.3101142526193861, - "grad_norm": 0.4530611038208008, - "learning_rate": 3.91962962962963e-05, - "loss": 0.4794, - "step": 8835 - }, - { - "epoch": 0.3101493532705032, - "grad_norm": 0.4232786297798157, - "learning_rate": 3.919444444444445e-05, - "loss": 0.5351, - "step": 8836 - }, - { - "epoch": 0.31018445392162025, - "grad_norm": 0.4587383568286896, - "learning_rate": 3.919259259259259e-05, - "loss": 0.5104, - "step": 8837 - }, - { - "epoch": 0.3102195545727373, - "grad_norm": 0.40078210830688477, - "learning_rate": 3.919074074074074e-05, - "loss": 0.3828, - "step": 8838 - }, - { - "epoch": 0.3102546552238544, - "grad_norm": 0.45840489864349365, - "learning_rate": 3.918888888888889e-05, - "loss": 0.4232, - "step": 8839 - }, - { - "epoch": 0.3102897558749715, - "grad_norm": 0.4865371882915497, - "learning_rate": 3.918703703703704e-05, - "loss": 0.45, - "step": 8840 - }, - { - "epoch": 0.31032485652608854, - "grad_norm": 0.4786504805088043, - "learning_rate": 3.9185185185185185e-05, - "loss": 0.4772, - "step": 8841 - }, - { - "epoch": 0.31035995717720566, - "grad_norm": 0.4598773121833801, - "learning_rate": 3.9183333333333335e-05, - "loss": 0.4893, - "step": 8842 - }, - { - "epoch": 0.3103950578283227, - "grad_norm": 0.44626519083976746, - "learning_rate": 3.9181481481481485e-05, - "loss": 0.4967, - "step": 8843 - }, - { - "epoch": 0.3104301584794398, - "grad_norm": 0.5617661476135254, - "learning_rate": 3.9179629629629635e-05, - "loss": 0.4924, - "step": 8844 - }, - { - "epoch": 0.3104652591305569, - "grad_norm": 0.5470641851425171, - "learning_rate": 3.917777777777778e-05, - "loss": 0.3199, - "step": 8845 - }, - { - "epoch": 0.31050035978167395, - "grad_norm": 1.08914053440094, - "learning_rate": 3.917592592592593e-05, - "loss": 0.6093, - "step": 8846 - }, - { - "epoch": 0.310535460432791, - "grad_norm": 0.47433972358703613, - "learning_rate": 3.917407407407407e-05, - "loss": 0.4375, - "step": 8847 - }, - { - "epoch": 0.3105705610839081, - "grad_norm": 0.44496044516563416, - "learning_rate": 3.917222222222223e-05, - "loss": 0.4426, - "step": 8848 - }, - { - "epoch": 0.3106056617350252, - "grad_norm": 0.4063497483730316, - "learning_rate": 3.917037037037037e-05, - "loss": 0.4446, - "step": 8849 - }, - { - "epoch": 0.31064076238614224, - "grad_norm": 0.48426681756973267, - "learning_rate": 3.916851851851852e-05, - "loss": 0.493, - "step": 8850 - }, - { - "epoch": 0.31067586303725936, - "grad_norm": 0.44462502002716064, - "learning_rate": 3.9166666666666665e-05, - "loss": 0.4866, - "step": 8851 - }, - { - "epoch": 0.3107109636883764, - "grad_norm": 0.5254144072532654, - "learning_rate": 3.9164814814814816e-05, - "loss": 0.54, - "step": 8852 - }, - { - "epoch": 0.3107460643394935, - "grad_norm": 0.5215391516685486, - "learning_rate": 3.9162962962962966e-05, - "loss": 0.5293, - "step": 8853 - }, - { - "epoch": 0.3107811649906106, - "grad_norm": 0.432104229927063, - "learning_rate": 3.9161111111111116e-05, - "loss": 0.46, - "step": 8854 - }, - { - "epoch": 0.31081626564172765, - "grad_norm": 0.4759671688079834, - "learning_rate": 3.915925925925926e-05, - "loss": 0.4473, - "step": 8855 - }, - { - "epoch": 0.3108513662928447, - "grad_norm": 0.4657142758369446, - "learning_rate": 3.915740740740741e-05, - "loss": 0.4879, - "step": 8856 - }, - { - "epoch": 0.3108864669439618, - "grad_norm": 0.49429750442504883, - "learning_rate": 3.915555555555556e-05, - "loss": 0.4906, - "step": 8857 - }, - { - "epoch": 0.3109215675950789, - "grad_norm": 0.4377487301826477, - "learning_rate": 3.91537037037037e-05, - "loss": 0.4886, - "step": 8858 - }, - { - "epoch": 0.31095666824619594, - "grad_norm": 0.46842101216316223, - "learning_rate": 3.915185185185186e-05, - "loss": 0.4471, - "step": 8859 - }, - { - "epoch": 0.31099176889731306, - "grad_norm": 0.4664541482925415, - "learning_rate": 3.915e-05, - "loss": 0.5352, - "step": 8860 - }, - { - "epoch": 0.3110268695484301, - "grad_norm": 0.48965442180633545, - "learning_rate": 3.914814814814815e-05, - "loss": 0.5988, - "step": 8861 - }, - { - "epoch": 0.3110619701995472, - "grad_norm": 0.5325160026550293, - "learning_rate": 3.9146296296296296e-05, - "loss": 0.4975, - "step": 8862 - }, - { - "epoch": 0.3110970708506643, - "grad_norm": 0.4721429944038391, - "learning_rate": 3.9144444444444446e-05, - "loss": 0.3951, - "step": 8863 - }, - { - "epoch": 0.31113217150178135, - "grad_norm": 0.5160288214683533, - "learning_rate": 3.914259259259259e-05, - "loss": 0.4758, - "step": 8864 - }, - { - "epoch": 0.3111672721528984, - "grad_norm": 0.5035923719406128, - "learning_rate": 3.914074074074075e-05, - "loss": 0.5342, - "step": 8865 - }, - { - "epoch": 0.3112023728040155, - "grad_norm": 0.49354884028434753, - "learning_rate": 3.913888888888889e-05, - "loss": 0.5507, - "step": 8866 - }, - { - "epoch": 0.3112374734551326, - "grad_norm": 0.46520206332206726, - "learning_rate": 3.913703703703704e-05, - "loss": 0.5782, - "step": 8867 - }, - { - "epoch": 0.31127257410624964, - "grad_norm": 0.4144098162651062, - "learning_rate": 3.9135185185185183e-05, - "loss": 0.2487, - "step": 8868 - }, - { - "epoch": 0.31130767475736676, - "grad_norm": 0.5508770942687988, - "learning_rate": 3.9133333333333334e-05, - "loss": 0.5594, - "step": 8869 - }, - { - "epoch": 0.3113427754084838, - "grad_norm": 0.3716624975204468, - "learning_rate": 3.9131481481481484e-05, - "loss": 0.3137, - "step": 8870 - }, - { - "epoch": 0.31137787605960093, - "grad_norm": 0.4619409441947937, - "learning_rate": 3.9129629629629634e-05, - "loss": 0.6387, - "step": 8871 - }, - { - "epoch": 0.311412976710718, - "grad_norm": 0.4463960528373718, - "learning_rate": 3.912777777777778e-05, - "loss": 0.5626, - "step": 8872 - }, - { - "epoch": 0.31144807736183505, - "grad_norm": 0.4954317510128021, - "learning_rate": 3.912592592592593e-05, - "loss": 0.4987, - "step": 8873 - }, - { - "epoch": 0.31148317801295217, - "grad_norm": 0.4547634720802307, - "learning_rate": 3.912407407407408e-05, - "loss": 0.4179, - "step": 8874 - }, - { - "epoch": 0.3115182786640692, - "grad_norm": 0.4283832907676697, - "learning_rate": 3.912222222222223e-05, - "loss": 0.5647, - "step": 8875 - }, - { - "epoch": 0.3115533793151863, - "grad_norm": 0.5543498396873474, - "learning_rate": 3.912037037037037e-05, - "loss": 0.5701, - "step": 8876 - }, - { - "epoch": 0.3115884799663034, - "grad_norm": 0.4814988970756531, - "learning_rate": 3.911851851851852e-05, - "loss": 0.5461, - "step": 8877 - }, - { - "epoch": 0.31162358061742046, - "grad_norm": 0.5078903436660767, - "learning_rate": 3.911666666666667e-05, - "loss": 0.5733, - "step": 8878 - }, - { - "epoch": 0.3116586812685375, - "grad_norm": 0.4838479161262512, - "learning_rate": 3.9114814814814814e-05, - "loss": 0.5417, - "step": 8879 - }, - { - "epoch": 0.31169378191965463, - "grad_norm": 0.47843754291534424, - "learning_rate": 3.9112962962962965e-05, - "loss": 0.6086, - "step": 8880 - }, - { - "epoch": 0.3117288825707717, - "grad_norm": 0.46655675768852234, - "learning_rate": 3.9111111111111115e-05, - "loss": 0.4794, - "step": 8881 - }, - { - "epoch": 0.31176398322188875, - "grad_norm": 0.4440867602825165, - "learning_rate": 3.9109259259259265e-05, - "loss": 0.4907, - "step": 8882 - }, - { - "epoch": 0.31179908387300587, - "grad_norm": 0.48203611373901367, - "learning_rate": 3.910740740740741e-05, - "loss": 0.4799, - "step": 8883 - }, - { - "epoch": 0.3118341845241229, - "grad_norm": 0.4492906332015991, - "learning_rate": 3.910555555555556e-05, - "loss": 0.5215, - "step": 8884 - }, - { - "epoch": 0.31186928517524, - "grad_norm": 0.4051781892776489, - "learning_rate": 3.91037037037037e-05, - "loss": 0.3945, - "step": 8885 - }, - { - "epoch": 0.3119043858263571, - "grad_norm": 0.46575140953063965, - "learning_rate": 3.910185185185186e-05, - "loss": 0.5241, - "step": 8886 - }, - { - "epoch": 0.31193948647747416, - "grad_norm": 0.4367229640483856, - "learning_rate": 3.91e-05, - "loss": 0.4882, - "step": 8887 - }, - { - "epoch": 0.3119745871285912, - "grad_norm": 0.47344374656677246, - "learning_rate": 3.909814814814815e-05, - "loss": 0.5135, - "step": 8888 - }, - { - "epoch": 0.31200968777970833, - "grad_norm": 0.4134385287761688, - "learning_rate": 3.9096296296296295e-05, - "loss": 0.5297, - "step": 8889 - }, - { - "epoch": 0.3120447884308254, - "grad_norm": 0.4516943693161011, - "learning_rate": 3.9094444444444445e-05, - "loss": 0.4533, - "step": 8890 - }, - { - "epoch": 0.31207988908194245, - "grad_norm": 0.5235376358032227, - "learning_rate": 3.909259259259259e-05, - "loss": 0.4707, - "step": 8891 - }, - { - "epoch": 0.31211498973305957, - "grad_norm": 0.5447539687156677, - "learning_rate": 3.9090740740740746e-05, - "loss": 0.4504, - "step": 8892 - }, - { - "epoch": 0.3121500903841766, - "grad_norm": 0.43256938457489014, - "learning_rate": 3.908888888888889e-05, - "loss": 0.4788, - "step": 8893 - }, - { - "epoch": 0.3121851910352937, - "grad_norm": 0.48416733741760254, - "learning_rate": 3.908703703703704e-05, - "loss": 0.4931, - "step": 8894 - }, - { - "epoch": 0.3122202916864108, - "grad_norm": 0.44216111302375793, - "learning_rate": 3.908518518518519e-05, - "loss": 0.4759, - "step": 8895 - }, - { - "epoch": 0.31225539233752786, - "grad_norm": 0.43629512190818787, - "learning_rate": 3.908333333333333e-05, - "loss": 0.4723, - "step": 8896 - }, - { - "epoch": 0.3122904929886449, - "grad_norm": 0.4736635684967041, - "learning_rate": 3.908148148148148e-05, - "loss": 0.475, - "step": 8897 - }, - { - "epoch": 0.31232559363976203, - "grad_norm": 0.5125321745872498, - "learning_rate": 3.907962962962963e-05, - "loss": 0.5605, - "step": 8898 - }, - { - "epoch": 0.3123606942908791, - "grad_norm": 0.47709307074546814, - "learning_rate": 3.907777777777778e-05, - "loss": 0.516, - "step": 8899 - }, - { - "epoch": 0.31239579494199615, - "grad_norm": 0.48999637365341187, - "learning_rate": 3.9075925925925926e-05, - "loss": 0.4742, - "step": 8900 - }, - { - "epoch": 0.31243089559311327, - "grad_norm": 0.5124170184135437, - "learning_rate": 3.9074074074074076e-05, - "loss": 0.5068, - "step": 8901 - }, - { - "epoch": 0.3124659962442303, - "grad_norm": 0.45287585258483887, - "learning_rate": 3.9072222222222226e-05, - "loss": 0.4822, - "step": 8902 - }, - { - "epoch": 0.3125010968953474, - "grad_norm": 0.4401754140853882, - "learning_rate": 3.9070370370370376e-05, - "loss": 0.5074, - "step": 8903 - }, - { - "epoch": 0.3125361975464645, - "grad_norm": 0.524543046951294, - "learning_rate": 3.906851851851852e-05, - "loss": 0.477, - "step": 8904 - }, - { - "epoch": 0.31257129819758156, - "grad_norm": 0.5241998434066772, - "learning_rate": 3.906666666666667e-05, - "loss": 0.4742, - "step": 8905 - }, - { - "epoch": 0.3126063988486986, - "grad_norm": 0.44840148091316223, - "learning_rate": 3.906481481481481e-05, - "loss": 0.4201, - "step": 8906 - }, - { - "epoch": 0.31264149949981573, - "grad_norm": 0.460419625043869, - "learning_rate": 3.906296296296296e-05, - "loss": 0.5447, - "step": 8907 - }, - { - "epoch": 0.3126766001509328, - "grad_norm": 0.48116958141326904, - "learning_rate": 3.9061111111111113e-05, - "loss": 0.5687, - "step": 8908 - }, - { - "epoch": 0.31271170080204985, - "grad_norm": 0.5152699947357178, - "learning_rate": 3.9059259259259264e-05, - "loss": 0.5953, - "step": 8909 - }, - { - "epoch": 0.31274680145316697, - "grad_norm": 0.5250675082206726, - "learning_rate": 3.905740740740741e-05, - "loss": 0.6163, - "step": 8910 - }, - { - "epoch": 0.312781902104284, - "grad_norm": 0.4863961637020111, - "learning_rate": 3.905555555555556e-05, - "loss": 0.5993, - "step": 8911 - }, - { - "epoch": 0.3128170027554011, - "grad_norm": 0.500785768032074, - "learning_rate": 3.90537037037037e-05, - "loss": 0.4304, - "step": 8912 - }, - { - "epoch": 0.3128521034065182, - "grad_norm": 0.47236108779907227, - "learning_rate": 3.905185185185186e-05, - "loss": 0.6357, - "step": 8913 - }, - { - "epoch": 0.31288720405763526, - "grad_norm": 0.471764475107193, - "learning_rate": 3.905e-05, - "loss": 0.4031, - "step": 8914 - }, - { - "epoch": 0.3129223047087524, - "grad_norm": 0.4345032274723053, - "learning_rate": 3.904814814814815e-05, - "loss": 0.5146, - "step": 8915 - }, - { - "epoch": 0.31295740535986943, - "grad_norm": 0.4552704691886902, - "learning_rate": 3.90462962962963e-05, - "loss": 0.5681, - "step": 8916 - }, - { - "epoch": 0.3129925060109865, - "grad_norm": 0.47894832491874695, - "learning_rate": 3.9044444444444444e-05, - "loss": 0.464, - "step": 8917 - }, - { - "epoch": 0.3130276066621036, - "grad_norm": 0.4125959873199463, - "learning_rate": 3.9042592592592594e-05, - "loss": 0.5679, - "step": 8918 - }, - { - "epoch": 0.31306270731322067, - "grad_norm": 0.42393359541893005, - "learning_rate": 3.9040740740740744e-05, - "loss": 0.5115, - "step": 8919 - }, - { - "epoch": 0.3130978079643377, - "grad_norm": 0.46123573184013367, - "learning_rate": 3.9038888888888894e-05, - "loss": 0.5386, - "step": 8920 - }, - { - "epoch": 0.31313290861545484, - "grad_norm": 0.41490548849105835, - "learning_rate": 3.903703703703704e-05, - "loss": 0.4442, - "step": 8921 - }, - { - "epoch": 0.3131680092665719, - "grad_norm": 0.4983821213245392, - "learning_rate": 3.903518518518519e-05, - "loss": 0.5261, - "step": 8922 - }, - { - "epoch": 0.31320310991768896, - "grad_norm": 0.4620037376880646, - "learning_rate": 3.903333333333333e-05, - "loss": 0.4897, - "step": 8923 - }, - { - "epoch": 0.3132382105688061, - "grad_norm": 0.3942438066005707, - "learning_rate": 3.903148148148149e-05, - "loss": 0.5361, - "step": 8924 - }, - { - "epoch": 0.31327331121992313, - "grad_norm": 0.43578606843948364, - "learning_rate": 3.902962962962963e-05, - "loss": 0.4982, - "step": 8925 - }, - { - "epoch": 0.3133084118710402, - "grad_norm": 0.45858100056648254, - "learning_rate": 3.902777777777778e-05, - "loss": 0.463, - "step": 8926 - }, - { - "epoch": 0.3133435125221573, - "grad_norm": 0.5154055953025818, - "learning_rate": 3.9025925925925925e-05, - "loss": 0.4834, - "step": 8927 - }, - { - "epoch": 0.31337861317327437, - "grad_norm": 0.41354629397392273, - "learning_rate": 3.9024074074074075e-05, - "loss": 0.5189, - "step": 8928 - }, - { - "epoch": 0.3134137138243914, - "grad_norm": 0.44237077236175537, - "learning_rate": 3.9022222222222225e-05, - "loss": 0.5503, - "step": 8929 - }, - { - "epoch": 0.31344881447550854, - "grad_norm": 0.45970579981803894, - "learning_rate": 3.9020370370370375e-05, - "loss": 0.5871, - "step": 8930 - }, - { - "epoch": 0.3134839151266256, - "grad_norm": 0.4462451934814453, - "learning_rate": 3.901851851851852e-05, - "loss": 0.4643, - "step": 8931 - }, - { - "epoch": 0.31351901577774266, - "grad_norm": 0.47117114067077637, - "learning_rate": 3.901666666666667e-05, - "loss": 0.5583, - "step": 8932 - }, - { - "epoch": 0.3135541164288598, - "grad_norm": 0.4398563504219055, - "learning_rate": 3.901481481481481e-05, - "loss": 0.5599, - "step": 8933 - }, - { - "epoch": 0.31358921707997683, - "grad_norm": 0.4393994212150574, - "learning_rate": 3.901296296296296e-05, - "loss": 0.5257, - "step": 8934 - }, - { - "epoch": 0.3136243177310939, - "grad_norm": 0.46653449535369873, - "learning_rate": 3.901111111111111e-05, - "loss": 0.5709, - "step": 8935 - }, - { - "epoch": 0.313659418382211, - "grad_norm": 0.45014747977256775, - "learning_rate": 3.900925925925926e-05, - "loss": 0.476, - "step": 8936 - }, - { - "epoch": 0.31369451903332807, - "grad_norm": 0.5085892677307129, - "learning_rate": 3.900740740740741e-05, - "loss": 0.3974, - "step": 8937 - }, - { - "epoch": 0.3137296196844451, - "grad_norm": 0.4314693808555603, - "learning_rate": 3.9005555555555556e-05, - "loss": 0.4139, - "step": 8938 - }, - { - "epoch": 0.31376472033556224, - "grad_norm": 0.5374365448951721, - "learning_rate": 3.9003703703703706e-05, - "loss": 0.4143, - "step": 8939 - }, - { - "epoch": 0.3137998209866793, - "grad_norm": 0.5017508268356323, - "learning_rate": 3.9001851851851856e-05, - "loss": 0.5446, - "step": 8940 - }, - { - "epoch": 0.31383492163779636, - "grad_norm": 0.4682517647743225, - "learning_rate": 3.9000000000000006e-05, - "loss": 0.4893, - "step": 8941 - }, - { - "epoch": 0.3138700222889135, - "grad_norm": 0.4773528277873993, - "learning_rate": 3.899814814814815e-05, - "loss": 0.5976, - "step": 8942 - }, - { - "epoch": 0.31390512294003053, - "grad_norm": 0.4261312782764435, - "learning_rate": 3.89962962962963e-05, - "loss": 0.3908, - "step": 8943 - }, - { - "epoch": 0.3139402235911476, - "grad_norm": 0.4691356122493744, - "learning_rate": 3.899444444444444e-05, - "loss": 0.5677, - "step": 8944 - }, - { - "epoch": 0.3139753242422647, - "grad_norm": 0.49944254755973816, - "learning_rate": 3.89925925925926e-05, - "loss": 0.5049, - "step": 8945 - }, - { - "epoch": 0.31401042489338177, - "grad_norm": 0.39600351452827454, - "learning_rate": 3.899074074074074e-05, - "loss": 0.404, - "step": 8946 - }, - { - "epoch": 0.3140455255444988, - "grad_norm": 0.48580020666122437, - "learning_rate": 3.898888888888889e-05, - "loss": 0.3762, - "step": 8947 - }, - { - "epoch": 0.31408062619561594, - "grad_norm": 0.5015786290168762, - "learning_rate": 3.8987037037037037e-05, - "loss": 0.5623, - "step": 8948 - }, - { - "epoch": 0.314115726846733, - "grad_norm": 0.4011344015598297, - "learning_rate": 3.898518518518519e-05, - "loss": 0.3941, - "step": 8949 - }, - { - "epoch": 0.31415082749785006, - "grad_norm": 0.4997386336326599, - "learning_rate": 3.898333333333333e-05, - "loss": 0.5784, - "step": 8950 - }, - { - "epoch": 0.3141859281489672, - "grad_norm": 0.4794956147670746, - "learning_rate": 3.898148148148149e-05, - "loss": 0.4077, - "step": 8951 - }, - { - "epoch": 0.31422102880008423, - "grad_norm": 0.4309598207473755, - "learning_rate": 3.897962962962963e-05, - "loss": 0.5981, - "step": 8952 - }, - { - "epoch": 0.3142561294512013, - "grad_norm": 0.5210030674934387, - "learning_rate": 3.897777777777778e-05, - "loss": 0.4276, - "step": 8953 - }, - { - "epoch": 0.3142912301023184, - "grad_norm": 0.5705810189247131, - "learning_rate": 3.8975925925925924e-05, - "loss": 0.524, - "step": 8954 - }, - { - "epoch": 0.31432633075343547, - "grad_norm": 0.4648550748825073, - "learning_rate": 3.8974074074074074e-05, - "loss": 0.4917, - "step": 8955 - }, - { - "epoch": 0.3143614314045525, - "grad_norm": 0.5317983031272888, - "learning_rate": 3.8972222222222224e-05, - "loss": 0.5104, - "step": 8956 - }, - { - "epoch": 0.31439653205566964, - "grad_norm": 0.5174683928489685, - "learning_rate": 3.8970370370370374e-05, - "loss": 0.5232, - "step": 8957 - }, - { - "epoch": 0.3144316327067867, - "grad_norm": 0.4457626938819885, - "learning_rate": 3.8968518518518524e-05, - "loss": 0.514, - "step": 8958 - }, - { - "epoch": 0.3144667333579038, - "grad_norm": 0.5064382553100586, - "learning_rate": 3.896666666666667e-05, - "loss": 0.5029, - "step": 8959 - }, - { - "epoch": 0.3145018340090209, - "grad_norm": 0.4349118769168854, - "learning_rate": 3.896481481481482e-05, - "loss": 0.4646, - "step": 8960 - }, - { - "epoch": 0.31453693466013793, - "grad_norm": 0.4308491051197052, - "learning_rate": 3.896296296296296e-05, - "loss": 0.4538, - "step": 8961 - }, - { - "epoch": 0.31457203531125505, - "grad_norm": 0.42070499062538147, - "learning_rate": 3.896111111111112e-05, - "loss": 0.5393, - "step": 8962 - }, - { - "epoch": 0.3146071359623721, - "grad_norm": 0.6270118951797485, - "learning_rate": 3.895925925925926e-05, - "loss": 0.5887, - "step": 8963 - }, - { - "epoch": 0.31464223661348917, - "grad_norm": 0.4596158564090729, - "learning_rate": 3.895740740740741e-05, - "loss": 0.4836, - "step": 8964 - }, - { - "epoch": 0.3146773372646063, - "grad_norm": 0.5363198518753052, - "learning_rate": 3.8955555555555555e-05, - "loss": 0.4513, - "step": 8965 - }, - { - "epoch": 0.31471243791572334, - "grad_norm": 0.45141324400901794, - "learning_rate": 3.8953703703703705e-05, - "loss": 0.4312, - "step": 8966 - }, - { - "epoch": 0.3147475385668404, - "grad_norm": 0.4092727303504944, - "learning_rate": 3.8951851851851855e-05, - "loss": 0.3886, - "step": 8967 - }, - { - "epoch": 0.3147826392179575, - "grad_norm": 0.43888071179389954, - "learning_rate": 3.8950000000000005e-05, - "loss": 0.4355, - "step": 8968 - }, - { - "epoch": 0.3148177398690746, - "grad_norm": 0.4978719651699066, - "learning_rate": 3.894814814814815e-05, - "loss": 0.4994, - "step": 8969 - }, - { - "epoch": 0.31485284052019163, - "grad_norm": 0.4546457827091217, - "learning_rate": 3.89462962962963e-05, - "loss": 0.5128, - "step": 8970 - }, - { - "epoch": 0.31488794117130875, - "grad_norm": 0.4735872745513916, - "learning_rate": 3.894444444444444e-05, - "loss": 0.4311, - "step": 8971 - }, - { - "epoch": 0.3149230418224258, - "grad_norm": 0.469704806804657, - "learning_rate": 3.89425925925926e-05, - "loss": 0.4587, - "step": 8972 - }, - { - "epoch": 0.31495814247354287, - "grad_norm": 0.45944929122924805, - "learning_rate": 3.894074074074074e-05, - "loss": 0.4636, - "step": 8973 - }, - { - "epoch": 0.31499324312466, - "grad_norm": 0.5019104480743408, - "learning_rate": 3.893888888888889e-05, - "loss": 0.4898, - "step": 8974 - }, - { - "epoch": 0.31502834377577704, - "grad_norm": 0.42603176832199097, - "learning_rate": 3.8937037037037035e-05, - "loss": 0.5626, - "step": 8975 - }, - { - "epoch": 0.3150634444268941, - "grad_norm": 0.4861310124397278, - "learning_rate": 3.8935185185185185e-05, - "loss": 0.5629, - "step": 8976 - }, - { - "epoch": 0.3150985450780112, - "grad_norm": 0.41495075821876526, - "learning_rate": 3.8933333333333336e-05, - "loss": 0.4369, - "step": 8977 - }, - { - "epoch": 0.3151336457291283, - "grad_norm": 0.5025618672370911, - "learning_rate": 3.8931481481481486e-05, - "loss": 0.4874, - "step": 8978 - }, - { - "epoch": 0.31516874638024533, - "grad_norm": 0.5681062936782837, - "learning_rate": 3.8929629629629636e-05, - "loss": 0.618, - "step": 8979 - }, - { - "epoch": 0.31520384703136245, - "grad_norm": 0.4653059244155884, - "learning_rate": 3.892777777777778e-05, - "loss": 0.5745, - "step": 8980 - }, - { - "epoch": 0.3152389476824795, - "grad_norm": 0.4504815638065338, - "learning_rate": 3.892592592592593e-05, - "loss": 0.5012, - "step": 8981 - }, - { - "epoch": 0.31527404833359657, - "grad_norm": 0.4842020869255066, - "learning_rate": 3.892407407407407e-05, - "loss": 0.5895, - "step": 8982 - }, - { - "epoch": 0.3153091489847137, - "grad_norm": 0.41734743118286133, - "learning_rate": 3.892222222222223e-05, - "loss": 0.496, - "step": 8983 - }, - { - "epoch": 0.31534424963583074, - "grad_norm": 0.5034120678901672, - "learning_rate": 3.892037037037037e-05, - "loss": 0.5224, - "step": 8984 - }, - { - "epoch": 0.3153793502869478, - "grad_norm": 0.4578949213027954, - "learning_rate": 3.891851851851852e-05, - "loss": 0.4798, - "step": 8985 - }, - { - "epoch": 0.3154144509380649, - "grad_norm": 0.46887627243995667, - "learning_rate": 3.8916666666666666e-05, - "loss": 0.5997, - "step": 8986 - }, - { - "epoch": 0.315449551589182, - "grad_norm": 0.4266267418861389, - "learning_rate": 3.8914814814814816e-05, - "loss": 0.4723, - "step": 8987 - }, - { - "epoch": 0.31548465224029903, - "grad_norm": 0.414373517036438, - "learning_rate": 3.891296296296296e-05, - "loss": 0.5072, - "step": 8988 - }, - { - "epoch": 0.31551975289141615, - "grad_norm": 0.47556883096694946, - "learning_rate": 3.8911111111111117e-05, - "loss": 0.5329, - "step": 8989 - }, - { - "epoch": 0.3155548535425332, - "grad_norm": 0.39345118403434753, - "learning_rate": 3.890925925925926e-05, - "loss": 0.4526, - "step": 8990 - }, - { - "epoch": 0.31558995419365027, - "grad_norm": 0.5027639269828796, - "learning_rate": 3.890740740740741e-05, - "loss": 0.4117, - "step": 8991 - }, - { - "epoch": 0.3156250548447674, - "grad_norm": 0.4817275106906891, - "learning_rate": 3.890555555555555e-05, - "loss": 0.539, - "step": 8992 - }, - { - "epoch": 0.31566015549588444, - "grad_norm": 0.488498717546463, - "learning_rate": 3.8903703703703703e-05, - "loss": 0.5639, - "step": 8993 - }, - { - "epoch": 0.3156952561470015, - "grad_norm": 0.6148736476898193, - "learning_rate": 3.8901851851851854e-05, - "loss": 0.5492, - "step": 8994 - }, - { - "epoch": 0.3157303567981186, - "grad_norm": 0.5105966329574585, - "learning_rate": 3.8900000000000004e-05, - "loss": 0.5875, - "step": 8995 - }, - { - "epoch": 0.3157654574492357, - "grad_norm": 0.4724808633327484, - "learning_rate": 3.889814814814815e-05, - "loss": 0.5657, - "step": 8996 - }, - { - "epoch": 0.31580055810035274, - "grad_norm": 0.40373674035072327, - "learning_rate": 3.88962962962963e-05, - "loss": 0.5367, - "step": 8997 - }, - { - "epoch": 0.31583565875146985, - "grad_norm": 0.4577322006225586, - "learning_rate": 3.889444444444445e-05, - "loss": 0.4592, - "step": 8998 - }, - { - "epoch": 0.3158707594025869, - "grad_norm": 0.4519622027873993, - "learning_rate": 3.88925925925926e-05, - "loss": 0.5278, - "step": 8999 - }, - { - "epoch": 0.31590586005370397, - "grad_norm": 0.4620085060596466, - "learning_rate": 3.889074074074075e-05, - "loss": 0.4759, - "step": 9000 - }, - { - "epoch": 0.3159409607048211, - "grad_norm": 0.44618991017341614, - "learning_rate": 3.888888888888889e-05, - "loss": 0.4869, - "step": 9001 - }, - { - "epoch": 0.31597606135593814, - "grad_norm": 0.3818812966346741, - "learning_rate": 3.888703703703704e-05, - "loss": 0.4741, - "step": 9002 - }, - { - "epoch": 0.31601116200705526, - "grad_norm": 0.4394904375076294, - "learning_rate": 3.8885185185185184e-05, - "loss": 0.554, - "step": 9003 - }, - { - "epoch": 0.3160462626581723, - "grad_norm": 0.4212164282798767, - "learning_rate": 3.8883333333333334e-05, - "loss": 0.4092, - "step": 9004 - }, - { - "epoch": 0.3160813633092894, - "grad_norm": 0.44073110818862915, - "learning_rate": 3.8881481481481484e-05, - "loss": 0.5593, - "step": 9005 - }, - { - "epoch": 0.3161164639604065, - "grad_norm": 0.49652305245399475, - "learning_rate": 3.8879629629629635e-05, - "loss": 0.4764, - "step": 9006 - }, - { - "epoch": 0.31615156461152355, - "grad_norm": 0.4832695424556732, - "learning_rate": 3.887777777777778e-05, - "loss": 0.5491, - "step": 9007 - }, - { - "epoch": 0.3161866652626406, - "grad_norm": 0.46382322907447815, - "learning_rate": 3.887592592592593e-05, - "loss": 0.4729, - "step": 9008 - }, - { - "epoch": 0.3162217659137577, - "grad_norm": 0.49807214736938477, - "learning_rate": 3.887407407407407e-05, - "loss": 0.4891, - "step": 9009 - }, - { - "epoch": 0.3162568665648748, - "grad_norm": 0.4193347692489624, - "learning_rate": 3.887222222222223e-05, - "loss": 0.3721, - "step": 9010 - }, - { - "epoch": 0.31629196721599184, - "grad_norm": 0.4240076541900635, - "learning_rate": 3.887037037037037e-05, - "loss": 0.4883, - "step": 9011 - }, - { - "epoch": 0.31632706786710896, - "grad_norm": 0.45969536900520325, - "learning_rate": 3.886851851851852e-05, - "loss": 0.4697, - "step": 9012 - }, - { - "epoch": 0.316362168518226, - "grad_norm": 0.4130192995071411, - "learning_rate": 3.8866666666666665e-05, - "loss": 0.4426, - "step": 9013 - }, - { - "epoch": 0.3163972691693431, - "grad_norm": 0.46462300419807434, - "learning_rate": 3.8864814814814815e-05, - "loss": 0.5643, - "step": 9014 - }, - { - "epoch": 0.3164323698204602, - "grad_norm": 0.46901580691337585, - "learning_rate": 3.8862962962962965e-05, - "loss": 0.5246, - "step": 9015 - }, - { - "epoch": 0.31646747047157725, - "grad_norm": 0.5083248615264893, - "learning_rate": 3.8861111111111115e-05, - "loss": 0.5141, - "step": 9016 - }, - { - "epoch": 0.3165025711226943, - "grad_norm": 0.5086377859115601, - "learning_rate": 3.885925925925926e-05, - "loss": 0.5033, - "step": 9017 - }, - { - "epoch": 0.3165376717738114, - "grad_norm": 0.4573846161365509, - "learning_rate": 3.885740740740741e-05, - "loss": 0.4384, - "step": 9018 - }, - { - "epoch": 0.3165727724249285, - "grad_norm": 0.461128830909729, - "learning_rate": 3.885555555555556e-05, - "loss": 0.473, - "step": 9019 - }, - { - "epoch": 0.31660787307604554, - "grad_norm": 0.5254818201065063, - "learning_rate": 3.88537037037037e-05, - "loss": 0.524, - "step": 9020 - }, - { - "epoch": 0.31664297372716266, - "grad_norm": 0.5124198198318481, - "learning_rate": 3.885185185185186e-05, - "loss": 0.4706, - "step": 9021 - }, - { - "epoch": 0.3166780743782797, - "grad_norm": 0.4993664622306824, - "learning_rate": 3.885e-05, - "loss": 0.4602, - "step": 9022 - }, - { - "epoch": 0.3167131750293968, - "grad_norm": 0.6033975481987, - "learning_rate": 3.884814814814815e-05, - "loss": 0.571, - "step": 9023 - }, - { - "epoch": 0.3167482756805139, - "grad_norm": 0.5065687894821167, - "learning_rate": 3.8846296296296296e-05, - "loss": 0.5395, - "step": 9024 - }, - { - "epoch": 0.31678337633163095, - "grad_norm": 0.44348758459091187, - "learning_rate": 3.8844444444444446e-05, - "loss": 0.4358, - "step": 9025 - }, - { - "epoch": 0.316818476982748, - "grad_norm": 0.45045799016952515, - "learning_rate": 3.8842592592592596e-05, - "loss": 0.4291, - "step": 9026 - }, - { - "epoch": 0.3168535776338651, - "grad_norm": 0.463662326335907, - "learning_rate": 3.8840740740740746e-05, - "loss": 0.5035, - "step": 9027 - }, - { - "epoch": 0.3168886782849822, - "grad_norm": 0.5158819556236267, - "learning_rate": 3.883888888888889e-05, - "loss": 0.5109, - "step": 9028 - }, - { - "epoch": 0.31692377893609924, - "grad_norm": 0.46269842982292175, - "learning_rate": 3.883703703703704e-05, - "loss": 0.556, - "step": 9029 - }, - { - "epoch": 0.31695887958721636, - "grad_norm": 0.44399893283843994, - "learning_rate": 3.883518518518518e-05, - "loss": 0.46, - "step": 9030 - }, - { - "epoch": 0.3169939802383334, - "grad_norm": 0.4457315504550934, - "learning_rate": 3.883333333333333e-05, - "loss": 0.4438, - "step": 9031 - }, - { - "epoch": 0.3170290808894505, - "grad_norm": 0.44687503576278687, - "learning_rate": 3.883148148148148e-05, - "loss": 0.5068, - "step": 9032 - }, - { - "epoch": 0.3170641815405676, - "grad_norm": 0.47611141204833984, - "learning_rate": 3.882962962962963e-05, - "loss": 0.5424, - "step": 9033 - }, - { - "epoch": 0.31709928219168465, - "grad_norm": 0.48581892251968384, - "learning_rate": 3.882777777777778e-05, - "loss": 0.5551, - "step": 9034 - }, - { - "epoch": 0.3171343828428017, - "grad_norm": 0.5362278819084167, - "learning_rate": 3.882592592592593e-05, - "loss": 0.5807, - "step": 9035 - }, - { - "epoch": 0.3171694834939188, - "grad_norm": 0.43257012963294983, - "learning_rate": 3.882407407407408e-05, - "loss": 0.3946, - "step": 9036 - }, - { - "epoch": 0.3172045841450359, - "grad_norm": 0.482378751039505, - "learning_rate": 3.882222222222223e-05, - "loss": 0.3939, - "step": 9037 - }, - { - "epoch": 0.31723968479615294, - "grad_norm": 0.5299557447433472, - "learning_rate": 3.882037037037037e-05, - "loss": 0.5814, - "step": 9038 - }, - { - "epoch": 0.31727478544727006, - "grad_norm": 0.5148888826370239, - "learning_rate": 3.881851851851852e-05, - "loss": 0.5411, - "step": 9039 - }, - { - "epoch": 0.3173098860983871, - "grad_norm": 0.5063890814781189, - "learning_rate": 3.881666666666667e-05, - "loss": 0.4625, - "step": 9040 - }, - { - "epoch": 0.3173449867495042, - "grad_norm": 0.4453532099723816, - "learning_rate": 3.8814814814814814e-05, - "loss": 0.4513, - "step": 9041 - }, - { - "epoch": 0.3173800874006213, - "grad_norm": 0.4255765974521637, - "learning_rate": 3.8812962962962964e-05, - "loss": 0.4308, - "step": 9042 - }, - { - "epoch": 0.31741518805173835, - "grad_norm": 0.4656733572483063, - "learning_rate": 3.8811111111111114e-05, - "loss": 0.5202, - "step": 9043 - }, - { - "epoch": 0.31745028870285547, - "grad_norm": 0.4742688834667206, - "learning_rate": 3.8809259259259264e-05, - "loss": 0.5549, - "step": 9044 - }, - { - "epoch": 0.3174853893539725, - "grad_norm": 0.45317593216896057, - "learning_rate": 3.880740740740741e-05, - "loss": 0.4411, - "step": 9045 - }, - { - "epoch": 0.3175204900050896, - "grad_norm": 0.4649879038333893, - "learning_rate": 3.880555555555556e-05, - "loss": 0.5259, - "step": 9046 - }, - { - "epoch": 0.3175555906562067, - "grad_norm": 0.4471015930175781, - "learning_rate": 3.88037037037037e-05, - "loss": 0.5066, - "step": 9047 - }, - { - "epoch": 0.31759069130732376, - "grad_norm": 0.4049876034259796, - "learning_rate": 3.880185185185186e-05, - "loss": 0.5276, - "step": 9048 - }, - { - "epoch": 0.3176257919584408, - "grad_norm": 0.45993754267692566, - "learning_rate": 3.88e-05, - "loss": 0.5191, - "step": 9049 - }, - { - "epoch": 0.31766089260955793, - "grad_norm": 0.4925743639469147, - "learning_rate": 3.879814814814815e-05, - "loss": 0.5037, - "step": 9050 - }, - { - "epoch": 0.317695993260675, - "grad_norm": 0.45465028285980225, - "learning_rate": 3.8796296296296295e-05, - "loss": 0.4919, - "step": 9051 - }, - { - "epoch": 0.31773109391179205, - "grad_norm": 0.5461937785148621, - "learning_rate": 3.8794444444444445e-05, - "loss": 0.5392, - "step": 9052 - }, - { - "epoch": 0.31776619456290917, - "grad_norm": 0.5076373815536499, - "learning_rate": 3.8792592592592595e-05, - "loss": 0.3204, - "step": 9053 - }, - { - "epoch": 0.3178012952140262, - "grad_norm": 0.4780726730823517, - "learning_rate": 3.8790740740740745e-05, - "loss": 0.601, - "step": 9054 - }, - { - "epoch": 0.3178363958651433, - "grad_norm": 0.4331030249595642, - "learning_rate": 3.878888888888889e-05, - "loss": 0.3771, - "step": 9055 - }, - { - "epoch": 0.3178714965162604, - "grad_norm": 0.4755726158618927, - "learning_rate": 3.878703703703704e-05, - "loss": 0.524, - "step": 9056 - }, - { - "epoch": 0.31790659716737746, - "grad_norm": 0.4769328236579895, - "learning_rate": 3.878518518518519e-05, - "loss": 0.6173, - "step": 9057 - }, - { - "epoch": 0.3179416978184945, - "grad_norm": 0.5118137001991272, - "learning_rate": 3.878333333333333e-05, - "loss": 0.4812, - "step": 9058 - }, - { - "epoch": 0.31797679846961163, - "grad_norm": 0.44706815481185913, - "learning_rate": 3.878148148148148e-05, - "loss": 0.4375, - "step": 9059 - }, - { - "epoch": 0.3180118991207287, - "grad_norm": 0.42854875326156616, - "learning_rate": 3.877962962962963e-05, - "loss": 0.3724, - "step": 9060 - }, - { - "epoch": 0.31804699977184575, - "grad_norm": 0.44997286796569824, - "learning_rate": 3.877777777777778e-05, - "loss": 0.5251, - "step": 9061 - }, - { - "epoch": 0.31808210042296287, - "grad_norm": 0.4803847074508667, - "learning_rate": 3.8775925925925926e-05, - "loss": 0.4682, - "step": 9062 - }, - { - "epoch": 0.3181172010740799, - "grad_norm": 0.4633980989456177, - "learning_rate": 3.8774074074074076e-05, - "loss": 0.4342, - "step": 9063 - }, - { - "epoch": 0.318152301725197, - "grad_norm": 0.48310741782188416, - "learning_rate": 3.8772222222222226e-05, - "loss": 0.6186, - "step": 9064 - }, - { - "epoch": 0.3181874023763141, - "grad_norm": 0.4624057412147522, - "learning_rate": 3.8770370370370376e-05, - "loss": 0.4872, - "step": 9065 - }, - { - "epoch": 0.31822250302743116, - "grad_norm": 0.6993070840835571, - "learning_rate": 3.876851851851852e-05, - "loss": 0.5619, - "step": 9066 - }, - { - "epoch": 0.3182576036785482, - "grad_norm": 0.5273158550262451, - "learning_rate": 3.876666666666667e-05, - "loss": 0.5522, - "step": 9067 - }, - { - "epoch": 0.31829270432966533, - "grad_norm": 0.44142070412635803, - "learning_rate": 3.876481481481481e-05, - "loss": 0.5074, - "step": 9068 - }, - { - "epoch": 0.3183278049807824, - "grad_norm": 0.44813841581344604, - "learning_rate": 3.876296296296297e-05, - "loss": 0.4262, - "step": 9069 - }, - { - "epoch": 0.31836290563189945, - "grad_norm": 0.4158579707145691, - "learning_rate": 3.876111111111111e-05, - "loss": 0.4035, - "step": 9070 - }, - { - "epoch": 0.31839800628301657, - "grad_norm": 0.4419023394584656, - "learning_rate": 3.875925925925926e-05, - "loss": 0.5085, - "step": 9071 - }, - { - "epoch": 0.3184331069341336, - "grad_norm": 0.41360917687416077, - "learning_rate": 3.8757407407407406e-05, - "loss": 0.3768, - "step": 9072 - }, - { - "epoch": 0.3184682075852507, - "grad_norm": 0.49957722425460815, - "learning_rate": 3.8755555555555556e-05, - "loss": 0.5137, - "step": 9073 - }, - { - "epoch": 0.3185033082363678, - "grad_norm": 0.43188896775245667, - "learning_rate": 3.8753703703703707e-05, - "loss": 0.4539, - "step": 9074 - }, - { - "epoch": 0.31853840888748486, - "grad_norm": 0.4656308889389038, - "learning_rate": 3.875185185185186e-05, - "loss": 0.5592, - "step": 9075 - }, - { - "epoch": 0.3185735095386019, - "grad_norm": 0.42773380875587463, - "learning_rate": 3.875e-05, - "loss": 0.4547, - "step": 9076 - }, - { - "epoch": 0.31860861018971903, - "grad_norm": 0.48402342200279236, - "learning_rate": 3.874814814814815e-05, - "loss": 0.5151, - "step": 9077 - }, - { - "epoch": 0.3186437108408361, - "grad_norm": 0.5146555304527283, - "learning_rate": 3.87462962962963e-05, - "loss": 0.5142, - "step": 9078 - }, - { - "epoch": 0.31867881149195315, - "grad_norm": 0.520416796207428, - "learning_rate": 3.8744444444444444e-05, - "loss": 0.5092, - "step": 9079 - }, - { - "epoch": 0.31871391214307027, - "grad_norm": 0.48988014459609985, - "learning_rate": 3.8742592592592594e-05, - "loss": 0.47, - "step": 9080 - }, - { - "epoch": 0.3187490127941873, - "grad_norm": 0.45684000849723816, - "learning_rate": 3.8740740740740744e-05, - "loss": 0.5541, - "step": 9081 - }, - { - "epoch": 0.3187841134453044, - "grad_norm": 0.3864118754863739, - "learning_rate": 3.8738888888888894e-05, - "loss": 0.2819, - "step": 9082 - }, - { - "epoch": 0.3188192140964215, - "grad_norm": 0.4561155140399933, - "learning_rate": 3.873703703703704e-05, - "loss": 0.4868, - "step": 9083 - }, - { - "epoch": 0.31885431474753856, - "grad_norm": 0.4556969702243805, - "learning_rate": 3.873518518518519e-05, - "loss": 0.5373, - "step": 9084 - }, - { - "epoch": 0.3188894153986556, - "grad_norm": 0.44738635420799255, - "learning_rate": 3.873333333333333e-05, - "loss": 0.4915, - "step": 9085 - }, - { - "epoch": 0.31892451604977273, - "grad_norm": 0.4660283625125885, - "learning_rate": 3.873148148148149e-05, - "loss": 0.3666, - "step": 9086 - }, - { - "epoch": 0.3189596167008898, - "grad_norm": 0.5061261653900146, - "learning_rate": 3.872962962962963e-05, - "loss": 0.5683, - "step": 9087 - }, - { - "epoch": 0.3189947173520069, - "grad_norm": 0.5244332551956177, - "learning_rate": 3.872777777777778e-05, - "loss": 0.5712, - "step": 9088 - }, - { - "epoch": 0.31902981800312397, - "grad_norm": 0.4297672510147095, - "learning_rate": 3.8725925925925924e-05, - "loss": 0.4238, - "step": 9089 - }, - { - "epoch": 0.319064918654241, - "grad_norm": 0.4653649628162384, - "learning_rate": 3.8724074074074074e-05, - "loss": 0.554, - "step": 9090 - }, - { - "epoch": 0.31910001930535814, - "grad_norm": 0.41553667187690735, - "learning_rate": 3.8722222222222225e-05, - "loss": 0.5809, - "step": 9091 - }, - { - "epoch": 0.3191351199564752, - "grad_norm": 0.47262638807296753, - "learning_rate": 3.8720370370370375e-05, - "loss": 0.4238, - "step": 9092 - }, - { - "epoch": 0.31917022060759226, - "grad_norm": 0.5102313160896301, - "learning_rate": 3.871851851851852e-05, - "loss": 0.4873, - "step": 9093 - }, - { - "epoch": 0.3192053212587094, - "grad_norm": 0.5345553159713745, - "learning_rate": 3.871666666666667e-05, - "loss": 0.5177, - "step": 9094 - }, - { - "epoch": 0.31924042190982643, - "grad_norm": 0.5040786862373352, - "learning_rate": 3.871481481481482e-05, - "loss": 0.5704, - "step": 9095 - }, - { - "epoch": 0.3192755225609435, - "grad_norm": 0.46008843183517456, - "learning_rate": 3.871296296296297e-05, - "loss": 0.59, - "step": 9096 - }, - { - "epoch": 0.3193106232120606, - "grad_norm": 0.48294466733932495, - "learning_rate": 3.871111111111111e-05, - "loss": 0.5751, - "step": 9097 - }, - { - "epoch": 0.31934572386317767, - "grad_norm": 0.5980268120765686, - "learning_rate": 3.870925925925926e-05, - "loss": 0.4325, - "step": 9098 - }, - { - "epoch": 0.3193808245142947, - "grad_norm": 0.5486664175987244, - "learning_rate": 3.870740740740741e-05, - "loss": 0.5455, - "step": 9099 - }, - { - "epoch": 0.31941592516541184, - "grad_norm": 0.4272853136062622, - "learning_rate": 3.8705555555555555e-05, - "loss": 0.5001, - "step": 9100 - }, - { - "epoch": 0.3194510258165289, - "grad_norm": 0.45541849732398987, - "learning_rate": 3.8703703703703705e-05, - "loss": 0.5112, - "step": 9101 - }, - { - "epoch": 0.31948612646764596, - "grad_norm": 0.5878722667694092, - "learning_rate": 3.8701851851851855e-05, - "loss": 0.4679, - "step": 9102 - }, - { - "epoch": 0.3195212271187631, - "grad_norm": 0.5583001971244812, - "learning_rate": 3.8700000000000006e-05, - "loss": 0.4453, - "step": 9103 - }, - { - "epoch": 0.31955632776988013, - "grad_norm": 0.5765295028686523, - "learning_rate": 3.869814814814815e-05, - "loss": 0.3968, - "step": 9104 - }, - { - "epoch": 0.3195914284209972, - "grad_norm": 0.6155054569244385, - "learning_rate": 3.86962962962963e-05, - "loss": 0.4367, - "step": 9105 - }, - { - "epoch": 0.3196265290721143, - "grad_norm": 0.39002734422683716, - "learning_rate": 3.869444444444444e-05, - "loss": 0.4921, - "step": 9106 - }, - { - "epoch": 0.31966162972323137, - "grad_norm": 0.45627471804618835, - "learning_rate": 3.86925925925926e-05, - "loss": 0.5673, - "step": 9107 - }, - { - "epoch": 0.3196967303743484, - "grad_norm": 0.4389161467552185, - "learning_rate": 3.869074074074074e-05, - "loss": 0.5422, - "step": 9108 - }, - { - "epoch": 0.31973183102546554, - "grad_norm": 0.44138601422309875, - "learning_rate": 3.868888888888889e-05, - "loss": 0.5229, - "step": 9109 - }, - { - "epoch": 0.3197669316765826, - "grad_norm": 0.5496671199798584, - "learning_rate": 3.8687037037037036e-05, - "loss": 0.431, - "step": 9110 - }, - { - "epoch": 0.31980203232769966, - "grad_norm": 0.5882061123847961, - "learning_rate": 3.8685185185185186e-05, - "loss": 0.5616, - "step": 9111 - }, - { - "epoch": 0.3198371329788168, - "grad_norm": 0.4507993757724762, - "learning_rate": 3.868333333333333e-05, - "loss": 0.4777, - "step": 9112 - }, - { - "epoch": 0.31987223362993383, - "grad_norm": 0.47792062163352966, - "learning_rate": 3.8681481481481486e-05, - "loss": 0.544, - "step": 9113 - }, - { - "epoch": 0.3199073342810509, - "grad_norm": 0.46242228150367737, - "learning_rate": 3.867962962962963e-05, - "loss": 0.5184, - "step": 9114 - }, - { - "epoch": 0.319942434932168, - "grad_norm": 0.5302236080169678, - "learning_rate": 3.867777777777778e-05, - "loss": 0.5004, - "step": 9115 - }, - { - "epoch": 0.31997753558328507, - "grad_norm": 0.48153984546661377, - "learning_rate": 3.867592592592593e-05, - "loss": 0.4465, - "step": 9116 - }, - { - "epoch": 0.3200126362344021, - "grad_norm": 0.49130964279174805, - "learning_rate": 3.867407407407407e-05, - "loss": 0.5501, - "step": 9117 - }, - { - "epoch": 0.32004773688551924, - "grad_norm": 0.5338379144668579, - "learning_rate": 3.867222222222222e-05, - "loss": 0.6039, - "step": 9118 - }, - { - "epoch": 0.3200828375366363, - "grad_norm": 0.4252210557460785, - "learning_rate": 3.8670370370370373e-05, - "loss": 0.488, - "step": 9119 - }, - { - "epoch": 0.32011793818775336, - "grad_norm": 0.5127785801887512, - "learning_rate": 3.8668518518518524e-05, - "loss": 0.4501, - "step": 9120 - }, - { - "epoch": 0.3201530388388705, - "grad_norm": 0.4288437068462372, - "learning_rate": 3.866666666666667e-05, - "loss": 0.4461, - "step": 9121 - }, - { - "epoch": 0.32018813948998753, - "grad_norm": 0.4257925748825073, - "learning_rate": 3.866481481481482e-05, - "loss": 0.5028, - "step": 9122 - }, - { - "epoch": 0.3202232401411046, - "grad_norm": 0.43216079473495483, - "learning_rate": 3.866296296296297e-05, - "loss": 0.4229, - "step": 9123 - }, - { - "epoch": 0.3202583407922217, - "grad_norm": 0.4296945631504059, - "learning_rate": 3.866111111111112e-05, - "loss": 0.4858, - "step": 9124 - }, - { - "epoch": 0.32029344144333877, - "grad_norm": 0.6204297542572021, - "learning_rate": 3.865925925925926e-05, - "loss": 0.5037, - "step": 9125 - }, - { - "epoch": 0.3203285420944558, - "grad_norm": 0.5090298652648926, - "learning_rate": 3.865740740740741e-05, - "loss": 0.4944, - "step": 9126 - }, - { - "epoch": 0.32036364274557294, - "grad_norm": 0.5652896165847778, - "learning_rate": 3.8655555555555554e-05, - "loss": 0.473, - "step": 9127 - }, - { - "epoch": 0.32039874339669, - "grad_norm": 0.4604063332080841, - "learning_rate": 3.8653703703703704e-05, - "loss": 0.487, - "step": 9128 - }, - { - "epoch": 0.32043384404780706, - "grad_norm": 0.4910777509212494, - "learning_rate": 3.8651851851851854e-05, - "loss": 0.4508, - "step": 9129 - }, - { - "epoch": 0.3204689446989242, - "grad_norm": 0.42185550928115845, - "learning_rate": 3.8650000000000004e-05, - "loss": 0.4836, - "step": 9130 - }, - { - "epoch": 0.32050404535004123, - "grad_norm": 0.490182101726532, - "learning_rate": 3.864814814814815e-05, - "loss": 0.5093, - "step": 9131 - }, - { - "epoch": 0.32053914600115835, - "grad_norm": 0.43560364842414856, - "learning_rate": 3.86462962962963e-05, - "loss": 0.6078, - "step": 9132 - }, - { - "epoch": 0.3205742466522754, - "grad_norm": 0.3961637616157532, - "learning_rate": 3.864444444444444e-05, - "loss": 0.466, - "step": 9133 - }, - { - "epoch": 0.32060934730339247, - "grad_norm": 0.4500815272331238, - "learning_rate": 3.86425925925926e-05, - "loss": 0.4352, - "step": 9134 - }, - { - "epoch": 0.3206444479545096, - "grad_norm": 0.5352196097373962, - "learning_rate": 3.864074074074074e-05, - "loss": 0.5035, - "step": 9135 - }, - { - "epoch": 0.32067954860562664, - "grad_norm": 0.5286598801612854, - "learning_rate": 3.863888888888889e-05, - "loss": 0.5798, - "step": 9136 - }, - { - "epoch": 0.3207146492567437, - "grad_norm": 0.4368419349193573, - "learning_rate": 3.863703703703704e-05, - "loss": 0.5059, - "step": 9137 - }, - { - "epoch": 0.3207497499078608, - "grad_norm": 0.47668808698654175, - "learning_rate": 3.8635185185185185e-05, - "loss": 0.493, - "step": 9138 - }, - { - "epoch": 0.3207848505589779, - "grad_norm": 0.5335453152656555, - "learning_rate": 3.8633333333333335e-05, - "loss": 0.438, - "step": 9139 - }, - { - "epoch": 0.32081995121009493, - "grad_norm": 0.4811007082462311, - "learning_rate": 3.8631481481481485e-05, - "loss": 0.5027, - "step": 9140 - }, - { - "epoch": 0.32085505186121205, - "grad_norm": 0.535470187664032, - "learning_rate": 3.8629629629629635e-05, - "loss": 0.6072, - "step": 9141 - }, - { - "epoch": 0.3208901525123291, - "grad_norm": 0.5454007387161255, - "learning_rate": 3.862777777777778e-05, - "loss": 0.4269, - "step": 9142 - }, - { - "epoch": 0.32092525316344617, - "grad_norm": 0.3857831060886383, - "learning_rate": 3.862592592592593e-05, - "loss": 0.5571, - "step": 9143 - }, - { - "epoch": 0.3209603538145633, - "grad_norm": 0.5040194392204285, - "learning_rate": 3.862407407407407e-05, - "loss": 0.4719, - "step": 9144 - }, - { - "epoch": 0.32099545446568034, - "grad_norm": 0.44880664348602295, - "learning_rate": 3.862222222222223e-05, - "loss": 0.4384, - "step": 9145 - }, - { - "epoch": 0.3210305551167974, - "grad_norm": 0.4044463634490967, - "learning_rate": 3.862037037037037e-05, - "loss": 0.3242, - "step": 9146 - }, - { - "epoch": 0.3210656557679145, - "grad_norm": 0.5111329555511475, - "learning_rate": 3.861851851851852e-05, - "loss": 0.3364, - "step": 9147 - }, - { - "epoch": 0.3211007564190316, - "grad_norm": 0.4594385027885437, - "learning_rate": 3.8616666666666666e-05, - "loss": 0.6148, - "step": 9148 - }, - { - "epoch": 0.32113585707014863, - "grad_norm": 0.41781243681907654, - "learning_rate": 3.8614814814814816e-05, - "loss": 0.5391, - "step": 9149 - }, - { - "epoch": 0.32117095772126575, - "grad_norm": 0.45848509669303894, - "learning_rate": 3.8612962962962966e-05, - "loss": 0.531, - "step": 9150 - }, - { - "epoch": 0.3212060583723828, - "grad_norm": 0.4188441336154938, - "learning_rate": 3.8611111111111116e-05, - "loss": 0.5499, - "step": 9151 - }, - { - "epoch": 0.32124115902349987, - "grad_norm": 0.45268353819847107, - "learning_rate": 3.860925925925926e-05, - "loss": 0.4894, - "step": 9152 - }, - { - "epoch": 0.321276259674617, - "grad_norm": 0.5033339262008667, - "learning_rate": 3.860740740740741e-05, - "loss": 0.5084, - "step": 9153 - }, - { - "epoch": 0.32131136032573404, - "grad_norm": 0.5315006375312805, - "learning_rate": 3.860555555555555e-05, - "loss": 0.5299, - "step": 9154 - }, - { - "epoch": 0.3213464609768511, - "grad_norm": 0.5250594615936279, - "learning_rate": 3.86037037037037e-05, - "loss": 0.4355, - "step": 9155 - }, - { - "epoch": 0.3213815616279682, - "grad_norm": 0.4663780927658081, - "learning_rate": 3.860185185185185e-05, - "loss": 0.5019, - "step": 9156 - }, - { - "epoch": 0.3214166622790853, - "grad_norm": 0.46668723225593567, - "learning_rate": 3.86e-05, - "loss": 0.5551, - "step": 9157 - }, - { - "epoch": 0.32145176293020233, - "grad_norm": 0.4474014639854431, - "learning_rate": 3.859814814814815e-05, - "loss": 0.5372, - "step": 9158 - }, - { - "epoch": 0.32148686358131945, - "grad_norm": 0.4671368896961212, - "learning_rate": 3.8596296296296297e-05, - "loss": 0.5953, - "step": 9159 - }, - { - "epoch": 0.3215219642324365, - "grad_norm": 0.606778085231781, - "learning_rate": 3.859444444444445e-05, - "loss": 0.3963, - "step": 9160 - }, - { - "epoch": 0.32155706488355357, - "grad_norm": 0.49966976046562195, - "learning_rate": 3.85925925925926e-05, - "loss": 0.4382, - "step": 9161 - }, - { - "epoch": 0.3215921655346707, - "grad_norm": 0.39169833064079285, - "learning_rate": 3.859074074074075e-05, - "loss": 0.4126, - "step": 9162 - }, - { - "epoch": 0.32162726618578774, - "grad_norm": 0.5098713636398315, - "learning_rate": 3.858888888888889e-05, - "loss": 0.5162, - "step": 9163 - }, - { - "epoch": 0.3216623668369048, - "grad_norm": 0.4586046040058136, - "learning_rate": 3.858703703703704e-05, - "loss": 0.4906, - "step": 9164 - }, - { - "epoch": 0.3216974674880219, - "grad_norm": 0.4651489853858948, - "learning_rate": 3.8585185185185184e-05, - "loss": 0.3559, - "step": 9165 - }, - { - "epoch": 0.321732568139139, - "grad_norm": 0.49148714542388916, - "learning_rate": 3.8583333333333334e-05, - "loss": 0.3915, - "step": 9166 - }, - { - "epoch": 0.32176766879025603, - "grad_norm": 0.4221508502960205, - "learning_rate": 3.8581481481481484e-05, - "loss": 0.5023, - "step": 9167 - }, - { - "epoch": 0.32180276944137315, - "grad_norm": 0.5138300061225891, - "learning_rate": 3.8579629629629634e-05, - "loss": 0.5837, - "step": 9168 - }, - { - "epoch": 0.3218378700924902, - "grad_norm": 0.48664501309394836, - "learning_rate": 3.857777777777778e-05, - "loss": 0.4945, - "step": 9169 - }, - { - "epoch": 0.32187297074360727, - "grad_norm": 0.583771288394928, - "learning_rate": 3.857592592592593e-05, - "loss": 0.5215, - "step": 9170 - }, - { - "epoch": 0.3219080713947244, - "grad_norm": 0.4582420289516449, - "learning_rate": 3.857407407407407e-05, - "loss": 0.441, - "step": 9171 - }, - { - "epoch": 0.32194317204584144, - "grad_norm": 0.4572754502296448, - "learning_rate": 3.857222222222223e-05, - "loss": 0.4205, - "step": 9172 - }, - { - "epoch": 0.32197827269695856, - "grad_norm": 0.4600498378276825, - "learning_rate": 3.857037037037037e-05, - "loss": 0.4534, - "step": 9173 - }, - { - "epoch": 0.3220133733480756, - "grad_norm": 0.450205534696579, - "learning_rate": 3.856851851851852e-05, - "loss": 0.5585, - "step": 9174 - }, - { - "epoch": 0.3220484739991927, - "grad_norm": 0.45925307273864746, - "learning_rate": 3.8566666666666664e-05, - "loss": 0.538, - "step": 9175 - }, - { - "epoch": 0.3220835746503098, - "grad_norm": 0.4606945812702179, - "learning_rate": 3.8564814814814815e-05, - "loss": 0.4412, - "step": 9176 - }, - { - "epoch": 0.32211867530142685, - "grad_norm": 0.5876978635787964, - "learning_rate": 3.8562962962962965e-05, - "loss": 0.3488, - "step": 9177 - }, - { - "epoch": 0.3221537759525439, - "grad_norm": 0.42694371938705444, - "learning_rate": 3.8561111111111115e-05, - "loss": 0.5384, - "step": 9178 - }, - { - "epoch": 0.322188876603661, - "grad_norm": 0.5336583256721497, - "learning_rate": 3.8559259259259265e-05, - "loss": 0.6077, - "step": 9179 - }, - { - "epoch": 0.3222239772547781, - "grad_norm": 0.40044477581977844, - "learning_rate": 3.855740740740741e-05, - "loss": 0.4497, - "step": 9180 - }, - { - "epoch": 0.32225907790589514, - "grad_norm": 0.5328624248504639, - "learning_rate": 3.855555555555556e-05, - "loss": 0.5706, - "step": 9181 - }, - { - "epoch": 0.32229417855701226, - "grad_norm": 0.5562294125556946, - "learning_rate": 3.85537037037037e-05, - "loss": 0.505, - "step": 9182 - }, - { - "epoch": 0.3223292792081293, - "grad_norm": 0.4918789267539978, - "learning_rate": 3.855185185185186e-05, - "loss": 0.5197, - "step": 9183 - }, - { - "epoch": 0.3223643798592464, - "grad_norm": 0.5558565258979797, - "learning_rate": 3.855e-05, - "loss": 0.5323, - "step": 9184 - }, - { - "epoch": 0.3223994805103635, - "grad_norm": 0.33527126908302307, - "learning_rate": 3.854814814814815e-05, - "loss": 0.2507, - "step": 9185 - }, - { - "epoch": 0.32243458116148055, - "grad_norm": 0.459124892950058, - "learning_rate": 3.8546296296296295e-05, - "loss": 0.5509, - "step": 9186 - }, - { - "epoch": 0.3224696818125976, - "grad_norm": 0.4760535955429077, - "learning_rate": 3.8544444444444445e-05, - "loss": 0.4803, - "step": 9187 - }, - { - "epoch": 0.3225047824637147, - "grad_norm": 0.4319220185279846, - "learning_rate": 3.8542592592592596e-05, - "loss": 0.4199, - "step": 9188 - }, - { - "epoch": 0.3225398831148318, - "grad_norm": 0.4818323850631714, - "learning_rate": 3.8540740740740746e-05, - "loss": 0.4375, - "step": 9189 - }, - { - "epoch": 0.32257498376594884, - "grad_norm": 0.43079590797424316, - "learning_rate": 3.853888888888889e-05, - "loss": 0.421, - "step": 9190 - }, - { - "epoch": 0.32261008441706596, - "grad_norm": 0.4186486005783081, - "learning_rate": 3.853703703703704e-05, - "loss": 0.5764, - "step": 9191 - }, - { - "epoch": 0.322645185068183, - "grad_norm": 0.507675051689148, - "learning_rate": 3.853518518518518e-05, - "loss": 0.4369, - "step": 9192 - }, - { - "epoch": 0.3226802857193001, - "grad_norm": 0.4919483959674835, - "learning_rate": 3.853333333333334e-05, - "loss": 0.4704, - "step": 9193 - }, - { - "epoch": 0.3227153863704172, - "grad_norm": 0.6673687100410461, - "learning_rate": 3.853148148148148e-05, - "loss": 0.5113, - "step": 9194 - }, - { - "epoch": 0.32275048702153425, - "grad_norm": 0.45290225744247437, - "learning_rate": 3.852962962962963e-05, - "loss": 0.5668, - "step": 9195 - }, - { - "epoch": 0.3227855876726513, - "grad_norm": 0.4936917722225189, - "learning_rate": 3.8527777777777776e-05, - "loss": 0.4268, - "step": 9196 - }, - { - "epoch": 0.3228206883237684, - "grad_norm": 0.5639608502388, - "learning_rate": 3.8525925925925926e-05, - "loss": 0.6379, - "step": 9197 - }, - { - "epoch": 0.3228557889748855, - "grad_norm": 0.46298012137413025, - "learning_rate": 3.8524074074074076e-05, - "loss": 0.4574, - "step": 9198 - }, - { - "epoch": 0.32289088962600254, - "grad_norm": 0.6216588020324707, - "learning_rate": 3.8522222222222226e-05, - "loss": 0.5373, - "step": 9199 - }, - { - "epoch": 0.32292599027711966, - "grad_norm": 0.5268998146057129, - "learning_rate": 3.8520370370370377e-05, - "loss": 0.5104, - "step": 9200 - }, - { - "epoch": 0.3229610909282367, - "grad_norm": 0.6153725981712341, - "learning_rate": 3.851851851851852e-05, - "loss": 0.5001, - "step": 9201 - }, - { - "epoch": 0.3229961915793538, - "grad_norm": 0.4442901611328125, - "learning_rate": 3.851666666666667e-05, - "loss": 0.5612, - "step": 9202 - }, - { - "epoch": 0.3230312922304709, - "grad_norm": 0.4876284599304199, - "learning_rate": 3.851481481481481e-05, - "loss": 0.5199, - "step": 9203 - }, - { - "epoch": 0.32306639288158795, - "grad_norm": 0.5207678079605103, - "learning_rate": 3.851296296296297e-05, - "loss": 0.4624, - "step": 9204 - }, - { - "epoch": 0.323101493532705, - "grad_norm": 0.4854971766471863, - "learning_rate": 3.8511111111111114e-05, - "loss": 0.4477, - "step": 9205 - }, - { - "epoch": 0.3231365941838221, - "grad_norm": 0.5762370228767395, - "learning_rate": 3.8509259259259264e-05, - "loss": 0.5242, - "step": 9206 - }, - { - "epoch": 0.3231716948349392, - "grad_norm": 0.4247106611728668, - "learning_rate": 3.850740740740741e-05, - "loss": 0.4197, - "step": 9207 - }, - { - "epoch": 0.32320679548605624, - "grad_norm": 0.4779694676399231, - "learning_rate": 3.850555555555556e-05, - "loss": 0.5222, - "step": 9208 - }, - { - "epoch": 0.32324189613717336, - "grad_norm": 0.49420925974845886, - "learning_rate": 3.85037037037037e-05, - "loss": 0.4582, - "step": 9209 - }, - { - "epoch": 0.3232769967882904, - "grad_norm": 0.5605322122573853, - "learning_rate": 3.850185185185186e-05, - "loss": 0.4084, - "step": 9210 - }, - { - "epoch": 0.3233120974394075, - "grad_norm": 0.5762921571731567, - "learning_rate": 3.85e-05, - "loss": 0.5553, - "step": 9211 - }, - { - "epoch": 0.3233471980905246, - "grad_norm": 0.4808727502822876, - "learning_rate": 3.849814814814815e-05, - "loss": 0.4048, - "step": 9212 - }, - { - "epoch": 0.32338229874164165, - "grad_norm": 0.5897752046585083, - "learning_rate": 3.8496296296296294e-05, - "loss": 0.5177, - "step": 9213 - }, - { - "epoch": 0.3234173993927587, - "grad_norm": 0.4771571159362793, - "learning_rate": 3.8494444444444444e-05, - "loss": 0.5627, - "step": 9214 - }, - { - "epoch": 0.3234525000438758, - "grad_norm": 0.48758333921432495, - "learning_rate": 3.8492592592592594e-05, - "loss": 0.4997, - "step": 9215 - }, - { - "epoch": 0.3234876006949929, - "grad_norm": 0.46219125390052795, - "learning_rate": 3.8490740740740744e-05, - "loss": 0.5586, - "step": 9216 - }, - { - "epoch": 0.32352270134611, - "grad_norm": 0.3750215768814087, - "learning_rate": 3.848888888888889e-05, - "loss": 0.5047, - "step": 9217 - }, - { - "epoch": 0.32355780199722706, - "grad_norm": 0.4475983679294586, - "learning_rate": 3.848703703703704e-05, - "loss": 0.477, - "step": 9218 - }, - { - "epoch": 0.3235929026483441, - "grad_norm": 0.4987353980541229, - "learning_rate": 3.848518518518519e-05, - "loss": 0.4446, - "step": 9219 - }, - { - "epoch": 0.32362800329946123, - "grad_norm": 0.41485172510147095, - "learning_rate": 3.848333333333334e-05, - "loss": 0.4777, - "step": 9220 - }, - { - "epoch": 0.3236631039505783, - "grad_norm": 0.4860338270664215, - "learning_rate": 3.848148148148149e-05, - "loss": 0.4503, - "step": 9221 - }, - { - "epoch": 0.32369820460169535, - "grad_norm": 0.5176123976707458, - "learning_rate": 3.847962962962963e-05, - "loss": 0.568, - "step": 9222 - }, - { - "epoch": 0.32373330525281246, - "grad_norm": 0.41937127709388733, - "learning_rate": 3.847777777777778e-05, - "loss": 0.4544, - "step": 9223 - }, - { - "epoch": 0.3237684059039295, - "grad_norm": 0.39771080017089844, - "learning_rate": 3.8475925925925925e-05, - "loss": 0.4312, - "step": 9224 - }, - { - "epoch": 0.3238035065550466, - "grad_norm": 0.506766140460968, - "learning_rate": 3.8474074074074075e-05, - "loss": 0.4839, - "step": 9225 - }, - { - "epoch": 0.3238386072061637, - "grad_norm": 0.41584137082099915, - "learning_rate": 3.8472222222222225e-05, - "loss": 0.4826, - "step": 9226 - }, - { - "epoch": 0.32387370785728076, - "grad_norm": 0.4920274615287781, - "learning_rate": 3.8470370370370375e-05, - "loss": 0.5346, - "step": 9227 - }, - { - "epoch": 0.3239088085083978, - "grad_norm": 0.5773022174835205, - "learning_rate": 3.846851851851852e-05, - "loss": 0.5375, - "step": 9228 - }, - { - "epoch": 0.32394390915951493, - "grad_norm": 0.48240941762924194, - "learning_rate": 3.846666666666667e-05, - "loss": 0.4326, - "step": 9229 - }, - { - "epoch": 0.323979009810632, - "grad_norm": 0.4621478021144867, - "learning_rate": 3.846481481481481e-05, - "loss": 0.4973, - "step": 9230 - }, - { - "epoch": 0.32401411046174905, - "grad_norm": 0.4163142740726471, - "learning_rate": 3.846296296296297e-05, - "loss": 0.3916, - "step": 9231 - }, - { - "epoch": 0.32404921111286616, - "grad_norm": 0.5156774520874023, - "learning_rate": 3.846111111111111e-05, - "loss": 0.4437, - "step": 9232 - }, - { - "epoch": 0.3240843117639832, - "grad_norm": 0.45847970247268677, - "learning_rate": 3.845925925925926e-05, - "loss": 0.4994, - "step": 9233 - }, - { - "epoch": 0.3241194124151003, - "grad_norm": 0.42578649520874023, - "learning_rate": 3.8457407407407406e-05, - "loss": 0.4813, - "step": 9234 - }, - { - "epoch": 0.3241545130662174, - "grad_norm": 0.5453693866729736, - "learning_rate": 3.8455555555555556e-05, - "loss": 0.443, - "step": 9235 - }, - { - "epoch": 0.32418961371733446, - "grad_norm": 0.4323490560054779, - "learning_rate": 3.8453703703703706e-05, - "loss": 0.4785, - "step": 9236 - }, - { - "epoch": 0.3242247143684515, - "grad_norm": 0.4076054096221924, - "learning_rate": 3.8451851851851856e-05, - "loss": 0.5687, - "step": 9237 - }, - { - "epoch": 0.32425981501956863, - "grad_norm": 0.42329540848731995, - "learning_rate": 3.845e-05, - "loss": 0.5579, - "step": 9238 - }, - { - "epoch": 0.3242949156706857, - "grad_norm": 0.464568555355072, - "learning_rate": 3.844814814814815e-05, - "loss": 0.5636, - "step": 9239 - }, - { - "epoch": 0.32433001632180275, - "grad_norm": 0.44332945346832275, - "learning_rate": 3.84462962962963e-05, - "loss": 0.4147, - "step": 9240 - }, - { - "epoch": 0.32436511697291986, - "grad_norm": 0.5548185110092163, - "learning_rate": 3.844444444444444e-05, - "loss": 0.5821, - "step": 9241 - }, - { - "epoch": 0.3244002176240369, - "grad_norm": 0.45361611247062683, - "learning_rate": 3.84425925925926e-05, - "loss": 0.4824, - "step": 9242 - }, - { - "epoch": 0.324435318275154, - "grad_norm": 0.46461254358291626, - "learning_rate": 3.844074074074074e-05, - "loss": 0.5612, - "step": 9243 - }, - { - "epoch": 0.3244704189262711, - "grad_norm": 0.44193509221076965, - "learning_rate": 3.843888888888889e-05, - "loss": 0.4597, - "step": 9244 - }, - { - "epoch": 0.32450551957738816, - "grad_norm": 0.4829690754413605, - "learning_rate": 3.843703703703704e-05, - "loss": 0.5756, - "step": 9245 - }, - { - "epoch": 0.3245406202285052, - "grad_norm": 0.541575014591217, - "learning_rate": 3.843518518518519e-05, - "loss": 0.5575, - "step": 9246 - }, - { - "epoch": 0.32457572087962233, - "grad_norm": 0.46903789043426514, - "learning_rate": 3.843333333333334e-05, - "loss": 0.5385, - "step": 9247 - }, - { - "epoch": 0.3246108215307394, - "grad_norm": 0.4345047175884247, - "learning_rate": 3.843148148148149e-05, - "loss": 0.43, - "step": 9248 - }, - { - "epoch": 0.32464592218185645, - "grad_norm": 0.47288572788238525, - "learning_rate": 3.842962962962963e-05, - "loss": 0.3955, - "step": 9249 - }, - { - "epoch": 0.32468102283297356, - "grad_norm": 0.438863605260849, - "learning_rate": 3.842777777777778e-05, - "loss": 0.4721, - "step": 9250 - }, - { - "epoch": 0.3247161234840906, - "grad_norm": 0.4718061685562134, - "learning_rate": 3.8425925925925924e-05, - "loss": 0.434, - "step": 9251 - }, - { - "epoch": 0.3247512241352077, - "grad_norm": 0.44974225759506226, - "learning_rate": 3.8424074074074074e-05, - "loss": 0.4621, - "step": 9252 - }, - { - "epoch": 0.3247863247863248, - "grad_norm": 0.4371887743473053, - "learning_rate": 3.8422222222222224e-05, - "loss": 0.4513, - "step": 9253 - }, - { - "epoch": 0.32482142543744186, - "grad_norm": 0.46292662620544434, - "learning_rate": 3.8420370370370374e-05, - "loss": 0.5296, - "step": 9254 - }, - { - "epoch": 0.3248565260885589, - "grad_norm": 0.48408782482147217, - "learning_rate": 3.841851851851852e-05, - "loss": 0.5949, - "step": 9255 - }, - { - "epoch": 0.32489162673967603, - "grad_norm": 0.5083295106887817, - "learning_rate": 3.841666666666667e-05, - "loss": 0.5006, - "step": 9256 - }, - { - "epoch": 0.3249267273907931, - "grad_norm": 0.5054420232772827, - "learning_rate": 3.841481481481482e-05, - "loss": 0.5502, - "step": 9257 - }, - { - "epoch": 0.32496182804191015, - "grad_norm": 0.42754602432250977, - "learning_rate": 3.841296296296297e-05, - "loss": 0.5154, - "step": 9258 - }, - { - "epoch": 0.32499692869302726, - "grad_norm": 0.5088138580322266, - "learning_rate": 3.841111111111111e-05, - "loss": 0.5313, - "step": 9259 - }, - { - "epoch": 0.3250320293441443, - "grad_norm": 0.6036704182624817, - "learning_rate": 3.840925925925926e-05, - "loss": 0.6162, - "step": 9260 - }, - { - "epoch": 0.32506712999526144, - "grad_norm": 0.5288023948669434, - "learning_rate": 3.840740740740741e-05, - "loss": 0.4344, - "step": 9261 - }, - { - "epoch": 0.3251022306463785, - "grad_norm": 0.538509726524353, - "learning_rate": 3.8405555555555555e-05, - "loss": 0.5363, - "step": 9262 - }, - { - "epoch": 0.32513733129749556, - "grad_norm": 0.4872816801071167, - "learning_rate": 3.8403703703703705e-05, - "loss": 0.432, - "step": 9263 - }, - { - "epoch": 0.3251724319486127, - "grad_norm": 0.45741620659828186, - "learning_rate": 3.8401851851851855e-05, - "loss": 0.5807, - "step": 9264 - }, - { - "epoch": 0.32520753259972973, - "grad_norm": 0.5220634341239929, - "learning_rate": 3.8400000000000005e-05, - "loss": 0.5057, - "step": 9265 - }, - { - "epoch": 0.3252426332508468, - "grad_norm": 0.41928473114967346, - "learning_rate": 3.839814814814815e-05, - "loss": 0.5376, - "step": 9266 - }, - { - "epoch": 0.3252777339019639, - "grad_norm": 0.5546934604644775, - "learning_rate": 3.83962962962963e-05, - "loss": 0.4941, - "step": 9267 - }, - { - "epoch": 0.32531283455308097, - "grad_norm": 0.4720420837402344, - "learning_rate": 3.839444444444444e-05, - "loss": 0.5351, - "step": 9268 - }, - { - "epoch": 0.325347935204198, - "grad_norm": 0.46533286571502686, - "learning_rate": 3.83925925925926e-05, - "loss": 0.5638, - "step": 9269 - }, - { - "epoch": 0.32538303585531514, - "grad_norm": 0.45826566219329834, - "learning_rate": 3.839074074074074e-05, - "loss": 0.4983, - "step": 9270 - }, - { - "epoch": 0.3254181365064322, - "grad_norm": 0.5523805022239685, - "learning_rate": 3.838888888888889e-05, - "loss": 0.5032, - "step": 9271 - }, - { - "epoch": 0.32545323715754926, - "grad_norm": 0.46050912141799927, - "learning_rate": 3.8387037037037035e-05, - "loss": 0.4656, - "step": 9272 - }, - { - "epoch": 0.3254883378086664, - "grad_norm": 0.39523109793663025, - "learning_rate": 3.8385185185185186e-05, - "loss": 0.4347, - "step": 9273 - }, - { - "epoch": 0.32552343845978343, - "grad_norm": 0.4400665760040283, - "learning_rate": 3.8383333333333336e-05, - "loss": 0.446, - "step": 9274 - }, - { - "epoch": 0.3255585391109005, - "grad_norm": 0.409435510635376, - "learning_rate": 3.8381481481481486e-05, - "loss": 0.4054, - "step": 9275 - }, - { - "epoch": 0.3255936397620176, - "grad_norm": 0.4444444179534912, - "learning_rate": 3.837962962962963e-05, - "loss": 0.4786, - "step": 9276 - }, - { - "epoch": 0.32562874041313467, - "grad_norm": 0.4312337338924408, - "learning_rate": 3.837777777777778e-05, - "loss": 0.4379, - "step": 9277 - }, - { - "epoch": 0.3256638410642517, - "grad_norm": 0.5149259567260742, - "learning_rate": 3.837592592592593e-05, - "loss": 0.5254, - "step": 9278 - }, - { - "epoch": 0.32569894171536884, - "grad_norm": 0.46000880002975464, - "learning_rate": 3.837407407407407e-05, - "loss": 0.4512, - "step": 9279 - }, - { - "epoch": 0.3257340423664859, - "grad_norm": 0.5909322500228882, - "learning_rate": 3.837222222222222e-05, - "loss": 0.4609, - "step": 9280 - }, - { - "epoch": 0.32576914301760296, - "grad_norm": 0.49806466698646545, - "learning_rate": 3.837037037037037e-05, - "loss": 0.6005, - "step": 9281 - }, - { - "epoch": 0.3258042436687201, - "grad_norm": 0.44262635707855225, - "learning_rate": 3.836851851851852e-05, - "loss": 0.5441, - "step": 9282 - }, - { - "epoch": 0.32583934431983713, - "grad_norm": 0.5087305307388306, - "learning_rate": 3.8366666666666666e-05, - "loss": 0.5563, - "step": 9283 - }, - { - "epoch": 0.3258744449709542, - "grad_norm": 0.4341077506542206, - "learning_rate": 3.8364814814814816e-05, - "loss": 0.5529, - "step": 9284 - }, - { - "epoch": 0.3259095456220713, - "grad_norm": 0.45717862248420715, - "learning_rate": 3.8362962962962967e-05, - "loss": 0.5498, - "step": 9285 - }, - { - "epoch": 0.32594464627318837, - "grad_norm": 0.46193912625312805, - "learning_rate": 3.836111111111112e-05, - "loss": 0.5164, - "step": 9286 - }, - { - "epoch": 0.3259797469243054, - "grad_norm": 0.4871320128440857, - "learning_rate": 3.835925925925926e-05, - "loss": 0.5255, - "step": 9287 - }, - { - "epoch": 0.32601484757542254, - "grad_norm": 0.48089537024497986, - "learning_rate": 3.835740740740741e-05, - "loss": 0.4448, - "step": 9288 - }, - { - "epoch": 0.3260499482265396, - "grad_norm": 0.48508012294769287, - "learning_rate": 3.8355555555555553e-05, - "loss": 0.4829, - "step": 9289 - }, - { - "epoch": 0.32608504887765666, - "grad_norm": 0.5473867058753967, - "learning_rate": 3.8353703703703704e-05, - "loss": 0.4297, - "step": 9290 - }, - { - "epoch": 0.3261201495287738, - "grad_norm": 0.4745658338069916, - "learning_rate": 3.8351851851851854e-05, - "loss": 0.5722, - "step": 9291 - }, - { - "epoch": 0.32615525017989083, - "grad_norm": 0.4657931625843048, - "learning_rate": 3.8350000000000004e-05, - "loss": 0.3864, - "step": 9292 - }, - { - "epoch": 0.3261903508310079, - "grad_norm": 0.5737243890762329, - "learning_rate": 3.834814814814815e-05, - "loss": 0.6455, - "step": 9293 - }, - { - "epoch": 0.326225451482125, - "grad_norm": 0.4007911682128906, - "learning_rate": 3.83462962962963e-05, - "loss": 0.4489, - "step": 9294 - }, - { - "epoch": 0.32626055213324207, - "grad_norm": 0.427497535943985, - "learning_rate": 3.834444444444444e-05, - "loss": 0.3441, - "step": 9295 - }, - { - "epoch": 0.3262956527843591, - "grad_norm": 0.5464351773262024, - "learning_rate": 3.83425925925926e-05, - "loss": 0.551, - "step": 9296 - }, - { - "epoch": 0.32633075343547624, - "grad_norm": 0.4382094740867615, - "learning_rate": 3.834074074074074e-05, - "loss": 0.5498, - "step": 9297 - }, - { - "epoch": 0.3263658540865933, - "grad_norm": 0.432677298784256, - "learning_rate": 3.833888888888889e-05, - "loss": 0.48, - "step": 9298 - }, - { - "epoch": 0.32640095473771036, - "grad_norm": 0.5007349848747253, - "learning_rate": 3.833703703703704e-05, - "loss": 0.4527, - "step": 9299 - }, - { - "epoch": 0.3264360553888275, - "grad_norm": 0.4536573886871338, - "learning_rate": 3.8335185185185184e-05, - "loss": 0.5271, - "step": 9300 - }, - { - "epoch": 0.32647115603994453, - "grad_norm": 0.45095404982566833, - "learning_rate": 3.8333333333333334e-05, - "loss": 0.3423, - "step": 9301 - }, - { - "epoch": 0.3265062566910616, - "grad_norm": 0.5135613679885864, - "learning_rate": 3.8331481481481485e-05, - "loss": 0.6212, - "step": 9302 - }, - { - "epoch": 0.3265413573421787, - "grad_norm": 0.42298248410224915, - "learning_rate": 3.8329629629629635e-05, - "loss": 0.5092, - "step": 9303 - }, - { - "epoch": 0.32657645799329577, - "grad_norm": 0.4546050429344177, - "learning_rate": 3.832777777777778e-05, - "loss": 0.5753, - "step": 9304 - }, - { - "epoch": 0.3266115586444129, - "grad_norm": 0.46818095445632935, - "learning_rate": 3.832592592592593e-05, - "loss": 0.3538, - "step": 9305 - }, - { - "epoch": 0.32664665929552994, - "grad_norm": 0.4135405719280243, - "learning_rate": 3.832407407407407e-05, - "loss": 0.3968, - "step": 9306 - }, - { - "epoch": 0.326681759946647, - "grad_norm": 0.4737575650215149, - "learning_rate": 3.832222222222223e-05, - "loss": 0.54, - "step": 9307 - }, - { - "epoch": 0.3267168605977641, - "grad_norm": 0.46787646412849426, - "learning_rate": 3.832037037037037e-05, - "loss": 0.5031, - "step": 9308 - }, - { - "epoch": 0.3267519612488812, - "grad_norm": 0.4305093586444855, - "learning_rate": 3.831851851851852e-05, - "loss": 0.4459, - "step": 9309 - }, - { - "epoch": 0.32678706189999823, - "grad_norm": 0.5001400113105774, - "learning_rate": 3.8316666666666665e-05, - "loss": 0.4964, - "step": 9310 - }, - { - "epoch": 0.32682216255111535, - "grad_norm": 0.4723387360572815, - "learning_rate": 3.8314814814814815e-05, - "loss": 0.4273, - "step": 9311 - }, - { - "epoch": 0.3268572632022324, - "grad_norm": 0.5370679497718811, - "learning_rate": 3.8312962962962965e-05, - "loss": 0.5614, - "step": 9312 - }, - { - "epoch": 0.32689236385334947, - "grad_norm": 0.5238627195358276, - "learning_rate": 3.8311111111111115e-05, - "loss": 0.4959, - "step": 9313 - }, - { - "epoch": 0.3269274645044666, - "grad_norm": 0.4484194815158844, - "learning_rate": 3.830925925925926e-05, - "loss": 0.4269, - "step": 9314 - }, - { - "epoch": 0.32696256515558364, - "grad_norm": 0.3670453429222107, - "learning_rate": 3.830740740740741e-05, - "loss": 0.4387, - "step": 9315 - }, - { - "epoch": 0.3269976658067007, - "grad_norm": 0.44881558418273926, - "learning_rate": 3.830555555555555e-05, - "loss": 0.5167, - "step": 9316 - }, - { - "epoch": 0.3270327664578178, - "grad_norm": 0.4811628758907318, - "learning_rate": 3.830370370370371e-05, - "loss": 0.4398, - "step": 9317 - }, - { - "epoch": 0.3270678671089349, - "grad_norm": 0.4256042242050171, - "learning_rate": 3.830185185185185e-05, - "loss": 0.4409, - "step": 9318 - }, - { - "epoch": 0.32710296776005193, - "grad_norm": 0.49674180150032043, - "learning_rate": 3.83e-05, - "loss": 0.4813, - "step": 9319 - }, - { - "epoch": 0.32713806841116905, - "grad_norm": 0.5567758083343506, - "learning_rate": 3.829814814814815e-05, - "loss": 0.5669, - "step": 9320 - }, - { - "epoch": 0.3271731690622861, - "grad_norm": 0.5041676163673401, - "learning_rate": 3.8296296296296296e-05, - "loss": 0.5511, - "step": 9321 - }, - { - "epoch": 0.32720826971340317, - "grad_norm": 0.4371301531791687, - "learning_rate": 3.8294444444444446e-05, - "loss": 0.4757, - "step": 9322 - }, - { - "epoch": 0.3272433703645203, - "grad_norm": 0.3583464026451111, - "learning_rate": 3.8292592592592596e-05, - "loss": 0.3399, - "step": 9323 - }, - { - "epoch": 0.32727847101563734, - "grad_norm": 0.40993252396583557, - "learning_rate": 3.8290740740740746e-05, - "loss": 0.5258, - "step": 9324 - }, - { - "epoch": 0.3273135716667544, - "grad_norm": 0.4972432851791382, - "learning_rate": 3.828888888888889e-05, - "loss": 0.328, - "step": 9325 - }, - { - "epoch": 0.3273486723178715, - "grad_norm": 0.4204657971858978, - "learning_rate": 3.828703703703704e-05, - "loss": 0.4151, - "step": 9326 - }, - { - "epoch": 0.3273837729689886, - "grad_norm": 0.4658922851085663, - "learning_rate": 3.828518518518518e-05, - "loss": 0.5625, - "step": 9327 - }, - { - "epoch": 0.32741887362010563, - "grad_norm": 0.4045134484767914, - "learning_rate": 3.828333333333334e-05, - "loss": 0.5627, - "step": 9328 - }, - { - "epoch": 0.32745397427122275, - "grad_norm": 0.4482491612434387, - "learning_rate": 3.8281481481481483e-05, - "loss": 0.5111, - "step": 9329 - }, - { - "epoch": 0.3274890749223398, - "grad_norm": 0.4614836275577545, - "learning_rate": 3.8279629629629633e-05, - "loss": 0.5789, - "step": 9330 - }, - { - "epoch": 0.32752417557345687, - "grad_norm": 0.37776219844818115, - "learning_rate": 3.827777777777778e-05, - "loss": 0.4131, - "step": 9331 - }, - { - "epoch": 0.327559276224574, - "grad_norm": 0.5117833018302917, - "learning_rate": 3.827592592592593e-05, - "loss": 0.6137, - "step": 9332 - }, - { - "epoch": 0.32759437687569104, - "grad_norm": 0.4870204031467438, - "learning_rate": 3.827407407407407e-05, - "loss": 0.5523, - "step": 9333 - }, - { - "epoch": 0.3276294775268081, - "grad_norm": 0.46982261538505554, - "learning_rate": 3.827222222222223e-05, - "loss": 0.5301, - "step": 9334 - }, - { - "epoch": 0.3276645781779252, - "grad_norm": 0.42041125893592834, - "learning_rate": 3.827037037037037e-05, - "loss": 0.4656, - "step": 9335 - }, - { - "epoch": 0.3276996788290423, - "grad_norm": 0.4803139865398407, - "learning_rate": 3.826851851851852e-05, - "loss": 0.5328, - "step": 9336 - }, - { - "epoch": 0.32773477948015933, - "grad_norm": 0.47635799646377563, - "learning_rate": 3.8266666666666664e-05, - "loss": 0.5059, - "step": 9337 - }, - { - "epoch": 0.32776988013127645, - "grad_norm": 0.52680903673172, - "learning_rate": 3.8264814814814814e-05, - "loss": 0.5071, - "step": 9338 - }, - { - "epoch": 0.3278049807823935, - "grad_norm": 0.46189409494400024, - "learning_rate": 3.8262962962962964e-05, - "loss": 0.6026, - "step": 9339 - }, - { - "epoch": 0.32784008143351057, - "grad_norm": 0.4247935116291046, - "learning_rate": 3.8261111111111114e-05, - "loss": 0.5068, - "step": 9340 - }, - { - "epoch": 0.3278751820846277, - "grad_norm": 0.48132506012916565, - "learning_rate": 3.8259259259259264e-05, - "loss": 0.4289, - "step": 9341 - }, - { - "epoch": 0.32791028273574474, - "grad_norm": 0.5261765122413635, - "learning_rate": 3.825740740740741e-05, - "loss": 0.5556, - "step": 9342 - }, - { - "epoch": 0.3279453833868618, - "grad_norm": 0.4512733221054077, - "learning_rate": 3.825555555555556e-05, - "loss": 0.4621, - "step": 9343 - }, - { - "epoch": 0.3279804840379789, - "grad_norm": 0.5076183676719666, - "learning_rate": 3.825370370370371e-05, - "loss": 0.3838, - "step": 9344 - }, - { - "epoch": 0.328015584689096, - "grad_norm": 0.47266507148742676, - "learning_rate": 3.825185185185186e-05, - "loss": 0.5119, - "step": 9345 - }, - { - "epoch": 0.3280506853402131, - "grad_norm": 0.4862973093986511, - "learning_rate": 3.825e-05, - "loss": 0.4539, - "step": 9346 - }, - { - "epoch": 0.32808578599133015, - "grad_norm": 0.4483219385147095, - "learning_rate": 3.824814814814815e-05, - "loss": 0.4791, - "step": 9347 - }, - { - "epoch": 0.3281208866424472, - "grad_norm": 0.5024521350860596, - "learning_rate": 3.8246296296296295e-05, - "loss": 0.499, - "step": 9348 - }, - { - "epoch": 0.3281559872935643, - "grad_norm": 0.45451319217681885, - "learning_rate": 3.8244444444444445e-05, - "loss": 0.4144, - "step": 9349 - }, - { - "epoch": 0.3281910879446814, - "grad_norm": 0.4293416142463684, - "learning_rate": 3.8242592592592595e-05, - "loss": 0.4735, - "step": 9350 - }, - { - "epoch": 0.32822618859579844, - "grad_norm": 0.5343773365020752, - "learning_rate": 3.8240740740740745e-05, - "loss": 0.4355, - "step": 9351 - }, - { - "epoch": 0.32826128924691556, - "grad_norm": 0.47210532426834106, - "learning_rate": 3.823888888888889e-05, - "loss": 0.5782, - "step": 9352 - }, - { - "epoch": 0.3282963898980326, - "grad_norm": 0.817824125289917, - "learning_rate": 3.823703703703704e-05, - "loss": 0.589, - "step": 9353 - }, - { - "epoch": 0.3283314905491497, - "grad_norm": 0.5242180824279785, - "learning_rate": 3.823518518518518e-05, - "loss": 0.5657, - "step": 9354 - }, - { - "epoch": 0.3283665912002668, - "grad_norm": 0.4580303132534027, - "learning_rate": 3.823333333333334e-05, - "loss": 0.5222, - "step": 9355 - }, - { - "epoch": 0.32840169185138385, - "grad_norm": 0.4436110556125641, - "learning_rate": 3.823148148148148e-05, - "loss": 0.3909, - "step": 9356 - }, - { - "epoch": 0.3284367925025009, - "grad_norm": 0.38960838317871094, - "learning_rate": 3.822962962962963e-05, - "loss": 0.4902, - "step": 9357 - }, - { - "epoch": 0.328471893153618, - "grad_norm": 0.38377347588539124, - "learning_rate": 3.822777777777778e-05, - "loss": 0.4832, - "step": 9358 - }, - { - "epoch": 0.3285069938047351, - "grad_norm": 0.4770687222480774, - "learning_rate": 3.8225925925925926e-05, - "loss": 0.484, - "step": 9359 - }, - { - "epoch": 0.32854209445585214, - "grad_norm": 0.4771292209625244, - "learning_rate": 3.8224074074074076e-05, - "loss": 0.3809, - "step": 9360 - }, - { - "epoch": 0.32857719510696926, - "grad_norm": 0.6387614011764526, - "learning_rate": 3.8222222222222226e-05, - "loss": 0.5726, - "step": 9361 - }, - { - "epoch": 0.3286122957580863, - "grad_norm": 0.428085058927536, - "learning_rate": 3.8220370370370376e-05, - "loss": 0.4874, - "step": 9362 - }, - { - "epoch": 0.3286473964092034, - "grad_norm": 0.5123072266578674, - "learning_rate": 3.821851851851852e-05, - "loss": 0.5596, - "step": 9363 - }, - { - "epoch": 0.3286824970603205, - "grad_norm": 1.07273268699646, - "learning_rate": 3.821666666666667e-05, - "loss": 0.3843, - "step": 9364 - }, - { - "epoch": 0.32871759771143755, - "grad_norm": 0.4039015471935272, - "learning_rate": 3.821481481481481e-05, - "loss": 0.4785, - "step": 9365 - }, - { - "epoch": 0.3287526983625546, - "grad_norm": 0.4861341118812561, - "learning_rate": 3.821296296296297e-05, - "loss": 0.5039, - "step": 9366 - }, - { - "epoch": 0.3287877990136717, - "grad_norm": 0.4581652581691742, - "learning_rate": 3.821111111111111e-05, - "loss": 0.4267, - "step": 9367 - }, - { - "epoch": 0.3288228996647888, - "grad_norm": 0.4693053364753723, - "learning_rate": 3.820925925925926e-05, - "loss": 0.5443, - "step": 9368 - }, - { - "epoch": 0.32885800031590584, - "grad_norm": 0.4871752858161926, - "learning_rate": 3.8207407407407407e-05, - "loss": 0.5336, - "step": 9369 - }, - { - "epoch": 0.32889310096702296, - "grad_norm": 0.5412018895149231, - "learning_rate": 3.820555555555556e-05, - "loss": 0.4972, - "step": 9370 - }, - { - "epoch": 0.32892820161814, - "grad_norm": 0.43549636006355286, - "learning_rate": 3.820370370370371e-05, - "loss": 0.3722, - "step": 9371 - }, - { - "epoch": 0.3289633022692571, - "grad_norm": 0.46050047874450684, - "learning_rate": 3.820185185185186e-05, - "loss": 0.5405, - "step": 9372 - }, - { - "epoch": 0.3289984029203742, - "grad_norm": 0.5348737239837646, - "learning_rate": 3.82e-05, - "loss": 0.5398, - "step": 9373 - }, - { - "epoch": 0.32903350357149125, - "grad_norm": 0.47461962699890137, - "learning_rate": 3.819814814814815e-05, - "loss": 0.5614, - "step": 9374 - }, - { - "epoch": 0.3290686042226083, - "grad_norm": 0.44448021054267883, - "learning_rate": 3.8196296296296294e-05, - "loss": 0.5502, - "step": 9375 - }, - { - "epoch": 0.3291037048737254, - "grad_norm": 0.5185517072677612, - "learning_rate": 3.8194444444444444e-05, - "loss": 0.5131, - "step": 9376 - }, - { - "epoch": 0.3291388055248425, - "grad_norm": 0.39955005049705505, - "learning_rate": 3.8192592592592594e-05, - "loss": 0.4071, - "step": 9377 - }, - { - "epoch": 0.32917390617595954, - "grad_norm": 0.5174840092658997, - "learning_rate": 3.8190740740740744e-05, - "loss": 0.4949, - "step": 9378 - }, - { - "epoch": 0.32920900682707666, - "grad_norm": 0.44163230061531067, - "learning_rate": 3.8188888888888894e-05, - "loss": 0.5001, - "step": 9379 - }, - { - "epoch": 0.3292441074781937, - "grad_norm": 0.6291944980621338, - "learning_rate": 3.818703703703704e-05, - "loss": 0.3726, - "step": 9380 - }, - { - "epoch": 0.3292792081293108, - "grad_norm": 0.44206342101097107, - "learning_rate": 3.818518518518519e-05, - "loss": 0.4627, - "step": 9381 - }, - { - "epoch": 0.3293143087804279, - "grad_norm": 0.4779073894023895, - "learning_rate": 3.818333333333334e-05, - "loss": 0.5374, - "step": 9382 - }, - { - "epoch": 0.32934940943154495, - "grad_norm": 0.49121931195259094, - "learning_rate": 3.818148148148149e-05, - "loss": 0.4959, - "step": 9383 - }, - { - "epoch": 0.329384510082662, - "grad_norm": 0.5070273280143738, - "learning_rate": 3.817962962962963e-05, - "loss": 0.4879, - "step": 9384 - }, - { - "epoch": 0.3294196107337791, - "grad_norm": 0.48291391134262085, - "learning_rate": 3.817777777777778e-05, - "loss": 0.5492, - "step": 9385 - }, - { - "epoch": 0.3294547113848962, - "grad_norm": 0.42173653841018677, - "learning_rate": 3.8175925925925925e-05, - "loss": 0.4944, - "step": 9386 - }, - { - "epoch": 0.32948981203601324, - "grad_norm": 0.4222872853279114, - "learning_rate": 3.8174074074074075e-05, - "loss": 0.4686, - "step": 9387 - }, - { - "epoch": 0.32952491268713036, - "grad_norm": 0.5215309858322144, - "learning_rate": 3.8172222222222225e-05, - "loss": 0.5477, - "step": 9388 - }, - { - "epoch": 0.3295600133382474, - "grad_norm": 0.49774911999702454, - "learning_rate": 3.8170370370370375e-05, - "loss": 0.5159, - "step": 9389 - }, - { - "epoch": 0.32959511398936453, - "grad_norm": 0.4349445700645447, - "learning_rate": 3.816851851851852e-05, - "loss": 0.5697, - "step": 9390 - }, - { - "epoch": 0.3296302146404816, - "grad_norm": 0.4938293695449829, - "learning_rate": 3.816666666666667e-05, - "loss": 0.5178, - "step": 9391 - }, - { - "epoch": 0.32966531529159865, - "grad_norm": 0.617517352104187, - "learning_rate": 3.816481481481481e-05, - "loss": 0.5938, - "step": 9392 - }, - { - "epoch": 0.32970041594271576, - "grad_norm": 0.4563286304473877, - "learning_rate": 3.816296296296297e-05, - "loss": 0.5734, - "step": 9393 - }, - { - "epoch": 0.3297355165938328, - "grad_norm": 0.4529157876968384, - "learning_rate": 3.816111111111111e-05, - "loss": 0.5535, - "step": 9394 - }, - { - "epoch": 0.3297706172449499, - "grad_norm": 0.4178524315357208, - "learning_rate": 3.815925925925926e-05, - "loss": 0.4286, - "step": 9395 - }, - { - "epoch": 0.329805717896067, - "grad_norm": 0.4390113055706024, - "learning_rate": 3.8157407407407405e-05, - "loss": 0.5158, - "step": 9396 - }, - { - "epoch": 0.32984081854718406, - "grad_norm": 0.492967814207077, - "learning_rate": 3.8155555555555555e-05, - "loss": 0.5384, - "step": 9397 - }, - { - "epoch": 0.3298759191983011, - "grad_norm": 0.40793269872665405, - "learning_rate": 3.8153703703703706e-05, - "loss": 0.5878, - "step": 9398 - }, - { - "epoch": 0.32991101984941823, - "grad_norm": 0.4921724200248718, - "learning_rate": 3.8151851851851856e-05, - "loss": 0.43, - "step": 9399 - }, - { - "epoch": 0.3299461205005353, - "grad_norm": 0.4629223346710205, - "learning_rate": 3.8150000000000006e-05, - "loss": 0.4455, - "step": 9400 - }, - { - "epoch": 0.32998122115165235, - "grad_norm": 0.42406052350997925, - "learning_rate": 3.814814814814815e-05, - "loss": 0.5596, - "step": 9401 - }, - { - "epoch": 0.33001632180276946, - "grad_norm": 0.7418732047080994, - "learning_rate": 3.81462962962963e-05, - "loss": 0.515, - "step": 9402 - }, - { - "epoch": 0.3300514224538865, - "grad_norm": 0.4790859520435333, - "learning_rate": 3.814444444444444e-05, - "loss": 0.5843, - "step": 9403 - }, - { - "epoch": 0.3300865231050036, - "grad_norm": 0.5359311699867249, - "learning_rate": 3.81425925925926e-05, - "loss": 0.4853, - "step": 9404 - }, - { - "epoch": 0.3301216237561207, - "grad_norm": 0.4872746169567108, - "learning_rate": 3.814074074074074e-05, - "loss": 0.5072, - "step": 9405 - }, - { - "epoch": 0.33015672440723776, - "grad_norm": 0.5369772911071777, - "learning_rate": 3.813888888888889e-05, - "loss": 0.5531, - "step": 9406 - }, - { - "epoch": 0.3301918250583548, - "grad_norm": 0.45693501830101013, - "learning_rate": 3.8137037037037036e-05, - "loss": 0.4872, - "step": 9407 - }, - { - "epoch": 0.33022692570947193, - "grad_norm": 0.47913241386413574, - "learning_rate": 3.8135185185185186e-05, - "loss": 0.4375, - "step": 9408 - }, - { - "epoch": 0.330262026360589, - "grad_norm": 0.43452122807502747, - "learning_rate": 3.8133333333333336e-05, - "loss": 0.5644, - "step": 9409 - }, - { - "epoch": 0.33029712701170605, - "grad_norm": 0.4160023629665375, - "learning_rate": 3.8131481481481487e-05, - "loss": 0.4801, - "step": 9410 - }, - { - "epoch": 0.33033222766282316, - "grad_norm": 0.5092787146568298, - "learning_rate": 3.812962962962963e-05, - "loss": 0.4917, - "step": 9411 - }, - { - "epoch": 0.3303673283139402, - "grad_norm": 0.4686991274356842, - "learning_rate": 3.812777777777778e-05, - "loss": 0.424, - "step": 9412 - }, - { - "epoch": 0.3304024289650573, - "grad_norm": 0.4584668278694153, - "learning_rate": 3.812592592592592e-05, - "loss": 0.4827, - "step": 9413 - }, - { - "epoch": 0.3304375296161744, - "grad_norm": 0.49552375078201294, - "learning_rate": 3.8124074074074073e-05, - "loss": 0.3666, - "step": 9414 - }, - { - "epoch": 0.33047263026729146, - "grad_norm": 0.5354194045066833, - "learning_rate": 3.8122222222222224e-05, - "loss": 0.3735, - "step": 9415 - }, - { - "epoch": 0.3305077309184085, - "grad_norm": 0.44568485021591187, - "learning_rate": 3.8120370370370374e-05, - "loss": 0.5348, - "step": 9416 - }, - { - "epoch": 0.33054283156952563, - "grad_norm": 0.4985167980194092, - "learning_rate": 3.811851851851852e-05, - "loss": 0.4594, - "step": 9417 - }, - { - "epoch": 0.3305779322206427, - "grad_norm": 0.45548900961875916, - "learning_rate": 3.811666666666667e-05, - "loss": 0.5383, - "step": 9418 - }, - { - "epoch": 0.33061303287175975, - "grad_norm": 0.6199905872344971, - "learning_rate": 3.811481481481482e-05, - "loss": 0.5183, - "step": 9419 - }, - { - "epoch": 0.33064813352287686, - "grad_norm": 0.5130459666252136, - "learning_rate": 3.811296296296297e-05, - "loss": 0.4418, - "step": 9420 - }, - { - "epoch": 0.3306832341739939, - "grad_norm": 0.5171413421630859, - "learning_rate": 3.811111111111112e-05, - "loss": 0.4195, - "step": 9421 - }, - { - "epoch": 0.330718334825111, - "grad_norm": 0.5118372440338135, - "learning_rate": 3.810925925925926e-05, - "loss": 0.4773, - "step": 9422 - }, - { - "epoch": 0.3307534354762281, - "grad_norm": 0.4921201467514038, - "learning_rate": 3.810740740740741e-05, - "loss": 0.5161, - "step": 9423 - }, - { - "epoch": 0.33078853612734516, - "grad_norm": 0.4678921699523926, - "learning_rate": 3.8105555555555554e-05, - "loss": 0.4606, - "step": 9424 - }, - { - "epoch": 0.3308236367784622, - "grad_norm": 0.4185035228729248, - "learning_rate": 3.810370370370371e-05, - "loss": 0.451, - "step": 9425 - }, - { - "epoch": 0.33085873742957933, - "grad_norm": 0.4829072952270508, - "learning_rate": 3.8101851851851854e-05, - "loss": 0.478, - "step": 9426 - }, - { - "epoch": 0.3308938380806964, - "grad_norm": 0.5325945019721985, - "learning_rate": 3.8100000000000005e-05, - "loss": 0.478, - "step": 9427 - }, - { - "epoch": 0.33092893873181345, - "grad_norm": 0.495243102312088, - "learning_rate": 3.809814814814815e-05, - "loss": 0.4075, - "step": 9428 - }, - { - "epoch": 0.33096403938293056, - "grad_norm": 0.4129623472690582, - "learning_rate": 3.80962962962963e-05, - "loss": 0.4702, - "step": 9429 - }, - { - "epoch": 0.3309991400340476, - "grad_norm": 0.4589189887046814, - "learning_rate": 3.809444444444444e-05, - "loss": 0.5349, - "step": 9430 - }, - { - "epoch": 0.3310342406851647, - "grad_norm": 0.4594510495662689, - "learning_rate": 3.80925925925926e-05, - "loss": 0.4622, - "step": 9431 - }, - { - "epoch": 0.3310693413362818, - "grad_norm": 0.451803594827652, - "learning_rate": 3.809074074074074e-05, - "loss": 0.5578, - "step": 9432 - }, - { - "epoch": 0.33110444198739886, - "grad_norm": 0.4507528841495514, - "learning_rate": 3.808888888888889e-05, - "loss": 0.4537, - "step": 9433 - }, - { - "epoch": 0.33113954263851597, - "grad_norm": 0.5140794515609741, - "learning_rate": 3.8087037037037035e-05, - "loss": 0.4759, - "step": 9434 - }, - { - "epoch": 0.33117464328963303, - "grad_norm": 0.5097683072090149, - "learning_rate": 3.8085185185185185e-05, - "loss": 0.3719, - "step": 9435 - }, - { - "epoch": 0.3312097439407501, - "grad_norm": 0.47000834345817566, - "learning_rate": 3.8083333333333335e-05, - "loss": 0.5442, - "step": 9436 - }, - { - "epoch": 0.3312448445918672, - "grad_norm": 0.5597690343856812, - "learning_rate": 3.8081481481481485e-05, - "loss": 0.5854, - "step": 9437 - }, - { - "epoch": 0.33127994524298426, - "grad_norm": 0.42068102955818176, - "learning_rate": 3.807962962962963e-05, - "loss": 0.4445, - "step": 9438 - }, - { - "epoch": 0.3313150458941013, - "grad_norm": 0.4844914972782135, - "learning_rate": 3.807777777777778e-05, - "loss": 0.5676, - "step": 9439 - }, - { - "epoch": 0.33135014654521844, - "grad_norm": 0.5025601983070374, - "learning_rate": 3.807592592592593e-05, - "loss": 0.5481, - "step": 9440 - }, - { - "epoch": 0.3313852471963355, - "grad_norm": 0.48928263783454895, - "learning_rate": 3.807407407407408e-05, - "loss": 0.5064, - "step": 9441 - }, - { - "epoch": 0.33142034784745256, - "grad_norm": 0.457284539937973, - "learning_rate": 3.807222222222223e-05, - "loss": 0.4836, - "step": 9442 - }, - { - "epoch": 0.33145544849856967, - "grad_norm": 0.4610788822174072, - "learning_rate": 3.807037037037037e-05, - "loss": 0.4961, - "step": 9443 - }, - { - "epoch": 0.33149054914968673, - "grad_norm": 0.4577023983001709, - "learning_rate": 3.806851851851852e-05, - "loss": 0.5574, - "step": 9444 - }, - { - "epoch": 0.3315256498008038, - "grad_norm": 0.5278072953224182, - "learning_rate": 3.8066666666666666e-05, - "loss": 0.5759, - "step": 9445 - }, - { - "epoch": 0.3315607504519209, - "grad_norm": 0.5467875003814697, - "learning_rate": 3.8064814814814816e-05, - "loss": 0.5099, - "step": 9446 - }, - { - "epoch": 0.33159585110303796, - "grad_norm": 0.48143884539604187, - "learning_rate": 3.8062962962962966e-05, - "loss": 0.4368, - "step": 9447 - }, - { - "epoch": 0.331630951754155, - "grad_norm": 0.43417105078697205, - "learning_rate": 3.8061111111111116e-05, - "loss": 0.5503, - "step": 9448 - }, - { - "epoch": 0.33166605240527214, - "grad_norm": 0.5666148662567139, - "learning_rate": 3.805925925925926e-05, - "loss": 0.3851, - "step": 9449 - }, - { - "epoch": 0.3317011530563892, - "grad_norm": 0.473667174577713, - "learning_rate": 3.805740740740741e-05, - "loss": 0.4789, - "step": 9450 - }, - { - "epoch": 0.33173625370750626, - "grad_norm": 0.4896650016307831, - "learning_rate": 3.805555555555555e-05, - "loss": 0.5304, - "step": 9451 - }, - { - "epoch": 0.33177135435862337, - "grad_norm": 0.4170546531677246, - "learning_rate": 3.805370370370371e-05, - "loss": 0.4362, - "step": 9452 - }, - { - "epoch": 0.33180645500974043, - "grad_norm": 0.4464282989501953, - "learning_rate": 3.805185185185185e-05, - "loss": 0.445, - "step": 9453 - }, - { - "epoch": 0.3318415556608575, - "grad_norm": 0.4889729619026184, - "learning_rate": 3.805e-05, - "loss": 0.5503, - "step": 9454 - }, - { - "epoch": 0.3318766563119746, - "grad_norm": 0.4950689673423767, - "learning_rate": 3.804814814814815e-05, - "loss": 0.4987, - "step": 9455 - }, - { - "epoch": 0.33191175696309166, - "grad_norm": 0.7711585760116577, - "learning_rate": 3.80462962962963e-05, - "loss": 0.3799, - "step": 9456 - }, - { - "epoch": 0.3319468576142087, - "grad_norm": 0.47983890771865845, - "learning_rate": 3.804444444444445e-05, - "loss": 0.6202, - "step": 9457 - }, - { - "epoch": 0.33198195826532584, - "grad_norm": 0.33488407731056213, - "learning_rate": 3.80425925925926e-05, - "loss": 0.3913, - "step": 9458 - }, - { - "epoch": 0.3320170589164429, - "grad_norm": 0.4812847375869751, - "learning_rate": 3.804074074074074e-05, - "loss": 0.4836, - "step": 9459 - }, - { - "epoch": 0.33205215956755996, - "grad_norm": 0.5429162979125977, - "learning_rate": 3.803888888888889e-05, - "loss": 0.4195, - "step": 9460 - }, - { - "epoch": 0.33208726021867707, - "grad_norm": 0.4378207325935364, - "learning_rate": 3.803703703703704e-05, - "loss": 0.5523, - "step": 9461 - }, - { - "epoch": 0.33212236086979413, - "grad_norm": 0.4537898302078247, - "learning_rate": 3.8035185185185184e-05, - "loss": 0.611, - "step": 9462 - }, - { - "epoch": 0.3321574615209112, - "grad_norm": 0.521142303943634, - "learning_rate": 3.803333333333334e-05, - "loss": 0.472, - "step": 9463 - }, - { - "epoch": 0.3321925621720283, - "grad_norm": 0.37048712372779846, - "learning_rate": 3.8031481481481484e-05, - "loss": 0.4676, - "step": 9464 - }, - { - "epoch": 0.33222766282314536, - "grad_norm": 0.5702465176582336, - "learning_rate": 3.8029629629629634e-05, - "loss": 0.586, - "step": 9465 - }, - { - "epoch": 0.3322627634742624, - "grad_norm": 0.4621877074241638, - "learning_rate": 3.802777777777778e-05, - "loss": 0.4518, - "step": 9466 - }, - { - "epoch": 0.33229786412537954, - "grad_norm": 0.44138267636299133, - "learning_rate": 3.802592592592593e-05, - "loss": 0.469, - "step": 9467 - }, - { - "epoch": 0.3323329647764966, - "grad_norm": 0.42195823788642883, - "learning_rate": 3.802407407407408e-05, - "loss": 0.4933, - "step": 9468 - }, - { - "epoch": 0.33236806542761366, - "grad_norm": 0.45842477679252625, - "learning_rate": 3.802222222222223e-05, - "loss": 0.5068, - "step": 9469 - }, - { - "epoch": 0.33240316607873077, - "grad_norm": 0.5290613770484924, - "learning_rate": 3.802037037037037e-05, - "loss": 0.5239, - "step": 9470 - }, - { - "epoch": 0.33243826672984783, - "grad_norm": 0.44596943259239197, - "learning_rate": 3.801851851851852e-05, - "loss": 0.3654, - "step": 9471 - }, - { - "epoch": 0.3324733673809649, - "grad_norm": 0.399008184671402, - "learning_rate": 3.8016666666666665e-05, - "loss": 0.4159, - "step": 9472 - }, - { - "epoch": 0.332508468032082, - "grad_norm": 0.48730289936065674, - "learning_rate": 3.8014814814814815e-05, - "loss": 0.5418, - "step": 9473 - }, - { - "epoch": 0.33254356868319906, - "grad_norm": 0.44245341420173645, - "learning_rate": 3.8012962962962965e-05, - "loss": 0.6088, - "step": 9474 - }, - { - "epoch": 0.3325786693343162, - "grad_norm": 0.5017073154449463, - "learning_rate": 3.8011111111111115e-05, - "loss": 0.4264, - "step": 9475 - }, - { - "epoch": 0.33261376998543324, - "grad_norm": 0.5142520070075989, - "learning_rate": 3.800925925925926e-05, - "loss": 0.5187, - "step": 9476 - }, - { - "epoch": 0.3326488706365503, - "grad_norm": 0.484843373298645, - "learning_rate": 3.800740740740741e-05, - "loss": 0.4515, - "step": 9477 - }, - { - "epoch": 0.3326839712876674, - "grad_norm": 0.3985145390033722, - "learning_rate": 3.800555555555556e-05, - "loss": 0.4411, - "step": 9478 - }, - { - "epoch": 0.33271907193878447, - "grad_norm": 0.43154382705688477, - "learning_rate": 3.800370370370371e-05, - "loss": 0.4651, - "step": 9479 - }, - { - "epoch": 0.33275417258990153, - "grad_norm": 0.43901926279067993, - "learning_rate": 3.800185185185185e-05, - "loss": 0.439, - "step": 9480 - }, - { - "epoch": 0.33278927324101865, - "grad_norm": 0.39551159739494324, - "learning_rate": 3.8e-05, - "loss": 0.3906, - "step": 9481 - }, - { - "epoch": 0.3328243738921357, - "grad_norm": 0.3922595679759979, - "learning_rate": 3.799814814814815e-05, - "loss": 0.3391, - "step": 9482 - }, - { - "epoch": 0.33285947454325276, - "grad_norm": 0.578636646270752, - "learning_rate": 3.7996296296296296e-05, - "loss": 0.657, - "step": 9483 - }, - { - "epoch": 0.3328945751943699, - "grad_norm": 0.47449377179145813, - "learning_rate": 3.7994444444444446e-05, - "loss": 0.3016, - "step": 9484 - }, - { - "epoch": 0.33292967584548694, - "grad_norm": 0.41430166363716125, - "learning_rate": 3.7992592592592596e-05, - "loss": 0.4815, - "step": 9485 - }, - { - "epoch": 0.332964776496604, - "grad_norm": 0.4083324670791626, - "learning_rate": 3.7990740740740746e-05, - "loss": 0.4292, - "step": 9486 - }, - { - "epoch": 0.3329998771477211, - "grad_norm": 0.4300175607204437, - "learning_rate": 3.798888888888889e-05, - "loss": 0.4844, - "step": 9487 - }, - { - "epoch": 0.33303497779883817, - "grad_norm": 0.4493827223777771, - "learning_rate": 3.798703703703704e-05, - "loss": 0.4562, - "step": 9488 - }, - { - "epoch": 0.33307007844995523, - "grad_norm": 0.4650612473487854, - "learning_rate": 3.798518518518518e-05, - "loss": 0.6253, - "step": 9489 - }, - { - "epoch": 0.33310517910107235, - "grad_norm": 0.4624698758125305, - "learning_rate": 3.798333333333334e-05, - "loss": 0.367, - "step": 9490 - }, - { - "epoch": 0.3331402797521894, - "grad_norm": 0.4915560483932495, - "learning_rate": 3.798148148148148e-05, - "loss": 0.497, - "step": 9491 - }, - { - "epoch": 0.33317538040330646, - "grad_norm": 0.5170532464981079, - "learning_rate": 3.797962962962963e-05, - "loss": 0.5209, - "step": 9492 - }, - { - "epoch": 0.3332104810544236, - "grad_norm": 0.5161156058311462, - "learning_rate": 3.7977777777777776e-05, - "loss": 0.5382, - "step": 9493 - }, - { - "epoch": 0.33324558170554064, - "grad_norm": 0.42869460582733154, - "learning_rate": 3.7975925925925926e-05, - "loss": 0.5558, - "step": 9494 - }, - { - "epoch": 0.3332806823566577, - "grad_norm": 0.4677700996398926, - "learning_rate": 3.7974074074074077e-05, - "loss": 0.5019, - "step": 9495 - }, - { - "epoch": 0.3333157830077748, - "grad_norm": 0.44292208552360535, - "learning_rate": 3.797222222222223e-05, - "loss": 0.582, - "step": 9496 - }, - { - "epoch": 0.33335088365889187, - "grad_norm": 0.548804759979248, - "learning_rate": 3.797037037037037e-05, - "loss": 0.4657, - "step": 9497 - }, - { - "epoch": 0.33338598431000893, - "grad_norm": 0.5195844769477844, - "learning_rate": 3.796851851851852e-05, - "loss": 0.4569, - "step": 9498 - }, - { - "epoch": 0.33342108496112605, - "grad_norm": 0.5125408172607422, - "learning_rate": 3.796666666666667e-05, - "loss": 0.6192, - "step": 9499 - }, - { - "epoch": 0.3334561856122431, - "grad_norm": 0.5160167217254639, - "learning_rate": 3.7964814814814814e-05, - "loss": 0.4661, - "step": 9500 - } - ], - "logging_steps": 1, - "max_steps": 30000, - "num_input_tokens_seen": 0, - "num_train_epochs": 2, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 3.745194080771113e+17, - "train_batch_size": 4, - "trial_name": null, - "trial_params": null -}