diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,51822 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 7397, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0001351922264469793, + "grad_norm": 81.23384094238281, + "learning_rate": 0.0, + "loss": 0.75775146484375, + "step": 1 + }, + { + "epoch": 0.0002703844528939586, + "grad_norm": 62.586753845214844, + "learning_rate": 5.859375e-08, + "loss": 0.735595703125, + "step": 2 + }, + { + "epoch": 0.0004055766793409379, + "grad_norm": 64.03299713134766, + "learning_rate": 1.171875e-07, + "loss": 0.73968505859375, + "step": 3 + }, + { + "epoch": 0.0005407689057879172, + "grad_norm": 67.41014862060547, + "learning_rate": 1.7578125e-07, + "loss": 0.7408447265625, + "step": 4 + }, + { + "epoch": 0.0006759611322348965, + "grad_norm": 64.51225280761719, + "learning_rate": 2.34375e-07, + "loss": 0.73907470703125, + "step": 5 + }, + { + "epoch": 0.0008111533586818758, + "grad_norm": 71.63513946533203, + "learning_rate": 2.9296875000000003e-07, + "loss": 0.74359130859375, + "step": 6 + }, + { + "epoch": 0.0009463455851288551, + "grad_norm": 74.01216125488281, + "learning_rate": 3.515625e-07, + "loss": 0.745849609375, + "step": 7 + }, + { + "epoch": 0.0010815378115758344, + "grad_norm": 64.52039337158203, + "learning_rate": 4.1015625e-07, + "loss": 0.73797607421875, + "step": 8 + }, + { + "epoch": 0.0012167300380228137, + "grad_norm": 69.04621124267578, + "learning_rate": 4.6875e-07, + "loss": 0.740966796875, + "step": 9 + }, + { + "epoch": 0.001351922264469793, + "grad_norm": 67.63582611083984, + "learning_rate": 5.2734375e-07, + "loss": 0.73968505859375, + "step": 10 + }, + { + "epoch": 0.0014871144909167722, + "grad_norm": 72.65314483642578, + "learning_rate": 5.859375000000001e-07, + "loss": 0.74273681640625, + "step": 11 + }, + { + "epoch": 0.0016223067173637515, + "grad_norm": 70.06163024902344, + "learning_rate": 6.4453125e-07, + "loss": 0.7413330078125, + "step": 12 + }, + { + "epoch": 0.0017574989438107309, + "grad_norm": 69.63719940185547, + "learning_rate": 7.03125e-07, + "loss": 0.73553466796875, + "step": 13 + }, + { + "epoch": 0.0018926911702577102, + "grad_norm": 73.82935333251953, + "learning_rate": 7.6171875e-07, + "loss": 0.74273681640625, + "step": 14 + }, + { + "epoch": 0.0020278833967046894, + "grad_norm": 72.55986022949219, + "learning_rate": 8.203125e-07, + "loss": 0.73785400390625, + "step": 15 + }, + { + "epoch": 0.0021630756231516687, + "grad_norm": 74.7134017944336, + "learning_rate": 8.7890625e-07, + "loss": 0.73419189453125, + "step": 16 + }, + { + "epoch": 0.002298267849598648, + "grad_norm": 60.44881820678711, + "learning_rate": 9.375e-07, + "loss": 0.72503662109375, + "step": 17 + }, + { + "epoch": 0.0024334600760456274, + "grad_norm": 51.40937042236328, + "learning_rate": 9.9609375e-07, + "loss": 0.71600341796875, + "step": 18 + }, + { + "epoch": 0.0025686523024926067, + "grad_norm": 77.11557006835938, + "learning_rate": 1.0546875e-06, + "loss": 0.72540283203125, + "step": 19 + }, + { + "epoch": 0.002703844528939586, + "grad_norm": 54.883934020996094, + "learning_rate": 1.11328125e-06, + "loss": 0.71466064453125, + "step": 20 + }, + { + "epoch": 0.0028390367553865654, + "grad_norm": 47.21072769165039, + "learning_rate": 1.1718750000000001e-06, + "loss": 0.71234130859375, + "step": 21 + }, + { + "epoch": 0.0029742289818335444, + "grad_norm": 69.94409942626953, + "learning_rate": 1.23046875e-06, + "loss": 0.69549560546875, + "step": 22 + }, + { + "epoch": 0.0031094212082805237, + "grad_norm": 60.53618621826172, + "learning_rate": 1.2890625e-06, + "loss": 0.69573974609375, + "step": 23 + }, + { + "epoch": 0.003244613434727503, + "grad_norm": 60.381465911865234, + "learning_rate": 1.34765625e-06, + "loss": 0.6925048828125, + "step": 24 + }, + { + "epoch": 0.0033798056611744824, + "grad_norm": 51.19105911254883, + "learning_rate": 1.40625e-06, + "loss": 0.67572021484375, + "step": 25 + }, + { + "epoch": 0.0035149978876214617, + "grad_norm": 52.668724060058594, + "learning_rate": 1.46484375e-06, + "loss": 0.6717529296875, + "step": 26 + }, + { + "epoch": 0.003650190114068441, + "grad_norm": 52.021339416503906, + "learning_rate": 1.5234375e-06, + "loss": 0.67193603515625, + "step": 27 + }, + { + "epoch": 0.0037853823405154204, + "grad_norm": 55.213294982910156, + "learning_rate": 1.5820312500000001e-06, + "loss": 0.6666259765625, + "step": 28 + }, + { + "epoch": 0.003920574566962399, + "grad_norm": 49.64873123168945, + "learning_rate": 1.640625e-06, + "loss": 0.666748046875, + "step": 29 + }, + { + "epoch": 0.004055766793409379, + "grad_norm": 47.42051696777344, + "learning_rate": 1.69921875e-06, + "loss": 0.6123046875, + "step": 30 + }, + { + "epoch": 0.004190959019856358, + "grad_norm": 47.74208068847656, + "learning_rate": 1.7578125e-06, + "loss": 0.6053466796875, + "step": 31 + }, + { + "epoch": 0.004326151246303337, + "grad_norm": 40.186519622802734, + "learning_rate": 1.81640625e-06, + "loss": 0.6141357421875, + "step": 32 + }, + { + "epoch": 0.004461343472750317, + "grad_norm": 40.875267028808594, + "learning_rate": 1.875e-06, + "loss": 0.60552978515625, + "step": 33 + }, + { + "epoch": 0.004596535699197296, + "grad_norm": 41.25675964355469, + "learning_rate": 1.93359375e-06, + "loss": 0.58428955078125, + "step": 34 + }, + { + "epoch": 0.0047317279256442754, + "grad_norm": 41.89238357543945, + "learning_rate": 1.9921875e-06, + "loss": 0.572235107421875, + "step": 35 + }, + { + "epoch": 0.004866920152091255, + "grad_norm": 49.741817474365234, + "learning_rate": 2.05078125e-06, + "loss": 0.54248046875, + "step": 36 + }, + { + "epoch": 0.005002112378538234, + "grad_norm": 39.604393005371094, + "learning_rate": 2.109375e-06, + "loss": 0.560882568359375, + "step": 37 + }, + { + "epoch": 0.0051373046049852135, + "grad_norm": 36.70024871826172, + "learning_rate": 2.16796875e-06, + "loss": 0.573028564453125, + "step": 38 + }, + { + "epoch": 0.005272496831432193, + "grad_norm": 31.933439254760742, + "learning_rate": 2.2265625e-06, + "loss": 0.562408447265625, + "step": 39 + }, + { + "epoch": 0.005407689057879172, + "grad_norm": 26.103178024291992, + "learning_rate": 2.28515625e-06, + "loss": 0.54730224609375, + "step": 40 + }, + { + "epoch": 0.0055428812843261515, + "grad_norm": 17.096759796142578, + "learning_rate": 2.3437500000000002e-06, + "loss": 0.533477783203125, + "step": 41 + }, + { + "epoch": 0.005678073510773131, + "grad_norm": 17.932605743408203, + "learning_rate": 2.40234375e-06, + "loss": 0.50421142578125, + "step": 42 + }, + { + "epoch": 0.00581326573722011, + "grad_norm": 14.606308937072754, + "learning_rate": 2.4609375e-06, + "loss": 0.51214599609375, + "step": 43 + }, + { + "epoch": 0.005948457963667089, + "grad_norm": 19.89915657043457, + "learning_rate": 2.5195312500000003e-06, + "loss": 0.4691162109375, + "step": 44 + }, + { + "epoch": 0.006083650190114068, + "grad_norm": 14.97246265411377, + "learning_rate": 2.578125e-06, + "loss": 0.490631103515625, + "step": 45 + }, + { + "epoch": 0.006218842416561047, + "grad_norm": 13.272928237915039, + "learning_rate": 2.63671875e-06, + "loss": 0.48687744140625, + "step": 46 + }, + { + "epoch": 0.006354034643008027, + "grad_norm": 17.15617561340332, + "learning_rate": 2.6953125e-06, + "loss": 0.451324462890625, + "step": 47 + }, + { + "epoch": 0.006489226869455006, + "grad_norm": 18.87810707092285, + "learning_rate": 2.75390625e-06, + "loss": 0.427703857421875, + "step": 48 + }, + { + "epoch": 0.0066244190959019855, + "grad_norm": 4.2861738204956055, + "learning_rate": 2.8125e-06, + "loss": 0.51715087890625, + "step": 49 + }, + { + "epoch": 0.006759611322348965, + "grad_norm": 1.9247297048568726, + "learning_rate": 2.87109375e-06, + "loss": 0.525146484375, + "step": 50 + }, + { + "epoch": 0.006894803548795944, + "grad_norm": 6.371675491333008, + "learning_rate": 2.9296875e-06, + "loss": 0.48345947265625, + "step": 51 + }, + { + "epoch": 0.0070299957752429235, + "grad_norm": 2.8637478351593018, + "learning_rate": 2.9882812500000002e-06, + "loss": 0.496612548828125, + "step": 52 + }, + { + "epoch": 0.007165188001689903, + "grad_norm": 8.740577697753906, + "learning_rate": 3.046875e-06, + "loss": 0.4402313232421875, + "step": 53 + }, + { + "epoch": 0.007300380228136882, + "grad_norm": 4.226352691650391, + "learning_rate": 3.10546875e-06, + "loss": 0.462860107421875, + "step": 54 + }, + { + "epoch": 0.0074355724545838615, + "grad_norm": 5.924042224884033, + "learning_rate": 3.1640625000000003e-06, + "loss": 0.5353546142578125, + "step": 55 + }, + { + "epoch": 0.007570764681030841, + "grad_norm": 6.777284145355225, + "learning_rate": 3.22265625e-06, + "loss": 0.42047119140625, + "step": 56 + }, + { + "epoch": 0.00770595690747782, + "grad_norm": 6.5702080726623535, + "learning_rate": 3.28125e-06, + "loss": 0.4191436767578125, + "step": 57 + }, + { + "epoch": 0.007841149133924799, + "grad_norm": 1.8501906394958496, + "learning_rate": 3.3398437500000003e-06, + "loss": 0.4581298828125, + "step": 58 + }, + { + "epoch": 0.007976341360371779, + "grad_norm": 9.259123802185059, + "learning_rate": 3.3984375e-06, + "loss": 0.5446319580078125, + "step": 59 + }, + { + "epoch": 0.008111533586818757, + "grad_norm": 4.78719425201416, + "learning_rate": 3.45703125e-06, + "loss": 0.518890380859375, + "step": 60 + }, + { + "epoch": 0.008246725813265738, + "grad_norm": 11.670870780944824, + "learning_rate": 3.515625e-06, + "loss": 0.3968658447265625, + "step": 61 + }, + { + "epoch": 0.008381918039712716, + "grad_norm": 14.914684295654297, + "learning_rate": 3.57421875e-06, + "loss": 0.360321044921875, + "step": 62 + }, + { + "epoch": 0.008517110266159696, + "grad_norm": 11.149032592773438, + "learning_rate": 3.6328125e-06, + "loss": 0.569610595703125, + "step": 63 + }, + { + "epoch": 0.008652302492606675, + "grad_norm": 9.514175415039062, + "learning_rate": 3.69140625e-06, + "loss": 0.3821563720703125, + "step": 64 + }, + { + "epoch": 0.008787494719053655, + "grad_norm": 4.481330394744873, + "learning_rate": 3.75e-06, + "loss": 0.4743499755859375, + "step": 65 + }, + { + "epoch": 0.008922686945500634, + "grad_norm": 5.4768218994140625, + "learning_rate": 3.8085937500000002e-06, + "loss": 0.469268798828125, + "step": 66 + }, + { + "epoch": 0.009057879171947614, + "grad_norm": 7.203653335571289, + "learning_rate": 3.8671875e-06, + "loss": 0.4289093017578125, + "step": 67 + }, + { + "epoch": 0.009193071398394592, + "grad_norm": 5.832691669464111, + "learning_rate": 3.92578125e-06, + "loss": 0.456634521484375, + "step": 68 + }, + { + "epoch": 0.009328263624841572, + "grad_norm": 9.66999340057373, + "learning_rate": 3.984375e-06, + "loss": 0.3985137939453125, + "step": 69 + }, + { + "epoch": 0.009463455851288551, + "grad_norm": 6.045322418212891, + "learning_rate": 4.0429687500000004e-06, + "loss": 0.438232421875, + "step": 70 + }, + { + "epoch": 0.00959864807773553, + "grad_norm": 9.853673934936523, + "learning_rate": 4.1015625e-06, + "loss": 0.40960693359375, + "step": 71 + }, + { + "epoch": 0.00973384030418251, + "grad_norm": 14.634685516357422, + "learning_rate": 4.16015625e-06, + "loss": 0.6088104248046875, + "step": 72 + }, + { + "epoch": 0.009869032530629488, + "grad_norm": 20.060110092163086, + "learning_rate": 4.21875e-06, + "loss": 0.460662841796875, + "step": 73 + }, + { + "epoch": 0.010004224757076468, + "grad_norm": 12.174761772155762, + "learning_rate": 4.27734375e-06, + "loss": 0.542572021484375, + "step": 74 + }, + { + "epoch": 0.010139416983523447, + "grad_norm": 9.763131141662598, + "learning_rate": 4.3359375e-06, + "loss": 0.401763916015625, + "step": 75 + }, + { + "epoch": 0.010274609209970427, + "grad_norm": 3.679368257522583, + "learning_rate": 4.3945312500000005e-06, + "loss": 0.4951019287109375, + "step": 76 + }, + { + "epoch": 0.010409801436417405, + "grad_norm": 5.807453632354736, + "learning_rate": 4.453125e-06, + "loss": 0.485809326171875, + "step": 77 + }, + { + "epoch": 0.010544993662864386, + "grad_norm": 9.901314735412598, + "learning_rate": 4.51171875e-06, + "loss": 0.46160888671875, + "step": 78 + }, + { + "epoch": 0.010680185889311364, + "grad_norm": 8.818758964538574, + "learning_rate": 4.5703125e-06, + "loss": 0.4622344970703125, + "step": 79 + }, + { + "epoch": 0.010815378115758344, + "grad_norm": 13.558056831359863, + "learning_rate": 4.62890625e-06, + "loss": 0.480987548828125, + "step": 80 + }, + { + "epoch": 0.010950570342205323, + "grad_norm": 7.493067741394043, + "learning_rate": 4.6875000000000004e-06, + "loss": 0.4514007568359375, + "step": 81 + }, + { + "epoch": 0.011085762568652303, + "grad_norm": 7.834639549255371, + "learning_rate": 4.74609375e-06, + "loss": 0.3695068359375, + "step": 82 + }, + { + "epoch": 0.011220954795099282, + "grad_norm": 8.9389009475708, + "learning_rate": 4.8046875e-06, + "loss": 0.5247650146484375, + "step": 83 + }, + { + "epoch": 0.011356147021546262, + "grad_norm": 7.8822102546691895, + "learning_rate": 4.86328125e-06, + "loss": 0.37096405029296875, + "step": 84 + }, + { + "epoch": 0.01149133924799324, + "grad_norm": 8.921436309814453, + "learning_rate": 4.921875e-06, + "loss": 0.469696044921875, + "step": 85 + }, + { + "epoch": 0.01162653147444022, + "grad_norm": 13.998419761657715, + "learning_rate": 4.98046875e-06, + "loss": 0.4219207763671875, + "step": 86 + }, + { + "epoch": 0.011761723700887199, + "grad_norm": 20.839725494384766, + "learning_rate": 5.0390625000000005e-06, + "loss": 0.273895263671875, + "step": 87 + }, + { + "epoch": 0.011896915927334177, + "grad_norm": 14.408513069152832, + "learning_rate": 5.09765625e-06, + "loss": 0.4432525634765625, + "step": 88 + }, + { + "epoch": 0.012032108153781158, + "grad_norm": 7.540858268737793, + "learning_rate": 5.15625e-06, + "loss": 0.403533935546875, + "step": 89 + }, + { + "epoch": 0.012167300380228136, + "grad_norm": 11.975055694580078, + "learning_rate": 5.21484375e-06, + "loss": 0.4277801513671875, + "step": 90 + }, + { + "epoch": 0.012302492606675116, + "grad_norm": 13.353858947753906, + "learning_rate": 5.2734375e-06, + "loss": 0.39711761474609375, + "step": 91 + }, + { + "epoch": 0.012437684833122095, + "grad_norm": 16.96271514892578, + "learning_rate": 5.3320312500000004e-06, + "loss": 0.3714141845703125, + "step": 92 + }, + { + "epoch": 0.012572877059569075, + "grad_norm": 10.64302921295166, + "learning_rate": 5.390625e-06, + "loss": 0.443603515625, + "step": 93 + }, + { + "epoch": 0.012708069286016054, + "grad_norm": 13.161224365234375, + "learning_rate": 5.44921875e-06, + "loss": 0.3780975341796875, + "step": 94 + }, + { + "epoch": 0.012843261512463034, + "grad_norm": 11.99887752532959, + "learning_rate": 5.5078125e-06, + "loss": 0.3973236083984375, + "step": 95 + }, + { + "epoch": 0.012978453738910012, + "grad_norm": 12.698922157287598, + "learning_rate": 5.56640625e-06, + "loss": 0.4170989990234375, + "step": 96 + }, + { + "epoch": 0.013113645965356992, + "grad_norm": 12.280120849609375, + "learning_rate": 5.625e-06, + "loss": 0.390411376953125, + "step": 97 + }, + { + "epoch": 0.013248838191803971, + "grad_norm": 10.428301811218262, + "learning_rate": 5.6835937500000005e-06, + "loss": 0.32039642333984375, + "step": 98 + }, + { + "epoch": 0.013384030418250951, + "grad_norm": 13.72142219543457, + "learning_rate": 5.7421875e-06, + "loss": 0.40573883056640625, + "step": 99 + }, + { + "epoch": 0.01351922264469793, + "grad_norm": 11.776626586914062, + "learning_rate": 5.80078125e-06, + "loss": 0.29107666015625, + "step": 100 + }, + { + "epoch": 0.01365441487114491, + "grad_norm": 9.963550567626953, + "learning_rate": 5.859375e-06, + "loss": 0.466705322265625, + "step": 101 + }, + { + "epoch": 0.013789607097591888, + "grad_norm": 12.714323043823242, + "learning_rate": 5.91796875e-06, + "loss": 0.348602294921875, + "step": 102 + }, + { + "epoch": 0.013924799324038869, + "grad_norm": 9.004317283630371, + "learning_rate": 5.9765625000000004e-06, + "loss": 0.32762908935546875, + "step": 103 + }, + { + "epoch": 0.014059991550485847, + "grad_norm": 6.390115737915039, + "learning_rate": 6.03515625e-06, + "loss": 0.367767333984375, + "step": 104 + }, + { + "epoch": 0.014195183776932827, + "grad_norm": 5.2753520011901855, + "learning_rate": 6.09375e-06, + "loss": 0.340789794921875, + "step": 105 + }, + { + "epoch": 0.014330376003379806, + "grad_norm": 19.965015411376953, + "learning_rate": 6.15234375e-06, + "loss": 0.4667205810546875, + "step": 106 + }, + { + "epoch": 0.014465568229826784, + "grad_norm": 7.3258466720581055, + "learning_rate": 6.2109375e-06, + "loss": 0.25698089599609375, + "step": 107 + }, + { + "epoch": 0.014600760456273764, + "grad_norm": 7.896075248718262, + "learning_rate": 6.26953125e-06, + "loss": 0.407379150390625, + "step": 108 + }, + { + "epoch": 0.014735952682720743, + "grad_norm": 14.004000663757324, + "learning_rate": 6.3281250000000005e-06, + "loss": 0.39601898193359375, + "step": 109 + }, + { + "epoch": 0.014871144909167723, + "grad_norm": 14.068887710571289, + "learning_rate": 6.38671875e-06, + "loss": 0.31365203857421875, + "step": 110 + }, + { + "epoch": 0.015006337135614702, + "grad_norm": 4.65051794052124, + "learning_rate": 6.4453125e-06, + "loss": 0.426300048828125, + "step": 111 + }, + { + "epoch": 0.015141529362061682, + "grad_norm": 19.288307189941406, + "learning_rate": 6.50390625e-06, + "loss": 0.362030029296875, + "step": 112 + }, + { + "epoch": 0.01527672158850866, + "grad_norm": 17.15165138244629, + "learning_rate": 6.5625e-06, + "loss": 0.2597808837890625, + "step": 113 + }, + { + "epoch": 0.01541191381495564, + "grad_norm": 9.220398902893066, + "learning_rate": 6.6210937500000004e-06, + "loss": 0.4727783203125, + "step": 114 + }, + { + "epoch": 0.015547106041402619, + "grad_norm": 16.606584548950195, + "learning_rate": 6.679687500000001e-06, + "loss": 0.34545135498046875, + "step": 115 + }, + { + "epoch": 0.015682298267849597, + "grad_norm": 12.121626853942871, + "learning_rate": 6.73828125e-06, + "loss": 0.32347869873046875, + "step": 116 + }, + { + "epoch": 0.01581749049429658, + "grad_norm": 9.206586837768555, + "learning_rate": 6.796875e-06, + "loss": 0.2846336364746094, + "step": 117 + }, + { + "epoch": 0.015952682720743558, + "grad_norm": 11.462008476257324, + "learning_rate": 6.85546875e-06, + "loss": 0.44615936279296875, + "step": 118 + }, + { + "epoch": 0.016087874947190536, + "grad_norm": 7.873308181762695, + "learning_rate": 6.9140625e-06, + "loss": 0.317413330078125, + "step": 119 + }, + { + "epoch": 0.016223067173637515, + "grad_norm": 11.945247650146484, + "learning_rate": 6.9726562500000005e-06, + "loss": 0.41915130615234375, + "step": 120 + }, + { + "epoch": 0.016358259400084497, + "grad_norm": 8.758480072021484, + "learning_rate": 7.03125e-06, + "loss": 0.41268157958984375, + "step": 121 + }, + { + "epoch": 0.016493451626531475, + "grad_norm": 16.99803352355957, + "learning_rate": 7.08984375e-06, + "loss": 0.33650970458984375, + "step": 122 + }, + { + "epoch": 0.016628643852978454, + "grad_norm": 12.159713745117188, + "learning_rate": 7.1484375e-06, + "loss": 0.431549072265625, + "step": 123 + }, + { + "epoch": 0.016763836079425432, + "grad_norm": 8.628973007202148, + "learning_rate": 7.20703125e-06, + "loss": 0.3750762939453125, + "step": 124 + }, + { + "epoch": 0.01689902830587241, + "grad_norm": 15.568916320800781, + "learning_rate": 7.265625e-06, + "loss": 0.352691650390625, + "step": 125 + }, + { + "epoch": 0.017034220532319393, + "grad_norm": 32.42605972290039, + "learning_rate": 7.3242187500000006e-06, + "loss": 0.4794044494628906, + "step": 126 + }, + { + "epoch": 0.01716941275876637, + "grad_norm": 8.526758193969727, + "learning_rate": 7.3828125e-06, + "loss": 0.3136444091796875, + "step": 127 + }, + { + "epoch": 0.01730460498521335, + "grad_norm": 12.648958206176758, + "learning_rate": 7.44140625e-06, + "loss": 0.272247314453125, + "step": 128 + }, + { + "epoch": 0.017439797211660328, + "grad_norm": 9.088125228881836, + "learning_rate": 7.5e-06, + "loss": 0.25101470947265625, + "step": 129 + }, + { + "epoch": 0.01757498943810731, + "grad_norm": 10.947595596313477, + "learning_rate": 7.55859375e-06, + "loss": 0.33487701416015625, + "step": 130 + }, + { + "epoch": 0.01771018166455429, + "grad_norm": 8.987068176269531, + "learning_rate": 7.6171875000000005e-06, + "loss": 0.44268798828125, + "step": 131 + }, + { + "epoch": 0.017845373891001267, + "grad_norm": 10.176579475402832, + "learning_rate": 7.67578125e-06, + "loss": 0.39456939697265625, + "step": 132 + }, + { + "epoch": 0.017980566117448245, + "grad_norm": 4.725843906402588, + "learning_rate": 7.734375e-06, + "loss": 0.39962005615234375, + "step": 133 + }, + { + "epoch": 0.018115758343895227, + "grad_norm": 5.729996681213379, + "learning_rate": 7.792968750000001e-06, + "loss": 0.4094390869140625, + "step": 134 + }, + { + "epoch": 0.018250950570342206, + "grad_norm": 6.760279655456543, + "learning_rate": 7.8515625e-06, + "loss": 0.34267425537109375, + "step": 135 + }, + { + "epoch": 0.018386142796789184, + "grad_norm": 6.717825412750244, + "learning_rate": 7.91015625e-06, + "loss": 0.38262939453125, + "step": 136 + }, + { + "epoch": 0.018521335023236163, + "grad_norm": 8.859396934509277, + "learning_rate": 7.96875e-06, + "loss": 0.4053955078125, + "step": 137 + }, + { + "epoch": 0.018656527249683145, + "grad_norm": 13.363924980163574, + "learning_rate": 8.02734375e-06, + "loss": 0.37969207763671875, + "step": 138 + }, + { + "epoch": 0.018791719476130123, + "grad_norm": 7.235777854919434, + "learning_rate": 8.085937500000001e-06, + "loss": 0.32489013671875, + "step": 139 + }, + { + "epoch": 0.018926911702577102, + "grad_norm": 6.2245564460754395, + "learning_rate": 8.14453125e-06, + "loss": 0.2818946838378906, + "step": 140 + }, + { + "epoch": 0.01906210392902408, + "grad_norm": 3.3765194416046143, + "learning_rate": 8.203125e-06, + "loss": 0.3400154113769531, + "step": 141 + }, + { + "epoch": 0.01919729615547106, + "grad_norm": 3.3864426612854004, + "learning_rate": 8.26171875e-06, + "loss": 0.372314453125, + "step": 142 + }, + { + "epoch": 0.01933248838191804, + "grad_norm": 7.222830295562744, + "learning_rate": 8.3203125e-06, + "loss": 0.3372230529785156, + "step": 143 + }, + { + "epoch": 0.01946768060836502, + "grad_norm": 9.870015144348145, + "learning_rate": 8.37890625e-06, + "loss": 0.3125762939453125, + "step": 144 + }, + { + "epoch": 0.019602872834811998, + "grad_norm": 3.844804525375366, + "learning_rate": 8.4375e-06, + "loss": 0.40564727783203125, + "step": 145 + }, + { + "epoch": 0.019738065061258976, + "grad_norm": 10.666752815246582, + "learning_rate": 8.49609375e-06, + "loss": 0.399871826171875, + "step": 146 + }, + { + "epoch": 0.019873257287705958, + "grad_norm": 20.025733947753906, + "learning_rate": 8.5546875e-06, + "loss": 0.29644775390625, + "step": 147 + }, + { + "epoch": 0.020008449514152937, + "grad_norm": 5.059605598449707, + "learning_rate": 8.61328125e-06, + "loss": 0.3897705078125, + "step": 148 + }, + { + "epoch": 0.020143641740599915, + "grad_norm": 12.984992980957031, + "learning_rate": 8.671875e-06, + "loss": 0.32715606689453125, + "step": 149 + }, + { + "epoch": 0.020278833967046894, + "grad_norm": 4.818148612976074, + "learning_rate": 8.73046875e-06, + "loss": 0.3247337341308594, + "step": 150 + }, + { + "epoch": 0.020414026193493875, + "grad_norm": 4.039071083068848, + "learning_rate": 8.789062500000001e-06, + "loss": 0.34757232666015625, + "step": 151 + }, + { + "epoch": 0.020549218419940854, + "grad_norm": 5.584234237670898, + "learning_rate": 8.84765625e-06, + "loss": 0.2400054931640625, + "step": 152 + }, + { + "epoch": 0.020684410646387832, + "grad_norm": 5.080233573913574, + "learning_rate": 8.90625e-06, + "loss": 0.26808929443359375, + "step": 153 + }, + { + "epoch": 0.02081960287283481, + "grad_norm": 9.186685562133789, + "learning_rate": 8.96484375e-06, + "loss": 0.3578472137451172, + "step": 154 + }, + { + "epoch": 0.020954795099281793, + "grad_norm": 8.834890365600586, + "learning_rate": 9.0234375e-06, + "loss": 0.32218170166015625, + "step": 155 + }, + { + "epoch": 0.02108998732572877, + "grad_norm": 29.870014190673828, + "learning_rate": 9.082031250000001e-06, + "loss": 0.430694580078125, + "step": 156 + }, + { + "epoch": 0.02122517955217575, + "grad_norm": 22.970182418823242, + "learning_rate": 9.140625e-06, + "loss": 0.45813751220703125, + "step": 157 + }, + { + "epoch": 0.02136037177862273, + "grad_norm": 17.33034896850586, + "learning_rate": 9.19921875e-06, + "loss": 0.31902313232421875, + "step": 158 + }, + { + "epoch": 0.021495564005069707, + "grad_norm": 8.452795028686523, + "learning_rate": 9.2578125e-06, + "loss": 0.3018226623535156, + "step": 159 + }, + { + "epoch": 0.02163075623151669, + "grad_norm": 11.49000358581543, + "learning_rate": 9.31640625e-06, + "loss": 0.277069091796875, + "step": 160 + }, + { + "epoch": 0.021765948457963667, + "grad_norm": 12.22887134552002, + "learning_rate": 9.375000000000001e-06, + "loss": 0.2909584045410156, + "step": 161 + }, + { + "epoch": 0.021901140684410646, + "grad_norm": 16.935884475708008, + "learning_rate": 9.43359375e-06, + "loss": 0.3690185546875, + "step": 162 + }, + { + "epoch": 0.022036332910857624, + "grad_norm": 19.534664154052734, + "learning_rate": 9.4921875e-06, + "loss": 0.44385528564453125, + "step": 163 + }, + { + "epoch": 0.022171525137304606, + "grad_norm": 11.166132926940918, + "learning_rate": 9.55078125e-06, + "loss": 0.3913116455078125, + "step": 164 + }, + { + "epoch": 0.022306717363751585, + "grad_norm": 7.799185752868652, + "learning_rate": 9.609375e-06, + "loss": 0.2675018310546875, + "step": 165 + }, + { + "epoch": 0.022441909590198563, + "grad_norm": 7.301191806793213, + "learning_rate": 9.66796875e-06, + "loss": 0.23796844482421875, + "step": 166 + }, + { + "epoch": 0.02257710181664554, + "grad_norm": 12.863231658935547, + "learning_rate": 9.7265625e-06, + "loss": 0.417205810546875, + "step": 167 + }, + { + "epoch": 0.022712294043092524, + "grad_norm": 5.220954895019531, + "learning_rate": 9.78515625e-06, + "loss": 0.29888153076171875, + "step": 168 + }, + { + "epoch": 0.022847486269539502, + "grad_norm": 6.2648539543151855, + "learning_rate": 9.84375e-06, + "loss": 0.3386878967285156, + "step": 169 + }, + { + "epoch": 0.02298267849598648, + "grad_norm": 11.405940055847168, + "learning_rate": 9.90234375e-06, + "loss": 0.36103057861328125, + "step": 170 + }, + { + "epoch": 0.02311787072243346, + "grad_norm": 13.143427848815918, + "learning_rate": 9.9609375e-06, + "loss": 0.4598541259765625, + "step": 171 + }, + { + "epoch": 0.02325306294888044, + "grad_norm": 9.1638822555542, + "learning_rate": 1.001953125e-05, + "loss": 0.413787841796875, + "step": 172 + }, + { + "epoch": 0.02338825517532742, + "grad_norm": 14.425140380859375, + "learning_rate": 1.0078125000000001e-05, + "loss": 0.3013954162597656, + "step": 173 + }, + { + "epoch": 0.023523447401774398, + "grad_norm": 7.103453636169434, + "learning_rate": 1.013671875e-05, + "loss": 0.3523521423339844, + "step": 174 + }, + { + "epoch": 0.023658639628221376, + "grad_norm": 7.764557838439941, + "learning_rate": 1.01953125e-05, + "loss": 0.2942028045654297, + "step": 175 + }, + { + "epoch": 0.023793831854668355, + "grad_norm": 9.54080867767334, + "learning_rate": 1.025390625e-05, + "loss": 0.4434051513671875, + "step": 176 + }, + { + "epoch": 0.023929024081115337, + "grad_norm": 9.157366752624512, + "learning_rate": 1.03125e-05, + "loss": 0.31499481201171875, + "step": 177 + }, + { + "epoch": 0.024064216307562315, + "grad_norm": 7.387535572052002, + "learning_rate": 1.0371093750000001e-05, + "loss": 0.3870658874511719, + "step": 178 + }, + { + "epoch": 0.024199408534009294, + "grad_norm": 9.071150779724121, + "learning_rate": 1.04296875e-05, + "loss": 0.29233551025390625, + "step": 179 + }, + { + "epoch": 0.024334600760456272, + "grad_norm": 8.732147216796875, + "learning_rate": 1.048828125e-05, + "loss": 0.3111305236816406, + "step": 180 + }, + { + "epoch": 0.024469792986903254, + "grad_norm": 3.857217788696289, + "learning_rate": 1.0546875e-05, + "loss": 0.33007049560546875, + "step": 181 + }, + { + "epoch": 0.024604985213350233, + "grad_norm": 4.270830154418945, + "learning_rate": 1.060546875e-05, + "loss": 0.3454132080078125, + "step": 182 + }, + { + "epoch": 0.02474017743979721, + "grad_norm": 4.961788654327393, + "learning_rate": 1.0664062500000001e-05, + "loss": 0.4149169921875, + "step": 183 + }, + { + "epoch": 0.02487536966624419, + "grad_norm": 9.476563453674316, + "learning_rate": 1.072265625e-05, + "loss": 0.34647369384765625, + "step": 184 + }, + { + "epoch": 0.02501056189269117, + "grad_norm": 8.641980171203613, + "learning_rate": 1.078125e-05, + "loss": 0.38382720947265625, + "step": 185 + }, + { + "epoch": 0.02514575411913815, + "grad_norm": 17.837678909301758, + "learning_rate": 1.083984375e-05, + "loss": 0.31372833251953125, + "step": 186 + }, + { + "epoch": 0.02528094634558513, + "grad_norm": 6.171658039093018, + "learning_rate": 1.08984375e-05, + "loss": 0.24570274353027344, + "step": 187 + }, + { + "epoch": 0.025416138572032107, + "grad_norm": 45.034725189208984, + "learning_rate": 1.095703125e-05, + "loss": 0.4607353210449219, + "step": 188 + }, + { + "epoch": 0.02555133079847909, + "grad_norm": 29.19608497619629, + "learning_rate": 1.1015625e-05, + "loss": 0.3447914123535156, + "step": 189 + }, + { + "epoch": 0.025686523024926067, + "grad_norm": 9.654741287231445, + "learning_rate": 1.1074218750000001e-05, + "loss": 0.34331512451171875, + "step": 190 + }, + { + "epoch": 0.025821715251373046, + "grad_norm": 27.05508041381836, + "learning_rate": 1.11328125e-05, + "loss": 0.33905029296875, + "step": 191 + }, + { + "epoch": 0.025956907477820024, + "grad_norm": 19.2808837890625, + "learning_rate": 1.119140625e-05, + "loss": 0.32193756103515625, + "step": 192 + }, + { + "epoch": 0.026092099704267006, + "grad_norm": 6.524961471557617, + "learning_rate": 1.125e-05, + "loss": 0.2579345703125, + "step": 193 + }, + { + "epoch": 0.026227291930713985, + "grad_norm": 26.02958106994629, + "learning_rate": 1.130859375e-05, + "loss": 0.3294353485107422, + "step": 194 + }, + { + "epoch": 0.026362484157160963, + "grad_norm": 9.47587776184082, + "learning_rate": 1.1367187500000001e-05, + "loss": 0.30571746826171875, + "step": 195 + }, + { + "epoch": 0.026497676383607942, + "grad_norm": 8.048364639282227, + "learning_rate": 1.142578125e-05, + "loss": 0.2755298614501953, + "step": 196 + }, + { + "epoch": 0.02663286861005492, + "grad_norm": 17.279748916625977, + "learning_rate": 1.1484375e-05, + "loss": 0.2881507873535156, + "step": 197 + }, + { + "epoch": 0.026768060836501902, + "grad_norm": 9.93814468383789, + "learning_rate": 1.154296875e-05, + "loss": 0.3548431396484375, + "step": 198 + }, + { + "epoch": 0.02690325306294888, + "grad_norm": 9.75289535522461, + "learning_rate": 1.16015625e-05, + "loss": 0.25585174560546875, + "step": 199 + }, + { + "epoch": 0.02703844528939586, + "grad_norm": 21.917634963989258, + "learning_rate": 1.1660156250000001e-05, + "loss": 0.3770751953125, + "step": 200 + }, + { + "epoch": 0.027173637515842838, + "grad_norm": 10.445472717285156, + "learning_rate": 1.171875e-05, + "loss": 0.33472442626953125, + "step": 201 + }, + { + "epoch": 0.02730882974228982, + "grad_norm": 28.41674041748047, + "learning_rate": 1.177734375e-05, + "loss": 0.3972015380859375, + "step": 202 + }, + { + "epoch": 0.027444021968736798, + "grad_norm": 16.933401107788086, + "learning_rate": 1.18359375e-05, + "loss": 0.2882823944091797, + "step": 203 + }, + { + "epoch": 0.027579214195183777, + "grad_norm": 8.066474914550781, + "learning_rate": 1.189453125e-05, + "loss": 0.28705596923828125, + "step": 204 + }, + { + "epoch": 0.027714406421630755, + "grad_norm": 8.025276184082031, + "learning_rate": 1.1953125000000001e-05, + "loss": 0.3666839599609375, + "step": 205 + }, + { + "epoch": 0.027849598648077737, + "grad_norm": 13.482354164123535, + "learning_rate": 1.201171875e-05, + "loss": 0.283447265625, + "step": 206 + }, + { + "epoch": 0.027984790874524716, + "grad_norm": 8.359320640563965, + "learning_rate": 1.20703125e-05, + "loss": 0.331634521484375, + "step": 207 + }, + { + "epoch": 0.028119983100971694, + "grad_norm": 14.367286682128906, + "learning_rate": 1.212890625e-05, + "loss": 0.3609771728515625, + "step": 208 + }, + { + "epoch": 0.028255175327418672, + "grad_norm": 17.017526626586914, + "learning_rate": 1.21875e-05, + "loss": 0.3172340393066406, + "step": 209 + }, + { + "epoch": 0.028390367553865654, + "grad_norm": 12.142645835876465, + "learning_rate": 1.224609375e-05, + "loss": 0.393798828125, + "step": 210 + }, + { + "epoch": 0.028525559780312633, + "grad_norm": 10.331498146057129, + "learning_rate": 1.23046875e-05, + "loss": 0.2973442077636719, + "step": 211 + }, + { + "epoch": 0.02866075200675961, + "grad_norm": 15.544872283935547, + "learning_rate": 1.2363281250000001e-05, + "loss": 0.4059600830078125, + "step": 212 + }, + { + "epoch": 0.02879594423320659, + "grad_norm": 6.606779098510742, + "learning_rate": 1.2421875e-05, + "loss": 0.3096923828125, + "step": 213 + }, + { + "epoch": 0.02893113645965357, + "grad_norm": 5.737157821655273, + "learning_rate": 1.248046875e-05, + "loss": 0.32675933837890625, + "step": 214 + }, + { + "epoch": 0.02906632868610055, + "grad_norm": 5.541354656219482, + "learning_rate": 1.25390625e-05, + "loss": 0.2597007751464844, + "step": 215 + }, + { + "epoch": 0.02920152091254753, + "grad_norm": 5.784581661224365, + "learning_rate": 1.259765625e-05, + "loss": 0.23841476440429688, + "step": 216 + }, + { + "epoch": 0.029336713138994507, + "grad_norm": 9.065890312194824, + "learning_rate": 1.2656250000000001e-05, + "loss": 0.2053375244140625, + "step": 217 + }, + { + "epoch": 0.029471905365441486, + "grad_norm": 5.764812469482422, + "learning_rate": 1.271484375e-05, + "loss": 0.3548126220703125, + "step": 218 + }, + { + "epoch": 0.029607097591888468, + "grad_norm": 12.094768524169922, + "learning_rate": 1.27734375e-05, + "loss": 0.2837066650390625, + "step": 219 + }, + { + "epoch": 0.029742289818335446, + "grad_norm": 15.443978309631348, + "learning_rate": 1.283203125e-05, + "loss": 0.3972358703613281, + "step": 220 + }, + { + "epoch": 0.029877482044782425, + "grad_norm": 8.34568977355957, + "learning_rate": 1.2890625e-05, + "loss": 0.2913818359375, + "step": 221 + }, + { + "epoch": 0.030012674271229403, + "grad_norm": 24.16734504699707, + "learning_rate": 1.2949218750000001e-05, + "loss": 0.2877616882324219, + "step": 222 + }, + { + "epoch": 0.030147866497676385, + "grad_norm": 19.035968780517578, + "learning_rate": 1.30078125e-05, + "loss": 0.2989997863769531, + "step": 223 + }, + { + "epoch": 0.030283058724123364, + "grad_norm": 7.522139072418213, + "learning_rate": 1.306640625e-05, + "loss": 0.24536705017089844, + "step": 224 + }, + { + "epoch": 0.030418250950570342, + "grad_norm": 13.014775276184082, + "learning_rate": 1.3125e-05, + "loss": 0.2332782745361328, + "step": 225 + }, + { + "epoch": 0.03055344317701732, + "grad_norm": 25.451913833618164, + "learning_rate": 1.318359375e-05, + "loss": 0.4199247360229492, + "step": 226 + }, + { + "epoch": 0.030688635403464302, + "grad_norm": 11.6997652053833, + "learning_rate": 1.3242187500000001e-05, + "loss": 0.27001953125, + "step": 227 + }, + { + "epoch": 0.03082382762991128, + "grad_norm": 4.29236364364624, + "learning_rate": 1.330078125e-05, + "loss": 0.3325614929199219, + "step": 228 + }, + { + "epoch": 0.03095901985635826, + "grad_norm": 14.468042373657227, + "learning_rate": 1.3359375000000001e-05, + "loss": 0.3017425537109375, + "step": 229 + }, + { + "epoch": 0.031094212082805238, + "grad_norm": 34.97588348388672, + "learning_rate": 1.341796875e-05, + "loss": 0.2820014953613281, + "step": 230 + }, + { + "epoch": 0.031229404309252216, + "grad_norm": 31.7833309173584, + "learning_rate": 1.34765625e-05, + "loss": 0.30699920654296875, + "step": 231 + }, + { + "epoch": 0.031364596535699195, + "grad_norm": 9.370939254760742, + "learning_rate": 1.353515625e-05, + "loss": 0.29029083251953125, + "step": 232 + }, + { + "epoch": 0.03149978876214617, + "grad_norm": 18.17502784729004, + "learning_rate": 1.359375e-05, + "loss": 0.3473472595214844, + "step": 233 + }, + { + "epoch": 0.03163498098859316, + "grad_norm": 16.00482749938965, + "learning_rate": 1.3652343750000001e-05, + "loss": 0.34558868408203125, + "step": 234 + }, + { + "epoch": 0.03177017321504014, + "grad_norm": 10.303112030029297, + "learning_rate": 1.37109375e-05, + "loss": 0.23703765869140625, + "step": 235 + }, + { + "epoch": 0.031905365441487116, + "grad_norm": 3.414919853210449, + "learning_rate": 1.376953125e-05, + "loss": 0.170166015625, + "step": 236 + }, + { + "epoch": 0.032040557667934094, + "grad_norm": 17.686811447143555, + "learning_rate": 1.3828125e-05, + "loss": 0.3130531311035156, + "step": 237 + }, + { + "epoch": 0.03217574989438107, + "grad_norm": 18.49032211303711, + "learning_rate": 1.388671875e-05, + "loss": 0.29993247985839844, + "step": 238 + }, + { + "epoch": 0.03231094212082805, + "grad_norm": 9.850604057312012, + "learning_rate": 1.3945312500000001e-05, + "loss": 0.2655344009399414, + "step": 239 + }, + { + "epoch": 0.03244613434727503, + "grad_norm": 6.737331867218018, + "learning_rate": 1.400390625e-05, + "loss": 0.2937583923339844, + "step": 240 + }, + { + "epoch": 0.03258132657372201, + "grad_norm": 4.894274711608887, + "learning_rate": 1.40625e-05, + "loss": 0.18481063842773438, + "step": 241 + }, + { + "epoch": 0.032716518800168994, + "grad_norm": 3.0006649494171143, + "learning_rate": 1.412109375e-05, + "loss": 0.24634361267089844, + "step": 242 + }, + { + "epoch": 0.03285171102661597, + "grad_norm": 5.40485954284668, + "learning_rate": 1.41796875e-05, + "loss": 0.3110008239746094, + "step": 243 + }, + { + "epoch": 0.03298690325306295, + "grad_norm": 3.7458879947662354, + "learning_rate": 1.4238281250000001e-05, + "loss": 0.2802543640136719, + "step": 244 + }, + { + "epoch": 0.03312209547950993, + "grad_norm": 7.09403657913208, + "learning_rate": 1.4296875e-05, + "loss": 0.324951171875, + "step": 245 + }, + { + "epoch": 0.03325728770595691, + "grad_norm": 10.268945693969727, + "learning_rate": 1.435546875e-05, + "loss": 0.2251300811767578, + "step": 246 + }, + { + "epoch": 0.033392479932403886, + "grad_norm": 16.29755973815918, + "learning_rate": 1.44140625e-05, + "loss": 0.3162803649902344, + "step": 247 + }, + { + "epoch": 0.033527672158850864, + "grad_norm": 18.552701950073242, + "learning_rate": 1.447265625e-05, + "loss": 0.25196361541748047, + "step": 248 + }, + { + "epoch": 0.03366286438529784, + "grad_norm": 6.371397495269775, + "learning_rate": 1.453125e-05, + "loss": 0.32024192810058594, + "step": 249 + }, + { + "epoch": 0.03379805661174482, + "grad_norm": 21.615142822265625, + "learning_rate": 1.458984375e-05, + "loss": 0.31653594970703125, + "step": 250 + }, + { + "epoch": 0.03393324883819181, + "grad_norm": 24.584264755249023, + "learning_rate": 1.4648437500000001e-05, + "loss": 0.3035125732421875, + "step": 251 + }, + { + "epoch": 0.034068441064638785, + "grad_norm": 7.3283796310424805, + "learning_rate": 1.470703125e-05, + "loss": 0.2931327819824219, + "step": 252 + }, + { + "epoch": 0.034203633291085764, + "grad_norm": 11.640965461730957, + "learning_rate": 1.4765625e-05, + "loss": 0.22333145141601562, + "step": 253 + }, + { + "epoch": 0.03433882551753274, + "grad_norm": 11.33310604095459, + "learning_rate": 1.482421875e-05, + "loss": 0.33174896240234375, + "step": 254 + }, + { + "epoch": 0.03447401774397972, + "grad_norm": 10.178672790527344, + "learning_rate": 1.48828125e-05, + "loss": 0.30368804931640625, + "step": 255 + }, + { + "epoch": 0.0346092099704267, + "grad_norm": 12.790731430053711, + "learning_rate": 1.4941406250000001e-05, + "loss": 0.331085205078125, + "step": 256 + }, + { + "epoch": 0.03474440219687368, + "grad_norm": 8.673245429992676, + "learning_rate": 1.5e-05, + "loss": 0.2547416687011719, + "step": 257 + }, + { + "epoch": 0.034879594423320656, + "grad_norm": 4.6530866622924805, + "learning_rate": 1.505859375e-05, + "loss": 0.2751197814941406, + "step": 258 + }, + { + "epoch": 0.03501478664976764, + "grad_norm": 6.476203918457031, + "learning_rate": 1.51171875e-05, + "loss": 0.28345680236816406, + "step": 259 + }, + { + "epoch": 0.03514997887621462, + "grad_norm": 14.593551635742188, + "learning_rate": 1.517578125e-05, + "loss": 0.3824882507324219, + "step": 260 + }, + { + "epoch": 0.0352851711026616, + "grad_norm": 6.374526500701904, + "learning_rate": 1.5234375000000001e-05, + "loss": 0.28825950622558594, + "step": 261 + }, + { + "epoch": 0.03542036332910858, + "grad_norm": 3.9945902824401855, + "learning_rate": 1.529296875e-05, + "loss": 0.2472667694091797, + "step": 262 + }, + { + "epoch": 0.035555555555555556, + "grad_norm": 2.7736659049987793, + "learning_rate": 1.53515625e-05, + "loss": 0.18715667724609375, + "step": 263 + }, + { + "epoch": 0.035690747782002534, + "grad_norm": 4.772533416748047, + "learning_rate": 1.541015625e-05, + "loss": 0.2139892578125, + "step": 264 + }, + { + "epoch": 0.03582594000844951, + "grad_norm": 5.28697395324707, + "learning_rate": 1.546875e-05, + "loss": 0.271026611328125, + "step": 265 + }, + { + "epoch": 0.03596113223489649, + "grad_norm": 4.601757526397705, + "learning_rate": 1.552734375e-05, + "loss": 0.3020343780517578, + "step": 266 + }, + { + "epoch": 0.03609632446134347, + "grad_norm": 20.658479690551758, + "learning_rate": 1.5585937500000002e-05, + "loss": 0.3063621520996094, + "step": 267 + }, + { + "epoch": 0.036231516687790455, + "grad_norm": 17.867830276489258, + "learning_rate": 1.564453125e-05, + "loss": 0.245086669921875, + "step": 268 + }, + { + "epoch": 0.03636670891423743, + "grad_norm": 17.761445999145508, + "learning_rate": 1.5703125e-05, + "loss": 0.27483367919921875, + "step": 269 + }, + { + "epoch": 0.03650190114068441, + "grad_norm": 8.101611137390137, + "learning_rate": 1.576171875e-05, + "loss": 0.27150917053222656, + "step": 270 + }, + { + "epoch": 0.03663709336713139, + "grad_norm": 9.16867733001709, + "learning_rate": 1.58203125e-05, + "loss": 0.29632568359375, + "step": 271 + }, + { + "epoch": 0.03677228559357837, + "grad_norm": 5.931591987609863, + "learning_rate": 1.587890625e-05, + "loss": 0.253448486328125, + "step": 272 + }, + { + "epoch": 0.03690747782002535, + "grad_norm": 9.642913818359375, + "learning_rate": 1.59375e-05, + "loss": 0.3577842712402344, + "step": 273 + }, + { + "epoch": 0.037042670046472326, + "grad_norm": 8.441208839416504, + "learning_rate": 1.599609375e-05, + "loss": 0.2773256301879883, + "step": 274 + }, + { + "epoch": 0.037177862272919304, + "grad_norm": 7.997591018676758, + "learning_rate": 1.60546875e-05, + "loss": 0.3258819580078125, + "step": 275 + }, + { + "epoch": 0.03731305449936629, + "grad_norm": 2.695664167404175, + "learning_rate": 1.611328125e-05, + "loss": 0.2272777557373047, + "step": 276 + }, + { + "epoch": 0.03744824672581327, + "grad_norm": 15.456908226013184, + "learning_rate": 1.6171875000000002e-05, + "loss": 0.2947044372558594, + "step": 277 + }, + { + "epoch": 0.03758343895226025, + "grad_norm": 11.73242473602295, + "learning_rate": 1.623046875e-05, + "loss": 0.24881935119628906, + "step": 278 + }, + { + "epoch": 0.037718631178707225, + "grad_norm": 3.7097480297088623, + "learning_rate": 1.62890625e-05, + "loss": 0.2575969696044922, + "step": 279 + }, + { + "epoch": 0.037853823405154204, + "grad_norm": 6.957103252410889, + "learning_rate": 1.634765625e-05, + "loss": 0.26689910888671875, + "step": 280 + }, + { + "epoch": 0.03798901563160118, + "grad_norm": 10.129047393798828, + "learning_rate": 1.640625e-05, + "loss": 0.258697509765625, + "step": 281 + }, + { + "epoch": 0.03812420785804816, + "grad_norm": 4.334877967834473, + "learning_rate": 1.646484375e-05, + "loss": 0.2727241516113281, + "step": 282 + }, + { + "epoch": 0.03825940008449514, + "grad_norm": 7.06351900100708, + "learning_rate": 1.65234375e-05, + "loss": 0.24359893798828125, + "step": 283 + }, + { + "epoch": 0.03839459231094212, + "grad_norm": 9.38354778289795, + "learning_rate": 1.6582031250000002e-05, + "loss": 0.24840164184570312, + "step": 284 + }, + { + "epoch": 0.0385297845373891, + "grad_norm": 3.335857391357422, + "learning_rate": 1.6640625e-05, + "loss": 0.21318435668945312, + "step": 285 + }, + { + "epoch": 0.03866497676383608, + "grad_norm": 4.352935314178467, + "learning_rate": 1.669921875e-05, + "loss": 0.2790679931640625, + "step": 286 + }, + { + "epoch": 0.03880016899028306, + "grad_norm": 2.930211305618286, + "learning_rate": 1.67578125e-05, + "loss": 0.23891067504882812, + "step": 287 + }, + { + "epoch": 0.03893536121673004, + "grad_norm": 3.004099130630493, + "learning_rate": 1.681640625e-05, + "loss": 0.2674732208251953, + "step": 288 + }, + { + "epoch": 0.03907055344317702, + "grad_norm": 2.01277232170105, + "learning_rate": 1.6875e-05, + "loss": 0.17083263397216797, + "step": 289 + }, + { + "epoch": 0.039205745669623995, + "grad_norm": 7.299152374267578, + "learning_rate": 1.693359375e-05, + "loss": 0.24252033233642578, + "step": 290 + }, + { + "epoch": 0.039340937896070974, + "grad_norm": 3.011568546295166, + "learning_rate": 1.69921875e-05, + "loss": 0.20447540283203125, + "step": 291 + }, + { + "epoch": 0.03947613012251795, + "grad_norm": 23.560914993286133, + "learning_rate": 1.705078125e-05, + "loss": 0.36942291259765625, + "step": 292 + }, + { + "epoch": 0.03961132234896494, + "grad_norm": 21.460453033447266, + "learning_rate": 1.7109375e-05, + "loss": 0.3147735595703125, + "step": 293 + }, + { + "epoch": 0.039746514575411916, + "grad_norm": 10.668805122375488, + "learning_rate": 1.7167968750000002e-05, + "loss": 0.2743339538574219, + "step": 294 + }, + { + "epoch": 0.039881706801858895, + "grad_norm": 13.089706420898438, + "learning_rate": 1.72265625e-05, + "loss": 0.3108539581298828, + "step": 295 + }, + { + "epoch": 0.04001689902830587, + "grad_norm": 7.003668785095215, + "learning_rate": 1.728515625e-05, + "loss": 0.16877460479736328, + "step": 296 + }, + { + "epoch": 0.04015209125475285, + "grad_norm": 11.977317810058594, + "learning_rate": 1.734375e-05, + "loss": 0.32431793212890625, + "step": 297 + }, + { + "epoch": 0.04028728348119983, + "grad_norm": 5.474246025085449, + "learning_rate": 1.740234375e-05, + "loss": 0.3123817443847656, + "step": 298 + }, + { + "epoch": 0.04042247570764681, + "grad_norm": 10.325031280517578, + "learning_rate": 1.74609375e-05, + "loss": 0.2569923400878906, + "step": 299 + }, + { + "epoch": 0.04055766793409379, + "grad_norm": 10.47744083404541, + "learning_rate": 1.751953125e-05, + "loss": 0.301055908203125, + "step": 300 + }, + { + "epoch": 0.040692860160540766, + "grad_norm": 3.0338551998138428, + "learning_rate": 1.7578125000000002e-05, + "loss": 0.21859359741210938, + "step": 301 + }, + { + "epoch": 0.04082805238698775, + "grad_norm": 12.519035339355469, + "learning_rate": 1.763671875e-05, + "loss": 0.23868560791015625, + "step": 302 + }, + { + "epoch": 0.04096324461343473, + "grad_norm": 10.300127029418945, + "learning_rate": 1.76953125e-05, + "loss": 0.2349681854248047, + "step": 303 + }, + { + "epoch": 0.04109843683988171, + "grad_norm": 8.500965118408203, + "learning_rate": 1.775390625e-05, + "loss": 0.30113983154296875, + "step": 304 + }, + { + "epoch": 0.041233629066328686, + "grad_norm": 2.5798254013061523, + "learning_rate": 1.78125e-05, + "loss": 0.25214195251464844, + "step": 305 + }, + { + "epoch": 0.041368821292775665, + "grad_norm": 7.849074840545654, + "learning_rate": 1.787109375e-05, + "loss": 0.2838611602783203, + "step": 306 + }, + { + "epoch": 0.04150401351922264, + "grad_norm": 8.086491584777832, + "learning_rate": 1.79296875e-05, + "loss": 0.24018478393554688, + "step": 307 + }, + { + "epoch": 0.04163920574566962, + "grad_norm": 7.702834606170654, + "learning_rate": 1.798828125e-05, + "loss": 0.31969451904296875, + "step": 308 + }, + { + "epoch": 0.0417743979721166, + "grad_norm": 3.2762339115142822, + "learning_rate": 1.8046875e-05, + "loss": 0.2358264923095703, + "step": 309 + }, + { + "epoch": 0.041909590198563586, + "grad_norm": 4.388336658477783, + "learning_rate": 1.810546875e-05, + "loss": 0.27145957946777344, + "step": 310 + }, + { + "epoch": 0.042044782425010564, + "grad_norm": 13.492469787597656, + "learning_rate": 1.8164062500000002e-05, + "loss": 0.3591728210449219, + "step": 311 + }, + { + "epoch": 0.04217997465145754, + "grad_norm": 6.579132556915283, + "learning_rate": 1.822265625e-05, + "loss": 0.3139381408691406, + "step": 312 + }, + { + "epoch": 0.04231516687790452, + "grad_norm": 4.826672554016113, + "learning_rate": 1.828125e-05, + "loss": 0.2637805938720703, + "step": 313 + }, + { + "epoch": 0.0424503591043515, + "grad_norm": 7.286214828491211, + "learning_rate": 1.833984375e-05, + "loss": 0.17613601684570312, + "step": 314 + }, + { + "epoch": 0.04258555133079848, + "grad_norm": 8.452092170715332, + "learning_rate": 1.83984375e-05, + "loss": 0.224517822265625, + "step": 315 + }, + { + "epoch": 0.04272074355724546, + "grad_norm": 7.877833366394043, + "learning_rate": 1.845703125e-05, + "loss": 0.23193931579589844, + "step": 316 + }, + { + "epoch": 0.042855935783692435, + "grad_norm": 7.8617753982543945, + "learning_rate": 1.8515625e-05, + "loss": 0.25810813903808594, + "step": 317 + }, + { + "epoch": 0.042991128010139414, + "grad_norm": 7.064297199249268, + "learning_rate": 1.8574218750000002e-05, + "loss": 0.262176513671875, + "step": 318 + }, + { + "epoch": 0.0431263202365864, + "grad_norm": 4.151225566864014, + "learning_rate": 1.86328125e-05, + "loss": 0.2243175506591797, + "step": 319 + }, + { + "epoch": 0.04326151246303338, + "grad_norm": 3.794111490249634, + "learning_rate": 1.869140625e-05, + "loss": 0.20331192016601562, + "step": 320 + }, + { + "epoch": 0.043396704689480356, + "grad_norm": 2.461073875427246, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.21922779083251953, + "step": 321 + }, + { + "epoch": 0.043531896915927334, + "grad_norm": 13.333686828613281, + "learning_rate": 1.880859375e-05, + "loss": 0.33484649658203125, + "step": 322 + }, + { + "epoch": 0.04366708914237431, + "grad_norm": 2.3405187129974365, + "learning_rate": 1.88671875e-05, + "loss": 0.2036457061767578, + "step": 323 + }, + { + "epoch": 0.04380228136882129, + "grad_norm": 2.2139670848846436, + "learning_rate": 1.892578125e-05, + "loss": 0.24195098876953125, + "step": 324 + }, + { + "epoch": 0.04393747359526827, + "grad_norm": 6.235994815826416, + "learning_rate": 1.8984375e-05, + "loss": 0.2675590515136719, + "step": 325 + }, + { + "epoch": 0.04407266582171525, + "grad_norm": 7.546850204467773, + "learning_rate": 1.904296875e-05, + "loss": 0.26372528076171875, + "step": 326 + }, + { + "epoch": 0.044207858048162234, + "grad_norm": 3.1368439197540283, + "learning_rate": 1.91015625e-05, + "loss": 0.16007423400878906, + "step": 327 + }, + { + "epoch": 0.04434305027460921, + "grad_norm": 3.7086551189422607, + "learning_rate": 1.9160156250000002e-05, + "loss": 0.21489334106445312, + "step": 328 + }, + { + "epoch": 0.04447824250105619, + "grad_norm": 4.261851787567139, + "learning_rate": 1.921875e-05, + "loss": 0.2682380676269531, + "step": 329 + }, + { + "epoch": 0.04461343472750317, + "grad_norm": 8.233115196228027, + "learning_rate": 1.927734375e-05, + "loss": 0.23845291137695312, + "step": 330 + }, + { + "epoch": 0.04474862695395015, + "grad_norm": 4.965247631072998, + "learning_rate": 1.93359375e-05, + "loss": 0.2496166229248047, + "step": 331 + }, + { + "epoch": 0.044883819180397126, + "grad_norm": 7.692807197570801, + "learning_rate": 1.939453125e-05, + "loss": 0.3063468933105469, + "step": 332 + }, + { + "epoch": 0.045019011406844105, + "grad_norm": 6.241950988769531, + "learning_rate": 1.9453125e-05, + "loss": 0.1916065216064453, + "step": 333 + }, + { + "epoch": 0.04515420363329108, + "grad_norm": 4.692477226257324, + "learning_rate": 1.951171875e-05, + "loss": 0.3115119934082031, + "step": 334 + }, + { + "epoch": 0.04528939585973806, + "grad_norm": 5.504664421081543, + "learning_rate": 1.95703125e-05, + "loss": 0.24482059478759766, + "step": 335 + }, + { + "epoch": 0.04542458808618505, + "grad_norm": 7.126749515533447, + "learning_rate": 1.962890625e-05, + "loss": 0.29393482208251953, + "step": 336 + }, + { + "epoch": 0.045559780312632026, + "grad_norm": 2.8093347549438477, + "learning_rate": 1.96875e-05, + "loss": 0.18366432189941406, + "step": 337 + }, + { + "epoch": 0.045694972539079004, + "grad_norm": 3.0783469676971436, + "learning_rate": 1.9746093750000002e-05, + "loss": 0.2876453399658203, + "step": 338 + }, + { + "epoch": 0.04583016476552598, + "grad_norm": 3.5758750438690186, + "learning_rate": 1.98046875e-05, + "loss": 0.1931171417236328, + "step": 339 + }, + { + "epoch": 0.04596535699197296, + "grad_norm": 2.6777350902557373, + "learning_rate": 1.986328125e-05, + "loss": 0.16173744201660156, + "step": 340 + }, + { + "epoch": 0.04610054921841994, + "grad_norm": 14.727062225341797, + "learning_rate": 1.9921875e-05, + "loss": 0.34665393829345703, + "step": 341 + }, + { + "epoch": 0.04623574144486692, + "grad_norm": 6.252585411071777, + "learning_rate": 1.998046875e-05, + "loss": 0.2133960723876953, + "step": 342 + }, + { + "epoch": 0.046370933671313896, + "grad_norm": 6.252894401550293, + "learning_rate": 2.00390625e-05, + "loss": 0.231964111328125, + "step": 343 + }, + { + "epoch": 0.04650612589776088, + "grad_norm": 9.064559936523438, + "learning_rate": 2.009765625e-05, + "loss": 0.269866943359375, + "step": 344 + }, + { + "epoch": 0.04664131812420786, + "grad_norm": 2.912302017211914, + "learning_rate": 2.0156250000000002e-05, + "loss": 0.24353790283203125, + "step": 345 + }, + { + "epoch": 0.04677651035065484, + "grad_norm": 9.062849044799805, + "learning_rate": 2.021484375e-05, + "loss": 0.23482990264892578, + "step": 346 + }, + { + "epoch": 0.04691170257710182, + "grad_norm": 10.70043659210205, + "learning_rate": 2.02734375e-05, + "loss": 0.23520183563232422, + "step": 347 + }, + { + "epoch": 0.047046894803548796, + "grad_norm": 12.147732734680176, + "learning_rate": 2.033203125e-05, + "loss": 0.3428192138671875, + "step": 348 + }, + { + "epoch": 0.047182087029995774, + "grad_norm": 3.205064535140991, + "learning_rate": 2.0390625e-05, + "loss": 0.25203704833984375, + "step": 349 + }, + { + "epoch": 0.04731727925644275, + "grad_norm": 12.81641960144043, + "learning_rate": 2.044921875e-05, + "loss": 0.3087310791015625, + "step": 350 + }, + { + "epoch": 0.04745247148288973, + "grad_norm": 12.45585823059082, + "learning_rate": 2.05078125e-05, + "loss": 0.2872276306152344, + "step": 351 + }, + { + "epoch": 0.04758766370933671, + "grad_norm": 8.705986022949219, + "learning_rate": 2.056640625e-05, + "loss": 0.22833251953125, + "step": 352 + }, + { + "epoch": 0.047722855935783695, + "grad_norm": 1.7599164247512817, + "learning_rate": 2.0625e-05, + "loss": 0.22316741943359375, + "step": 353 + }, + { + "epoch": 0.047858048162230674, + "grad_norm": 4.0907182693481445, + "learning_rate": 2.068359375e-05, + "loss": 0.18764877319335938, + "step": 354 + }, + { + "epoch": 0.04799324038867765, + "grad_norm": 6.2126593589782715, + "learning_rate": 2.0742187500000002e-05, + "loss": 0.20470428466796875, + "step": 355 + }, + { + "epoch": 0.04812843261512463, + "grad_norm": 3.4249207973480225, + "learning_rate": 2.080078125e-05, + "loss": 0.245849609375, + "step": 356 + }, + { + "epoch": 0.04826362484157161, + "grad_norm": 3.2741127014160156, + "learning_rate": 2.0859375e-05, + "loss": 0.23532485961914062, + "step": 357 + }, + { + "epoch": 0.04839881706801859, + "grad_norm": 5.984785079956055, + "learning_rate": 2.091796875e-05, + "loss": 0.22047042846679688, + "step": 358 + }, + { + "epoch": 0.048534009294465566, + "grad_norm": 4.98041296005249, + "learning_rate": 2.09765625e-05, + "loss": 0.22716712951660156, + "step": 359 + }, + { + "epoch": 0.048669201520912544, + "grad_norm": 3.615262508392334, + "learning_rate": 2.103515625e-05, + "loss": 0.17584776878356934, + "step": 360 + }, + { + "epoch": 0.04880439374735953, + "grad_norm": 6.543410778045654, + "learning_rate": 2.109375e-05, + "loss": 0.2426910400390625, + "step": 361 + }, + { + "epoch": 0.04893958597380651, + "grad_norm": 10.300976753234863, + "learning_rate": 2.1152343750000002e-05, + "loss": 0.3159904479980469, + "step": 362 + }, + { + "epoch": 0.04907477820025349, + "grad_norm": 3.458944082260132, + "learning_rate": 2.12109375e-05, + "loss": 0.3029327392578125, + "step": 363 + }, + { + "epoch": 0.049209970426700465, + "grad_norm": 3.7983591556549072, + "learning_rate": 2.126953125e-05, + "loss": 0.26883888244628906, + "step": 364 + }, + { + "epoch": 0.049345162653147444, + "grad_norm": 2.6477882862091064, + "learning_rate": 2.1328125000000002e-05, + "loss": 0.22419357299804688, + "step": 365 + }, + { + "epoch": 0.04948035487959442, + "grad_norm": 8.004109382629395, + "learning_rate": 2.138671875e-05, + "loss": 0.3123588562011719, + "step": 366 + }, + { + "epoch": 0.0496155471060414, + "grad_norm": 4.6794867515563965, + "learning_rate": 2.14453125e-05, + "loss": 0.28144264221191406, + "step": 367 + }, + { + "epoch": 0.04975073933248838, + "grad_norm": 2.9711503982543945, + "learning_rate": 2.150390625e-05, + "loss": 0.3131561279296875, + "step": 368 + }, + { + "epoch": 0.049885931558935365, + "grad_norm": 2.825662851333618, + "learning_rate": 2.15625e-05, + "loss": 0.3069877624511719, + "step": 369 + }, + { + "epoch": 0.05002112378538234, + "grad_norm": 2.660505771636963, + "learning_rate": 2.162109375e-05, + "loss": 0.18547439575195312, + "step": 370 + }, + { + "epoch": 0.05015631601182932, + "grad_norm": 3.1826038360595703, + "learning_rate": 2.16796875e-05, + "loss": 0.23659896850585938, + "step": 371 + }, + { + "epoch": 0.0502915082382763, + "grad_norm": 2.21543550491333, + "learning_rate": 2.1738281250000002e-05, + "loss": 0.2086467742919922, + "step": 372 + }, + { + "epoch": 0.05042670046472328, + "grad_norm": 3.2628138065338135, + "learning_rate": 2.1796875e-05, + "loss": 0.25342559814453125, + "step": 373 + }, + { + "epoch": 0.05056189269117026, + "grad_norm": 2.196044921875, + "learning_rate": 2.185546875e-05, + "loss": 0.23962783813476562, + "step": 374 + }, + { + "epoch": 0.050697084917617236, + "grad_norm": 9.615337371826172, + "learning_rate": 2.19140625e-05, + "loss": 0.2426605224609375, + "step": 375 + }, + { + "epoch": 0.050832277144064214, + "grad_norm": 6.569389820098877, + "learning_rate": 2.197265625e-05, + "loss": 0.26476287841796875, + "step": 376 + }, + { + "epoch": 0.05096746937051119, + "grad_norm": 4.2048115730285645, + "learning_rate": 2.203125e-05, + "loss": 0.30292510986328125, + "step": 377 + }, + { + "epoch": 0.05110266159695818, + "grad_norm": 6.898926734924316, + "learning_rate": 2.208984375e-05, + "loss": 0.23542022705078125, + "step": 378 + }, + { + "epoch": 0.051237853823405156, + "grad_norm": 4.4261274337768555, + "learning_rate": 2.2148437500000002e-05, + "loss": 0.22298049926757812, + "step": 379 + }, + { + "epoch": 0.051373046049852135, + "grad_norm": 3.0320937633514404, + "learning_rate": 2.220703125e-05, + "loss": 0.3094482421875, + "step": 380 + }, + { + "epoch": 0.05150823827629911, + "grad_norm": 6.120395183563232, + "learning_rate": 2.2265625e-05, + "loss": 0.26009178161621094, + "step": 381 + }, + { + "epoch": 0.05164343050274609, + "grad_norm": 2.3273556232452393, + "learning_rate": 2.2324218750000002e-05, + "loss": 0.1851358413696289, + "step": 382 + }, + { + "epoch": 0.05177862272919307, + "grad_norm": 8.57958698272705, + "learning_rate": 2.23828125e-05, + "loss": 0.32439231872558594, + "step": 383 + }, + { + "epoch": 0.05191381495564005, + "grad_norm": 5.873696804046631, + "learning_rate": 2.244140625e-05, + "loss": 0.18160438537597656, + "step": 384 + }, + { + "epoch": 0.05204900718208703, + "grad_norm": 3.3337645530700684, + "learning_rate": 2.25e-05, + "loss": 0.2522258758544922, + "step": 385 + }, + { + "epoch": 0.05218419940853401, + "grad_norm": 2.935910701751709, + "learning_rate": 2.255859375e-05, + "loss": 0.21403980255126953, + "step": 386 + }, + { + "epoch": 0.05231939163498099, + "grad_norm": 1.5019489526748657, + "learning_rate": 2.26171875e-05, + "loss": 0.19890975952148438, + "step": 387 + }, + { + "epoch": 0.05245458386142797, + "grad_norm": 2.947593927383423, + "learning_rate": 2.267578125e-05, + "loss": 0.22186899185180664, + "step": 388 + }, + { + "epoch": 0.05258977608787495, + "grad_norm": 4.533196449279785, + "learning_rate": 2.2734375000000002e-05, + "loss": 0.24439239501953125, + "step": 389 + }, + { + "epoch": 0.05272496831432193, + "grad_norm": 2.6896891593933105, + "learning_rate": 2.279296875e-05, + "loss": 0.22571945190429688, + "step": 390 + }, + { + "epoch": 0.052860160540768905, + "grad_norm": 6.780669689178467, + "learning_rate": 2.28515625e-05, + "loss": 0.14905357360839844, + "step": 391 + }, + { + "epoch": 0.052995352767215884, + "grad_norm": 5.1014814376831055, + "learning_rate": 2.291015625e-05, + "loss": 0.21990394592285156, + "step": 392 + }, + { + "epoch": 0.05313054499366286, + "grad_norm": 2.7403886318206787, + "learning_rate": 2.296875e-05, + "loss": 0.1731433868408203, + "step": 393 + }, + { + "epoch": 0.05326573722010984, + "grad_norm": 3.225177049636841, + "learning_rate": 2.302734375e-05, + "loss": 0.19796180725097656, + "step": 394 + }, + { + "epoch": 0.053400929446556826, + "grad_norm": 5.5813703536987305, + "learning_rate": 2.30859375e-05, + "loss": 0.2199115753173828, + "step": 395 + }, + { + "epoch": 0.053536121673003804, + "grad_norm": 5.583347320556641, + "learning_rate": 2.3144531250000002e-05, + "loss": 0.21397972106933594, + "step": 396 + }, + { + "epoch": 0.05367131389945078, + "grad_norm": 5.987549304962158, + "learning_rate": 2.3203125e-05, + "loss": 0.290771484375, + "step": 397 + }, + { + "epoch": 0.05380650612589776, + "grad_norm": 5.06838846206665, + "learning_rate": 2.326171875e-05, + "loss": 0.23844623565673828, + "step": 398 + }, + { + "epoch": 0.05394169835234474, + "grad_norm": 3.899824619293213, + "learning_rate": 2.3320312500000002e-05, + "loss": 0.23722076416015625, + "step": 399 + }, + { + "epoch": 0.05407689057879172, + "grad_norm": 4.47520112991333, + "learning_rate": 2.337890625e-05, + "loss": 0.2806396484375, + "step": 400 + }, + { + "epoch": 0.0542120828052387, + "grad_norm": 2.9857523441314697, + "learning_rate": 2.34375e-05, + "loss": 0.265472412109375, + "step": 401 + }, + { + "epoch": 0.054347275031685675, + "grad_norm": 7.4022016525268555, + "learning_rate": 2.349609375e-05, + "loss": 0.29033660888671875, + "step": 402 + }, + { + "epoch": 0.05448246725813266, + "grad_norm": 6.564642429351807, + "learning_rate": 2.35546875e-05, + "loss": 0.18271923065185547, + "step": 403 + }, + { + "epoch": 0.05461765948457964, + "grad_norm": 2.413015365600586, + "learning_rate": 2.361328125e-05, + "loss": 0.2622261047363281, + "step": 404 + }, + { + "epoch": 0.05475285171102662, + "grad_norm": 3.002732038497925, + "learning_rate": 2.3671875e-05, + "loss": 0.13452529907226562, + "step": 405 + }, + { + "epoch": 0.054888043937473596, + "grad_norm": 8.486944198608398, + "learning_rate": 2.3730468750000002e-05, + "loss": 0.22184371948242188, + "step": 406 + }, + { + "epoch": 0.055023236163920575, + "grad_norm": 6.0893425941467285, + "learning_rate": 2.37890625e-05, + "loss": 0.1980915069580078, + "step": 407 + }, + { + "epoch": 0.05515842839036755, + "grad_norm": 5.347884654998779, + "learning_rate": 2.384765625e-05, + "loss": 0.2153921127319336, + "step": 408 + }, + { + "epoch": 0.05529362061681453, + "grad_norm": 5.466994285583496, + "learning_rate": 2.3906250000000002e-05, + "loss": 0.2512245178222656, + "step": 409 + }, + { + "epoch": 0.05542881284326151, + "grad_norm": 8.381427764892578, + "learning_rate": 2.396484375e-05, + "loss": 0.22220420837402344, + "step": 410 + }, + { + "epoch": 0.05556400506970849, + "grad_norm": 6.532052516937256, + "learning_rate": 2.40234375e-05, + "loss": 0.20798015594482422, + "step": 411 + }, + { + "epoch": 0.055699197296155474, + "grad_norm": 2.884687900543213, + "learning_rate": 2.408203125e-05, + "loss": 0.22382354736328125, + "step": 412 + }, + { + "epoch": 0.05583438952260245, + "grad_norm": 6.863306045532227, + "learning_rate": 2.4140625e-05, + "loss": 0.22544097900390625, + "step": 413 + }, + { + "epoch": 0.05596958174904943, + "grad_norm": 6.584400653839111, + "learning_rate": 2.419921875e-05, + "loss": 0.29451560974121094, + "step": 414 + }, + { + "epoch": 0.05610477397549641, + "grad_norm": 1.5605359077453613, + "learning_rate": 2.42578125e-05, + "loss": 0.14832115173339844, + "step": 415 + }, + { + "epoch": 0.05623996620194339, + "grad_norm": 7.915679454803467, + "learning_rate": 2.4316406250000002e-05, + "loss": 0.25832462310791016, + "step": 416 + }, + { + "epoch": 0.056375158428390366, + "grad_norm": 1.8255497217178345, + "learning_rate": 2.4375e-05, + "loss": 0.22922325134277344, + "step": 417 + }, + { + "epoch": 0.056510350654837345, + "grad_norm": 2.959744930267334, + "learning_rate": 2.443359375e-05, + "loss": 0.17743492126464844, + "step": 418 + }, + { + "epoch": 0.05664554288128432, + "grad_norm": 2.6996357440948486, + "learning_rate": 2.44921875e-05, + "loss": 0.2768716812133789, + "step": 419 + }, + { + "epoch": 0.05678073510773131, + "grad_norm": 2.3034348487854004, + "learning_rate": 2.455078125e-05, + "loss": 0.1283588409423828, + "step": 420 + }, + { + "epoch": 0.05691592733417829, + "grad_norm": 2.5214056968688965, + "learning_rate": 2.4609375e-05, + "loss": 0.2310009002685547, + "step": 421 + }, + { + "epoch": 0.057051119560625266, + "grad_norm": 2.879680871963501, + "learning_rate": 2.466796875e-05, + "loss": 0.23724746704101562, + "step": 422 + }, + { + "epoch": 0.057186311787072244, + "grad_norm": 10.767793655395508, + "learning_rate": 2.4726562500000002e-05, + "loss": 0.325225830078125, + "step": 423 + }, + { + "epoch": 0.05732150401351922, + "grad_norm": 3.9540767669677734, + "learning_rate": 2.478515625e-05, + "loss": 0.20673561096191406, + "step": 424 + }, + { + "epoch": 0.0574566962399662, + "grad_norm": 1.7493932247161865, + "learning_rate": 2.484375e-05, + "loss": 0.19439697265625, + "step": 425 + }, + { + "epoch": 0.05759188846641318, + "grad_norm": 5.450231075286865, + "learning_rate": 2.4902343750000002e-05, + "loss": 0.22993087768554688, + "step": 426 + }, + { + "epoch": 0.05772708069286016, + "grad_norm": 8.564682960510254, + "learning_rate": 2.49609375e-05, + "loss": 0.2718772888183594, + "step": 427 + }, + { + "epoch": 0.05786227291930714, + "grad_norm": 6.0642476081848145, + "learning_rate": 2.501953125e-05, + "loss": 0.28029823303222656, + "step": 428 + }, + { + "epoch": 0.05799746514575412, + "grad_norm": 3.1526358127593994, + "learning_rate": 2.5078125e-05, + "loss": 0.18692588806152344, + "step": 429 + }, + { + "epoch": 0.0581326573722011, + "grad_norm": 4.7930498123168945, + "learning_rate": 2.513671875e-05, + "loss": 0.20549392700195312, + "step": 430 + }, + { + "epoch": 0.05826784959864808, + "grad_norm": 4.353790283203125, + "learning_rate": 2.51953125e-05, + "loss": 0.22398090362548828, + "step": 431 + }, + { + "epoch": 0.05840304182509506, + "grad_norm": 4.567189693450928, + "learning_rate": 2.525390625e-05, + "loss": 0.23551559448242188, + "step": 432 + }, + { + "epoch": 0.058538234051542036, + "grad_norm": 3.749096393585205, + "learning_rate": 2.5312500000000002e-05, + "loss": 0.24753570556640625, + "step": 433 + }, + { + "epoch": 0.058673426277989014, + "grad_norm": 6.223649501800537, + "learning_rate": 2.537109375e-05, + "loss": 0.295318603515625, + "step": 434 + }, + { + "epoch": 0.05880861850443599, + "grad_norm": 4.8487548828125, + "learning_rate": 2.54296875e-05, + "loss": 0.23954010009765625, + "step": 435 + }, + { + "epoch": 0.05894381073088297, + "grad_norm": 3.14316987991333, + "learning_rate": 2.548828125e-05, + "loss": 0.2597484588623047, + "step": 436 + }, + { + "epoch": 0.05907900295732996, + "grad_norm": 3.5172786712646484, + "learning_rate": 2.5546875e-05, + "loss": 0.22348642349243164, + "step": 437 + }, + { + "epoch": 0.059214195183776935, + "grad_norm": 2.4640042781829834, + "learning_rate": 2.560546875e-05, + "loss": 0.29814910888671875, + "step": 438 + }, + { + "epoch": 0.059349387410223914, + "grad_norm": 2.347519636154175, + "learning_rate": 2.56640625e-05, + "loss": 0.25396728515625, + "step": 439 + }, + { + "epoch": 0.05948457963667089, + "grad_norm": 5.281825065612793, + "learning_rate": 2.5722656250000002e-05, + "loss": 0.23248767852783203, + "step": 440 + }, + { + "epoch": 0.05961977186311787, + "grad_norm": 1.6211901903152466, + "learning_rate": 2.578125e-05, + "loss": 0.1940174102783203, + "step": 441 + }, + { + "epoch": 0.05975496408956485, + "grad_norm": 4.001313209533691, + "learning_rate": 2.583984375e-05, + "loss": 0.23023605346679688, + "step": 442 + }, + { + "epoch": 0.05989015631601183, + "grad_norm": 6.017233848571777, + "learning_rate": 2.5898437500000002e-05, + "loss": 0.2937355041503906, + "step": 443 + }, + { + "epoch": 0.060025348542458806, + "grad_norm": 2.354102611541748, + "learning_rate": 2.595703125e-05, + "loss": 0.22694778442382812, + "step": 444 + }, + { + "epoch": 0.060160540768905785, + "grad_norm": 2.4847521781921387, + "learning_rate": 2.6015625e-05, + "loss": 0.21416473388671875, + "step": 445 + }, + { + "epoch": 0.06029573299535277, + "grad_norm": 1.6427913904190063, + "learning_rate": 2.607421875e-05, + "loss": 0.19562911987304688, + "step": 446 + }, + { + "epoch": 0.06043092522179975, + "grad_norm": 3.264057159423828, + "learning_rate": 2.61328125e-05, + "loss": 0.29549407958984375, + "step": 447 + }, + { + "epoch": 0.06056611744824673, + "grad_norm": 3.7565858364105225, + "learning_rate": 2.619140625e-05, + "loss": 0.2630767822265625, + "step": 448 + }, + { + "epoch": 0.060701309674693706, + "grad_norm": 4.027137279510498, + "learning_rate": 2.625e-05, + "loss": 0.27564239501953125, + "step": 449 + }, + { + "epoch": 0.060836501901140684, + "grad_norm": 6.034366607666016, + "learning_rate": 2.6308593750000002e-05, + "loss": 0.22731399536132812, + "step": 450 + }, + { + "epoch": 0.06097169412758766, + "grad_norm": 4.657773494720459, + "learning_rate": 2.63671875e-05, + "loss": 0.18934249877929688, + "step": 451 + }, + { + "epoch": 0.06110688635403464, + "grad_norm": 1.4614509344100952, + "learning_rate": 2.642578125e-05, + "loss": 0.1571674346923828, + "step": 452 + }, + { + "epoch": 0.06124207858048162, + "grad_norm": 3.297563314437866, + "learning_rate": 2.6484375000000002e-05, + "loss": 0.22957229614257812, + "step": 453 + }, + { + "epoch": 0.061377270806928605, + "grad_norm": 4.779603004455566, + "learning_rate": 2.654296875e-05, + "loss": 0.273284912109375, + "step": 454 + }, + { + "epoch": 0.06151246303337558, + "grad_norm": 5.337319374084473, + "learning_rate": 2.66015625e-05, + "loss": 0.2867279052734375, + "step": 455 + }, + { + "epoch": 0.06164765525982256, + "grad_norm": 2.4499621391296387, + "learning_rate": 2.666015625e-05, + "loss": 0.2900524139404297, + "step": 456 + }, + { + "epoch": 0.06178284748626954, + "grad_norm": 11.705016136169434, + "learning_rate": 2.6718750000000002e-05, + "loss": 0.2447681427001953, + "step": 457 + }, + { + "epoch": 0.06191803971271652, + "grad_norm": 9.218425750732422, + "learning_rate": 2.677734375e-05, + "loss": 0.2472400665283203, + "step": 458 + }, + { + "epoch": 0.0620532319391635, + "grad_norm": 7.032962799072266, + "learning_rate": 2.68359375e-05, + "loss": 0.1722564697265625, + "step": 459 + }, + { + "epoch": 0.062188424165610476, + "grad_norm": 2.389957904815674, + "learning_rate": 2.6894531250000002e-05, + "loss": 0.2060413360595703, + "step": 460 + }, + { + "epoch": 0.062323616392057454, + "grad_norm": 3.1125590801239014, + "learning_rate": 2.6953125e-05, + "loss": 0.2782325744628906, + "step": 461 + }, + { + "epoch": 0.06245880861850443, + "grad_norm": 6.986378192901611, + "learning_rate": 2.701171875e-05, + "loss": 0.2196359634399414, + "step": 462 + }, + { + "epoch": 0.06259400084495141, + "grad_norm": 3.1970584392547607, + "learning_rate": 2.70703125e-05, + "loss": 0.24990081787109375, + "step": 463 + }, + { + "epoch": 0.06272919307139839, + "grad_norm": 2.5662636756896973, + "learning_rate": 2.712890625e-05, + "loss": 0.20447540283203125, + "step": 464 + }, + { + "epoch": 0.06286438529784537, + "grad_norm": 3.5405521392822266, + "learning_rate": 2.71875e-05, + "loss": 0.29277610778808594, + "step": 465 + }, + { + "epoch": 0.06299957752429235, + "grad_norm": 1.644301414489746, + "learning_rate": 2.724609375e-05, + "loss": 0.2523994445800781, + "step": 466 + }, + { + "epoch": 0.06313476975073934, + "grad_norm": 3.12206768989563, + "learning_rate": 2.7304687500000002e-05, + "loss": 0.2957744598388672, + "step": 467 + }, + { + "epoch": 0.06326996197718632, + "grad_norm": 1.8226984739303589, + "learning_rate": 2.736328125e-05, + "loss": 0.22678375244140625, + "step": 468 + }, + { + "epoch": 0.0634051542036333, + "grad_norm": 3.7132630348205566, + "learning_rate": 2.7421875e-05, + "loss": 0.2456817626953125, + "step": 469 + }, + { + "epoch": 0.06354034643008027, + "grad_norm": 3.246887683868408, + "learning_rate": 2.7480468750000002e-05, + "loss": 0.19822216033935547, + "step": 470 + }, + { + "epoch": 0.06367553865652725, + "grad_norm": 2.4337685108184814, + "learning_rate": 2.75390625e-05, + "loss": 0.25124549865722656, + "step": 471 + }, + { + "epoch": 0.06381073088297423, + "grad_norm": 1.425062894821167, + "learning_rate": 2.759765625e-05, + "loss": 0.15297698974609375, + "step": 472 + }, + { + "epoch": 0.06394592310942121, + "grad_norm": 1.73653244972229, + "learning_rate": 2.765625e-05, + "loss": 0.2443256378173828, + "step": 473 + }, + { + "epoch": 0.06408111533586819, + "grad_norm": 6.485707759857178, + "learning_rate": 2.7714843750000002e-05, + "loss": 0.24271011352539062, + "step": 474 + }, + { + "epoch": 0.06421630756231517, + "grad_norm": 4.944649696350098, + "learning_rate": 2.77734375e-05, + "loss": 0.20535194873809814, + "step": 475 + }, + { + "epoch": 0.06435149978876215, + "grad_norm": 1.7760387659072876, + "learning_rate": 2.783203125e-05, + "loss": 0.18415069580078125, + "step": 476 + }, + { + "epoch": 0.06448669201520912, + "grad_norm": 2.805852174758911, + "learning_rate": 2.7890625000000002e-05, + "loss": 0.2592926025390625, + "step": 477 + }, + { + "epoch": 0.0646218842416561, + "grad_norm": 7.339973449707031, + "learning_rate": 2.794921875e-05, + "loss": 0.303680419921875, + "step": 478 + }, + { + "epoch": 0.06475707646810308, + "grad_norm": 3.530911684036255, + "learning_rate": 2.80078125e-05, + "loss": 0.1755390167236328, + "step": 479 + }, + { + "epoch": 0.06489226869455006, + "grad_norm": 2.5858101844787598, + "learning_rate": 2.806640625e-05, + "loss": 0.2688274383544922, + "step": 480 + }, + { + "epoch": 0.06502746092099704, + "grad_norm": 4.0986175537109375, + "learning_rate": 2.8125e-05, + "loss": 0.1589217185974121, + "step": 481 + }, + { + "epoch": 0.06516265314744402, + "grad_norm": 3.4985320568084717, + "learning_rate": 2.818359375e-05, + "loss": 0.269622802734375, + "step": 482 + }, + { + "epoch": 0.065297845373891, + "grad_norm": 2.5402328968048096, + "learning_rate": 2.82421875e-05, + "loss": 0.22033214569091797, + "step": 483 + }, + { + "epoch": 0.06543303760033799, + "grad_norm": 2.327655076980591, + "learning_rate": 2.8300781250000002e-05, + "loss": 0.2842884063720703, + "step": 484 + }, + { + "epoch": 0.06556822982678497, + "grad_norm": 6.257692813873291, + "learning_rate": 2.8359375e-05, + "loss": 0.2590751647949219, + "step": 485 + }, + { + "epoch": 0.06570342205323194, + "grad_norm": 1.8788670301437378, + "learning_rate": 2.841796875e-05, + "loss": 0.2032928466796875, + "step": 486 + }, + { + "epoch": 0.06583861427967892, + "grad_norm": 2.2117254734039307, + "learning_rate": 2.8476562500000002e-05, + "loss": 0.28125762939453125, + "step": 487 + }, + { + "epoch": 0.0659738065061259, + "grad_norm": 3.816068410873413, + "learning_rate": 2.853515625e-05, + "loss": 0.2964038848876953, + "step": 488 + }, + { + "epoch": 0.06610899873257288, + "grad_norm": 4.036928653717041, + "learning_rate": 2.859375e-05, + "loss": 0.20896530151367188, + "step": 489 + }, + { + "epoch": 0.06624419095901986, + "grad_norm": 1.7956700325012207, + "learning_rate": 2.865234375e-05, + "loss": 0.23501205444335938, + "step": 490 + }, + { + "epoch": 0.06637938318546684, + "grad_norm": 3.9310989379882812, + "learning_rate": 2.87109375e-05, + "loss": 0.29599761962890625, + "step": 491 + }, + { + "epoch": 0.06651457541191381, + "grad_norm": 2.839219808578491, + "learning_rate": 2.876953125e-05, + "loss": 0.26711463928222656, + "step": 492 + }, + { + "epoch": 0.0666497676383608, + "grad_norm": 3.587273597717285, + "learning_rate": 2.8828125e-05, + "loss": 0.2360992431640625, + "step": 493 + }, + { + "epoch": 0.06678495986480777, + "grad_norm": 5.025626182556152, + "learning_rate": 2.8886718750000002e-05, + "loss": 0.2226734161376953, + "step": 494 + }, + { + "epoch": 0.06692015209125475, + "grad_norm": 2.0641028881073, + "learning_rate": 2.89453125e-05, + "loss": 0.2490673065185547, + "step": 495 + }, + { + "epoch": 0.06705534431770173, + "grad_norm": 3.0104970932006836, + "learning_rate": 2.900390625e-05, + "loss": 0.2042388916015625, + "step": 496 + }, + { + "epoch": 0.06719053654414871, + "grad_norm": 2.581287384033203, + "learning_rate": 2.90625e-05, + "loss": 0.2679271697998047, + "step": 497 + }, + { + "epoch": 0.06732572877059569, + "grad_norm": 1.3771032094955444, + "learning_rate": 2.912109375e-05, + "loss": 0.18079185485839844, + "step": 498 + }, + { + "epoch": 0.06746092099704266, + "grad_norm": 1.7775890827178955, + "learning_rate": 2.91796875e-05, + "loss": 0.22919654846191406, + "step": 499 + }, + { + "epoch": 0.06759611322348964, + "grad_norm": 6.642019271850586, + "learning_rate": 2.923828125e-05, + "loss": 0.22353172302246094, + "step": 500 + }, + { + "epoch": 0.06773130544993664, + "grad_norm": 3.651278495788574, + "learning_rate": 2.9296875000000002e-05, + "loss": 0.1730327606201172, + "step": 501 + }, + { + "epoch": 0.06786649767638361, + "grad_norm": 1.6108444929122925, + "learning_rate": 2.935546875e-05, + "loss": 0.19461822509765625, + "step": 502 + }, + { + "epoch": 0.06800168990283059, + "grad_norm": 3.256890296936035, + "learning_rate": 2.94140625e-05, + "loss": 0.18629169464111328, + "step": 503 + }, + { + "epoch": 0.06813688212927757, + "grad_norm": 2.7766642570495605, + "learning_rate": 2.9472656250000002e-05, + "loss": 0.20125579833984375, + "step": 504 + }, + { + "epoch": 0.06827207435572455, + "grad_norm": 5.880879878997803, + "learning_rate": 2.953125e-05, + "loss": 0.2665996551513672, + "step": 505 + }, + { + "epoch": 0.06840726658217153, + "grad_norm": 7.447145938873291, + "learning_rate": 2.958984375e-05, + "loss": 0.24422645568847656, + "step": 506 + }, + { + "epoch": 0.0685424588086185, + "grad_norm": 4.4957685470581055, + "learning_rate": 2.96484375e-05, + "loss": 0.1899728775024414, + "step": 507 + }, + { + "epoch": 0.06867765103506548, + "grad_norm": 2.0955448150634766, + "learning_rate": 2.970703125e-05, + "loss": 0.190673828125, + "step": 508 + }, + { + "epoch": 0.06881284326151246, + "grad_norm": 2.9720211029052734, + "learning_rate": 2.9765625e-05, + "loss": 0.19868850708007812, + "step": 509 + }, + { + "epoch": 0.06894803548795944, + "grad_norm": 1.5614224672317505, + "learning_rate": 2.982421875e-05, + "loss": 0.19950485229492188, + "step": 510 + }, + { + "epoch": 0.06908322771440642, + "grad_norm": 2.4292871952056885, + "learning_rate": 2.9882812500000002e-05, + "loss": 0.18830108642578125, + "step": 511 + }, + { + "epoch": 0.0692184199408534, + "grad_norm": 2.116597890853882, + "learning_rate": 2.994140625e-05, + "loss": 0.23431777954101562, + "step": 512 + }, + { + "epoch": 0.06935361216730038, + "grad_norm": 2.2066287994384766, + "learning_rate": 3e-05, + "loss": 0.2046375274658203, + "step": 513 + }, + { + "epoch": 0.06948880439374736, + "grad_norm": 1.9042032957077026, + "learning_rate": 2.9999998438460004e-05, + "loss": 0.2035846710205078, + "step": 514 + }, + { + "epoch": 0.06962399662019433, + "grad_norm": 2.0176074504852295, + "learning_rate": 2.9999993753840344e-05, + "loss": 0.17517471313476562, + "step": 515 + }, + { + "epoch": 0.06975918884664131, + "grad_norm": 1.8081135749816895, + "learning_rate": 2.9999985946141995e-05, + "loss": 0.22444915771484375, + "step": 516 + }, + { + "epoch": 0.06989438107308829, + "grad_norm": 1.5158302783966064, + "learning_rate": 2.9999975015366586e-05, + "loss": 0.20190048217773438, + "step": 517 + }, + { + "epoch": 0.07002957329953528, + "grad_norm": 5.406824111938477, + "learning_rate": 2.9999960961516384e-05, + "loss": 0.2318258285522461, + "step": 518 + }, + { + "epoch": 0.07016476552598226, + "grad_norm": 5.942788600921631, + "learning_rate": 2.9999943784594325e-05, + "loss": 0.21835899353027344, + "step": 519 + }, + { + "epoch": 0.07029995775242924, + "grad_norm": 5.351837158203125, + "learning_rate": 2.9999923484603975e-05, + "loss": 0.2577095031738281, + "step": 520 + }, + { + "epoch": 0.07043514997887622, + "grad_norm": 1.3302587270736694, + "learning_rate": 2.999990006154957e-05, + "loss": 0.2042369842529297, + "step": 521 + }, + { + "epoch": 0.0705703422053232, + "grad_norm": 2.843609571456909, + "learning_rate": 2.9999873515435977e-05, + "loss": 0.22577476501464844, + "step": 522 + }, + { + "epoch": 0.07070553443177018, + "grad_norm": 3.0564115047454834, + "learning_rate": 2.9999843846268735e-05, + "loss": 0.28678131103515625, + "step": 523 + }, + { + "epoch": 0.07084072665821715, + "grad_norm": 3.8802387714385986, + "learning_rate": 2.9999811054054018e-05, + "loss": 0.27037811279296875, + "step": 524 + }, + { + "epoch": 0.07097591888466413, + "grad_norm": 6.748092174530029, + "learning_rate": 2.9999775138798646e-05, + "loss": 0.2612476348876953, + "step": 525 + }, + { + "epoch": 0.07111111111111111, + "grad_norm": 1.852267861366272, + "learning_rate": 2.99997361005101e-05, + "loss": 0.22038650512695312, + "step": 526 + }, + { + "epoch": 0.07124630333755809, + "grad_norm": 2.9750607013702393, + "learning_rate": 2.9999693939196513e-05, + "loss": 0.22796630859375, + "step": 527 + }, + { + "epoch": 0.07138149556400507, + "grad_norm": 3.8122777938842773, + "learning_rate": 2.999964865486666e-05, + "loss": 0.17757225036621094, + "step": 528 + }, + { + "epoch": 0.07151668779045205, + "grad_norm": 4.453284740447998, + "learning_rate": 2.999960024752997e-05, + "loss": 0.31827449798583984, + "step": 529 + }, + { + "epoch": 0.07165188001689902, + "grad_norm": 3.6600072383880615, + "learning_rate": 2.9999548717196516e-05, + "loss": 0.32689666748046875, + "step": 530 + }, + { + "epoch": 0.071787072243346, + "grad_norm": 2.352653980255127, + "learning_rate": 2.999949406387703e-05, + "loss": 0.18181228637695312, + "step": 531 + }, + { + "epoch": 0.07192226446979298, + "grad_norm": 2.4650542736053467, + "learning_rate": 2.9999436287582903e-05, + "loss": 0.2533912658691406, + "step": 532 + }, + { + "epoch": 0.07205745669623996, + "grad_norm": 5.945155620574951, + "learning_rate": 2.9999375388326145e-05, + "loss": 0.27979278564453125, + "step": 533 + }, + { + "epoch": 0.07219264892268694, + "grad_norm": 1.88994562625885, + "learning_rate": 2.9999311366119447e-05, + "loss": 0.18726348876953125, + "step": 534 + }, + { + "epoch": 0.07232784114913393, + "grad_norm": 2.98053240776062, + "learning_rate": 2.9999244220976137e-05, + "loss": 0.302459716796875, + "step": 535 + }, + { + "epoch": 0.07246303337558091, + "grad_norm": 3.906646490097046, + "learning_rate": 2.9999173952910197e-05, + "loss": 0.27861785888671875, + "step": 536 + }, + { + "epoch": 0.07259822560202789, + "grad_norm": 4.483268737792969, + "learning_rate": 2.9999100561936252e-05, + "loss": 0.23635482788085938, + "step": 537 + }, + { + "epoch": 0.07273341782847487, + "grad_norm": 3.2484240531921387, + "learning_rate": 2.9999024048069585e-05, + "loss": 0.16626930236816406, + "step": 538 + }, + { + "epoch": 0.07286861005492185, + "grad_norm": 2.304311990737915, + "learning_rate": 2.9998944411326127e-05, + "loss": 0.21567249298095703, + "step": 539 + }, + { + "epoch": 0.07300380228136882, + "grad_norm": 5.122791767120361, + "learning_rate": 2.999886165172246e-05, + "loss": 0.25857067108154297, + "step": 540 + }, + { + "epoch": 0.0731389945078158, + "grad_norm": 1.861470103263855, + "learning_rate": 2.9998775769275814e-05, + "loss": 0.30873870849609375, + "step": 541 + }, + { + "epoch": 0.07327418673426278, + "grad_norm": 1.849015474319458, + "learning_rate": 2.9998686764004067e-05, + "loss": 0.19608020782470703, + "step": 542 + }, + { + "epoch": 0.07340937896070976, + "grad_norm": 2.7299294471740723, + "learning_rate": 2.9998594635925755e-05, + "loss": 0.19956016540527344, + "step": 543 + }, + { + "epoch": 0.07354457118715674, + "grad_norm": 1.5337392091751099, + "learning_rate": 2.999849938506005e-05, + "loss": 0.22431564331054688, + "step": 544 + }, + { + "epoch": 0.07367976341360372, + "grad_norm": 2.3284547328948975, + "learning_rate": 2.99984010114268e-05, + "loss": 0.23108291625976562, + "step": 545 + }, + { + "epoch": 0.0738149556400507, + "grad_norm": 3.4310529232025146, + "learning_rate": 2.9998299515046475e-05, + "loss": 0.1887073516845703, + "step": 546 + }, + { + "epoch": 0.07395014786649767, + "grad_norm": 2.7551848888397217, + "learning_rate": 2.9998194895940213e-05, + "loss": 0.26019287109375, + "step": 547 + }, + { + "epoch": 0.07408534009294465, + "grad_norm": 1.8915430307388306, + "learning_rate": 2.9998087154129792e-05, + "loss": 0.19549083709716797, + "step": 548 + }, + { + "epoch": 0.07422053231939163, + "grad_norm": 4.413297176361084, + "learning_rate": 2.9997976289637645e-05, + "loss": 0.23038673400878906, + "step": 549 + }, + { + "epoch": 0.07435572454583861, + "grad_norm": 3.587127447128296, + "learning_rate": 2.9997862302486855e-05, + "loss": 0.1690073013305664, + "step": 550 + }, + { + "epoch": 0.07449091677228559, + "grad_norm": 1.7189197540283203, + "learning_rate": 2.9997745192701153e-05, + "loss": 0.17391681671142578, + "step": 551 + }, + { + "epoch": 0.07462610899873258, + "grad_norm": 2.2448935508728027, + "learning_rate": 2.9997624960304926e-05, + "loss": 0.18960976600646973, + "step": 552 + }, + { + "epoch": 0.07476130122517956, + "grad_norm": 2.7006289958953857, + "learning_rate": 2.9997501605323214e-05, + "loss": 0.13849639892578125, + "step": 553 + }, + { + "epoch": 0.07489649345162654, + "grad_norm": 3.4401018619537354, + "learning_rate": 2.999737512778168e-05, + "loss": 0.22487258911132812, + "step": 554 + }, + { + "epoch": 0.07503168567807351, + "grad_norm": 2.79577898979187, + "learning_rate": 2.9997245527706674e-05, + "loss": 0.1993846893310547, + "step": 555 + }, + { + "epoch": 0.0751668779045205, + "grad_norm": 1.3946646451950073, + "learning_rate": 2.999711280512517e-05, + "loss": 0.15007591247558594, + "step": 556 + }, + { + "epoch": 0.07530207013096747, + "grad_norm": 10.264063835144043, + "learning_rate": 2.9996976960064807e-05, + "loss": 0.2519207000732422, + "step": 557 + }, + { + "epoch": 0.07543726235741445, + "grad_norm": 10.854535102844238, + "learning_rate": 2.999683799255387e-05, + "loss": 0.2876319885253906, + "step": 558 + }, + { + "epoch": 0.07557245458386143, + "grad_norm": 4.031068801879883, + "learning_rate": 2.999669590262129e-05, + "loss": 0.22539710998535156, + "step": 559 + }, + { + "epoch": 0.07570764681030841, + "grad_norm": 2.927306890487671, + "learning_rate": 2.999655069029665e-05, + "loss": 0.21682333946228027, + "step": 560 + }, + { + "epoch": 0.07584283903675539, + "grad_norm": 2.416666269302368, + "learning_rate": 2.9996402355610183e-05, + "loss": 0.2017526626586914, + "step": 561 + }, + { + "epoch": 0.07597803126320236, + "grad_norm": 3.3551390171051025, + "learning_rate": 2.9996250898592777e-05, + "loss": 0.325958251953125, + "step": 562 + }, + { + "epoch": 0.07611322348964934, + "grad_norm": 1.9157609939575195, + "learning_rate": 2.9996096319275962e-05, + "loss": 0.24203109741210938, + "step": 563 + }, + { + "epoch": 0.07624841571609632, + "grad_norm": 4.674991607666016, + "learning_rate": 2.9995938617691925e-05, + "loss": 0.2591667175292969, + "step": 564 + }, + { + "epoch": 0.0763836079425433, + "grad_norm": 2.6372244358062744, + "learning_rate": 2.9995777793873504e-05, + "loss": 0.20122432708740234, + "step": 565 + }, + { + "epoch": 0.07651880016899028, + "grad_norm": 2.5040884017944336, + "learning_rate": 2.9995613847854176e-05, + "loss": 0.2384195327758789, + "step": 566 + }, + { + "epoch": 0.07665399239543726, + "grad_norm": 4.281469821929932, + "learning_rate": 2.9995446779668078e-05, + "loss": 0.19385933876037598, + "step": 567 + }, + { + "epoch": 0.07678918462188423, + "grad_norm": 5.763937473297119, + "learning_rate": 2.9995276589349992e-05, + "loss": 0.24071502685546875, + "step": 568 + }, + { + "epoch": 0.07692437684833123, + "grad_norm": 4.173258304595947, + "learning_rate": 2.9995103276935357e-05, + "loss": 0.23386859893798828, + "step": 569 + }, + { + "epoch": 0.0770595690747782, + "grad_norm": 7.034768104553223, + "learning_rate": 2.9994926842460258e-05, + "loss": 0.2025146484375, + "step": 570 + }, + { + "epoch": 0.07719476130122518, + "grad_norm": 7.723634243011475, + "learning_rate": 2.9994747285961428e-05, + "loss": 0.3099212646484375, + "step": 571 + }, + { + "epoch": 0.07732995352767216, + "grad_norm": 4.624016761779785, + "learning_rate": 2.9994564607476255e-05, + "loss": 0.18726730346679688, + "step": 572 + }, + { + "epoch": 0.07746514575411914, + "grad_norm": 2.3658721446990967, + "learning_rate": 2.9994378807042762e-05, + "loss": 0.20200729370117188, + "step": 573 + }, + { + "epoch": 0.07760033798056612, + "grad_norm": 3.864985942840576, + "learning_rate": 2.9994189884699647e-05, + "loss": 0.2131948471069336, + "step": 574 + }, + { + "epoch": 0.0777355302070131, + "grad_norm": 3.0287411212921143, + "learning_rate": 2.9993997840486233e-05, + "loss": 0.226959228515625, + "step": 575 + }, + { + "epoch": 0.07787072243346008, + "grad_norm": 4.788344383239746, + "learning_rate": 2.9993802674442516e-05, + "loss": 0.2349262237548828, + "step": 576 + }, + { + "epoch": 0.07800591465990706, + "grad_norm": 2.301671266555786, + "learning_rate": 2.999360438660913e-05, + "loss": 0.2593231201171875, + "step": 577 + }, + { + "epoch": 0.07814110688635403, + "grad_norm": 3.565685272216797, + "learning_rate": 2.9993402977027346e-05, + "loss": 0.2610816955566406, + "step": 578 + }, + { + "epoch": 0.07827629911280101, + "grad_norm": 3.00917649269104, + "learning_rate": 2.999319844573911e-05, + "loss": 0.2576713562011719, + "step": 579 + }, + { + "epoch": 0.07841149133924799, + "grad_norm": 4.8254265785217285, + "learning_rate": 2.9992990792787007e-05, + "loss": 0.22041988372802734, + "step": 580 + }, + { + "epoch": 0.07854668356569497, + "grad_norm": 4.8675923347473145, + "learning_rate": 2.999278001821427e-05, + "loss": 0.24315643310546875, + "step": 581 + }, + { + "epoch": 0.07868187579214195, + "grad_norm": 2.737541675567627, + "learning_rate": 2.9992566122064775e-05, + "loss": 0.2404327392578125, + "step": 582 + }, + { + "epoch": 0.07881706801858893, + "grad_norm": 1.5600872039794922, + "learning_rate": 2.999234910438307e-05, + "loss": 0.17913246154785156, + "step": 583 + }, + { + "epoch": 0.0789522602450359, + "grad_norm": 5.774623870849609, + "learning_rate": 2.999212896521433e-05, + "loss": 0.24062418937683105, + "step": 584 + }, + { + "epoch": 0.07908745247148288, + "grad_norm": 9.099496841430664, + "learning_rate": 2.999190570460439e-05, + "loss": 0.25443267822265625, + "step": 585 + }, + { + "epoch": 0.07922264469792988, + "grad_norm": 4.413066387176514, + "learning_rate": 2.9991679322599734e-05, + "loss": 0.15851974487304688, + "step": 586 + }, + { + "epoch": 0.07935783692437685, + "grad_norm": 2.0952835083007812, + "learning_rate": 2.9991449819247505e-05, + "loss": 0.2638568878173828, + "step": 587 + }, + { + "epoch": 0.07949302915082383, + "grad_norm": 1.587292194366455, + "learning_rate": 2.9991217194595474e-05, + "loss": 0.1701498031616211, + "step": 588 + }, + { + "epoch": 0.07962822137727081, + "grad_norm": 11.579439163208008, + "learning_rate": 2.9990981448692078e-05, + "loss": 0.363983154296875, + "step": 589 + }, + { + "epoch": 0.07976341360371779, + "grad_norm": 4.696298599243164, + "learning_rate": 2.999074258158641e-05, + "loss": 0.20482683181762695, + "step": 590 + }, + { + "epoch": 0.07989860583016477, + "grad_norm": 1.8145850896835327, + "learning_rate": 2.9990500593328192e-05, + "loss": 0.19809770584106445, + "step": 591 + }, + { + "epoch": 0.08003379805661175, + "grad_norm": 3.4981093406677246, + "learning_rate": 2.999025548396781e-05, + "loss": 0.18554019927978516, + "step": 592 + }, + { + "epoch": 0.08016899028305872, + "grad_norm": 1.5671679973602295, + "learning_rate": 2.9990007253556302e-05, + "loss": 0.2060699462890625, + "step": 593 + }, + { + "epoch": 0.0803041825095057, + "grad_norm": 1.588470220565796, + "learning_rate": 2.9989755902145345e-05, + "loss": 0.22665834426879883, + "step": 594 + }, + { + "epoch": 0.08043937473595268, + "grad_norm": 2.0635406970977783, + "learning_rate": 2.9989501429787273e-05, + "loss": 0.20351028442382812, + "step": 595 + }, + { + "epoch": 0.08057456696239966, + "grad_norm": 3.4679715633392334, + "learning_rate": 2.9989243836535073e-05, + "loss": 0.22932195663452148, + "step": 596 + }, + { + "epoch": 0.08070975918884664, + "grad_norm": 2.1162478923797607, + "learning_rate": 2.998898312244237e-05, + "loss": 0.22040939331054688, + "step": 597 + }, + { + "epoch": 0.08084495141529362, + "grad_norm": 1.7832661867141724, + "learning_rate": 2.9988719287563452e-05, + "loss": 0.184783935546875, + "step": 598 + }, + { + "epoch": 0.0809801436417406, + "grad_norm": 1.2377979755401611, + "learning_rate": 2.998845233195325e-05, + "loss": 0.2354755401611328, + "step": 599 + }, + { + "epoch": 0.08111533586818757, + "grad_norm": 4.187588691711426, + "learning_rate": 2.998818225566734e-05, + "loss": 0.2707843780517578, + "step": 600 + }, + { + "epoch": 0.08125052809463455, + "grad_norm": 5.3939385414123535, + "learning_rate": 2.998790905876196e-05, + "loss": 0.29697132110595703, + "step": 601 + }, + { + "epoch": 0.08138572032108153, + "grad_norm": 3.264021873474121, + "learning_rate": 2.9987632741293987e-05, + "loss": 0.2369537353515625, + "step": 602 + }, + { + "epoch": 0.08152091254752852, + "grad_norm": 1.913804531097412, + "learning_rate": 2.998735330332096e-05, + "loss": 0.2901735305786133, + "step": 603 + }, + { + "epoch": 0.0816561047739755, + "grad_norm": 6.600580215454102, + "learning_rate": 2.9987070744901046e-05, + "loss": 0.28864288330078125, + "step": 604 + }, + { + "epoch": 0.08179129700042248, + "grad_norm": 8.056124687194824, + "learning_rate": 2.9986785066093084e-05, + "loss": 0.2801971435546875, + "step": 605 + }, + { + "epoch": 0.08192648922686946, + "grad_norm": 2.676353693008423, + "learning_rate": 2.9986496266956556e-05, + "loss": 0.17203235626220703, + "step": 606 + }, + { + "epoch": 0.08206168145331644, + "grad_norm": 1.5418872833251953, + "learning_rate": 2.9986204347551583e-05, + "loss": 0.2698516845703125, + "step": 607 + }, + { + "epoch": 0.08219687367976342, + "grad_norm": 5.122815132141113, + "learning_rate": 2.9985909307938948e-05, + "loss": 0.19475841522216797, + "step": 608 + }, + { + "epoch": 0.0823320659062104, + "grad_norm": 9.323260307312012, + "learning_rate": 2.9985611148180082e-05, + "loss": 0.27602052688598633, + "step": 609 + }, + { + "epoch": 0.08246725813265737, + "grad_norm": 5.362401485443115, + "learning_rate": 2.9985309868337063e-05, + "loss": 0.20574188232421875, + "step": 610 + }, + { + "epoch": 0.08260245035910435, + "grad_norm": 3.271744966506958, + "learning_rate": 2.9985005468472617e-05, + "loss": 0.2656221389770508, + "step": 611 + }, + { + "epoch": 0.08273764258555133, + "grad_norm": 3.040532112121582, + "learning_rate": 2.9984697948650124e-05, + "loss": 0.2406768798828125, + "step": 612 + }, + { + "epoch": 0.08287283481199831, + "grad_norm": 5.9468607902526855, + "learning_rate": 2.998438730893361e-05, + "loss": 0.3377494812011719, + "step": 613 + }, + { + "epoch": 0.08300802703844529, + "grad_norm": 7.20884895324707, + "learning_rate": 2.9984073549387747e-05, + "loss": 0.23948097229003906, + "step": 614 + }, + { + "epoch": 0.08314321926489227, + "grad_norm": 6.783653736114502, + "learning_rate": 2.998375667007787e-05, + "loss": 0.28171539306640625, + "step": 615 + }, + { + "epoch": 0.08327841149133924, + "grad_norm": 3.8011462688446045, + "learning_rate": 2.998343667106995e-05, + "loss": 0.22377777099609375, + "step": 616 + }, + { + "epoch": 0.08341360371778622, + "grad_norm": 3.0447449684143066, + "learning_rate": 2.9983113552430616e-05, + "loss": 0.2240753173828125, + "step": 617 + }, + { + "epoch": 0.0835487959442332, + "grad_norm": 3.1536710262298584, + "learning_rate": 2.9982787314227134e-05, + "loss": 0.2377605438232422, + "step": 618 + }, + { + "epoch": 0.08368398817068018, + "grad_norm": 1.3408907651901245, + "learning_rate": 2.998245795652744e-05, + "loss": 0.1878509521484375, + "step": 619 + }, + { + "epoch": 0.08381918039712717, + "grad_norm": 3.2851474285125732, + "learning_rate": 2.9982125479400106e-05, + "loss": 0.2526235580444336, + "step": 620 + }, + { + "epoch": 0.08395437262357415, + "grad_norm": 2.2513623237609863, + "learning_rate": 2.9981789882914352e-05, + "loss": 0.1652812957763672, + "step": 621 + }, + { + "epoch": 0.08408956485002113, + "grad_norm": 1.2861738204956055, + "learning_rate": 2.9981451167140048e-05, + "loss": 0.19992446899414062, + "step": 622 + }, + { + "epoch": 0.08422475707646811, + "grad_norm": 1.907787561416626, + "learning_rate": 2.9981109332147722e-05, + "loss": 0.17819786071777344, + "step": 623 + }, + { + "epoch": 0.08435994930291509, + "grad_norm": 2.4429855346679688, + "learning_rate": 2.9980764378008545e-05, + "loss": 0.19518470764160156, + "step": 624 + }, + { + "epoch": 0.08449514152936206, + "grad_norm": 1.650206446647644, + "learning_rate": 2.9980416304794332e-05, + "loss": 0.212799072265625, + "step": 625 + }, + { + "epoch": 0.08463033375580904, + "grad_norm": 2.758831262588501, + "learning_rate": 2.9980065112577565e-05, + "loss": 0.23705673217773438, + "step": 626 + }, + { + "epoch": 0.08476552598225602, + "grad_norm": 2.2661426067352295, + "learning_rate": 2.9979710801431357e-05, + "loss": 0.25582122802734375, + "step": 627 + }, + { + "epoch": 0.084900718208703, + "grad_norm": 3.0475738048553467, + "learning_rate": 2.997935337142948e-05, + "loss": 0.24195194244384766, + "step": 628 + }, + { + "epoch": 0.08503591043514998, + "grad_norm": 3.1204683780670166, + "learning_rate": 2.9978992822646347e-05, + "loss": 0.23675918579101562, + "step": 629 + }, + { + "epoch": 0.08517110266159696, + "grad_norm": 3.261786699295044, + "learning_rate": 2.9978629155157036e-05, + "loss": 0.19226837158203125, + "step": 630 + }, + { + "epoch": 0.08530629488804393, + "grad_norm": 2.0601446628570557, + "learning_rate": 2.9978262369037252e-05, + "loss": 0.23221588134765625, + "step": 631 + }, + { + "epoch": 0.08544148711449091, + "grad_norm": 2.870387554168701, + "learning_rate": 2.9977892464363375e-05, + "loss": 0.23838233947753906, + "step": 632 + }, + { + "epoch": 0.08557667934093789, + "grad_norm": 2.2026076316833496, + "learning_rate": 2.9977519441212412e-05, + "loss": 0.2767181396484375, + "step": 633 + }, + { + "epoch": 0.08571187156738487, + "grad_norm": 2.9643681049346924, + "learning_rate": 2.9977143299662034e-05, + "loss": 0.22817230224609375, + "step": 634 + }, + { + "epoch": 0.08584706379383185, + "grad_norm": 3.964478015899658, + "learning_rate": 2.997676403979055e-05, + "loss": 0.3262901306152344, + "step": 635 + }, + { + "epoch": 0.08598225602027883, + "grad_norm": 2.877920150756836, + "learning_rate": 2.997638166167693e-05, + "loss": 0.2660694122314453, + "step": 636 + }, + { + "epoch": 0.08611744824672582, + "grad_norm": 1.5161670446395874, + "learning_rate": 2.9975996165400786e-05, + "loss": 0.1931772232055664, + "step": 637 + }, + { + "epoch": 0.0862526404731728, + "grad_norm": 0.940235435962677, + "learning_rate": 2.9975607551042373e-05, + "loss": 0.18752288818359375, + "step": 638 + }, + { + "epoch": 0.08638783269961978, + "grad_norm": 3.5364956855773926, + "learning_rate": 2.9975215818682607e-05, + "loss": 0.2422189712524414, + "step": 639 + }, + { + "epoch": 0.08652302492606675, + "grad_norm": 3.902735471725464, + "learning_rate": 2.9974820968403056e-05, + "loss": 0.31037139892578125, + "step": 640 + }, + { + "epoch": 0.08665821715251373, + "grad_norm": 3.035266637802124, + "learning_rate": 2.9974423000285923e-05, + "loss": 0.2532157897949219, + "step": 641 + }, + { + "epoch": 0.08679340937896071, + "grad_norm": 2.4688339233398438, + "learning_rate": 2.9974021914414068e-05, + "loss": 0.19481420516967773, + "step": 642 + }, + { + "epoch": 0.08692860160540769, + "grad_norm": 2.674933671951294, + "learning_rate": 2.9973617710871e-05, + "loss": 0.2714195251464844, + "step": 643 + }, + { + "epoch": 0.08706379383185467, + "grad_norm": 2.497926712036133, + "learning_rate": 2.997321038974087e-05, + "loss": 0.21243762969970703, + "step": 644 + }, + { + "epoch": 0.08719898605830165, + "grad_norm": 2.77506422996521, + "learning_rate": 2.997279995110849e-05, + "loss": 0.21555233001708984, + "step": 645 + }, + { + "epoch": 0.08733417828474863, + "grad_norm": 5.690005779266357, + "learning_rate": 2.997238639505932e-05, + "loss": 0.2408885955810547, + "step": 646 + }, + { + "epoch": 0.0874693705111956, + "grad_norm": 1.117785096168518, + "learning_rate": 2.997196972167946e-05, + "loss": 0.2142963409423828, + "step": 647 + }, + { + "epoch": 0.08760456273764258, + "grad_norm": 5.270402908325195, + "learning_rate": 2.9971549931055665e-05, + "loss": 0.2450122833251953, + "step": 648 + }, + { + "epoch": 0.08773975496408956, + "grad_norm": 1.8804930448532104, + "learning_rate": 2.997112702327533e-05, + "loss": 0.1765308380126953, + "step": 649 + }, + { + "epoch": 0.08787494719053654, + "grad_norm": 1.0243396759033203, + "learning_rate": 2.9970700998426518e-05, + "loss": 0.1481466293334961, + "step": 650 + }, + { + "epoch": 0.08801013941698352, + "grad_norm": 2.6848249435424805, + "learning_rate": 2.9970271856597925e-05, + "loss": 0.24842071533203125, + "step": 651 + }, + { + "epoch": 0.0881453316434305, + "grad_norm": 4.168036937713623, + "learning_rate": 2.9969839597878896e-05, + "loss": 0.1752300262451172, + "step": 652 + }, + { + "epoch": 0.08828052386987748, + "grad_norm": 1.4729108810424805, + "learning_rate": 2.9969404222359436e-05, + "loss": 0.1951122283935547, + "step": 653 + }, + { + "epoch": 0.08841571609632447, + "grad_norm": 3.0390238761901855, + "learning_rate": 2.9968965730130188e-05, + "loss": 0.21633243560791016, + "step": 654 + }, + { + "epoch": 0.08855090832277145, + "grad_norm": 2.2606661319732666, + "learning_rate": 2.9968524121282455e-05, + "loss": 0.23235511779785156, + "step": 655 + }, + { + "epoch": 0.08868610054921842, + "grad_norm": 3.079070806503296, + "learning_rate": 2.9968079395908178e-05, + "loss": 0.16539764404296875, + "step": 656 + }, + { + "epoch": 0.0888212927756654, + "grad_norm": 4.1072282791137695, + "learning_rate": 2.9967631554099947e-05, + "loss": 0.17200088500976562, + "step": 657 + }, + { + "epoch": 0.08895648500211238, + "grad_norm": 1.945708155632019, + "learning_rate": 2.996718059595101e-05, + "loss": 0.2137300968170166, + "step": 658 + }, + { + "epoch": 0.08909167722855936, + "grad_norm": 2.9191908836364746, + "learning_rate": 2.9966726521555265e-05, + "loss": 0.214569091796875, + "step": 659 + }, + { + "epoch": 0.08922686945500634, + "grad_norm": 1.7798532247543335, + "learning_rate": 2.996626933100724e-05, + "loss": 0.24387741088867188, + "step": 660 + }, + { + "epoch": 0.08936206168145332, + "grad_norm": 5.730529308319092, + "learning_rate": 2.996580902440213e-05, + "loss": 0.2046966552734375, + "step": 661 + }, + { + "epoch": 0.0894972539079003, + "grad_norm": 4.43034553527832, + "learning_rate": 2.9965345601835773e-05, + "loss": 0.2306232452392578, + "step": 662 + }, + { + "epoch": 0.08963244613434727, + "grad_norm": 4.346843719482422, + "learning_rate": 2.996487906340466e-05, + "loss": 0.21460723876953125, + "step": 663 + }, + { + "epoch": 0.08976763836079425, + "grad_norm": 2.161485433578491, + "learning_rate": 2.996440940920592e-05, + "loss": 0.1844959259033203, + "step": 664 + }, + { + "epoch": 0.08990283058724123, + "grad_norm": 2.3417811393737793, + "learning_rate": 2.996393663933735e-05, + "loss": 0.23455810546875, + "step": 665 + }, + { + "epoch": 0.09003802281368821, + "grad_norm": 5.569293022155762, + "learning_rate": 2.9963460753897364e-05, + "loss": 0.3124427795410156, + "step": 666 + }, + { + "epoch": 0.09017321504013519, + "grad_norm": 5.964304447174072, + "learning_rate": 2.996298175298506e-05, + "loss": 0.23615455627441406, + "step": 667 + }, + { + "epoch": 0.09030840726658217, + "grad_norm": 3.099236488342285, + "learning_rate": 2.996249963670016e-05, + "loss": 0.2277660369873047, + "step": 668 + }, + { + "epoch": 0.09044359949302914, + "grad_norm": 2.5078465938568115, + "learning_rate": 2.9962014405143042e-05, + "loss": 0.24589157104492188, + "step": 669 + }, + { + "epoch": 0.09057879171947612, + "grad_norm": 2.3147389888763428, + "learning_rate": 2.9961526058414745e-05, + "loss": 0.22175121307373047, + "step": 670 + }, + { + "epoch": 0.09071398394592312, + "grad_norm": 2.0527143478393555, + "learning_rate": 2.9961034596616936e-05, + "loss": 0.24064350128173828, + "step": 671 + }, + { + "epoch": 0.0908491761723701, + "grad_norm": 0.8992170095443726, + "learning_rate": 2.996054001985194e-05, + "loss": 0.11851060390472412, + "step": 672 + }, + { + "epoch": 0.09098436839881707, + "grad_norm": 2.15411376953125, + "learning_rate": 2.9960042328222732e-05, + "loss": 0.21610164642333984, + "step": 673 + }, + { + "epoch": 0.09111956062526405, + "grad_norm": 3.516059160232544, + "learning_rate": 2.995954152183294e-05, + "loss": 0.1685779094696045, + "step": 674 + }, + { + "epoch": 0.09125475285171103, + "grad_norm": 3.6010234355926514, + "learning_rate": 2.9959037600786822e-05, + "loss": 0.2592010498046875, + "step": 675 + }, + { + "epoch": 0.09138994507815801, + "grad_norm": 1.9942626953125, + "learning_rate": 2.9958530565189307e-05, + "loss": 0.22705411911010742, + "step": 676 + }, + { + "epoch": 0.09152513730460499, + "grad_norm": 3.967087745666504, + "learning_rate": 2.995802041514596e-05, + "loss": 0.22382259368896484, + "step": 677 + }, + { + "epoch": 0.09166032953105197, + "grad_norm": 5.022583484649658, + "learning_rate": 2.9957507150762996e-05, + "loss": 0.21911907196044922, + "step": 678 + }, + { + "epoch": 0.09179552175749894, + "grad_norm": 7.530004501342773, + "learning_rate": 2.9956990772147283e-05, + "loss": 0.23216629028320312, + "step": 679 + }, + { + "epoch": 0.09193071398394592, + "grad_norm": 4.901467800140381, + "learning_rate": 2.9956471279406324e-05, + "loss": 0.24022865295410156, + "step": 680 + }, + { + "epoch": 0.0920659062103929, + "grad_norm": 1.4078967571258545, + "learning_rate": 2.9955948672648298e-05, + "loss": 0.20527267456054688, + "step": 681 + }, + { + "epoch": 0.09220109843683988, + "grad_norm": 2.4498071670532227, + "learning_rate": 2.9955422951981994e-05, + "loss": 0.20481491088867188, + "step": 682 + }, + { + "epoch": 0.09233629066328686, + "grad_norm": 1.6855629682540894, + "learning_rate": 2.995489411751688e-05, + "loss": 0.22455883026123047, + "step": 683 + }, + { + "epoch": 0.09247148288973384, + "grad_norm": 2.1756672859191895, + "learning_rate": 2.9954362169363064e-05, + "loss": 0.32009124755859375, + "step": 684 + }, + { + "epoch": 0.09260667511618081, + "grad_norm": 3.7600691318511963, + "learning_rate": 2.99538271076313e-05, + "loss": 0.22552490234375, + "step": 685 + }, + { + "epoch": 0.09274186734262779, + "grad_norm": 1.720667839050293, + "learning_rate": 2.9953288932432985e-05, + "loss": 0.17055988311767578, + "step": 686 + }, + { + "epoch": 0.09287705956907477, + "grad_norm": 1.6106798648834229, + "learning_rate": 2.995274764388018e-05, + "loss": 0.20691156387329102, + "step": 687 + }, + { + "epoch": 0.09301225179552176, + "grad_norm": 2.795907735824585, + "learning_rate": 2.9952203242085566e-05, + "loss": 0.20398902893066406, + "step": 688 + }, + { + "epoch": 0.09314744402196874, + "grad_norm": 1.6927968263626099, + "learning_rate": 2.995165572716251e-05, + "loss": 0.207000732421875, + "step": 689 + }, + { + "epoch": 0.09328263624841572, + "grad_norm": 1.1094919443130493, + "learning_rate": 2.9951105099225003e-05, + "loss": 0.1868276596069336, + "step": 690 + }, + { + "epoch": 0.0934178284748627, + "grad_norm": 2.6226561069488525, + "learning_rate": 2.995055135838768e-05, + "loss": 0.20621681213378906, + "step": 691 + }, + { + "epoch": 0.09355302070130968, + "grad_norm": 4.05411434173584, + "learning_rate": 2.994999450476584e-05, + "loss": 0.2272930145263672, + "step": 692 + }, + { + "epoch": 0.09368821292775666, + "grad_norm": 1.8267302513122559, + "learning_rate": 2.9949434538475414e-05, + "loss": 0.231536865234375, + "step": 693 + }, + { + "epoch": 0.09382340515420363, + "grad_norm": 2.947174310684204, + "learning_rate": 2.9948871459633008e-05, + "loss": 0.20539474487304688, + "step": 694 + }, + { + "epoch": 0.09395859738065061, + "grad_norm": 2.799992799758911, + "learning_rate": 2.994830526835584e-05, + "loss": 0.24158477783203125, + "step": 695 + }, + { + "epoch": 0.09409378960709759, + "grad_norm": 2.7172555923461914, + "learning_rate": 2.9947735964761803e-05, + "loss": 0.24485206604003906, + "step": 696 + }, + { + "epoch": 0.09422898183354457, + "grad_norm": 1.82590913772583, + "learning_rate": 2.9947163548969428e-05, + "loss": 0.2465667724609375, + "step": 697 + }, + { + "epoch": 0.09436417405999155, + "grad_norm": 1.8396632671356201, + "learning_rate": 2.9946588021097893e-05, + "loss": 0.15935707092285156, + "step": 698 + }, + { + "epoch": 0.09449936628643853, + "grad_norm": 2.0061380863189697, + "learning_rate": 2.9946009381267028e-05, + "loss": 0.12570571899414062, + "step": 699 + }, + { + "epoch": 0.0946345585128855, + "grad_norm": 1.2996466159820557, + "learning_rate": 2.9945427629597306e-05, + "loss": 0.1744217872619629, + "step": 700 + }, + { + "epoch": 0.09476975073933248, + "grad_norm": 2.234083414077759, + "learning_rate": 2.9944842766209853e-05, + "loss": 0.20143508911132812, + "step": 701 + }, + { + "epoch": 0.09490494296577946, + "grad_norm": 2.381791830062866, + "learning_rate": 2.9944254791226444e-05, + "loss": 0.24639129638671875, + "step": 702 + }, + { + "epoch": 0.09504013519222644, + "grad_norm": 4.267126560211182, + "learning_rate": 2.994366370476949e-05, + "loss": 0.21299982070922852, + "step": 703 + }, + { + "epoch": 0.09517532741867342, + "grad_norm": 3.35009765625, + "learning_rate": 2.9943069506962067e-05, + "loss": 0.24997329711914062, + "step": 704 + }, + { + "epoch": 0.09531051964512041, + "grad_norm": 4.331963539123535, + "learning_rate": 2.9942472197927886e-05, + "loss": 0.19637107849121094, + "step": 705 + }, + { + "epoch": 0.09544571187156739, + "grad_norm": 4.351712226867676, + "learning_rate": 2.994187177779131e-05, + "loss": 0.2784843444824219, + "step": 706 + }, + { + "epoch": 0.09558090409801437, + "grad_norm": 3.0953354835510254, + "learning_rate": 2.9941268246677353e-05, + "loss": 0.21401596069335938, + "step": 707 + }, + { + "epoch": 0.09571609632446135, + "grad_norm": 3.9907052516937256, + "learning_rate": 2.9940661604711664e-05, + "loss": 0.199676513671875, + "step": 708 + }, + { + "epoch": 0.09585128855090833, + "grad_norm": 1.653907299041748, + "learning_rate": 2.994005185202056e-05, + "loss": 0.19843292236328125, + "step": 709 + }, + { + "epoch": 0.0959864807773553, + "grad_norm": 1.2830501794815063, + "learning_rate": 2.9939438988730986e-05, + "loss": 0.20074748992919922, + "step": 710 + }, + { + "epoch": 0.09612167300380228, + "grad_norm": 7.845682144165039, + "learning_rate": 2.9938823014970553e-05, + "loss": 0.25787353515625, + "step": 711 + }, + { + "epoch": 0.09625686523024926, + "grad_norm": 7.704529762268066, + "learning_rate": 2.99382039308675e-05, + "loss": 0.21416091918945312, + "step": 712 + }, + { + "epoch": 0.09639205745669624, + "grad_norm": 12.752790451049805, + "learning_rate": 2.993758173655073e-05, + "loss": 0.3880424499511719, + "step": 713 + }, + { + "epoch": 0.09652724968314322, + "grad_norm": 3.0337939262390137, + "learning_rate": 2.993695643214979e-05, + "loss": 0.19612884521484375, + "step": 714 + }, + { + "epoch": 0.0966624419095902, + "grad_norm": 3.417722463607788, + "learning_rate": 2.9936328017794864e-05, + "loss": 0.2630596160888672, + "step": 715 + }, + { + "epoch": 0.09679763413603718, + "grad_norm": 4.16177225112915, + "learning_rate": 2.9935696493616796e-05, + "loss": 0.219146728515625, + "step": 716 + }, + { + "epoch": 0.09693282636248415, + "grad_norm": 4.165134906768799, + "learning_rate": 2.9935061859747065e-05, + "loss": 0.18430137634277344, + "step": 717 + }, + { + "epoch": 0.09706801858893113, + "grad_norm": 5.327160835266113, + "learning_rate": 2.993442411631782e-05, + "loss": 0.210662841796875, + "step": 718 + }, + { + "epoch": 0.09720321081537811, + "grad_norm": 2.605717182159424, + "learning_rate": 2.9933783263461827e-05, + "loss": 0.24525070190429688, + "step": 719 + }, + { + "epoch": 0.09733840304182509, + "grad_norm": 3.0118792057037354, + "learning_rate": 2.9933139301312526e-05, + "loss": 0.26732635498046875, + "step": 720 + }, + { + "epoch": 0.09747359526827207, + "grad_norm": 2.1598966121673584, + "learning_rate": 2.9932492230003984e-05, + "loss": 0.21266746520996094, + "step": 721 + }, + { + "epoch": 0.09760878749471906, + "grad_norm": 1.9449892044067383, + "learning_rate": 2.993184204967094e-05, + "loss": 0.17081451416015625, + "step": 722 + }, + { + "epoch": 0.09774397972116604, + "grad_norm": 3.7190966606140137, + "learning_rate": 2.9931188760448748e-05, + "loss": 0.19688892364501953, + "step": 723 + }, + { + "epoch": 0.09787917194761302, + "grad_norm": 3.5418851375579834, + "learning_rate": 2.9930532362473433e-05, + "loss": 0.20413970947265625, + "step": 724 + }, + { + "epoch": 0.09801436417406, + "grad_norm": 1.8002017736434937, + "learning_rate": 2.9929872855881663e-05, + "loss": 0.19686412811279297, + "step": 725 + }, + { + "epoch": 0.09814955640050697, + "grad_norm": 1.354059100151062, + "learning_rate": 2.9929210240810744e-05, + "loss": 0.23836898803710938, + "step": 726 + }, + { + "epoch": 0.09828474862695395, + "grad_norm": 2.2063252925872803, + "learning_rate": 2.9928544517398644e-05, + "loss": 0.1832256317138672, + "step": 727 + }, + { + "epoch": 0.09841994085340093, + "grad_norm": 2.802690267562866, + "learning_rate": 2.9927875685783966e-05, + "loss": 0.18004226684570312, + "step": 728 + }, + { + "epoch": 0.09855513307984791, + "grad_norm": 1.7790663242340088, + "learning_rate": 2.9927203746105968e-05, + "loss": 0.2653388977050781, + "step": 729 + }, + { + "epoch": 0.09869032530629489, + "grad_norm": 3.0444061756134033, + "learning_rate": 2.9926528698504546e-05, + "loss": 0.2430095672607422, + "step": 730 + }, + { + "epoch": 0.09882551753274187, + "grad_norm": 1.303714632987976, + "learning_rate": 2.992585054312025e-05, + "loss": 0.23905372619628906, + "step": 731 + }, + { + "epoch": 0.09896070975918884, + "grad_norm": 2.3533811569213867, + "learning_rate": 2.9925169280094278e-05, + "loss": 0.23558425903320312, + "step": 732 + }, + { + "epoch": 0.09909590198563582, + "grad_norm": 1.462594985961914, + "learning_rate": 2.9924484909568472e-05, + "loss": 0.13215255737304688, + "step": 733 + }, + { + "epoch": 0.0992310942120828, + "grad_norm": 2.914008140563965, + "learning_rate": 2.9923797431685322e-05, + "loss": 0.24617767333984375, + "step": 734 + }, + { + "epoch": 0.09936628643852978, + "grad_norm": 2.204941511154175, + "learning_rate": 2.992310684658796e-05, + "loss": 0.1777210235595703, + "step": 735 + }, + { + "epoch": 0.09950147866497676, + "grad_norm": 5.732989311218262, + "learning_rate": 2.9922413154420173e-05, + "loss": 0.23714733123779297, + "step": 736 + }, + { + "epoch": 0.09963667089142374, + "grad_norm": 2.6614887714385986, + "learning_rate": 2.9921716355326393e-05, + "loss": 0.2075042724609375, + "step": 737 + }, + { + "epoch": 0.09977186311787073, + "grad_norm": 5.0292487144470215, + "learning_rate": 2.9921016449451695e-05, + "loss": 0.21545982360839844, + "step": 738 + }, + { + "epoch": 0.09990705534431771, + "grad_norm": 2.069394111633301, + "learning_rate": 2.9920313436941805e-05, + "loss": 0.2056560516357422, + "step": 739 + }, + { + "epoch": 0.10004224757076469, + "grad_norm": 6.506106853485107, + "learning_rate": 2.991960731794309e-05, + "loss": 0.23987579345703125, + "step": 740 + }, + { + "epoch": 0.10017743979721166, + "grad_norm": 5.627443790435791, + "learning_rate": 2.991889809260257e-05, + "loss": 0.22814083099365234, + "step": 741 + }, + { + "epoch": 0.10031263202365864, + "grad_norm": 8.164318084716797, + "learning_rate": 2.9918185761067912e-05, + "loss": 0.2442607879638672, + "step": 742 + }, + { + "epoch": 0.10044782425010562, + "grad_norm": 8.184733390808105, + "learning_rate": 2.9917470323487423e-05, + "loss": 0.2915458679199219, + "step": 743 + }, + { + "epoch": 0.1005830164765526, + "grad_norm": 0.954133927822113, + "learning_rate": 2.9916751780010063e-05, + "loss": 0.17754173278808594, + "step": 744 + }, + { + "epoch": 0.10071820870299958, + "grad_norm": 0.8303343057632446, + "learning_rate": 2.9916030130785436e-05, + "loss": 0.15448570251464844, + "step": 745 + }, + { + "epoch": 0.10085340092944656, + "grad_norm": 4.113893985748291, + "learning_rate": 2.99153053759638e-05, + "loss": 0.181427001953125, + "step": 746 + }, + { + "epoch": 0.10098859315589354, + "grad_norm": 3.069335699081421, + "learning_rate": 2.991457751569604e-05, + "loss": 0.2271251678466797, + "step": 747 + }, + { + "epoch": 0.10112378538234051, + "grad_norm": 3.969558000564575, + "learning_rate": 2.991384655013371e-05, + "loss": 0.29486083984375, + "step": 748 + }, + { + "epoch": 0.10125897760878749, + "grad_norm": 2.7881696224212646, + "learning_rate": 2.9913112479429e-05, + "loss": 0.2432541847229004, + "step": 749 + }, + { + "epoch": 0.10139416983523447, + "grad_norm": 1.2278746366500854, + "learning_rate": 2.991237530373474e-05, + "loss": 0.25930023193359375, + "step": 750 + }, + { + "epoch": 0.10152936206168145, + "grad_norm": 5.310372829437256, + "learning_rate": 2.9911635023204423e-05, + "loss": 0.2572669982910156, + "step": 751 + }, + { + "epoch": 0.10166455428812843, + "grad_norm": 6.210998058319092, + "learning_rate": 2.9910891637992172e-05, + "loss": 0.22033119201660156, + "step": 752 + }, + { + "epoch": 0.1017997465145754, + "grad_norm": 4.798659324645996, + "learning_rate": 2.991014514825277e-05, + "loss": 0.2593822479248047, + "step": 753 + }, + { + "epoch": 0.10193493874102239, + "grad_norm": 2.6991829872131348, + "learning_rate": 2.9909395554141638e-05, + "loss": 0.20731163024902344, + "step": 754 + }, + { + "epoch": 0.10207013096746938, + "grad_norm": 3.036484479904175, + "learning_rate": 2.9908642855814844e-05, + "loss": 0.24079513549804688, + "step": 755 + }, + { + "epoch": 0.10220532319391636, + "grad_norm": 4.04808235168457, + "learning_rate": 2.9907887053429107e-05, + "loss": 0.19020462036132812, + "step": 756 + }, + { + "epoch": 0.10234051542036333, + "grad_norm": 3.054243326187134, + "learning_rate": 2.9907128147141783e-05, + "loss": 0.17934560775756836, + "step": 757 + }, + { + "epoch": 0.10247570764681031, + "grad_norm": 2.564101219177246, + "learning_rate": 2.990636613711089e-05, + "loss": 0.24249744415283203, + "step": 758 + }, + { + "epoch": 0.10261089987325729, + "grad_norm": 1.6259689331054688, + "learning_rate": 2.990560102349507e-05, + "loss": 0.09309077262878418, + "step": 759 + }, + { + "epoch": 0.10274609209970427, + "grad_norm": 4.681954383850098, + "learning_rate": 2.9904832806453635e-05, + "loss": 0.26602935791015625, + "step": 760 + }, + { + "epoch": 0.10288128432615125, + "grad_norm": 2.3799095153808594, + "learning_rate": 2.9904061486146524e-05, + "loss": 0.2074737548828125, + "step": 761 + }, + { + "epoch": 0.10301647655259823, + "grad_norm": 3.689250946044922, + "learning_rate": 2.9903287062734333e-05, + "loss": 0.2257223129272461, + "step": 762 + }, + { + "epoch": 0.1031516687790452, + "grad_norm": 2.096745014190674, + "learning_rate": 2.990250953637831e-05, + "loss": 0.21210384368896484, + "step": 763 + }, + { + "epoch": 0.10328686100549218, + "grad_norm": 2.1747965812683105, + "learning_rate": 2.9901728907240326e-05, + "loss": 0.23769092559814453, + "step": 764 + }, + { + "epoch": 0.10342205323193916, + "grad_norm": 5.016852378845215, + "learning_rate": 2.9900945175482916e-05, + "loss": 0.1928386688232422, + "step": 765 + }, + { + "epoch": 0.10355724545838614, + "grad_norm": 1.9081209897994995, + "learning_rate": 2.990015834126926e-05, + "loss": 0.24413681030273438, + "step": 766 + }, + { + "epoch": 0.10369243768483312, + "grad_norm": 1.4171063899993896, + "learning_rate": 2.989936840476318e-05, + "loss": 0.19045639038085938, + "step": 767 + }, + { + "epoch": 0.1038276299112801, + "grad_norm": 5.06385612487793, + "learning_rate": 2.9898575366129145e-05, + "loss": 0.23425865173339844, + "step": 768 + }, + { + "epoch": 0.10396282213772708, + "grad_norm": 3.5191590785980225, + "learning_rate": 2.9897779225532273e-05, + "loss": 0.20087623596191406, + "step": 769 + }, + { + "epoch": 0.10409801436417405, + "grad_norm": 1.4005638360977173, + "learning_rate": 2.989697998313832e-05, + "loss": 0.21179962158203125, + "step": 770 + }, + { + "epoch": 0.10423320659062103, + "grad_norm": 3.2815475463867188, + "learning_rate": 2.989617763911369e-05, + "loss": 0.26393890380859375, + "step": 771 + }, + { + "epoch": 0.10436839881706803, + "grad_norm": 1.8032615184783936, + "learning_rate": 2.9895372193625442e-05, + "loss": 0.20674514770507812, + "step": 772 + }, + { + "epoch": 0.104503591043515, + "grad_norm": 3.285004138946533, + "learning_rate": 2.9894563646841273e-05, + "loss": 0.22967910766601562, + "step": 773 + }, + { + "epoch": 0.10463878326996198, + "grad_norm": 2.8750803470611572, + "learning_rate": 2.9893751998929523e-05, + "loss": 0.2560882568359375, + "step": 774 + }, + { + "epoch": 0.10477397549640896, + "grad_norm": 1.3432013988494873, + "learning_rate": 2.9892937250059187e-05, + "loss": 0.24227142333984375, + "step": 775 + }, + { + "epoch": 0.10490916772285594, + "grad_norm": 1.9622797966003418, + "learning_rate": 2.9892119400399894e-05, + "loss": 0.2538471221923828, + "step": 776 + }, + { + "epoch": 0.10504435994930292, + "grad_norm": 5.148944854736328, + "learning_rate": 2.989129845012193e-05, + "loss": 0.24422264099121094, + "step": 777 + }, + { + "epoch": 0.1051795521757499, + "grad_norm": 1.8951119184494019, + "learning_rate": 2.989047439939621e-05, + "loss": 0.15408658981323242, + "step": 778 + }, + { + "epoch": 0.10531474440219687, + "grad_norm": 1.4069232940673828, + "learning_rate": 2.9889647248394324e-05, + "loss": 0.2533149719238281, + "step": 779 + }, + { + "epoch": 0.10544993662864385, + "grad_norm": 1.3385614156723022, + "learning_rate": 2.9888816997288475e-05, + "loss": 0.20326805114746094, + "step": 780 + }, + { + "epoch": 0.10558512885509083, + "grad_norm": 1.6833949089050293, + "learning_rate": 2.988798364625153e-05, + "loss": 0.20999956130981445, + "step": 781 + }, + { + "epoch": 0.10572032108153781, + "grad_norm": 4.966547012329102, + "learning_rate": 2.9887147195457002e-05, + "loss": 0.26210498809814453, + "step": 782 + }, + { + "epoch": 0.10585551330798479, + "grad_norm": 1.4661803245544434, + "learning_rate": 2.9886307645079037e-05, + "loss": 0.2142810821533203, + "step": 783 + }, + { + "epoch": 0.10599070553443177, + "grad_norm": 3.6143314838409424, + "learning_rate": 2.9885464995292436e-05, + "loss": 0.24684715270996094, + "step": 784 + }, + { + "epoch": 0.10612589776087875, + "grad_norm": 2.127908945083618, + "learning_rate": 2.9884619246272648e-05, + "loss": 0.22627639770507812, + "step": 785 + }, + { + "epoch": 0.10626108998732572, + "grad_norm": 5.926246643066406, + "learning_rate": 2.988377039819575e-05, + "loss": 0.2096424102783203, + "step": 786 + }, + { + "epoch": 0.1063962822137727, + "grad_norm": 2.9677090644836426, + "learning_rate": 2.9882918451238494e-05, + "loss": 0.1527118682861328, + "step": 787 + }, + { + "epoch": 0.10653147444021968, + "grad_norm": 3.0704970359802246, + "learning_rate": 2.988206340557825e-05, + "loss": 0.14369964599609375, + "step": 788 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 1.7233011722564697, + "learning_rate": 2.9881205261393037e-05, + "loss": 0.2638359069824219, + "step": 789 + }, + { + "epoch": 0.10680185889311365, + "grad_norm": 2.2960398197174072, + "learning_rate": 2.988034401886154e-05, + "loss": 0.23662757873535156, + "step": 790 + }, + { + "epoch": 0.10693705111956063, + "grad_norm": 2.6003386974334717, + "learning_rate": 2.9879479678163065e-05, + "loss": 0.2786293029785156, + "step": 791 + }, + { + "epoch": 0.10707224334600761, + "grad_norm": 2.2291908264160156, + "learning_rate": 2.9878612239477568e-05, + "loss": 0.20638656616210938, + "step": 792 + }, + { + "epoch": 0.10720743557245459, + "grad_norm": 2.587191343307495, + "learning_rate": 2.9877741702985666e-05, + "loss": 0.22580814361572266, + "step": 793 + }, + { + "epoch": 0.10734262779890157, + "grad_norm": 2.0171875953674316, + "learning_rate": 2.98768680688686e-05, + "loss": 0.2021503448486328, + "step": 794 + }, + { + "epoch": 0.10747782002534854, + "grad_norm": 4.239964485168457, + "learning_rate": 2.9875991337308274e-05, + "loss": 0.2075824737548828, + "step": 795 + }, + { + "epoch": 0.10761301225179552, + "grad_norm": 3.4658312797546387, + "learning_rate": 2.987511150848722e-05, + "loss": 0.16890716552734375, + "step": 796 + }, + { + "epoch": 0.1077482044782425, + "grad_norm": 5.566956043243408, + "learning_rate": 2.9874228582588627e-05, + "loss": 0.2117938995361328, + "step": 797 + }, + { + "epoch": 0.10788339670468948, + "grad_norm": 4.032079696655273, + "learning_rate": 2.9873342559796325e-05, + "loss": 0.2528724670410156, + "step": 798 + }, + { + "epoch": 0.10801858893113646, + "grad_norm": 2.9652650356292725, + "learning_rate": 2.9872453440294787e-05, + "loss": 0.17624187469482422, + "step": 799 + }, + { + "epoch": 0.10815378115758344, + "grad_norm": 2.2240042686462402, + "learning_rate": 2.9871561224269134e-05, + "loss": 0.2869300842285156, + "step": 800 + }, + { + "epoch": 0.10828897338403042, + "grad_norm": 3.174017906188965, + "learning_rate": 2.9870665911905127e-05, + "loss": 0.18218994140625, + "step": 801 + }, + { + "epoch": 0.1084241656104774, + "grad_norm": 3.9555299282073975, + "learning_rate": 2.9869767503389176e-05, + "loss": 0.21283626556396484, + "step": 802 + }, + { + "epoch": 0.10855935783692437, + "grad_norm": 6.190732955932617, + "learning_rate": 2.986886599890834e-05, + "loss": 0.28665733337402344, + "step": 803 + }, + { + "epoch": 0.10869455006337135, + "grad_norm": 5.123399257659912, + "learning_rate": 2.9867961398650306e-05, + "loss": 0.19631671905517578, + "step": 804 + }, + { + "epoch": 0.10882974228981833, + "grad_norm": 0.9978769421577454, + "learning_rate": 2.9867053702803425e-05, + "loss": 0.1465005874633789, + "step": 805 + }, + { + "epoch": 0.10896493451626532, + "grad_norm": 3.4984874725341797, + "learning_rate": 2.9866142911556685e-05, + "loss": 0.20914459228515625, + "step": 806 + }, + { + "epoch": 0.1091001267427123, + "grad_norm": 3.1558218002319336, + "learning_rate": 2.9865229025099713e-05, + "loss": 0.20807170867919922, + "step": 807 + }, + { + "epoch": 0.10923531896915928, + "grad_norm": 1.9393128156661987, + "learning_rate": 2.986431204362279e-05, + "loss": 0.16961383819580078, + "step": 808 + }, + { + "epoch": 0.10937051119560626, + "grad_norm": 3.27996563911438, + "learning_rate": 2.9863391967316835e-05, + "loss": 0.19561386108398438, + "step": 809 + }, + { + "epoch": 0.10950570342205324, + "grad_norm": 1.438079833984375, + "learning_rate": 2.9862468796373404e-05, + "loss": 0.2212977409362793, + "step": 810 + }, + { + "epoch": 0.10964089564850021, + "grad_norm": 1.8021934032440186, + "learning_rate": 2.9861542530984718e-05, + "loss": 0.19769763946533203, + "step": 811 + }, + { + "epoch": 0.10977608787494719, + "grad_norm": 1.8025696277618408, + "learning_rate": 2.9860613171343624e-05, + "loss": 0.2205047607421875, + "step": 812 + }, + { + "epoch": 0.10991128010139417, + "grad_norm": 1.6591147184371948, + "learning_rate": 2.9859680717643623e-05, + "loss": 0.19939613342285156, + "step": 813 + }, + { + "epoch": 0.11004647232784115, + "grad_norm": 1.3125712871551514, + "learning_rate": 2.985874517007885e-05, + "loss": 0.19457054138183594, + "step": 814 + }, + { + "epoch": 0.11018166455428813, + "grad_norm": 1.421205997467041, + "learning_rate": 2.98578065288441e-05, + "loss": 0.1952829360961914, + "step": 815 + }, + { + "epoch": 0.1103168567807351, + "grad_norm": 1.8531049489974976, + "learning_rate": 2.9856864794134798e-05, + "loss": 0.18225479125976562, + "step": 816 + }, + { + "epoch": 0.11045204900718208, + "grad_norm": 1.8185570240020752, + "learning_rate": 2.9855919966147025e-05, + "loss": 0.21445465087890625, + "step": 817 + }, + { + "epoch": 0.11058724123362906, + "grad_norm": 1.273156762123108, + "learning_rate": 2.9854972045077485e-05, + "loss": 0.26434326171875, + "step": 818 + }, + { + "epoch": 0.11072243346007604, + "grad_norm": 1.9759249687194824, + "learning_rate": 2.9854021031123555e-05, + "loss": 0.21663475036621094, + "step": 819 + }, + { + "epoch": 0.11085762568652302, + "grad_norm": 1.6428236961364746, + "learning_rate": 2.9853066924483232e-05, + "loss": 0.20756149291992188, + "step": 820 + }, + { + "epoch": 0.11099281791297, + "grad_norm": 2.1191680431365967, + "learning_rate": 2.9852109725355173e-05, + "loss": 0.13865947723388672, + "step": 821 + }, + { + "epoch": 0.11112801013941698, + "grad_norm": 1.5840522050857544, + "learning_rate": 2.9851149433938662e-05, + "loss": 0.15669631958007812, + "step": 822 + }, + { + "epoch": 0.11126320236586397, + "grad_norm": 1.2050979137420654, + "learning_rate": 2.9850186050433645e-05, + "loss": 0.1946253776550293, + "step": 823 + }, + { + "epoch": 0.11139839459231095, + "grad_norm": 2.593139171600342, + "learning_rate": 2.9849219575040708e-05, + "loss": 0.18323898315429688, + "step": 824 + }, + { + "epoch": 0.11153358681875793, + "grad_norm": 3.8891239166259766, + "learning_rate": 2.984825000796106e-05, + "loss": 0.17827606201171875, + "step": 825 + }, + { + "epoch": 0.1116687790452049, + "grad_norm": 1.5008375644683838, + "learning_rate": 2.9847277349396586e-05, + "loss": 0.2430572509765625, + "step": 826 + }, + { + "epoch": 0.11180397127165188, + "grad_norm": 1.4743549823760986, + "learning_rate": 2.984630159954979e-05, + "loss": 0.2042236328125, + "step": 827 + }, + { + "epoch": 0.11193916349809886, + "grad_norm": 2.3984475135803223, + "learning_rate": 2.9845322758623833e-05, + "loss": 0.3000831604003906, + "step": 828 + }, + { + "epoch": 0.11207435572454584, + "grad_norm": 3.811556100845337, + "learning_rate": 2.984434082682251e-05, + "loss": 0.26007080078125, + "step": 829 + }, + { + "epoch": 0.11220954795099282, + "grad_norm": 3.7559499740600586, + "learning_rate": 2.984335580435027e-05, + "loss": 0.22284698486328125, + "step": 830 + }, + { + "epoch": 0.1123447401774398, + "grad_norm": 5.927177429199219, + "learning_rate": 2.9842367691412192e-05, + "loss": 0.18510055541992188, + "step": 831 + }, + { + "epoch": 0.11247993240388678, + "grad_norm": 2.3276305198669434, + "learning_rate": 2.9841376488214015e-05, + "loss": 0.2044525146484375, + "step": 832 + }, + { + "epoch": 0.11261512463033375, + "grad_norm": 2.2319157123565674, + "learning_rate": 2.984038219496211e-05, + "loss": 0.27033138275146484, + "step": 833 + }, + { + "epoch": 0.11275031685678073, + "grad_norm": 3.4616312980651855, + "learning_rate": 2.9839384811863493e-05, + "loss": 0.20784759521484375, + "step": 834 + }, + { + "epoch": 0.11288550908322771, + "grad_norm": 3.4167797565460205, + "learning_rate": 2.9838384339125824e-05, + "loss": 0.16777801513671875, + "step": 835 + }, + { + "epoch": 0.11302070130967469, + "grad_norm": 4.379949569702148, + "learning_rate": 2.9837380776957405e-05, + "loss": 0.2557048797607422, + "step": 836 + }, + { + "epoch": 0.11315589353612167, + "grad_norm": 2.310194492340088, + "learning_rate": 2.9836374125567193e-05, + "loss": 0.22476959228515625, + "step": 837 + }, + { + "epoch": 0.11329108576256865, + "grad_norm": 3.056804656982422, + "learning_rate": 2.9835364385164764e-05, + "loss": 0.2047567367553711, + "step": 838 + }, + { + "epoch": 0.11342627798901563, + "grad_norm": 4.926165580749512, + "learning_rate": 2.983435155596036e-05, + "loss": 0.2521171569824219, + "step": 839 + }, + { + "epoch": 0.11356147021546262, + "grad_norm": 4.483332633972168, + "learning_rate": 2.9833335638164858e-05, + "loss": 0.2576662302017212, + "step": 840 + }, + { + "epoch": 0.1136966624419096, + "grad_norm": 2.9521732330322266, + "learning_rate": 2.9832316631989774e-05, + "loss": 0.2041778564453125, + "step": 841 + }, + { + "epoch": 0.11383185466835657, + "grad_norm": 2.077770233154297, + "learning_rate": 2.9831294537647272e-05, + "loss": 0.2508983612060547, + "step": 842 + }, + { + "epoch": 0.11396704689480355, + "grad_norm": 1.4439202547073364, + "learning_rate": 2.9830269355350155e-05, + "loss": 0.22361373901367188, + "step": 843 + }, + { + "epoch": 0.11410223912125053, + "grad_norm": 1.345982551574707, + "learning_rate": 2.9829241085311872e-05, + "loss": 0.18943023681640625, + "step": 844 + }, + { + "epoch": 0.11423743134769751, + "grad_norm": 3.3798348903656006, + "learning_rate": 2.9828209727746522e-05, + "loss": 0.23786544799804688, + "step": 845 + }, + { + "epoch": 0.11437262357414449, + "grad_norm": 3.047659397125244, + "learning_rate": 2.982717528286883e-05, + "loss": 0.23119735717773438, + "step": 846 + }, + { + "epoch": 0.11450781580059147, + "grad_norm": 5.75372314453125, + "learning_rate": 2.9826137750894176e-05, + "loss": 0.2439889907836914, + "step": 847 + }, + { + "epoch": 0.11464300802703845, + "grad_norm": 1.329908847808838, + "learning_rate": 2.9825097132038578e-05, + "loss": 0.20257186889648438, + "step": 848 + }, + { + "epoch": 0.11477820025348542, + "grad_norm": 2.1224234104156494, + "learning_rate": 2.9824053426518703e-05, + "loss": 0.22783279418945312, + "step": 849 + }, + { + "epoch": 0.1149133924799324, + "grad_norm": 2.4482314586639404, + "learning_rate": 2.9823006634551848e-05, + "loss": 0.22953224182128906, + "step": 850 + }, + { + "epoch": 0.11504858470637938, + "grad_norm": 1.3808059692382812, + "learning_rate": 2.9821956756355973e-05, + "loss": 0.20325851440429688, + "step": 851 + }, + { + "epoch": 0.11518377693282636, + "grad_norm": 1.2937262058258057, + "learning_rate": 2.9820903792149653e-05, + "loss": 0.1532764434814453, + "step": 852 + }, + { + "epoch": 0.11531896915927334, + "grad_norm": 1.2671122550964355, + "learning_rate": 2.981984774215214e-05, + "loss": 0.17059707641601562, + "step": 853 + }, + { + "epoch": 0.11545416138572032, + "grad_norm": 1.4780408143997192, + "learning_rate": 2.9818788606583286e-05, + "loss": 0.21390724182128906, + "step": 854 + }, + { + "epoch": 0.1155893536121673, + "grad_norm": 3.9047470092773438, + "learning_rate": 2.9817726385663627e-05, + "loss": 0.24678611755371094, + "step": 855 + }, + { + "epoch": 0.11572454583861427, + "grad_norm": 1.4960459470748901, + "learning_rate": 2.9816661079614316e-05, + "loss": 0.18773746490478516, + "step": 856 + }, + { + "epoch": 0.11585973806506127, + "grad_norm": 2.1197240352630615, + "learning_rate": 2.9815592688657154e-05, + "loss": 0.21656036376953125, + "step": 857 + }, + { + "epoch": 0.11599493029150824, + "grad_norm": 4.272851467132568, + "learning_rate": 2.9814521213014588e-05, + "loss": 0.21514129638671875, + "step": 858 + }, + { + "epoch": 0.11613012251795522, + "grad_norm": 2.013625383377075, + "learning_rate": 2.9813446652909707e-05, + "loss": 0.2300581932067871, + "step": 859 + }, + { + "epoch": 0.1162653147444022, + "grad_norm": 2.0729455947875977, + "learning_rate": 2.981236900856624e-05, + "loss": 0.21991348266601562, + "step": 860 + }, + { + "epoch": 0.11640050697084918, + "grad_norm": 3.9367828369140625, + "learning_rate": 2.9811288280208552e-05, + "loss": 0.20949935913085938, + "step": 861 + }, + { + "epoch": 0.11653569919729616, + "grad_norm": 5.059543609619141, + "learning_rate": 2.9810204468061664e-05, + "loss": 0.27564048767089844, + "step": 862 + }, + { + "epoch": 0.11667089142374314, + "grad_norm": 2.236551284790039, + "learning_rate": 2.9809117572351223e-05, + "loss": 0.18333816528320312, + "step": 863 + }, + { + "epoch": 0.11680608365019012, + "grad_norm": 2.1935770511627197, + "learning_rate": 2.9808027593303537e-05, + "loss": 0.18828582763671875, + "step": 864 + }, + { + "epoch": 0.1169412758766371, + "grad_norm": 2.681454658508301, + "learning_rate": 2.980693453114554e-05, + "loss": 0.2298579216003418, + "step": 865 + }, + { + "epoch": 0.11707646810308407, + "grad_norm": 4.517125606536865, + "learning_rate": 2.980583838610481e-05, + "loss": 0.2062397003173828, + "step": 866 + }, + { + "epoch": 0.11721166032953105, + "grad_norm": 2.519273519515991, + "learning_rate": 2.980473915840957e-05, + "loss": 0.20514249801635742, + "step": 867 + }, + { + "epoch": 0.11734685255597803, + "grad_norm": 3.7285878658294678, + "learning_rate": 2.9803636848288696e-05, + "loss": 0.18355274200439453, + "step": 868 + }, + { + "epoch": 0.11748204478242501, + "grad_norm": 1.79351007938385, + "learning_rate": 2.9802531455971686e-05, + "loss": 0.20285606384277344, + "step": 869 + }, + { + "epoch": 0.11761723700887199, + "grad_norm": 3.005331039428711, + "learning_rate": 2.980142298168869e-05, + "loss": 0.23609542846679688, + "step": 870 + }, + { + "epoch": 0.11775242923531896, + "grad_norm": 4.313886642456055, + "learning_rate": 2.9800311425670495e-05, + "loss": 0.2367558479309082, + "step": 871 + }, + { + "epoch": 0.11788762146176594, + "grad_norm": 1.496138572692871, + "learning_rate": 2.9799196788148538e-05, + "loss": 0.19964981079101562, + "step": 872 + }, + { + "epoch": 0.11802281368821292, + "grad_norm": 4.341944217681885, + "learning_rate": 2.9798079069354893e-05, + "loss": 0.2216787338256836, + "step": 873 + }, + { + "epoch": 0.11815800591465991, + "grad_norm": 4.757167339324951, + "learning_rate": 2.9796958269522273e-05, + "loss": 0.1971874237060547, + "step": 874 + }, + { + "epoch": 0.11829319814110689, + "grad_norm": 2.4889369010925293, + "learning_rate": 2.9795834388884034e-05, + "loss": 0.1622314453125, + "step": 875 + }, + { + "epoch": 0.11842839036755387, + "grad_norm": 2.677736759185791, + "learning_rate": 2.979470742767417e-05, + "loss": 0.2699871063232422, + "step": 876 + }, + { + "epoch": 0.11856358259400085, + "grad_norm": 3.8931052684783936, + "learning_rate": 2.9793577386127327e-05, + "loss": 0.19470500946044922, + "step": 877 + }, + { + "epoch": 0.11869877482044783, + "grad_norm": 2.8569531440734863, + "learning_rate": 2.9792444264478784e-05, + "loss": 0.18169474601745605, + "step": 878 + }, + { + "epoch": 0.1188339670468948, + "grad_norm": 5.615286350250244, + "learning_rate": 2.979130806296446e-05, + "loss": 0.18413448333740234, + "step": 879 + }, + { + "epoch": 0.11896915927334178, + "grad_norm": 3.743774652481079, + "learning_rate": 2.9790168781820925e-05, + "loss": 0.23986244201660156, + "step": 880 + }, + { + "epoch": 0.11910435149978876, + "grad_norm": 3.260669469833374, + "learning_rate": 2.9789026421285375e-05, + "loss": 0.21744728088378906, + "step": 881 + }, + { + "epoch": 0.11923954372623574, + "grad_norm": 1.1249809265136719, + "learning_rate": 2.9787880981595663e-05, + "loss": 0.11754035949707031, + "step": 882 + }, + { + "epoch": 0.11937473595268272, + "grad_norm": 3.258342981338501, + "learning_rate": 2.9786732462990267e-05, + "loss": 0.17868423461914062, + "step": 883 + }, + { + "epoch": 0.1195099281791297, + "grad_norm": 1.321682095527649, + "learning_rate": 2.9785580865708323e-05, + "loss": 0.24796104431152344, + "step": 884 + }, + { + "epoch": 0.11964512040557668, + "grad_norm": 1.6667258739471436, + "learning_rate": 2.97844261899896e-05, + "loss": 0.2400188446044922, + "step": 885 + }, + { + "epoch": 0.11978031263202366, + "grad_norm": 3.6601486206054688, + "learning_rate": 2.9783268436074495e-05, + "loss": 0.2026195526123047, + "step": 886 + }, + { + "epoch": 0.11991550485847063, + "grad_norm": 2.875750780105591, + "learning_rate": 2.978210760420407e-05, + "loss": 0.24673175811767578, + "step": 887 + }, + { + "epoch": 0.12005069708491761, + "grad_norm": 2.139686107635498, + "learning_rate": 2.978094369462002e-05, + "loss": 0.17135417461395264, + "step": 888 + }, + { + "epoch": 0.12018588931136459, + "grad_norm": 2.065563440322876, + "learning_rate": 2.977977670756467e-05, + "loss": 0.2228260040283203, + "step": 889 + }, + { + "epoch": 0.12032108153781157, + "grad_norm": 5.263899803161621, + "learning_rate": 2.9778606643280987e-05, + "loss": 0.1907176971435547, + "step": 890 + }, + { + "epoch": 0.12045627376425856, + "grad_norm": 4.728724956512451, + "learning_rate": 2.97774335020126e-05, + "loss": 0.21061134338378906, + "step": 891 + }, + { + "epoch": 0.12059146599070554, + "grad_norm": 2.2337210178375244, + "learning_rate": 2.9776257284003748e-05, + "loss": 0.21288299560546875, + "step": 892 + }, + { + "epoch": 0.12072665821715252, + "grad_norm": 2.4238178730010986, + "learning_rate": 2.9775077989499338e-05, + "loss": 0.22940444946289062, + "step": 893 + }, + { + "epoch": 0.1208618504435995, + "grad_norm": 2.355374336242676, + "learning_rate": 2.97738956187449e-05, + "loss": 0.1994647979736328, + "step": 894 + }, + { + "epoch": 0.12099704267004648, + "grad_norm": 1.5948034524917603, + "learning_rate": 2.9772710171986605e-05, + "loss": 0.19122695922851562, + "step": 895 + }, + { + "epoch": 0.12113223489649345, + "grad_norm": 1.9716911315917969, + "learning_rate": 2.977152164947128e-05, + "loss": 0.22398757934570312, + "step": 896 + }, + { + "epoch": 0.12126742712294043, + "grad_norm": 5.044217586517334, + "learning_rate": 2.9770330051446373e-05, + "loss": 0.2217998504638672, + "step": 897 + }, + { + "epoch": 0.12140261934938741, + "grad_norm": 3.002575635910034, + "learning_rate": 2.976913537815999e-05, + "loss": 0.2006692886352539, + "step": 898 + }, + { + "epoch": 0.12153781157583439, + "grad_norm": 0.9052819609642029, + "learning_rate": 2.9767937629860853e-05, + "loss": 0.21210289001464844, + "step": 899 + }, + { + "epoch": 0.12167300380228137, + "grad_norm": 1.0156062841415405, + "learning_rate": 2.9766736806798353e-05, + "loss": 0.20656299591064453, + "step": 900 + }, + { + "epoch": 0.12180819602872835, + "grad_norm": 2.6435019969940186, + "learning_rate": 2.9765532909222512e-05, + "loss": 0.23062872886657715, + "step": 901 + }, + { + "epoch": 0.12194338825517533, + "grad_norm": 2.5739858150482178, + "learning_rate": 2.976432593738397e-05, + "loss": 0.17801666259765625, + "step": 902 + }, + { + "epoch": 0.1220785804816223, + "grad_norm": 0.9694176316261292, + "learning_rate": 2.9763115891534036e-05, + "loss": 0.20056533813476562, + "step": 903 + }, + { + "epoch": 0.12221377270806928, + "grad_norm": 4.098574161529541, + "learning_rate": 2.9761902771924648e-05, + "loss": 0.22260218858718872, + "step": 904 + }, + { + "epoch": 0.12234896493451626, + "grad_norm": 3.375361204147339, + "learning_rate": 2.9760686578808387e-05, + "loss": 0.20522403717041016, + "step": 905 + }, + { + "epoch": 0.12248415716096324, + "grad_norm": 2.8765158653259277, + "learning_rate": 2.9759467312438462e-05, + "loss": 0.22367095947265625, + "step": 906 + }, + { + "epoch": 0.12261934938741022, + "grad_norm": 2.7609915733337402, + "learning_rate": 2.975824497306874e-05, + "loss": 0.24929046630859375, + "step": 907 + }, + { + "epoch": 0.12275454161385721, + "grad_norm": 1.7472270727157593, + "learning_rate": 2.9757019560953707e-05, + "loss": 0.1735515594482422, + "step": 908 + }, + { + "epoch": 0.12288973384030419, + "grad_norm": 1.9093526601791382, + "learning_rate": 2.9755791076348517e-05, + "loss": 0.2290334701538086, + "step": 909 + }, + { + "epoch": 0.12302492606675117, + "grad_norm": 3.3614683151245117, + "learning_rate": 2.9754559519508924e-05, + "loss": 0.1747417449951172, + "step": 910 + }, + { + "epoch": 0.12316011829319815, + "grad_norm": 1.4478107690811157, + "learning_rate": 2.975332489069137e-05, + "loss": 0.1736927032470703, + "step": 911 + }, + { + "epoch": 0.12329531051964512, + "grad_norm": 2.3478851318359375, + "learning_rate": 2.9752087190152893e-05, + "loss": 0.26886940002441406, + "step": 912 + }, + { + "epoch": 0.1234305027460921, + "grad_norm": 5.618727207183838, + "learning_rate": 2.97508464181512e-05, + "loss": 0.2665853500366211, + "step": 913 + }, + { + "epoch": 0.12356569497253908, + "grad_norm": 2.92346453666687, + "learning_rate": 2.9749602574944615e-05, + "loss": 0.20128726959228516, + "step": 914 + }, + { + "epoch": 0.12370088719898606, + "grad_norm": 2.4036526679992676, + "learning_rate": 2.9748355660792125e-05, + "loss": 0.185821533203125, + "step": 915 + }, + { + "epoch": 0.12383607942543304, + "grad_norm": 3.120410442352295, + "learning_rate": 2.9747105675953338e-05, + "loss": 0.28485107421875, + "step": 916 + }, + { + "epoch": 0.12397127165188002, + "grad_norm": 1.1527925729751587, + "learning_rate": 2.9745852620688506e-05, + "loss": 0.1913928985595703, + "step": 917 + }, + { + "epoch": 0.124106463878327, + "grad_norm": 2.3701677322387695, + "learning_rate": 2.974459649525853e-05, + "loss": 0.3086204528808594, + "step": 918 + }, + { + "epoch": 0.12424165610477397, + "grad_norm": 1.7501299381256104, + "learning_rate": 2.9743337299924925e-05, + "loss": 0.21162652969360352, + "step": 919 + }, + { + "epoch": 0.12437684833122095, + "grad_norm": 1.5002126693725586, + "learning_rate": 2.9742075034949883e-05, + "loss": 0.2451343536376953, + "step": 920 + }, + { + "epoch": 0.12451204055766793, + "grad_norm": 1.4094550609588623, + "learning_rate": 2.97408097005962e-05, + "loss": 0.20383453369140625, + "step": 921 + }, + { + "epoch": 0.12464723278411491, + "grad_norm": 2.4477241039276123, + "learning_rate": 2.973954129712733e-05, + "loss": 0.251678466796875, + "step": 922 + }, + { + "epoch": 0.12478242501056189, + "grad_norm": 1.790040135383606, + "learning_rate": 2.973826982480736e-05, + "loss": 0.2011585235595703, + "step": 923 + }, + { + "epoch": 0.12491761723700887, + "grad_norm": 1.7725608348846436, + "learning_rate": 2.9736995283901022e-05, + "loss": 0.21489334106445312, + "step": 924 + }, + { + "epoch": 0.12505280946345584, + "grad_norm": 1.7747427225112915, + "learning_rate": 2.9735717674673676e-05, + "loss": 0.25714111328125, + "step": 925 + }, + { + "epoch": 0.12518800168990282, + "grad_norm": 2.6696882247924805, + "learning_rate": 2.973443699739133e-05, + "loss": 0.2600440979003906, + "step": 926 + }, + { + "epoch": 0.1253231939163498, + "grad_norm": 3.341881036758423, + "learning_rate": 2.973315325232063e-05, + "loss": 0.19092655181884766, + "step": 927 + }, + { + "epoch": 0.12545838614279678, + "grad_norm": 3.200180768966675, + "learning_rate": 2.9731866439728853e-05, + "loss": 0.17973732948303223, + "step": 928 + }, + { + "epoch": 0.12559357836924376, + "grad_norm": 2.652151584625244, + "learning_rate": 2.9730576559883924e-05, + "loss": 0.1949176788330078, + "step": 929 + }, + { + "epoch": 0.12572877059569074, + "grad_norm": 2.449274778366089, + "learning_rate": 2.97292836130544e-05, + "loss": 0.2210845947265625, + "step": 930 + }, + { + "epoch": 0.12586396282213771, + "grad_norm": 0.970920741558075, + "learning_rate": 2.9727987599509485e-05, + "loss": 0.17169761657714844, + "step": 931 + }, + { + "epoch": 0.1259991550485847, + "grad_norm": 0.8495640754699707, + "learning_rate": 2.972668851951901e-05, + "loss": 0.17377853393554688, + "step": 932 + }, + { + "epoch": 0.12613434727503167, + "grad_norm": 2.3643150329589844, + "learning_rate": 2.9725386373353455e-05, + "loss": 0.2543010711669922, + "step": 933 + }, + { + "epoch": 0.12626953950147868, + "grad_norm": 1.8319456577301025, + "learning_rate": 2.972408116128393e-05, + "loss": 0.15818405151367188, + "step": 934 + }, + { + "epoch": 0.12640473172792566, + "grad_norm": 2.539389133453369, + "learning_rate": 2.972277288358219e-05, + "loss": 0.18526840209960938, + "step": 935 + }, + { + "epoch": 0.12653992395437264, + "grad_norm": 3.0631022453308105, + "learning_rate": 2.9721461540520628e-05, + "loss": 0.26531219482421875, + "step": 936 + }, + { + "epoch": 0.1266751161808196, + "grad_norm": 1.86379075050354, + "learning_rate": 2.9720147132372265e-05, + "loss": 0.2439422607421875, + "step": 937 + }, + { + "epoch": 0.1268103084072666, + "grad_norm": 1.9019217491149902, + "learning_rate": 2.9718829659410772e-05, + "loss": 0.18474483489990234, + "step": 938 + }, + { + "epoch": 0.12694550063371357, + "grad_norm": 1.4551502466201782, + "learning_rate": 2.9717509121910453e-05, + "loss": 0.23337364196777344, + "step": 939 + }, + { + "epoch": 0.12708069286016055, + "grad_norm": 1.7903971672058105, + "learning_rate": 2.971618552014625e-05, + "loss": 0.21827220916748047, + "step": 940 + }, + { + "epoch": 0.12721588508660753, + "grad_norm": 2.658393144607544, + "learning_rate": 2.971485885439375e-05, + "loss": 0.13556480407714844, + "step": 941 + }, + { + "epoch": 0.1273510773130545, + "grad_norm": 3.486389398574829, + "learning_rate": 2.9713529124929163e-05, + "loss": 0.1954631805419922, + "step": 942 + }, + { + "epoch": 0.12748626953950148, + "grad_norm": 1.5871957540512085, + "learning_rate": 2.9712196332029352e-05, + "loss": 0.16504263877868652, + "step": 943 + }, + { + "epoch": 0.12762146176594846, + "grad_norm": 1.9027631282806396, + "learning_rate": 2.971086047597181e-05, + "loss": 0.23107624053955078, + "step": 944 + }, + { + "epoch": 0.12775665399239544, + "grad_norm": 1.362157940864563, + "learning_rate": 2.9709521557034668e-05, + "loss": 0.2621803283691406, + "step": 945 + }, + { + "epoch": 0.12789184621884242, + "grad_norm": 0.897609531879425, + "learning_rate": 2.9708179575496696e-05, + "loss": 0.1943206787109375, + "step": 946 + }, + { + "epoch": 0.1280270384452894, + "grad_norm": 4.036199569702148, + "learning_rate": 2.9706834531637303e-05, + "loss": 0.18705272674560547, + "step": 947 + }, + { + "epoch": 0.12816223067173638, + "grad_norm": 1.63685142993927, + "learning_rate": 2.9705486425736537e-05, + "loss": 0.1947479248046875, + "step": 948 + }, + { + "epoch": 0.12829742289818336, + "grad_norm": 1.523179531097412, + "learning_rate": 2.9704135258075077e-05, + "loss": 0.16611099243164062, + "step": 949 + }, + { + "epoch": 0.12843261512463033, + "grad_norm": 2.705198049545288, + "learning_rate": 2.970278102893424e-05, + "loss": 0.20716285705566406, + "step": 950 + }, + { + "epoch": 0.1285678073510773, + "grad_norm": 2.0752038955688477, + "learning_rate": 2.9701423738595992e-05, + "loss": 0.1510772705078125, + "step": 951 + }, + { + "epoch": 0.1287029995775243, + "grad_norm": 6.194035530090332, + "learning_rate": 2.9700063387342925e-05, + "loss": 0.2208089828491211, + "step": 952 + }, + { + "epoch": 0.12883819180397127, + "grad_norm": 4.746684551239014, + "learning_rate": 2.969869997545827e-05, + "loss": 0.177886962890625, + "step": 953 + }, + { + "epoch": 0.12897338403041825, + "grad_norm": 2.50292706489563, + "learning_rate": 2.9697333503225897e-05, + "loss": 0.22397232055664062, + "step": 954 + }, + { + "epoch": 0.12910857625686523, + "grad_norm": 1.5793941020965576, + "learning_rate": 2.969596397093031e-05, + "loss": 0.22097396850585938, + "step": 955 + }, + { + "epoch": 0.1292437684833122, + "grad_norm": 6.30570650100708, + "learning_rate": 2.969459137885666e-05, + "loss": 0.31198978424072266, + "step": 956 + }, + { + "epoch": 0.12937896070975918, + "grad_norm": 2.05967378616333, + "learning_rate": 2.969321572729072e-05, + "loss": 0.1669597625732422, + "step": 957 + }, + { + "epoch": 0.12951415293620616, + "grad_norm": 3.2239811420440674, + "learning_rate": 2.9691837016518915e-05, + "loss": 0.19611406326293945, + "step": 958 + }, + { + "epoch": 0.12964934516265314, + "grad_norm": 3.287376642227173, + "learning_rate": 2.9690455246828294e-05, + "loss": 0.17228317260742188, + "step": 959 + }, + { + "epoch": 0.12978453738910012, + "grad_norm": 3.6492111682891846, + "learning_rate": 2.968907041850655e-05, + "loss": 0.23149681091308594, + "step": 960 + }, + { + "epoch": 0.1299197296155471, + "grad_norm": 1.3766086101531982, + "learning_rate": 2.968768253184202e-05, + "loss": 0.17496681213378906, + "step": 961 + }, + { + "epoch": 0.13005492184199408, + "grad_norm": 3.6908836364746094, + "learning_rate": 2.9686291587123655e-05, + "loss": 0.23662948608398438, + "step": 962 + }, + { + "epoch": 0.13019011406844105, + "grad_norm": 1.9154052734375, + "learning_rate": 2.968489758464107e-05, + "loss": 0.2047710418701172, + "step": 963 + }, + { + "epoch": 0.13032530629488803, + "grad_norm": 1.989184021949768, + "learning_rate": 2.9683500524684494e-05, + "loss": 0.2490081787109375, + "step": 964 + }, + { + "epoch": 0.130460498521335, + "grad_norm": 1.355475664138794, + "learning_rate": 2.9682100407544812e-05, + "loss": 0.17270660400390625, + "step": 965 + }, + { + "epoch": 0.130595690747782, + "grad_norm": 4.148411273956299, + "learning_rate": 2.9680697233513526e-05, + "loss": 0.20015335083007812, + "step": 966 + }, + { + "epoch": 0.13073088297422897, + "grad_norm": 1.6648255586624146, + "learning_rate": 2.9679291002882793e-05, + "loss": 0.19969749450683594, + "step": 967 + }, + { + "epoch": 0.13086607520067597, + "grad_norm": 4.041023254394531, + "learning_rate": 2.967788171594539e-05, + "loss": 0.17649555206298828, + "step": 968 + }, + { + "epoch": 0.13100126742712295, + "grad_norm": 3.6388397216796875, + "learning_rate": 2.967646937299474e-05, + "loss": 0.187774658203125, + "step": 969 + }, + { + "epoch": 0.13113645965356993, + "grad_norm": 1.9017889499664307, + "learning_rate": 2.9675053974324907e-05, + "loss": 0.2440328598022461, + "step": 970 + }, + { + "epoch": 0.1312716518800169, + "grad_norm": 1.7631199359893799, + "learning_rate": 2.9673635520230576e-05, + "loss": 0.20721054077148438, + "step": 971 + }, + { + "epoch": 0.1314068441064639, + "grad_norm": 2.2709553241729736, + "learning_rate": 2.9672214011007087e-05, + "loss": 0.23993682861328125, + "step": 972 + }, + { + "epoch": 0.13154203633291087, + "grad_norm": 1.8223376274108887, + "learning_rate": 2.9670789446950396e-05, + "loss": 0.2679557800292969, + "step": 973 + }, + { + "epoch": 0.13167722855935785, + "grad_norm": 2.779097318649292, + "learning_rate": 2.9669361828357105e-05, + "loss": 0.20444297790527344, + "step": 974 + }, + { + "epoch": 0.13181242078580482, + "grad_norm": 2.168428421020508, + "learning_rate": 2.9667931155524454e-05, + "loss": 0.1747760772705078, + "step": 975 + }, + { + "epoch": 0.1319476130122518, + "grad_norm": 3.1765952110290527, + "learning_rate": 2.966649742875032e-05, + "loss": 0.20195388793945312, + "step": 976 + }, + { + "epoch": 0.13208280523869878, + "grad_norm": 2.4435763359069824, + "learning_rate": 2.9665060648333206e-05, + "loss": 0.1827259063720703, + "step": 977 + }, + { + "epoch": 0.13221799746514576, + "grad_norm": 1.147352933883667, + "learning_rate": 2.9663620814572266e-05, + "loss": 0.1866617202758789, + "step": 978 + }, + { + "epoch": 0.13235318969159274, + "grad_norm": 0.8455138206481934, + "learning_rate": 2.966217792776728e-05, + "loss": 0.18280696868896484, + "step": 979 + }, + { + "epoch": 0.13248838191803972, + "grad_norm": 1.369072437286377, + "learning_rate": 2.9660731988218652e-05, + "loss": 0.14558029174804688, + "step": 980 + }, + { + "epoch": 0.1326235741444867, + "grad_norm": 2.3237037658691406, + "learning_rate": 2.965928299622745e-05, + "loss": 0.15476417541503906, + "step": 981 + }, + { + "epoch": 0.13275876637093367, + "grad_norm": 1.4892674684524536, + "learning_rate": 2.965783095209535e-05, + "loss": 0.2066631317138672, + "step": 982 + }, + { + "epoch": 0.13289395859738065, + "grad_norm": 1.9498488903045654, + "learning_rate": 2.965637585612469e-05, + "loss": 0.25786781311035156, + "step": 983 + }, + { + "epoch": 0.13302915082382763, + "grad_norm": 3.2511940002441406, + "learning_rate": 2.965491770861841e-05, + "loss": 0.24323606491088867, + "step": 984 + }, + { + "epoch": 0.1331643430502746, + "grad_norm": 4.255857467651367, + "learning_rate": 2.965345650988012e-05, + "loss": 0.1783914566040039, + "step": 985 + }, + { + "epoch": 0.1332995352767216, + "grad_norm": 2.5358378887176514, + "learning_rate": 2.9651992260214035e-05, + "loss": 0.18294262886047363, + "step": 986 + }, + { + "epoch": 0.13343472750316857, + "grad_norm": 2.9594359397888184, + "learning_rate": 2.9650524959925037e-05, + "loss": 0.22311067581176758, + "step": 987 + }, + { + "epoch": 0.13356991972961554, + "grad_norm": 3.5358283519744873, + "learning_rate": 2.9649054609318607e-05, + "loss": 0.2275458574295044, + "step": 988 + }, + { + "epoch": 0.13370511195606252, + "grad_norm": 1.8538273572921753, + "learning_rate": 2.9647581208700894e-05, + "loss": 0.26154327392578125, + "step": 989 + }, + { + "epoch": 0.1338403041825095, + "grad_norm": 2.364600419998169, + "learning_rate": 2.9646104758378666e-05, + "loss": 0.20736408233642578, + "step": 990 + }, + { + "epoch": 0.13397549640895648, + "grad_norm": 4.50392484664917, + "learning_rate": 2.964462525865932e-05, + "loss": 0.2237110137939453, + "step": 991 + }, + { + "epoch": 0.13411068863540346, + "grad_norm": 2.281534194946289, + "learning_rate": 2.96431427098509e-05, + "loss": 0.1638660430908203, + "step": 992 + }, + { + "epoch": 0.13424588086185044, + "grad_norm": 1.7651290893554688, + "learning_rate": 2.9641657112262084e-05, + "loss": 0.2558937072753906, + "step": 993 + }, + { + "epoch": 0.13438107308829741, + "grad_norm": 1.1067181825637817, + "learning_rate": 2.9640168466202174e-05, + "loss": 0.22565078735351562, + "step": 994 + }, + { + "epoch": 0.1345162653147444, + "grad_norm": 1.6819090843200684, + "learning_rate": 2.9638676771981124e-05, + "loss": 0.14623260498046875, + "step": 995 + }, + { + "epoch": 0.13465145754119137, + "grad_norm": 3.1180827617645264, + "learning_rate": 2.9637182029909508e-05, + "loss": 0.20511770248413086, + "step": 996 + }, + { + "epoch": 0.13478664976763835, + "grad_norm": 1.6389938592910767, + "learning_rate": 2.9635684240298532e-05, + "loss": 0.2666358947753906, + "step": 997 + }, + { + "epoch": 0.13492184199408533, + "grad_norm": 0.9389215707778931, + "learning_rate": 2.9634183403460053e-05, + "loss": 0.18558883666992188, + "step": 998 + }, + { + "epoch": 0.1350570342205323, + "grad_norm": 3.4492907524108887, + "learning_rate": 2.9632679519706553e-05, + "loss": 0.1696944236755371, + "step": 999 + }, + { + "epoch": 0.13519222644697929, + "grad_norm": 1.490695834159851, + "learning_rate": 2.9631172589351137e-05, + "loss": 0.24640274047851562, + "step": 1000 + }, + { + "epoch": 0.1353274186734263, + "grad_norm": 2.623732089996338, + "learning_rate": 2.962966261270758e-05, + "loss": 0.25098419189453125, + "step": 1001 + }, + { + "epoch": 0.13546261089987327, + "grad_norm": 1.315132737159729, + "learning_rate": 2.962814959009024e-05, + "loss": 0.14512348175048828, + "step": 1002 + }, + { + "epoch": 0.13559780312632025, + "grad_norm": 3.913986921310425, + "learning_rate": 2.962663352181415e-05, + "loss": 0.21914100646972656, + "step": 1003 + }, + { + "epoch": 0.13573299535276723, + "grad_norm": 4.037308216094971, + "learning_rate": 2.9625114408194966e-05, + "loss": 0.2380237579345703, + "step": 1004 + }, + { + "epoch": 0.1358681875792142, + "grad_norm": 1.155958890914917, + "learning_rate": 2.962359224954897e-05, + "loss": 0.15960216522216797, + "step": 1005 + }, + { + "epoch": 0.13600337980566118, + "grad_norm": 2.270986557006836, + "learning_rate": 2.9622067046193086e-05, + "loss": 0.21088409423828125, + "step": 1006 + }, + { + "epoch": 0.13613857203210816, + "grad_norm": 6.729562759399414, + "learning_rate": 2.9620538798444867e-05, + "loss": 0.2117156982421875, + "step": 1007 + }, + { + "epoch": 0.13627376425855514, + "grad_norm": 6.297460079193115, + "learning_rate": 2.9619007506622506e-05, + "loss": 0.28343963623046875, + "step": 1008 + }, + { + "epoch": 0.13640895648500212, + "grad_norm": 4.332822799682617, + "learning_rate": 2.961747317104482e-05, + "loss": 0.17863941192626953, + "step": 1009 + }, + { + "epoch": 0.1365441487114491, + "grad_norm": 2.627390146255493, + "learning_rate": 2.9615935792031274e-05, + "loss": 0.1876964569091797, + "step": 1010 + }, + { + "epoch": 0.13667934093789608, + "grad_norm": 4.272457599639893, + "learning_rate": 2.9614395369901953e-05, + "loss": 0.23885726928710938, + "step": 1011 + }, + { + "epoch": 0.13681453316434306, + "grad_norm": 3.464097261428833, + "learning_rate": 2.9612851904977582e-05, + "loss": 0.2772798538208008, + "step": 1012 + }, + { + "epoch": 0.13694972539079003, + "grad_norm": 4.8939127922058105, + "learning_rate": 2.9611305397579518e-05, + "loss": 0.28266334533691406, + "step": 1013 + }, + { + "epoch": 0.137084917617237, + "grad_norm": 1.0168622732162476, + "learning_rate": 2.9609755848029755e-05, + "loss": 0.15162897109985352, + "step": 1014 + }, + { + "epoch": 0.137220109843684, + "grad_norm": 4.415287494659424, + "learning_rate": 2.9608203256650916e-05, + "loss": 0.21660614013671875, + "step": 1015 + }, + { + "epoch": 0.13735530207013097, + "grad_norm": 3.1307613849639893, + "learning_rate": 2.9606647623766257e-05, + "loss": 0.17457962036132812, + "step": 1016 + }, + { + "epoch": 0.13749049429657795, + "grad_norm": 4.223004341125488, + "learning_rate": 2.9605088949699672e-05, + "loss": 0.2597503662109375, + "step": 1017 + }, + { + "epoch": 0.13762568652302493, + "grad_norm": 1.541254997253418, + "learning_rate": 2.9603527234775682e-05, + "loss": 0.2314300537109375, + "step": 1018 + }, + { + "epoch": 0.1377608787494719, + "grad_norm": 2.2885689735412598, + "learning_rate": 2.960196247931945e-05, + "loss": 0.18483352661132812, + "step": 1019 + }, + { + "epoch": 0.13789607097591888, + "grad_norm": 3.845386505126953, + "learning_rate": 2.960039468365676e-05, + "loss": 0.24379920959472656, + "step": 1020 + }, + { + "epoch": 0.13803126320236586, + "grad_norm": 1.9371105432510376, + "learning_rate": 2.959882384811404e-05, + "loss": 0.21224021911621094, + "step": 1021 + }, + { + "epoch": 0.13816645542881284, + "grad_norm": 0.9895491600036621, + "learning_rate": 2.9597249973018343e-05, + "loss": 0.17930030822753906, + "step": 1022 + }, + { + "epoch": 0.13830164765525982, + "grad_norm": 1.7604680061340332, + "learning_rate": 2.959567305869736e-05, + "loss": 0.19334888458251953, + "step": 1023 + }, + { + "epoch": 0.1384368398817068, + "grad_norm": 4.62630033493042, + "learning_rate": 2.9594093105479413e-05, + "loss": 0.20189809799194336, + "step": 1024 + }, + { + "epoch": 0.13857203210815378, + "grad_norm": 3.088228702545166, + "learning_rate": 2.959251011369345e-05, + "loss": 0.15636444091796875, + "step": 1025 + }, + { + "epoch": 0.13870722433460075, + "grad_norm": 2.911190986633301, + "learning_rate": 2.959092408366907e-05, + "loss": 0.206268310546875, + "step": 1026 + }, + { + "epoch": 0.13884241656104773, + "grad_norm": 1.934842824935913, + "learning_rate": 2.958933501573649e-05, + "loss": 0.27164459228515625, + "step": 1027 + }, + { + "epoch": 0.1389776087874947, + "grad_norm": 4.05169153213501, + "learning_rate": 2.9587742910226555e-05, + "loss": 0.20975112915039062, + "step": 1028 + }, + { + "epoch": 0.1391128010139417, + "grad_norm": 2.305387496948242, + "learning_rate": 2.958614776747076e-05, + "loss": 0.19959449768066406, + "step": 1029 + }, + { + "epoch": 0.13924799324038867, + "grad_norm": 1.5476231575012207, + "learning_rate": 2.9584549587801213e-05, + "loss": 0.22294235229492188, + "step": 1030 + }, + { + "epoch": 0.13938318546683565, + "grad_norm": 4.097291469573975, + "learning_rate": 2.958294837155067e-05, + "loss": 0.19585752487182617, + "step": 1031 + }, + { + "epoch": 0.13951837769328262, + "grad_norm": 0.7149341106414795, + "learning_rate": 2.9581344119052508e-05, + "loss": 0.09413814544677734, + "step": 1032 + }, + { + "epoch": 0.1396535699197296, + "grad_norm": 1.8023170232772827, + "learning_rate": 2.957973683064074e-05, + "loss": 0.2526130676269531, + "step": 1033 + }, + { + "epoch": 0.13978876214617658, + "grad_norm": 3.4431264400482178, + "learning_rate": 2.957812650665002e-05, + "loss": 0.22739791870117188, + "step": 1034 + }, + { + "epoch": 0.1399239543726236, + "grad_norm": 3.108116626739502, + "learning_rate": 2.957651314741562e-05, + "loss": 0.17715072631835938, + "step": 1035 + }, + { + "epoch": 0.14005914659907057, + "grad_norm": 3.5500175952911377, + "learning_rate": 2.9574896753273454e-05, + "loss": 0.2532081604003906, + "step": 1036 + }, + { + "epoch": 0.14019433882551754, + "grad_norm": 2.8899898529052734, + "learning_rate": 2.9573277324560058e-05, + "loss": 0.252410888671875, + "step": 1037 + }, + { + "epoch": 0.14032953105196452, + "grad_norm": 2.784574270248413, + "learning_rate": 2.9571654861612608e-05, + "loss": 0.1392512321472168, + "step": 1038 + }, + { + "epoch": 0.1404647232784115, + "grad_norm": 4.721029281616211, + "learning_rate": 2.957002936476891e-05, + "loss": 0.2327890396118164, + "step": 1039 + }, + { + "epoch": 0.14059991550485848, + "grad_norm": 1.660416841506958, + "learning_rate": 2.9568400834367406e-05, + "loss": 0.18694639205932617, + "step": 1040 + }, + { + "epoch": 0.14073510773130546, + "grad_norm": 1.355526089668274, + "learning_rate": 2.9566769270747158e-05, + "loss": 0.2514228820800781, + "step": 1041 + }, + { + "epoch": 0.14087029995775244, + "grad_norm": 1.6010046005249023, + "learning_rate": 2.9565134674247864e-05, + "loss": 0.18515491485595703, + "step": 1042 + }, + { + "epoch": 0.14100549218419942, + "grad_norm": 1.5745820999145508, + "learning_rate": 2.9563497045209866e-05, + "loss": 0.17581558227539062, + "step": 1043 + }, + { + "epoch": 0.1411406844106464, + "grad_norm": 2.2860074043273926, + "learning_rate": 2.9561856383974118e-05, + "loss": 0.29541778564453125, + "step": 1044 + }, + { + "epoch": 0.14127587663709337, + "grad_norm": 1.3537960052490234, + "learning_rate": 2.9560212690882218e-05, + "loss": 0.19983530044555664, + "step": 1045 + }, + { + "epoch": 0.14141106886354035, + "grad_norm": 2.137998342514038, + "learning_rate": 2.9558565966276395e-05, + "loss": 0.19737625122070312, + "step": 1046 + }, + { + "epoch": 0.14154626108998733, + "grad_norm": 3.0299360752105713, + "learning_rate": 2.9556916210499497e-05, + "loss": 0.21280860900878906, + "step": 1047 + }, + { + "epoch": 0.1416814533164343, + "grad_norm": 5.062359809875488, + "learning_rate": 2.9555263423895016e-05, + "loss": 0.22482681274414062, + "step": 1048 + }, + { + "epoch": 0.1418166455428813, + "grad_norm": 2.5297317504882812, + "learning_rate": 2.955360760680708e-05, + "loss": 0.19592857360839844, + "step": 1049 + }, + { + "epoch": 0.14195183776932827, + "grad_norm": 1.363262414932251, + "learning_rate": 2.9551948759580423e-05, + "loss": 0.15517520904541016, + "step": 1050 + }, + { + "epoch": 0.14208702999577524, + "grad_norm": 3.23099946975708, + "learning_rate": 2.9550286882560435e-05, + "loss": 0.1935439109802246, + "step": 1051 + }, + { + "epoch": 0.14222222222222222, + "grad_norm": 1.5503112077713013, + "learning_rate": 2.9548621976093126e-05, + "loss": 0.2294635772705078, + "step": 1052 + }, + { + "epoch": 0.1423574144486692, + "grad_norm": 2.5213406085968018, + "learning_rate": 2.9546954040525144e-05, + "loss": 0.20184803009033203, + "step": 1053 + }, + { + "epoch": 0.14249260667511618, + "grad_norm": 2.4858009815216064, + "learning_rate": 2.9545283076203753e-05, + "loss": 0.222733736038208, + "step": 1054 + }, + { + "epoch": 0.14262779890156316, + "grad_norm": 2.538125991821289, + "learning_rate": 2.954360908347686e-05, + "loss": 0.26206207275390625, + "step": 1055 + }, + { + "epoch": 0.14276299112801014, + "grad_norm": 1.4709687232971191, + "learning_rate": 2.9541932062693e-05, + "loss": 0.1612682342529297, + "step": 1056 + }, + { + "epoch": 0.14289818335445711, + "grad_norm": 1.5508737564086914, + "learning_rate": 2.954025201420134e-05, + "loss": 0.21424102783203125, + "step": 1057 + }, + { + "epoch": 0.1430333755809041, + "grad_norm": 1.3205159902572632, + "learning_rate": 2.9538568938351672e-05, + "loss": 0.18906307220458984, + "step": 1058 + }, + { + "epoch": 0.14316856780735107, + "grad_norm": 1.4068984985351562, + "learning_rate": 2.953688283549442e-05, + "loss": 0.18598365783691406, + "step": 1059 + }, + { + "epoch": 0.14330376003379805, + "grad_norm": 1.3567590713500977, + "learning_rate": 2.9535193705980642e-05, + "loss": 0.15661144256591797, + "step": 1060 + }, + { + "epoch": 0.14343895226024503, + "grad_norm": 4.0203728675842285, + "learning_rate": 2.9533501550162028e-05, + "loss": 0.20356178283691406, + "step": 1061 + }, + { + "epoch": 0.143574144486692, + "grad_norm": 4.366781234741211, + "learning_rate": 2.9531806368390882e-05, + "loss": 0.20541906356811523, + "step": 1062 + }, + { + "epoch": 0.14370933671313899, + "grad_norm": 1.332748532295227, + "learning_rate": 2.953010816102016e-05, + "loss": 0.14351272583007812, + "step": 1063 + }, + { + "epoch": 0.14384452893958596, + "grad_norm": 3.0594022274017334, + "learning_rate": 2.952840692840343e-05, + "loss": 0.20209646224975586, + "step": 1064 + }, + { + "epoch": 0.14397972116603294, + "grad_norm": 1.6943562030792236, + "learning_rate": 2.9526702670894914e-05, + "loss": 0.2362823486328125, + "step": 1065 + }, + { + "epoch": 0.14411491339247992, + "grad_norm": 1.2198818922042847, + "learning_rate": 2.952499538884943e-05, + "loss": 0.18984317779541016, + "step": 1066 + }, + { + "epoch": 0.1442501056189269, + "grad_norm": 4.816554069519043, + "learning_rate": 2.9523285082622448e-05, + "loss": 0.14357614517211914, + "step": 1067 + }, + { + "epoch": 0.14438529784537388, + "grad_norm": 3.437350034713745, + "learning_rate": 2.9521571752570064e-05, + "loss": 0.15816020965576172, + "step": 1068 + }, + { + "epoch": 0.14452049007182088, + "grad_norm": 5.826547145843506, + "learning_rate": 2.9519855399049004e-05, + "loss": 0.17350292205810547, + "step": 1069 + }, + { + "epoch": 0.14465568229826786, + "grad_norm": 4.477969169616699, + "learning_rate": 2.951813602241662e-05, + "loss": 0.20851516723632812, + "step": 1070 + }, + { + "epoch": 0.14479087452471484, + "grad_norm": 1.6647350788116455, + "learning_rate": 2.9516413623030896e-05, + "loss": 0.24280357360839844, + "step": 1071 + }, + { + "epoch": 0.14492606675116182, + "grad_norm": 3.3024792671203613, + "learning_rate": 2.951468820125045e-05, + "loss": 0.22267723083496094, + "step": 1072 + }, + { + "epoch": 0.1450612589776088, + "grad_norm": 2.139153242111206, + "learning_rate": 2.9512959757434508e-05, + "loss": 0.19460058212280273, + "step": 1073 + }, + { + "epoch": 0.14519645120405578, + "grad_norm": 2.8732378482818604, + "learning_rate": 2.951122829194296e-05, + "loss": 0.19809532165527344, + "step": 1074 + }, + { + "epoch": 0.14533164343050275, + "grad_norm": 2.5846972465515137, + "learning_rate": 2.9509493805136296e-05, + "loss": 0.17796707153320312, + "step": 1075 + }, + { + "epoch": 0.14546683565694973, + "grad_norm": 1.7688478231430054, + "learning_rate": 2.9507756297375648e-05, + "loss": 0.22654151916503906, + "step": 1076 + }, + { + "epoch": 0.1456020278833967, + "grad_norm": 3.1816835403442383, + "learning_rate": 2.9506015769022778e-05, + "loss": 0.2374286651611328, + "step": 1077 + }, + { + "epoch": 0.1457372201098437, + "grad_norm": 4.030456066131592, + "learning_rate": 2.950427222044006e-05, + "loss": 0.2720985412597656, + "step": 1078 + }, + { + "epoch": 0.14587241233629067, + "grad_norm": 1.2745141983032227, + "learning_rate": 2.9502525651990525e-05, + "loss": 0.2743339538574219, + "step": 1079 + }, + { + "epoch": 0.14600760456273765, + "grad_norm": 0.9128302335739136, + "learning_rate": 2.9500776064037813e-05, + "loss": 0.19506359100341797, + "step": 1080 + }, + { + "epoch": 0.14614279678918463, + "grad_norm": 1.0537209510803223, + "learning_rate": 2.9499023456946194e-05, + "loss": 0.15345001220703125, + "step": 1081 + }, + { + "epoch": 0.1462779890156316, + "grad_norm": 1.4146454334259033, + "learning_rate": 2.9497267831080575e-05, + "loss": 0.2233428955078125, + "step": 1082 + }, + { + "epoch": 0.14641318124207858, + "grad_norm": 3.1179754734039307, + "learning_rate": 2.949550918680649e-05, + "loss": 0.1699848175048828, + "step": 1083 + }, + { + "epoch": 0.14654837346852556, + "grad_norm": 1.597707748413086, + "learning_rate": 2.9493747524490086e-05, + "loss": 0.2307872772216797, + "step": 1084 + }, + { + "epoch": 0.14668356569497254, + "grad_norm": 0.9638230204582214, + "learning_rate": 2.9491982844498156e-05, + "loss": 0.149200439453125, + "step": 1085 + }, + { + "epoch": 0.14681875792141952, + "grad_norm": 2.897291421890259, + "learning_rate": 2.949021514719812e-05, + "loss": 0.2945866584777832, + "step": 1086 + }, + { + "epoch": 0.1469539501478665, + "grad_norm": 4.406247615814209, + "learning_rate": 2.948844443295802e-05, + "loss": 0.19987964630126953, + "step": 1087 + }, + { + "epoch": 0.14708914237431348, + "grad_norm": 2.993009328842163, + "learning_rate": 2.9486670702146526e-05, + "loss": 0.2476806640625, + "step": 1088 + }, + { + "epoch": 0.14722433460076045, + "grad_norm": 1.607150912284851, + "learning_rate": 2.948489395513294e-05, + "loss": 0.2028064727783203, + "step": 1089 + }, + { + "epoch": 0.14735952682720743, + "grad_norm": 2.169950485229492, + "learning_rate": 2.948311419228719e-05, + "loss": 0.19666671752929688, + "step": 1090 + }, + { + "epoch": 0.1474947190536544, + "grad_norm": 4.988809108734131, + "learning_rate": 2.948133141397983e-05, + "loss": 0.19045639038085938, + "step": 1091 + }, + { + "epoch": 0.1476299112801014, + "grad_norm": 1.3648936748504639, + "learning_rate": 2.9479545620582047e-05, + "loss": 0.1711178421974182, + "step": 1092 + }, + { + "epoch": 0.14776510350654837, + "grad_norm": 1.8938064575195312, + "learning_rate": 2.9477756812465652e-05, + "loss": 0.2031574249267578, + "step": 1093 + }, + { + "epoch": 0.14790029573299535, + "grad_norm": 1.1864244937896729, + "learning_rate": 2.9475964990003085e-05, + "loss": 0.10277795791625977, + "step": 1094 + }, + { + "epoch": 0.14803548795944232, + "grad_norm": 1.0426554679870605, + "learning_rate": 2.9474170153567406e-05, + "loss": 0.20920419692993164, + "step": 1095 + }, + { + "epoch": 0.1481706801858893, + "grad_norm": 2.343167543411255, + "learning_rate": 2.947237230353232e-05, + "loss": 0.2818145751953125, + "step": 1096 + }, + { + "epoch": 0.14830587241233628, + "grad_norm": 2.9093923568725586, + "learning_rate": 2.9470571440272147e-05, + "loss": 0.2296142578125, + "step": 1097 + }, + { + "epoch": 0.14844106463878326, + "grad_norm": 1.608169674873352, + "learning_rate": 2.946876756416183e-05, + "loss": 0.21310806274414062, + "step": 1098 + }, + { + "epoch": 0.14857625686523024, + "grad_norm": 1.6651605367660522, + "learning_rate": 2.946696067557695e-05, + "loss": 0.2605419158935547, + "step": 1099 + }, + { + "epoch": 0.14871144909167722, + "grad_norm": 2.4382612705230713, + "learning_rate": 2.9465150774893706e-05, + "loss": 0.2169046401977539, + "step": 1100 + }, + { + "epoch": 0.1488466413181242, + "grad_norm": 2.1044139862060547, + "learning_rate": 2.9463337862488938e-05, + "loss": 0.1279897689819336, + "step": 1101 + }, + { + "epoch": 0.14898183354457117, + "grad_norm": 2.7398595809936523, + "learning_rate": 2.9461521938740096e-05, + "loss": 0.22760009765625, + "step": 1102 + }, + { + "epoch": 0.14911702577101818, + "grad_norm": 2.6128108501434326, + "learning_rate": 2.9459703004025273e-05, + "loss": 0.2618274688720703, + "step": 1103 + }, + { + "epoch": 0.14925221799746516, + "grad_norm": 0.9230862855911255, + "learning_rate": 2.9457881058723174e-05, + "loss": 0.21934127807617188, + "step": 1104 + }, + { + "epoch": 0.14938741022391214, + "grad_norm": 2.928769826889038, + "learning_rate": 2.9456056103213137e-05, + "loss": 0.24720382690429688, + "step": 1105 + }, + { + "epoch": 0.14952260245035912, + "grad_norm": 3.214954137802124, + "learning_rate": 2.945422813787513e-05, + "loss": 0.15367984771728516, + "step": 1106 + }, + { + "epoch": 0.1496577946768061, + "grad_norm": 2.8005459308624268, + "learning_rate": 2.9452397163089748e-05, + "loss": 0.15401554107666016, + "step": 1107 + }, + { + "epoch": 0.14979298690325307, + "grad_norm": 5.035986423492432, + "learning_rate": 2.9450563179238207e-05, + "loss": 0.22300243377685547, + "step": 1108 + }, + { + "epoch": 0.14992817912970005, + "grad_norm": 1.8318477869033813, + "learning_rate": 2.9448726186702354e-05, + "loss": 0.19271504878997803, + "step": 1109 + }, + { + "epoch": 0.15006337135614703, + "grad_norm": 0.7416521310806274, + "learning_rate": 2.9446886185864652e-05, + "loss": 0.12869834899902344, + "step": 1110 + }, + { + "epoch": 0.150198563582594, + "grad_norm": 2.2862319946289062, + "learning_rate": 2.944504317710821e-05, + "loss": 0.19649887084960938, + "step": 1111 + }, + { + "epoch": 0.150333755809041, + "grad_norm": 3.652017831802368, + "learning_rate": 2.944319716081675e-05, + "loss": 0.20375442504882812, + "step": 1112 + }, + { + "epoch": 0.15046894803548796, + "grad_norm": 1.6085723638534546, + "learning_rate": 2.944134813737462e-05, + "loss": 0.22667503356933594, + "step": 1113 + }, + { + "epoch": 0.15060414026193494, + "grad_norm": 2.0221939086914062, + "learning_rate": 2.9439496107166796e-05, + "loss": 0.21702194213867188, + "step": 1114 + }, + { + "epoch": 0.15073933248838192, + "grad_norm": 1.3399120569229126, + "learning_rate": 2.943764107057888e-05, + "loss": 0.2021331787109375, + "step": 1115 + }, + { + "epoch": 0.1508745247148289, + "grad_norm": 1.836799144744873, + "learning_rate": 2.9435783027997106e-05, + "loss": 0.23592090606689453, + "step": 1116 + }, + { + "epoch": 0.15100971694127588, + "grad_norm": 1.072712779045105, + "learning_rate": 2.9433921979808323e-05, + "loss": 0.17313671112060547, + "step": 1117 + }, + { + "epoch": 0.15114490916772286, + "grad_norm": 1.9546438455581665, + "learning_rate": 2.9432057926400014e-05, + "loss": 0.19092273712158203, + "step": 1118 + }, + { + "epoch": 0.15128010139416984, + "grad_norm": 1.7589925527572632, + "learning_rate": 2.943019086816028e-05, + "loss": 0.20804977416992188, + "step": 1119 + }, + { + "epoch": 0.15141529362061681, + "grad_norm": 2.976997137069702, + "learning_rate": 2.9428320805477855e-05, + "loss": 0.21226978302001953, + "step": 1120 + }, + { + "epoch": 0.1515504858470638, + "grad_norm": 2.0414233207702637, + "learning_rate": 2.9426447738742104e-05, + "loss": 0.20100021362304688, + "step": 1121 + }, + { + "epoch": 0.15168567807351077, + "grad_norm": 1.6670335531234741, + "learning_rate": 2.9424571668343e-05, + "loss": 0.2207794189453125, + "step": 1122 + }, + { + "epoch": 0.15182087029995775, + "grad_norm": 0.9890133738517761, + "learning_rate": 2.942269259467115e-05, + "loss": 0.20777225494384766, + "step": 1123 + }, + { + "epoch": 0.15195606252640473, + "grad_norm": 0.6977941393852234, + "learning_rate": 2.9420810518117794e-05, + "loss": 0.13891077041625977, + "step": 1124 + }, + { + "epoch": 0.1520912547528517, + "grad_norm": 2.8817968368530273, + "learning_rate": 2.9418925439074784e-05, + "loss": 0.2390279769897461, + "step": 1125 + }, + { + "epoch": 0.15222644697929869, + "grad_norm": 1.4219186305999756, + "learning_rate": 2.9417037357934606e-05, + "loss": 0.17908287048339844, + "step": 1126 + }, + { + "epoch": 0.15236163920574566, + "grad_norm": 1.7705414295196533, + "learning_rate": 2.9415146275090373e-05, + "loss": 0.18467235565185547, + "step": 1127 + }, + { + "epoch": 0.15249683143219264, + "grad_norm": 2.6281392574310303, + "learning_rate": 2.9413252190935813e-05, + "loss": 0.21423721313476562, + "step": 1128 + }, + { + "epoch": 0.15263202365863962, + "grad_norm": 2.476922035217285, + "learning_rate": 2.9411355105865286e-05, + "loss": 0.19746297597885132, + "step": 1129 + }, + { + "epoch": 0.1527672158850866, + "grad_norm": 1.6135934591293335, + "learning_rate": 2.9409455020273775e-05, + "loss": 0.26868247985839844, + "step": 1130 + }, + { + "epoch": 0.15290240811153358, + "grad_norm": 1.3475333452224731, + "learning_rate": 2.940755193455689e-05, + "loss": 0.1934833526611328, + "step": 1131 + }, + { + "epoch": 0.15303760033798056, + "grad_norm": 1.2489211559295654, + "learning_rate": 2.940564584911086e-05, + "loss": 0.1741466522216797, + "step": 1132 + }, + { + "epoch": 0.15317279256442753, + "grad_norm": 4.067756652832031, + "learning_rate": 2.9403736764332543e-05, + "loss": 0.19297122955322266, + "step": 1133 + }, + { + "epoch": 0.1533079847908745, + "grad_norm": 1.2733547687530518, + "learning_rate": 2.9401824680619423e-05, + "loss": 0.22699928283691406, + "step": 1134 + }, + { + "epoch": 0.1534431770173215, + "grad_norm": 1.0942462682724, + "learning_rate": 2.9399909598369604e-05, + "loss": 0.19097328186035156, + "step": 1135 + }, + { + "epoch": 0.15357836924376847, + "grad_norm": 1.5180552005767822, + "learning_rate": 2.939799151798182e-05, + "loss": 0.22004318237304688, + "step": 1136 + }, + { + "epoch": 0.15371356147021548, + "grad_norm": 1.9421501159667969, + "learning_rate": 2.9396070439855417e-05, + "loss": 0.21540164947509766, + "step": 1137 + }, + { + "epoch": 0.15384875369666245, + "grad_norm": 2.0043678283691406, + "learning_rate": 2.9394146364390382e-05, + "loss": 0.24493980407714844, + "step": 1138 + }, + { + "epoch": 0.15398394592310943, + "grad_norm": 2.582948923110962, + "learning_rate": 2.9392219291987315e-05, + "loss": 0.20032691955566406, + "step": 1139 + }, + { + "epoch": 0.1541191381495564, + "grad_norm": 1.8866467475891113, + "learning_rate": 2.939028922304744e-05, + "loss": 0.22159576416015625, + "step": 1140 + }, + { + "epoch": 0.1542543303760034, + "grad_norm": 2.0384747982025146, + "learning_rate": 2.9388356157972615e-05, + "loss": 0.2097949981689453, + "step": 1141 + }, + { + "epoch": 0.15438952260245037, + "grad_norm": 2.9942688941955566, + "learning_rate": 2.938642009716531e-05, + "loss": 0.2665824890136719, + "step": 1142 + }, + { + "epoch": 0.15452471482889735, + "grad_norm": 1.0237970352172852, + "learning_rate": 2.938448104102862e-05, + "loss": 0.19638824462890625, + "step": 1143 + }, + { + "epoch": 0.15465990705534433, + "grad_norm": 2.868194103240967, + "learning_rate": 2.9382538989966267e-05, + "loss": 0.23156356811523438, + "step": 1144 + }, + { + "epoch": 0.1547950992817913, + "grad_norm": 0.6812503933906555, + "learning_rate": 2.9380593944382605e-05, + "loss": 0.13663482666015625, + "step": 1145 + }, + { + "epoch": 0.15493029150823828, + "grad_norm": 2.0901856422424316, + "learning_rate": 2.9378645904682596e-05, + "loss": 0.14768600463867188, + "step": 1146 + }, + { + "epoch": 0.15506548373468526, + "grad_norm": 1.353996992111206, + "learning_rate": 2.937669487127183e-05, + "loss": 0.19890642166137695, + "step": 1147 + }, + { + "epoch": 0.15520067596113224, + "grad_norm": 2.0099098682403564, + "learning_rate": 2.9374740844556532e-05, + "loss": 0.2002553939819336, + "step": 1148 + }, + { + "epoch": 0.15533586818757922, + "grad_norm": 1.2000133991241455, + "learning_rate": 2.937278382494353e-05, + "loss": 0.26312255859375, + "step": 1149 + }, + { + "epoch": 0.1554710604140262, + "grad_norm": 2.436415433883667, + "learning_rate": 2.9370823812840287e-05, + "loss": 0.2123870849609375, + "step": 1150 + }, + { + "epoch": 0.15560625264047317, + "grad_norm": 2.8203797340393066, + "learning_rate": 2.93688608086549e-05, + "loss": 0.22266101837158203, + "step": 1151 + }, + { + "epoch": 0.15574144486692015, + "grad_norm": 1.9603325128555298, + "learning_rate": 2.9366894812796064e-05, + "loss": 0.21594619750976562, + "step": 1152 + }, + { + "epoch": 0.15587663709336713, + "grad_norm": 1.303475022315979, + "learning_rate": 2.9364925825673117e-05, + "loss": 0.1968402862548828, + "step": 1153 + }, + { + "epoch": 0.1560118293198141, + "grad_norm": 1.8851032257080078, + "learning_rate": 2.9362953847696006e-05, + "loss": 0.2257080078125, + "step": 1154 + }, + { + "epoch": 0.1561470215462611, + "grad_norm": 2.605109214782715, + "learning_rate": 2.9360978879275313e-05, + "loss": 0.24187850952148438, + "step": 1155 + }, + { + "epoch": 0.15628221377270807, + "grad_norm": 1.8437635898590088, + "learning_rate": 2.9359000920822237e-05, + "loss": 0.19596290588378906, + "step": 1156 + }, + { + "epoch": 0.15641740599915505, + "grad_norm": 1.5065345764160156, + "learning_rate": 2.9357019972748594e-05, + "loss": 0.1710672378540039, + "step": 1157 + }, + { + "epoch": 0.15655259822560202, + "grad_norm": 2.136629581451416, + "learning_rate": 2.9355036035466836e-05, + "loss": 0.19841909408569336, + "step": 1158 + }, + { + "epoch": 0.156687790452049, + "grad_norm": 4.33582067489624, + "learning_rate": 2.935304910939002e-05, + "loss": 0.21745777130126953, + "step": 1159 + }, + { + "epoch": 0.15682298267849598, + "grad_norm": 3.8167378902435303, + "learning_rate": 2.935105919493184e-05, + "loss": 0.21950364112854004, + "step": 1160 + }, + { + "epoch": 0.15695817490494296, + "grad_norm": 1.8931597471237183, + "learning_rate": 2.9349066292506613e-05, + "loss": 0.22406768798828125, + "step": 1161 + }, + { + "epoch": 0.15709336713138994, + "grad_norm": 2.715423822402954, + "learning_rate": 2.934707040252926e-05, + "loss": 0.186592698097229, + "step": 1162 + }, + { + "epoch": 0.15722855935783692, + "grad_norm": 1.4493467807769775, + "learning_rate": 2.9345071525415342e-05, + "loss": 0.2242717742919922, + "step": 1163 + }, + { + "epoch": 0.1573637515842839, + "grad_norm": 1.7794873714447021, + "learning_rate": 2.9343069661581035e-05, + "loss": 0.2085585594177246, + "step": 1164 + }, + { + "epoch": 0.15749894381073087, + "grad_norm": 1.7526251077651978, + "learning_rate": 2.9341064811443138e-05, + "loss": 0.21421551704406738, + "step": 1165 + }, + { + "epoch": 0.15763413603717785, + "grad_norm": 4.4980149269104, + "learning_rate": 2.9339056975419078e-05, + "loss": 0.2119426727294922, + "step": 1166 + }, + { + "epoch": 0.15776932826362483, + "grad_norm": 2.7720754146575928, + "learning_rate": 2.9337046153926882e-05, + "loss": 0.18785572052001953, + "step": 1167 + }, + { + "epoch": 0.1579045204900718, + "grad_norm": 2.674401044845581, + "learning_rate": 2.9335032347385224e-05, + "loss": 0.15754222869873047, + "step": 1168 + }, + { + "epoch": 0.1580397127165188, + "grad_norm": 1.8676788806915283, + "learning_rate": 2.933301555621339e-05, + "loss": 0.2265453338623047, + "step": 1169 + }, + { + "epoch": 0.15817490494296577, + "grad_norm": 5.242788314819336, + "learning_rate": 2.933099578083128e-05, + "loss": 0.23511505126953125, + "step": 1170 + }, + { + "epoch": 0.15831009716941277, + "grad_norm": 4.543645858764648, + "learning_rate": 2.932897302165943e-05, + "loss": 0.2626004219055176, + "step": 1171 + }, + { + "epoch": 0.15844528939585975, + "grad_norm": 3.0055973529815674, + "learning_rate": 2.9326947279118983e-05, + "loss": 0.22939300537109375, + "step": 1172 + }, + { + "epoch": 0.15858048162230673, + "grad_norm": 4.152409076690674, + "learning_rate": 2.9324918553631716e-05, + "loss": 0.17406177520751953, + "step": 1173 + }, + { + "epoch": 0.1587156738487537, + "grad_norm": 2.5324268341064453, + "learning_rate": 2.9322886845620013e-05, + "loss": 0.2228221893310547, + "step": 1174 + }, + { + "epoch": 0.1588508660752007, + "grad_norm": 1.541008710861206, + "learning_rate": 2.932085215550689e-05, + "loss": 0.1670060157775879, + "step": 1175 + }, + { + "epoch": 0.15898605830164766, + "grad_norm": 6.707080841064453, + "learning_rate": 2.9318814483715982e-05, + "loss": 0.2936382293701172, + "step": 1176 + }, + { + "epoch": 0.15912125052809464, + "grad_norm": 2.299949884414673, + "learning_rate": 2.9316773830671537e-05, + "loss": 0.19097137451171875, + "step": 1177 + }, + { + "epoch": 0.15925644275454162, + "grad_norm": 2.5141537189483643, + "learning_rate": 2.9314730196798437e-05, + "loss": 0.20153236389160156, + "step": 1178 + }, + { + "epoch": 0.1593916349809886, + "grad_norm": 2.8581597805023193, + "learning_rate": 2.9312683582522178e-05, + "loss": 0.20731830596923828, + "step": 1179 + }, + { + "epoch": 0.15952682720743558, + "grad_norm": 3.6299893856048584, + "learning_rate": 2.9310633988268868e-05, + "loss": 0.2257823944091797, + "step": 1180 + }, + { + "epoch": 0.15966201943388256, + "grad_norm": 1.6399526596069336, + "learning_rate": 2.9308581414465246e-05, + "loss": 0.1636180877685547, + "step": 1181 + }, + { + "epoch": 0.15979721166032954, + "grad_norm": 3.0158963203430176, + "learning_rate": 2.9306525861538674e-05, + "loss": 0.18083667755126953, + "step": 1182 + }, + { + "epoch": 0.15993240388677651, + "grad_norm": 4.761128902435303, + "learning_rate": 2.9304467329917127e-05, + "loss": 0.2058734893798828, + "step": 1183 + }, + { + "epoch": 0.1600675961132235, + "grad_norm": 3.8764705657958984, + "learning_rate": 2.9302405820029198e-05, + "loss": 0.2081432342529297, + "step": 1184 + }, + { + "epoch": 0.16020278833967047, + "grad_norm": 2.9538233280181885, + "learning_rate": 2.9300341332304114e-05, + "loss": 0.2520465850830078, + "step": 1185 + }, + { + "epoch": 0.16033798056611745, + "grad_norm": 1.5842045545578003, + "learning_rate": 2.9298273867171697e-05, + "loss": 0.21493911743164062, + "step": 1186 + }, + { + "epoch": 0.16047317279256443, + "grad_norm": 1.9225190877914429, + "learning_rate": 2.929620342506242e-05, + "loss": 0.1946878433227539, + "step": 1187 + }, + { + "epoch": 0.1606083650190114, + "grad_norm": 0.9797894954681396, + "learning_rate": 2.929413000640735e-05, + "loss": 0.19542312622070312, + "step": 1188 + }, + { + "epoch": 0.16074355724545838, + "grad_norm": 1.8446816205978394, + "learning_rate": 2.9292053611638187e-05, + "loss": 0.1798248291015625, + "step": 1189 + }, + { + "epoch": 0.16087874947190536, + "grad_norm": 2.0895726680755615, + "learning_rate": 2.928997424118725e-05, + "loss": 0.2467883825302124, + "step": 1190 + }, + { + "epoch": 0.16101394169835234, + "grad_norm": 1.1365423202514648, + "learning_rate": 2.928789189548747e-05, + "loss": 0.24303627014160156, + "step": 1191 + }, + { + "epoch": 0.16114913392479932, + "grad_norm": 2.5018985271453857, + "learning_rate": 2.9285806574972405e-05, + "loss": 0.20746994018554688, + "step": 1192 + }, + { + "epoch": 0.1612843261512463, + "grad_norm": 1.2010382413864136, + "learning_rate": 2.928371828007623e-05, + "loss": 0.16903305053710938, + "step": 1193 + }, + { + "epoch": 0.16141951837769328, + "grad_norm": 3.821223735809326, + "learning_rate": 2.928162701123374e-05, + "loss": 0.20270919799804688, + "step": 1194 + }, + { + "epoch": 0.16155471060414026, + "grad_norm": 2.6594810485839844, + "learning_rate": 2.9279532768880345e-05, + "loss": 0.21689224243164062, + "step": 1195 + }, + { + "epoch": 0.16168990283058723, + "grad_norm": 0.9651798605918884, + "learning_rate": 2.9277435553452084e-05, + "loss": 0.1259899139404297, + "step": 1196 + }, + { + "epoch": 0.1618250950570342, + "grad_norm": 2.060258150100708, + "learning_rate": 2.9275335365385602e-05, + "loss": 0.21222925186157227, + "step": 1197 + }, + { + "epoch": 0.1619602872834812, + "grad_norm": 2.437762975692749, + "learning_rate": 2.927323220511817e-05, + "loss": 0.21192359924316406, + "step": 1198 + }, + { + "epoch": 0.16209547950992817, + "grad_norm": 2.5704808235168457, + "learning_rate": 2.9271126073087684e-05, + "loss": 0.2102813720703125, + "step": 1199 + }, + { + "epoch": 0.16223067173637515, + "grad_norm": 2.143406629562378, + "learning_rate": 2.926901696973264e-05, + "loss": 0.1216421127319336, + "step": 1200 + }, + { + "epoch": 0.16236586396282213, + "grad_norm": 1.2513593435287476, + "learning_rate": 2.9266904895492177e-05, + "loss": 0.20844626426696777, + "step": 1201 + }, + { + "epoch": 0.1625010561892691, + "grad_norm": 5.105346202850342, + "learning_rate": 2.926478985080603e-05, + "loss": 0.2205181121826172, + "step": 1202 + }, + { + "epoch": 0.16263624841571608, + "grad_norm": 0.9585946798324585, + "learning_rate": 2.9262671836114568e-05, + "loss": 0.16717815399169922, + "step": 1203 + }, + { + "epoch": 0.16277144064216306, + "grad_norm": 1.400521993637085, + "learning_rate": 2.9260550851858774e-05, + "loss": 0.18681716918945312, + "step": 1204 + }, + { + "epoch": 0.16290663286861007, + "grad_norm": 2.233264684677124, + "learning_rate": 2.9258426898480243e-05, + "loss": 0.26293182373046875, + "step": 1205 + }, + { + "epoch": 0.16304182509505705, + "grad_norm": 1.3776154518127441, + "learning_rate": 2.9256299976421198e-05, + "loss": 0.16571426391601562, + "step": 1206 + }, + { + "epoch": 0.16317701732150403, + "grad_norm": 2.0200002193450928, + "learning_rate": 2.9254170086124474e-05, + "loss": 0.2273101806640625, + "step": 1207 + }, + { + "epoch": 0.163312209547951, + "grad_norm": 2.553645133972168, + "learning_rate": 2.9252037228033526e-05, + "loss": 0.2573814392089844, + "step": 1208 + }, + { + "epoch": 0.16344740177439798, + "grad_norm": 1.7511731386184692, + "learning_rate": 2.9249901402592424e-05, + "loss": 0.19594955444335938, + "step": 1209 + }, + { + "epoch": 0.16358259400084496, + "grad_norm": 1.692636251449585, + "learning_rate": 2.9247762610245863e-05, + "loss": 0.23878955841064453, + "step": 1210 + }, + { + "epoch": 0.16371778622729194, + "grad_norm": 0.7301585078239441, + "learning_rate": 2.9245620851439146e-05, + "loss": 0.1350393295288086, + "step": 1211 + }, + { + "epoch": 0.16385297845373892, + "grad_norm": 1.1896436214447021, + "learning_rate": 2.92434761266182e-05, + "loss": 0.19644403457641602, + "step": 1212 + }, + { + "epoch": 0.1639881706801859, + "grad_norm": 0.7367956042289734, + "learning_rate": 2.924132843622957e-05, + "loss": 0.21621322631835938, + "step": 1213 + }, + { + "epoch": 0.16412336290663287, + "grad_norm": 2.8648135662078857, + "learning_rate": 2.9239177780720418e-05, + "loss": 0.16452407836914062, + "step": 1214 + }, + { + "epoch": 0.16425855513307985, + "grad_norm": 2.5793490409851074, + "learning_rate": 2.923702416053852e-05, + "loss": 0.22870445251464844, + "step": 1215 + }, + { + "epoch": 0.16439374735952683, + "grad_norm": 1.4675109386444092, + "learning_rate": 2.9234867576132268e-05, + "loss": 0.15302658081054688, + "step": 1216 + }, + { + "epoch": 0.1645289395859738, + "grad_norm": 2.1417102813720703, + "learning_rate": 2.923270802795068e-05, + "loss": 0.22233009338378906, + "step": 1217 + }, + { + "epoch": 0.1646641318124208, + "grad_norm": 3.58647084236145, + "learning_rate": 2.9230545516443378e-05, + "loss": 0.18663978576660156, + "step": 1218 + }, + { + "epoch": 0.16479932403886777, + "grad_norm": 3.886446714401245, + "learning_rate": 2.9228380042060615e-05, + "loss": 0.20212745666503906, + "step": 1219 + }, + { + "epoch": 0.16493451626531475, + "grad_norm": 2.8805408477783203, + "learning_rate": 2.9226211605253252e-05, + "loss": 0.14271926879882812, + "step": 1220 + }, + { + "epoch": 0.16506970849176172, + "grad_norm": 3.7256038188934326, + "learning_rate": 2.922404020647277e-05, + "loss": 0.18785858154296875, + "step": 1221 + }, + { + "epoch": 0.1652049007182087, + "grad_norm": 2.530155897140503, + "learning_rate": 2.9221865846171264e-05, + "loss": 0.21385407447814941, + "step": 1222 + }, + { + "epoch": 0.16534009294465568, + "grad_norm": 2.4122066497802734, + "learning_rate": 2.9219688524801446e-05, + "loss": 0.15297985076904297, + "step": 1223 + }, + { + "epoch": 0.16547528517110266, + "grad_norm": 1.780105471611023, + "learning_rate": 2.9217508242816653e-05, + "loss": 0.18054676055908203, + "step": 1224 + }, + { + "epoch": 0.16561047739754964, + "grad_norm": 1.5474873781204224, + "learning_rate": 2.921532500067083e-05, + "loss": 0.1624441146850586, + "step": 1225 + }, + { + "epoch": 0.16574566962399662, + "grad_norm": 2.8681282997131348, + "learning_rate": 2.9213138798818528e-05, + "loss": 0.17368105053901672, + "step": 1226 + }, + { + "epoch": 0.1658808618504436, + "grad_norm": 3.33196759223938, + "learning_rate": 2.921094963771494e-05, + "loss": 0.21352672576904297, + "step": 1227 + }, + { + "epoch": 0.16601605407689057, + "grad_norm": 2.4219534397125244, + "learning_rate": 2.9208757517815855e-05, + "loss": 0.190521240234375, + "step": 1228 + }, + { + "epoch": 0.16615124630333755, + "grad_norm": 0.9082304835319519, + "learning_rate": 2.9206562439577684e-05, + "loss": 0.15903091430664062, + "step": 1229 + }, + { + "epoch": 0.16628643852978453, + "grad_norm": 1.1619412899017334, + "learning_rate": 2.9204364403457452e-05, + "loss": 0.1984710693359375, + "step": 1230 + }, + { + "epoch": 0.1664216307562315, + "grad_norm": 0.7407615184783936, + "learning_rate": 2.9202163409912808e-05, + "loss": 0.21421432495117188, + "step": 1231 + }, + { + "epoch": 0.1665568229826785, + "grad_norm": 2.1070401668548584, + "learning_rate": 2.9199959459402003e-05, + "loss": 0.2463531494140625, + "step": 1232 + }, + { + "epoch": 0.16669201520912547, + "grad_norm": 3.299759864807129, + "learning_rate": 2.919775255238392e-05, + "loss": 0.1620769500732422, + "step": 1233 + }, + { + "epoch": 0.16682720743557244, + "grad_norm": 1.0545693635940552, + "learning_rate": 2.919554268931804e-05, + "loss": 0.21468615531921387, + "step": 1234 + }, + { + "epoch": 0.16696239966201942, + "grad_norm": 0.9881651401519775, + "learning_rate": 2.9193329870664475e-05, + "loss": 0.18878698348999023, + "step": 1235 + }, + { + "epoch": 0.1670975918884664, + "grad_norm": 3.00102162361145, + "learning_rate": 2.9191114096883938e-05, + "loss": 0.21954917907714844, + "step": 1236 + }, + { + "epoch": 0.16723278411491338, + "grad_norm": 3.4775328636169434, + "learning_rate": 2.9188895368437774e-05, + "loss": 0.2082061767578125, + "step": 1237 + }, + { + "epoch": 0.16736797634136036, + "grad_norm": 2.9185492992401123, + "learning_rate": 2.9186673685787926e-05, + "loss": 0.1755695343017578, + "step": 1238 + }, + { + "epoch": 0.16750316856780736, + "grad_norm": 1.8831610679626465, + "learning_rate": 2.918444904939697e-05, + "loss": 0.19309234619140625, + "step": 1239 + }, + { + "epoch": 0.16763836079425434, + "grad_norm": 2.365365982055664, + "learning_rate": 2.9182221459728078e-05, + "loss": 0.17614078521728516, + "step": 1240 + }, + { + "epoch": 0.16777355302070132, + "grad_norm": 1.92036771774292, + "learning_rate": 2.917999091724505e-05, + "loss": 0.21112632751464844, + "step": 1241 + }, + { + "epoch": 0.1679087452471483, + "grad_norm": 3.2891643047332764, + "learning_rate": 2.9177757422412294e-05, + "loss": 0.22065162658691406, + "step": 1242 + }, + { + "epoch": 0.16804393747359528, + "grad_norm": 4.371700763702393, + "learning_rate": 2.917552097569484e-05, + "loss": 0.24998044967651367, + "step": 1243 + }, + { + "epoch": 0.16817912970004226, + "grad_norm": 3.5760509967803955, + "learning_rate": 2.917328157755832e-05, + "loss": 0.20297622680664062, + "step": 1244 + }, + { + "epoch": 0.16831432192648924, + "grad_norm": 4.524680137634277, + "learning_rate": 2.9171039228469003e-05, + "loss": 0.17680931091308594, + "step": 1245 + }, + { + "epoch": 0.16844951415293621, + "grad_norm": 1.991071343421936, + "learning_rate": 2.9168793928893747e-05, + "loss": 0.2197580337524414, + "step": 1246 + }, + { + "epoch": 0.1685847063793832, + "grad_norm": 1.2785810232162476, + "learning_rate": 2.9166545679300036e-05, + "loss": 0.18245315551757812, + "step": 1247 + }, + { + "epoch": 0.16871989860583017, + "grad_norm": 3.6238996982574463, + "learning_rate": 2.9164294480155966e-05, + "loss": 0.21080970764160156, + "step": 1248 + }, + { + "epoch": 0.16885509083227715, + "grad_norm": 2.1705658435821533, + "learning_rate": 2.9162040331930256e-05, + "loss": 0.21681976318359375, + "step": 1249 + }, + { + "epoch": 0.16899028305872413, + "grad_norm": 1.8535723686218262, + "learning_rate": 2.915978323509223e-05, + "loss": 0.22697019577026367, + "step": 1250 + }, + { + "epoch": 0.1691254752851711, + "grad_norm": 1.32503080368042, + "learning_rate": 2.915752319011182e-05, + "loss": 0.22873973846435547, + "step": 1251 + }, + { + "epoch": 0.16926066751161808, + "grad_norm": 0.9266617298126221, + "learning_rate": 2.9155260197459588e-05, + "loss": 0.13932466506958008, + "step": 1252 + }, + { + "epoch": 0.16939585973806506, + "grad_norm": 1.1868035793304443, + "learning_rate": 2.91529942576067e-05, + "loss": 0.15386676788330078, + "step": 1253 + }, + { + "epoch": 0.16953105196451204, + "grad_norm": 1.9122745990753174, + "learning_rate": 2.915072537102493e-05, + "loss": 0.18025827407836914, + "step": 1254 + }, + { + "epoch": 0.16966624419095902, + "grad_norm": 1.0295727252960205, + "learning_rate": 2.914845353818668e-05, + "loss": 0.21793556213378906, + "step": 1255 + }, + { + "epoch": 0.169801436417406, + "grad_norm": 1.1580969095230103, + "learning_rate": 2.9146178759564953e-05, + "loss": 0.20047378540039062, + "step": 1256 + }, + { + "epoch": 0.16993662864385298, + "grad_norm": 2.033372402191162, + "learning_rate": 2.914390103563337e-05, + "loss": 0.22981834411621094, + "step": 1257 + }, + { + "epoch": 0.17007182087029996, + "grad_norm": 1.376724123954773, + "learning_rate": 2.914162036686617e-05, + "loss": 0.2403717041015625, + "step": 1258 + }, + { + "epoch": 0.17020701309674693, + "grad_norm": 1.2238609790802002, + "learning_rate": 2.9139336753738196e-05, + "loss": 0.1927042007446289, + "step": 1259 + }, + { + "epoch": 0.1703422053231939, + "grad_norm": 3.4364662170410156, + "learning_rate": 2.913705019672491e-05, + "loss": 0.22051620483398438, + "step": 1260 + }, + { + "epoch": 0.1704773975496409, + "grad_norm": 1.9599090814590454, + "learning_rate": 2.9134760696302386e-05, + "loss": 0.17123985290527344, + "step": 1261 + }, + { + "epoch": 0.17061258977608787, + "grad_norm": 1.544330358505249, + "learning_rate": 2.9132468252947306e-05, + "loss": 0.23200416564941406, + "step": 1262 + }, + { + "epoch": 0.17074778200253485, + "grad_norm": 1.335659384727478, + "learning_rate": 2.9130172867136974e-05, + "loss": 0.1358184814453125, + "step": 1263 + }, + { + "epoch": 0.17088297422898183, + "grad_norm": 3.2440483570098877, + "learning_rate": 2.91278745393493e-05, + "loss": 0.2698020935058594, + "step": 1264 + }, + { + "epoch": 0.1710181664554288, + "grad_norm": 1.3370169401168823, + "learning_rate": 2.9125573270062812e-05, + "loss": 0.14377784729003906, + "step": 1265 + }, + { + "epoch": 0.17115335868187578, + "grad_norm": 2.1438615322113037, + "learning_rate": 2.9123269059756634e-05, + "loss": 0.13239169120788574, + "step": 1266 + }, + { + "epoch": 0.17128855090832276, + "grad_norm": 1.484450340270996, + "learning_rate": 2.9120961908910528e-05, + "loss": 0.24248695373535156, + "step": 1267 + }, + { + "epoch": 0.17142374313476974, + "grad_norm": 3.4147424697875977, + "learning_rate": 2.911865181800485e-05, + "loss": 0.15267038345336914, + "step": 1268 + }, + { + "epoch": 0.17155893536121672, + "grad_norm": 2.7103404998779297, + "learning_rate": 2.9116338787520577e-05, + "loss": 0.19556808471679688, + "step": 1269 + }, + { + "epoch": 0.1716941275876637, + "grad_norm": 2.1455602645874023, + "learning_rate": 2.9114022817939283e-05, + "loss": 0.20649147033691406, + "step": 1270 + }, + { + "epoch": 0.17182931981411068, + "grad_norm": 2.1904516220092773, + "learning_rate": 2.911170390974318e-05, + "loss": 0.1810760498046875, + "step": 1271 + }, + { + "epoch": 0.17196451204055765, + "grad_norm": 1.3651081323623657, + "learning_rate": 2.9109382063415067e-05, + "loss": 0.20650005340576172, + "step": 1272 + }, + { + "epoch": 0.17209970426700466, + "grad_norm": 1.9265291690826416, + "learning_rate": 2.9107057279438372e-05, + "loss": 0.1748943328857422, + "step": 1273 + }, + { + "epoch": 0.17223489649345164, + "grad_norm": 1.6655797958374023, + "learning_rate": 2.910472955829712e-05, + "loss": 0.20395660400390625, + "step": 1274 + }, + { + "epoch": 0.17237008871989862, + "grad_norm": 1.812628984451294, + "learning_rate": 2.9102398900475958e-05, + "loss": 0.1981515884399414, + "step": 1275 + }, + { + "epoch": 0.1725052809463456, + "grad_norm": 1.1675834655761719, + "learning_rate": 2.910006530646014e-05, + "loss": 0.1596975326538086, + "step": 1276 + }, + { + "epoch": 0.17264047317279257, + "grad_norm": 0.992638349533081, + "learning_rate": 2.909772877673554e-05, + "loss": 0.18923377990722656, + "step": 1277 + }, + { + "epoch": 0.17277566539923955, + "grad_norm": 1.1776173114776611, + "learning_rate": 2.9095389311788626e-05, + "loss": 0.20680618286132812, + "step": 1278 + }, + { + "epoch": 0.17291085762568653, + "grad_norm": 3.43151593208313, + "learning_rate": 2.9093046912106494e-05, + "loss": 0.2683224678039551, + "step": 1279 + }, + { + "epoch": 0.1730460498521335, + "grad_norm": 2.155771017074585, + "learning_rate": 2.909070157817684e-05, + "loss": 0.21654129028320312, + "step": 1280 + }, + { + "epoch": 0.1731812420785805, + "grad_norm": 1.0390444993972778, + "learning_rate": 2.9088353310487976e-05, + "loss": 0.17767047882080078, + "step": 1281 + }, + { + "epoch": 0.17331643430502747, + "grad_norm": 2.0621514320373535, + "learning_rate": 2.9086002109528825e-05, + "loss": 0.2587318420410156, + "step": 1282 + }, + { + "epoch": 0.17345162653147445, + "grad_norm": 1.309306025505066, + "learning_rate": 2.908364797578892e-05, + "loss": 0.14774131774902344, + "step": 1283 + }, + { + "epoch": 0.17358681875792142, + "grad_norm": 3.02162766456604, + "learning_rate": 2.9081290909758405e-05, + "loss": 0.19872283935546875, + "step": 1284 + }, + { + "epoch": 0.1737220109843684, + "grad_norm": 2.652358293533325, + "learning_rate": 2.9078930911928033e-05, + "loss": 0.2086009979248047, + "step": 1285 + }, + { + "epoch": 0.17385720321081538, + "grad_norm": 1.419096827507019, + "learning_rate": 2.907656798278916e-05, + "loss": 0.21865081787109375, + "step": 1286 + }, + { + "epoch": 0.17399239543726236, + "grad_norm": 1.9373184442520142, + "learning_rate": 2.9074202122833773e-05, + "loss": 0.1815357208251953, + "step": 1287 + }, + { + "epoch": 0.17412758766370934, + "grad_norm": 2.5388596057891846, + "learning_rate": 2.907183333255445e-05, + "loss": 0.24713134765625, + "step": 1288 + }, + { + "epoch": 0.17426277989015632, + "grad_norm": 3.0996196269989014, + "learning_rate": 2.9069461612444384e-05, + "loss": 0.23250579833984375, + "step": 1289 + }, + { + "epoch": 0.1743979721166033, + "grad_norm": 2.24444317817688, + "learning_rate": 2.9067086962997385e-05, + "loss": 0.22878265380859375, + "step": 1290 + }, + { + "epoch": 0.17453316434305027, + "grad_norm": 3.0670111179351807, + "learning_rate": 2.9064709384707868e-05, + "loss": 0.1834259033203125, + "step": 1291 + }, + { + "epoch": 0.17466835656949725, + "grad_norm": 5.112541675567627, + "learning_rate": 2.9062328878070855e-05, + "loss": 0.2781410217285156, + "step": 1292 + }, + { + "epoch": 0.17480354879594423, + "grad_norm": 2.009633779525757, + "learning_rate": 2.905994544358198e-05, + "loss": 0.16334152221679688, + "step": 1293 + }, + { + "epoch": 0.1749387410223912, + "grad_norm": 3.704183340072632, + "learning_rate": 2.9057559081737482e-05, + "loss": 0.19887161254882812, + "step": 1294 + }, + { + "epoch": 0.1750739332488382, + "grad_norm": 1.3218884468078613, + "learning_rate": 2.9055169793034225e-05, + "loss": 0.256317138671875, + "step": 1295 + }, + { + "epoch": 0.17520912547528517, + "grad_norm": 2.1976206302642822, + "learning_rate": 2.9052777577969656e-05, + "loss": 0.20646047592163086, + "step": 1296 + }, + { + "epoch": 0.17534431770173214, + "grad_norm": 3.787241220474243, + "learning_rate": 2.9050382437041868e-05, + "loss": 0.16766834259033203, + "step": 1297 + }, + { + "epoch": 0.17547950992817912, + "grad_norm": 4.669532775878906, + "learning_rate": 2.9047984370749526e-05, + "loss": 0.27324581146240234, + "step": 1298 + }, + { + "epoch": 0.1756147021546261, + "grad_norm": 1.2683221101760864, + "learning_rate": 2.9045583379591925e-05, + "loss": 0.17315006256103516, + "step": 1299 + }, + { + "epoch": 0.17574989438107308, + "grad_norm": 3.358713388442993, + "learning_rate": 2.9043179464068965e-05, + "loss": 0.23006439208984375, + "step": 1300 + }, + { + "epoch": 0.17588508660752006, + "grad_norm": 1.9076160192489624, + "learning_rate": 2.9040772624681152e-05, + "loss": 0.18961048126220703, + "step": 1301 + }, + { + "epoch": 0.17602027883396704, + "grad_norm": 4.759708404541016, + "learning_rate": 2.9038362861929603e-05, + "loss": 0.20519065856933594, + "step": 1302 + }, + { + "epoch": 0.17615547106041402, + "grad_norm": 1.4643889665603638, + "learning_rate": 2.903595017631605e-05, + "loss": 0.2100391387939453, + "step": 1303 + }, + { + "epoch": 0.176290663286861, + "grad_norm": 1.7860510349273682, + "learning_rate": 2.903353456834282e-05, + "loss": 0.18061447143554688, + "step": 1304 + }, + { + "epoch": 0.17642585551330797, + "grad_norm": 3.441455364227295, + "learning_rate": 2.903111603851285e-05, + "loss": 0.19680213928222656, + "step": 1305 + }, + { + "epoch": 0.17656104773975495, + "grad_norm": 3.934690475463867, + "learning_rate": 2.9028694587329704e-05, + "loss": 0.18488597869873047, + "step": 1306 + }, + { + "epoch": 0.17669623996620196, + "grad_norm": 2.9123475551605225, + "learning_rate": 2.902627021529753e-05, + "loss": 0.17446136474609375, + "step": 1307 + }, + { + "epoch": 0.17683143219264894, + "grad_norm": 1.947124719619751, + "learning_rate": 2.9023842922921105e-05, + "loss": 0.16583776473999023, + "step": 1308 + }, + { + "epoch": 0.1769666244190959, + "grad_norm": 3.5330393314361572, + "learning_rate": 2.90214127107058e-05, + "loss": 0.24016952514648438, + "step": 1309 + }, + { + "epoch": 0.1771018166455429, + "grad_norm": 2.1990811824798584, + "learning_rate": 2.9018979579157592e-05, + "loss": 0.2100963592529297, + "step": 1310 + }, + { + "epoch": 0.17723700887198987, + "grad_norm": 3.6456589698791504, + "learning_rate": 2.901654352878308e-05, + "loss": 0.21712112426757812, + "step": 1311 + }, + { + "epoch": 0.17737220109843685, + "grad_norm": 1.4664796590805054, + "learning_rate": 2.9014104560089462e-05, + "loss": 0.14890432357788086, + "step": 1312 + }, + { + "epoch": 0.17750739332488383, + "grad_norm": 0.9743348360061646, + "learning_rate": 2.9011662673584538e-05, + "loss": 0.1746203899383545, + "step": 1313 + }, + { + "epoch": 0.1776425855513308, + "grad_norm": 3.7662954330444336, + "learning_rate": 2.900921786977673e-05, + "loss": 0.20755767822265625, + "step": 1314 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 3.263828754425049, + "learning_rate": 2.900677014917505e-05, + "loss": 0.21925640106201172, + "step": 1315 + }, + { + "epoch": 0.17791297000422476, + "grad_norm": 2.312995433807373, + "learning_rate": 2.9004319512289136e-05, + "loss": 0.22922229766845703, + "step": 1316 + }, + { + "epoch": 0.17804816223067174, + "grad_norm": 1.111663818359375, + "learning_rate": 2.9001865959629222e-05, + "loss": 0.17883014678955078, + "step": 1317 + }, + { + "epoch": 0.17818335445711872, + "grad_norm": 1.9362506866455078, + "learning_rate": 2.8999409491706143e-05, + "loss": 0.183624267578125, + "step": 1318 + }, + { + "epoch": 0.1783185466835657, + "grad_norm": 2.6189606189727783, + "learning_rate": 2.8996950109031355e-05, + "loss": 0.18933868408203125, + "step": 1319 + }, + { + "epoch": 0.17845373891001268, + "grad_norm": 1.5163934230804443, + "learning_rate": 2.8994487812116917e-05, + "loss": 0.21705055236816406, + "step": 1320 + }, + { + "epoch": 0.17858893113645966, + "grad_norm": 0.9534285664558411, + "learning_rate": 2.8992022601475483e-05, + "loss": 0.203521728515625, + "step": 1321 + }, + { + "epoch": 0.17872412336290663, + "grad_norm": 1.0295037031173706, + "learning_rate": 2.8989554477620332e-05, + "loss": 0.14105701446533203, + "step": 1322 + }, + { + "epoch": 0.1788593155893536, + "grad_norm": 2.6572842597961426, + "learning_rate": 2.8987083441065335e-05, + "loss": 0.19663238525390625, + "step": 1323 + }, + { + "epoch": 0.1789945078158006, + "grad_norm": 1.6833895444869995, + "learning_rate": 2.8984609492324983e-05, + "loss": 0.1596360206604004, + "step": 1324 + }, + { + "epoch": 0.17912970004224757, + "grad_norm": 5.08781623840332, + "learning_rate": 2.8982132631914357e-05, + "loss": 0.30466651916503906, + "step": 1325 + }, + { + "epoch": 0.17926489226869455, + "grad_norm": 2.629206657409668, + "learning_rate": 2.8979652860349154e-05, + "loss": 0.2878236770629883, + "step": 1326 + }, + { + "epoch": 0.17940008449514153, + "grad_norm": 1.5497839450836182, + "learning_rate": 2.8977170178145675e-05, + "loss": 0.18839454650878906, + "step": 1327 + }, + { + "epoch": 0.1795352767215885, + "grad_norm": 1.8535789251327515, + "learning_rate": 2.8974684585820833e-05, + "loss": 0.1353282928466797, + "step": 1328 + }, + { + "epoch": 0.17967046894803548, + "grad_norm": 1.1143041849136353, + "learning_rate": 2.8972196083892138e-05, + "loss": 0.1876659393310547, + "step": 1329 + }, + { + "epoch": 0.17980566117448246, + "grad_norm": 1.020523190498352, + "learning_rate": 2.8969704672877707e-05, + "loss": 0.1754608154296875, + "step": 1330 + }, + { + "epoch": 0.17994085340092944, + "grad_norm": 3.169733762741089, + "learning_rate": 2.896721035329627e-05, + "loss": 0.20612144470214844, + "step": 1331 + }, + { + "epoch": 0.18007604562737642, + "grad_norm": 1.071004033088684, + "learning_rate": 2.8964713125667153e-05, + "loss": 0.1641855239868164, + "step": 1332 + }, + { + "epoch": 0.1802112378538234, + "grad_norm": 2.315422773361206, + "learning_rate": 2.8962212990510294e-05, + "loss": 0.19281768798828125, + "step": 1333 + }, + { + "epoch": 0.18034643008027038, + "grad_norm": 1.7461645603179932, + "learning_rate": 2.8959709948346237e-05, + "loss": 0.19316387176513672, + "step": 1334 + }, + { + "epoch": 0.18048162230671735, + "grad_norm": 3.4042670726776123, + "learning_rate": 2.8957203999696124e-05, + "loss": 0.1898670196533203, + "step": 1335 + }, + { + "epoch": 0.18061681453316433, + "grad_norm": 0.979421854019165, + "learning_rate": 2.8954695145081713e-05, + "loss": 0.1663360595703125, + "step": 1336 + }, + { + "epoch": 0.1807520067596113, + "grad_norm": 1.626186490058899, + "learning_rate": 2.8952183385025356e-05, + "loss": 0.23313522338867188, + "step": 1337 + }, + { + "epoch": 0.1808871989860583, + "grad_norm": 5.553304195404053, + "learning_rate": 2.8949668720050014e-05, + "loss": 0.24918651580810547, + "step": 1338 + }, + { + "epoch": 0.18102239121250527, + "grad_norm": 2.7268736362457275, + "learning_rate": 2.8947151150679256e-05, + "loss": 0.12266921997070312, + "step": 1339 + }, + { + "epoch": 0.18115758343895225, + "grad_norm": 4.369574069976807, + "learning_rate": 2.8944630677437255e-05, + "loss": 0.19501686096191406, + "step": 1340 + }, + { + "epoch": 0.18129277566539925, + "grad_norm": 1.12800133228302, + "learning_rate": 2.8942107300848784e-05, + "loss": 0.12608051300048828, + "step": 1341 + }, + { + "epoch": 0.18142796789184623, + "grad_norm": 0.9669626355171204, + "learning_rate": 2.8939581021439225e-05, + "loss": 0.2141742706298828, + "step": 1342 + }, + { + "epoch": 0.1815631601182932, + "grad_norm": 1.275140643119812, + "learning_rate": 2.8937051839734563e-05, + "loss": 0.2077617645263672, + "step": 1343 + }, + { + "epoch": 0.1816983523447402, + "grad_norm": 1.469282865524292, + "learning_rate": 2.8934519756261384e-05, + "loss": 0.216949462890625, + "step": 1344 + }, + { + "epoch": 0.18183354457118717, + "grad_norm": 1.6897363662719727, + "learning_rate": 2.8931984771546885e-05, + "loss": 0.11701011657714844, + "step": 1345 + }, + { + "epoch": 0.18196873679763415, + "grad_norm": 3.3048653602600098, + "learning_rate": 2.8929446886118866e-05, + "loss": 0.20543861389160156, + "step": 1346 + }, + { + "epoch": 0.18210392902408112, + "grad_norm": 2.647064447402954, + "learning_rate": 2.892690610050572e-05, + "loss": 0.19722390174865723, + "step": 1347 + }, + { + "epoch": 0.1822391212505281, + "grad_norm": 1.1585471630096436, + "learning_rate": 2.892436241523646e-05, + "loss": 0.17193031311035156, + "step": 1348 + }, + { + "epoch": 0.18237431347697508, + "grad_norm": 2.3201839923858643, + "learning_rate": 2.8921815830840685e-05, + "loss": 0.18024063110351562, + "step": 1349 + }, + { + "epoch": 0.18250950570342206, + "grad_norm": 1.4231117963790894, + "learning_rate": 2.891926634784862e-05, + "loss": 0.20241689682006836, + "step": 1350 + }, + { + "epoch": 0.18264469792986904, + "grad_norm": 0.9113848209381104, + "learning_rate": 2.8916713966791076e-05, + "loss": 0.19225502014160156, + "step": 1351 + }, + { + "epoch": 0.18277989015631602, + "grad_norm": 1.8199065923690796, + "learning_rate": 2.8914158688199464e-05, + "loss": 0.1899886131286621, + "step": 1352 + }, + { + "epoch": 0.182915082382763, + "grad_norm": 1.0881524085998535, + "learning_rate": 2.891160051260582e-05, + "loss": 0.17791080474853516, + "step": 1353 + }, + { + "epoch": 0.18305027460920997, + "grad_norm": 5.457920551300049, + "learning_rate": 2.8909039440542758e-05, + "loss": 0.26273536682128906, + "step": 1354 + }, + { + "epoch": 0.18318546683565695, + "grad_norm": 4.6656174659729, + "learning_rate": 2.890647547254352e-05, + "loss": 0.19739341735839844, + "step": 1355 + }, + { + "epoch": 0.18332065906210393, + "grad_norm": 3.544001579284668, + "learning_rate": 2.8903908609141923e-05, + "loss": 0.23843002319335938, + "step": 1356 + }, + { + "epoch": 0.1834558512885509, + "grad_norm": 3.8569486141204834, + "learning_rate": 2.8901338850872413e-05, + "loss": 0.24228811264038086, + "step": 1357 + }, + { + "epoch": 0.1835910435149979, + "grad_norm": 1.0984386205673218, + "learning_rate": 2.8898766198270022e-05, + "loss": 0.21202754974365234, + "step": 1358 + }, + { + "epoch": 0.18372623574144487, + "grad_norm": 3.0971057415008545, + "learning_rate": 2.8896190651870392e-05, + "loss": 0.21640777587890625, + "step": 1359 + }, + { + "epoch": 0.18386142796789184, + "grad_norm": 1.6464974880218506, + "learning_rate": 2.8893612212209763e-05, + "loss": 0.23998451232910156, + "step": 1360 + }, + { + "epoch": 0.18399662019433882, + "grad_norm": 1.1478816270828247, + "learning_rate": 2.8891030879824985e-05, + "loss": 0.11033439636230469, + "step": 1361 + }, + { + "epoch": 0.1841318124207858, + "grad_norm": 1.7123949527740479, + "learning_rate": 2.88884466552535e-05, + "loss": 0.1629199981689453, + "step": 1362 + }, + { + "epoch": 0.18426700464723278, + "grad_norm": 3.9064555168151855, + "learning_rate": 2.888585953903336e-05, + "loss": 0.271240234375, + "step": 1363 + }, + { + "epoch": 0.18440219687367976, + "grad_norm": 2.7453773021698, + "learning_rate": 2.888326953170321e-05, + "loss": 0.20867347717285156, + "step": 1364 + }, + { + "epoch": 0.18453738910012674, + "grad_norm": 2.2827341556549072, + "learning_rate": 2.8880676633802314e-05, + "loss": 0.19246673583984375, + "step": 1365 + }, + { + "epoch": 0.18467258132657371, + "grad_norm": 3.1657333374023438, + "learning_rate": 2.8878080845870522e-05, + "loss": 0.14481115341186523, + "step": 1366 + }, + { + "epoch": 0.1848077735530207, + "grad_norm": 3.421050786972046, + "learning_rate": 2.887548216844829e-05, + "loss": 0.2201251983642578, + "step": 1367 + }, + { + "epoch": 0.18494296577946767, + "grad_norm": 2.5648486614227295, + "learning_rate": 2.8872880602076675e-05, + "loss": 0.20283889770507812, + "step": 1368 + }, + { + "epoch": 0.18507815800591465, + "grad_norm": 4.422479629516602, + "learning_rate": 2.8870276147297344e-05, + "loss": 0.2301197052001953, + "step": 1369 + }, + { + "epoch": 0.18521335023236163, + "grad_norm": 1.447017788887024, + "learning_rate": 2.8867668804652552e-05, + "loss": 0.18880653381347656, + "step": 1370 + }, + { + "epoch": 0.1853485424588086, + "grad_norm": 2.7759850025177, + "learning_rate": 2.886505857468516e-05, + "loss": 0.16425418853759766, + "step": 1371 + }, + { + "epoch": 0.18548373468525559, + "grad_norm": 3.4335579872131348, + "learning_rate": 2.8862445457938642e-05, + "loss": 0.22813034057617188, + "step": 1372 + }, + { + "epoch": 0.18561892691170256, + "grad_norm": 3.0456130504608154, + "learning_rate": 2.8859829454957053e-05, + "loss": 0.25408935546875, + "step": 1373 + }, + { + "epoch": 0.18575411913814954, + "grad_norm": 4.762476444244385, + "learning_rate": 2.8857210566285062e-05, + "loss": 0.22141408920288086, + "step": 1374 + }, + { + "epoch": 0.18588931136459655, + "grad_norm": 4.590306758880615, + "learning_rate": 2.8854588792467932e-05, + "loss": 0.2428569793701172, + "step": 1375 + }, + { + "epoch": 0.18602450359104353, + "grad_norm": 2.2845871448516846, + "learning_rate": 2.8851964134051535e-05, + "loss": 0.19602394104003906, + "step": 1376 + }, + { + "epoch": 0.1861596958174905, + "grad_norm": 3.2314529418945312, + "learning_rate": 2.884933659158234e-05, + "loss": 0.2736015319824219, + "step": 1377 + }, + { + "epoch": 0.18629488804393748, + "grad_norm": 2.533949136734009, + "learning_rate": 2.8846706165607415e-05, + "loss": 0.1538395881652832, + "step": 1378 + }, + { + "epoch": 0.18643008027038446, + "grad_norm": 5.120025634765625, + "learning_rate": 2.8844072856674422e-05, + "loss": 0.22174644470214844, + "step": 1379 + }, + { + "epoch": 0.18656527249683144, + "grad_norm": 2.427504062652588, + "learning_rate": 2.8841436665331634e-05, + "loss": 0.22246456146240234, + "step": 1380 + }, + { + "epoch": 0.18670046472327842, + "grad_norm": 1.838721752166748, + "learning_rate": 2.8838797592127927e-05, + "loss": 0.2053365707397461, + "step": 1381 + }, + { + "epoch": 0.1868356569497254, + "grad_norm": 0.906598687171936, + "learning_rate": 2.883615563761276e-05, + "loss": 0.1869983673095703, + "step": 1382 + }, + { + "epoch": 0.18697084917617238, + "grad_norm": 4.842560291290283, + "learning_rate": 2.8833510802336206e-05, + "loss": 0.30701446533203125, + "step": 1383 + }, + { + "epoch": 0.18710604140261936, + "grad_norm": 2.9008145332336426, + "learning_rate": 2.883086308684893e-05, + "loss": 0.17647171020507812, + "step": 1384 + }, + { + "epoch": 0.18724123362906633, + "grad_norm": 5.069915771484375, + "learning_rate": 2.882821249170221e-05, + "loss": 0.23810958862304688, + "step": 1385 + }, + { + "epoch": 0.1873764258555133, + "grad_norm": 3.567537546157837, + "learning_rate": 2.8825559017447905e-05, + "loss": 0.2289581298828125, + "step": 1386 + }, + { + "epoch": 0.1875116180819603, + "grad_norm": 1.2587440013885498, + "learning_rate": 2.8822902664638487e-05, + "loss": 0.20497703552246094, + "step": 1387 + }, + { + "epoch": 0.18764681030840727, + "grad_norm": 1.731467843055725, + "learning_rate": 2.882024343382702e-05, + "loss": 0.18185138702392578, + "step": 1388 + }, + { + "epoch": 0.18778200253485425, + "grad_norm": 1.4052733182907104, + "learning_rate": 2.8817581325567174e-05, + "loss": 0.24340581893920898, + "step": 1389 + }, + { + "epoch": 0.18791719476130123, + "grad_norm": 1.1212856769561768, + "learning_rate": 2.8814916340413205e-05, + "loss": 0.1824474334716797, + "step": 1390 + }, + { + "epoch": 0.1880523869877482, + "grad_norm": 3.4490857124328613, + "learning_rate": 2.881224847891999e-05, + "loss": 0.21667861938476562, + "step": 1391 + }, + { + "epoch": 0.18818757921419518, + "grad_norm": 2.4879043102264404, + "learning_rate": 2.8809577741642987e-05, + "loss": 0.2562370300292969, + "step": 1392 + }, + { + "epoch": 0.18832277144064216, + "grad_norm": 1.0481992959976196, + "learning_rate": 2.8806904129138255e-05, + "loss": 0.1817483901977539, + "step": 1393 + }, + { + "epoch": 0.18845796366708914, + "grad_norm": 1.4090983867645264, + "learning_rate": 2.8804227641962457e-05, + "loss": 0.21759605407714844, + "step": 1394 + }, + { + "epoch": 0.18859315589353612, + "grad_norm": 0.908618688583374, + "learning_rate": 2.8801548280672847e-05, + "loss": 0.14403915405273438, + "step": 1395 + }, + { + "epoch": 0.1887283481199831, + "grad_norm": 2.1785736083984375, + "learning_rate": 2.8798866045827288e-05, + "loss": 0.18030166625976562, + "step": 1396 + }, + { + "epoch": 0.18886354034643008, + "grad_norm": 1.8354631662368774, + "learning_rate": 2.8796180937984234e-05, + "loss": 0.13053417205810547, + "step": 1397 + }, + { + "epoch": 0.18899873257287705, + "grad_norm": 1.5700068473815918, + "learning_rate": 2.8793492957702738e-05, + "loss": 0.14382648468017578, + "step": 1398 + }, + { + "epoch": 0.18913392479932403, + "grad_norm": 0.8196216821670532, + "learning_rate": 2.8790802105542454e-05, + "loss": 0.16629981994628906, + "step": 1399 + }, + { + "epoch": 0.189269117025771, + "grad_norm": 1.1157335042953491, + "learning_rate": 2.8788108382063628e-05, + "loss": 0.20897865295410156, + "step": 1400 + }, + { + "epoch": 0.189404309252218, + "grad_norm": 5.303626537322998, + "learning_rate": 2.878541178782711e-05, + "loss": 0.22238540649414062, + "step": 1401 + }, + { + "epoch": 0.18953950147866497, + "grad_norm": 6.307397842407227, + "learning_rate": 2.8782712323394344e-05, + "loss": 0.25563812255859375, + "step": 1402 + }, + { + "epoch": 0.18967469370511195, + "grad_norm": 7.425320625305176, + "learning_rate": 2.878000998932738e-05, + "loss": 0.23553085327148438, + "step": 1403 + }, + { + "epoch": 0.18980988593155892, + "grad_norm": 3.187054395675659, + "learning_rate": 2.8777304786188847e-05, + "loss": 0.14130496978759766, + "step": 1404 + }, + { + "epoch": 0.1899450781580059, + "grad_norm": 4.92216157913208, + "learning_rate": 2.8774596714541988e-05, + "loss": 0.24230575561523438, + "step": 1405 + }, + { + "epoch": 0.19008027038445288, + "grad_norm": 0.7569880485534668, + "learning_rate": 2.8771885774950637e-05, + "loss": 0.1248779296875, + "step": 1406 + }, + { + "epoch": 0.19021546261089986, + "grad_norm": 1.5575529336929321, + "learning_rate": 2.876917196797923e-05, + "loss": 0.17722320556640625, + "step": 1407 + }, + { + "epoch": 0.19035065483734684, + "grad_norm": 2.14261794090271, + "learning_rate": 2.876645529419279e-05, + "loss": 0.23764801025390625, + "step": 1408 + }, + { + "epoch": 0.19048584706379385, + "grad_norm": 1.227704644203186, + "learning_rate": 2.876373575415695e-05, + "loss": 0.17714691162109375, + "step": 1409 + }, + { + "epoch": 0.19062103929024082, + "grad_norm": 2.7674806118011475, + "learning_rate": 2.8761013348437926e-05, + "loss": 0.21637344360351562, + "step": 1410 + }, + { + "epoch": 0.1907562315166878, + "grad_norm": 2.050769805908203, + "learning_rate": 2.875828807760254e-05, + "loss": 0.23986530303955078, + "step": 1411 + }, + { + "epoch": 0.19089142374313478, + "grad_norm": 4.342446327209473, + "learning_rate": 2.875555994221821e-05, + "loss": 0.23349761962890625, + "step": 1412 + }, + { + "epoch": 0.19102661596958176, + "grad_norm": 1.3309298753738403, + "learning_rate": 2.8752828942852943e-05, + "loss": 0.14211082458496094, + "step": 1413 + }, + { + "epoch": 0.19116180819602874, + "grad_norm": 1.2024198770523071, + "learning_rate": 2.875009508007535e-05, + "loss": 0.1692056655883789, + "step": 1414 + }, + { + "epoch": 0.19129700042247572, + "grad_norm": 1.1278339624404907, + "learning_rate": 2.8747358354454642e-05, + "loss": 0.1775684356689453, + "step": 1415 + }, + { + "epoch": 0.1914321926489227, + "grad_norm": 3.680670738220215, + "learning_rate": 2.8744618766560614e-05, + "loss": 0.23484420776367188, + "step": 1416 + }, + { + "epoch": 0.19156738487536967, + "grad_norm": 1.6394540071487427, + "learning_rate": 2.8741876316963664e-05, + "loss": 0.23087024688720703, + "step": 1417 + }, + { + "epoch": 0.19170257710181665, + "grad_norm": 1.3851091861724854, + "learning_rate": 2.873913100623478e-05, + "loss": 0.20172119140625, + "step": 1418 + }, + { + "epoch": 0.19183776932826363, + "grad_norm": 1.9548044204711914, + "learning_rate": 2.873638283494556e-05, + "loss": 0.2664356231689453, + "step": 1419 + }, + { + "epoch": 0.1919729615547106, + "grad_norm": 1.3033467531204224, + "learning_rate": 2.8733631803668178e-05, + "loss": 0.14479398727416992, + "step": 1420 + }, + { + "epoch": 0.1921081537811576, + "grad_norm": 0.9137272834777832, + "learning_rate": 2.8730877912975418e-05, + "loss": 0.16078853607177734, + "step": 1421 + }, + { + "epoch": 0.19224334600760457, + "grad_norm": 0.9136776328086853, + "learning_rate": 2.8728121163440656e-05, + "loss": 0.17396926879882812, + "step": 1422 + }, + { + "epoch": 0.19237853823405154, + "grad_norm": 5.344334125518799, + "learning_rate": 2.8725361555637863e-05, + "loss": 0.22088146209716797, + "step": 1423 + }, + { + "epoch": 0.19251373046049852, + "grad_norm": 1.1931527853012085, + "learning_rate": 2.8722599090141598e-05, + "loss": 0.2191762924194336, + "step": 1424 + }, + { + "epoch": 0.1926489226869455, + "grad_norm": 1.853440523147583, + "learning_rate": 2.8719833767527026e-05, + "loss": 0.23706769943237305, + "step": 1425 + }, + { + "epoch": 0.19278411491339248, + "grad_norm": 3.5897345542907715, + "learning_rate": 2.8717065588369896e-05, + "loss": 0.22147750854492188, + "step": 1426 + }, + { + "epoch": 0.19291930713983946, + "grad_norm": 1.0917056798934937, + "learning_rate": 2.871429455324657e-05, + "loss": 0.13545799255371094, + "step": 1427 + }, + { + "epoch": 0.19305449936628644, + "grad_norm": 1.1243360042572021, + "learning_rate": 2.871152066273398e-05, + "loss": 0.2462306022644043, + "step": 1428 + }, + { + "epoch": 0.19318969159273341, + "grad_norm": 1.8675858974456787, + "learning_rate": 2.870874391740967e-05, + "loss": 0.1827373504638672, + "step": 1429 + }, + { + "epoch": 0.1933248838191804, + "grad_norm": 2.4345269203186035, + "learning_rate": 2.8705964317851774e-05, + "loss": 0.2342853546142578, + "step": 1430 + }, + { + "epoch": 0.19346007604562737, + "grad_norm": 1.1927862167358398, + "learning_rate": 2.8703181864639013e-05, + "loss": 0.20449209213256836, + "step": 1431 + }, + { + "epoch": 0.19359526827207435, + "grad_norm": 1.2638800144195557, + "learning_rate": 2.870039655835072e-05, + "loss": 0.2043933868408203, + "step": 1432 + }, + { + "epoch": 0.19373046049852133, + "grad_norm": 1.4303911924362183, + "learning_rate": 2.8697608399566796e-05, + "loss": 0.16094160079956055, + "step": 1433 + }, + { + "epoch": 0.1938656527249683, + "grad_norm": 0.7716794610023499, + "learning_rate": 2.869481738886777e-05, + "loss": 0.1574099063873291, + "step": 1434 + }, + { + "epoch": 0.19400084495141529, + "grad_norm": 1.1690324544906616, + "learning_rate": 2.8692023526834725e-05, + "loss": 0.19349193572998047, + "step": 1435 + }, + { + "epoch": 0.19413603717786226, + "grad_norm": 1.04790198802948, + "learning_rate": 2.8689226814049367e-05, + "loss": 0.18207645416259766, + "step": 1436 + }, + { + "epoch": 0.19427122940430924, + "grad_norm": 1.2554149627685547, + "learning_rate": 2.868642725109399e-05, + "loss": 0.2126293182373047, + "step": 1437 + }, + { + "epoch": 0.19440642163075622, + "grad_norm": 1.4943777322769165, + "learning_rate": 2.868362483855147e-05, + "loss": 0.2154521942138672, + "step": 1438 + }, + { + "epoch": 0.1945416138572032, + "grad_norm": 5.169602394104004, + "learning_rate": 2.8680819577005295e-05, + "loss": 0.23990154266357422, + "step": 1439 + }, + { + "epoch": 0.19467680608365018, + "grad_norm": 1.9807720184326172, + "learning_rate": 2.8678011467039526e-05, + "loss": 0.20207691192626953, + "step": 1440 + }, + { + "epoch": 0.19481199831009716, + "grad_norm": 1.802787184715271, + "learning_rate": 2.867520050923883e-05, + "loss": 0.19464683532714844, + "step": 1441 + }, + { + "epoch": 0.19494719053654413, + "grad_norm": 1.0303736925125122, + "learning_rate": 2.8672386704188466e-05, + "loss": 0.1305065155029297, + "step": 1442 + }, + { + "epoch": 0.19508238276299114, + "grad_norm": 1.8220689296722412, + "learning_rate": 2.8669570052474273e-05, + "loss": 0.204681396484375, + "step": 1443 + }, + { + "epoch": 0.19521757498943812, + "grad_norm": 3.049875259399414, + "learning_rate": 2.86667505546827e-05, + "loss": 0.1982402801513672, + "step": 1444 + }, + { + "epoch": 0.1953527672158851, + "grad_norm": 1.4490158557891846, + "learning_rate": 2.866392821140079e-05, + "loss": 0.21976852416992188, + "step": 1445 + }, + { + "epoch": 0.19548795944233208, + "grad_norm": 1.27225923538208, + "learning_rate": 2.8661103023216154e-05, + "loss": 0.18479537963867188, + "step": 1446 + }, + { + "epoch": 0.19562315166877906, + "grad_norm": 1.972221851348877, + "learning_rate": 2.8658274990717018e-05, + "loss": 0.14703655242919922, + "step": 1447 + }, + { + "epoch": 0.19575834389522603, + "grad_norm": 1.6986520290374756, + "learning_rate": 2.86554441144922e-05, + "loss": 0.23073577880859375, + "step": 1448 + }, + { + "epoch": 0.195893536121673, + "grad_norm": 3.859036445617676, + "learning_rate": 2.8652610395131097e-05, + "loss": 0.24255752563476562, + "step": 1449 + }, + { + "epoch": 0.19602872834812, + "grad_norm": 5.375704765319824, + "learning_rate": 2.8649773833223702e-05, + "loss": 0.26262664794921875, + "step": 1450 + }, + { + "epoch": 0.19616392057456697, + "grad_norm": 1.6645658016204834, + "learning_rate": 2.8646934429360606e-05, + "loss": 0.1817626953125, + "step": 1451 + }, + { + "epoch": 0.19629911280101395, + "grad_norm": 4.011504173278809, + "learning_rate": 2.8644092184132986e-05, + "loss": 0.23273086547851562, + "step": 1452 + }, + { + "epoch": 0.19643430502746093, + "grad_norm": 2.6857388019561768, + "learning_rate": 2.864124709813262e-05, + "loss": 0.2297215461730957, + "step": 1453 + }, + { + "epoch": 0.1965694972539079, + "grad_norm": 1.2273344993591309, + "learning_rate": 2.8638399171951856e-05, + "loss": 0.17424488067626953, + "step": 1454 + }, + { + "epoch": 0.19670468948035488, + "grad_norm": 1.7742382287979126, + "learning_rate": 2.8635548406183664e-05, + "loss": 0.16910552978515625, + "step": 1455 + }, + { + "epoch": 0.19683988170680186, + "grad_norm": 2.5885918140411377, + "learning_rate": 2.8632694801421576e-05, + "loss": 0.12966537475585938, + "step": 1456 + }, + { + "epoch": 0.19697507393324884, + "grad_norm": 1.0252225399017334, + "learning_rate": 2.862983835825973e-05, + "loss": 0.22079086303710938, + "step": 1457 + }, + { + "epoch": 0.19711026615969582, + "grad_norm": 1.3225539922714233, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.20037174224853516, + "step": 1458 + }, + { + "epoch": 0.1972454583861428, + "grad_norm": 3.418307065963745, + "learning_rate": 2.862411695911627e-05, + "loss": 0.23319053649902344, + "step": 1459 + }, + { + "epoch": 0.19738065061258978, + "grad_norm": 0.6984314918518066, + "learning_rate": 2.862125200432588e-05, + "loss": 0.127166748046875, + "step": 1460 + }, + { + "epoch": 0.19751584283903675, + "grad_norm": 1.5432121753692627, + "learning_rate": 2.8618384213518188e-05, + "loss": 0.16589832305908203, + "step": 1461 + }, + { + "epoch": 0.19765103506548373, + "grad_norm": 1.9159175157546997, + "learning_rate": 2.861551358729028e-05, + "loss": 0.18456268310546875, + "step": 1462 + }, + { + "epoch": 0.1977862272919307, + "grad_norm": 1.0911264419555664, + "learning_rate": 2.8612640126239836e-05, + "loss": 0.1954631805419922, + "step": 1463 + }, + { + "epoch": 0.1979214195183777, + "grad_norm": 1.3658831119537354, + "learning_rate": 2.8609763830965126e-05, + "loss": 0.2354264259338379, + "step": 1464 + }, + { + "epoch": 0.19805661174482467, + "grad_norm": 2.338291645050049, + "learning_rate": 2.860688470206501e-05, + "loss": 0.2104191780090332, + "step": 1465 + }, + { + "epoch": 0.19819180397127165, + "grad_norm": 2.187239408493042, + "learning_rate": 2.8604002740138936e-05, + "loss": 0.2264385223388672, + "step": 1466 + }, + { + "epoch": 0.19832699619771862, + "grad_norm": 1.0135387182235718, + "learning_rate": 2.860111794578695e-05, + "loss": 0.12063407897949219, + "step": 1467 + }, + { + "epoch": 0.1984621884241656, + "grad_norm": 1.056800127029419, + "learning_rate": 2.8598230319609677e-05, + "loss": 0.1545705795288086, + "step": 1468 + }, + { + "epoch": 0.19859738065061258, + "grad_norm": 1.6733328104019165, + "learning_rate": 2.8595339862208336e-05, + "loss": 0.17456912994384766, + "step": 1469 + }, + { + "epoch": 0.19873257287705956, + "grad_norm": 3.5856924057006836, + "learning_rate": 2.8592446574184733e-05, + "loss": 0.2902717590332031, + "step": 1470 + }, + { + "epoch": 0.19886776510350654, + "grad_norm": 2.837336540222168, + "learning_rate": 2.8589550456141274e-05, + "loss": 0.2137737274169922, + "step": 1471 + }, + { + "epoch": 0.19900295732995352, + "grad_norm": 1.3219761848449707, + "learning_rate": 2.8586651508680942e-05, + "loss": 0.17941951751708984, + "step": 1472 + }, + { + "epoch": 0.1991381495564005, + "grad_norm": 1.946235179901123, + "learning_rate": 2.8583749732407312e-05, + "loss": 0.2377338409423828, + "step": 1473 + }, + { + "epoch": 0.19927334178284747, + "grad_norm": 2.5637385845184326, + "learning_rate": 2.8580845127924546e-05, + "loss": 0.18741416931152344, + "step": 1474 + }, + { + "epoch": 0.19940853400929445, + "grad_norm": 3.8044192790985107, + "learning_rate": 2.8577937695837408e-05, + "loss": 0.22407150268554688, + "step": 1475 + }, + { + "epoch": 0.19954372623574146, + "grad_norm": 2.4386212825775146, + "learning_rate": 2.8575027436751235e-05, + "loss": 0.18420982360839844, + "step": 1476 + }, + { + "epoch": 0.19967891846218844, + "grad_norm": 1.290191650390625, + "learning_rate": 2.8572114351271955e-05, + "loss": 0.17699813842773438, + "step": 1477 + }, + { + "epoch": 0.19981411068863542, + "grad_norm": 3.3647961616516113, + "learning_rate": 2.85691984400061e-05, + "loss": 0.19544219970703125, + "step": 1478 + }, + { + "epoch": 0.1999493029150824, + "grad_norm": 0.8163108825683594, + "learning_rate": 2.8566279703560762e-05, + "loss": 0.18499088287353516, + "step": 1479 + }, + { + "epoch": 0.20008449514152937, + "grad_norm": 1.1311842203140259, + "learning_rate": 2.8563358142543648e-05, + "loss": 0.17033767700195312, + "step": 1480 + }, + { + "epoch": 0.20021968736797635, + "grad_norm": 2.9506378173828125, + "learning_rate": 2.856043375756304e-05, + "loss": 0.27475738525390625, + "step": 1481 + }, + { + "epoch": 0.20035487959442333, + "grad_norm": 2.901394844055176, + "learning_rate": 2.855750654922781e-05, + "loss": 0.24585342407226562, + "step": 1482 + }, + { + "epoch": 0.2004900718208703, + "grad_norm": 2.0981180667877197, + "learning_rate": 2.855457651814742e-05, + "loss": 0.23094463348388672, + "step": 1483 + }, + { + "epoch": 0.2006252640473173, + "grad_norm": 2.154557704925537, + "learning_rate": 2.8551643664931916e-05, + "loss": 0.22888565063476562, + "step": 1484 + }, + { + "epoch": 0.20076045627376427, + "grad_norm": 1.1707566976547241, + "learning_rate": 2.8548707990191933e-05, + "loss": 0.24201107025146484, + "step": 1485 + }, + { + "epoch": 0.20089564850021124, + "grad_norm": 3.0858397483825684, + "learning_rate": 2.8545769494538698e-05, + "loss": 0.21252059936523438, + "step": 1486 + }, + { + "epoch": 0.20103084072665822, + "grad_norm": 3.447730541229248, + "learning_rate": 2.854282817858402e-05, + "loss": 0.16162776947021484, + "step": 1487 + }, + { + "epoch": 0.2011660329531052, + "grad_norm": 4.726772308349609, + "learning_rate": 2.85398840429403e-05, + "loss": 0.24300384521484375, + "step": 1488 + }, + { + "epoch": 0.20130122517955218, + "grad_norm": 1.1938174962997437, + "learning_rate": 2.853693708822051e-05, + "loss": 0.23445892333984375, + "step": 1489 + }, + { + "epoch": 0.20143641740599916, + "grad_norm": 1.2891297340393066, + "learning_rate": 2.8533987315038234e-05, + "loss": 0.1974506378173828, + "step": 1490 + }, + { + "epoch": 0.20157160963244614, + "grad_norm": 2.2705130577087402, + "learning_rate": 2.8531034724007627e-05, + "loss": 0.2105541229248047, + "step": 1491 + }, + { + "epoch": 0.20170680185889311, + "grad_norm": 1.015687108039856, + "learning_rate": 2.8528079315743435e-05, + "loss": 0.16239356994628906, + "step": 1492 + }, + { + "epoch": 0.2018419940853401, + "grad_norm": 2.74764347076416, + "learning_rate": 2.852512109086099e-05, + "loss": 0.20856809616088867, + "step": 1493 + }, + { + "epoch": 0.20197718631178707, + "grad_norm": 5.50916862487793, + "learning_rate": 2.8522160049976208e-05, + "loss": 0.2513465881347656, + "step": 1494 + }, + { + "epoch": 0.20211237853823405, + "grad_norm": 0.9173393249511719, + "learning_rate": 2.8519196193705595e-05, + "loss": 0.14350605010986328, + "step": 1495 + }, + { + "epoch": 0.20224757076468103, + "grad_norm": 1.6260699033737183, + "learning_rate": 2.8516229522666243e-05, + "loss": 0.19754600524902344, + "step": 1496 + }, + { + "epoch": 0.202382762991128, + "grad_norm": 1.496013879776001, + "learning_rate": 2.8513260037475825e-05, + "loss": 0.1762409210205078, + "step": 1497 + }, + { + "epoch": 0.20251795521757499, + "grad_norm": 1.4995561838150024, + "learning_rate": 2.8510287738752604e-05, + "loss": 0.21907901763916016, + "step": 1498 + }, + { + "epoch": 0.20265314744402196, + "grad_norm": 1.9306213855743408, + "learning_rate": 2.8507312627115435e-05, + "loss": 0.13698774576187134, + "step": 1499 + }, + { + "epoch": 0.20278833967046894, + "grad_norm": 1.097343921661377, + "learning_rate": 2.850433470318374e-05, + "loss": 0.1705303192138672, + "step": 1500 + }, + { + "epoch": 0.20292353189691592, + "grad_norm": 2.041163682937622, + "learning_rate": 2.8501353967577556e-05, + "loss": 0.20937156677246094, + "step": 1501 + }, + { + "epoch": 0.2030587241233629, + "grad_norm": 1.0126675367355347, + "learning_rate": 2.8498370420917468e-05, + "loss": 0.16542339324951172, + "step": 1502 + }, + { + "epoch": 0.20319391634980988, + "grad_norm": 1.2650238275527954, + "learning_rate": 2.8495384063824683e-05, + "loss": 0.21272659301757812, + "step": 1503 + }, + { + "epoch": 0.20332910857625686, + "grad_norm": 3.2360408306121826, + "learning_rate": 2.8492394896920964e-05, + "loss": 0.19436931610107422, + "step": 1504 + }, + { + "epoch": 0.20346430080270383, + "grad_norm": 0.7906304001808167, + "learning_rate": 2.848940292082868e-05, + "loss": 0.17124080657958984, + "step": 1505 + }, + { + "epoch": 0.2035994930291508, + "grad_norm": 1.1428529024124146, + "learning_rate": 2.8486408136170772e-05, + "loss": 0.16841888427734375, + "step": 1506 + }, + { + "epoch": 0.2037346852555978, + "grad_norm": 3.7356717586517334, + "learning_rate": 2.8483410543570776e-05, + "loss": 0.22685575485229492, + "step": 1507 + }, + { + "epoch": 0.20386987748204477, + "grad_norm": 3.227050542831421, + "learning_rate": 2.8480410143652803e-05, + "loss": 0.26908111572265625, + "step": 1508 + }, + { + "epoch": 0.20400506970849175, + "grad_norm": 1.2363524436950684, + "learning_rate": 2.8477406937041547e-05, + "loss": 0.2000408172607422, + "step": 1509 + }, + { + "epoch": 0.20414026193493875, + "grad_norm": 2.7056961059570312, + "learning_rate": 2.8474400924362298e-05, + "loss": 0.21938133239746094, + "step": 1510 + }, + { + "epoch": 0.20427545416138573, + "grad_norm": 0.7228403091430664, + "learning_rate": 2.847139210624092e-05, + "loss": 0.1736917495727539, + "step": 1511 + }, + { + "epoch": 0.2044106463878327, + "grad_norm": 2.4037535190582275, + "learning_rate": 2.8468380483303873e-05, + "loss": 0.20244455337524414, + "step": 1512 + }, + { + "epoch": 0.2045458386142797, + "grad_norm": 1.0335203409194946, + "learning_rate": 2.8465366056178183e-05, + "loss": 0.11712980270385742, + "step": 1513 + }, + { + "epoch": 0.20468103084072667, + "grad_norm": 2.4525163173675537, + "learning_rate": 2.8462348825491475e-05, + "loss": 0.21556472778320312, + "step": 1514 + }, + { + "epoch": 0.20481622306717365, + "grad_norm": 1.2312915325164795, + "learning_rate": 2.8459328791871953e-05, + "loss": 0.2537040710449219, + "step": 1515 + }, + { + "epoch": 0.20495141529362063, + "grad_norm": 2.9004316329956055, + "learning_rate": 2.8456305955948402e-05, + "loss": 0.20762920379638672, + "step": 1516 + }, + { + "epoch": 0.2050866075200676, + "grad_norm": 2.7718160152435303, + "learning_rate": 2.845328031835019e-05, + "loss": 0.23495864868164062, + "step": 1517 + }, + { + "epoch": 0.20522179974651458, + "grad_norm": 2.2636141777038574, + "learning_rate": 2.8450251879707277e-05, + "loss": 0.244171142578125, + "step": 1518 + }, + { + "epoch": 0.20535699197296156, + "grad_norm": 1.0177468061447144, + "learning_rate": 2.8447220640650194e-05, + "loss": 0.18434715270996094, + "step": 1519 + }, + { + "epoch": 0.20549218419940854, + "grad_norm": 1.335494875907898, + "learning_rate": 2.8444186601810068e-05, + "loss": 0.24886703491210938, + "step": 1520 + }, + { + "epoch": 0.20562737642585552, + "grad_norm": 3.091848134994507, + "learning_rate": 2.84411497638186e-05, + "loss": 0.20006465911865234, + "step": 1521 + }, + { + "epoch": 0.2057625686523025, + "grad_norm": 3.217728614807129, + "learning_rate": 2.843811012730807e-05, + "loss": 0.2499542236328125, + "step": 1522 + }, + { + "epoch": 0.20589776087874948, + "grad_norm": 1.284584879875183, + "learning_rate": 2.8435067692911353e-05, + "loss": 0.2462329864501953, + "step": 1523 + }, + { + "epoch": 0.20603295310519645, + "grad_norm": 1.2856589555740356, + "learning_rate": 2.8432022461261897e-05, + "loss": 0.19455242156982422, + "step": 1524 + }, + { + "epoch": 0.20616814533164343, + "grad_norm": 3.2996480464935303, + "learning_rate": 2.8428974432993736e-05, + "loss": 0.2416839599609375, + "step": 1525 + }, + { + "epoch": 0.2063033375580904, + "grad_norm": 2.059166431427002, + "learning_rate": 2.8425923608741486e-05, + "loss": 0.22332239151000977, + "step": 1526 + }, + { + "epoch": 0.2064385297845374, + "grad_norm": 1.8817386627197266, + "learning_rate": 2.8422869989140343e-05, + "loss": 0.20667552947998047, + "step": 1527 + }, + { + "epoch": 0.20657372201098437, + "grad_norm": 0.9530999660491943, + "learning_rate": 2.8419813574826093e-05, + "loss": 0.16479206085205078, + "step": 1528 + }, + { + "epoch": 0.20670891423743135, + "grad_norm": 2.440660238265991, + "learning_rate": 2.8416754366435092e-05, + "loss": 0.20564651489257812, + "step": 1529 + }, + { + "epoch": 0.20684410646387832, + "grad_norm": 1.2118011713027954, + "learning_rate": 2.8413692364604285e-05, + "loss": 0.16583633422851562, + "step": 1530 + }, + { + "epoch": 0.2069792986903253, + "grad_norm": 4.1070122718811035, + "learning_rate": 2.8410627569971197e-05, + "loss": 0.2732048034667969, + "step": 1531 + }, + { + "epoch": 0.20711449091677228, + "grad_norm": 3.7998554706573486, + "learning_rate": 2.8407559983173934e-05, + "loss": 0.26544189453125, + "step": 1532 + }, + { + "epoch": 0.20724968314321926, + "grad_norm": 1.8670927286148071, + "learning_rate": 2.8404489604851186e-05, + "loss": 0.23783183097839355, + "step": 1533 + }, + { + "epoch": 0.20738487536966624, + "grad_norm": 2.254140615463257, + "learning_rate": 2.840141643564222e-05, + "loss": 0.2005634307861328, + "step": 1534 + }, + { + "epoch": 0.20752006759611322, + "grad_norm": 1.4189573526382446, + "learning_rate": 2.8398340476186885e-05, + "loss": 0.18670654296875, + "step": 1535 + }, + { + "epoch": 0.2076552598225602, + "grad_norm": 1.2199699878692627, + "learning_rate": 2.8395261727125617e-05, + "loss": 0.23779630661010742, + "step": 1536 + }, + { + "epoch": 0.20779045204900717, + "grad_norm": 4.575888633728027, + "learning_rate": 2.8392180189099425e-05, + "loss": 0.22336578369140625, + "step": 1537 + }, + { + "epoch": 0.20792564427545415, + "grad_norm": 2.3746140003204346, + "learning_rate": 2.83890958627499e-05, + "loss": 0.16201210021972656, + "step": 1538 + }, + { + "epoch": 0.20806083650190113, + "grad_norm": 1.6039785146713257, + "learning_rate": 2.8386008748719216e-05, + "loss": 0.14647293090820312, + "step": 1539 + }, + { + "epoch": 0.2081960287283481, + "grad_norm": 3.3411197662353516, + "learning_rate": 2.838291884765013e-05, + "loss": 0.22340011596679688, + "step": 1540 + }, + { + "epoch": 0.2083312209547951, + "grad_norm": 1.2625120878219604, + "learning_rate": 2.8379826160185975e-05, + "loss": 0.19697189331054688, + "step": 1541 + }, + { + "epoch": 0.20846641318124207, + "grad_norm": 3.770214557647705, + "learning_rate": 2.8376730686970664e-05, + "loss": 0.24465465545654297, + "step": 1542 + }, + { + "epoch": 0.20860160540768904, + "grad_norm": 2.887554407119751, + "learning_rate": 2.8373632428648683e-05, + "loss": 0.21036529541015625, + "step": 1543 + }, + { + "epoch": 0.20873679763413605, + "grad_norm": 1.4231836795806885, + "learning_rate": 2.8370531385865124e-05, + "loss": 0.16444158554077148, + "step": 1544 + }, + { + "epoch": 0.20887198986058303, + "grad_norm": 3.7218923568725586, + "learning_rate": 2.8367427559265622e-05, + "loss": 0.14816904067993164, + "step": 1545 + }, + { + "epoch": 0.20900718208703, + "grad_norm": 1.702355146408081, + "learning_rate": 2.836432094949642e-05, + "loss": 0.20602989196777344, + "step": 1546 + }, + { + "epoch": 0.209142374313477, + "grad_norm": 2.4044106006622314, + "learning_rate": 2.836121155720433e-05, + "loss": 0.16053009033203125, + "step": 1547 + }, + { + "epoch": 0.20927756653992396, + "grad_norm": 1.4207279682159424, + "learning_rate": 2.8358099383036745e-05, + "loss": 0.19476890563964844, + "step": 1548 + }, + { + "epoch": 0.20941275876637094, + "grad_norm": 2.0659759044647217, + "learning_rate": 2.8354984427641634e-05, + "loss": 0.15401077270507812, + "step": 1549 + }, + { + "epoch": 0.20954795099281792, + "grad_norm": 2.910566568374634, + "learning_rate": 2.8351866691667544e-05, + "loss": 0.20627212524414062, + "step": 1550 + }, + { + "epoch": 0.2096831432192649, + "grad_norm": 1.6680594682693481, + "learning_rate": 2.8348746175763613e-05, + "loss": 0.14860153198242188, + "step": 1551 + }, + { + "epoch": 0.20981833544571188, + "grad_norm": 1.254815697669983, + "learning_rate": 2.8345622880579537e-05, + "loss": 0.21012496948242188, + "step": 1552 + }, + { + "epoch": 0.20995352767215886, + "grad_norm": 1.8045488595962524, + "learning_rate": 2.8342496806765615e-05, + "loss": 0.23774147033691406, + "step": 1553 + }, + { + "epoch": 0.21008871989860584, + "grad_norm": 3.642972707748413, + "learning_rate": 2.833936795497271e-05, + "loss": 0.16762542724609375, + "step": 1554 + }, + { + "epoch": 0.21022391212505281, + "grad_norm": 2.241891384124756, + "learning_rate": 2.8336236325852257e-05, + "loss": 0.2126007080078125, + "step": 1555 + }, + { + "epoch": 0.2103591043514998, + "grad_norm": 0.8299334049224854, + "learning_rate": 2.8333101920056285e-05, + "loss": 0.17404937744140625, + "step": 1556 + }, + { + "epoch": 0.21049429657794677, + "grad_norm": 1.1053400039672852, + "learning_rate": 2.8329964738237392e-05, + "loss": 0.1776437759399414, + "step": 1557 + }, + { + "epoch": 0.21062948880439375, + "grad_norm": 3.5212504863739014, + "learning_rate": 2.8326824781048756e-05, + "loss": 0.24463367462158203, + "step": 1558 + }, + { + "epoch": 0.21076468103084073, + "grad_norm": 1.2256594896316528, + "learning_rate": 2.8323682049144135e-05, + "loss": 0.24507904052734375, + "step": 1559 + }, + { + "epoch": 0.2108998732572877, + "grad_norm": 2.7001938819885254, + "learning_rate": 2.832053654317786e-05, + "loss": 0.213897705078125, + "step": 1560 + }, + { + "epoch": 0.21103506548373469, + "grad_norm": 1.0681815147399902, + "learning_rate": 2.8317388263804842e-05, + "loss": 0.24500083923339844, + "step": 1561 + }, + { + "epoch": 0.21117025771018166, + "grad_norm": 1.6112297773361206, + "learning_rate": 2.8314237211680573e-05, + "loss": 0.21057701110839844, + "step": 1562 + }, + { + "epoch": 0.21130544993662864, + "grad_norm": 2.4548401832580566, + "learning_rate": 2.8311083387461118e-05, + "loss": 0.17877674102783203, + "step": 1563 + }, + { + "epoch": 0.21144064216307562, + "grad_norm": 1.5524108409881592, + "learning_rate": 2.8307926791803114e-05, + "loss": 0.23436737060546875, + "step": 1564 + }, + { + "epoch": 0.2115758343895226, + "grad_norm": 2.803323984146118, + "learning_rate": 2.8304767425363785e-05, + "loss": 0.21984291076660156, + "step": 1565 + }, + { + "epoch": 0.21171102661596958, + "grad_norm": 2.1299476623535156, + "learning_rate": 2.830160528880093e-05, + "loss": 0.19360971450805664, + "step": 1566 + }, + { + "epoch": 0.21184621884241656, + "grad_norm": 2.5855629444122314, + "learning_rate": 2.829844038277292e-05, + "loss": 0.21448516845703125, + "step": 1567 + }, + { + "epoch": 0.21198141106886353, + "grad_norm": 0.9516133666038513, + "learning_rate": 2.8295272707938706e-05, + "loss": 0.17955780029296875, + "step": 1568 + }, + { + "epoch": 0.2121166032953105, + "grad_norm": 2.1353797912597656, + "learning_rate": 2.8292102264957817e-05, + "loss": 0.24937820434570312, + "step": 1569 + }, + { + "epoch": 0.2122517955217575, + "grad_norm": 1.1626243591308594, + "learning_rate": 2.8288929054490357e-05, + "loss": 0.2234783172607422, + "step": 1570 + }, + { + "epoch": 0.21238698774820447, + "grad_norm": 2.427438974380493, + "learning_rate": 2.8285753077196998e-05, + "loss": 0.1664104461669922, + "step": 1571 + }, + { + "epoch": 0.21252217997465145, + "grad_norm": 3.7011525630950928, + "learning_rate": 2.8282574333739006e-05, + "loss": 0.18247222900390625, + "step": 1572 + }, + { + "epoch": 0.21265737220109843, + "grad_norm": 2.281188726425171, + "learning_rate": 2.8279392824778197e-05, + "loss": 0.21723365783691406, + "step": 1573 + }, + { + "epoch": 0.2127925644275454, + "grad_norm": 0.8799855709075928, + "learning_rate": 2.8276208550976993e-05, + "loss": 0.18135833740234375, + "step": 1574 + }, + { + "epoch": 0.21292775665399238, + "grad_norm": 1.1784205436706543, + "learning_rate": 2.8273021512998372e-05, + "loss": 0.21408653259277344, + "step": 1575 + }, + { + "epoch": 0.21306294888043936, + "grad_norm": 1.5739400386810303, + "learning_rate": 2.826983171150589e-05, + "loss": 0.21373367309570312, + "step": 1576 + }, + { + "epoch": 0.21319814110688634, + "grad_norm": 2.105349063873291, + "learning_rate": 2.826663914716368e-05, + "loss": 0.2302265167236328, + "step": 1577 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 2.233496904373169, + "learning_rate": 2.826344382063646e-05, + "loss": 0.19851112365722656, + "step": 1578 + }, + { + "epoch": 0.21346852555978033, + "grad_norm": 2.718311071395874, + "learning_rate": 2.8260245732589503e-05, + "loss": 0.20471858978271484, + "step": 1579 + }, + { + "epoch": 0.2136037177862273, + "grad_norm": 1.2879897356033325, + "learning_rate": 2.8257044883688672e-05, + "loss": 0.18995189666748047, + "step": 1580 + }, + { + "epoch": 0.21373891001267428, + "grad_norm": 0.8471916913986206, + "learning_rate": 2.82538412746004e-05, + "loss": 0.19594573974609375, + "step": 1581 + }, + { + "epoch": 0.21387410223912126, + "grad_norm": 1.4126442670822144, + "learning_rate": 2.8250634905991695e-05, + "loss": 0.1682415008544922, + "step": 1582 + }, + { + "epoch": 0.21400929446556824, + "grad_norm": 1.9348821640014648, + "learning_rate": 2.824742577853015e-05, + "loss": 0.17291641235351562, + "step": 1583 + }, + { + "epoch": 0.21414448669201522, + "grad_norm": 1.182713508605957, + "learning_rate": 2.8244213892883907e-05, + "loss": 0.16258001327514648, + "step": 1584 + }, + { + "epoch": 0.2142796789184622, + "grad_norm": 0.9006469249725342, + "learning_rate": 2.82409992497217e-05, + "loss": 0.1353912353515625, + "step": 1585 + }, + { + "epoch": 0.21441487114490917, + "grad_norm": 3.2851181030273438, + "learning_rate": 2.8237781849712852e-05, + "loss": 0.19801855087280273, + "step": 1586 + }, + { + "epoch": 0.21455006337135615, + "grad_norm": 1.9994642734527588, + "learning_rate": 2.8234561693527222e-05, + "loss": 0.16687679290771484, + "step": 1587 + }, + { + "epoch": 0.21468525559780313, + "grad_norm": 4.265449047088623, + "learning_rate": 2.8231338781835275e-05, + "loss": 0.18825149536132812, + "step": 1588 + }, + { + "epoch": 0.2148204478242501, + "grad_norm": 2.3270583152770996, + "learning_rate": 2.8228113115308032e-05, + "loss": 0.22142553329467773, + "step": 1589 + }, + { + "epoch": 0.2149556400506971, + "grad_norm": 1.2166668176651, + "learning_rate": 2.82248846946171e-05, + "loss": 0.19626617431640625, + "step": 1590 + }, + { + "epoch": 0.21509083227714407, + "grad_norm": 2.1520957946777344, + "learning_rate": 2.822165352043465e-05, + "loss": 0.19588088989257812, + "step": 1591 + }, + { + "epoch": 0.21522602450359105, + "grad_norm": 1.523145318031311, + "learning_rate": 2.8218419593433437e-05, + "loss": 0.20798110961914062, + "step": 1592 + }, + { + "epoch": 0.21536121673003802, + "grad_norm": 2.1777796745300293, + "learning_rate": 2.8215182914286768e-05, + "loss": 0.16910266876220703, + "step": 1593 + }, + { + "epoch": 0.215496408956485, + "grad_norm": 1.4733436107635498, + "learning_rate": 2.8211943483668546e-05, + "loss": 0.21474647521972656, + "step": 1594 + }, + { + "epoch": 0.21563160118293198, + "grad_norm": 0.805931031703949, + "learning_rate": 2.8208701302253237e-05, + "loss": 0.17388534545898438, + "step": 1595 + }, + { + "epoch": 0.21576679340937896, + "grad_norm": 4.49268913269043, + "learning_rate": 2.820545637071588e-05, + "loss": 0.2125091552734375, + "step": 1596 + }, + { + "epoch": 0.21590198563582594, + "grad_norm": 3.891444206237793, + "learning_rate": 2.8202208689732083e-05, + "loss": 0.2002696990966797, + "step": 1597 + }, + { + "epoch": 0.21603717786227292, + "grad_norm": 0.9846544861793518, + "learning_rate": 2.819895825997804e-05, + "loss": 0.13231468200683594, + "step": 1598 + }, + { + "epoch": 0.2161723700887199, + "grad_norm": 1.3987812995910645, + "learning_rate": 2.81957050821305e-05, + "loss": 0.1939091682434082, + "step": 1599 + }, + { + "epoch": 0.21630756231516687, + "grad_norm": 1.9934478998184204, + "learning_rate": 2.8192449156866787e-05, + "loss": 0.1595916748046875, + "step": 1600 + }, + { + "epoch": 0.21644275454161385, + "grad_norm": 1.3430542945861816, + "learning_rate": 2.8189190484864814e-05, + "loss": 0.21430635452270508, + "step": 1601 + }, + { + "epoch": 0.21657794676806083, + "grad_norm": 1.6507396697998047, + "learning_rate": 2.8185929066803052e-05, + "loss": 0.1488208770751953, + "step": 1602 + }, + { + "epoch": 0.2167131389945078, + "grad_norm": 1.8464139699935913, + "learning_rate": 2.818266490336054e-05, + "loss": 0.1778697967529297, + "step": 1603 + }, + { + "epoch": 0.2168483312209548, + "grad_norm": 2.0680527687072754, + "learning_rate": 2.817939799521689e-05, + "loss": 0.20356273651123047, + "step": 1604 + }, + { + "epoch": 0.21698352344740177, + "grad_norm": 1.280734658241272, + "learning_rate": 2.8176128343052304e-05, + "loss": 0.2144622802734375, + "step": 1605 + }, + { + "epoch": 0.21711871567384874, + "grad_norm": 3.2125136852264404, + "learning_rate": 2.817285594754753e-05, + "loss": 0.2136383056640625, + "step": 1606 + }, + { + "epoch": 0.21725390790029572, + "grad_norm": 0.9972038865089417, + "learning_rate": 2.8169580809383902e-05, + "loss": 0.1695270538330078, + "step": 1607 + }, + { + "epoch": 0.2173891001267427, + "grad_norm": 1.9265475273132324, + "learning_rate": 2.8166302929243326e-05, + "loss": 0.21355819702148438, + "step": 1608 + }, + { + "epoch": 0.21752429235318968, + "grad_norm": 3.939326286315918, + "learning_rate": 2.8163022307808264e-05, + "loss": 0.18296241760253906, + "step": 1609 + }, + { + "epoch": 0.21765948457963666, + "grad_norm": 2.347256898880005, + "learning_rate": 2.8159738945761764e-05, + "loss": 0.20624542236328125, + "step": 1610 + }, + { + "epoch": 0.21779467680608364, + "grad_norm": 1.0728425979614258, + "learning_rate": 2.8156452843787438e-05, + "loss": 0.17418861389160156, + "step": 1611 + }, + { + "epoch": 0.21792986903253064, + "grad_norm": 2.456512928009033, + "learning_rate": 2.815316400256947e-05, + "loss": 0.2517204284667969, + "step": 1612 + }, + { + "epoch": 0.21806506125897762, + "grad_norm": 2.0149176120758057, + "learning_rate": 2.814987242279262e-05, + "loss": 0.22298049926757812, + "step": 1613 + }, + { + "epoch": 0.2182002534854246, + "grad_norm": 1.3046553134918213, + "learning_rate": 2.8146578105142202e-05, + "loss": 0.2370433807373047, + "step": 1614 + }, + { + "epoch": 0.21833544571187158, + "grad_norm": 1.6654703617095947, + "learning_rate": 2.814328105030412e-05, + "loss": 0.19285964965820312, + "step": 1615 + }, + { + "epoch": 0.21847063793831856, + "grad_norm": 3.8162431716918945, + "learning_rate": 2.8139981258964836e-05, + "loss": 0.2237834930419922, + "step": 1616 + }, + { + "epoch": 0.21860583016476554, + "grad_norm": 3.192939519882202, + "learning_rate": 2.8136678731811385e-05, + "loss": 0.17301559448242188, + "step": 1617 + }, + { + "epoch": 0.21874102239121251, + "grad_norm": 3.9818694591522217, + "learning_rate": 2.8133373469531362e-05, + "loss": 0.2200603485107422, + "step": 1618 + }, + { + "epoch": 0.2188762146176595, + "grad_norm": 1.1149611473083496, + "learning_rate": 2.8130065472812952e-05, + "loss": 0.13320684432983398, + "step": 1619 + }, + { + "epoch": 0.21901140684410647, + "grad_norm": 2.543436288833618, + "learning_rate": 2.812675474234489e-05, + "loss": 0.18166351318359375, + "step": 1620 + }, + { + "epoch": 0.21914659907055345, + "grad_norm": 2.608145236968994, + "learning_rate": 2.812344127881649e-05, + "loss": 0.2615470886230469, + "step": 1621 + }, + { + "epoch": 0.21928179129700043, + "grad_norm": 0.6870976090431213, + "learning_rate": 2.8120125082917638e-05, + "loss": 0.193450927734375, + "step": 1622 + }, + { + "epoch": 0.2194169835234474, + "grad_norm": 1.0287131071090698, + "learning_rate": 2.8116806155338773e-05, + "loss": 0.1839599609375, + "step": 1623 + }, + { + "epoch": 0.21955217574989438, + "grad_norm": 0.9177045226097107, + "learning_rate": 2.8113484496770923e-05, + "loss": 0.19803810119628906, + "step": 1624 + }, + { + "epoch": 0.21968736797634136, + "grad_norm": 1.7474350929260254, + "learning_rate": 2.811016010790567e-05, + "loss": 0.24471282958984375, + "step": 1625 + }, + { + "epoch": 0.21982256020278834, + "grad_norm": 0.7501457929611206, + "learning_rate": 2.8106832989435165e-05, + "loss": 0.19566917419433594, + "step": 1626 + }, + { + "epoch": 0.21995775242923532, + "grad_norm": 1.048923373222351, + "learning_rate": 2.8103503142052146e-05, + "loss": 0.16785049438476562, + "step": 1627 + }, + { + "epoch": 0.2200929446556823, + "grad_norm": 3.043039321899414, + "learning_rate": 2.8100170566449892e-05, + "loss": 0.19009780883789062, + "step": 1628 + }, + { + "epoch": 0.22022813688212928, + "grad_norm": 2.7366185188293457, + "learning_rate": 2.8096835263322266e-05, + "loss": 0.2093372344970703, + "step": 1629 + }, + { + "epoch": 0.22036332910857626, + "grad_norm": 3.2346158027648926, + "learning_rate": 2.8093497233363702e-05, + "loss": 0.22413063049316406, + "step": 1630 + }, + { + "epoch": 0.22049852133502323, + "grad_norm": 1.3287252187728882, + "learning_rate": 2.8090156477269185e-05, + "loss": 0.182586669921875, + "step": 1631 + }, + { + "epoch": 0.2206337135614702, + "grad_norm": 1.1486190557479858, + "learning_rate": 2.808681299573429e-05, + "loss": 0.18571710586547852, + "step": 1632 + }, + { + "epoch": 0.2207689057879172, + "grad_norm": 1.3951661586761475, + "learning_rate": 2.8083466789455137e-05, + "loss": 0.23805999755859375, + "step": 1633 + }, + { + "epoch": 0.22090409801436417, + "grad_norm": 2.6470999717712402, + "learning_rate": 2.808011785912843e-05, + "loss": 0.22373390197753906, + "step": 1634 + }, + { + "epoch": 0.22103929024081115, + "grad_norm": 2.5507028102874756, + "learning_rate": 2.8076766205451435e-05, + "loss": 0.18219709396362305, + "step": 1635 + }, + { + "epoch": 0.22117448246725813, + "grad_norm": 1.2907202243804932, + "learning_rate": 2.8073411829121983e-05, + "loss": 0.13901138305664062, + "step": 1636 + }, + { + "epoch": 0.2213096746937051, + "grad_norm": 1.0998774766921997, + "learning_rate": 2.8070054730838467e-05, + "loss": 0.23057270050048828, + "step": 1637 + }, + { + "epoch": 0.22144486692015208, + "grad_norm": 1.9028065204620361, + "learning_rate": 2.8066694911299865e-05, + "loss": 0.2582893371582031, + "step": 1638 + }, + { + "epoch": 0.22158005914659906, + "grad_norm": 2.7175650596618652, + "learning_rate": 2.8063332371205698e-05, + "loss": 0.2271575927734375, + "step": 1639 + }, + { + "epoch": 0.22171525137304604, + "grad_norm": 3.8392858505249023, + "learning_rate": 2.8059967111256072e-05, + "loss": 0.19440460205078125, + "step": 1640 + }, + { + "epoch": 0.22185044359949302, + "grad_norm": 1.510316252708435, + "learning_rate": 2.8056599132151647e-05, + "loss": 0.24645233154296875, + "step": 1641 + }, + { + "epoch": 0.22198563582594, + "grad_norm": 2.3433139324188232, + "learning_rate": 2.8053228434593656e-05, + "loss": 0.2302539348602295, + "step": 1642 + }, + { + "epoch": 0.22212082805238698, + "grad_norm": 1.2337480783462524, + "learning_rate": 2.8049855019283895e-05, + "loss": 0.23509979248046875, + "step": 1643 + }, + { + "epoch": 0.22225602027883395, + "grad_norm": 1.0035866498947144, + "learning_rate": 2.8046478886924736e-05, + "loss": 0.17784500122070312, + "step": 1644 + }, + { + "epoch": 0.22239121250528093, + "grad_norm": 1.7777913808822632, + "learning_rate": 2.804310003821909e-05, + "loss": 0.2162342071533203, + "step": 1645 + }, + { + "epoch": 0.22252640473172794, + "grad_norm": 0.9080791473388672, + "learning_rate": 2.8039718473870473e-05, + "loss": 0.23622703552246094, + "step": 1646 + }, + { + "epoch": 0.22266159695817492, + "grad_norm": 1.7890592813491821, + "learning_rate": 2.8036334194582924e-05, + "loss": 0.15596580505371094, + "step": 1647 + }, + { + "epoch": 0.2227967891846219, + "grad_norm": 0.8851687908172607, + "learning_rate": 2.8032947201061084e-05, + "loss": 0.14883136749267578, + "step": 1648 + }, + { + "epoch": 0.22293198141106887, + "grad_norm": 1.5567103624343872, + "learning_rate": 2.8029557494010132e-05, + "loss": 0.23559951782226562, + "step": 1649 + }, + { + "epoch": 0.22306717363751585, + "grad_norm": 2.0446503162384033, + "learning_rate": 2.802616507413583e-05, + "loss": 0.2289886474609375, + "step": 1650 + }, + { + "epoch": 0.22320236586396283, + "grad_norm": 1.520552396774292, + "learning_rate": 2.8022769942144492e-05, + "loss": 0.23146438598632812, + "step": 1651 + }, + { + "epoch": 0.2233375580904098, + "grad_norm": 1.9324134588241577, + "learning_rate": 2.801937209874301e-05, + "loss": 0.19847679138183594, + "step": 1652 + }, + { + "epoch": 0.2234727503168568, + "grad_norm": 1.6959515810012817, + "learning_rate": 2.8015971544638832e-05, + "loss": 0.1936495304107666, + "step": 1653 + }, + { + "epoch": 0.22360794254330377, + "grad_norm": 2.567949056625366, + "learning_rate": 2.8012568280539964e-05, + "loss": 0.25025177001953125, + "step": 1654 + }, + { + "epoch": 0.22374313476975075, + "grad_norm": 1.755247950553894, + "learning_rate": 2.800916230715499e-05, + "loss": 0.16828346252441406, + "step": 1655 + }, + { + "epoch": 0.22387832699619772, + "grad_norm": 2.0015316009521484, + "learning_rate": 2.800575362519305e-05, + "loss": 0.20948708057403564, + "step": 1656 + }, + { + "epoch": 0.2240135192226447, + "grad_norm": 2.239182949066162, + "learning_rate": 2.800234223536385e-05, + "loss": 0.14583969116210938, + "step": 1657 + }, + { + "epoch": 0.22414871144909168, + "grad_norm": 4.86674165725708, + "learning_rate": 2.799892813837766e-05, + "loss": 0.27881574630737305, + "step": 1658 + }, + { + "epoch": 0.22428390367553866, + "grad_norm": 1.4903746843338013, + "learning_rate": 2.7995511334945315e-05, + "loss": 0.21027803421020508, + "step": 1659 + }, + { + "epoch": 0.22441909590198564, + "grad_norm": 1.9718495607376099, + "learning_rate": 2.7992091825778202e-05, + "loss": 0.1566767692565918, + "step": 1660 + }, + { + "epoch": 0.22455428812843262, + "grad_norm": 1.6419471502304077, + "learning_rate": 2.7988669611588295e-05, + "loss": 0.2285451889038086, + "step": 1661 + }, + { + "epoch": 0.2246894803548796, + "grad_norm": 0.7592140436172485, + "learning_rate": 2.7985244693088112e-05, + "loss": 0.12282180786132812, + "step": 1662 + }, + { + "epoch": 0.22482467258132657, + "grad_norm": 3.97247314453125, + "learning_rate": 2.7981817070990736e-05, + "loss": 0.21082305908203125, + "step": 1663 + }, + { + "epoch": 0.22495986480777355, + "grad_norm": 2.3529508113861084, + "learning_rate": 2.7978386746009813e-05, + "loss": 0.20727157592773438, + "step": 1664 + }, + { + "epoch": 0.22509505703422053, + "grad_norm": 1.3219422101974487, + "learning_rate": 2.797495371885957e-05, + "loss": 0.16812896728515625, + "step": 1665 + }, + { + "epoch": 0.2252302492606675, + "grad_norm": 1.6870778799057007, + "learning_rate": 2.7971517990254768e-05, + "loss": 0.1778392791748047, + "step": 1666 + }, + { + "epoch": 0.2253654414871145, + "grad_norm": 1.3643516302108765, + "learning_rate": 2.7968079560910744e-05, + "loss": 0.19860458374023438, + "step": 1667 + }, + { + "epoch": 0.22550063371356147, + "grad_norm": 1.1700143814086914, + "learning_rate": 2.7964638431543402e-05, + "loss": 0.19585037231445312, + "step": 1668 + }, + { + "epoch": 0.22563582594000844, + "grad_norm": 1.4670829772949219, + "learning_rate": 2.7961194602869208e-05, + "loss": 0.1944427490234375, + "step": 1669 + }, + { + "epoch": 0.22577101816645542, + "grad_norm": 1.8440093994140625, + "learning_rate": 2.7957748075605178e-05, + "loss": 0.2509498596191406, + "step": 1670 + }, + { + "epoch": 0.2259062103929024, + "grad_norm": 2.90238356590271, + "learning_rate": 2.7954298850468898e-05, + "loss": 0.1934528350830078, + "step": 1671 + }, + { + "epoch": 0.22604140261934938, + "grad_norm": 3.8909621238708496, + "learning_rate": 2.7950846928178517e-05, + "loss": 0.22019195556640625, + "step": 1672 + }, + { + "epoch": 0.22617659484579636, + "grad_norm": 1.6296356916427612, + "learning_rate": 2.7947392309452744e-05, + "loss": 0.1519021987915039, + "step": 1673 + }, + { + "epoch": 0.22631178707224334, + "grad_norm": 1.9315357208251953, + "learning_rate": 2.7943934995010845e-05, + "loss": 0.22606325149536133, + "step": 1674 + }, + { + "epoch": 0.22644697929869032, + "grad_norm": 1.8270851373672485, + "learning_rate": 2.7940474985572657e-05, + "loss": 0.12842559814453125, + "step": 1675 + }, + { + "epoch": 0.2265821715251373, + "grad_norm": 0.8213422298431396, + "learning_rate": 2.793701228185857e-05, + "loss": 0.14566326141357422, + "step": 1676 + }, + { + "epoch": 0.22671736375158427, + "grad_norm": 2.075437545776367, + "learning_rate": 2.7933546884589536e-05, + "loss": 0.17247819900512695, + "step": 1677 + }, + { + "epoch": 0.22685255597803125, + "grad_norm": 0.8434417843818665, + "learning_rate": 2.7930078794487077e-05, + "loss": 0.1721210479736328, + "step": 1678 + }, + { + "epoch": 0.22698774820447823, + "grad_norm": 1.6527256965637207, + "learning_rate": 2.7926608012273253e-05, + "loss": 0.17689895629882812, + "step": 1679 + }, + { + "epoch": 0.22712294043092524, + "grad_norm": 1.156251311302185, + "learning_rate": 2.7923134538670715e-05, + "loss": 0.18906116485595703, + "step": 1680 + }, + { + "epoch": 0.2272581326573722, + "grad_norm": 1.57206130027771, + "learning_rate": 2.7919658374402645e-05, + "loss": 0.1541461944580078, + "step": 1681 + }, + { + "epoch": 0.2273933248838192, + "grad_norm": 2.581645965576172, + "learning_rate": 2.7916179520192807e-05, + "loss": 0.17291879653930664, + "step": 1682 + }, + { + "epoch": 0.22752851711026617, + "grad_norm": 1.4095185995101929, + "learning_rate": 2.7912697976765516e-05, + "loss": 0.2251582145690918, + "step": 1683 + }, + { + "epoch": 0.22766370933671315, + "grad_norm": 1.3922494649887085, + "learning_rate": 2.790921374484565e-05, + "loss": 0.19935274124145508, + "step": 1684 + }, + { + "epoch": 0.22779890156316013, + "grad_norm": 1.9435997009277344, + "learning_rate": 2.7905726825158637e-05, + "loss": 0.21084976196289062, + "step": 1685 + }, + { + "epoch": 0.2279340937896071, + "grad_norm": 1.2341065406799316, + "learning_rate": 2.7902237218430485e-05, + "loss": 0.14711856842041016, + "step": 1686 + }, + { + "epoch": 0.22806928601605408, + "grad_norm": 1.801182746887207, + "learning_rate": 2.7898744925387735e-05, + "loss": 0.21130895614624023, + "step": 1687 + }, + { + "epoch": 0.22820447824250106, + "grad_norm": 1.7433704137802124, + "learning_rate": 2.7895249946757505e-05, + "loss": 0.1819133758544922, + "step": 1688 + }, + { + "epoch": 0.22833967046894804, + "grad_norm": 1.4107847213745117, + "learning_rate": 2.7891752283267474e-05, + "loss": 0.23377227783203125, + "step": 1689 + }, + { + "epoch": 0.22847486269539502, + "grad_norm": 1.9736055135726929, + "learning_rate": 2.788825193564587e-05, + "loss": 0.14587831497192383, + "step": 1690 + }, + { + "epoch": 0.228610054921842, + "grad_norm": 0.7618198394775391, + "learning_rate": 2.7884748904621483e-05, + "loss": 0.1023244857788086, + "step": 1691 + }, + { + "epoch": 0.22874524714828898, + "grad_norm": 1.1160156726837158, + "learning_rate": 2.7881243190923667e-05, + "loss": 0.19730758666992188, + "step": 1692 + }, + { + "epoch": 0.22888043937473596, + "grad_norm": 1.1658329963684082, + "learning_rate": 2.7877734795282326e-05, + "loss": 0.1697988510131836, + "step": 1693 + }, + { + "epoch": 0.22901563160118293, + "grad_norm": 2.986938953399658, + "learning_rate": 2.7874223718427926e-05, + "loss": 0.20756864547729492, + "step": 1694 + }, + { + "epoch": 0.2291508238276299, + "grad_norm": 1.662099003791809, + "learning_rate": 2.78707099610915e-05, + "loss": 0.1470174789428711, + "step": 1695 + }, + { + "epoch": 0.2292860160540769, + "grad_norm": 1.952976942062378, + "learning_rate": 2.7867193524004618e-05, + "loss": 0.19955062866210938, + "step": 1696 + }, + { + "epoch": 0.22942120828052387, + "grad_norm": 1.5065747499465942, + "learning_rate": 2.786367440789943e-05, + "loss": 0.1539320945739746, + "step": 1697 + }, + { + "epoch": 0.22955640050697085, + "grad_norm": 1.4272929430007935, + "learning_rate": 2.7860152613508634e-05, + "loss": 0.20050048828125, + "step": 1698 + }, + { + "epoch": 0.22969159273341783, + "grad_norm": 1.5121279954910278, + "learning_rate": 2.7856628141565484e-05, + "loss": 0.2255077362060547, + "step": 1699 + }, + { + "epoch": 0.2298267849598648, + "grad_norm": 1.4406825304031372, + "learning_rate": 2.7853100992803797e-05, + "loss": 0.1759946346282959, + "step": 1700 + }, + { + "epoch": 0.22996197718631178, + "grad_norm": 4.906989574432373, + "learning_rate": 2.7849571167957942e-05, + "loss": 0.21625328063964844, + "step": 1701 + }, + { + "epoch": 0.23009716941275876, + "grad_norm": 4.190120697021484, + "learning_rate": 2.784603866776285e-05, + "loss": 0.24311447143554688, + "step": 1702 + }, + { + "epoch": 0.23023236163920574, + "grad_norm": 6.110686302185059, + "learning_rate": 2.7842503492953996e-05, + "loss": 0.23919105529785156, + "step": 1703 + }, + { + "epoch": 0.23036755386565272, + "grad_norm": 1.659705400466919, + "learning_rate": 2.7838965644267435e-05, + "loss": 0.20405960083007812, + "step": 1704 + }, + { + "epoch": 0.2305027460920997, + "grad_norm": 1.594421148300171, + "learning_rate": 2.7835425122439764e-05, + "loss": 0.18706321716308594, + "step": 1705 + }, + { + "epoch": 0.23063793831854668, + "grad_norm": 2.0728964805603027, + "learning_rate": 2.7831881928208128e-05, + "loss": 0.1926860809326172, + "step": 1706 + }, + { + "epoch": 0.23077313054499365, + "grad_norm": 2.2609057426452637, + "learning_rate": 2.7828336062310252e-05, + "loss": 0.20254802703857422, + "step": 1707 + }, + { + "epoch": 0.23090832277144063, + "grad_norm": 1.7946500778198242, + "learning_rate": 2.7824787525484403e-05, + "loss": 0.19638824462890625, + "step": 1708 + }, + { + "epoch": 0.2310435149978876, + "grad_norm": 1.8342368602752686, + "learning_rate": 2.7821236318469395e-05, + "loss": 0.22933197021484375, + "step": 1709 + }, + { + "epoch": 0.2311787072243346, + "grad_norm": 2.01474928855896, + "learning_rate": 2.7817682442004615e-05, + "loss": 0.19035649299621582, + "step": 1710 + }, + { + "epoch": 0.23131389945078157, + "grad_norm": 0.974385142326355, + "learning_rate": 2.781412589683e-05, + "loss": 0.20328903198242188, + "step": 1711 + }, + { + "epoch": 0.23144909167722855, + "grad_norm": 2.2072834968566895, + "learning_rate": 2.781056668368604e-05, + "loss": 0.19321441650390625, + "step": 1712 + }, + { + "epoch": 0.23158428390367553, + "grad_norm": 1.1945016384124756, + "learning_rate": 2.780700480331378e-05, + "loss": 0.1665663719177246, + "step": 1713 + }, + { + "epoch": 0.23171947613012253, + "grad_norm": 1.3647164106369019, + "learning_rate": 2.7803440256454825e-05, + "loss": 0.19145965576171875, + "step": 1714 + }, + { + "epoch": 0.2318546683565695, + "grad_norm": 2.9263415336608887, + "learning_rate": 2.7799873043851337e-05, + "loss": 0.2088308334350586, + "step": 1715 + }, + { + "epoch": 0.2319898605830165, + "grad_norm": 0.8898067474365234, + "learning_rate": 2.7796303166246016e-05, + "loss": 0.1266767978668213, + "step": 1716 + }, + { + "epoch": 0.23212505280946347, + "grad_norm": 1.8486448526382446, + "learning_rate": 2.7792730624382142e-05, + "loss": 0.229888916015625, + "step": 1717 + }, + { + "epoch": 0.23226024503591045, + "grad_norm": 4.6757073402404785, + "learning_rate": 2.778915541900353e-05, + "loss": 0.25392913818359375, + "step": 1718 + }, + { + "epoch": 0.23239543726235742, + "grad_norm": 3.0886518955230713, + "learning_rate": 2.7785577550854566e-05, + "loss": 0.2079906463623047, + "step": 1719 + }, + { + "epoch": 0.2325306294888044, + "grad_norm": 4.418379306793213, + "learning_rate": 2.778199702068017e-05, + "loss": 0.18099403381347656, + "step": 1720 + }, + { + "epoch": 0.23266582171525138, + "grad_norm": 2.943293571472168, + "learning_rate": 2.777841382922583e-05, + "loss": 0.21305084228515625, + "step": 1721 + }, + { + "epoch": 0.23280101394169836, + "grad_norm": 1.3767356872558594, + "learning_rate": 2.7774827977237596e-05, + "loss": 0.168975830078125, + "step": 1722 + }, + { + "epoch": 0.23293620616814534, + "grad_norm": 2.19751238822937, + "learning_rate": 2.777123946546205e-05, + "loss": 0.19993972778320312, + "step": 1723 + }, + { + "epoch": 0.23307139839459232, + "grad_norm": 2.026576519012451, + "learning_rate": 2.776764829464634e-05, + "loss": 0.20911407470703125, + "step": 1724 + }, + { + "epoch": 0.2332065906210393, + "grad_norm": 2.5773041248321533, + "learning_rate": 2.7764054465538173e-05, + "loss": 0.15764427185058594, + "step": 1725 + }, + { + "epoch": 0.23334178284748627, + "grad_norm": 2.362438440322876, + "learning_rate": 2.7760457978885794e-05, + "loss": 0.1825408935546875, + "step": 1726 + }, + { + "epoch": 0.23347697507393325, + "grad_norm": 1.6827441453933716, + "learning_rate": 2.7756858835438022e-05, + "loss": 0.22827482223510742, + "step": 1727 + }, + { + "epoch": 0.23361216730038023, + "grad_norm": 0.820035457611084, + "learning_rate": 2.7753257035944216e-05, + "loss": 0.13376903533935547, + "step": 1728 + }, + { + "epoch": 0.2337473595268272, + "grad_norm": 0.6238997578620911, + "learning_rate": 2.7749652581154277e-05, + "loss": 0.13058167695999146, + "step": 1729 + }, + { + "epoch": 0.2338825517532742, + "grad_norm": 1.0257359743118286, + "learning_rate": 2.7746045471818685e-05, + "loss": 0.17197132110595703, + "step": 1730 + }, + { + "epoch": 0.23401774397972117, + "grad_norm": 1.614213228225708, + "learning_rate": 2.7742435708688458e-05, + "loss": 0.16758191585540771, + "step": 1731 + }, + { + "epoch": 0.23415293620616814, + "grad_norm": 1.3026021718978882, + "learning_rate": 2.7738823292515167e-05, + "loss": 0.16087055206298828, + "step": 1732 + }, + { + "epoch": 0.23428812843261512, + "grad_norm": 2.4020371437072754, + "learning_rate": 2.773520822405093e-05, + "loss": 0.23744964599609375, + "step": 1733 + }, + { + "epoch": 0.2344233206590621, + "grad_norm": 1.2131128311157227, + "learning_rate": 2.7731590504048433e-05, + "loss": 0.11850261688232422, + "step": 1734 + }, + { + "epoch": 0.23455851288550908, + "grad_norm": 1.6539579629898071, + "learning_rate": 2.7727970133260896e-05, + "loss": 0.2256336212158203, + "step": 1735 + }, + { + "epoch": 0.23469370511195606, + "grad_norm": 1.4063458442687988, + "learning_rate": 2.7724347112442106e-05, + "loss": 0.17973995208740234, + "step": 1736 + }, + { + "epoch": 0.23482889733840304, + "grad_norm": 1.7707152366638184, + "learning_rate": 2.772072144234639e-05, + "loss": 0.1767895221710205, + "step": 1737 + }, + { + "epoch": 0.23496408956485001, + "grad_norm": 2.202954053878784, + "learning_rate": 2.7717093123728634e-05, + "loss": 0.19563531875610352, + "step": 1738 + }, + { + "epoch": 0.235099281791297, + "grad_norm": 2.523167610168457, + "learning_rate": 2.771346215734428e-05, + "loss": 0.2375659942626953, + "step": 1739 + }, + { + "epoch": 0.23523447401774397, + "grad_norm": 1.6361443996429443, + "learning_rate": 2.7709828543949302e-05, + "loss": 0.19640731811523438, + "step": 1740 + }, + { + "epoch": 0.23536966624419095, + "grad_norm": 2.5076193809509277, + "learning_rate": 2.770619228430025e-05, + "loss": 0.20153188705444336, + "step": 1741 + }, + { + "epoch": 0.23550485847063793, + "grad_norm": 2.2669665813446045, + "learning_rate": 2.77025533791542e-05, + "loss": 0.19234657287597656, + "step": 1742 + }, + { + "epoch": 0.2356400506970849, + "grad_norm": 0.8073531985282898, + "learning_rate": 2.76989118292688e-05, + "loss": 0.16974449157714844, + "step": 1743 + }, + { + "epoch": 0.23577524292353189, + "grad_norm": 1.5695230960845947, + "learning_rate": 2.7695267635402242e-05, + "loss": 0.22649002075195312, + "step": 1744 + }, + { + "epoch": 0.23591043514997886, + "grad_norm": 2.3050343990325928, + "learning_rate": 2.7691620798313258e-05, + "loss": 0.1620922088623047, + "step": 1745 + }, + { + "epoch": 0.23604562737642584, + "grad_norm": 2.264643669128418, + "learning_rate": 2.7687971318761145e-05, + "loss": 0.11782073974609375, + "step": 1746 + }, + { + "epoch": 0.23618081960287282, + "grad_norm": 2.5366272926330566, + "learning_rate": 2.7684319197505746e-05, + "loss": 0.19781208038330078, + "step": 1747 + }, + { + "epoch": 0.23631601182931983, + "grad_norm": 0.8493217825889587, + "learning_rate": 2.7680664435307446e-05, + "loss": 0.14930152893066406, + "step": 1748 + }, + { + "epoch": 0.2364512040557668, + "grad_norm": 2.8321502208709717, + "learning_rate": 2.767700703292719e-05, + "loss": 0.20679473876953125, + "step": 1749 + }, + { + "epoch": 0.23658639628221378, + "grad_norm": 1.8636189699172974, + "learning_rate": 2.767334699112647e-05, + "loss": 0.2142314910888672, + "step": 1750 + }, + { + "epoch": 0.23672158850866076, + "grad_norm": 0.8879403471946716, + "learning_rate": 2.7669684310667318e-05, + "loss": 0.13498878479003906, + "step": 1751 + }, + { + "epoch": 0.23685678073510774, + "grad_norm": 2.7346439361572266, + "learning_rate": 2.7666018992312333e-05, + "loss": 0.17259657382965088, + "step": 1752 + }, + { + "epoch": 0.23699197296155472, + "grad_norm": 1.0043563842773438, + "learning_rate": 2.7662351036824653e-05, + "loss": 0.182769775390625, + "step": 1753 + }, + { + "epoch": 0.2371271651880017, + "grad_norm": 3.7860617637634277, + "learning_rate": 2.7658680444967964e-05, + "loss": 0.1889791488647461, + "step": 1754 + }, + { + "epoch": 0.23726235741444868, + "grad_norm": 3.9154348373413086, + "learning_rate": 2.76550072175065e-05, + "loss": 0.22542619705200195, + "step": 1755 + }, + { + "epoch": 0.23739754964089566, + "grad_norm": 1.5919619798660278, + "learning_rate": 2.7651331355205044e-05, + "loss": 0.20059490203857422, + "step": 1756 + }, + { + "epoch": 0.23753274186734263, + "grad_norm": 1.0741440057754517, + "learning_rate": 2.7647652858828936e-05, + "loss": 0.18515586853027344, + "step": 1757 + }, + { + "epoch": 0.2376679340937896, + "grad_norm": 1.501904845237732, + "learning_rate": 2.764397172914406e-05, + "loss": 0.2596282958984375, + "step": 1758 + }, + { + "epoch": 0.2378031263202366, + "grad_norm": 2.8270175457000732, + "learning_rate": 2.7640287966916845e-05, + "loss": 0.17212677001953125, + "step": 1759 + }, + { + "epoch": 0.23793831854668357, + "grad_norm": 2.288524866104126, + "learning_rate": 2.7636601572914266e-05, + "loss": 0.20764827728271484, + "step": 1760 + }, + { + "epoch": 0.23807351077313055, + "grad_norm": 1.3019907474517822, + "learning_rate": 2.7632912547903855e-05, + "loss": 0.1292734146118164, + "step": 1761 + }, + { + "epoch": 0.23820870299957753, + "grad_norm": 0.9865933060646057, + "learning_rate": 2.7629220892653685e-05, + "loss": 0.19135475158691406, + "step": 1762 + }, + { + "epoch": 0.2383438952260245, + "grad_norm": 2.4502198696136475, + "learning_rate": 2.7625526607932378e-05, + "loss": 0.21588802337646484, + "step": 1763 + }, + { + "epoch": 0.23847908745247148, + "grad_norm": 1.4543583393096924, + "learning_rate": 2.76218296945091e-05, + "loss": 0.20258712768554688, + "step": 1764 + }, + { + "epoch": 0.23861427967891846, + "grad_norm": 1.5020910501480103, + "learning_rate": 2.7618130153153577e-05, + "loss": 0.1699810028076172, + "step": 1765 + }, + { + "epoch": 0.23874947190536544, + "grad_norm": 0.9557843208312988, + "learning_rate": 2.7614427984636063e-05, + "loss": 0.1514110565185547, + "step": 1766 + }, + { + "epoch": 0.23888466413181242, + "grad_norm": 4.01826810836792, + "learning_rate": 2.7610723189727377e-05, + "loss": 0.1769256591796875, + "step": 1767 + }, + { + "epoch": 0.2390198563582594, + "grad_norm": 1.3220897912979126, + "learning_rate": 2.760701576919888e-05, + "loss": 0.17946910858154297, + "step": 1768 + }, + { + "epoch": 0.23915504858470638, + "grad_norm": 3.866835594177246, + "learning_rate": 2.760330572382246e-05, + "loss": 0.2404017448425293, + "step": 1769 + }, + { + "epoch": 0.23929024081115335, + "grad_norm": 3.6667068004608154, + "learning_rate": 2.7599593054370584e-05, + "loss": 0.19556808471679688, + "step": 1770 + }, + { + "epoch": 0.23942543303760033, + "grad_norm": 1.2638391256332397, + "learning_rate": 2.7595877761616246e-05, + "loss": 0.18737506866455078, + "step": 1771 + }, + { + "epoch": 0.2395606252640473, + "grad_norm": 2.6164989471435547, + "learning_rate": 2.759215984633299e-05, + "loss": 0.220977783203125, + "step": 1772 + }, + { + "epoch": 0.2396958174904943, + "grad_norm": 1.6919876337051392, + "learning_rate": 2.7588439309294902e-05, + "loss": 0.16015052795410156, + "step": 1773 + }, + { + "epoch": 0.23983100971694127, + "grad_norm": 4.301952362060547, + "learning_rate": 2.7584716151276623e-05, + "loss": 0.22281265258789062, + "step": 1774 + }, + { + "epoch": 0.23996620194338825, + "grad_norm": 2.655496120452881, + "learning_rate": 2.7580990373053325e-05, + "loss": 0.18245506286621094, + "step": 1775 + }, + { + "epoch": 0.24010139416983522, + "grad_norm": 4.732064247131348, + "learning_rate": 2.7577261975400747e-05, + "loss": 0.2396221160888672, + "step": 1776 + }, + { + "epoch": 0.2402365863962822, + "grad_norm": 3.0432991981506348, + "learning_rate": 2.7573530959095154e-05, + "loss": 0.18137884140014648, + "step": 1777 + }, + { + "epoch": 0.24037177862272918, + "grad_norm": 1.0096946954727173, + "learning_rate": 2.756979732491336e-05, + "loss": 0.1828598976135254, + "step": 1778 + }, + { + "epoch": 0.24050697084917616, + "grad_norm": 3.0186798572540283, + "learning_rate": 2.756606107363274e-05, + "loss": 0.17733001708984375, + "step": 1779 + }, + { + "epoch": 0.24064216307562314, + "grad_norm": 3.2225539684295654, + "learning_rate": 2.7562322206031192e-05, + "loss": 0.19584250450134277, + "step": 1780 + }, + { + "epoch": 0.24077735530207012, + "grad_norm": 1.3573607206344604, + "learning_rate": 2.7558580722887166e-05, + "loss": 0.19092178344726562, + "step": 1781 + }, + { + "epoch": 0.24091254752851712, + "grad_norm": 2.0956177711486816, + "learning_rate": 2.7554836624979666e-05, + "loss": 0.17461013793945312, + "step": 1782 + }, + { + "epoch": 0.2410477397549641, + "grad_norm": 3.6138434410095215, + "learning_rate": 2.7551089913088233e-05, + "loss": 0.20109272003173828, + "step": 1783 + }, + { + "epoch": 0.24118293198141108, + "grad_norm": 1.2328728437423706, + "learning_rate": 2.7547340587992948e-05, + "loss": 0.2194671630859375, + "step": 1784 + }, + { + "epoch": 0.24131812420785806, + "grad_norm": 1.6233024597167969, + "learning_rate": 2.754358865047444e-05, + "loss": 0.18277359008789062, + "step": 1785 + }, + { + "epoch": 0.24145331643430504, + "grad_norm": 2.790472984313965, + "learning_rate": 2.7539834101313885e-05, + "loss": 0.2407855987548828, + "step": 1786 + }, + { + "epoch": 0.24158850866075202, + "grad_norm": 1.1314349174499512, + "learning_rate": 2.7536076941293003e-05, + "loss": 0.1938343048095703, + "step": 1787 + }, + { + "epoch": 0.241723700887199, + "grad_norm": 0.7296061515808105, + "learning_rate": 2.753231717119405e-05, + "loss": 0.15276622772216797, + "step": 1788 + }, + { + "epoch": 0.24185889311364597, + "grad_norm": 1.7662526369094849, + "learning_rate": 2.7528554791799826e-05, + "loss": 0.13840866088867188, + "step": 1789 + }, + { + "epoch": 0.24199408534009295, + "grad_norm": 1.0116701126098633, + "learning_rate": 2.7524789803893686e-05, + "loss": 0.1967926025390625, + "step": 1790 + }, + { + "epoch": 0.24212927756653993, + "grad_norm": 1.804026484489441, + "learning_rate": 2.7521022208259526e-05, + "loss": 0.21604537963867188, + "step": 1791 + }, + { + "epoch": 0.2422644697929869, + "grad_norm": 1.50075101852417, + "learning_rate": 2.7517252005681762e-05, + "loss": 0.19052696228027344, + "step": 1792 + }, + { + "epoch": 0.2423996620194339, + "grad_norm": 2.2202341556549072, + "learning_rate": 2.7513479196945385e-05, + "loss": 0.19433832168579102, + "step": 1793 + }, + { + "epoch": 0.24253485424588087, + "grad_norm": 1.766616702079773, + "learning_rate": 2.750970378283591e-05, + "loss": 0.2039794921875, + "step": 1794 + }, + { + "epoch": 0.24267004647232784, + "grad_norm": 1.2007391452789307, + "learning_rate": 2.7505925764139398e-05, + "loss": 0.13902640342712402, + "step": 1795 + }, + { + "epoch": 0.24280523869877482, + "grad_norm": 1.093062162399292, + "learning_rate": 2.7502145141642447e-05, + "loss": 0.14444732666015625, + "step": 1796 + }, + { + "epoch": 0.2429404309252218, + "grad_norm": 2.9717624187469482, + "learning_rate": 2.7498361916132212e-05, + "loss": 0.18315601348876953, + "step": 1797 + }, + { + "epoch": 0.24307562315166878, + "grad_norm": 0.974408745765686, + "learning_rate": 2.7494576088396376e-05, + "loss": 0.13589000701904297, + "step": 1798 + }, + { + "epoch": 0.24321081537811576, + "grad_norm": 1.7642040252685547, + "learning_rate": 2.749078765922317e-05, + "loss": 0.1761341094970703, + "step": 1799 + }, + { + "epoch": 0.24334600760456274, + "grad_norm": 2.1651322841644287, + "learning_rate": 2.7486996629401366e-05, + "loss": 0.20318889617919922, + "step": 1800 + }, + { + "epoch": 0.24348119983100971, + "grad_norm": 4.523960590362549, + "learning_rate": 2.7483202999720272e-05, + "loss": 0.23863506317138672, + "step": 1801 + }, + { + "epoch": 0.2436163920574567, + "grad_norm": 2.1422088146209717, + "learning_rate": 2.7479406770969747e-05, + "loss": 0.1845703125, + "step": 1802 + }, + { + "epoch": 0.24375158428390367, + "grad_norm": 1.6567132472991943, + "learning_rate": 2.7475607943940182e-05, + "loss": 0.21222305297851562, + "step": 1803 + }, + { + "epoch": 0.24388677651035065, + "grad_norm": 1.6445326805114746, + "learning_rate": 2.7471806519422514e-05, + "loss": 0.20706558227539062, + "step": 1804 + }, + { + "epoch": 0.24402196873679763, + "grad_norm": 0.7460476756095886, + "learning_rate": 2.746800249820822e-05, + "loss": 0.12541675567626953, + "step": 1805 + }, + { + "epoch": 0.2441571609632446, + "grad_norm": 1.324603796005249, + "learning_rate": 2.7464195881089323e-05, + "loss": 0.2231426239013672, + "step": 1806 + }, + { + "epoch": 0.24429235318969159, + "grad_norm": 1.5851218700408936, + "learning_rate": 2.746038666885837e-05, + "loss": 0.18499135971069336, + "step": 1807 + }, + { + "epoch": 0.24442754541613856, + "grad_norm": 1.8500932455062866, + "learning_rate": 2.7456574862308474e-05, + "loss": 0.28872108459472656, + "step": 1808 + }, + { + "epoch": 0.24456273764258554, + "grad_norm": 0.7610194087028503, + "learning_rate": 2.745276046223326e-05, + "loss": 0.10070991516113281, + "step": 1809 + }, + { + "epoch": 0.24469792986903252, + "grad_norm": 1.1204335689544678, + "learning_rate": 2.744894346942691e-05, + "loss": 0.15925097465515137, + "step": 1810 + }, + { + "epoch": 0.2448331220954795, + "grad_norm": 2.9479923248291016, + "learning_rate": 2.744512388468415e-05, + "loss": 0.22916412353515625, + "step": 1811 + }, + { + "epoch": 0.24496831432192648, + "grad_norm": 2.4301834106445312, + "learning_rate": 2.7441301708800227e-05, + "loss": 0.1831226348876953, + "step": 1812 + }, + { + "epoch": 0.24510350654837346, + "grad_norm": 0.6941435933113098, + "learning_rate": 2.7437476942570942e-05, + "loss": 0.1551222801208496, + "step": 1813 + }, + { + "epoch": 0.24523869877482044, + "grad_norm": 1.6689149141311646, + "learning_rate": 2.7433649586792637e-05, + "loss": 0.12225341796875, + "step": 1814 + }, + { + "epoch": 0.2453738910012674, + "grad_norm": 1.4530420303344727, + "learning_rate": 2.7429819642262178e-05, + "loss": 0.16067218780517578, + "step": 1815 + }, + { + "epoch": 0.24550908322771442, + "grad_norm": 2.3543004989624023, + "learning_rate": 2.7425987109776994e-05, + "loss": 0.17044639587402344, + "step": 1816 + }, + { + "epoch": 0.2456442754541614, + "grad_norm": 1.1938011646270752, + "learning_rate": 2.7422151990135022e-05, + "loss": 0.205535888671875, + "step": 1817 + }, + { + "epoch": 0.24577946768060838, + "grad_norm": 2.3887882232666016, + "learning_rate": 2.741831428413477e-05, + "loss": 0.20858478546142578, + "step": 1818 + }, + { + "epoch": 0.24591465990705536, + "grad_norm": 1.386568307876587, + "learning_rate": 2.7414473992575257e-05, + "loss": 0.15534210205078125, + "step": 1819 + }, + { + "epoch": 0.24604985213350233, + "grad_norm": 0.8983409404754639, + "learning_rate": 2.7410631116256054e-05, + "loss": 0.2323780059814453, + "step": 1820 + }, + { + "epoch": 0.2461850443599493, + "grad_norm": 2.2798032760620117, + "learning_rate": 2.7406785655977275e-05, + "loss": 0.16332530975341797, + "step": 1821 + }, + { + "epoch": 0.2463202365863963, + "grad_norm": 3.062772750854492, + "learning_rate": 2.7402937612539563e-05, + "loss": 0.2181262969970703, + "step": 1822 + }, + { + "epoch": 0.24645542881284327, + "grad_norm": 2.45572829246521, + "learning_rate": 2.7399086986744095e-05, + "loss": 0.19539451599121094, + "step": 1823 + }, + { + "epoch": 0.24659062103929025, + "grad_norm": 1.6240006685256958, + "learning_rate": 2.7395233779392598e-05, + "loss": 0.18505859375, + "step": 1824 + }, + { + "epoch": 0.24672581326573723, + "grad_norm": 3.154146194458008, + "learning_rate": 2.739137799128733e-05, + "loss": 0.1941823959350586, + "step": 1825 + }, + { + "epoch": 0.2468610054921842, + "grad_norm": 1.6081831455230713, + "learning_rate": 2.7387519623231085e-05, + "loss": 0.16661453247070312, + "step": 1826 + }, + { + "epoch": 0.24699619771863118, + "grad_norm": 0.9047930836677551, + "learning_rate": 2.7383658676027195e-05, + "loss": 0.2242288589477539, + "step": 1827 + }, + { + "epoch": 0.24713138994507816, + "grad_norm": 2.931452751159668, + "learning_rate": 2.7379795150479535e-05, + "loss": 0.23974227905273438, + "step": 1828 + }, + { + "epoch": 0.24726658217152514, + "grad_norm": 0.6701819896697998, + "learning_rate": 2.73759290473925e-05, + "loss": 0.169036865234375, + "step": 1829 + }, + { + "epoch": 0.24740177439797212, + "grad_norm": 2.6725099086761475, + "learning_rate": 2.7372060367571044e-05, + "loss": 0.26139163970947266, + "step": 1830 + }, + { + "epoch": 0.2475369666244191, + "grad_norm": 1.5986239910125732, + "learning_rate": 2.7368189111820648e-05, + "loss": 0.23374414443969727, + "step": 1831 + }, + { + "epoch": 0.24767215885086608, + "grad_norm": 1.6976271867752075, + "learning_rate": 2.736431528094732e-05, + "loss": 0.17014694213867188, + "step": 1832 + }, + { + "epoch": 0.24780735107731305, + "grad_norm": 1.7940839529037476, + "learning_rate": 2.7360438875757614e-05, + "loss": 0.2106151580810547, + "step": 1833 + }, + { + "epoch": 0.24794254330376003, + "grad_norm": 1.5498751401901245, + "learning_rate": 2.7356559897058624e-05, + "loss": 0.20965957641601562, + "step": 1834 + }, + { + "epoch": 0.248077735530207, + "grad_norm": 2.21406888961792, + "learning_rate": 2.735267834565797e-05, + "loss": 0.1727910041809082, + "step": 1835 + }, + { + "epoch": 0.248212927756654, + "grad_norm": 1.5922492742538452, + "learning_rate": 2.734879422236381e-05, + "loss": 0.2078418731689453, + "step": 1836 + }, + { + "epoch": 0.24834811998310097, + "grad_norm": 2.4378628730773926, + "learning_rate": 2.734490752798484e-05, + "loss": 0.2581634521484375, + "step": 1837 + }, + { + "epoch": 0.24848331220954795, + "grad_norm": 0.8777881264686584, + "learning_rate": 2.7341018263330296e-05, + "loss": 0.13143301010131836, + "step": 1838 + }, + { + "epoch": 0.24861850443599492, + "grad_norm": 0.789832592010498, + "learning_rate": 2.7337126429209935e-05, + "loss": 0.15216636657714844, + "step": 1839 + }, + { + "epoch": 0.2487536966624419, + "grad_norm": 2.5479233264923096, + "learning_rate": 2.7333232026434064e-05, + "loss": 0.21466970443725586, + "step": 1840 + }, + { + "epoch": 0.24888888888888888, + "grad_norm": 1.6906899213790894, + "learning_rate": 2.7329335055813517e-05, + "loss": 0.169189453125, + "step": 1841 + }, + { + "epoch": 0.24902408111533586, + "grad_norm": 1.350311517715454, + "learning_rate": 2.732543551815966e-05, + "loss": 0.16113853454589844, + "step": 1842 + }, + { + "epoch": 0.24915927334178284, + "grad_norm": 4.948683261871338, + "learning_rate": 2.7321533414284404e-05, + "loss": 0.197845458984375, + "step": 1843 + }, + { + "epoch": 0.24929446556822982, + "grad_norm": 1.508175253868103, + "learning_rate": 2.731762874500018e-05, + "loss": 0.17602157592773438, + "step": 1844 + }, + { + "epoch": 0.2494296577946768, + "grad_norm": 1.8899592161178589, + "learning_rate": 2.7313721511119972e-05, + "loss": 0.188720703125, + "step": 1845 + }, + { + "epoch": 0.24956485002112377, + "grad_norm": 0.7386027574539185, + "learning_rate": 2.7309811713457275e-05, + "loss": 0.15043258666992188, + "step": 1846 + }, + { + "epoch": 0.24970004224757075, + "grad_norm": 1.8178147077560425, + "learning_rate": 2.730589935282614e-05, + "loss": 0.23105621337890625, + "step": 1847 + }, + { + "epoch": 0.24983523447401773, + "grad_norm": 0.8323884010314941, + "learning_rate": 2.7301984430041135e-05, + "loss": 0.16028594970703125, + "step": 1848 + }, + { + "epoch": 0.2499704267004647, + "grad_norm": 1.4203921556472778, + "learning_rate": 2.7298066945917368e-05, + "loss": 0.1997203826904297, + "step": 1849 + }, + { + "epoch": 0.2501056189269117, + "grad_norm": 0.9265887141227722, + "learning_rate": 2.7294146901270482e-05, + "loss": 0.19940805435180664, + "step": 1850 + }, + { + "epoch": 0.25024081115335867, + "grad_norm": 1.0390900373458862, + "learning_rate": 2.7290224296916653e-05, + "loss": 0.2062664031982422, + "step": 1851 + }, + { + "epoch": 0.25037600337980565, + "grad_norm": 2.799058198928833, + "learning_rate": 2.7286299133672584e-05, + "loss": 0.21361827850341797, + "step": 1852 + }, + { + "epoch": 0.2505111956062526, + "grad_norm": 1.4033443927764893, + "learning_rate": 2.728237141235552e-05, + "loss": 0.15416717529296875, + "step": 1853 + }, + { + "epoch": 0.2506463878326996, + "grad_norm": 0.8309803605079651, + "learning_rate": 2.727844113378322e-05, + "loss": 0.19065475463867188, + "step": 1854 + }, + { + "epoch": 0.2507815800591466, + "grad_norm": 1.404348611831665, + "learning_rate": 2.7274508298774013e-05, + "loss": 0.2071218490600586, + "step": 1855 + }, + { + "epoch": 0.25091677228559356, + "grad_norm": 1.0040569305419922, + "learning_rate": 2.727057290814672e-05, + "loss": 0.20615386962890625, + "step": 1856 + }, + { + "epoch": 0.25105196451204054, + "grad_norm": 2.0930609703063965, + "learning_rate": 2.7266634962720704e-05, + "loss": 0.18292236328125, + "step": 1857 + }, + { + "epoch": 0.2511871567384875, + "grad_norm": 1.7553514242172241, + "learning_rate": 2.726269446331588e-05, + "loss": 0.17391109466552734, + "step": 1858 + }, + { + "epoch": 0.2513223489649345, + "grad_norm": 0.946629524230957, + "learning_rate": 2.7258751410752676e-05, + "loss": 0.19818878173828125, + "step": 1859 + }, + { + "epoch": 0.2514575411913815, + "grad_norm": 2.2126502990722656, + "learning_rate": 2.725480580585206e-05, + "loss": 0.1741102933883667, + "step": 1860 + }, + { + "epoch": 0.25159273341782845, + "grad_norm": 1.352871298789978, + "learning_rate": 2.7250857649435522e-05, + "loss": 0.19280147552490234, + "step": 1861 + }, + { + "epoch": 0.25172792564427543, + "grad_norm": 1.9207581281661987, + "learning_rate": 2.724690694232509e-05, + "loss": 0.1536569595336914, + "step": 1862 + }, + { + "epoch": 0.2518631178707224, + "grad_norm": 1.9133845567703247, + "learning_rate": 2.7242953685343327e-05, + "loss": 0.194671630859375, + "step": 1863 + }, + { + "epoch": 0.2519983100971694, + "grad_norm": 1.4482982158660889, + "learning_rate": 2.723899787931332e-05, + "loss": 0.1819009780883789, + "step": 1864 + }, + { + "epoch": 0.25213350232361637, + "grad_norm": 1.0874003171920776, + "learning_rate": 2.7235039525058684e-05, + "loss": 0.17987632751464844, + "step": 1865 + }, + { + "epoch": 0.25226869455006334, + "grad_norm": 2.602050304412842, + "learning_rate": 2.7231078623403575e-05, + "loss": 0.18448734283447266, + "step": 1866 + }, + { + "epoch": 0.2524038867765104, + "grad_norm": 1.9931179285049438, + "learning_rate": 2.722711517517267e-05, + "loss": 0.17292213439941406, + "step": 1867 + }, + { + "epoch": 0.25253907900295736, + "grad_norm": 2.9078757762908936, + "learning_rate": 2.7223149181191187e-05, + "loss": 0.23421669006347656, + "step": 1868 + }, + { + "epoch": 0.25267427122940433, + "grad_norm": 1.5477068424224854, + "learning_rate": 2.7219180642284864e-05, + "loss": 0.19437646865844727, + "step": 1869 + }, + { + "epoch": 0.2528094634558513, + "grad_norm": 1.6026208400726318, + "learning_rate": 2.721520955927997e-05, + "loss": 0.21612930297851562, + "step": 1870 + }, + { + "epoch": 0.2529446556822983, + "grad_norm": 1.3329172134399414, + "learning_rate": 2.7211235933003302e-05, + "loss": 0.2152118682861328, + "step": 1871 + }, + { + "epoch": 0.25307984790874527, + "grad_norm": 0.8584641218185425, + "learning_rate": 2.72072597642822e-05, + "loss": 0.12581825256347656, + "step": 1872 + }, + { + "epoch": 0.25321504013519225, + "grad_norm": 1.4131656885147095, + "learning_rate": 2.7203281053944512e-05, + "loss": 0.2525138854980469, + "step": 1873 + }, + { + "epoch": 0.2533502323616392, + "grad_norm": 1.1658369302749634, + "learning_rate": 2.719929980281864e-05, + "loss": 0.14561176300048828, + "step": 1874 + }, + { + "epoch": 0.2534854245880862, + "grad_norm": 1.277383804321289, + "learning_rate": 2.719531601173349e-05, + "loss": 0.2118206024169922, + "step": 1875 + }, + { + "epoch": 0.2536206168145332, + "grad_norm": 0.8549351692199707, + "learning_rate": 2.7191329681518512e-05, + "loss": 0.1967754364013672, + "step": 1876 + }, + { + "epoch": 0.25375580904098016, + "grad_norm": 1.0922499895095825, + "learning_rate": 2.7187340813003682e-05, + "loss": 0.18384170532226562, + "step": 1877 + }, + { + "epoch": 0.25389100126742714, + "grad_norm": 0.7149601578712463, + "learning_rate": 2.718334940701951e-05, + "loss": 0.14149856567382812, + "step": 1878 + }, + { + "epoch": 0.2540261934938741, + "grad_norm": 1.3993784189224243, + "learning_rate": 2.7179355464397014e-05, + "loss": 0.1161503791809082, + "step": 1879 + }, + { + "epoch": 0.2541613857203211, + "grad_norm": 1.866430640220642, + "learning_rate": 2.7175358985967763e-05, + "loss": 0.24223709106445312, + "step": 1880 + }, + { + "epoch": 0.2542965779467681, + "grad_norm": 1.0008230209350586, + "learning_rate": 2.717135997256385e-05, + "loss": 0.1560840606689453, + "step": 1881 + }, + { + "epoch": 0.25443177017321505, + "grad_norm": 2.41662335395813, + "learning_rate": 2.7167358425017882e-05, + "loss": 0.21687889099121094, + "step": 1882 + }, + { + "epoch": 0.25456696239966203, + "grad_norm": 1.9120348691940308, + "learning_rate": 2.7163354344163004e-05, + "loss": 0.18424415588378906, + "step": 1883 + }, + { + "epoch": 0.254702154626109, + "grad_norm": 1.2551332712173462, + "learning_rate": 2.715934773083289e-05, + "loss": 0.13746243715286255, + "step": 1884 + }, + { + "epoch": 0.254837346852556, + "grad_norm": 4.2883524894714355, + "learning_rate": 2.715533858586174e-05, + "loss": 0.23687076568603516, + "step": 1885 + }, + { + "epoch": 0.25497253907900297, + "grad_norm": 1.8226817846298218, + "learning_rate": 2.715132691008427e-05, + "loss": 0.14057111740112305, + "step": 1886 + }, + { + "epoch": 0.25510773130544995, + "grad_norm": 4.030285835266113, + "learning_rate": 2.714731270433574e-05, + "loss": 0.21898365020751953, + "step": 1887 + }, + { + "epoch": 0.2552429235318969, + "grad_norm": 1.1922999620437622, + "learning_rate": 2.7143295969451933e-05, + "loss": 0.1292862892150879, + "step": 1888 + }, + { + "epoch": 0.2553781157583439, + "grad_norm": 0.9621856808662415, + "learning_rate": 2.7139276706269147e-05, + "loss": 0.13312244415283203, + "step": 1889 + }, + { + "epoch": 0.2555133079847909, + "grad_norm": 0.7738590836524963, + "learning_rate": 2.7135254915624213e-05, + "loss": 0.1671161651611328, + "step": 1890 + }, + { + "epoch": 0.25564850021123786, + "grad_norm": 2.4581139087677, + "learning_rate": 2.7131230598354497e-05, + "loss": 0.22307372093200684, + "step": 1891 + }, + { + "epoch": 0.25578369243768484, + "grad_norm": 3.7177722454071045, + "learning_rate": 2.712720375529787e-05, + "loss": 0.23241043090820312, + "step": 1892 + }, + { + "epoch": 0.2559188846641318, + "grad_norm": 5.150735378265381, + "learning_rate": 2.7123174387292758e-05, + "loss": 0.22222423553466797, + "step": 1893 + }, + { + "epoch": 0.2560540768905788, + "grad_norm": 3.285849094390869, + "learning_rate": 2.7119142495178088e-05, + "loss": 0.2296743392944336, + "step": 1894 + }, + { + "epoch": 0.2561892691170258, + "grad_norm": 2.1516175270080566, + "learning_rate": 2.711510807979333e-05, + "loss": 0.1914815902709961, + "step": 1895 + }, + { + "epoch": 0.25632446134347275, + "grad_norm": 1.3553555011749268, + "learning_rate": 2.7111071141978452e-05, + "loss": 0.19443082809448242, + "step": 1896 + }, + { + "epoch": 0.25645965356991973, + "grad_norm": 1.7527267932891846, + "learning_rate": 2.7107031682573987e-05, + "loss": 0.16572976112365723, + "step": 1897 + }, + { + "epoch": 0.2565948457963667, + "grad_norm": 1.9503977298736572, + "learning_rate": 2.710298970242096e-05, + "loss": 0.19137954711914062, + "step": 1898 + }, + { + "epoch": 0.2567300380228137, + "grad_norm": 2.2335622310638428, + "learning_rate": 2.7098945202360937e-05, + "loss": 0.2009143829345703, + "step": 1899 + }, + { + "epoch": 0.25686523024926067, + "grad_norm": 1.4293417930603027, + "learning_rate": 2.7094898183236e-05, + "loss": 0.23547744750976562, + "step": 1900 + }, + { + "epoch": 0.25700042247570765, + "grad_norm": 3.1159415245056152, + "learning_rate": 2.709084864588877e-05, + "loss": 0.23511123657226562, + "step": 1901 + }, + { + "epoch": 0.2571356147021546, + "grad_norm": 1.6290208101272583, + "learning_rate": 2.708679659116237e-05, + "loss": 0.1189870834350586, + "step": 1902 + }, + { + "epoch": 0.2572708069286016, + "grad_norm": 1.7773970365524292, + "learning_rate": 2.708274201990047e-05, + "loss": 0.21961307525634766, + "step": 1903 + }, + { + "epoch": 0.2574059991550486, + "grad_norm": 3.6391491889953613, + "learning_rate": 2.7078684932947247e-05, + "loss": 0.21735668182373047, + "step": 1904 + }, + { + "epoch": 0.25754119138149556, + "grad_norm": 5.386855602264404, + "learning_rate": 2.7074625331147407e-05, + "loss": 0.24686622619628906, + "step": 1905 + }, + { + "epoch": 0.25767638360794254, + "grad_norm": 1.59454345703125, + "learning_rate": 2.7070563215346184e-05, + "loss": 0.2650566101074219, + "step": 1906 + }, + { + "epoch": 0.2578115758343895, + "grad_norm": 2.7635247707366943, + "learning_rate": 2.7066498586389332e-05, + "loss": 0.23914718627929688, + "step": 1907 + }, + { + "epoch": 0.2579467680608365, + "grad_norm": 5.463364601135254, + "learning_rate": 2.7062431445123127e-05, + "loss": 0.29244232177734375, + "step": 1908 + }, + { + "epoch": 0.2580819602872835, + "grad_norm": 1.0602253675460815, + "learning_rate": 2.705836179239437e-05, + "loss": 0.13002872467041016, + "step": 1909 + }, + { + "epoch": 0.25821715251373045, + "grad_norm": 1.020666241645813, + "learning_rate": 2.705428962905039e-05, + "loss": 0.1340007781982422, + "step": 1910 + }, + { + "epoch": 0.25835234474017743, + "grad_norm": 0.893650233745575, + "learning_rate": 2.705021495593902e-05, + "loss": 0.14308738708496094, + "step": 1911 + }, + { + "epoch": 0.2584875369666244, + "grad_norm": 1.1200660467147827, + "learning_rate": 2.704613777390864e-05, + "loss": 0.1405954360961914, + "step": 1912 + }, + { + "epoch": 0.2586227291930714, + "grad_norm": 0.8722280859947205, + "learning_rate": 2.7042058083808135e-05, + "loss": 0.18476104736328125, + "step": 1913 + }, + { + "epoch": 0.25875792141951837, + "grad_norm": 1.9932823181152344, + "learning_rate": 2.7037975886486928e-05, + "loss": 0.2548637390136719, + "step": 1914 + }, + { + "epoch": 0.25889311364596534, + "grad_norm": 1.901976227760315, + "learning_rate": 2.7033891182794942e-05, + "loss": 0.19043922424316406, + "step": 1915 + }, + { + "epoch": 0.2590283058724123, + "grad_norm": 1.4192148447036743, + "learning_rate": 2.7029803973582642e-05, + "loss": 0.22775554656982422, + "step": 1916 + }, + { + "epoch": 0.2591634980988593, + "grad_norm": 1.6903505325317383, + "learning_rate": 2.7025714259701e-05, + "loss": 0.25521135330200195, + "step": 1917 + }, + { + "epoch": 0.2592986903253063, + "grad_norm": 1.6820746660232544, + "learning_rate": 2.7021622042001524e-05, + "loss": 0.16521263122558594, + "step": 1918 + }, + { + "epoch": 0.25943388255175326, + "grad_norm": 1.0428242683410645, + "learning_rate": 2.701752732133623e-05, + "loss": 0.18391036987304688, + "step": 1919 + }, + { + "epoch": 0.25956907477820024, + "grad_norm": 0.8983698487281799, + "learning_rate": 2.7013430098557664e-05, + "loss": 0.15881729125976562, + "step": 1920 + }, + { + "epoch": 0.2597042670046472, + "grad_norm": 1.1442865133285522, + "learning_rate": 2.7009330374518885e-05, + "loss": 0.212158203125, + "step": 1921 + }, + { + "epoch": 0.2598394592310942, + "grad_norm": 1.9967166185379028, + "learning_rate": 2.7005228150073483e-05, + "loss": 0.17375755310058594, + "step": 1922 + }, + { + "epoch": 0.2599746514575412, + "grad_norm": 1.153822898864746, + "learning_rate": 2.7001123426075558e-05, + "loss": 0.15046024322509766, + "step": 1923 + }, + { + "epoch": 0.26010984368398815, + "grad_norm": 1.318525791168213, + "learning_rate": 2.699701620337974e-05, + "loss": 0.17002296447753906, + "step": 1924 + }, + { + "epoch": 0.26024503591043513, + "grad_norm": 1.0753260850906372, + "learning_rate": 2.699290648284117e-05, + "loss": 0.15815973281860352, + "step": 1925 + }, + { + "epoch": 0.2603802281368821, + "grad_norm": 1.1672066450119019, + "learning_rate": 2.6988794265315522e-05, + "loss": 0.17817401885986328, + "step": 1926 + }, + { + "epoch": 0.2605154203633291, + "grad_norm": 2.196876049041748, + "learning_rate": 2.698467955165897e-05, + "loss": 0.21384429931640625, + "step": 1927 + }, + { + "epoch": 0.26065061258977607, + "grad_norm": 2.0762689113616943, + "learning_rate": 2.6980562342728226e-05, + "loss": 0.21181869506835938, + "step": 1928 + }, + { + "epoch": 0.26078580481622304, + "grad_norm": 0.9911255240440369, + "learning_rate": 2.6976442639380516e-05, + "loss": 0.14367055892944336, + "step": 1929 + }, + { + "epoch": 0.26092099704267, + "grad_norm": 3.002906322479248, + "learning_rate": 2.6972320442473583e-05, + "loss": 0.2427806854248047, + "step": 1930 + }, + { + "epoch": 0.261056189269117, + "grad_norm": 1.7476301193237305, + "learning_rate": 2.6968195752865686e-05, + "loss": 0.2146167755126953, + "step": 1931 + }, + { + "epoch": 0.261191381495564, + "grad_norm": 1.9302798509597778, + "learning_rate": 2.6964068571415613e-05, + "loss": 0.15862274169921875, + "step": 1932 + }, + { + "epoch": 0.26132657372201096, + "grad_norm": 1.2121418714523315, + "learning_rate": 2.6959938898982667e-05, + "loss": 0.15786981582641602, + "step": 1933 + }, + { + "epoch": 0.26146176594845794, + "grad_norm": 0.8861576318740845, + "learning_rate": 2.6955806736426657e-05, + "loss": 0.19966506958007812, + "step": 1934 + }, + { + "epoch": 0.26159695817490497, + "grad_norm": 1.776509165763855, + "learning_rate": 2.6951672084607937e-05, + "loss": 0.1838245391845703, + "step": 1935 + }, + { + "epoch": 0.26173215040135195, + "grad_norm": 1.629281759262085, + "learning_rate": 2.694753494438735e-05, + "loss": 0.2515850067138672, + "step": 1936 + }, + { + "epoch": 0.2618673426277989, + "grad_norm": 1.2359305620193481, + "learning_rate": 2.6943395316626272e-05, + "loss": 0.155120849609375, + "step": 1937 + }, + { + "epoch": 0.2620025348542459, + "grad_norm": 0.9176020622253418, + "learning_rate": 2.69392532021866e-05, + "loss": 0.18673133850097656, + "step": 1938 + }, + { + "epoch": 0.2621377270806929, + "grad_norm": 0.8933517336845398, + "learning_rate": 2.693510860193075e-05, + "loss": 0.15508127212524414, + "step": 1939 + }, + { + "epoch": 0.26227291930713986, + "grad_norm": 3.950157880783081, + "learning_rate": 2.6930961516721638e-05, + "loss": 0.20750141143798828, + "step": 1940 + }, + { + "epoch": 0.26240811153358684, + "grad_norm": 1.0190545320510864, + "learning_rate": 2.6926811947422717e-05, + "loss": 0.16823291778564453, + "step": 1941 + }, + { + "epoch": 0.2625433037600338, + "grad_norm": 3.5451526641845703, + "learning_rate": 2.6922659894897946e-05, + "loss": 0.2516508102416992, + "step": 1942 + }, + { + "epoch": 0.2626784959864808, + "grad_norm": 0.65116947889328, + "learning_rate": 2.6918505360011805e-05, + "loss": 0.12068653106689453, + "step": 1943 + }, + { + "epoch": 0.2628136882129278, + "grad_norm": 2.8797261714935303, + "learning_rate": 2.6914348343629292e-05, + "loss": 0.24213027954101562, + "step": 1944 + }, + { + "epoch": 0.26294888043937475, + "grad_norm": 1.2638421058654785, + "learning_rate": 2.6910188846615918e-05, + "loss": 0.15696048736572266, + "step": 1945 + }, + { + "epoch": 0.26308407266582173, + "grad_norm": 1.1718541383743286, + "learning_rate": 2.6906026869837714e-05, + "loss": 0.17064857482910156, + "step": 1946 + }, + { + "epoch": 0.2632192648922687, + "grad_norm": 0.9279624819755554, + "learning_rate": 2.6901862414161222e-05, + "loss": 0.1836872100830078, + "step": 1947 + }, + { + "epoch": 0.2633544571187157, + "grad_norm": 1.4372190237045288, + "learning_rate": 2.689769548045351e-05, + "loss": 0.18924331665039062, + "step": 1948 + }, + { + "epoch": 0.26348964934516267, + "grad_norm": 1.2390385866165161, + "learning_rate": 2.6893526069582154e-05, + "loss": 0.15457820892333984, + "step": 1949 + }, + { + "epoch": 0.26362484157160965, + "grad_norm": 0.7015904188156128, + "learning_rate": 2.6889354182415245e-05, + "loss": 0.10678696632385254, + "step": 1950 + }, + { + "epoch": 0.2637600337980566, + "grad_norm": 0.9320312738418579, + "learning_rate": 2.688517981982139e-05, + "loss": 0.20911598205566406, + "step": 1951 + }, + { + "epoch": 0.2638952260245036, + "grad_norm": 1.5073645114898682, + "learning_rate": 2.6881002982669723e-05, + "loss": 0.1506037712097168, + "step": 1952 + }, + { + "epoch": 0.2640304182509506, + "grad_norm": 1.2263963222503662, + "learning_rate": 2.6876823671829874e-05, + "loss": 0.16829490661621094, + "step": 1953 + }, + { + "epoch": 0.26416561047739756, + "grad_norm": 1.1362329721450806, + "learning_rate": 2.6872641888172e-05, + "loss": 0.17322063446044922, + "step": 1954 + }, + { + "epoch": 0.26430080270384454, + "grad_norm": 4.183491230010986, + "learning_rate": 2.6868457632566774e-05, + "loss": 0.22624492645263672, + "step": 1955 + }, + { + "epoch": 0.2644359949302915, + "grad_norm": 2.6532790660858154, + "learning_rate": 2.6864270905885377e-05, + "loss": 0.2388458251953125, + "step": 1956 + }, + { + "epoch": 0.2645711871567385, + "grad_norm": 2.1013286113739014, + "learning_rate": 2.6860081708999515e-05, + "loss": 0.15811729431152344, + "step": 1957 + }, + { + "epoch": 0.2647063793831855, + "grad_norm": 1.4955461025238037, + "learning_rate": 2.685589004278139e-05, + "loss": 0.28665924072265625, + "step": 1958 + }, + { + "epoch": 0.26484157160963245, + "grad_norm": 1.7254035472869873, + "learning_rate": 2.6851695908103737e-05, + "loss": 0.18318557739257812, + "step": 1959 + }, + { + "epoch": 0.26497676383607943, + "grad_norm": 2.181720495223999, + "learning_rate": 2.6847499305839796e-05, + "loss": 0.1823415756225586, + "step": 1960 + }, + { + "epoch": 0.2651119560625264, + "grad_norm": 1.5420656204223633, + "learning_rate": 2.684330023686332e-05, + "loss": 0.16841506958007812, + "step": 1961 + }, + { + "epoch": 0.2652471482889734, + "grad_norm": 1.5167038440704346, + "learning_rate": 2.6839098702048577e-05, + "loss": 0.13259124755859375, + "step": 1962 + }, + { + "epoch": 0.26538234051542037, + "grad_norm": 4.060940265655518, + "learning_rate": 2.683489470227035e-05, + "loss": 0.20993614196777344, + "step": 1963 + }, + { + "epoch": 0.26551753274186735, + "grad_norm": 1.478872299194336, + "learning_rate": 2.6830688238403936e-05, + "loss": 0.21984004974365234, + "step": 1964 + }, + { + "epoch": 0.2656527249683143, + "grad_norm": 0.7537587285041809, + "learning_rate": 2.682647931132514e-05, + "loss": 0.13451480865478516, + "step": 1965 + }, + { + "epoch": 0.2657879171947613, + "grad_norm": 0.7682000994682312, + "learning_rate": 2.682226792191029e-05, + "loss": 0.1654500961303711, + "step": 1966 + }, + { + "epoch": 0.2659231094212083, + "grad_norm": 1.8770439624786377, + "learning_rate": 2.681805407103621e-05, + "loss": 0.1817154884338379, + "step": 1967 + }, + { + "epoch": 0.26605830164765526, + "grad_norm": 2.2880940437316895, + "learning_rate": 2.6813837759580253e-05, + "loss": 0.15549159049987793, + "step": 1968 + }, + { + "epoch": 0.26619349387410224, + "grad_norm": 1.4084358215332031, + "learning_rate": 2.6809618988420274e-05, + "loss": 0.2133331298828125, + "step": 1969 + }, + { + "epoch": 0.2663286861005492, + "grad_norm": 0.6391538977622986, + "learning_rate": 2.6805397758434647e-05, + "loss": 0.12550926208496094, + "step": 1970 + }, + { + "epoch": 0.2664638783269962, + "grad_norm": 2.349172592163086, + "learning_rate": 2.6801174070502248e-05, + "loss": 0.14751052856445312, + "step": 1971 + }, + { + "epoch": 0.2665990705534432, + "grad_norm": 1.127495288848877, + "learning_rate": 2.679694792550248e-05, + "loss": 0.2141246795654297, + "step": 1972 + }, + { + "epoch": 0.26673426277989015, + "grad_norm": 1.4714776277542114, + "learning_rate": 2.6792719324315248e-05, + "loss": 0.17395401000976562, + "step": 1973 + }, + { + "epoch": 0.26686945500633713, + "grad_norm": 3.0437843799591064, + "learning_rate": 2.678848826782096e-05, + "loss": 0.2095479965209961, + "step": 1974 + }, + { + "epoch": 0.2670046472327841, + "grad_norm": 1.2225019931793213, + "learning_rate": 2.678425475690055e-05, + "loss": 0.2036905288696289, + "step": 1975 + }, + { + "epoch": 0.2671398394592311, + "grad_norm": 1.1206014156341553, + "learning_rate": 2.6780018792435464e-05, + "loss": 0.19078588485717773, + "step": 1976 + }, + { + "epoch": 0.26727503168567807, + "grad_norm": 1.2350801229476929, + "learning_rate": 2.6775780375307645e-05, + "loss": 0.19570541381835938, + "step": 1977 + }, + { + "epoch": 0.26741022391212504, + "grad_norm": 1.7523033618927002, + "learning_rate": 2.6771539506399555e-05, + "loss": 0.182525634765625, + "step": 1978 + }, + { + "epoch": 0.267545416138572, + "grad_norm": 1.6022891998291016, + "learning_rate": 2.6767296186594165e-05, + "loss": 0.14725637435913086, + "step": 1979 + }, + { + "epoch": 0.267680608365019, + "grad_norm": 1.4451727867126465, + "learning_rate": 2.676305041677496e-05, + "loss": 0.1767730712890625, + "step": 1980 + }, + { + "epoch": 0.267815800591466, + "grad_norm": 2.3180975914001465, + "learning_rate": 2.675880219782593e-05, + "loss": 0.16197192668914795, + "step": 1981 + }, + { + "epoch": 0.26795099281791296, + "grad_norm": 1.615830898284912, + "learning_rate": 2.6754551530631575e-05, + "loss": 0.1683483123779297, + "step": 1982 + }, + { + "epoch": 0.26808618504435994, + "grad_norm": 0.9067396521568298, + "learning_rate": 2.6750298416076907e-05, + "loss": 0.20182037353515625, + "step": 1983 + }, + { + "epoch": 0.2682213772708069, + "grad_norm": 2.105661392211914, + "learning_rate": 2.674604285504745e-05, + "loss": 0.23011016845703125, + "step": 1984 + }, + { + "epoch": 0.2683565694972539, + "grad_norm": 1.848522424697876, + "learning_rate": 2.6741784848429235e-05, + "loss": 0.19468402862548828, + "step": 1985 + }, + { + "epoch": 0.2684917617237009, + "grad_norm": 1.1760467290878296, + "learning_rate": 2.67375243971088e-05, + "loss": 0.22147274017333984, + "step": 1986 + }, + { + "epoch": 0.26862695395014785, + "grad_norm": 2.125758171081543, + "learning_rate": 2.6733261501973192e-05, + "loss": 0.18086528778076172, + "step": 1987 + }, + { + "epoch": 0.26876214617659483, + "grad_norm": 2.056044578552246, + "learning_rate": 2.672899616390997e-05, + "loss": 0.15813064575195312, + "step": 1988 + }, + { + "epoch": 0.2688973384030418, + "grad_norm": 1.2650346755981445, + "learning_rate": 2.67247283838072e-05, + "loss": 0.2342967987060547, + "step": 1989 + }, + { + "epoch": 0.2690325306294888, + "grad_norm": 1.013774037361145, + "learning_rate": 2.6720458162553457e-05, + "loss": 0.1737499237060547, + "step": 1990 + }, + { + "epoch": 0.26916772285593576, + "grad_norm": 0.9324135184288025, + "learning_rate": 2.6716185501037822e-05, + "loss": 0.14194679260253906, + "step": 1991 + }, + { + "epoch": 0.26930291508238274, + "grad_norm": 1.2155331373214722, + "learning_rate": 2.671191040014989e-05, + "loss": 0.18551063537597656, + "step": 1992 + }, + { + "epoch": 0.2694381073088297, + "grad_norm": 2.0454673767089844, + "learning_rate": 2.6707632860779756e-05, + "loss": 0.17071914672851562, + "step": 1993 + }, + { + "epoch": 0.2695732995352767, + "grad_norm": 1.2182201147079468, + "learning_rate": 2.6703352883818024e-05, + "loss": 0.2146005630493164, + "step": 1994 + }, + { + "epoch": 0.2697084917617237, + "grad_norm": 1.9126328229904175, + "learning_rate": 2.6699070470155816e-05, + "loss": 0.1687793731689453, + "step": 1995 + }, + { + "epoch": 0.26984368398817066, + "grad_norm": 1.8489106893539429, + "learning_rate": 2.669478562068475e-05, + "loss": 0.20245933532714844, + "step": 1996 + }, + { + "epoch": 0.26997887621461764, + "grad_norm": 1.096756935119629, + "learning_rate": 2.6690498336296955e-05, + "loss": 0.17561301589012146, + "step": 1997 + }, + { + "epoch": 0.2701140684410646, + "grad_norm": 1.7402915954589844, + "learning_rate": 2.6686208617885057e-05, + "loss": 0.21321487426757812, + "step": 1998 + }, + { + "epoch": 0.2702492606675116, + "grad_norm": 0.9610304832458496, + "learning_rate": 2.668191646634221e-05, + "loss": 0.07543182373046875, + "step": 1999 + }, + { + "epoch": 0.27038445289395857, + "grad_norm": 2.4527642726898193, + "learning_rate": 2.667762188256206e-05, + "loss": 0.24045181274414062, + "step": 2000 + }, + { + "epoch": 0.27051964512040555, + "grad_norm": 0.9730849266052246, + "learning_rate": 2.6673324867438764e-05, + "loss": 0.14896011352539062, + "step": 2001 + }, + { + "epoch": 0.2706548373468526, + "grad_norm": 2.105949878692627, + "learning_rate": 2.666902542186698e-05, + "loss": 0.17322158813476562, + "step": 2002 + }, + { + "epoch": 0.27079002957329956, + "grad_norm": 2.5108237266540527, + "learning_rate": 2.666472354674187e-05, + "loss": 0.28958892822265625, + "step": 2003 + }, + { + "epoch": 0.27092522179974654, + "grad_norm": 2.3679802417755127, + "learning_rate": 2.666041924295912e-05, + "loss": 0.2319955825805664, + "step": 2004 + }, + { + "epoch": 0.2710604140261935, + "grad_norm": 1.2818371057510376, + "learning_rate": 2.6656112511414902e-05, + "loss": 0.1999378204345703, + "step": 2005 + }, + { + "epoch": 0.2711956062526405, + "grad_norm": 1.5476380586624146, + "learning_rate": 2.6651803353005896e-05, + "loss": 0.13533973693847656, + "step": 2006 + }, + { + "epoch": 0.2713307984790875, + "grad_norm": 1.0504987239837646, + "learning_rate": 2.66474917686293e-05, + "loss": 0.2556438446044922, + "step": 2007 + }, + { + "epoch": 0.27146599070553445, + "grad_norm": 2.778482437133789, + "learning_rate": 2.664317775918281e-05, + "loss": 0.18377113342285156, + "step": 2008 + }, + { + "epoch": 0.27160118293198143, + "grad_norm": 0.7494098544120789, + "learning_rate": 2.6638861325564615e-05, + "loss": 0.2092266082763672, + "step": 2009 + }, + { + "epoch": 0.2717363751584284, + "grad_norm": 1.8601988554000854, + "learning_rate": 2.6634542468673432e-05, + "loss": 0.18447113037109375, + "step": 2010 + }, + { + "epoch": 0.2718715673848754, + "grad_norm": 2.059854030609131, + "learning_rate": 2.663022118940846e-05, + "loss": 0.22228622436523438, + "step": 2011 + }, + { + "epoch": 0.27200675961132237, + "grad_norm": 1.053313970565796, + "learning_rate": 2.662589748866942e-05, + "loss": 0.18150997161865234, + "step": 2012 + }, + { + "epoch": 0.27214195183776935, + "grad_norm": 0.9462782740592957, + "learning_rate": 2.6621571367356522e-05, + "loss": 0.17937421798706055, + "step": 2013 + }, + { + "epoch": 0.2722771440642163, + "grad_norm": 5.025650501251221, + "learning_rate": 2.6617242826370495e-05, + "loss": 0.23069477081298828, + "step": 2014 + }, + { + "epoch": 0.2724123362906633, + "grad_norm": 0.6408719420433044, + "learning_rate": 2.661291186661256e-05, + "loss": 0.13713550567626953, + "step": 2015 + }, + { + "epoch": 0.2725475285171103, + "grad_norm": 5.929315567016602, + "learning_rate": 2.6608578488984444e-05, + "loss": 0.24613189697265625, + "step": 2016 + }, + { + "epoch": 0.27268272074355726, + "grad_norm": 1.7467080354690552, + "learning_rate": 2.6604242694388388e-05, + "loss": 0.220123291015625, + "step": 2017 + }, + { + "epoch": 0.27281791297000424, + "grad_norm": 3.739717960357666, + "learning_rate": 2.6599904483727116e-05, + "loss": 0.2046041488647461, + "step": 2018 + }, + { + "epoch": 0.2729531051964512, + "grad_norm": 2.6116576194763184, + "learning_rate": 2.6595563857903872e-05, + "loss": 0.17149639129638672, + "step": 2019 + }, + { + "epoch": 0.2730882974228982, + "grad_norm": 1.0223585367202759, + "learning_rate": 2.6591220817822405e-05, + "loss": 0.17108917236328125, + "step": 2020 + }, + { + "epoch": 0.2732234896493452, + "grad_norm": 1.5758193731307983, + "learning_rate": 2.658687536438694e-05, + "loss": 0.2181262969970703, + "step": 2021 + }, + { + "epoch": 0.27335868187579215, + "grad_norm": 1.0403705835342407, + "learning_rate": 2.6582527498502243e-05, + "loss": 0.1490325927734375, + "step": 2022 + }, + { + "epoch": 0.27349387410223913, + "grad_norm": 2.325801134109497, + "learning_rate": 2.6578177221073556e-05, + "loss": 0.163970947265625, + "step": 2023 + }, + { + "epoch": 0.2736290663286861, + "grad_norm": 2.478518009185791, + "learning_rate": 2.6573824533006628e-05, + "loss": 0.23021697998046875, + "step": 2024 + }, + { + "epoch": 0.2737642585551331, + "grad_norm": 1.0423294305801392, + "learning_rate": 2.6569469435207712e-05, + "loss": 0.1589512825012207, + "step": 2025 + }, + { + "epoch": 0.27389945078158007, + "grad_norm": 2.126283884048462, + "learning_rate": 2.656511192858356e-05, + "loss": 0.15384626388549805, + "step": 2026 + }, + { + "epoch": 0.27403464300802705, + "grad_norm": 1.1336771249771118, + "learning_rate": 2.6560752014041438e-05, + "loss": 0.16162919998168945, + "step": 2027 + }, + { + "epoch": 0.274169835234474, + "grad_norm": 1.238110899925232, + "learning_rate": 2.6556389692489098e-05, + "loss": 0.19629192352294922, + "step": 2028 + }, + { + "epoch": 0.274305027460921, + "grad_norm": 1.2082984447479248, + "learning_rate": 2.6552024964834795e-05, + "loss": 0.23764610290527344, + "step": 2029 + }, + { + "epoch": 0.274440219687368, + "grad_norm": 2.066488742828369, + "learning_rate": 2.6547657831987286e-05, + "loss": 0.23218154907226562, + "step": 2030 + }, + { + "epoch": 0.27457541191381496, + "grad_norm": 1.7499059438705444, + "learning_rate": 2.6543288294855843e-05, + "loss": 0.16686058044433594, + "step": 2031 + }, + { + "epoch": 0.27471060414026194, + "grad_norm": 1.5657683610916138, + "learning_rate": 2.653891635435022e-05, + "loss": 0.16902732849121094, + "step": 2032 + }, + { + "epoch": 0.2748457963667089, + "grad_norm": 1.5561699867248535, + "learning_rate": 2.653454201138068e-05, + "loss": 0.1523299217224121, + "step": 2033 + }, + { + "epoch": 0.2749809885931559, + "grad_norm": 1.3267085552215576, + "learning_rate": 2.653016526685798e-05, + "loss": 0.20015335083007812, + "step": 2034 + }, + { + "epoch": 0.2751161808196029, + "grad_norm": 3.6330716609954834, + "learning_rate": 2.6525786121693387e-05, + "loss": 0.23436641693115234, + "step": 2035 + }, + { + "epoch": 0.27525137304604985, + "grad_norm": 1.9537291526794434, + "learning_rate": 2.652140457679866e-05, + "loss": 0.17034339904785156, + "step": 2036 + }, + { + "epoch": 0.27538656527249683, + "grad_norm": 2.2563674449920654, + "learning_rate": 2.6517020633086064e-05, + "loss": 0.17695999145507812, + "step": 2037 + }, + { + "epoch": 0.2755217574989438, + "grad_norm": 1.1365547180175781, + "learning_rate": 2.6512634291468354e-05, + "loss": 0.18954849243164062, + "step": 2038 + }, + { + "epoch": 0.2756569497253908, + "grad_norm": 0.710971474647522, + "learning_rate": 2.6508245552858792e-05, + "loss": 0.13004636764526367, + "step": 2039 + }, + { + "epoch": 0.27579214195183777, + "grad_norm": 1.1256245374679565, + "learning_rate": 2.6503854418171133e-05, + "loss": 0.18889522552490234, + "step": 2040 + }, + { + "epoch": 0.27592733417828474, + "grad_norm": 1.2299513816833496, + "learning_rate": 2.6499460888319644e-05, + "loss": 0.09887552261352539, + "step": 2041 + }, + { + "epoch": 0.2760625264047317, + "grad_norm": 4.2272047996521, + "learning_rate": 2.6495064964219073e-05, + "loss": 0.25049734115600586, + "step": 2042 + }, + { + "epoch": 0.2761977186311787, + "grad_norm": 0.8845816850662231, + "learning_rate": 2.649066664678467e-05, + "loss": 0.1443471908569336, + "step": 2043 + }, + { + "epoch": 0.2763329108576257, + "grad_norm": 1.8426218032836914, + "learning_rate": 2.6486265936932205e-05, + "loss": 0.23305320739746094, + "step": 2044 + }, + { + "epoch": 0.27646810308407266, + "grad_norm": 1.6967228651046753, + "learning_rate": 2.6481862835577915e-05, + "loss": 0.22339248657226562, + "step": 2045 + }, + { + "epoch": 0.27660329531051964, + "grad_norm": 3.0935661792755127, + "learning_rate": 2.6477457343638557e-05, + "loss": 0.2148580551147461, + "step": 2046 + }, + { + "epoch": 0.2767384875369666, + "grad_norm": 3.3383357524871826, + "learning_rate": 2.647304946203137e-05, + "loss": 0.22873878479003906, + "step": 2047 + }, + { + "epoch": 0.2768736797634136, + "grad_norm": 1.2659884691238403, + "learning_rate": 2.6468639191674106e-05, + "loss": 0.14695262908935547, + "step": 2048 + }, + { + "epoch": 0.2770088719898606, + "grad_norm": 1.1179301738739014, + "learning_rate": 2.6464226533485007e-05, + "loss": 0.21669387817382812, + "step": 2049 + }, + { + "epoch": 0.27714406421630755, + "grad_norm": 1.5578583478927612, + "learning_rate": 2.6459811488382806e-05, + "loss": 0.2233123779296875, + "step": 2050 + }, + { + "epoch": 0.27727925644275453, + "grad_norm": 1.8321536779403687, + "learning_rate": 2.645539405728674e-05, + "loss": 0.20200347900390625, + "step": 2051 + }, + { + "epoch": 0.2774144486692015, + "grad_norm": 2.425370931625366, + "learning_rate": 2.6450974241116545e-05, + "loss": 0.16612529754638672, + "step": 2052 + }, + { + "epoch": 0.2775496408956485, + "grad_norm": 1.711129903793335, + "learning_rate": 2.644655204079245e-05, + "loss": 0.1820354461669922, + "step": 2053 + }, + { + "epoch": 0.27768483312209546, + "grad_norm": 1.1385349035263062, + "learning_rate": 2.6442127457235177e-05, + "loss": 0.16158390045166016, + "step": 2054 + }, + { + "epoch": 0.27782002534854244, + "grad_norm": 1.342136263847351, + "learning_rate": 2.6437700491365957e-05, + "loss": 0.1511087417602539, + "step": 2055 + }, + { + "epoch": 0.2779552175749894, + "grad_norm": 0.6681981086730957, + "learning_rate": 2.6433271144106495e-05, + "loss": 0.17515087127685547, + "step": 2056 + }, + { + "epoch": 0.2780904098014364, + "grad_norm": 2.1738009452819824, + "learning_rate": 2.6428839416379015e-05, + "loss": 0.20028305053710938, + "step": 2057 + }, + { + "epoch": 0.2782256020278834, + "grad_norm": 1.324000358581543, + "learning_rate": 2.642440530910622e-05, + "loss": 0.19054412841796875, + "step": 2058 + }, + { + "epoch": 0.27836079425433036, + "grad_norm": 1.2259124517440796, + "learning_rate": 2.6419968823211318e-05, + "loss": 0.1987910270690918, + "step": 2059 + }, + { + "epoch": 0.27849598648077734, + "grad_norm": 0.9730979800224304, + "learning_rate": 2.641552995961801e-05, + "loss": 0.11353349685668945, + "step": 2060 + }, + { + "epoch": 0.2786311787072243, + "grad_norm": 0.8566688299179077, + "learning_rate": 2.6411088719250484e-05, + "loss": 0.18774032592773438, + "step": 2061 + }, + { + "epoch": 0.2787663709336713, + "grad_norm": 1.7345997095108032, + "learning_rate": 2.6406645103033442e-05, + "loss": 0.20612430572509766, + "step": 2062 + }, + { + "epoch": 0.27890156316011827, + "grad_norm": 0.6053770780563354, + "learning_rate": 2.640219911189206e-05, + "loss": 0.13812801241874695, + "step": 2063 + }, + { + "epoch": 0.27903675538656525, + "grad_norm": 2.2172610759735107, + "learning_rate": 2.6397750746752015e-05, + "loss": 0.17090511322021484, + "step": 2064 + }, + { + "epoch": 0.27917194761301223, + "grad_norm": 1.883966326713562, + "learning_rate": 2.6393300008539488e-05, + "loss": 0.24287033081054688, + "step": 2065 + }, + { + "epoch": 0.2793071398394592, + "grad_norm": 3.5898597240448, + "learning_rate": 2.6388846898181143e-05, + "loss": 0.26644325256347656, + "step": 2066 + }, + { + "epoch": 0.2794423320659062, + "grad_norm": 1.5288238525390625, + "learning_rate": 2.6384391416604142e-05, + "loss": 0.21924781799316406, + "step": 2067 + }, + { + "epoch": 0.27957752429235316, + "grad_norm": 3.033341407775879, + "learning_rate": 2.6379933564736136e-05, + "loss": 0.16727256774902344, + "step": 2068 + }, + { + "epoch": 0.27971271651880014, + "grad_norm": 2.6416549682617188, + "learning_rate": 2.637547334350528e-05, + "loss": 0.18758773803710938, + "step": 2069 + }, + { + "epoch": 0.2798479087452472, + "grad_norm": 0.874432384967804, + "learning_rate": 2.637101075384021e-05, + "loss": 0.18726778030395508, + "step": 2070 + }, + { + "epoch": 0.27998310097169415, + "grad_norm": 1.4221959114074707, + "learning_rate": 2.636654579667006e-05, + "loss": 0.21475791931152344, + "step": 2071 + }, + { + "epoch": 0.28011829319814113, + "grad_norm": 1.5142220258712769, + "learning_rate": 2.6362078472924467e-05, + "loss": 0.23212432861328125, + "step": 2072 + }, + { + "epoch": 0.2802534854245881, + "grad_norm": 1.2241703271865845, + "learning_rate": 2.6357608783533545e-05, + "loss": 0.2368154525756836, + "step": 2073 + }, + { + "epoch": 0.2803886776510351, + "grad_norm": 4.463649749755859, + "learning_rate": 2.6353136729427907e-05, + "loss": 0.23796844482421875, + "step": 2074 + }, + { + "epoch": 0.28052386987748207, + "grad_norm": 3.8595693111419678, + "learning_rate": 2.6348662311538657e-05, + "loss": 0.22257232666015625, + "step": 2075 + }, + { + "epoch": 0.28065906210392905, + "grad_norm": 2.0826025009155273, + "learning_rate": 2.6344185530797398e-05, + "loss": 0.2516956329345703, + "step": 2076 + }, + { + "epoch": 0.280794254330376, + "grad_norm": 1.2585065364837646, + "learning_rate": 2.633970638813622e-05, + "loss": 0.1693572998046875, + "step": 2077 + }, + { + "epoch": 0.280929446556823, + "grad_norm": 2.590708017349243, + "learning_rate": 2.6335224884487698e-05, + "loss": 0.2310924530029297, + "step": 2078 + }, + { + "epoch": 0.28106463878327, + "grad_norm": 1.8109679222106934, + "learning_rate": 2.6330741020784905e-05, + "loss": 0.1899118423461914, + "step": 2079 + }, + { + "epoch": 0.28119983100971696, + "grad_norm": 2.415010452270508, + "learning_rate": 2.6326254797961415e-05, + "loss": 0.19678401947021484, + "step": 2080 + }, + { + "epoch": 0.28133502323616394, + "grad_norm": 0.8185734152793884, + "learning_rate": 2.6321766216951273e-05, + "loss": 0.1306161880493164, + "step": 2081 + }, + { + "epoch": 0.2814702154626109, + "grad_norm": 1.7102469205856323, + "learning_rate": 2.631727527868903e-05, + "loss": 0.14808940887451172, + "step": 2082 + }, + { + "epoch": 0.2816054076890579, + "grad_norm": 1.7741369009017944, + "learning_rate": 2.6312781984109727e-05, + "loss": 0.24209308624267578, + "step": 2083 + }, + { + "epoch": 0.2817405999155049, + "grad_norm": 2.03049373626709, + "learning_rate": 2.6308286334148882e-05, + "loss": 0.204376220703125, + "step": 2084 + }, + { + "epoch": 0.28187579214195185, + "grad_norm": 1.3151803016662598, + "learning_rate": 2.630378832974252e-05, + "loss": 0.21219635009765625, + "step": 2085 + }, + { + "epoch": 0.28201098436839883, + "grad_norm": 1.9331953525543213, + "learning_rate": 2.6299287971827154e-05, + "loss": 0.17891645431518555, + "step": 2086 + }, + { + "epoch": 0.2821461765948458, + "grad_norm": 0.914463460445404, + "learning_rate": 2.629478526133977e-05, + "loss": 0.18464088439941406, + "step": 2087 + }, + { + "epoch": 0.2822813688212928, + "grad_norm": 1.1610910892486572, + "learning_rate": 2.6290280199217867e-05, + "loss": 0.22725963592529297, + "step": 2088 + }, + { + "epoch": 0.28241656104773977, + "grad_norm": 0.8498688340187073, + "learning_rate": 2.6285772786399424e-05, + "loss": 0.1393585205078125, + "step": 2089 + }, + { + "epoch": 0.28255175327418675, + "grad_norm": 1.1159546375274658, + "learning_rate": 2.6281263023822894e-05, + "loss": 0.18888092041015625, + "step": 2090 + }, + { + "epoch": 0.2826869455006337, + "grad_norm": 0.9358221888542175, + "learning_rate": 2.627675091242725e-05, + "loss": 0.1728057861328125, + "step": 2091 + }, + { + "epoch": 0.2828221377270807, + "grad_norm": 0.8911249041557312, + "learning_rate": 2.627223645315193e-05, + "loss": 0.1833019256591797, + "step": 2092 + }, + { + "epoch": 0.2829573299535277, + "grad_norm": 0.6716177463531494, + "learning_rate": 2.6267719646936868e-05, + "loss": 0.15749549865722656, + "step": 2093 + }, + { + "epoch": 0.28309252217997466, + "grad_norm": 0.5911497473716736, + "learning_rate": 2.626320049472249e-05, + "loss": 0.1805562973022461, + "step": 2094 + }, + { + "epoch": 0.28322771440642164, + "grad_norm": 3.019662857055664, + "learning_rate": 2.6258678997449705e-05, + "loss": 0.2205810546875, + "step": 2095 + }, + { + "epoch": 0.2833629066328686, + "grad_norm": 1.71770441532135, + "learning_rate": 2.6254155156059912e-05, + "loss": 0.21925830841064453, + "step": 2096 + }, + { + "epoch": 0.2834980988593156, + "grad_norm": 2.4266555309295654, + "learning_rate": 2.6249628971495006e-05, + "loss": 0.19247817993164062, + "step": 2097 + }, + { + "epoch": 0.2836332910857626, + "grad_norm": 2.4780919551849365, + "learning_rate": 2.6245100444697353e-05, + "loss": 0.19807052612304688, + "step": 2098 + }, + { + "epoch": 0.28376848331220955, + "grad_norm": 1.7566062211990356, + "learning_rate": 2.6240569576609824e-05, + "loss": 0.17220664024353027, + "step": 2099 + }, + { + "epoch": 0.28390367553865653, + "grad_norm": 0.947040855884552, + "learning_rate": 2.623603636817577e-05, + "loss": 0.15949058532714844, + "step": 2100 + }, + { + "epoch": 0.2840388677651035, + "grad_norm": 1.0521942377090454, + "learning_rate": 2.6231500820339024e-05, + "loss": 0.15768814086914062, + "step": 2101 + }, + { + "epoch": 0.2841740599915505, + "grad_norm": 1.3224241733551025, + "learning_rate": 2.6226962934043913e-05, + "loss": 0.17158126831054688, + "step": 2102 + }, + { + "epoch": 0.28430925221799747, + "grad_norm": 0.967298686504364, + "learning_rate": 2.622242271023525e-05, + "loss": 0.14494800567626953, + "step": 2103 + }, + { + "epoch": 0.28444444444444444, + "grad_norm": 1.3841006755828857, + "learning_rate": 2.6217880149858333e-05, + "loss": 0.15308713912963867, + "step": 2104 + }, + { + "epoch": 0.2845796366708914, + "grad_norm": 1.7650747299194336, + "learning_rate": 2.621333525385895e-05, + "loss": 0.21657776832580566, + "step": 2105 + }, + { + "epoch": 0.2847148288973384, + "grad_norm": 2.175161361694336, + "learning_rate": 2.6208788023183366e-05, + "loss": 0.2273712158203125, + "step": 2106 + }, + { + "epoch": 0.2848500211237854, + "grad_norm": 2.157089948654175, + "learning_rate": 2.6204238458778346e-05, + "loss": 0.28598785400390625, + "step": 2107 + }, + { + "epoch": 0.28498521335023236, + "grad_norm": 0.8159327507019043, + "learning_rate": 2.619968656159113e-05, + "loss": 0.1382458209991455, + "step": 2108 + }, + { + "epoch": 0.28512040557667934, + "grad_norm": 2.7145349979400635, + "learning_rate": 2.6195132332569445e-05, + "loss": 0.2370128631591797, + "step": 2109 + }, + { + "epoch": 0.2852555978031263, + "grad_norm": 0.8647373914718628, + "learning_rate": 2.619057577266151e-05, + "loss": 0.18473148345947266, + "step": 2110 + }, + { + "epoch": 0.2853907900295733, + "grad_norm": 1.1508220434188843, + "learning_rate": 2.6186016882816027e-05, + "loss": 0.189117431640625, + "step": 2111 + }, + { + "epoch": 0.28552598225602027, + "grad_norm": 1.8512518405914307, + "learning_rate": 2.6181455663982175e-05, + "loss": 0.19964981079101562, + "step": 2112 + }, + { + "epoch": 0.28566117448246725, + "grad_norm": 1.2716434001922607, + "learning_rate": 2.6176892117109628e-05, + "loss": 0.2062397003173828, + "step": 2113 + }, + { + "epoch": 0.28579636670891423, + "grad_norm": 1.1229850053787231, + "learning_rate": 2.617232624314854e-05, + "loss": 0.20491981506347656, + "step": 2114 + }, + { + "epoch": 0.2859315589353612, + "grad_norm": 0.8642187118530273, + "learning_rate": 2.616775804304955e-05, + "loss": 0.17128610610961914, + "step": 2115 + }, + { + "epoch": 0.2860667511618082, + "grad_norm": 0.7997422814369202, + "learning_rate": 2.616318751776378e-05, + "loss": 0.11554145812988281, + "step": 2116 + }, + { + "epoch": 0.28620194338825516, + "grad_norm": 0.8639920949935913, + "learning_rate": 2.615861466824284e-05, + "loss": 0.15627312660217285, + "step": 2117 + }, + { + "epoch": 0.28633713561470214, + "grad_norm": 0.7817961573600769, + "learning_rate": 2.6154039495438825e-05, + "loss": 0.16451644897460938, + "step": 2118 + }, + { + "epoch": 0.2864723278411491, + "grad_norm": 2.4173061847686768, + "learning_rate": 2.6149462000304302e-05, + "loss": 0.21515464782714844, + "step": 2119 + }, + { + "epoch": 0.2866075200675961, + "grad_norm": 1.2405390739440918, + "learning_rate": 2.6144882183792335e-05, + "loss": 0.1426839828491211, + "step": 2120 + }, + { + "epoch": 0.2867427122940431, + "grad_norm": 1.2057969570159912, + "learning_rate": 2.6140300046856468e-05, + "loss": 0.1967940330505371, + "step": 2121 + }, + { + "epoch": 0.28687790452049006, + "grad_norm": 2.0398120880126953, + "learning_rate": 2.6135715590450722e-05, + "loss": 0.1947154998779297, + "step": 2122 + }, + { + "epoch": 0.28701309674693704, + "grad_norm": 1.1911096572875977, + "learning_rate": 2.6131128815529608e-05, + "loss": 0.22559738159179688, + "step": 2123 + }, + { + "epoch": 0.287148288973384, + "grad_norm": 2.6959095001220703, + "learning_rate": 2.6126539723048115e-05, + "loss": 0.25240516662597656, + "step": 2124 + }, + { + "epoch": 0.287283481199831, + "grad_norm": 1.7841055393218994, + "learning_rate": 2.612194831396172e-05, + "loss": 0.15496039390563965, + "step": 2125 + }, + { + "epoch": 0.28741867342627797, + "grad_norm": 1.9539332389831543, + "learning_rate": 2.611735458922637e-05, + "loss": 0.2016277313232422, + "step": 2126 + }, + { + "epoch": 0.28755386565272495, + "grad_norm": 1.4427502155303955, + "learning_rate": 2.6112758549798515e-05, + "loss": 0.19338607788085938, + "step": 2127 + }, + { + "epoch": 0.2876890578791719, + "grad_norm": 1.5410175323486328, + "learning_rate": 2.610816019663507e-05, + "loss": 0.24729251861572266, + "step": 2128 + }, + { + "epoch": 0.2878242501056189, + "grad_norm": 1.2036269903182983, + "learning_rate": 2.6103559530693436e-05, + "loss": 0.23131179809570312, + "step": 2129 + }, + { + "epoch": 0.2879594423320659, + "grad_norm": 1.691206455230713, + "learning_rate": 2.6098956552931495e-05, + "loss": 0.20843935012817383, + "step": 2130 + }, + { + "epoch": 0.28809463455851286, + "grad_norm": 1.5262420177459717, + "learning_rate": 2.6094351264307613e-05, + "loss": 0.20964813232421875, + "step": 2131 + }, + { + "epoch": 0.28822982678495984, + "grad_norm": 1.7162680625915527, + "learning_rate": 2.6089743665780635e-05, + "loss": 0.2512989044189453, + "step": 2132 + }, + { + "epoch": 0.2883650190114068, + "grad_norm": 2.3404204845428467, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.20935297012329102, + "step": 2133 + }, + { + "epoch": 0.2885002112378538, + "grad_norm": 1.238438606262207, + "learning_rate": 2.6080521542855182e-05, + "loss": 0.2020893096923828, + "step": 2134 + }, + { + "epoch": 0.2886354034643008, + "grad_norm": 1.7752658128738403, + "learning_rate": 2.60759070203768e-05, + "loss": 0.17306995391845703, + "step": 2135 + }, + { + "epoch": 0.28877059569074776, + "grad_norm": 1.4922157526016235, + "learning_rate": 2.607129019183551e-05, + "loss": 0.13219031691551208, + "step": 2136 + }, + { + "epoch": 0.28890578791719473, + "grad_norm": 1.0802258253097534, + "learning_rate": 2.6066671058192566e-05, + "loss": 0.16423606872558594, + "step": 2137 + }, + { + "epoch": 0.28904098014364177, + "grad_norm": 1.4209825992584229, + "learning_rate": 2.606204962040969e-05, + "loss": 0.20698165893554688, + "step": 2138 + }, + { + "epoch": 0.28917617237008875, + "grad_norm": 1.1673367023468018, + "learning_rate": 2.6057425879449095e-05, + "loss": 0.1962413787841797, + "step": 2139 + }, + { + "epoch": 0.2893113645965357, + "grad_norm": 2.984081268310547, + "learning_rate": 2.605279983627347e-05, + "loss": 0.17170333862304688, + "step": 2140 + }, + { + "epoch": 0.2894465568229827, + "grad_norm": 1.107867956161499, + "learning_rate": 2.6048171491845974e-05, + "loss": 0.20021629333496094, + "step": 2141 + }, + { + "epoch": 0.2895817490494297, + "grad_norm": 2.630889892578125, + "learning_rate": 2.604354084713026e-05, + "loss": 0.16148757934570312, + "step": 2142 + }, + { + "epoch": 0.28971694127587666, + "grad_norm": 2.610772132873535, + "learning_rate": 2.6038907903090446e-05, + "loss": 0.1705150604248047, + "step": 2143 + }, + { + "epoch": 0.28985213350232364, + "grad_norm": 1.5406900644302368, + "learning_rate": 2.6034272660691143e-05, + "loss": 0.19655418395996094, + "step": 2144 + }, + { + "epoch": 0.2899873257287706, + "grad_norm": 3.0631935596466064, + "learning_rate": 2.6029635120897434e-05, + "loss": 0.1822071075439453, + "step": 2145 + }, + { + "epoch": 0.2901225179552176, + "grad_norm": 1.0158112049102783, + "learning_rate": 2.6024995284674867e-05, + "loss": 0.21086883544921875, + "step": 2146 + }, + { + "epoch": 0.2902577101816646, + "grad_norm": 1.1145544052124023, + "learning_rate": 2.6020353152989496e-05, + "loss": 0.2055506706237793, + "step": 2147 + }, + { + "epoch": 0.29039290240811155, + "grad_norm": 1.0146127939224243, + "learning_rate": 2.601570872680783e-05, + "loss": 0.15993070602416992, + "step": 2148 + }, + { + "epoch": 0.29052809463455853, + "grad_norm": 1.011948585510254, + "learning_rate": 2.6011062007096857e-05, + "loss": 0.1594257354736328, + "step": 2149 + }, + { + "epoch": 0.2906632868610055, + "grad_norm": 1.379586935043335, + "learning_rate": 2.6006412994824067e-05, + "loss": 0.2632780075073242, + "step": 2150 + }, + { + "epoch": 0.2907984790874525, + "grad_norm": 1.061566710472107, + "learning_rate": 2.6001761690957388e-05, + "loss": 0.1822139024734497, + "step": 2151 + }, + { + "epoch": 0.29093367131389947, + "grad_norm": 1.3806735277175903, + "learning_rate": 2.5997108096465263e-05, + "loss": 0.1765270233154297, + "step": 2152 + }, + { + "epoch": 0.29106886354034645, + "grad_norm": 1.530015230178833, + "learning_rate": 2.599245221231659e-05, + "loss": 0.24334716796875, + "step": 2153 + }, + { + "epoch": 0.2912040557667934, + "grad_norm": 0.8990092277526855, + "learning_rate": 2.5987794039480743e-05, + "loss": 0.15089893341064453, + "step": 2154 + }, + { + "epoch": 0.2913392479932404, + "grad_norm": 1.0194252729415894, + "learning_rate": 2.5983133578927584e-05, + "loss": 0.17722558975219727, + "step": 2155 + }, + { + "epoch": 0.2914744402196874, + "grad_norm": 0.8056871891021729, + "learning_rate": 2.5978470831627444e-05, + "loss": 0.16796493530273438, + "step": 2156 + }, + { + "epoch": 0.29160963244613436, + "grad_norm": 0.8277191519737244, + "learning_rate": 2.597380579855113e-05, + "loss": 0.1648712158203125, + "step": 2157 + }, + { + "epoch": 0.29174482467258134, + "grad_norm": 1.2488585710525513, + "learning_rate": 2.5969138480669936e-05, + "loss": 0.14107894897460938, + "step": 2158 + }, + { + "epoch": 0.2918800168990283, + "grad_norm": 1.1313446760177612, + "learning_rate": 2.5964468878955614e-05, + "loss": 0.2240290641784668, + "step": 2159 + }, + { + "epoch": 0.2920152091254753, + "grad_norm": 2.055948257446289, + "learning_rate": 2.5959796994380397e-05, + "loss": 0.2697296142578125, + "step": 2160 + }, + { + "epoch": 0.2921504013519223, + "grad_norm": 1.0354812145233154, + "learning_rate": 2.5955122827917004e-05, + "loss": 0.17512941360473633, + "step": 2161 + }, + { + "epoch": 0.29228559357836925, + "grad_norm": 1.991715669631958, + "learning_rate": 2.595044638053862e-05, + "loss": 0.22813034057617188, + "step": 2162 + }, + { + "epoch": 0.29242078580481623, + "grad_norm": 1.2075145244598389, + "learning_rate": 2.59457676532189e-05, + "loss": 0.20118427276611328, + "step": 2163 + }, + { + "epoch": 0.2925559780312632, + "grad_norm": 1.4355688095092773, + "learning_rate": 2.594108664693199e-05, + "loss": 0.14262676239013672, + "step": 2164 + }, + { + "epoch": 0.2926911702577102, + "grad_norm": 0.989732027053833, + "learning_rate": 2.5936403362652494e-05, + "loss": 0.14153289794921875, + "step": 2165 + }, + { + "epoch": 0.29282636248415717, + "grad_norm": 0.8272646069526672, + "learning_rate": 2.5931717801355497e-05, + "loss": 0.16473770141601562, + "step": 2166 + }, + { + "epoch": 0.29296155471060414, + "grad_norm": 1.2441359758377075, + "learning_rate": 2.5927029964016556e-05, + "loss": 0.2236347198486328, + "step": 2167 + }, + { + "epoch": 0.2930967469370511, + "grad_norm": 1.6876704692840576, + "learning_rate": 2.592233985161171e-05, + "loss": 0.25666236877441406, + "step": 2168 + }, + { + "epoch": 0.2932319391634981, + "grad_norm": 2.8428456783294678, + "learning_rate": 2.5917647465117463e-05, + "loss": 0.18865680694580078, + "step": 2169 + }, + { + "epoch": 0.2933671313899451, + "grad_norm": 2.0724146366119385, + "learning_rate": 2.591295280551079e-05, + "loss": 0.20254135131835938, + "step": 2170 + }, + { + "epoch": 0.29350232361639206, + "grad_norm": 1.1036986112594604, + "learning_rate": 2.590825587376915e-05, + "loss": 0.2016429901123047, + "step": 2171 + }, + { + "epoch": 0.29363751584283904, + "grad_norm": 1.9135407209396362, + "learning_rate": 2.5903556670870464e-05, + "loss": 0.19191265106201172, + "step": 2172 + }, + { + "epoch": 0.293772708069286, + "grad_norm": 1.7431026697158813, + "learning_rate": 2.589885519779314e-05, + "loss": 0.18315362930297852, + "step": 2173 + }, + { + "epoch": 0.293907900295733, + "grad_norm": 2.34696626663208, + "learning_rate": 2.5894151455516043e-05, + "loss": 0.18303298950195312, + "step": 2174 + }, + { + "epoch": 0.29404309252217997, + "grad_norm": 3.4167516231536865, + "learning_rate": 2.5889445445018513e-05, + "loss": 0.2373952865600586, + "step": 2175 + }, + { + "epoch": 0.29417828474862695, + "grad_norm": 1.6996597051620483, + "learning_rate": 2.5884737167280375e-05, + "loss": 0.21302413940429688, + "step": 2176 + }, + { + "epoch": 0.29431347697507393, + "grad_norm": 2.1689019203186035, + "learning_rate": 2.5880026623281914e-05, + "loss": 0.180267333984375, + "step": 2177 + }, + { + "epoch": 0.2944486692015209, + "grad_norm": 1.6127623319625854, + "learning_rate": 2.5875313814003892e-05, + "loss": 0.22878265380859375, + "step": 2178 + }, + { + "epoch": 0.2945838614279679, + "grad_norm": 1.3531789779663086, + "learning_rate": 2.587059874042754e-05, + "loss": 0.13474464416503906, + "step": 2179 + }, + { + "epoch": 0.29471905365441486, + "grad_norm": 2.838716745376587, + "learning_rate": 2.5865881403534557e-05, + "loss": 0.1846942901611328, + "step": 2180 + }, + { + "epoch": 0.29485424588086184, + "grad_norm": 3.6311187744140625, + "learning_rate": 2.5861161804307124e-05, + "loss": 0.218994140625, + "step": 2181 + }, + { + "epoch": 0.2949894381073088, + "grad_norm": 1.2332576513290405, + "learning_rate": 2.5856439943727886e-05, + "loss": 0.1806640625, + "step": 2182 + }, + { + "epoch": 0.2951246303337558, + "grad_norm": 2.6649587154388428, + "learning_rate": 2.5851715822779954e-05, + "loss": 0.2530508041381836, + "step": 2183 + }, + { + "epoch": 0.2952598225602028, + "grad_norm": 1.1564098596572876, + "learning_rate": 2.5846989442446926e-05, + "loss": 0.2039661407470703, + "step": 2184 + }, + { + "epoch": 0.29539501478664976, + "grad_norm": 1.3246471881866455, + "learning_rate": 2.584226080371285e-05, + "loss": 0.1866436004638672, + "step": 2185 + }, + { + "epoch": 0.29553020701309674, + "grad_norm": 1.6912167072296143, + "learning_rate": 2.5837529907562258e-05, + "loss": 0.17899036407470703, + "step": 2186 + }, + { + "epoch": 0.2956653992395437, + "grad_norm": 1.0368320941925049, + "learning_rate": 2.5832796754980138e-05, + "loss": 0.1720564365386963, + "step": 2187 + }, + { + "epoch": 0.2958005914659907, + "grad_norm": 1.5400532484054565, + "learning_rate": 2.5828061346951974e-05, + "loss": 0.2284374237060547, + "step": 2188 + }, + { + "epoch": 0.29593578369243767, + "grad_norm": 1.1747688055038452, + "learning_rate": 2.5823323684463693e-05, + "loss": 0.1628575325012207, + "step": 2189 + }, + { + "epoch": 0.29607097591888465, + "grad_norm": 1.5635459423065186, + "learning_rate": 2.5818583768501708e-05, + "loss": 0.1734914779663086, + "step": 2190 + }, + { + "epoch": 0.2962061681453316, + "grad_norm": 2.3111798763275146, + "learning_rate": 2.5813841600052887e-05, + "loss": 0.179046630859375, + "step": 2191 + }, + { + "epoch": 0.2963413603717786, + "grad_norm": 2.208799362182617, + "learning_rate": 2.580909718010458e-05, + "loss": 0.22995758056640625, + "step": 2192 + }, + { + "epoch": 0.2964765525982256, + "grad_norm": 1.7966690063476562, + "learning_rate": 2.58043505096446e-05, + "loss": 0.18059682846069336, + "step": 2193 + }, + { + "epoch": 0.29661174482467256, + "grad_norm": 2.3113040924072266, + "learning_rate": 2.5799601589661223e-05, + "loss": 0.23362350463867188, + "step": 2194 + }, + { + "epoch": 0.29674693705111954, + "grad_norm": 0.5754366517066956, + "learning_rate": 2.579485042114321e-05, + "loss": 0.11508560180664062, + "step": 2195 + }, + { + "epoch": 0.2968821292775665, + "grad_norm": 1.8896969556808472, + "learning_rate": 2.5790097005079766e-05, + "loss": 0.1593233346939087, + "step": 2196 + }, + { + "epoch": 0.2970173215040135, + "grad_norm": 2.7346065044403076, + "learning_rate": 2.5785341342460595e-05, + "loss": 0.1968708038330078, + "step": 2197 + }, + { + "epoch": 0.2971525137304605, + "grad_norm": 1.0837639570236206, + "learning_rate": 2.5780583434275837e-05, + "loss": 0.15680599212646484, + "step": 2198 + }, + { + "epoch": 0.29728770595690746, + "grad_norm": 1.816139817237854, + "learning_rate": 2.577582328151612e-05, + "loss": 0.1582932472229004, + "step": 2199 + }, + { + "epoch": 0.29742289818335443, + "grad_norm": 1.0152108669281006, + "learning_rate": 2.5771060885172532e-05, + "loss": 0.15219879150390625, + "step": 2200 + }, + { + "epoch": 0.2975580904098014, + "grad_norm": 3.9077744483947754, + "learning_rate": 2.5766296246236628e-05, + "loss": 0.22844886779785156, + "step": 2201 + }, + { + "epoch": 0.2976932826362484, + "grad_norm": 1.4241547584533691, + "learning_rate": 2.5761529365700437e-05, + "loss": 0.1748981475830078, + "step": 2202 + }, + { + "epoch": 0.29782847486269537, + "grad_norm": 2.3375051021575928, + "learning_rate": 2.5756760244556445e-05, + "loss": 0.14005756378173828, + "step": 2203 + }, + { + "epoch": 0.29796366708914235, + "grad_norm": 1.7697644233703613, + "learning_rate": 2.5751988883797603e-05, + "loss": 0.15149545669555664, + "step": 2204 + }, + { + "epoch": 0.2980988593155893, + "grad_norm": 6.114085674285889, + "learning_rate": 2.574721528441734e-05, + "loss": 0.28840065002441406, + "step": 2205 + }, + { + "epoch": 0.29823405154203636, + "grad_norm": 3.2697112560272217, + "learning_rate": 2.5742439447409545e-05, + "loss": 0.2531318664550781, + "step": 2206 + }, + { + "epoch": 0.29836924376848334, + "grad_norm": 2.8559587001800537, + "learning_rate": 2.5737661373768568e-05, + "loss": 0.23653221130371094, + "step": 2207 + }, + { + "epoch": 0.2985044359949303, + "grad_norm": 1.164550542831421, + "learning_rate": 2.5732881064489237e-05, + "loss": 0.17854595184326172, + "step": 2208 + }, + { + "epoch": 0.2986396282213773, + "grad_norm": 1.0126031637191772, + "learning_rate": 2.572809852056683e-05, + "loss": 0.15370619297027588, + "step": 2209 + }, + { + "epoch": 0.2987748204478243, + "grad_norm": 1.3241701126098633, + "learning_rate": 2.572331374299711e-05, + "loss": 0.2446765899658203, + "step": 2210 + }, + { + "epoch": 0.29891001267427125, + "grad_norm": 1.987065076828003, + "learning_rate": 2.5718526732776276e-05, + "loss": 0.2176513671875, + "step": 2211 + }, + { + "epoch": 0.29904520490071823, + "grad_norm": 2.3771207332611084, + "learning_rate": 2.5713737490901023e-05, + "loss": 0.23114395141601562, + "step": 2212 + }, + { + "epoch": 0.2991803971271652, + "grad_norm": 2.6928560733795166, + "learning_rate": 2.570894601836849e-05, + "loss": 0.19826126098632812, + "step": 2213 + }, + { + "epoch": 0.2993155893536122, + "grad_norm": 3.1843361854553223, + "learning_rate": 2.5704152316176287e-05, + "loss": 0.19635391235351562, + "step": 2214 + }, + { + "epoch": 0.29945078158005917, + "grad_norm": 3.18815541267395, + "learning_rate": 2.5699356385322487e-05, + "loss": 0.25201416015625, + "step": 2215 + }, + { + "epoch": 0.29958597380650615, + "grad_norm": 1.2190203666687012, + "learning_rate": 2.5694558226805643e-05, + "loss": 0.16676998138427734, + "step": 2216 + }, + { + "epoch": 0.2997211660329531, + "grad_norm": 0.8309981226921082, + "learning_rate": 2.568975784162474e-05, + "loss": 0.18161582946777344, + "step": 2217 + }, + { + "epoch": 0.2998563582594001, + "grad_norm": 0.6892561316490173, + "learning_rate": 2.5684955230779245e-05, + "loss": 0.12321281433105469, + "step": 2218 + }, + { + "epoch": 0.2999915504858471, + "grad_norm": 1.2423349618911743, + "learning_rate": 2.5680150395269096e-05, + "loss": 0.2083759307861328, + "step": 2219 + }, + { + "epoch": 0.30012674271229406, + "grad_norm": 1.6214042901992798, + "learning_rate": 2.5675343336094683e-05, + "loss": 0.1895122528076172, + "step": 2220 + }, + { + "epoch": 0.30026193493874104, + "grad_norm": 0.8464069366455078, + "learning_rate": 2.5670534054256855e-05, + "loss": 0.17712879180908203, + "step": 2221 + }, + { + "epoch": 0.300397127165188, + "grad_norm": 0.580012321472168, + "learning_rate": 2.5665722550756937e-05, + "loss": 0.1158294677734375, + "step": 2222 + }, + { + "epoch": 0.300532319391635, + "grad_norm": 2.591585159301758, + "learning_rate": 2.5660908826596707e-05, + "loss": 0.22759437561035156, + "step": 2223 + }, + { + "epoch": 0.300667511618082, + "grad_norm": 3.2795629501342773, + "learning_rate": 2.5656092882778413e-05, + "loss": 0.23479270935058594, + "step": 2224 + }, + { + "epoch": 0.30080270384452895, + "grad_norm": 1.3287330865859985, + "learning_rate": 2.565127472030475e-05, + "loss": 0.20707225799560547, + "step": 2225 + }, + { + "epoch": 0.30093789607097593, + "grad_norm": 1.2254912853240967, + "learning_rate": 2.5646454340178894e-05, + "loss": 0.13736248016357422, + "step": 2226 + }, + { + "epoch": 0.3010730882974229, + "grad_norm": 2.279569149017334, + "learning_rate": 2.564163174340447e-05, + "loss": 0.16182327270507812, + "step": 2227 + }, + { + "epoch": 0.3012082805238699, + "grad_norm": 1.6269927024841309, + "learning_rate": 2.5636806930985565e-05, + "loss": 0.22962188720703125, + "step": 2228 + }, + { + "epoch": 0.30134347275031687, + "grad_norm": 1.3123782873153687, + "learning_rate": 2.5631979903926738e-05, + "loss": 0.18151378631591797, + "step": 2229 + }, + { + "epoch": 0.30147866497676384, + "grad_norm": 1.6872673034667969, + "learning_rate": 2.5627150663233e-05, + "loss": 0.18174314498901367, + "step": 2230 + }, + { + "epoch": 0.3016138572032108, + "grad_norm": 1.3398396968841553, + "learning_rate": 2.5622319209909817e-05, + "loss": 0.17935657501220703, + "step": 2231 + }, + { + "epoch": 0.3017490494296578, + "grad_norm": 1.0318453311920166, + "learning_rate": 2.5617485544963135e-05, + "loss": 0.1580047607421875, + "step": 2232 + }, + { + "epoch": 0.3018842416561048, + "grad_norm": 1.3219295740127563, + "learning_rate": 2.561264966939934e-05, + "loss": 0.1755967140197754, + "step": 2233 + }, + { + "epoch": 0.30201943388255176, + "grad_norm": 1.2998502254486084, + "learning_rate": 2.5607811584225294e-05, + "loss": 0.17787599563598633, + "step": 2234 + }, + { + "epoch": 0.30215462610899874, + "grad_norm": 0.6230987906455994, + "learning_rate": 2.5602971290448305e-05, + "loss": 0.14884185791015625, + "step": 2235 + }, + { + "epoch": 0.3022898183354457, + "grad_norm": 4.1620354652404785, + "learning_rate": 2.5598128789076152e-05, + "loss": 0.1973257064819336, + "step": 2236 + }, + { + "epoch": 0.3024250105618927, + "grad_norm": 2.0815341472625732, + "learning_rate": 2.559328408111707e-05, + "loss": 0.1889514923095703, + "step": 2237 + }, + { + "epoch": 0.30256020278833967, + "grad_norm": 3.2900888919830322, + "learning_rate": 2.5588437167579755e-05, + "loss": 0.23741531372070312, + "step": 2238 + }, + { + "epoch": 0.30269539501478665, + "grad_norm": 2.445986747741699, + "learning_rate": 2.558358804947335e-05, + "loss": 0.2249622344970703, + "step": 2239 + }, + { + "epoch": 0.30283058724123363, + "grad_norm": 1.1287540197372437, + "learning_rate": 2.557873672780748e-05, + "loss": 0.14392662048339844, + "step": 2240 + }, + { + "epoch": 0.3029657794676806, + "grad_norm": 1.2125771045684814, + "learning_rate": 2.557388320359221e-05, + "loss": 0.1726994514465332, + "step": 2241 + }, + { + "epoch": 0.3031009716941276, + "grad_norm": 1.1019476652145386, + "learning_rate": 2.5569027477838068e-05, + "loss": 0.21356868743896484, + "step": 2242 + }, + { + "epoch": 0.30323616392057456, + "grad_norm": 1.0761303901672363, + "learning_rate": 2.5564169551556044e-05, + "loss": 0.17145156860351562, + "step": 2243 + }, + { + "epoch": 0.30337135614702154, + "grad_norm": 2.25685453414917, + "learning_rate": 2.5559309425757586e-05, + "loss": 0.236846923828125, + "step": 2244 + }, + { + "epoch": 0.3035065483734685, + "grad_norm": 1.376454472541809, + "learning_rate": 2.5554447101454597e-05, + "loss": 0.23953962326049805, + "step": 2245 + }, + { + "epoch": 0.3036417405999155, + "grad_norm": 3.6749227046966553, + "learning_rate": 2.554958257965944e-05, + "loss": 0.19617700576782227, + "step": 2246 + }, + { + "epoch": 0.3037769328263625, + "grad_norm": 2.1911346912384033, + "learning_rate": 2.554471586138493e-05, + "loss": 0.155059814453125, + "step": 2247 + }, + { + "epoch": 0.30391212505280946, + "grad_norm": 1.566085696220398, + "learning_rate": 2.5539846947644342e-05, + "loss": 0.1541481614112854, + "step": 2248 + }, + { + "epoch": 0.30404731727925643, + "grad_norm": 1.2843924760818481, + "learning_rate": 2.5534975839451416e-05, + "loss": 0.19762039184570312, + "step": 2249 + }, + { + "epoch": 0.3041825095057034, + "grad_norm": 1.8344876766204834, + "learning_rate": 2.5530102537820348e-05, + "loss": 0.18524169921875, + "step": 2250 + }, + { + "epoch": 0.3043177017321504, + "grad_norm": 2.5840392112731934, + "learning_rate": 2.5525227043765774e-05, + "loss": 0.22797775268554688, + "step": 2251 + }, + { + "epoch": 0.30445289395859737, + "grad_norm": 3.2412056922912598, + "learning_rate": 2.55203493583028e-05, + "loss": 0.20225143432617188, + "step": 2252 + }, + { + "epoch": 0.30458808618504435, + "grad_norm": 4.206811904907227, + "learning_rate": 2.551546948244699e-05, + "loss": 0.16933536529541016, + "step": 2253 + }, + { + "epoch": 0.3047232784114913, + "grad_norm": 1.2663875818252563, + "learning_rate": 2.551058741721436e-05, + "loss": 0.2192707061767578, + "step": 2254 + }, + { + "epoch": 0.3048584706379383, + "grad_norm": 0.8034036755561829, + "learning_rate": 2.550570316362138e-05, + "loss": 0.18777036666870117, + "step": 2255 + }, + { + "epoch": 0.3049936628643853, + "grad_norm": 1.8470826148986816, + "learning_rate": 2.5500816722684975e-05, + "loss": 0.1671457290649414, + "step": 2256 + }, + { + "epoch": 0.30512885509083226, + "grad_norm": 0.9032037258148193, + "learning_rate": 2.549592809542253e-05, + "loss": 0.16002678871154785, + "step": 2257 + }, + { + "epoch": 0.30526404731727924, + "grad_norm": 1.2584655284881592, + "learning_rate": 2.549103728285189e-05, + "loss": 0.20996952056884766, + "step": 2258 + }, + { + "epoch": 0.3053992395437262, + "grad_norm": 1.2696239948272705, + "learning_rate": 2.548614428599134e-05, + "loss": 0.16689586639404297, + "step": 2259 + }, + { + "epoch": 0.3055344317701732, + "grad_norm": 1.2131980657577515, + "learning_rate": 2.5481249105859633e-05, + "loss": 0.13894343376159668, + "step": 2260 + }, + { + "epoch": 0.3056696239966202, + "grad_norm": 1.2188817262649536, + "learning_rate": 2.5476351743475964e-05, + "loss": 0.19422650337219238, + "step": 2261 + }, + { + "epoch": 0.30580481622306716, + "grad_norm": 1.53557550907135, + "learning_rate": 2.547145219986e-05, + "loss": 0.2347869873046875, + "step": 2262 + }, + { + "epoch": 0.30594000844951413, + "grad_norm": 1.2418861389160156, + "learning_rate": 2.5466550476031846e-05, + "loss": 0.15057945251464844, + "step": 2263 + }, + { + "epoch": 0.3060752006759611, + "grad_norm": 0.8434281349182129, + "learning_rate": 2.5461646573012072e-05, + "loss": 0.17255020141601562, + "step": 2264 + }, + { + "epoch": 0.3062103929024081, + "grad_norm": 1.88532292842865, + "learning_rate": 2.5456740491821687e-05, + "loss": 0.15487957000732422, + "step": 2265 + }, + { + "epoch": 0.30634558512885507, + "grad_norm": 1.6142040491104126, + "learning_rate": 2.5451832233482172e-05, + "loss": 0.19624710083007812, + "step": 2266 + }, + { + "epoch": 0.30648077735530205, + "grad_norm": 1.7512913942337036, + "learning_rate": 2.544692179901545e-05, + "loss": 0.21172046661376953, + "step": 2267 + }, + { + "epoch": 0.306615969581749, + "grad_norm": 3.562217950820923, + "learning_rate": 2.5442009189443902e-05, + "loss": 0.15139293670654297, + "step": 2268 + }, + { + "epoch": 0.306751161808196, + "grad_norm": 1.0516456365585327, + "learning_rate": 2.5437094405790355e-05, + "loss": 0.1160745620727539, + "step": 2269 + }, + { + "epoch": 0.306886354034643, + "grad_norm": 2.5979087352752686, + "learning_rate": 2.5432177449078096e-05, + "loss": 0.18936586380004883, + "step": 2270 + }, + { + "epoch": 0.30702154626108996, + "grad_norm": 0.7907645106315613, + "learning_rate": 2.5427258320330857e-05, + "loss": 0.09168052673339844, + "step": 2271 + }, + { + "epoch": 0.30715673848753694, + "grad_norm": 1.0355113744735718, + "learning_rate": 2.5422337020572835e-05, + "loss": 0.16750812530517578, + "step": 2272 + }, + { + "epoch": 0.3072919307139839, + "grad_norm": 1.8528800010681152, + "learning_rate": 2.5417413550828667e-05, + "loss": 0.17857742309570312, + "step": 2273 + }, + { + "epoch": 0.30742712294043095, + "grad_norm": 1.2584067583084106, + "learning_rate": 2.5412487912123444e-05, + "loss": 0.18100261688232422, + "step": 2274 + }, + { + "epoch": 0.30756231516687793, + "grad_norm": 1.2099308967590332, + "learning_rate": 2.5407560105482708e-05, + "loss": 0.1762828826904297, + "step": 2275 + }, + { + "epoch": 0.3076975073933249, + "grad_norm": 2.1157209873199463, + "learning_rate": 2.540263013193246e-05, + "loss": 0.20116424560546875, + "step": 2276 + }, + { + "epoch": 0.3078326996197719, + "grad_norm": 1.435807466506958, + "learning_rate": 2.539769799249915e-05, + "loss": 0.21176719665527344, + "step": 2277 + }, + { + "epoch": 0.30796789184621887, + "grad_norm": 1.0198616981506348, + "learning_rate": 2.5392763688209666e-05, + "loss": 0.1684408187866211, + "step": 2278 + }, + { + "epoch": 0.30810308407266584, + "grad_norm": 1.9164719581604004, + "learning_rate": 2.5387827220091362e-05, + "loss": 0.21175384521484375, + "step": 2279 + }, + { + "epoch": 0.3082382762991128, + "grad_norm": 1.0155837535858154, + "learning_rate": 2.538288858917204e-05, + "loss": 0.1824626922607422, + "step": 2280 + }, + { + "epoch": 0.3083734685255598, + "grad_norm": 0.6968599557876587, + "learning_rate": 2.5377947796479936e-05, + "loss": 0.1112971305847168, + "step": 2281 + }, + { + "epoch": 0.3085086607520068, + "grad_norm": 0.8022904396057129, + "learning_rate": 2.537300484304377e-05, + "loss": 0.15833663940429688, + "step": 2282 + }, + { + "epoch": 0.30864385297845376, + "grad_norm": 1.8228644132614136, + "learning_rate": 2.536805972989267e-05, + "loss": 0.14421063661575317, + "step": 2283 + }, + { + "epoch": 0.30877904520490074, + "grad_norm": 1.1312681436538696, + "learning_rate": 2.5363112458056252e-05, + "loss": 0.1482563018798828, + "step": 2284 + }, + { + "epoch": 0.3089142374313477, + "grad_norm": 1.108189582824707, + "learning_rate": 2.5358163028564552e-05, + "loss": 0.2332611083984375, + "step": 2285 + }, + { + "epoch": 0.3090494296577947, + "grad_norm": 0.6742943525314331, + "learning_rate": 2.535321144244808e-05, + "loss": 0.14126014709472656, + "step": 2286 + }, + { + "epoch": 0.3091846218842417, + "grad_norm": 1.6424269676208496, + "learning_rate": 2.534825770073777e-05, + "loss": 0.16980934143066406, + "step": 2287 + }, + { + "epoch": 0.30931981411068865, + "grad_norm": 1.0887466669082642, + "learning_rate": 2.5343301804465026e-05, + "loss": 0.24367237091064453, + "step": 2288 + }, + { + "epoch": 0.30945500633713563, + "grad_norm": 1.2062550783157349, + "learning_rate": 2.533834375466169e-05, + "loss": 0.187713623046875, + "step": 2289 + }, + { + "epoch": 0.3095901985635826, + "grad_norm": 1.0450830459594727, + "learning_rate": 2.533338355236005e-05, + "loss": 0.16996359825134277, + "step": 2290 + }, + { + "epoch": 0.3097253907900296, + "grad_norm": 0.9624519944190979, + "learning_rate": 2.532842119859285e-05, + "loss": 0.18154144287109375, + "step": 2291 + }, + { + "epoch": 0.30986058301647657, + "grad_norm": 0.9605568647384644, + "learning_rate": 2.532345669439328e-05, + "loss": 0.1711254119873047, + "step": 2292 + }, + { + "epoch": 0.30999577524292354, + "grad_norm": 2.006368637084961, + "learning_rate": 2.5318490040794975e-05, + "loss": 0.17003393173217773, + "step": 2293 + }, + { + "epoch": 0.3101309674693705, + "grad_norm": 3.2251060009002686, + "learning_rate": 2.531352123883202e-05, + "loss": 0.19815653562545776, + "step": 2294 + }, + { + "epoch": 0.3102661596958175, + "grad_norm": 4.171011447906494, + "learning_rate": 2.530855028953894e-05, + "loss": 0.23536014556884766, + "step": 2295 + }, + { + "epoch": 0.3104013519222645, + "grad_norm": 1.8428337574005127, + "learning_rate": 2.5303577193950724e-05, + "loss": 0.137603759765625, + "step": 2296 + }, + { + "epoch": 0.31053654414871146, + "grad_norm": 1.8379453420639038, + "learning_rate": 2.5298601953102785e-05, + "loss": 0.1956644058227539, + "step": 2297 + }, + { + "epoch": 0.31067173637515844, + "grad_norm": 1.0162596702575684, + "learning_rate": 2.5293624568031008e-05, + "loss": 0.17864370346069336, + "step": 2298 + }, + { + "epoch": 0.3108069286016054, + "grad_norm": 1.0062034130096436, + "learning_rate": 2.5288645039771697e-05, + "loss": 0.15030574798583984, + "step": 2299 + }, + { + "epoch": 0.3109421208280524, + "grad_norm": 3.293567657470703, + "learning_rate": 2.5283663369361624e-05, + "loss": 0.13706159591674805, + "step": 2300 + }, + { + "epoch": 0.31107731305449937, + "grad_norm": 3.191436767578125, + "learning_rate": 2.5278679557837998e-05, + "loss": 0.2034149169921875, + "step": 2301 + }, + { + "epoch": 0.31121250528094635, + "grad_norm": 5.852672100067139, + "learning_rate": 2.5273693606238474e-05, + "loss": 0.24721908569335938, + "step": 2302 + }, + { + "epoch": 0.31134769750739333, + "grad_norm": 3.2634170055389404, + "learning_rate": 2.5268705515601164e-05, + "loss": 0.18565797805786133, + "step": 2303 + }, + { + "epoch": 0.3114828897338403, + "grad_norm": 2.484697103500366, + "learning_rate": 2.5263715286964596e-05, + "loss": 0.14623737335205078, + "step": 2304 + }, + { + "epoch": 0.3116180819602873, + "grad_norm": 3.79632568359375, + "learning_rate": 2.525872292136778e-05, + "loss": 0.2135028839111328, + "step": 2305 + }, + { + "epoch": 0.31175327418673426, + "grad_norm": 3.831333637237549, + "learning_rate": 2.525372841985014e-05, + "loss": 0.20622658729553223, + "step": 2306 + }, + { + "epoch": 0.31188846641318124, + "grad_norm": 0.7250213623046875, + "learning_rate": 2.5248731783451567e-05, + "loss": 0.16280746459960938, + "step": 2307 + }, + { + "epoch": 0.3120236586396282, + "grad_norm": 1.3498880863189697, + "learning_rate": 2.524373301321238e-05, + "loss": 0.23999404907226562, + "step": 2308 + }, + { + "epoch": 0.3121588508660752, + "grad_norm": 1.2318817377090454, + "learning_rate": 2.5238732110173356e-05, + "loss": 0.19244003295898438, + "step": 2309 + }, + { + "epoch": 0.3122940430925222, + "grad_norm": 0.956408679485321, + "learning_rate": 2.5233729075375708e-05, + "loss": 0.2026979923248291, + "step": 2310 + }, + { + "epoch": 0.31242923531896916, + "grad_norm": 3.9792239665985107, + "learning_rate": 2.522872390986109e-05, + "loss": 0.2338409423828125, + "step": 2311 + }, + { + "epoch": 0.31256442754541613, + "grad_norm": 2.1870150566101074, + "learning_rate": 2.522371661467161e-05, + "loss": 0.11838912963867188, + "step": 2312 + }, + { + "epoch": 0.3126996197718631, + "grad_norm": 2.9099535942077637, + "learning_rate": 2.521870719084981e-05, + "loss": 0.25728273391723633, + "step": 2313 + }, + { + "epoch": 0.3128348119983101, + "grad_norm": 2.243488073348999, + "learning_rate": 2.5213695639438686e-05, + "loss": 0.1966552734375, + "step": 2314 + }, + { + "epoch": 0.31297000422475707, + "grad_norm": 2.1547231674194336, + "learning_rate": 2.5208681961481657e-05, + "loss": 0.2767219543457031, + "step": 2315 + }, + { + "epoch": 0.31310519645120405, + "grad_norm": 1.801814079284668, + "learning_rate": 2.5203666158022607e-05, + "loss": 0.14489269256591797, + "step": 2316 + }, + { + "epoch": 0.313240388677651, + "grad_norm": 1.970206379890442, + "learning_rate": 2.519864823010585e-05, + "loss": 0.15406322479248047, + "step": 2317 + }, + { + "epoch": 0.313375580904098, + "grad_norm": 1.0015075206756592, + "learning_rate": 2.5193628178776148e-05, + "loss": 0.11316871643066406, + "step": 2318 + }, + { + "epoch": 0.313510773130545, + "grad_norm": 1.687562108039856, + "learning_rate": 2.5188606005078695e-05, + "loss": 0.2304668426513672, + "step": 2319 + }, + { + "epoch": 0.31364596535699196, + "grad_norm": 1.1796659231185913, + "learning_rate": 2.518358171005914e-05, + "loss": 0.2116527557373047, + "step": 2320 + }, + { + "epoch": 0.31378115758343894, + "grad_norm": 3.509256601333618, + "learning_rate": 2.517855529476357e-05, + "loss": 0.23590087890625, + "step": 2321 + }, + { + "epoch": 0.3139163498098859, + "grad_norm": 1.073394775390625, + "learning_rate": 2.517352676023851e-05, + "loss": 0.16514205932617188, + "step": 2322 + }, + { + "epoch": 0.3140515420363329, + "grad_norm": 1.3163100481033325, + "learning_rate": 2.5168496107530925e-05, + "loss": 0.16283893585205078, + "step": 2323 + }, + { + "epoch": 0.3141867342627799, + "grad_norm": 1.2644157409667969, + "learning_rate": 2.5163463337688224e-05, + "loss": 0.19581031799316406, + "step": 2324 + }, + { + "epoch": 0.31432192648922685, + "grad_norm": 1.6979799270629883, + "learning_rate": 2.515842845175826e-05, + "loss": 0.15867233276367188, + "step": 2325 + }, + { + "epoch": 0.31445711871567383, + "grad_norm": 0.714146614074707, + "learning_rate": 2.5153391450789326e-05, + "loss": 0.15462970733642578, + "step": 2326 + }, + { + "epoch": 0.3145923109421208, + "grad_norm": 1.330087661743164, + "learning_rate": 2.514835233583014e-05, + "loss": 0.19726943969726562, + "step": 2327 + }, + { + "epoch": 0.3147275031685678, + "grad_norm": 0.9222365021705627, + "learning_rate": 2.514331110792988e-05, + "loss": 0.1748828887939453, + "step": 2328 + }, + { + "epoch": 0.31486269539501477, + "grad_norm": 2.0946593284606934, + "learning_rate": 2.513826776813816e-05, + "loss": 0.19808578491210938, + "step": 2329 + }, + { + "epoch": 0.31499788762146175, + "grad_norm": 3.8943722248077393, + "learning_rate": 2.5133222317505024e-05, + "loss": 0.22673511505126953, + "step": 2330 + }, + { + "epoch": 0.3151330798479087, + "grad_norm": 1.473652720451355, + "learning_rate": 2.5128174757080965e-05, + "loss": 0.19598007202148438, + "step": 2331 + }, + { + "epoch": 0.3152682720743557, + "grad_norm": 2.0215470790863037, + "learning_rate": 2.5123125087916916e-05, + "loss": 0.20312118530273438, + "step": 2332 + }, + { + "epoch": 0.3154034643008027, + "grad_norm": 1.6779415607452393, + "learning_rate": 2.5118073311064236e-05, + "loss": 0.19646644592285156, + "step": 2333 + }, + { + "epoch": 0.31553865652724966, + "grad_norm": 1.116431713104248, + "learning_rate": 2.5113019427574734e-05, + "loss": 0.16550064086914062, + "step": 2334 + }, + { + "epoch": 0.31567384875369664, + "grad_norm": 1.1136761903762817, + "learning_rate": 2.5107963438500666e-05, + "loss": 0.15624427795410156, + "step": 2335 + }, + { + "epoch": 0.3158090409801436, + "grad_norm": 1.9314128160476685, + "learning_rate": 2.51029053448947e-05, + "loss": 0.18707656860351562, + "step": 2336 + }, + { + "epoch": 0.3159442332065906, + "grad_norm": 3.9178435802459717, + "learning_rate": 2.509784514780997e-05, + "loss": 0.1781320571899414, + "step": 2337 + }, + { + "epoch": 0.3160794254330376, + "grad_norm": 2.66619873046875, + "learning_rate": 2.5092782848300033e-05, + "loss": 0.1796245574951172, + "step": 2338 + }, + { + "epoch": 0.31621461765948455, + "grad_norm": 2.299640417098999, + "learning_rate": 2.5087718447418886e-05, + "loss": 0.20847702026367188, + "step": 2339 + }, + { + "epoch": 0.31634980988593153, + "grad_norm": 1.359383463859558, + "learning_rate": 2.5082651946220958e-05, + "loss": 0.2000293731689453, + "step": 2340 + }, + { + "epoch": 0.3164850021123785, + "grad_norm": 0.9819516539573669, + "learning_rate": 2.507758334576113e-05, + "loss": 0.1815662384033203, + "step": 2341 + }, + { + "epoch": 0.31662019433882554, + "grad_norm": 1.1998976469039917, + "learning_rate": 2.5072512647094713e-05, + "loss": 0.18518829345703125, + "step": 2342 + }, + { + "epoch": 0.3167553865652725, + "grad_norm": 1.0507601499557495, + "learning_rate": 2.506743985127745e-05, + "loss": 0.22673416137695312, + "step": 2343 + }, + { + "epoch": 0.3168905787917195, + "grad_norm": 0.7075194120407104, + "learning_rate": 2.506236495936552e-05, + "loss": 0.17055320739746094, + "step": 2344 + }, + { + "epoch": 0.3170257710181665, + "grad_norm": 2.0761756896972656, + "learning_rate": 2.5057287972415547e-05, + "loss": 0.20184326171875, + "step": 2345 + }, + { + "epoch": 0.31716096324461346, + "grad_norm": 1.518945336341858, + "learning_rate": 2.5052208891484588e-05, + "loss": 0.19922256469726562, + "step": 2346 + }, + { + "epoch": 0.31729615547106044, + "grad_norm": 1.1840366125106812, + "learning_rate": 2.504712771763013e-05, + "loss": 0.1599903106689453, + "step": 2347 + }, + { + "epoch": 0.3174313476975074, + "grad_norm": 2.1613547801971436, + "learning_rate": 2.5042044451910108e-05, + "loss": 0.1673727035522461, + "step": 2348 + }, + { + "epoch": 0.3175665399239544, + "grad_norm": 2.690495252609253, + "learning_rate": 2.5036959095382875e-05, + "loss": 0.222930908203125, + "step": 2349 + }, + { + "epoch": 0.3177017321504014, + "grad_norm": 1.2885096073150635, + "learning_rate": 2.5031871649107233e-05, + "loss": 0.17171669006347656, + "step": 2350 + }, + { + "epoch": 0.31783692437684835, + "grad_norm": 2.8969340324401855, + "learning_rate": 2.5026782114142426e-05, + "loss": 0.22868919372558594, + "step": 2351 + }, + { + "epoch": 0.31797211660329533, + "grad_norm": 2.9183242321014404, + "learning_rate": 2.5021690491548107e-05, + "loss": 0.23976516723632812, + "step": 2352 + }, + { + "epoch": 0.3181073088297423, + "grad_norm": 1.4264113903045654, + "learning_rate": 2.5016596782384387e-05, + "loss": 0.16781902313232422, + "step": 2353 + }, + { + "epoch": 0.3182425010561893, + "grad_norm": 1.1937352418899536, + "learning_rate": 2.5011500987711804e-05, + "loss": 0.26714324951171875, + "step": 2354 + }, + { + "epoch": 0.31837769328263626, + "grad_norm": 0.9137232899665833, + "learning_rate": 2.5006403108591325e-05, + "loss": 0.1682300567626953, + "step": 2355 + }, + { + "epoch": 0.31851288550908324, + "grad_norm": 1.080885648727417, + "learning_rate": 2.500130314608436e-05, + "loss": 0.19979095458984375, + "step": 2356 + }, + { + "epoch": 0.3186480777355302, + "grad_norm": 0.6518683433532715, + "learning_rate": 2.4996201101252742e-05, + "loss": 0.15123838186264038, + "step": 2357 + }, + { + "epoch": 0.3187832699619772, + "grad_norm": 1.095583438873291, + "learning_rate": 2.4991096975158757e-05, + "loss": 0.16352367401123047, + "step": 2358 + }, + { + "epoch": 0.3189184621884242, + "grad_norm": 0.9431547522544861, + "learning_rate": 2.4985990768865095e-05, + "loss": 0.1791553497314453, + "step": 2359 + }, + { + "epoch": 0.31905365441487116, + "grad_norm": 0.7539752125740051, + "learning_rate": 2.4980882483434904e-05, + "loss": 0.16378402709960938, + "step": 2360 + }, + { + "epoch": 0.31918884664131814, + "grad_norm": 1.3075010776519775, + "learning_rate": 2.497577211993176e-05, + "loss": 0.20471477508544922, + "step": 2361 + }, + { + "epoch": 0.3193240388677651, + "grad_norm": 1.2260907888412476, + "learning_rate": 2.4970659679419658e-05, + "loss": 0.16594409942626953, + "step": 2362 + }, + { + "epoch": 0.3194592310942121, + "grad_norm": 2.4159138202667236, + "learning_rate": 2.496554516296304e-05, + "loss": 0.19655799865722656, + "step": 2363 + }, + { + "epoch": 0.31959442332065907, + "grad_norm": 2.0845017433166504, + "learning_rate": 2.4960428571626784e-05, + "loss": 0.19893836975097656, + "step": 2364 + }, + { + "epoch": 0.31972961554710605, + "grad_norm": 1.508702039718628, + "learning_rate": 2.4955309906476177e-05, + "loss": 0.20336627960205078, + "step": 2365 + }, + { + "epoch": 0.31986480777355303, + "grad_norm": 1.4192456007003784, + "learning_rate": 2.495018916857696e-05, + "loss": 0.21660232543945312, + "step": 2366 + }, + { + "epoch": 0.32, + "grad_norm": 3.0699455738067627, + "learning_rate": 2.4945066358995304e-05, + "loss": 0.1481180191040039, + "step": 2367 + }, + { + "epoch": 0.320135192226447, + "grad_norm": 1.7595261335372925, + "learning_rate": 2.493994147879779e-05, + "loss": 0.19792747497558594, + "step": 2368 + }, + { + "epoch": 0.32027038445289396, + "grad_norm": 2.8754079341888428, + "learning_rate": 2.4934814529051458e-05, + "loss": 0.19551849365234375, + "step": 2369 + }, + { + "epoch": 0.32040557667934094, + "grad_norm": 1.2139326333999634, + "learning_rate": 2.4929685510823763e-05, + "loss": 0.2159442901611328, + "step": 2370 + }, + { + "epoch": 0.3205407689057879, + "grad_norm": 2.0129222869873047, + "learning_rate": 2.492455442518259e-05, + "loss": 0.22185516357421875, + "step": 2371 + }, + { + "epoch": 0.3206759611322349, + "grad_norm": 1.0887688398361206, + "learning_rate": 2.4919421273196262e-05, + "loss": 0.18426275253295898, + "step": 2372 + }, + { + "epoch": 0.3208111533586819, + "grad_norm": 1.4184889793395996, + "learning_rate": 2.4914286055933527e-05, + "loss": 0.18798828125, + "step": 2373 + }, + { + "epoch": 0.32094634558512886, + "grad_norm": 1.6306990385055542, + "learning_rate": 2.4909148774463572e-05, + "loss": 0.2216320037841797, + "step": 2374 + }, + { + "epoch": 0.32108153781157583, + "grad_norm": 0.8503995537757874, + "learning_rate": 2.4904009429855992e-05, + "loss": 0.13969135284423828, + "step": 2375 + }, + { + "epoch": 0.3212167300380228, + "grad_norm": 1.226109266281128, + "learning_rate": 2.4898868023180844e-05, + "loss": 0.1608600616455078, + "step": 2376 + }, + { + "epoch": 0.3213519222644698, + "grad_norm": 1.2110795974731445, + "learning_rate": 2.4893724555508575e-05, + "loss": 0.1925792694091797, + "step": 2377 + }, + { + "epoch": 0.32148711449091677, + "grad_norm": 1.0036662817001343, + "learning_rate": 2.4888579027910105e-05, + "loss": 0.1652846336364746, + "step": 2378 + }, + { + "epoch": 0.32162230671736375, + "grad_norm": 1.193157434463501, + "learning_rate": 2.4883431441456738e-05, + "loss": 0.2190074920654297, + "step": 2379 + }, + { + "epoch": 0.3217574989438107, + "grad_norm": 1.5323030948638916, + "learning_rate": 2.4878281797220244e-05, + "loss": 0.17658305168151855, + "step": 2380 + }, + { + "epoch": 0.3218926911702577, + "grad_norm": 2.215569496154785, + "learning_rate": 2.4873130096272805e-05, + "loss": 0.24140548706054688, + "step": 2381 + }, + { + "epoch": 0.3220278833967047, + "grad_norm": 1.2992013692855835, + "learning_rate": 2.4867976339687026e-05, + "loss": 0.1514596939086914, + "step": 2382 + }, + { + "epoch": 0.32216307562315166, + "grad_norm": 2.6177167892456055, + "learning_rate": 2.4862820528535955e-05, + "loss": 0.1799936294555664, + "step": 2383 + }, + { + "epoch": 0.32229826784959864, + "grad_norm": 1.3907675743103027, + "learning_rate": 2.4857662663893054e-05, + "loss": 0.21718692779541016, + "step": 2384 + }, + { + "epoch": 0.3224334600760456, + "grad_norm": 1.0709302425384521, + "learning_rate": 2.485250274683222e-05, + "loss": 0.10641193389892578, + "step": 2385 + }, + { + "epoch": 0.3225686523024926, + "grad_norm": 2.103909492492676, + "learning_rate": 2.4847340778427772e-05, + "loss": 0.2388477325439453, + "step": 2386 + }, + { + "epoch": 0.3227038445289396, + "grad_norm": 1.4906790256500244, + "learning_rate": 2.484217675975446e-05, + "loss": 0.18300247192382812, + "step": 2387 + }, + { + "epoch": 0.32283903675538655, + "grad_norm": 2.170334815979004, + "learning_rate": 2.4837010691887466e-05, + "loss": 0.17945575714111328, + "step": 2388 + }, + { + "epoch": 0.32297422898183353, + "grad_norm": 1.061652660369873, + "learning_rate": 2.4831842575902383e-05, + "loss": 0.2139759063720703, + "step": 2389 + }, + { + "epoch": 0.3231094212082805, + "grad_norm": 2.693559408187866, + "learning_rate": 2.482667241287525e-05, + "loss": 0.1947317123413086, + "step": 2390 + }, + { + "epoch": 0.3232446134347275, + "grad_norm": 1.53031587600708, + "learning_rate": 2.4821500203882517e-05, + "loss": 0.15073871612548828, + "step": 2391 + }, + { + "epoch": 0.32337980566117447, + "grad_norm": 1.3599871397018433, + "learning_rate": 2.4816325950001067e-05, + "loss": 0.205078125, + "step": 2392 + }, + { + "epoch": 0.32351499788762145, + "grad_norm": 0.8806572556495667, + "learning_rate": 2.4811149652308205e-05, + "loss": 0.1681804656982422, + "step": 2393 + }, + { + "epoch": 0.3236501901140684, + "grad_norm": 2.000258445739746, + "learning_rate": 2.480597131188167e-05, + "loss": 0.23520851135253906, + "step": 2394 + }, + { + "epoch": 0.3237853823405154, + "grad_norm": 3.5049009323120117, + "learning_rate": 2.4800790929799614e-05, + "loss": 0.2604351043701172, + "step": 2395 + }, + { + "epoch": 0.3239205745669624, + "grad_norm": 2.268362283706665, + "learning_rate": 2.4795608507140623e-05, + "loss": 0.17398452758789062, + "step": 2396 + }, + { + "epoch": 0.32405576679340936, + "grad_norm": 1.0210410356521606, + "learning_rate": 2.4790424044983705e-05, + "loss": 0.21225357055664062, + "step": 2397 + }, + { + "epoch": 0.32419095901985634, + "grad_norm": 1.2955994606018066, + "learning_rate": 2.4785237544408288e-05, + "loss": 0.2442169189453125, + "step": 2398 + }, + { + "epoch": 0.3243261512463033, + "grad_norm": 1.6955705881118774, + "learning_rate": 2.478004900649424e-05, + "loss": 0.23143577575683594, + "step": 2399 + }, + { + "epoch": 0.3244613434727503, + "grad_norm": 2.3066468238830566, + "learning_rate": 2.477485843232183e-05, + "loss": 0.1939082145690918, + "step": 2400 + }, + { + "epoch": 0.3245965356991973, + "grad_norm": 1.6697825193405151, + "learning_rate": 2.476966582297177e-05, + "loss": 0.20110034942626953, + "step": 2401 + }, + { + "epoch": 0.32473172792564425, + "grad_norm": 3.9010043144226074, + "learning_rate": 2.4764471179525188e-05, + "loss": 0.259124755859375, + "step": 2402 + }, + { + "epoch": 0.32486692015209123, + "grad_norm": 1.8830945491790771, + "learning_rate": 2.4759274503063632e-05, + "loss": 0.21721935272216797, + "step": 2403 + }, + { + "epoch": 0.3250021123785382, + "grad_norm": 0.9610902667045593, + "learning_rate": 2.4754075794669088e-05, + "loss": 0.1812286376953125, + "step": 2404 + }, + { + "epoch": 0.3251373046049852, + "grad_norm": 1.3830190896987915, + "learning_rate": 2.4748875055423942e-05, + "loss": 0.21308517456054688, + "step": 2405 + }, + { + "epoch": 0.32527249683143217, + "grad_norm": 1.1364787817001343, + "learning_rate": 2.4743672286411027e-05, + "loss": 0.17693567276000977, + "step": 2406 + }, + { + "epoch": 0.32540768905787915, + "grad_norm": 1.2751948833465576, + "learning_rate": 2.4738467488713582e-05, + "loss": 0.20960617065429688, + "step": 2407 + }, + { + "epoch": 0.3255428812843261, + "grad_norm": 1.0974429845809937, + "learning_rate": 2.473326066341527e-05, + "loss": 0.1755962371826172, + "step": 2408 + }, + { + "epoch": 0.3256780735107731, + "grad_norm": 2.473176956176758, + "learning_rate": 2.4728051811600184e-05, + "loss": 0.20442771911621094, + "step": 2409 + }, + { + "epoch": 0.32581326573722014, + "grad_norm": 0.8829950094223022, + "learning_rate": 2.4722840934352838e-05, + "loss": 0.1776590347290039, + "step": 2410 + }, + { + "epoch": 0.3259484579636671, + "grad_norm": 1.0712145566940308, + "learning_rate": 2.471762803275816e-05, + "loss": 0.14638614654541016, + "step": 2411 + }, + { + "epoch": 0.3260836501901141, + "grad_norm": 1.307511329650879, + "learning_rate": 2.4712413107901504e-05, + "loss": 0.216033935546875, + "step": 2412 + }, + { + "epoch": 0.3262188424165611, + "grad_norm": 1.1126899719238281, + "learning_rate": 2.470719616086865e-05, + "loss": 0.2010488510131836, + "step": 2413 + }, + { + "epoch": 0.32635403464300805, + "grad_norm": 0.8406078219413757, + "learning_rate": 2.4701977192745785e-05, + "loss": 0.1741199493408203, + "step": 2414 + }, + { + "epoch": 0.32648922686945503, + "grad_norm": 2.324248790740967, + "learning_rate": 2.4696756204619535e-05, + "loss": 0.2655830383300781, + "step": 2415 + }, + { + "epoch": 0.326624419095902, + "grad_norm": 2.4082279205322266, + "learning_rate": 2.469153319757693e-05, + "loss": 0.261277437210083, + "step": 2416 + }, + { + "epoch": 0.326759611322349, + "grad_norm": 1.1285223960876465, + "learning_rate": 2.4686308172705433e-05, + "loss": 0.14336681365966797, + "step": 2417 + }, + { + "epoch": 0.32689480354879596, + "grad_norm": 1.115792155265808, + "learning_rate": 2.4681081131092926e-05, + "loss": 0.15285491943359375, + "step": 2418 + }, + { + "epoch": 0.32702999577524294, + "grad_norm": 1.3496662378311157, + "learning_rate": 2.467585207382769e-05, + "loss": 0.2096233367919922, + "step": 2419 + }, + { + "epoch": 0.3271651880016899, + "grad_norm": 1.0363285541534424, + "learning_rate": 2.4670621001998467e-05, + "loss": 0.1822519302368164, + "step": 2420 + }, + { + "epoch": 0.3273003802281369, + "grad_norm": 0.9571179151535034, + "learning_rate": 2.466538791669437e-05, + "loss": 0.1873304843902588, + "step": 2421 + }, + { + "epoch": 0.3274355724545839, + "grad_norm": 1.0557729005813599, + "learning_rate": 2.4660152819004973e-05, + "loss": 0.20589971542358398, + "step": 2422 + }, + { + "epoch": 0.32757076468103086, + "grad_norm": 1.6920645236968994, + "learning_rate": 2.4654915710020246e-05, + "loss": 0.1682581901550293, + "step": 2423 + }, + { + "epoch": 0.32770595690747784, + "grad_norm": 1.0892901420593262, + "learning_rate": 2.464967659083058e-05, + "loss": 0.21057605743408203, + "step": 2424 + }, + { + "epoch": 0.3278411491339248, + "grad_norm": 2.5834336280822754, + "learning_rate": 2.464443546252679e-05, + "loss": 0.15996742248535156, + "step": 2425 + }, + { + "epoch": 0.3279763413603718, + "grad_norm": 1.8357839584350586, + "learning_rate": 2.4639192326200104e-05, + "loss": 0.16809016466140747, + "step": 2426 + }, + { + "epoch": 0.32811153358681877, + "grad_norm": 0.9272051453590393, + "learning_rate": 2.463394718294218e-05, + "loss": 0.10030841827392578, + "step": 2427 + }, + { + "epoch": 0.32824672581326575, + "grad_norm": 2.409123659133911, + "learning_rate": 2.4628700033845072e-05, + "loss": 0.17902874946594238, + "step": 2428 + }, + { + "epoch": 0.32838191803971273, + "grad_norm": 1.497423768043518, + "learning_rate": 2.4623450880001268e-05, + "loss": 0.24622726440429688, + "step": 2429 + }, + { + "epoch": 0.3285171102661597, + "grad_norm": 1.3953806161880493, + "learning_rate": 2.4618199722503676e-05, + "loss": 0.17273902893066406, + "step": 2430 + }, + { + "epoch": 0.3286523024926067, + "grad_norm": 2.72944712638855, + "learning_rate": 2.4612946562445613e-05, + "loss": 0.17840099334716797, + "step": 2431 + }, + { + "epoch": 0.32878749471905366, + "grad_norm": 2.6016595363616943, + "learning_rate": 2.460769140092081e-05, + "loss": 0.23370933532714844, + "step": 2432 + }, + { + "epoch": 0.32892268694550064, + "grad_norm": 1.0173238515853882, + "learning_rate": 2.460243423902342e-05, + "loss": 0.17308282852172852, + "step": 2433 + }, + { + "epoch": 0.3290578791719476, + "grad_norm": 1.8887403011322021, + "learning_rate": 2.459717507784802e-05, + "loss": 0.18835830688476562, + "step": 2434 + }, + { + "epoch": 0.3291930713983946, + "grad_norm": 1.4456491470336914, + "learning_rate": 2.459191391848959e-05, + "loss": 0.1899099349975586, + "step": 2435 + }, + { + "epoch": 0.3293282636248416, + "grad_norm": 1.0256524085998535, + "learning_rate": 2.4586650762043538e-05, + "loss": 0.16742992401123047, + "step": 2436 + }, + { + "epoch": 0.32946345585128856, + "grad_norm": 1.5991610288619995, + "learning_rate": 2.4581385609605665e-05, + "loss": 0.18927955627441406, + "step": 2437 + }, + { + "epoch": 0.32959864807773553, + "grad_norm": 1.734049916267395, + "learning_rate": 2.4576118462272218e-05, + "loss": 0.22829437255859375, + "step": 2438 + }, + { + "epoch": 0.3297338403041825, + "grad_norm": 1.7728407382965088, + "learning_rate": 2.4570849321139836e-05, + "loss": 0.26377248764038086, + "step": 2439 + }, + { + "epoch": 0.3298690325306295, + "grad_norm": 1.804621696472168, + "learning_rate": 2.4565578187305596e-05, + "loss": 0.1879110336303711, + "step": 2440 + }, + { + "epoch": 0.33000422475707647, + "grad_norm": 1.3535054922103882, + "learning_rate": 2.456030506186696e-05, + "loss": 0.1630997657775879, + "step": 2441 + }, + { + "epoch": 0.33013941698352345, + "grad_norm": 1.120963454246521, + "learning_rate": 2.4555029945921832e-05, + "loss": 0.1389904022216797, + "step": 2442 + }, + { + "epoch": 0.3302746092099704, + "grad_norm": 1.0562878847122192, + "learning_rate": 2.4549752840568516e-05, + "loss": 0.16719341278076172, + "step": 2443 + }, + { + "epoch": 0.3304098014364174, + "grad_norm": 2.646899461746216, + "learning_rate": 2.4544473746905733e-05, + "loss": 0.1888103485107422, + "step": 2444 + }, + { + "epoch": 0.3305449936628644, + "grad_norm": 1.506170392036438, + "learning_rate": 2.4539192666032617e-05, + "loss": 0.16521596908569336, + "step": 2445 + }, + { + "epoch": 0.33068018588931136, + "grad_norm": 0.8474022746086121, + "learning_rate": 2.4533909599048718e-05, + "loss": 0.1264805793762207, + "step": 2446 + }, + { + "epoch": 0.33081537811575834, + "grad_norm": 0.7529324889183044, + "learning_rate": 2.4528624547054003e-05, + "loss": 0.17188549041748047, + "step": 2447 + }, + { + "epoch": 0.3309505703422053, + "grad_norm": 2.3013992309570312, + "learning_rate": 2.4523337511148843e-05, + "loss": 0.2099895477294922, + "step": 2448 + }, + { + "epoch": 0.3310857625686523, + "grad_norm": 2.2117671966552734, + "learning_rate": 2.4518048492434028e-05, + "loss": 0.26207733154296875, + "step": 2449 + }, + { + "epoch": 0.3312209547950993, + "grad_norm": 1.1245808601379395, + "learning_rate": 2.4512757492010762e-05, + "loss": 0.13423165678977966, + "step": 2450 + }, + { + "epoch": 0.33135614702154625, + "grad_norm": 1.275658369064331, + "learning_rate": 2.4507464510980652e-05, + "loss": 0.23124170303344727, + "step": 2451 + }, + { + "epoch": 0.33149133924799323, + "grad_norm": 0.8938910365104675, + "learning_rate": 2.450216955044574e-05, + "loss": 0.175262451171875, + "step": 2452 + }, + { + "epoch": 0.3316265314744402, + "grad_norm": 1.8871339559555054, + "learning_rate": 2.449687261150845e-05, + "loss": 0.2356433868408203, + "step": 2453 + }, + { + "epoch": 0.3317617237008872, + "grad_norm": 1.3848785161972046, + "learning_rate": 2.449157369527164e-05, + "loss": 0.17171788215637207, + "step": 2454 + }, + { + "epoch": 0.33189691592733417, + "grad_norm": 1.3380428552627563, + "learning_rate": 2.448627280283857e-05, + "loss": 0.16664600372314453, + "step": 2455 + }, + { + "epoch": 0.33203210815378115, + "grad_norm": 1.6905087232589722, + "learning_rate": 2.4480969935312917e-05, + "loss": 0.19647979736328125, + "step": 2456 + }, + { + "epoch": 0.3321673003802281, + "grad_norm": 0.928114652633667, + "learning_rate": 2.4475665093798766e-05, + "loss": 0.18549537658691406, + "step": 2457 + }, + { + "epoch": 0.3323024926066751, + "grad_norm": 1.8578013181686401, + "learning_rate": 2.447035827940061e-05, + "loss": 0.21084284782409668, + "step": 2458 + }, + { + "epoch": 0.3324376848331221, + "grad_norm": 1.9083585739135742, + "learning_rate": 2.4465049493223356e-05, + "loss": 0.13677185773849487, + "step": 2459 + }, + { + "epoch": 0.33257287705956906, + "grad_norm": 1.1070064306259155, + "learning_rate": 2.4459738736372327e-05, + "loss": 0.22038650512695312, + "step": 2460 + }, + { + "epoch": 0.33270806928601604, + "grad_norm": 1.0057308673858643, + "learning_rate": 2.4454426009953252e-05, + "loss": 0.14522600173950195, + "step": 2461 + }, + { + "epoch": 0.332843261512463, + "grad_norm": 0.8012288212776184, + "learning_rate": 2.4449111315072254e-05, + "loss": 0.18804454803466797, + "step": 2462 + }, + { + "epoch": 0.33297845373891, + "grad_norm": 2.2348666191101074, + "learning_rate": 2.44437946528359e-05, + "loss": 0.24016952514648438, + "step": 2463 + }, + { + "epoch": 0.333113645965357, + "grad_norm": 1.6972193717956543, + "learning_rate": 2.4438476024351138e-05, + "loss": 0.15994834899902344, + "step": 2464 + }, + { + "epoch": 0.33324883819180395, + "grad_norm": 1.1029222011566162, + "learning_rate": 2.4433155430725333e-05, + "loss": 0.20725440979003906, + "step": 2465 + }, + { + "epoch": 0.33338403041825093, + "grad_norm": 2.482452630996704, + "learning_rate": 2.4427832873066262e-05, + "loss": 0.21537017822265625, + "step": 2466 + }, + { + "epoch": 0.3335192226446979, + "grad_norm": 2.408444404602051, + "learning_rate": 2.4422508352482113e-05, + "loss": 0.16070270538330078, + "step": 2467 + }, + { + "epoch": 0.3336544148711449, + "grad_norm": 1.1893521547317505, + "learning_rate": 2.441718187008148e-05, + "loss": 0.1671581268310547, + "step": 2468 + }, + { + "epoch": 0.33378960709759187, + "grad_norm": 1.0434621572494507, + "learning_rate": 2.441185342697336e-05, + "loss": 0.1521902084350586, + "step": 2469 + }, + { + "epoch": 0.33392479932403885, + "grad_norm": 1.1673152446746826, + "learning_rate": 2.440652302426717e-05, + "loss": 0.20055389404296875, + "step": 2470 + }, + { + "epoch": 0.3340599915504858, + "grad_norm": 0.931617259979248, + "learning_rate": 2.440119066307272e-05, + "loss": 0.1866617202758789, + "step": 2471 + }, + { + "epoch": 0.3341951837769328, + "grad_norm": 0.9258384704589844, + "learning_rate": 2.4395856344500244e-05, + "loss": 0.16117477416992188, + "step": 2472 + }, + { + "epoch": 0.3343303760033798, + "grad_norm": 1.4245527982711792, + "learning_rate": 2.4390520069660377e-05, + "loss": 0.21545791625976562, + "step": 2473 + }, + { + "epoch": 0.33446556822982676, + "grad_norm": 1.1898967027664185, + "learning_rate": 2.4385181839664146e-05, + "loss": 0.24100685119628906, + "step": 2474 + }, + { + "epoch": 0.33460076045627374, + "grad_norm": 1.3068917989730835, + "learning_rate": 2.437984165562301e-05, + "loss": 0.20553207397460938, + "step": 2475 + }, + { + "epoch": 0.3347359526827207, + "grad_norm": 0.9906862378120422, + "learning_rate": 2.4374499518648827e-05, + "loss": 0.11668205261230469, + "step": 2476 + }, + { + "epoch": 0.33487114490916775, + "grad_norm": 0.8711386322975159, + "learning_rate": 2.436915542985385e-05, + "loss": 0.1628131866455078, + "step": 2477 + }, + { + "epoch": 0.33500633713561473, + "grad_norm": 1.1357275247573853, + "learning_rate": 2.436380939035075e-05, + "loss": 0.17133522033691406, + "step": 2478 + }, + { + "epoch": 0.3351415293620617, + "grad_norm": 0.614902675151825, + "learning_rate": 2.43584614012526e-05, + "loss": 0.12802362442016602, + "step": 2479 + }, + { + "epoch": 0.3352767215885087, + "grad_norm": 0.7127976417541504, + "learning_rate": 2.4353111463672882e-05, + "loss": 0.14868545532226562, + "step": 2480 + }, + { + "epoch": 0.33541191381495566, + "grad_norm": 2.096118450164795, + "learning_rate": 2.4347759578725482e-05, + "loss": 0.2213134765625, + "step": 2481 + }, + { + "epoch": 0.33554710604140264, + "grad_norm": 1.228935956954956, + "learning_rate": 2.4342405747524685e-05, + "loss": 0.19279241561889648, + "step": 2482 + }, + { + "epoch": 0.3356822982678496, + "grad_norm": 2.142918825149536, + "learning_rate": 2.4337049971185194e-05, + "loss": 0.24425315856933594, + "step": 2483 + }, + { + "epoch": 0.3358174904942966, + "grad_norm": 1.8035968542099, + "learning_rate": 2.433169225082211e-05, + "loss": 0.20528411865234375, + "step": 2484 + }, + { + "epoch": 0.3359526827207436, + "grad_norm": 0.8939853310585022, + "learning_rate": 2.432633258755093e-05, + "loss": 0.1780223846435547, + "step": 2485 + }, + { + "epoch": 0.33608787494719056, + "grad_norm": 1.1747101545333862, + "learning_rate": 2.432097098248758e-05, + "loss": 0.1714015007019043, + "step": 2486 + }, + { + "epoch": 0.33622306717363754, + "grad_norm": 1.9472585916519165, + "learning_rate": 2.4315607436748362e-05, + "loss": 0.17976665496826172, + "step": 2487 + }, + { + "epoch": 0.3363582594000845, + "grad_norm": 2.374389886856079, + "learning_rate": 2.4310241951449997e-05, + "loss": 0.17896461486816406, + "step": 2488 + }, + { + "epoch": 0.3364934516265315, + "grad_norm": 2.0952980518341064, + "learning_rate": 2.4304874527709614e-05, + "loss": 0.15448570251464844, + "step": 2489 + }, + { + "epoch": 0.33662864385297847, + "grad_norm": 1.0373332500457764, + "learning_rate": 2.429950516664473e-05, + "loss": 0.17436861991882324, + "step": 2490 + }, + { + "epoch": 0.33676383607942545, + "grad_norm": 1.2658730745315552, + "learning_rate": 2.4294133869373284e-05, + "loss": 0.19199371337890625, + "step": 2491 + }, + { + "epoch": 0.33689902830587243, + "grad_norm": 2.53019118309021, + "learning_rate": 2.42887606370136e-05, + "loss": 0.1992034912109375, + "step": 2492 + }, + { + "epoch": 0.3370342205323194, + "grad_norm": 1.8225162029266357, + "learning_rate": 2.428338547068442e-05, + "loss": 0.15047931671142578, + "step": 2493 + }, + { + "epoch": 0.3371694127587664, + "grad_norm": 2.6724796295166016, + "learning_rate": 2.427800837150488e-05, + "loss": 0.1808147430419922, + "step": 2494 + }, + { + "epoch": 0.33730460498521336, + "grad_norm": 1.2367048263549805, + "learning_rate": 2.4272629340594518e-05, + "loss": 0.1918959617614746, + "step": 2495 + }, + { + "epoch": 0.33743979721166034, + "grad_norm": 1.2357866764068604, + "learning_rate": 2.426724837907328e-05, + "loss": 0.20812225341796875, + "step": 2496 + }, + { + "epoch": 0.3375749894381073, + "grad_norm": 1.421576976776123, + "learning_rate": 2.4261865488061512e-05, + "loss": 0.19313526153564453, + "step": 2497 + }, + { + "epoch": 0.3377101816645543, + "grad_norm": 0.901887059211731, + "learning_rate": 2.4256480668679958e-05, + "loss": 0.16237592697143555, + "step": 2498 + }, + { + "epoch": 0.3378453738910013, + "grad_norm": 1.7934291362762451, + "learning_rate": 2.4251093922049766e-05, + "loss": 0.17233753204345703, + "step": 2499 + }, + { + "epoch": 0.33798056611744826, + "grad_norm": 0.7717118859291077, + "learning_rate": 2.4245705249292494e-05, + "loss": 0.14551448822021484, + "step": 2500 + }, + { + "epoch": 0.33811575834389523, + "grad_norm": 2.1062262058258057, + "learning_rate": 2.4240314651530073e-05, + "loss": 0.2516136169433594, + "step": 2501 + }, + { + "epoch": 0.3382509505703422, + "grad_norm": 0.8947479128837585, + "learning_rate": 2.4234922129884873e-05, + "loss": 0.16076993942260742, + "step": 2502 + }, + { + "epoch": 0.3383861427967892, + "grad_norm": 1.0195494890213013, + "learning_rate": 2.4229527685479644e-05, + "loss": 0.2044377326965332, + "step": 2503 + }, + { + "epoch": 0.33852133502323617, + "grad_norm": 0.8918678164482117, + "learning_rate": 2.4224131319437523e-05, + "loss": 0.17592430114746094, + "step": 2504 + }, + { + "epoch": 0.33865652724968315, + "grad_norm": 1.0139936208724976, + "learning_rate": 2.421873303288208e-05, + "loss": 0.1934947967529297, + "step": 2505 + }, + { + "epoch": 0.3387917194761301, + "grad_norm": 1.0904579162597656, + "learning_rate": 2.4213332826937255e-05, + "loss": 0.2616539001464844, + "step": 2506 + }, + { + "epoch": 0.3389269117025771, + "grad_norm": 1.538239598274231, + "learning_rate": 2.4207930702727404e-05, + "loss": 0.1877422332763672, + "step": 2507 + }, + { + "epoch": 0.3390621039290241, + "grad_norm": 1.317819356918335, + "learning_rate": 2.420252666137728e-05, + "loss": 0.15736961364746094, + "step": 2508 + }, + { + "epoch": 0.33919729615547106, + "grad_norm": 0.8357636332511902, + "learning_rate": 2.419712070401203e-05, + "loss": 0.18008995056152344, + "step": 2509 + }, + { + "epoch": 0.33933248838191804, + "grad_norm": 1.422378420829773, + "learning_rate": 2.4191712831757203e-05, + "loss": 0.17917823791503906, + "step": 2510 + }, + { + "epoch": 0.339467680608365, + "grad_norm": 1.3978854417800903, + "learning_rate": 2.418630304573875e-05, + "loss": 0.12674283981323242, + "step": 2511 + }, + { + "epoch": 0.339602872834812, + "grad_norm": 1.2488456964492798, + "learning_rate": 2.418089134708302e-05, + "loss": 0.22544097900390625, + "step": 2512 + }, + { + "epoch": 0.339738065061259, + "grad_norm": 1.9491636753082275, + "learning_rate": 2.4175477736916743e-05, + "loss": 0.2161998748779297, + "step": 2513 + }, + { + "epoch": 0.33987325728770595, + "grad_norm": 2.983546495437622, + "learning_rate": 2.4170062216367082e-05, + "loss": 0.21469497680664062, + "step": 2514 + }, + { + "epoch": 0.34000844951415293, + "grad_norm": 1.4879140853881836, + "learning_rate": 2.416464478656156e-05, + "loss": 0.2046222686767578, + "step": 2515 + }, + { + "epoch": 0.3401436417405999, + "grad_norm": 1.793262004852295, + "learning_rate": 2.4159225448628123e-05, + "loss": 0.19949722290039062, + "step": 2516 + }, + { + "epoch": 0.3402788339670469, + "grad_norm": 2.3415403366088867, + "learning_rate": 2.4153804203695103e-05, + "loss": 0.23149585723876953, + "step": 2517 + }, + { + "epoch": 0.34041402619349387, + "grad_norm": 3.3753397464752197, + "learning_rate": 2.4148381052891236e-05, + "loss": 0.20917415618896484, + "step": 2518 + }, + { + "epoch": 0.34054921841994085, + "grad_norm": 1.089974284172058, + "learning_rate": 2.4142955997345648e-05, + "loss": 0.18297362327575684, + "step": 2519 + }, + { + "epoch": 0.3406844106463878, + "grad_norm": 1.2006945610046387, + "learning_rate": 2.4137529038187864e-05, + "loss": 0.22381591796875, + "step": 2520 + }, + { + "epoch": 0.3408196028728348, + "grad_norm": 1.2854912281036377, + "learning_rate": 2.413210017654781e-05, + "loss": 0.1605682373046875, + "step": 2521 + }, + { + "epoch": 0.3409547950992818, + "grad_norm": 3.5291454792022705, + "learning_rate": 2.4126669413555802e-05, + "loss": 0.19884204864501953, + "step": 2522 + }, + { + "epoch": 0.34108998732572876, + "grad_norm": 0.9140754342079163, + "learning_rate": 2.4121236750342548e-05, + "loss": 0.1940155029296875, + "step": 2523 + }, + { + "epoch": 0.34122517955217574, + "grad_norm": 1.0800786018371582, + "learning_rate": 2.4115802188039165e-05, + "loss": 0.16860485076904297, + "step": 2524 + }, + { + "epoch": 0.3413603717786227, + "grad_norm": 1.7351289987564087, + "learning_rate": 2.4110365727777156e-05, + "loss": 0.190399169921875, + "step": 2525 + }, + { + "epoch": 0.3414955640050697, + "grad_norm": 1.2880914211273193, + "learning_rate": 2.410492737068842e-05, + "loss": 0.1831836700439453, + "step": 2526 + }, + { + "epoch": 0.3416307562315167, + "grad_norm": 0.886844277381897, + "learning_rate": 2.409948711790525e-05, + "loss": 0.18851280212402344, + "step": 2527 + }, + { + "epoch": 0.34176594845796365, + "grad_norm": 1.6285735368728638, + "learning_rate": 2.4094044970560336e-05, + "loss": 0.2408885955810547, + "step": 2528 + }, + { + "epoch": 0.34190114068441063, + "grad_norm": 0.7682656049728394, + "learning_rate": 2.4088600929786767e-05, + "loss": 0.1581580638885498, + "step": 2529 + }, + { + "epoch": 0.3420363329108576, + "grad_norm": 0.9529601335525513, + "learning_rate": 2.408315499671802e-05, + "loss": 0.19876766204833984, + "step": 2530 + }, + { + "epoch": 0.3421715251373046, + "grad_norm": 1.5328891277313232, + "learning_rate": 2.407770717248796e-05, + "loss": 0.13497352600097656, + "step": 2531 + }, + { + "epoch": 0.34230671736375157, + "grad_norm": 1.8998607397079468, + "learning_rate": 2.407225745823086e-05, + "loss": 0.20287132263183594, + "step": 2532 + }, + { + "epoch": 0.34244190959019855, + "grad_norm": 1.6016870737075806, + "learning_rate": 2.4066805855081378e-05, + "loss": 0.1540660858154297, + "step": 2533 + }, + { + "epoch": 0.3425771018166455, + "grad_norm": 3.1063601970672607, + "learning_rate": 2.406135236417457e-05, + "loss": 0.2858104705810547, + "step": 2534 + }, + { + "epoch": 0.3427122940430925, + "grad_norm": 1.2761781215667725, + "learning_rate": 2.4055896986645875e-05, + "loss": 0.19180679321289062, + "step": 2535 + }, + { + "epoch": 0.3428474862695395, + "grad_norm": 2.3274855613708496, + "learning_rate": 2.4050439723631136e-05, + "loss": 0.15848922729492188, + "step": 2536 + }, + { + "epoch": 0.34298267849598646, + "grad_norm": 0.9571801424026489, + "learning_rate": 2.404498057626659e-05, + "loss": 0.16309762001037598, + "step": 2537 + }, + { + "epoch": 0.34311787072243344, + "grad_norm": 4.079365253448486, + "learning_rate": 2.4039519545688848e-05, + "loss": 0.2073078155517578, + "step": 2538 + }, + { + "epoch": 0.3432530629488804, + "grad_norm": 1.35211181640625, + "learning_rate": 2.4034056633034932e-05, + "loss": 0.19197654724121094, + "step": 2539 + }, + { + "epoch": 0.3433882551753274, + "grad_norm": 1.4108097553253174, + "learning_rate": 2.402859183944225e-05, + "loss": 0.23297119140625, + "step": 2540 + }, + { + "epoch": 0.3435234474017744, + "grad_norm": 1.9342842102050781, + "learning_rate": 2.4023125166048597e-05, + "loss": 0.20113182067871094, + "step": 2541 + }, + { + "epoch": 0.34365863962822135, + "grad_norm": 0.9363011121749878, + "learning_rate": 2.401765661399218e-05, + "loss": 0.17429351806640625, + "step": 2542 + }, + { + "epoch": 0.34379383185466833, + "grad_norm": 1.118664264678955, + "learning_rate": 2.4012186184411556e-05, + "loss": 0.1884288787841797, + "step": 2543 + }, + { + "epoch": 0.3439290240811153, + "grad_norm": 1.6545270681381226, + "learning_rate": 2.400671387844571e-05, + "loss": 0.212799072265625, + "step": 2544 + }, + { + "epoch": 0.34406421630756234, + "grad_norm": 1.051919937133789, + "learning_rate": 2.4001239697234008e-05, + "loss": 0.16757678985595703, + "step": 2545 + }, + { + "epoch": 0.3441994085340093, + "grad_norm": 2.737590789794922, + "learning_rate": 2.3995763641916205e-05, + "loss": 0.1410379409790039, + "step": 2546 + }, + { + "epoch": 0.3443346007604563, + "grad_norm": 2.5880064964294434, + "learning_rate": 2.3990285713632436e-05, + "loss": 0.20301103591918945, + "step": 2547 + }, + { + "epoch": 0.3444697929869033, + "grad_norm": 1.4437180757522583, + "learning_rate": 2.398480591352324e-05, + "loss": 0.16388225555419922, + "step": 2548 + }, + { + "epoch": 0.34460498521335026, + "grad_norm": 0.7233229875564575, + "learning_rate": 2.3979324242729537e-05, + "loss": 0.12668228149414062, + "step": 2549 + }, + { + "epoch": 0.34474017743979724, + "grad_norm": 1.4757874011993408, + "learning_rate": 2.3973840702392646e-05, + "loss": 0.20982742309570312, + "step": 2550 + }, + { + "epoch": 0.3448753696662442, + "grad_norm": 0.8445335626602173, + "learning_rate": 2.3968355293654267e-05, + "loss": 0.1392221450805664, + "step": 2551 + }, + { + "epoch": 0.3450105618926912, + "grad_norm": 0.854028046131134, + "learning_rate": 2.396286801765649e-05, + "loss": 0.17388153076171875, + "step": 2552 + }, + { + "epoch": 0.34514575411913817, + "grad_norm": 1.141477346420288, + "learning_rate": 2.3957378875541795e-05, + "loss": 0.15336370468139648, + "step": 2553 + }, + { + "epoch": 0.34528094634558515, + "grad_norm": 2.723266839981079, + "learning_rate": 2.395188786845305e-05, + "loss": 0.2098388671875, + "step": 2554 + }, + { + "epoch": 0.3454161385720321, + "grad_norm": 0.9907428026199341, + "learning_rate": 2.3946394997533516e-05, + "loss": 0.16144514083862305, + "step": 2555 + }, + { + "epoch": 0.3455513307984791, + "grad_norm": 2.478806257247925, + "learning_rate": 2.3940900263926833e-05, + "loss": 0.186614990234375, + "step": 2556 + }, + { + "epoch": 0.3456865230249261, + "grad_norm": 4.923023700714111, + "learning_rate": 2.393540366877704e-05, + "loss": 0.22186851501464844, + "step": 2557 + }, + { + "epoch": 0.34582171525137306, + "grad_norm": 2.883528232574463, + "learning_rate": 2.392990521322855e-05, + "loss": 0.18793392181396484, + "step": 2558 + }, + { + "epoch": 0.34595690747782004, + "grad_norm": 1.6688995361328125, + "learning_rate": 2.392440489842618e-05, + "loss": 0.2287139892578125, + "step": 2559 + }, + { + "epoch": 0.346092099704267, + "grad_norm": 1.709358811378479, + "learning_rate": 2.3918902725515118e-05, + "loss": 0.20283222198486328, + "step": 2560 + }, + { + "epoch": 0.346227291930714, + "grad_norm": 1.1052404642105103, + "learning_rate": 2.391339869564094e-05, + "loss": 0.21852874755859375, + "step": 2561 + }, + { + "epoch": 0.346362484157161, + "grad_norm": 0.6618618965148926, + "learning_rate": 2.3907892809949628e-05, + "loss": 0.13562345504760742, + "step": 2562 + }, + { + "epoch": 0.34649767638360796, + "grad_norm": 1.1674070358276367, + "learning_rate": 2.390238506958753e-05, + "loss": 0.19200897216796875, + "step": 2563 + }, + { + "epoch": 0.34663286861005493, + "grad_norm": 0.8596978783607483, + "learning_rate": 2.3896875475701387e-05, + "loss": 0.17667770385742188, + "step": 2564 + }, + { + "epoch": 0.3467680608365019, + "grad_norm": 0.8014450073242188, + "learning_rate": 2.3891364029438323e-05, + "loss": 0.12021541595458984, + "step": 2565 + }, + { + "epoch": 0.3469032530629489, + "grad_norm": 1.8242905139923096, + "learning_rate": 2.3885850731945857e-05, + "loss": 0.23589324951171875, + "step": 2566 + }, + { + "epoch": 0.34703844528939587, + "grad_norm": 2.3304731845855713, + "learning_rate": 2.3880335584371884e-05, + "loss": 0.18854045867919922, + "step": 2567 + }, + { + "epoch": 0.34717363751584285, + "grad_norm": 1.4412691593170166, + "learning_rate": 2.387481858786468e-05, + "loss": 0.2093658447265625, + "step": 2568 + }, + { + "epoch": 0.3473088297422898, + "grad_norm": 1.801837682723999, + "learning_rate": 2.386929974357293e-05, + "loss": 0.10120463371276855, + "step": 2569 + }, + { + "epoch": 0.3474440219687368, + "grad_norm": 0.917465090751648, + "learning_rate": 2.386377905264567e-05, + "loss": 0.19179609417915344, + "step": 2570 + }, + { + "epoch": 0.3475792141951838, + "grad_norm": 1.4551877975463867, + "learning_rate": 2.3858256516232346e-05, + "loss": 0.2104778289794922, + "step": 2571 + }, + { + "epoch": 0.34771440642163076, + "grad_norm": 1.2551275491714478, + "learning_rate": 2.3852732135482775e-05, + "loss": 0.2025623321533203, + "step": 2572 + }, + { + "epoch": 0.34784959864807774, + "grad_norm": 2.55228853225708, + "learning_rate": 2.3847205911547166e-05, + "loss": 0.1773223876953125, + "step": 2573 + }, + { + "epoch": 0.3479847908745247, + "grad_norm": 0.7685827016830444, + "learning_rate": 2.3841677845576108e-05, + "loss": 0.12705326080322266, + "step": 2574 + }, + { + "epoch": 0.3481199831009717, + "grad_norm": 0.950711190700531, + "learning_rate": 2.383614793872057e-05, + "loss": 0.24342632293701172, + "step": 2575 + }, + { + "epoch": 0.3482551753274187, + "grad_norm": 1.8239257335662842, + "learning_rate": 2.3830616192131913e-05, + "loss": 0.2003173828125, + "step": 2576 + }, + { + "epoch": 0.34839036755386565, + "grad_norm": 0.8463389873504639, + "learning_rate": 2.3825082606961876e-05, + "loss": 0.19254016876220703, + "step": 2577 + }, + { + "epoch": 0.34852555978031263, + "grad_norm": 1.1968097686767578, + "learning_rate": 2.3819547184362575e-05, + "loss": 0.17284011840820312, + "step": 2578 + }, + { + "epoch": 0.3486607520067596, + "grad_norm": 0.8905538320541382, + "learning_rate": 2.3814009925486522e-05, + "loss": 0.21463394165039062, + "step": 2579 + }, + { + "epoch": 0.3487959442332066, + "grad_norm": 0.9455034136772156, + "learning_rate": 2.38084708314866e-05, + "loss": 0.213592529296875, + "step": 2580 + }, + { + "epoch": 0.34893113645965357, + "grad_norm": 0.8118772506713867, + "learning_rate": 2.380292990351608e-05, + "loss": 0.17920255661010742, + "step": 2581 + }, + { + "epoch": 0.34906632868610055, + "grad_norm": 0.8592420816421509, + "learning_rate": 2.3797387142728607e-05, + "loss": 0.18619155883789062, + "step": 2582 + }, + { + "epoch": 0.3492015209125475, + "grad_norm": 1.451931118965149, + "learning_rate": 2.379184255027822e-05, + "loss": 0.19113922119140625, + "step": 2583 + }, + { + "epoch": 0.3493367131389945, + "grad_norm": 1.0150200128555298, + "learning_rate": 2.378629612731933e-05, + "loss": 0.21033525466918945, + "step": 2584 + }, + { + "epoch": 0.3494719053654415, + "grad_norm": 0.9090067744255066, + "learning_rate": 2.3780747875006735e-05, + "loss": 0.18269860744476318, + "step": 2585 + }, + { + "epoch": 0.34960709759188846, + "grad_norm": 1.1425822973251343, + "learning_rate": 2.37751977944956e-05, + "loss": 0.1976017951965332, + "step": 2586 + }, + { + "epoch": 0.34974228981833544, + "grad_norm": 2.177725076675415, + "learning_rate": 2.3769645886941497e-05, + "loss": 0.18825340270996094, + "step": 2587 + }, + { + "epoch": 0.3498774820447824, + "grad_norm": 0.8271177411079407, + "learning_rate": 2.376409215350035e-05, + "loss": 0.20056915283203125, + "step": 2588 + }, + { + "epoch": 0.3500126742712294, + "grad_norm": 2.2640480995178223, + "learning_rate": 2.3758536595328486e-05, + "loss": 0.2184314727783203, + "step": 2589 + }, + { + "epoch": 0.3501478664976764, + "grad_norm": 2.270983934402466, + "learning_rate": 2.375297921358259e-05, + "loss": 0.16173028945922852, + "step": 2590 + }, + { + "epoch": 0.35028305872412335, + "grad_norm": 1.3338305950164795, + "learning_rate": 2.3747420009419745e-05, + "loss": 0.16893577575683594, + "step": 2591 + }, + { + "epoch": 0.35041825095057033, + "grad_norm": 1.061629056930542, + "learning_rate": 2.3741858983997415e-05, + "loss": 0.1718158721923828, + "step": 2592 + }, + { + "epoch": 0.3505534431770173, + "grad_norm": 1.5213676691055298, + "learning_rate": 2.373629613847342e-05, + "loss": 0.16145992279052734, + "step": 2593 + }, + { + "epoch": 0.3506886354034643, + "grad_norm": 1.0195932388305664, + "learning_rate": 2.3730731474005988e-05, + "loss": 0.1998300552368164, + "step": 2594 + }, + { + "epoch": 0.35082382762991127, + "grad_norm": 0.7545379400253296, + "learning_rate": 2.37251649917537e-05, + "loss": 0.09798622131347656, + "step": 2595 + }, + { + "epoch": 0.35095901985635825, + "grad_norm": 0.88923579454422, + "learning_rate": 2.3719596692875534e-05, + "loss": 0.15967369079589844, + "step": 2596 + }, + { + "epoch": 0.3510942120828052, + "grad_norm": 1.2652108669281006, + "learning_rate": 2.3714026578530836e-05, + "loss": 0.2121795415878296, + "step": 2597 + }, + { + "epoch": 0.3512294043092522, + "grad_norm": 1.1806765794754028, + "learning_rate": 2.370845464987934e-05, + "loss": 0.1807718276977539, + "step": 2598 + }, + { + "epoch": 0.3513645965356992, + "grad_norm": 1.8119354248046875, + "learning_rate": 2.370288090808114e-05, + "loss": 0.14874553680419922, + "step": 2599 + }, + { + "epoch": 0.35149978876214616, + "grad_norm": 1.0228831768035889, + "learning_rate": 2.369730535429673e-05, + "loss": 0.13712120056152344, + "step": 2600 + }, + { + "epoch": 0.35163498098859314, + "grad_norm": 1.4343212842941284, + "learning_rate": 2.369172798968697e-05, + "loss": 0.19031810760498047, + "step": 2601 + }, + { + "epoch": 0.3517701732150401, + "grad_norm": 1.2089320421218872, + "learning_rate": 2.3686148815413083e-05, + "loss": 0.20747804641723633, + "step": 2602 + }, + { + "epoch": 0.3519053654414871, + "grad_norm": 1.4964444637298584, + "learning_rate": 2.3680567832636695e-05, + "loss": 0.18310546875, + "step": 2603 + }, + { + "epoch": 0.3520405576679341, + "grad_norm": 2.436677932739258, + "learning_rate": 2.3674985042519795e-05, + "loss": 0.2509002685546875, + "step": 2604 + }, + { + "epoch": 0.35217574989438105, + "grad_norm": 1.6857314109802246, + "learning_rate": 2.366940044622475e-05, + "loss": 0.16338443756103516, + "step": 2605 + }, + { + "epoch": 0.35231094212082803, + "grad_norm": 1.4831477403640747, + "learning_rate": 2.3663814044914302e-05, + "loss": 0.23669815063476562, + "step": 2606 + }, + { + "epoch": 0.352446134347275, + "grad_norm": 1.2123229503631592, + "learning_rate": 2.3658225839751566e-05, + "loss": 0.18099260330200195, + "step": 2607 + }, + { + "epoch": 0.352581326573722, + "grad_norm": 1.2879902124404907, + "learning_rate": 2.3652635831900043e-05, + "loss": 0.16869163513183594, + "step": 2608 + }, + { + "epoch": 0.35271651880016897, + "grad_norm": 1.2032151222229004, + "learning_rate": 2.3647044022523595e-05, + "loss": 0.23766326904296875, + "step": 2609 + }, + { + "epoch": 0.35285171102661594, + "grad_norm": 1.1872565746307373, + "learning_rate": 2.364145041278647e-05, + "loss": 0.19086599349975586, + "step": 2610 + }, + { + "epoch": 0.3529869032530629, + "grad_norm": 0.7739558219909668, + "learning_rate": 2.3635855003853287e-05, + "loss": 0.19658470153808594, + "step": 2611 + }, + { + "epoch": 0.3531220954795099, + "grad_norm": 1.058189034461975, + "learning_rate": 2.363025779688904e-05, + "loss": 0.1699427366256714, + "step": 2612 + }, + { + "epoch": 0.35325728770595693, + "grad_norm": 0.8167728185653687, + "learning_rate": 2.3624658793059103e-05, + "loss": 0.16359329223632812, + "step": 2613 + }, + { + "epoch": 0.3533924799324039, + "grad_norm": 1.3814804553985596, + "learning_rate": 2.3619057993529204e-05, + "loss": 0.1732616424560547, + "step": 2614 + }, + { + "epoch": 0.3535276721588509, + "grad_norm": 1.6405669450759888, + "learning_rate": 2.3613455399465475e-05, + "loss": 0.22738027572631836, + "step": 2615 + }, + { + "epoch": 0.35366286438529787, + "grad_norm": 1.991821527481079, + "learning_rate": 2.3607851012034394e-05, + "loss": 0.2736663818359375, + "step": 2616 + }, + { + "epoch": 0.35379805661174485, + "grad_norm": 1.369095802307129, + "learning_rate": 2.3602244832402838e-05, + "loss": 0.19951248168945312, + "step": 2617 + }, + { + "epoch": 0.3539332488381918, + "grad_norm": 2.1316335201263428, + "learning_rate": 2.3596636861738024e-05, + "loss": 0.24468517303466797, + "step": 2618 + }, + { + "epoch": 0.3540684410646388, + "grad_norm": 0.9557300806045532, + "learning_rate": 2.3591027101207578e-05, + "loss": 0.1684889793395996, + "step": 2619 + }, + { + "epoch": 0.3542036332910858, + "grad_norm": 0.7996432185173035, + "learning_rate": 2.3585415551979476e-05, + "loss": 0.18543052673339844, + "step": 2620 + }, + { + "epoch": 0.35433882551753276, + "grad_norm": 0.9396438002586365, + "learning_rate": 2.3579802215222076e-05, + "loss": 0.16266250610351562, + "step": 2621 + }, + { + "epoch": 0.35447401774397974, + "grad_norm": 1.9129470586776733, + "learning_rate": 2.35741870921041e-05, + "loss": 0.16109466552734375, + "step": 2622 + }, + { + "epoch": 0.3546092099704267, + "grad_norm": 1.0115118026733398, + "learning_rate": 2.3568570183794645e-05, + "loss": 0.16150188446044922, + "step": 2623 + }, + { + "epoch": 0.3547444021968737, + "grad_norm": 2.092118501663208, + "learning_rate": 2.356295149146319e-05, + "loss": 0.2214651107788086, + "step": 2624 + }, + { + "epoch": 0.3548795944233207, + "grad_norm": 2.7650279998779297, + "learning_rate": 2.3557331016279567e-05, + "loss": 0.1821880340576172, + "step": 2625 + }, + { + "epoch": 0.35501478664976766, + "grad_norm": 1.164493203163147, + "learning_rate": 2.3551708759413998e-05, + "loss": 0.2632560729980469, + "step": 2626 + }, + { + "epoch": 0.35514997887621463, + "grad_norm": 0.9716615080833435, + "learning_rate": 2.354608472203706e-05, + "loss": 0.1773982048034668, + "step": 2627 + }, + { + "epoch": 0.3552851711026616, + "grad_norm": 1.2671552896499634, + "learning_rate": 2.3540458905319705e-05, + "loss": 0.18938159942626953, + "step": 2628 + }, + { + "epoch": 0.3554203633291086, + "grad_norm": 1.4439524412155151, + "learning_rate": 2.3534831310433264e-05, + "loss": 0.15556955337524414, + "step": 2629 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.9766533970832825, + "learning_rate": 2.3529201938549434e-05, + "loss": 0.19466400146484375, + "step": 2630 + }, + { + "epoch": 0.35569074778200255, + "grad_norm": 2.0301287174224854, + "learning_rate": 2.3523570790840274e-05, + "loss": 0.2008056640625, + "step": 2631 + }, + { + "epoch": 0.3558259400084495, + "grad_norm": 1.4814296960830688, + "learning_rate": 2.3517937868478228e-05, + "loss": 0.22864580154418945, + "step": 2632 + }, + { + "epoch": 0.3559611322348965, + "grad_norm": 2.8422672748565674, + "learning_rate": 2.3512303172636092e-05, + "loss": 0.21220684051513672, + "step": 2633 + }, + { + "epoch": 0.3560963244613435, + "grad_norm": 1.3261207342147827, + "learning_rate": 2.3506666704487033e-05, + "loss": 0.2205190658569336, + "step": 2634 + }, + { + "epoch": 0.35623151668779046, + "grad_norm": 0.8513601422309875, + "learning_rate": 2.3501028465204614e-05, + "loss": 0.12939000129699707, + "step": 2635 + }, + { + "epoch": 0.35636670891423744, + "grad_norm": 1.9643076658248901, + "learning_rate": 2.3495388455962734e-05, + "loss": 0.22187423706054688, + "step": 2636 + }, + { + "epoch": 0.3565019011406844, + "grad_norm": 0.770757257938385, + "learning_rate": 2.3489746677935673e-05, + "loss": 0.16515254974365234, + "step": 2637 + }, + { + "epoch": 0.3566370933671314, + "grad_norm": 1.5731253623962402, + "learning_rate": 2.3484103132298082e-05, + "loss": 0.19867467880249023, + "step": 2638 + }, + { + "epoch": 0.3567722855935784, + "grad_norm": 1.6666430234909058, + "learning_rate": 2.347845782022497e-05, + "loss": 0.21525192260742188, + "step": 2639 + }, + { + "epoch": 0.35690747782002535, + "grad_norm": 1.06008780002594, + "learning_rate": 2.3472810742891734e-05, + "loss": 0.1654644012451172, + "step": 2640 + }, + { + "epoch": 0.35704267004647233, + "grad_norm": 0.8769435286521912, + "learning_rate": 2.3467161901474118e-05, + "loss": 0.17406654357910156, + "step": 2641 + }, + { + "epoch": 0.3571778622729193, + "grad_norm": 1.8264845609664917, + "learning_rate": 2.346151129714824e-05, + "loss": 0.18178749084472656, + "step": 2642 + }, + { + "epoch": 0.3573130544993663, + "grad_norm": 2.356940507888794, + "learning_rate": 2.3455858931090588e-05, + "loss": 0.19235610961914062, + "step": 2643 + }, + { + "epoch": 0.35744824672581327, + "grad_norm": 3.2623207569122314, + "learning_rate": 2.3450204804478014e-05, + "loss": 0.17163968086242676, + "step": 2644 + }, + { + "epoch": 0.35758343895226025, + "grad_norm": 2.5361011028289795, + "learning_rate": 2.344454891848774e-05, + "loss": 0.19122600555419922, + "step": 2645 + }, + { + "epoch": 0.3577186311787072, + "grad_norm": 0.6441481113433838, + "learning_rate": 2.3438891274297348e-05, + "loss": 0.13372802734375, + "step": 2646 + }, + { + "epoch": 0.3578538234051542, + "grad_norm": 0.6589263081550598, + "learning_rate": 2.343323187308479e-05, + "loss": 0.10570955276489258, + "step": 2647 + }, + { + "epoch": 0.3579890156316012, + "grad_norm": 0.6837125420570374, + "learning_rate": 2.342757071602839e-05, + "loss": 0.161163330078125, + "step": 2648 + }, + { + "epoch": 0.35812420785804816, + "grad_norm": 0.9433907866477966, + "learning_rate": 2.3421907804306816e-05, + "loss": 0.1594400405883789, + "step": 2649 + }, + { + "epoch": 0.35825940008449514, + "grad_norm": 1.3440614938735962, + "learning_rate": 2.341624313909913e-05, + "loss": 0.15282392501831055, + "step": 2650 + }, + { + "epoch": 0.3583945923109421, + "grad_norm": 1.6969292163848877, + "learning_rate": 2.3410576721584742e-05, + "loss": 0.21156692504882812, + "step": 2651 + }, + { + "epoch": 0.3585297845373891, + "grad_norm": 1.724616527557373, + "learning_rate": 2.3404908552943435e-05, + "loss": 0.168853759765625, + "step": 2652 + }, + { + "epoch": 0.3586649767638361, + "grad_norm": 1.5592639446258545, + "learning_rate": 2.339923863435534e-05, + "loss": 0.17954468727111816, + "step": 2653 + }, + { + "epoch": 0.35880016899028305, + "grad_norm": 1.4546220302581787, + "learning_rate": 2.3393566967000974e-05, + "loss": 0.1942293643951416, + "step": 2654 + }, + { + "epoch": 0.35893536121673003, + "grad_norm": 2.1446166038513184, + "learning_rate": 2.3387893552061202e-05, + "loss": 0.1470191478729248, + "step": 2655 + }, + { + "epoch": 0.359070553443177, + "grad_norm": 3.507612943649292, + "learning_rate": 2.3382218390717268e-05, + "loss": 0.20348644256591797, + "step": 2656 + }, + { + "epoch": 0.359205745669624, + "grad_norm": 2.9627695083618164, + "learning_rate": 2.3376541484150762e-05, + "loss": 0.17058563232421875, + "step": 2657 + }, + { + "epoch": 0.35934093789607097, + "grad_norm": 1.885601282119751, + "learning_rate": 2.3370862833543652e-05, + "loss": 0.1741476058959961, + "step": 2658 + }, + { + "epoch": 0.35947613012251795, + "grad_norm": 3.068471670150757, + "learning_rate": 2.336518244007826e-05, + "loss": 0.16492938995361328, + "step": 2659 + }, + { + "epoch": 0.3596113223489649, + "grad_norm": 2.09063720703125, + "learning_rate": 2.3359500304937274e-05, + "loss": 0.2456369400024414, + "step": 2660 + }, + { + "epoch": 0.3597465145754119, + "grad_norm": 1.2787151336669922, + "learning_rate": 2.335381642930375e-05, + "loss": 0.13349342346191406, + "step": 2661 + }, + { + "epoch": 0.3598817068018589, + "grad_norm": 2.2733945846557617, + "learning_rate": 2.3348130814361094e-05, + "loss": 0.2045907974243164, + "step": 2662 + }, + { + "epoch": 0.36001689902830586, + "grad_norm": 1.7334760427474976, + "learning_rate": 2.334244346129309e-05, + "loss": 0.20584678649902344, + "step": 2663 + }, + { + "epoch": 0.36015209125475284, + "grad_norm": 1.9678996801376343, + "learning_rate": 2.3336754371283862e-05, + "loss": 0.14791107177734375, + "step": 2664 + }, + { + "epoch": 0.3602872834811998, + "grad_norm": 0.9949893355369568, + "learning_rate": 2.333106354551792e-05, + "loss": 0.19701766967773438, + "step": 2665 + }, + { + "epoch": 0.3604224757076468, + "grad_norm": 1.2357940673828125, + "learning_rate": 2.332537098518012e-05, + "loss": 0.20240211486816406, + "step": 2666 + }, + { + "epoch": 0.3605576679340938, + "grad_norm": 0.7524257898330688, + "learning_rate": 2.3319676691455686e-05, + "loss": 0.17739391326904297, + "step": 2667 + }, + { + "epoch": 0.36069286016054075, + "grad_norm": 0.9199552536010742, + "learning_rate": 2.3313980665530205e-05, + "loss": 0.20941162109375, + "step": 2668 + }, + { + "epoch": 0.36082805238698773, + "grad_norm": 1.7981719970703125, + "learning_rate": 2.3308282908589606e-05, + "loss": 0.18999576568603516, + "step": 2669 + }, + { + "epoch": 0.3609632446134347, + "grad_norm": 1.6676130294799805, + "learning_rate": 2.330258342182021e-05, + "loss": 0.26366519927978516, + "step": 2670 + }, + { + "epoch": 0.3610984368398817, + "grad_norm": 1.6140620708465576, + "learning_rate": 2.329688220640866e-05, + "loss": 0.19182705879211426, + "step": 2671 + }, + { + "epoch": 0.36123362906632867, + "grad_norm": 0.9684808850288391, + "learning_rate": 2.329117926354199e-05, + "loss": 0.17320823669433594, + "step": 2672 + }, + { + "epoch": 0.36136882129277564, + "grad_norm": 3.644124984741211, + "learning_rate": 2.3285474594407588e-05, + "loss": 0.23334693908691406, + "step": 2673 + }, + { + "epoch": 0.3615040135192226, + "grad_norm": 1.0325514078140259, + "learning_rate": 2.327976820019319e-05, + "loss": 0.1668715476989746, + "step": 2674 + }, + { + "epoch": 0.3616392057456696, + "grad_norm": 2.9641542434692383, + "learning_rate": 2.32740600820869e-05, + "loss": 0.26941871643066406, + "step": 2675 + }, + { + "epoch": 0.3617743979721166, + "grad_norm": 3.1507081985473633, + "learning_rate": 2.326835024127718e-05, + "loss": 0.18852519989013672, + "step": 2676 + }, + { + "epoch": 0.36190959019856356, + "grad_norm": 0.807555615901947, + "learning_rate": 2.326263867895285e-05, + "loss": 0.1259281039237976, + "step": 2677 + }, + { + "epoch": 0.36204478242501054, + "grad_norm": 0.9904191493988037, + "learning_rate": 2.3256925396303076e-05, + "loss": 0.15046119689941406, + "step": 2678 + }, + { + "epoch": 0.3621799746514575, + "grad_norm": 0.9153783917427063, + "learning_rate": 2.3251210394517412e-05, + "loss": 0.22531938552856445, + "step": 2679 + }, + { + "epoch": 0.3623151668779045, + "grad_norm": 1.551855444908142, + "learning_rate": 2.3245493674785742e-05, + "loss": 0.13031911849975586, + "step": 2680 + }, + { + "epoch": 0.3624503591043515, + "grad_norm": 0.7588427066802979, + "learning_rate": 2.3239775238298316e-05, + "loss": 0.15736627578735352, + "step": 2681 + }, + { + "epoch": 0.3625855513307985, + "grad_norm": 1.3534470796585083, + "learning_rate": 2.3234055086245744e-05, + "loss": 0.16922950744628906, + "step": 2682 + }, + { + "epoch": 0.3627207435572455, + "grad_norm": 0.7819817662239075, + "learning_rate": 2.3228333219818998e-05, + "loss": 0.17315673828125, + "step": 2683 + }, + { + "epoch": 0.36285593578369246, + "grad_norm": 1.6962943077087402, + "learning_rate": 2.3222609640209397e-05, + "loss": 0.23409461975097656, + "step": 2684 + }, + { + "epoch": 0.36299112801013944, + "grad_norm": 2.9697887897491455, + "learning_rate": 2.3216884348608614e-05, + "loss": 0.24148082733154297, + "step": 2685 + }, + { + "epoch": 0.3631263202365864, + "grad_norm": 1.3740758895874023, + "learning_rate": 2.32111573462087e-05, + "loss": 0.1751852035522461, + "step": 2686 + }, + { + "epoch": 0.3632615124630334, + "grad_norm": 1.5446159839630127, + "learning_rate": 2.3205428634202028e-05, + "loss": 0.2353992462158203, + "step": 2687 + }, + { + "epoch": 0.3633967046894804, + "grad_norm": 1.7599570751190186, + "learning_rate": 2.3199698213781367e-05, + "loss": 0.211417555809021, + "step": 2688 + }, + { + "epoch": 0.36353189691592735, + "grad_norm": 1.3838764429092407, + "learning_rate": 2.319396608613981e-05, + "loss": 0.1794447898864746, + "step": 2689 + }, + { + "epoch": 0.36366708914237433, + "grad_norm": 1.423200249671936, + "learning_rate": 2.318823225247082e-05, + "loss": 0.20132160186767578, + "step": 2690 + }, + { + "epoch": 0.3638022813688213, + "grad_norm": 1.7932085990905762, + "learning_rate": 2.3182496713968208e-05, + "loss": 0.2605876922607422, + "step": 2691 + }, + { + "epoch": 0.3639374735952683, + "grad_norm": 0.5866106748580933, + "learning_rate": 2.3176759471826143e-05, + "loss": 0.11948943138122559, + "step": 2692 + }, + { + "epoch": 0.36407266582171527, + "grad_norm": 1.108756422996521, + "learning_rate": 2.3171020527239155e-05, + "loss": 0.21635818481445312, + "step": 2693 + }, + { + "epoch": 0.36420785804816225, + "grad_norm": 1.9077092409133911, + "learning_rate": 2.316527988140212e-05, + "loss": 0.2179112434387207, + "step": 2694 + }, + { + "epoch": 0.3643430502746092, + "grad_norm": 1.996392846107483, + "learning_rate": 2.315953753551027e-05, + "loss": 0.14326000213623047, + "step": 2695 + }, + { + "epoch": 0.3644782425010562, + "grad_norm": 3.499091863632202, + "learning_rate": 2.3153793490759197e-05, + "loss": 0.16757678985595703, + "step": 2696 + }, + { + "epoch": 0.3646134347275032, + "grad_norm": 1.0090104341506958, + "learning_rate": 2.3148047748344835e-05, + "loss": 0.15411949157714844, + "step": 2697 + }, + { + "epoch": 0.36474862695395016, + "grad_norm": 0.8679322600364685, + "learning_rate": 2.314230030946348e-05, + "loss": 0.13198280334472656, + "step": 2698 + }, + { + "epoch": 0.36488381918039714, + "grad_norm": 0.8545182943344116, + "learning_rate": 2.3136551175311782e-05, + "loss": 0.1557769775390625, + "step": 2699 + }, + { + "epoch": 0.3650190114068441, + "grad_norm": 0.679964542388916, + "learning_rate": 2.313080034708674e-05, + "loss": 0.15737056732177734, + "step": 2700 + }, + { + "epoch": 0.3651542036332911, + "grad_norm": 2.1893441677093506, + "learning_rate": 2.312504782598571e-05, + "loss": 0.16611766815185547, + "step": 2701 + }, + { + "epoch": 0.3652893958597381, + "grad_norm": 1.1812126636505127, + "learning_rate": 2.311929361320639e-05, + "loss": 0.16109466552734375, + "step": 2702 + }, + { + "epoch": 0.36542458808618505, + "grad_norm": 1.9342776536941528, + "learning_rate": 2.311353770994684e-05, + "loss": 0.23283231258392334, + "step": 2703 + }, + { + "epoch": 0.36555978031263203, + "grad_norm": 1.5147558450698853, + "learning_rate": 2.310778011740548e-05, + "loss": 0.205535888671875, + "step": 2704 + }, + { + "epoch": 0.365694972539079, + "grad_norm": 3.4692702293395996, + "learning_rate": 2.310202083678106e-05, + "loss": 0.2441082000732422, + "step": 2705 + }, + { + "epoch": 0.365830164765526, + "grad_norm": 1.031876564025879, + "learning_rate": 2.3096259869272694e-05, + "loss": 0.13544178009033203, + "step": 2706 + }, + { + "epoch": 0.36596535699197297, + "grad_norm": 0.927740752696991, + "learning_rate": 2.309049721607985e-05, + "loss": 0.1540679931640625, + "step": 2707 + }, + { + "epoch": 0.36610054921841995, + "grad_norm": 0.795728325843811, + "learning_rate": 2.3084732878402342e-05, + "loss": 0.14947128295898438, + "step": 2708 + }, + { + "epoch": 0.3662357414448669, + "grad_norm": 1.3317497968673706, + "learning_rate": 2.307896685744034e-05, + "loss": 0.20220565795898438, + "step": 2709 + }, + { + "epoch": 0.3663709336713139, + "grad_norm": 1.7826968431472778, + "learning_rate": 2.3073199154394352e-05, + "loss": 0.2117633819580078, + "step": 2710 + }, + { + "epoch": 0.3665061258977609, + "grad_norm": 0.8682212233543396, + "learning_rate": 2.3067429770465246e-05, + "loss": 0.20788288116455078, + "step": 2711 + }, + { + "epoch": 0.36664131812420786, + "grad_norm": 1.17942214012146, + "learning_rate": 2.3061658706854244e-05, + "loss": 0.1511554718017578, + "step": 2712 + }, + { + "epoch": 0.36677651035065484, + "grad_norm": 1.595288634300232, + "learning_rate": 2.3055885964762907e-05, + "loss": 0.21232032775878906, + "step": 2713 + }, + { + "epoch": 0.3669117025771018, + "grad_norm": 0.8747696280479431, + "learning_rate": 2.3050111545393156e-05, + "loss": 0.19792938232421875, + "step": 2714 + }, + { + "epoch": 0.3670468948035488, + "grad_norm": 1.2315130233764648, + "learning_rate": 2.304433544994725e-05, + "loss": 0.20705533027648926, + "step": 2715 + }, + { + "epoch": 0.3671820870299958, + "grad_norm": 1.1320106983184814, + "learning_rate": 2.303855767962781e-05, + "loss": 0.24963951110839844, + "step": 2716 + }, + { + "epoch": 0.36731727925644275, + "grad_norm": 0.7152423858642578, + "learning_rate": 2.303277823563779e-05, + "loss": 0.12834644317626953, + "step": 2717 + }, + { + "epoch": 0.36745247148288973, + "grad_norm": 0.7315583825111389, + "learning_rate": 2.3026997119180507e-05, + "loss": 0.1488189697265625, + "step": 2718 + }, + { + "epoch": 0.3675876637093367, + "grad_norm": 1.0991500616073608, + "learning_rate": 2.3021214331459616e-05, + "loss": 0.1364074945449829, + "step": 2719 + }, + { + "epoch": 0.3677228559357837, + "grad_norm": 1.0755302906036377, + "learning_rate": 2.301542987367913e-05, + "loss": 0.1743030548095703, + "step": 2720 + }, + { + "epoch": 0.36785804816223067, + "grad_norm": 1.206933856010437, + "learning_rate": 2.3009643747043403e-05, + "loss": 0.19913291931152344, + "step": 2721 + }, + { + "epoch": 0.36799324038867764, + "grad_norm": 1.1955817937850952, + "learning_rate": 2.3003855952757132e-05, + "loss": 0.1390678882598877, + "step": 2722 + }, + { + "epoch": 0.3681284326151246, + "grad_norm": 1.1761120557785034, + "learning_rate": 2.2998066492025372e-05, + "loss": 0.21669769287109375, + "step": 2723 + }, + { + "epoch": 0.3682636248415716, + "grad_norm": 1.9794600009918213, + "learning_rate": 2.2992275366053513e-05, + "loss": 0.17833518981933594, + "step": 2724 + }, + { + "epoch": 0.3683988170680186, + "grad_norm": 1.0502009391784668, + "learning_rate": 2.2986482576047305e-05, + "loss": 0.20044803619384766, + "step": 2725 + }, + { + "epoch": 0.36853400929446556, + "grad_norm": 2.7960731983184814, + "learning_rate": 2.298068812321284e-05, + "loss": 0.2517871856689453, + "step": 2726 + }, + { + "epoch": 0.36866920152091254, + "grad_norm": 2.1069982051849365, + "learning_rate": 2.297489200875654e-05, + "loss": 0.1832141876220703, + "step": 2727 + }, + { + "epoch": 0.3688043937473595, + "grad_norm": 1.8392685651779175, + "learning_rate": 2.2969094233885204e-05, + "loss": 0.17749810218811035, + "step": 2728 + }, + { + "epoch": 0.3689395859738065, + "grad_norm": 0.811008632183075, + "learning_rate": 2.296329479980595e-05, + "loss": 0.16436004638671875, + "step": 2729 + }, + { + "epoch": 0.3690747782002535, + "grad_norm": 0.6516792178153992, + "learning_rate": 2.2957493707726252e-05, + "loss": 0.17687416076660156, + "step": 2730 + }, + { + "epoch": 0.36920997042670045, + "grad_norm": 2.243013858795166, + "learning_rate": 2.2951690958853932e-05, + "loss": 0.19929885864257812, + "step": 2731 + }, + { + "epoch": 0.36934516265314743, + "grad_norm": 1.7597277164459229, + "learning_rate": 2.2945886554397154e-05, + "loss": 0.205718994140625, + "step": 2732 + }, + { + "epoch": 0.3694803548795944, + "grad_norm": 2.5990350246429443, + "learning_rate": 2.294008049556441e-05, + "loss": 0.21671009063720703, + "step": 2733 + }, + { + "epoch": 0.3696155471060414, + "grad_norm": 1.0671941041946411, + "learning_rate": 2.2934272783564577e-05, + "loss": 0.19755804538726807, + "step": 2734 + }, + { + "epoch": 0.36975073933248837, + "grad_norm": 2.4794886112213135, + "learning_rate": 2.2928463419606835e-05, + "loss": 0.19269847869873047, + "step": 2735 + }, + { + "epoch": 0.36988593155893534, + "grad_norm": 1.0212067365646362, + "learning_rate": 2.292265240490073e-05, + "loss": 0.1706867218017578, + "step": 2736 + }, + { + "epoch": 0.3700211237853823, + "grad_norm": 1.8596913814544678, + "learning_rate": 2.2916839740656154e-05, + "loss": 0.18334484100341797, + "step": 2737 + }, + { + "epoch": 0.3701563160118293, + "grad_norm": 1.3385870456695557, + "learning_rate": 2.2911025428083316e-05, + "loss": 0.14020609855651855, + "step": 2738 + }, + { + "epoch": 0.3702915082382763, + "grad_norm": 1.8333052396774292, + "learning_rate": 2.2905209468392798e-05, + "loss": 0.14725017547607422, + "step": 2739 + }, + { + "epoch": 0.37042670046472326, + "grad_norm": 3.6977462768554688, + "learning_rate": 2.2899391862795514e-05, + "loss": 0.2024831771850586, + "step": 2740 + }, + { + "epoch": 0.37056189269117024, + "grad_norm": 1.22323739528656, + "learning_rate": 2.2893572612502718e-05, + "loss": 0.09719038009643555, + "step": 2741 + }, + { + "epoch": 0.3706970849176172, + "grad_norm": 0.8222039341926575, + "learning_rate": 2.2887751718726013e-05, + "loss": 0.16521143913269043, + "step": 2742 + }, + { + "epoch": 0.3708322771440642, + "grad_norm": 1.368192434310913, + "learning_rate": 2.288192918267734e-05, + "loss": 0.14769744873046875, + "step": 2743 + }, + { + "epoch": 0.37096746937051117, + "grad_norm": 1.356164574623108, + "learning_rate": 2.2876105005568974e-05, + "loss": 0.19460105895996094, + "step": 2744 + }, + { + "epoch": 0.37110266159695815, + "grad_norm": 1.188924789428711, + "learning_rate": 2.287027918861355e-05, + "loss": 0.2086029052734375, + "step": 2745 + }, + { + "epoch": 0.37123785382340513, + "grad_norm": 1.7186880111694336, + "learning_rate": 2.2864451733024024e-05, + "loss": 0.1468663215637207, + "step": 2746 + }, + { + "epoch": 0.3713730460498521, + "grad_norm": 3.6514415740966797, + "learning_rate": 2.2858622640013716e-05, + "loss": 0.20410096645355225, + "step": 2747 + }, + { + "epoch": 0.3715082382762991, + "grad_norm": 1.6800239086151123, + "learning_rate": 2.285279191079626e-05, + "loss": 0.27765846252441406, + "step": 2748 + }, + { + "epoch": 0.3716434305027461, + "grad_norm": 1.6330631971359253, + "learning_rate": 2.2846959546585656e-05, + "loss": 0.2166886329650879, + "step": 2749 + }, + { + "epoch": 0.3717786227291931, + "grad_norm": 0.7831001281738281, + "learning_rate": 2.2841125548596225e-05, + "loss": 0.1688394546508789, + "step": 2750 + }, + { + "epoch": 0.3719138149556401, + "grad_norm": 1.1121560335159302, + "learning_rate": 2.2835289918042648e-05, + "loss": 0.18171215057373047, + "step": 2751 + }, + { + "epoch": 0.37204900718208705, + "grad_norm": 4.414970874786377, + "learning_rate": 2.282945265613992e-05, + "loss": 0.2252330780029297, + "step": 2752 + }, + { + "epoch": 0.37218419940853403, + "grad_norm": 1.740721344947815, + "learning_rate": 2.2823613764103406e-05, + "loss": 0.2262563705444336, + "step": 2753 + }, + { + "epoch": 0.372319391634981, + "grad_norm": 1.5421509742736816, + "learning_rate": 2.2817773243148776e-05, + "loss": 0.1658763885498047, + "step": 2754 + }, + { + "epoch": 0.372454583861428, + "grad_norm": 1.3289058208465576, + "learning_rate": 2.2811931094492074e-05, + "loss": 0.14342939853668213, + "step": 2755 + }, + { + "epoch": 0.37258977608787497, + "grad_norm": 1.6568609476089478, + "learning_rate": 2.280608731934966e-05, + "loss": 0.2000293731689453, + "step": 2756 + }, + { + "epoch": 0.37272496831432195, + "grad_norm": 1.32503080368042, + "learning_rate": 2.280024191893823e-05, + "loss": 0.18233680725097656, + "step": 2757 + }, + { + "epoch": 0.3728601605407689, + "grad_norm": 4.2607903480529785, + "learning_rate": 2.279439489447485e-05, + "loss": 0.24362659454345703, + "step": 2758 + }, + { + "epoch": 0.3729953527672159, + "grad_norm": 1.0344898700714111, + "learning_rate": 2.278854624717688e-05, + "loss": 0.16037487983703613, + "step": 2759 + }, + { + "epoch": 0.3731305449936629, + "grad_norm": 2.1257076263427734, + "learning_rate": 2.2782695978262045e-05, + "loss": 0.2311878204345703, + "step": 2760 + }, + { + "epoch": 0.37326573722010986, + "grad_norm": 0.8441314101219177, + "learning_rate": 2.2776844088948406e-05, + "loss": 0.1779491901397705, + "step": 2761 + }, + { + "epoch": 0.37340092944655684, + "grad_norm": 0.8698086738586426, + "learning_rate": 2.2770990580454364e-05, + "loss": 0.20713424682617188, + "step": 2762 + }, + { + "epoch": 0.3735361216730038, + "grad_norm": 1.9039814472198486, + "learning_rate": 2.276513545399864e-05, + "loss": 0.15225887298583984, + "step": 2763 + }, + { + "epoch": 0.3736713138994508, + "grad_norm": 1.314050316810608, + "learning_rate": 2.2759278710800306e-05, + "loss": 0.2066965103149414, + "step": 2764 + }, + { + "epoch": 0.3738065061258978, + "grad_norm": 2.8302485942840576, + "learning_rate": 2.275342035207876e-05, + "loss": 0.21586287021636963, + "step": 2765 + }, + { + "epoch": 0.37394169835234475, + "grad_norm": 1.9034204483032227, + "learning_rate": 2.2747560379053752e-05, + "loss": 0.19992351531982422, + "step": 2766 + }, + { + "epoch": 0.37407689057879173, + "grad_norm": 0.96148282289505, + "learning_rate": 2.2741698792945364e-05, + "loss": 0.1269235610961914, + "step": 2767 + }, + { + "epoch": 0.3742120828052387, + "grad_norm": 1.278441309928894, + "learning_rate": 2.2735835594974003e-05, + "loss": 0.1609492301940918, + "step": 2768 + }, + { + "epoch": 0.3743472750316857, + "grad_norm": 0.8664324283599854, + "learning_rate": 2.272997078636042e-05, + "loss": 0.16405844688415527, + "step": 2769 + }, + { + "epoch": 0.37448246725813267, + "grad_norm": 1.8115465641021729, + "learning_rate": 2.272410436832569e-05, + "loss": 0.1721198558807373, + "step": 2770 + }, + { + "epoch": 0.37461765948457965, + "grad_norm": 0.8689283132553101, + "learning_rate": 2.2718236342091248e-05, + "loss": 0.12782704830169678, + "step": 2771 + }, + { + "epoch": 0.3747528517110266, + "grad_norm": 2.6778218746185303, + "learning_rate": 2.2712366708878838e-05, + "loss": 0.18440723419189453, + "step": 2772 + }, + { + "epoch": 0.3748880439374736, + "grad_norm": 1.32063627243042, + "learning_rate": 2.2706495469910552e-05, + "loss": 0.1354503631591797, + "step": 2773 + }, + { + "epoch": 0.3750232361639206, + "grad_norm": 2.570680618286133, + "learning_rate": 2.2700622626408814e-05, + "loss": 0.14022397994995117, + "step": 2774 + }, + { + "epoch": 0.37515842839036756, + "grad_norm": 1.262670636177063, + "learning_rate": 2.2694748179596375e-05, + "loss": 0.17854344844818115, + "step": 2775 + }, + { + "epoch": 0.37529362061681454, + "grad_norm": 2.77175235748291, + "learning_rate": 2.2688872130696342e-05, + "loss": 0.23230743408203125, + "step": 2776 + }, + { + "epoch": 0.3754288128432615, + "grad_norm": 1.1634495258331299, + "learning_rate": 2.268299448093212e-05, + "loss": 0.16405296325683594, + "step": 2777 + }, + { + "epoch": 0.3755640050697085, + "grad_norm": 2.1187777519226074, + "learning_rate": 2.2677115231527482e-05, + "loss": 0.17717647552490234, + "step": 2778 + }, + { + "epoch": 0.3756991972961555, + "grad_norm": 2.0991415977478027, + "learning_rate": 2.267123438370651e-05, + "loss": 0.16820430755615234, + "step": 2779 + }, + { + "epoch": 0.37583438952260245, + "grad_norm": 4.217456340789795, + "learning_rate": 2.266535193869363e-05, + "loss": 0.19469070434570312, + "step": 2780 + }, + { + "epoch": 0.37596958174904943, + "grad_norm": 3.2345173358917236, + "learning_rate": 2.2659467897713604e-05, + "loss": 0.13300704956054688, + "step": 2781 + }, + { + "epoch": 0.3761047739754964, + "grad_norm": 1.4967330694198608, + "learning_rate": 2.2653582261991516e-05, + "loss": 0.1920604705810547, + "step": 2782 + }, + { + "epoch": 0.3762399662019434, + "grad_norm": 0.719992995262146, + "learning_rate": 2.2647695032752785e-05, + "loss": 0.1298656463623047, + "step": 2783 + }, + { + "epoch": 0.37637515842839037, + "grad_norm": 1.2852716445922852, + "learning_rate": 2.264180621122317e-05, + "loss": 0.17065811157226562, + "step": 2784 + }, + { + "epoch": 0.37651035065483734, + "grad_norm": 1.0930293798446655, + "learning_rate": 2.2635915798628747e-05, + "loss": 0.2602882385253906, + "step": 2785 + }, + { + "epoch": 0.3766455428812843, + "grad_norm": 1.5484962463378906, + "learning_rate": 2.2630023796195932e-05, + "loss": 0.19918441772460938, + "step": 2786 + }, + { + "epoch": 0.3767807351077313, + "grad_norm": 1.6997473239898682, + "learning_rate": 2.262413020515148e-05, + "loss": 0.17521190643310547, + "step": 2787 + }, + { + "epoch": 0.3769159273341783, + "grad_norm": 1.4968205690383911, + "learning_rate": 2.261823502672246e-05, + "loss": 0.19996929168701172, + "step": 2788 + }, + { + "epoch": 0.37705111956062526, + "grad_norm": 1.352866768836975, + "learning_rate": 2.261233826213628e-05, + "loss": 0.14621496200561523, + "step": 2789 + }, + { + "epoch": 0.37718631178707224, + "grad_norm": 3.2397778034210205, + "learning_rate": 2.2606439912620688e-05, + "loss": 0.20677757263183594, + "step": 2790 + }, + { + "epoch": 0.3773215040135192, + "grad_norm": 1.1323494911193848, + "learning_rate": 2.2600539979403734e-05, + "loss": 0.2451915740966797, + "step": 2791 + }, + { + "epoch": 0.3774566962399662, + "grad_norm": 1.2870954275131226, + "learning_rate": 2.259463846371383e-05, + "loss": 0.18496131896972656, + "step": 2792 + }, + { + "epoch": 0.3775918884664132, + "grad_norm": 0.7410619258880615, + "learning_rate": 2.2588735366779698e-05, + "loss": 0.15869379043579102, + "step": 2793 + }, + { + "epoch": 0.37772708069286015, + "grad_norm": 0.7197501063346863, + "learning_rate": 2.2582830689830394e-05, + "loss": 0.1327822208404541, + "step": 2794 + }, + { + "epoch": 0.37786227291930713, + "grad_norm": 0.5694572925567627, + "learning_rate": 2.2576924434095305e-05, + "loss": 0.14859962463378906, + "step": 2795 + }, + { + "epoch": 0.3779974651457541, + "grad_norm": 0.8164991736412048, + "learning_rate": 2.257101660080414e-05, + "loss": 0.14496421813964844, + "step": 2796 + }, + { + "epoch": 0.3781326573722011, + "grad_norm": 2.050577402114868, + "learning_rate": 2.256510719118695e-05, + "loss": 0.17336273193359375, + "step": 2797 + }, + { + "epoch": 0.37826784959864806, + "grad_norm": 0.8579196929931641, + "learning_rate": 2.2559196206474094e-05, + "loss": 0.13872623443603516, + "step": 2798 + }, + { + "epoch": 0.37840304182509504, + "grad_norm": 2.3823320865631104, + "learning_rate": 2.2553283647896287e-05, + "loss": 0.14835309982299805, + "step": 2799 + }, + { + "epoch": 0.378538234051542, + "grad_norm": 2.0684659481048584, + "learning_rate": 2.254736951668454e-05, + "loss": 0.17647743225097656, + "step": 2800 + }, + { + "epoch": 0.378673426277989, + "grad_norm": 0.877079963684082, + "learning_rate": 2.2541453814070212e-05, + "loss": 0.18906688690185547, + "step": 2801 + }, + { + "epoch": 0.378808618504436, + "grad_norm": 1.0572319030761719, + "learning_rate": 2.2535536541284983e-05, + "loss": 0.18941116333007812, + "step": 2802 + }, + { + "epoch": 0.37894381073088296, + "grad_norm": 2.116623640060425, + "learning_rate": 2.2529617699560857e-05, + "loss": 0.15986156463623047, + "step": 2803 + }, + { + "epoch": 0.37907900295732994, + "grad_norm": 2.5547878742218018, + "learning_rate": 2.2523697290130185e-05, + "loss": 0.1714191436767578, + "step": 2804 + }, + { + "epoch": 0.3792141951837769, + "grad_norm": 1.0090625286102295, + "learning_rate": 2.251777531422561e-05, + "loss": 0.20269203186035156, + "step": 2805 + }, + { + "epoch": 0.3793493874102239, + "grad_norm": 1.1625059843063354, + "learning_rate": 2.2511851773080127e-05, + "loss": 0.2607383728027344, + "step": 2806 + }, + { + "epoch": 0.37948457963667087, + "grad_norm": 0.9749452471733093, + "learning_rate": 2.2505926667927043e-05, + "loss": 0.16684675216674805, + "step": 2807 + }, + { + "epoch": 0.37961977186311785, + "grad_norm": 2.202070713043213, + "learning_rate": 2.25e-05, + "loss": 0.17569732666015625, + "step": 2808 + }, + { + "epoch": 0.37975496408956483, + "grad_norm": 0.9541611671447754, + "learning_rate": 2.2494071770532966e-05, + "loss": 0.16417407989501953, + "step": 2809 + }, + { + "epoch": 0.3798901563160118, + "grad_norm": 1.2085424661636353, + "learning_rate": 2.2488141980760223e-05, + "loss": 0.1471109390258789, + "step": 2810 + }, + { + "epoch": 0.3800253485424588, + "grad_norm": 1.2389203310012817, + "learning_rate": 2.248221063191639e-05, + "loss": 0.22618675231933594, + "step": 2811 + }, + { + "epoch": 0.38016054076890576, + "grad_norm": 1.2779831886291504, + "learning_rate": 2.24762777252364e-05, + "loss": 0.16464471817016602, + "step": 2812 + }, + { + "epoch": 0.38029573299535274, + "grad_norm": 0.9568815231323242, + "learning_rate": 2.2470343261955525e-05, + "loss": 0.1331338882446289, + "step": 2813 + }, + { + "epoch": 0.3804309252217997, + "grad_norm": 0.8668561577796936, + "learning_rate": 2.246440724330934e-05, + "loss": 0.1792125701904297, + "step": 2814 + }, + { + "epoch": 0.3805661174482467, + "grad_norm": 2.066789150238037, + "learning_rate": 2.2458469670533765e-05, + "loss": 0.13564109802246094, + "step": 2815 + }, + { + "epoch": 0.3807013096746937, + "grad_norm": 1.1422011852264404, + "learning_rate": 2.2452530544865034e-05, + "loss": 0.20665264129638672, + "step": 2816 + }, + { + "epoch": 0.3808365019011407, + "grad_norm": 0.8526986837387085, + "learning_rate": 2.24465898675397e-05, + "loss": 0.17189264297485352, + "step": 2817 + }, + { + "epoch": 0.3809716941275877, + "grad_norm": 0.8208529949188232, + "learning_rate": 2.244064763979464e-05, + "loss": 0.17484569549560547, + "step": 2818 + }, + { + "epoch": 0.38110688635403467, + "grad_norm": 1.3694156408309937, + "learning_rate": 2.2434703862867068e-05, + "loss": 0.13419246673583984, + "step": 2819 + }, + { + "epoch": 0.38124207858048165, + "grad_norm": 0.882036566734314, + "learning_rate": 2.2428758537994504e-05, + "loss": 0.14685344696044922, + "step": 2820 + }, + { + "epoch": 0.3813772708069286, + "grad_norm": 1.2096648216247559, + "learning_rate": 2.24228116664148e-05, + "loss": 0.19815731048583984, + "step": 2821 + }, + { + "epoch": 0.3815124630333756, + "grad_norm": 2.187608242034912, + "learning_rate": 2.2416863249366125e-05, + "loss": 0.2264232635498047, + "step": 2822 + }, + { + "epoch": 0.3816476552598226, + "grad_norm": 2.656158685684204, + "learning_rate": 2.241091328808696e-05, + "loss": 0.1796717643737793, + "step": 2823 + }, + { + "epoch": 0.38178284748626956, + "grad_norm": 1.072092890739441, + "learning_rate": 2.240496178381614e-05, + "loss": 0.1416279673576355, + "step": 2824 + }, + { + "epoch": 0.38191803971271654, + "grad_norm": 1.5303592681884766, + "learning_rate": 2.239900873779278e-05, + "loss": 0.19420289993286133, + "step": 2825 + }, + { + "epoch": 0.3820532319391635, + "grad_norm": 0.7878977656364441, + "learning_rate": 2.2393054151256352e-05, + "loss": 0.14476585388183594, + "step": 2826 + }, + { + "epoch": 0.3821884241656105, + "grad_norm": 3.1602046489715576, + "learning_rate": 2.238709802544662e-05, + "loss": 0.224456787109375, + "step": 2827 + }, + { + "epoch": 0.3823236163920575, + "grad_norm": 1.2856125831604004, + "learning_rate": 2.2381140361603686e-05, + "loss": 0.2051258087158203, + "step": 2828 + }, + { + "epoch": 0.38245880861850445, + "grad_norm": 1.5704478025436401, + "learning_rate": 2.237518116096797e-05, + "loss": 0.168792724609375, + "step": 2829 + }, + { + "epoch": 0.38259400084495143, + "grad_norm": 1.0216450691223145, + "learning_rate": 2.2369220424780203e-05, + "loss": 0.18901348114013672, + "step": 2830 + }, + { + "epoch": 0.3827291930713984, + "grad_norm": 1.7724815607070923, + "learning_rate": 2.2363258154281452e-05, + "loss": 0.24583053588867188, + "step": 2831 + }, + { + "epoch": 0.3828643852978454, + "grad_norm": 0.8602866530418396, + "learning_rate": 2.2357294350713088e-05, + "loss": 0.14009666442871094, + "step": 2832 + }, + { + "epoch": 0.38299957752429237, + "grad_norm": 1.2809284925460815, + "learning_rate": 2.2351329015316802e-05, + "loss": 0.1760730743408203, + "step": 2833 + }, + { + "epoch": 0.38313476975073935, + "grad_norm": 1.180512547492981, + "learning_rate": 2.2345362149334613e-05, + "loss": 0.23828506469726562, + "step": 2834 + }, + { + "epoch": 0.3832699619771863, + "grad_norm": 1.7512367963790894, + "learning_rate": 2.2339393754008854e-05, + "loss": 0.22279834747314453, + "step": 2835 + }, + { + "epoch": 0.3834051542036333, + "grad_norm": 1.1981666088104248, + "learning_rate": 2.233342383058218e-05, + "loss": 0.1991875171661377, + "step": 2836 + }, + { + "epoch": 0.3835403464300803, + "grad_norm": 1.8916113376617432, + "learning_rate": 2.2327452380297554e-05, + "loss": 0.13935136795043945, + "step": 2837 + }, + { + "epoch": 0.38367553865652726, + "grad_norm": 1.4282495975494385, + "learning_rate": 2.232147940439827e-05, + "loss": 0.19857406616210938, + "step": 2838 + }, + { + "epoch": 0.38381073088297424, + "grad_norm": 1.0461626052856445, + "learning_rate": 2.2315504904127936e-05, + "loss": 0.21667861938476562, + "step": 2839 + }, + { + "epoch": 0.3839459231094212, + "grad_norm": 2.026803731918335, + "learning_rate": 2.2309528880730463e-05, + "loss": 0.21555709838867188, + "step": 2840 + }, + { + "epoch": 0.3840811153358682, + "grad_norm": 0.7633315920829773, + "learning_rate": 2.2303551335450096e-05, + "loss": 0.14227962493896484, + "step": 2841 + }, + { + "epoch": 0.3842163075623152, + "grad_norm": 0.7599472403526306, + "learning_rate": 2.2297572269531398e-05, + "loss": 0.16444778442382812, + "step": 2842 + }, + { + "epoch": 0.38435149978876215, + "grad_norm": 0.9680823683738708, + "learning_rate": 2.2291591684219243e-05, + "loss": 0.14874744415283203, + "step": 2843 + }, + { + "epoch": 0.38448669201520913, + "grad_norm": 2.3640425205230713, + "learning_rate": 2.2285609580758806e-05, + "loss": 0.1773529052734375, + "step": 2844 + }, + { + "epoch": 0.3846218842416561, + "grad_norm": 0.7894426584243774, + "learning_rate": 2.227962596039561e-05, + "loss": 0.14577817916870117, + "step": 2845 + }, + { + "epoch": 0.3847570764681031, + "grad_norm": 1.8325927257537842, + "learning_rate": 2.2273640824375462e-05, + "loss": 0.15846896171569824, + "step": 2846 + }, + { + "epoch": 0.38489226869455007, + "grad_norm": 0.8720874190330505, + "learning_rate": 2.2267654173944515e-05, + "loss": 0.1635441780090332, + "step": 2847 + }, + { + "epoch": 0.38502746092099704, + "grad_norm": 1.0309906005859375, + "learning_rate": 2.2261666010349212e-05, + "loss": 0.16109275817871094, + "step": 2848 + }, + { + "epoch": 0.385162653147444, + "grad_norm": 1.1663784980773926, + "learning_rate": 2.2255676334836317e-05, + "loss": 0.19454479217529297, + "step": 2849 + }, + { + "epoch": 0.385297845373891, + "grad_norm": 2.7666244506835938, + "learning_rate": 2.2249685148652917e-05, + "loss": 0.19118833541870117, + "step": 2850 + }, + { + "epoch": 0.385433037600338, + "grad_norm": 3.6009457111358643, + "learning_rate": 2.224369245304641e-05, + "loss": 0.21427249908447266, + "step": 2851 + }, + { + "epoch": 0.38556822982678496, + "grad_norm": 2.510897159576416, + "learning_rate": 2.2237698249264507e-05, + "loss": 0.19642353057861328, + "step": 2852 + }, + { + "epoch": 0.38570342205323194, + "grad_norm": 2.7096643447875977, + "learning_rate": 2.2231702538555235e-05, + "loss": 0.18151569366455078, + "step": 2853 + }, + { + "epoch": 0.3858386142796789, + "grad_norm": 0.8593248724937439, + "learning_rate": 2.2225705322166928e-05, + "loss": 0.17100143432617188, + "step": 2854 + }, + { + "epoch": 0.3859738065061259, + "grad_norm": 1.2871543169021606, + "learning_rate": 2.2219706601348242e-05, + "loss": 0.1817951202392578, + "step": 2855 + }, + { + "epoch": 0.3861089987325729, + "grad_norm": 0.8620615005493164, + "learning_rate": 2.221370637734814e-05, + "loss": 0.17870712280273438, + "step": 2856 + }, + { + "epoch": 0.38624419095901985, + "grad_norm": 1.0389586687088013, + "learning_rate": 2.22077046514159e-05, + "loss": 0.18694639205932617, + "step": 2857 + }, + { + "epoch": 0.38637938318546683, + "grad_norm": 1.508880615234375, + "learning_rate": 2.220170142480112e-05, + "loss": 0.17932653427124023, + "step": 2858 + }, + { + "epoch": 0.3865145754119138, + "grad_norm": 0.8926329016685486, + "learning_rate": 2.2195696698753695e-05, + "loss": 0.15302467346191406, + "step": 2859 + }, + { + "epoch": 0.3866497676383608, + "grad_norm": 0.655914843082428, + "learning_rate": 2.2189690474523844e-05, + "loss": 0.11203479766845703, + "step": 2860 + }, + { + "epoch": 0.38678495986480776, + "grad_norm": 1.0332105159759521, + "learning_rate": 2.21836827533621e-05, + "loss": 0.20537948608398438, + "step": 2861 + }, + { + "epoch": 0.38692015209125474, + "grad_norm": 0.8247001767158508, + "learning_rate": 2.2177673536519297e-05, + "loss": 0.1389141082763672, + "step": 2862 + }, + { + "epoch": 0.3870553443177017, + "grad_norm": 1.8151029348373413, + "learning_rate": 2.217166282524659e-05, + "loss": 0.21303224563598633, + "step": 2863 + }, + { + "epoch": 0.3871905365441487, + "grad_norm": 2.5937812328338623, + "learning_rate": 2.216565062079544e-05, + "loss": 0.24167728424072266, + "step": 2864 + }, + { + "epoch": 0.3873257287705957, + "grad_norm": 2.4652202129364014, + "learning_rate": 2.2159636924417612e-05, + "loss": 0.15409326553344727, + "step": 2865 + }, + { + "epoch": 0.38746092099704266, + "grad_norm": 0.8946840763092041, + "learning_rate": 2.2153621737365205e-05, + "loss": 0.18581390380859375, + "step": 2866 + }, + { + "epoch": 0.38759611322348964, + "grad_norm": 1.3004682064056396, + "learning_rate": 2.2147605060890598e-05, + "loss": 0.18044090270996094, + "step": 2867 + }, + { + "epoch": 0.3877313054499366, + "grad_norm": 1.2560685873031616, + "learning_rate": 2.2141586896246503e-05, + "loss": 0.13004255294799805, + "step": 2868 + }, + { + "epoch": 0.3878664976763836, + "grad_norm": 1.7169181108474731, + "learning_rate": 2.2135567244685933e-05, + "loss": 0.17757415771484375, + "step": 2869 + }, + { + "epoch": 0.38800168990283057, + "grad_norm": 1.2001307010650635, + "learning_rate": 2.2129546107462214e-05, + "loss": 0.2307891845703125, + "step": 2870 + }, + { + "epoch": 0.38813688212927755, + "grad_norm": 2.2192158699035645, + "learning_rate": 2.212352348582897e-05, + "loss": 0.20961380004882812, + "step": 2871 + }, + { + "epoch": 0.38827207435572453, + "grad_norm": 0.6095111966133118, + "learning_rate": 2.2117499381040157e-05, + "loss": 0.14725971221923828, + "step": 2872 + }, + { + "epoch": 0.3884072665821715, + "grad_norm": 0.7406976819038391, + "learning_rate": 2.211147379435001e-05, + "loss": 0.16102075576782227, + "step": 2873 + }, + { + "epoch": 0.3885424588086185, + "grad_norm": 1.516456127166748, + "learning_rate": 2.2105446727013098e-05, + "loss": 0.2035961151123047, + "step": 2874 + }, + { + "epoch": 0.38867765103506546, + "grad_norm": 1.0755302906036377, + "learning_rate": 2.209941818028429e-05, + "loss": 0.1721811294555664, + "step": 2875 + }, + { + "epoch": 0.38881284326151244, + "grad_norm": 1.366456389427185, + "learning_rate": 2.2093388155418757e-05, + "loss": 0.20836257934570312, + "step": 2876 + }, + { + "epoch": 0.3889480354879594, + "grad_norm": 0.9442431330680847, + "learning_rate": 2.2087356653671982e-05, + "loss": 0.19383478164672852, + "step": 2877 + }, + { + "epoch": 0.3890832277144064, + "grad_norm": 1.488297700881958, + "learning_rate": 2.2081323676299756e-05, + "loss": 0.1557302474975586, + "step": 2878 + }, + { + "epoch": 0.3892184199408534, + "grad_norm": 3.0893261432647705, + "learning_rate": 2.207528922455818e-05, + "loss": 0.15931320190429688, + "step": 2879 + }, + { + "epoch": 0.38935361216730036, + "grad_norm": 0.9239708781242371, + "learning_rate": 2.206925329970366e-05, + "loss": 0.13497304916381836, + "step": 2880 + }, + { + "epoch": 0.38948880439374733, + "grad_norm": 1.2790453433990479, + "learning_rate": 2.20632159029929e-05, + "loss": 0.249176025390625, + "step": 2881 + }, + { + "epoch": 0.3896239966201943, + "grad_norm": 0.8639877438545227, + "learning_rate": 2.2057177035682926e-05, + "loss": 0.17080211639404297, + "step": 2882 + }, + { + "epoch": 0.3897591888466413, + "grad_norm": 1.3367589712142944, + "learning_rate": 2.2051136699031058e-05, + "loss": 0.18737030029296875, + "step": 2883 + }, + { + "epoch": 0.38989438107308827, + "grad_norm": 1.1957826614379883, + "learning_rate": 2.2045094894294933e-05, + "loss": 0.17303848266601562, + "step": 2884 + }, + { + "epoch": 0.3900295732995353, + "grad_norm": 2.638127326965332, + "learning_rate": 2.203905162273248e-05, + "loss": 0.14135169982910156, + "step": 2885 + }, + { + "epoch": 0.3901647655259823, + "grad_norm": 1.1049944162368774, + "learning_rate": 2.203300688560194e-05, + "loss": 0.23536300659179688, + "step": 2886 + }, + { + "epoch": 0.39029995775242926, + "grad_norm": 2.236567974090576, + "learning_rate": 2.2026960684161862e-05, + "loss": 0.15195465087890625, + "step": 2887 + }, + { + "epoch": 0.39043514997887624, + "grad_norm": 0.8347408175468445, + "learning_rate": 2.2020913019671097e-05, + "loss": 0.1467151641845703, + "step": 2888 + }, + { + "epoch": 0.3905703422053232, + "grad_norm": 2.1340439319610596, + "learning_rate": 2.20148638933888e-05, + "loss": 0.166839599609375, + "step": 2889 + }, + { + "epoch": 0.3907055344317702, + "grad_norm": 1.1151121854782104, + "learning_rate": 2.2008813306574438e-05, + "loss": 0.16907787322998047, + "step": 2890 + }, + { + "epoch": 0.3908407266582172, + "grad_norm": 1.3802108764648438, + "learning_rate": 2.200276126048777e-05, + "loss": 0.22904014587402344, + "step": 2891 + }, + { + "epoch": 0.39097591888466415, + "grad_norm": 1.897668480873108, + "learning_rate": 2.199670775638886e-05, + "loss": 0.2270374298095703, + "step": 2892 + }, + { + "epoch": 0.39111111111111113, + "grad_norm": 1.322569489479065, + "learning_rate": 2.1990652795538085e-05, + "loss": 0.13013172149658203, + "step": 2893 + }, + { + "epoch": 0.3912463033375581, + "grad_norm": 1.019840955734253, + "learning_rate": 2.1984596379196117e-05, + "loss": 0.13911151885986328, + "step": 2894 + }, + { + "epoch": 0.3913814955640051, + "grad_norm": 1.2421302795410156, + "learning_rate": 2.1978538508623942e-05, + "loss": 0.18236064910888672, + "step": 2895 + }, + { + "epoch": 0.39151668779045207, + "grad_norm": 0.871039867401123, + "learning_rate": 2.197247918508283e-05, + "loss": 0.11935877799987793, + "step": 2896 + }, + { + "epoch": 0.39165188001689905, + "grad_norm": 2.6692447662353516, + "learning_rate": 2.1966418409834374e-05, + "loss": 0.21228408813476562, + "step": 2897 + }, + { + "epoch": 0.391787072243346, + "grad_norm": 1.360025405883789, + "learning_rate": 2.1960356184140453e-05, + "loss": 0.1808757781982422, + "step": 2898 + }, + { + "epoch": 0.391922264469793, + "grad_norm": 0.9737027287483215, + "learning_rate": 2.1954292509263258e-05, + "loss": 0.1680135726928711, + "step": 2899 + }, + { + "epoch": 0.39205745669624, + "grad_norm": 1.5282942056655884, + "learning_rate": 2.194822738646528e-05, + "loss": 0.12001419067382812, + "step": 2900 + }, + { + "epoch": 0.39219264892268696, + "grad_norm": 1.55282461643219, + "learning_rate": 2.1942160817009304e-05, + "loss": 0.12455630302429199, + "step": 2901 + }, + { + "epoch": 0.39232784114913394, + "grad_norm": 0.8353968262672424, + "learning_rate": 2.193609280215843e-05, + "loss": 0.16773700714111328, + "step": 2902 + }, + { + "epoch": 0.3924630333755809, + "grad_norm": 1.0280730724334717, + "learning_rate": 2.1930023343176044e-05, + "loss": 0.20391559600830078, + "step": 2903 + }, + { + "epoch": 0.3925982256020279, + "grad_norm": 0.9376540780067444, + "learning_rate": 2.1923952441325837e-05, + "loss": 0.1752481460571289, + "step": 2904 + }, + { + "epoch": 0.3927334178284749, + "grad_norm": 0.9284510612487793, + "learning_rate": 2.191788009787182e-05, + "loss": 0.18593358993530273, + "step": 2905 + }, + { + "epoch": 0.39286861005492185, + "grad_norm": 1.2415282726287842, + "learning_rate": 2.1911806314078267e-05, + "loss": 0.22683334350585938, + "step": 2906 + }, + { + "epoch": 0.39300380228136883, + "grad_norm": 1.5311275720596313, + "learning_rate": 2.1905731091209786e-05, + "loss": 0.2314624786376953, + "step": 2907 + }, + { + "epoch": 0.3931389945078158, + "grad_norm": 2.4889657497406006, + "learning_rate": 2.1899654430531262e-05, + "loss": 0.20072174072265625, + "step": 2908 + }, + { + "epoch": 0.3932741867342628, + "grad_norm": 2.7938175201416016, + "learning_rate": 2.18935763333079e-05, + "loss": 0.20580291748046875, + "step": 2909 + }, + { + "epoch": 0.39340937896070977, + "grad_norm": 2.0887486934661865, + "learning_rate": 2.1887496800805175e-05, + "loss": 0.1824359893798828, + "step": 2910 + }, + { + "epoch": 0.39354457118715674, + "grad_norm": 2.5393736362457275, + "learning_rate": 2.188141583428889e-05, + "loss": 0.2472076416015625, + "step": 2911 + }, + { + "epoch": 0.3936797634136037, + "grad_norm": 0.8902279138565063, + "learning_rate": 2.1875333435025138e-05, + "loss": 0.08413231372833252, + "step": 2912 + }, + { + "epoch": 0.3938149556400507, + "grad_norm": 0.8436653017997742, + "learning_rate": 2.1869249604280296e-05, + "loss": 0.1276235580444336, + "step": 2913 + }, + { + "epoch": 0.3939501478664977, + "grad_norm": 1.749403715133667, + "learning_rate": 2.1863164343321057e-05, + "loss": 0.19369029998779297, + "step": 2914 + }, + { + "epoch": 0.39408534009294466, + "grad_norm": 1.6402604579925537, + "learning_rate": 2.1857077653414397e-05, + "loss": 0.21311521530151367, + "step": 2915 + }, + { + "epoch": 0.39422053231939164, + "grad_norm": 2.640350580215454, + "learning_rate": 2.185098953582761e-05, + "loss": 0.22010326385498047, + "step": 2916 + }, + { + "epoch": 0.3943557245458386, + "grad_norm": 1.7663654088974, + "learning_rate": 2.1844899991828265e-05, + "loss": 0.218597412109375, + "step": 2917 + }, + { + "epoch": 0.3944909167722856, + "grad_norm": 1.1774868965148926, + "learning_rate": 2.1838809022684247e-05, + "loss": 0.17959117889404297, + "step": 2918 + }, + { + "epoch": 0.39462610899873257, + "grad_norm": 1.1452014446258545, + "learning_rate": 2.1832716629663712e-05, + "loss": 0.14960861206054688, + "step": 2919 + }, + { + "epoch": 0.39476130122517955, + "grad_norm": 1.0479589700698853, + "learning_rate": 2.1826622814035138e-05, + "loss": 0.19725799560546875, + "step": 2920 + }, + { + "epoch": 0.39489649345162653, + "grad_norm": 0.906482994556427, + "learning_rate": 2.1820527577067293e-05, + "loss": 0.14765214920043945, + "step": 2921 + }, + { + "epoch": 0.3950316856780735, + "grad_norm": 2.3501925468444824, + "learning_rate": 2.1814430920029238e-05, + "loss": 0.24921202659606934, + "step": 2922 + }, + { + "epoch": 0.3951668779045205, + "grad_norm": 2.188354969024658, + "learning_rate": 2.1808332844190325e-05, + "loss": 0.19327163696289062, + "step": 2923 + }, + { + "epoch": 0.39530207013096746, + "grad_norm": 2.953916072845459, + "learning_rate": 2.1802233350820203e-05, + "loss": 0.1996135711669922, + "step": 2924 + }, + { + "epoch": 0.39543726235741444, + "grad_norm": 2.9287643432617188, + "learning_rate": 2.179613244118883e-05, + "loss": 0.19552922248840332, + "step": 2925 + }, + { + "epoch": 0.3955724545838614, + "grad_norm": 1.8541208505630493, + "learning_rate": 2.1790030116566436e-05, + "loss": 0.18764495849609375, + "step": 2926 + }, + { + "epoch": 0.3957076468103084, + "grad_norm": 1.2976611852645874, + "learning_rate": 2.1783926378223563e-05, + "loss": 0.19126343727111816, + "step": 2927 + }, + { + "epoch": 0.3958428390367554, + "grad_norm": 2.2463831901550293, + "learning_rate": 2.1777821227431048e-05, + "loss": 0.15668010711669922, + "step": 2928 + }, + { + "epoch": 0.39597803126320236, + "grad_norm": 1.0793263912200928, + "learning_rate": 2.1771714665460005e-05, + "loss": 0.21531438827514648, + "step": 2929 + }, + { + "epoch": 0.39611322348964934, + "grad_norm": 1.7339545488357544, + "learning_rate": 2.1765606693581857e-05, + "loss": 0.16473960876464844, + "step": 2930 + }, + { + "epoch": 0.3962484157160963, + "grad_norm": 3.6025266647338867, + "learning_rate": 2.1759497313068316e-05, + "loss": 0.23078536987304688, + "step": 2931 + }, + { + "epoch": 0.3963836079425433, + "grad_norm": 1.894962191581726, + "learning_rate": 2.175338652519139e-05, + "loss": 0.1701984405517578, + "step": 2932 + }, + { + "epoch": 0.39651880016899027, + "grad_norm": 1.0657076835632324, + "learning_rate": 2.1747274331223377e-05, + "loss": 0.13462257385253906, + "step": 2933 + }, + { + "epoch": 0.39665399239543725, + "grad_norm": 1.743187427520752, + "learning_rate": 2.1741160732436865e-05, + "loss": 0.1734457015991211, + "step": 2934 + }, + { + "epoch": 0.39678918462188423, + "grad_norm": 1.6830084323883057, + "learning_rate": 2.1735045730104746e-05, + "loss": 0.2747783660888672, + "step": 2935 + }, + { + "epoch": 0.3969243768483312, + "grad_norm": 0.7709751725196838, + "learning_rate": 2.1728929325500183e-05, + "loss": 0.15062332153320312, + "step": 2936 + }, + { + "epoch": 0.3970595690747782, + "grad_norm": 0.9073984622955322, + "learning_rate": 2.1722811519896654e-05, + "loss": 0.1971604824066162, + "step": 2937 + }, + { + "epoch": 0.39719476130122516, + "grad_norm": 0.7754150032997131, + "learning_rate": 2.171669231456792e-05, + "loss": 0.17679405212402344, + "step": 2938 + }, + { + "epoch": 0.39732995352767214, + "grad_norm": 1.0466722249984741, + "learning_rate": 2.1710571710788025e-05, + "loss": 0.18363523483276367, + "step": 2939 + }, + { + "epoch": 0.3974651457541191, + "grad_norm": 0.5604305863380432, + "learning_rate": 2.1704449709831312e-05, + "loss": 0.13359642028808594, + "step": 2940 + }, + { + "epoch": 0.3976003379805661, + "grad_norm": 0.877889096736908, + "learning_rate": 2.1698326312972423e-05, + "loss": 0.1728067398071289, + "step": 2941 + }, + { + "epoch": 0.3977355302070131, + "grad_norm": 0.6807836294174194, + "learning_rate": 2.1692201521486268e-05, + "loss": 0.12730789184570312, + "step": 2942 + }, + { + "epoch": 0.39787072243346006, + "grad_norm": 1.5109617710113525, + "learning_rate": 2.1686075336648075e-05, + "loss": 0.19756031036376953, + "step": 2943 + }, + { + "epoch": 0.39800591465990703, + "grad_norm": 1.3805861473083496, + "learning_rate": 2.167994775973334e-05, + "loss": 0.1860179901123047, + "step": 2944 + }, + { + "epoch": 0.398141106886354, + "grad_norm": 0.9127232432365417, + "learning_rate": 2.167381879201786e-05, + "loss": 0.1761932373046875, + "step": 2945 + }, + { + "epoch": 0.398276299112801, + "grad_norm": 1.7958850860595703, + "learning_rate": 2.166768843477772e-05, + "loss": 0.19452857971191406, + "step": 2946 + }, + { + "epoch": 0.39841149133924797, + "grad_norm": 2.4171359539031982, + "learning_rate": 2.166155668928929e-05, + "loss": 0.19708538055419922, + "step": 2947 + }, + { + "epoch": 0.39854668356569495, + "grad_norm": 2.2775895595550537, + "learning_rate": 2.1655423556829233e-05, + "loss": 0.1575450897216797, + "step": 2948 + }, + { + "epoch": 0.3986818757921419, + "grad_norm": 1.1040598154067993, + "learning_rate": 2.1649289038674504e-05, + "loss": 0.14584064483642578, + "step": 2949 + }, + { + "epoch": 0.3988170680185889, + "grad_norm": 2.0849831104278564, + "learning_rate": 2.1643153136102333e-05, + "loss": 0.17862319946289062, + "step": 2950 + }, + { + "epoch": 0.3989522602450359, + "grad_norm": 0.9875862002372742, + "learning_rate": 2.1637015850390255e-05, + "loss": 0.15175437927246094, + "step": 2951 + }, + { + "epoch": 0.3990874524714829, + "grad_norm": 1.0719780921936035, + "learning_rate": 2.1630877182816087e-05, + "loss": 0.18024969100952148, + "step": 2952 + }, + { + "epoch": 0.3992226446979299, + "grad_norm": 1.6491544246673584, + "learning_rate": 2.162473713465793e-05, + "loss": 0.1746959686279297, + "step": 2953 + }, + { + "epoch": 0.3993578369243769, + "grad_norm": 1.344290018081665, + "learning_rate": 2.161859570719417e-05, + "loss": 0.1865081787109375, + "step": 2954 + }, + { + "epoch": 0.39949302915082385, + "grad_norm": 1.221110224723816, + "learning_rate": 2.161245290170349e-05, + "loss": 0.24373531341552734, + "step": 2955 + }, + { + "epoch": 0.39962822137727083, + "grad_norm": 0.9813793301582336, + "learning_rate": 2.1606308719464858e-05, + "loss": 0.217193603515625, + "step": 2956 + }, + { + "epoch": 0.3997634136037178, + "grad_norm": 1.84911048412323, + "learning_rate": 2.160016316175752e-05, + "loss": 0.22316360473632812, + "step": 2957 + }, + { + "epoch": 0.3998986058301648, + "grad_norm": 0.7422873377799988, + "learning_rate": 2.159401622986101e-05, + "loss": 0.17644762992858887, + "step": 2958 + }, + { + "epoch": 0.40003379805661177, + "grad_norm": 0.6138271689414978, + "learning_rate": 2.1587867925055165e-05, + "loss": 0.12640142440795898, + "step": 2959 + }, + { + "epoch": 0.40016899028305875, + "grad_norm": 1.374695897102356, + "learning_rate": 2.158171824862008e-05, + "loss": 0.17972755432128906, + "step": 2960 + }, + { + "epoch": 0.4003041825095057, + "grad_norm": 1.806179165840149, + "learning_rate": 2.157556720183616e-05, + "loss": 0.16175079345703125, + "step": 2961 + }, + { + "epoch": 0.4004393747359527, + "grad_norm": 1.0433707237243652, + "learning_rate": 2.156941478598409e-05, + "loss": 0.19652175903320312, + "step": 2962 + }, + { + "epoch": 0.4005745669623997, + "grad_norm": 0.9381135106086731, + "learning_rate": 2.156326100234482e-05, + "loss": 0.1557760238647461, + "step": 2963 + }, + { + "epoch": 0.40070975918884666, + "grad_norm": 1.6645901203155518, + "learning_rate": 2.1557105852199612e-05, + "loss": 0.20635509490966797, + "step": 2964 + }, + { + "epoch": 0.40084495141529364, + "grad_norm": 1.121830701828003, + "learning_rate": 2.155094933683e-05, + "loss": 0.1860337257385254, + "step": 2965 + }, + { + "epoch": 0.4009801436417406, + "grad_norm": 0.8452627062797546, + "learning_rate": 2.1544791457517802e-05, + "loss": 0.16274452209472656, + "step": 2966 + }, + { + "epoch": 0.4011153358681876, + "grad_norm": 1.5445414781570435, + "learning_rate": 2.1538632215545126e-05, + "loss": 0.17372894287109375, + "step": 2967 + }, + { + "epoch": 0.4012505280946346, + "grad_norm": 0.8613083958625793, + "learning_rate": 2.153247161219435e-05, + "loss": 0.15696048736572266, + "step": 2968 + }, + { + "epoch": 0.40138572032108155, + "grad_norm": 0.9513514637947083, + "learning_rate": 2.1526309648748147e-05, + "loss": 0.18019723892211914, + "step": 2969 + }, + { + "epoch": 0.40152091254752853, + "grad_norm": 1.529106855392456, + "learning_rate": 2.1520146326489476e-05, + "loss": 0.1893787384033203, + "step": 2970 + }, + { + "epoch": 0.4016561047739755, + "grad_norm": 1.6951128244400024, + "learning_rate": 2.151398164670157e-05, + "loss": 0.19144916534423828, + "step": 2971 + }, + { + "epoch": 0.4017912970004225, + "grad_norm": 0.7875958681106567, + "learning_rate": 2.1507815610667948e-05, + "loss": 0.15167522430419922, + "step": 2972 + }, + { + "epoch": 0.40192648922686947, + "grad_norm": 1.6109178066253662, + "learning_rate": 2.1501648219672407e-05, + "loss": 0.1681361198425293, + "step": 2973 + }, + { + "epoch": 0.40206168145331644, + "grad_norm": 3.321265459060669, + "learning_rate": 2.149547947499904e-05, + "loss": 0.2428741455078125, + "step": 2974 + }, + { + "epoch": 0.4021968736797634, + "grad_norm": 1.5932071208953857, + "learning_rate": 2.1489309377932212e-05, + "loss": 0.1852436065673828, + "step": 2975 + }, + { + "epoch": 0.4023320659062104, + "grad_norm": 1.3729808330535889, + "learning_rate": 2.1483137929756562e-05, + "loss": 0.1788616180419922, + "step": 2976 + }, + { + "epoch": 0.4024672581326574, + "grad_norm": 1.4057748317718506, + "learning_rate": 2.147696513175702e-05, + "loss": 0.17625093460083008, + "step": 2977 + }, + { + "epoch": 0.40260245035910436, + "grad_norm": 1.1885308027267456, + "learning_rate": 2.1470790985218804e-05, + "loss": 0.17126131057739258, + "step": 2978 + }, + { + "epoch": 0.40273764258555134, + "grad_norm": 1.4859938621520996, + "learning_rate": 2.1464615491427393e-05, + "loss": 0.18955612182617188, + "step": 2979 + }, + { + "epoch": 0.4028728348119983, + "grad_norm": 1.176913857460022, + "learning_rate": 2.1458438651668567e-05, + "loss": 0.14587020874023438, + "step": 2980 + }, + { + "epoch": 0.4030080270384453, + "grad_norm": 1.2134369611740112, + "learning_rate": 2.1452260467228376e-05, + "loss": 0.1571359634399414, + "step": 2981 + }, + { + "epoch": 0.40314321926489227, + "grad_norm": 1.9966589212417603, + "learning_rate": 2.144608093939314e-05, + "loss": 0.2558879852294922, + "step": 2982 + }, + { + "epoch": 0.40327841149133925, + "grad_norm": 1.0463056564331055, + "learning_rate": 2.1439900069449483e-05, + "loss": 0.16789817810058594, + "step": 2983 + }, + { + "epoch": 0.40341360371778623, + "grad_norm": 3.088319778442383, + "learning_rate": 2.1433717858684286e-05, + "loss": 0.21585512161254883, + "step": 2984 + }, + { + "epoch": 0.4035487959442332, + "grad_norm": 1.1387271881103516, + "learning_rate": 2.1427534308384724e-05, + "loss": 0.17087364196777344, + "step": 2985 + }, + { + "epoch": 0.4036839881706802, + "grad_norm": 1.410264492034912, + "learning_rate": 2.1421349419838245e-05, + "loss": 0.2035980224609375, + "step": 2986 + }, + { + "epoch": 0.40381918039712716, + "grad_norm": 2.4529712200164795, + "learning_rate": 2.1415163194332574e-05, + "loss": 0.23231029510498047, + "step": 2987 + }, + { + "epoch": 0.40395437262357414, + "grad_norm": 2.6798970699310303, + "learning_rate": 2.1408975633155715e-05, + "loss": 0.1841106414794922, + "step": 2988 + }, + { + "epoch": 0.4040895648500211, + "grad_norm": 1.6458081007003784, + "learning_rate": 2.140278673759595e-05, + "loss": 0.16478300094604492, + "step": 2989 + }, + { + "epoch": 0.4042247570764681, + "grad_norm": 0.8627346158027649, + "learning_rate": 2.1396596508941847e-05, + "loss": 0.20490455627441406, + "step": 2990 + }, + { + "epoch": 0.4043599493029151, + "grad_norm": 1.2831915616989136, + "learning_rate": 2.1390404948482238e-05, + "loss": 0.1735677719116211, + "step": 2991 + }, + { + "epoch": 0.40449514152936206, + "grad_norm": 1.3130743503570557, + "learning_rate": 2.1384212057506243e-05, + "loss": 0.20348739624023438, + "step": 2992 + }, + { + "epoch": 0.40463033375580904, + "grad_norm": 0.8924576640129089, + "learning_rate": 2.137801783730325e-05, + "loss": 0.16890335083007812, + "step": 2993 + }, + { + "epoch": 0.404765525982256, + "grad_norm": 1.6816691160202026, + "learning_rate": 2.137182228916293e-05, + "loss": 0.16756820678710938, + "step": 2994 + }, + { + "epoch": 0.404900718208703, + "grad_norm": 0.7269234657287598, + "learning_rate": 2.136562541437523e-05, + "loss": 0.16210460662841797, + "step": 2995 + }, + { + "epoch": 0.40503591043514997, + "grad_norm": 2.38232421875, + "learning_rate": 2.135942721423038e-05, + "loss": 0.12434983253479004, + "step": 2996 + }, + { + "epoch": 0.40517110266159695, + "grad_norm": 0.9577071070671082, + "learning_rate": 2.1353227690018865e-05, + "loss": 0.14405155181884766, + "step": 2997 + }, + { + "epoch": 0.4053062948880439, + "grad_norm": 2.890902280807495, + "learning_rate": 2.1347026843031467e-05, + "loss": 0.23656272888183594, + "step": 2998 + }, + { + "epoch": 0.4054414871144909, + "grad_norm": 1.5602978467941284, + "learning_rate": 2.1340824674559238e-05, + "loss": 0.19223594665527344, + "step": 2999 + }, + { + "epoch": 0.4055766793409379, + "grad_norm": 2.897514581680298, + "learning_rate": 2.133462118589349e-05, + "loss": 0.19192218780517578, + "step": 3000 + }, + { + "epoch": 0.40571187156738486, + "grad_norm": 0.7892051935195923, + "learning_rate": 2.1328416378325837e-05, + "loss": 0.11046719551086426, + "step": 3001 + }, + { + "epoch": 0.40584706379383184, + "grad_norm": 0.5690903067588806, + "learning_rate": 2.1322210253148144e-05, + "loss": 0.13419723510742188, + "step": 3002 + }, + { + "epoch": 0.4059822560202788, + "grad_norm": 1.5261783599853516, + "learning_rate": 2.131600281165257e-05, + "loss": 0.15524673461914062, + "step": 3003 + }, + { + "epoch": 0.4061174482467258, + "grad_norm": 2.154695749282837, + "learning_rate": 2.130979405513152e-05, + "loss": 0.1865830421447754, + "step": 3004 + }, + { + "epoch": 0.4062526404731728, + "grad_norm": 4.571969032287598, + "learning_rate": 2.1303583984877697e-05, + "loss": 0.24930477142333984, + "step": 3005 + }, + { + "epoch": 0.40638783269961976, + "grad_norm": 1.1895395517349243, + "learning_rate": 2.1297372602184085e-05, + "loss": 0.14764881134033203, + "step": 3006 + }, + { + "epoch": 0.40652302492606673, + "grad_norm": 1.1962355375289917, + "learning_rate": 2.1291159908343907e-05, + "loss": 0.20009803771972656, + "step": 3007 + }, + { + "epoch": 0.4066582171525137, + "grad_norm": 1.4184571504592896, + "learning_rate": 2.1284945904650693e-05, + "loss": 0.23280048370361328, + "step": 3008 + }, + { + "epoch": 0.4067934093789607, + "grad_norm": 1.2918347120285034, + "learning_rate": 2.127873059239822e-05, + "loss": 0.20061492919921875, + "step": 3009 + }, + { + "epoch": 0.40692860160540767, + "grad_norm": 1.2394102811813354, + "learning_rate": 2.127251397288056e-05, + "loss": 0.16162443161010742, + "step": 3010 + }, + { + "epoch": 0.40706379383185465, + "grad_norm": 1.4771368503570557, + "learning_rate": 2.126629604739204e-05, + "loss": 0.19285202026367188, + "step": 3011 + }, + { + "epoch": 0.4071989860583016, + "grad_norm": 0.8390077948570251, + "learning_rate": 2.1260076817227268e-05, + "loss": 0.19829559326171875, + "step": 3012 + }, + { + "epoch": 0.4073341782847486, + "grad_norm": 0.9760518074035645, + "learning_rate": 2.1253856283681122e-05, + "loss": 0.176422119140625, + "step": 3013 + }, + { + "epoch": 0.4074693705111956, + "grad_norm": 0.837346613407135, + "learning_rate": 2.1247634448048743e-05, + "loss": 0.19060707092285156, + "step": 3014 + }, + { + "epoch": 0.40760456273764256, + "grad_norm": 1.304717779159546, + "learning_rate": 2.1241411311625562e-05, + "loss": 0.1276702880859375, + "step": 3015 + }, + { + "epoch": 0.40773975496408954, + "grad_norm": 0.9274672865867615, + "learning_rate": 2.1235186875707257e-05, + "loss": 0.1623210906982422, + "step": 3016 + }, + { + "epoch": 0.4078749471905365, + "grad_norm": 1.5505789518356323, + "learning_rate": 2.1228961141589797e-05, + "loss": 0.20755767822265625, + "step": 3017 + }, + { + "epoch": 0.4080101394169835, + "grad_norm": 0.6763966083526611, + "learning_rate": 2.122273411056941e-05, + "loss": 0.1399059295654297, + "step": 3018 + }, + { + "epoch": 0.4081453316434305, + "grad_norm": 2.3691086769104004, + "learning_rate": 2.1216505783942592e-05, + "loss": 0.1951141357421875, + "step": 3019 + }, + { + "epoch": 0.4082805238698775, + "grad_norm": 1.3408936262130737, + "learning_rate": 2.121027616300613e-05, + "loss": 0.2190837860107422, + "step": 3020 + }, + { + "epoch": 0.4084157160963245, + "grad_norm": 0.9116777777671814, + "learning_rate": 2.1204045249057043e-05, + "loss": 0.1967792510986328, + "step": 3021 + }, + { + "epoch": 0.40855090832277147, + "grad_norm": 2.6176626682281494, + "learning_rate": 2.119781304339266e-05, + "loss": 0.19192123413085938, + "step": 3022 + }, + { + "epoch": 0.40868610054921845, + "grad_norm": 1.1024119853973389, + "learning_rate": 2.1191579547310547e-05, + "loss": 0.17528629302978516, + "step": 3023 + }, + { + "epoch": 0.4088212927756654, + "grad_norm": 0.7836039662361145, + "learning_rate": 2.1185344762108556e-05, + "loss": 0.16887688636779785, + "step": 3024 + }, + { + "epoch": 0.4089564850021124, + "grad_norm": 1.98976731300354, + "learning_rate": 2.11791086890848e-05, + "loss": 0.1696305274963379, + "step": 3025 + }, + { + "epoch": 0.4090916772285594, + "grad_norm": 1.0534249544143677, + "learning_rate": 2.1172871329537662e-05, + "loss": 0.170501708984375, + "step": 3026 + }, + { + "epoch": 0.40922686945500636, + "grad_norm": 0.6537767648696899, + "learning_rate": 2.1166632684765794e-05, + "loss": 0.12562775611877441, + "step": 3027 + }, + { + "epoch": 0.40936206168145334, + "grad_norm": 3.7160472869873047, + "learning_rate": 2.1160392756068124e-05, + "loss": 0.24985885620117188, + "step": 3028 + }, + { + "epoch": 0.4094972539079003, + "grad_norm": 1.416527509689331, + "learning_rate": 2.1154151544743826e-05, + "loss": 0.19873285293579102, + "step": 3029 + }, + { + "epoch": 0.4096324461343473, + "grad_norm": 0.950940728187561, + "learning_rate": 2.114790905209236e-05, + "loss": 0.1541004180908203, + "step": 3030 + }, + { + "epoch": 0.4097676383607943, + "grad_norm": 1.276268720626831, + "learning_rate": 2.1141665279413444e-05, + "loss": 0.17284393310546875, + "step": 3031 + }, + { + "epoch": 0.40990283058724125, + "grad_norm": 0.7973108887672424, + "learning_rate": 2.1135420228007062e-05, + "loss": 0.1438283920288086, + "step": 3032 + }, + { + "epoch": 0.41003802281368823, + "grad_norm": 0.7642303705215454, + "learning_rate": 2.1129173899173474e-05, + "loss": 0.1288890838623047, + "step": 3033 + }, + { + "epoch": 0.4101732150401352, + "grad_norm": 1.076489806175232, + "learning_rate": 2.11229262942132e-05, + "loss": 0.21950244903564453, + "step": 3034 + }, + { + "epoch": 0.4103084072665822, + "grad_norm": 1.766666293144226, + "learning_rate": 2.1116677414427008e-05, + "loss": 0.2514934539794922, + "step": 3035 + }, + { + "epoch": 0.41044359949302917, + "grad_norm": 1.5698294639587402, + "learning_rate": 2.1110427261115972e-05, + "loss": 0.22774696350097656, + "step": 3036 + }, + { + "epoch": 0.41057879171947614, + "grad_norm": 1.140944242477417, + "learning_rate": 2.1104175835581386e-05, + "loss": 0.2371673583984375, + "step": 3037 + }, + { + "epoch": 0.4107139839459231, + "grad_norm": 3.5694587230682373, + "learning_rate": 2.1097923139124846e-05, + "loss": 0.2121124267578125, + "step": 3038 + }, + { + "epoch": 0.4108491761723701, + "grad_norm": 2.5289413928985596, + "learning_rate": 2.109166917304819e-05, + "loss": 0.2229328155517578, + "step": 3039 + }, + { + "epoch": 0.4109843683988171, + "grad_norm": 1.4896842241287231, + "learning_rate": 2.1085413938653532e-05, + "loss": 0.18973636627197266, + "step": 3040 + }, + { + "epoch": 0.41111956062526406, + "grad_norm": 1.3239771127700806, + "learning_rate": 2.107915743724323e-05, + "loss": 0.17284321784973145, + "step": 3041 + }, + { + "epoch": 0.41125475285171104, + "grad_norm": 0.9550789594650269, + "learning_rate": 2.1072899670119935e-05, + "loss": 0.18607521057128906, + "step": 3042 + }, + { + "epoch": 0.411389945078158, + "grad_norm": 1.1535742282867432, + "learning_rate": 2.1066640638586543e-05, + "loss": 0.22809982299804688, + "step": 3043 + }, + { + "epoch": 0.411525137304605, + "grad_norm": 0.6176862716674805, + "learning_rate": 2.1060380343946223e-05, + "loss": 0.11718082427978516, + "step": 3044 + }, + { + "epoch": 0.41166032953105197, + "grad_norm": 1.288192868232727, + "learning_rate": 2.10541187875024e-05, + "loss": 0.17744064331054688, + "step": 3045 + }, + { + "epoch": 0.41179552175749895, + "grad_norm": 1.1375256776809692, + "learning_rate": 2.1047855970558753e-05, + "loss": 0.11963081359863281, + "step": 3046 + }, + { + "epoch": 0.41193071398394593, + "grad_norm": 1.8950451612472534, + "learning_rate": 2.1041591894419244e-05, + "loss": 0.19255638122558594, + "step": 3047 + }, + { + "epoch": 0.4120659062103929, + "grad_norm": 1.9351564645767212, + "learning_rate": 2.1035326560388087e-05, + "loss": 0.24155616760253906, + "step": 3048 + }, + { + "epoch": 0.4122010984368399, + "grad_norm": 1.6658726930618286, + "learning_rate": 2.1029059969769756e-05, + "loss": 0.19255447387695312, + "step": 3049 + }, + { + "epoch": 0.41233629066328686, + "grad_norm": 2.594334363937378, + "learning_rate": 2.1022792123868986e-05, + "loss": 0.22394180297851562, + "step": 3050 + }, + { + "epoch": 0.41247148288973384, + "grad_norm": 1.028749942779541, + "learning_rate": 2.1016523023990783e-05, + "loss": 0.19150543212890625, + "step": 3051 + }, + { + "epoch": 0.4126066751161808, + "grad_norm": 0.5970864295959473, + "learning_rate": 2.1010252671440398e-05, + "loss": 0.11243534088134766, + "step": 3052 + }, + { + "epoch": 0.4127418673426278, + "grad_norm": 1.1464234590530396, + "learning_rate": 2.1003981067523358e-05, + "loss": 0.15420818328857422, + "step": 3053 + }, + { + "epoch": 0.4128770595690748, + "grad_norm": 1.7887133359909058, + "learning_rate": 2.099770821354544e-05, + "loss": 0.19118118286132812, + "step": 3054 + }, + { + "epoch": 0.41301225179552176, + "grad_norm": 1.2951503992080688, + "learning_rate": 2.0991434110812692e-05, + "loss": 0.14705228805541992, + "step": 3055 + }, + { + "epoch": 0.41314744402196873, + "grad_norm": 2.0299723148345947, + "learning_rate": 2.0985158760631415e-05, + "loss": 0.19005227088928223, + "step": 3056 + }, + { + "epoch": 0.4132826362484157, + "grad_norm": 1.116276741027832, + "learning_rate": 2.0978882164308157e-05, + "loss": 0.16593730449676514, + "step": 3057 + }, + { + "epoch": 0.4134178284748627, + "grad_norm": 1.097907543182373, + "learning_rate": 2.0972604323149755e-05, + "loss": 0.16720247268676758, + "step": 3058 + }, + { + "epoch": 0.41355302070130967, + "grad_norm": 0.9085415601730347, + "learning_rate": 2.0966325238463283e-05, + "loss": 0.23619651794433594, + "step": 3059 + }, + { + "epoch": 0.41368821292775665, + "grad_norm": 0.7797397971153259, + "learning_rate": 2.096004491155608e-05, + "loss": 0.1812114715576172, + "step": 3060 + }, + { + "epoch": 0.4138234051542036, + "grad_norm": 1.6356220245361328, + "learning_rate": 2.0953763343735746e-05, + "loss": 0.2852611541748047, + "step": 3061 + }, + { + "epoch": 0.4139585973806506, + "grad_norm": 0.9383690357208252, + "learning_rate": 2.0947480536310133e-05, + "loss": 0.16326236724853516, + "step": 3062 + }, + { + "epoch": 0.4140937896070976, + "grad_norm": 2.589324474334717, + "learning_rate": 2.0941196490587352e-05, + "loss": 0.2070094347000122, + "step": 3063 + }, + { + "epoch": 0.41422898183354456, + "grad_norm": 0.9352585673332214, + "learning_rate": 2.0934911207875782e-05, + "loss": 0.19829416275024414, + "step": 3064 + }, + { + "epoch": 0.41436417405999154, + "grad_norm": 1.3081955909729004, + "learning_rate": 2.092862468948405e-05, + "loss": 0.24007797241210938, + "step": 3065 + }, + { + "epoch": 0.4144993662864385, + "grad_norm": 1.246611475944519, + "learning_rate": 2.0922336936721044e-05, + "loss": 0.2225322723388672, + "step": 3066 + }, + { + "epoch": 0.4146345585128855, + "grad_norm": 0.7423734068870544, + "learning_rate": 2.0916047950895907e-05, + "loss": 0.1749706268310547, + "step": 3067 + }, + { + "epoch": 0.4147697507393325, + "grad_norm": 3.3691582679748535, + "learning_rate": 2.0909757733318035e-05, + "loss": 0.19530868530273438, + "step": 3068 + }, + { + "epoch": 0.41490494296577946, + "grad_norm": 2.329500675201416, + "learning_rate": 2.090346628529709e-05, + "loss": 0.21277809143066406, + "step": 3069 + }, + { + "epoch": 0.41504013519222643, + "grad_norm": 0.8931111097335815, + "learning_rate": 2.089717360814298e-05, + "loss": 0.2049236297607422, + "step": 3070 + }, + { + "epoch": 0.4151753274186734, + "grad_norm": 0.7539544701576233, + "learning_rate": 2.0890879703165885e-05, + "loss": 0.16530132293701172, + "step": 3071 + }, + { + "epoch": 0.4153105196451204, + "grad_norm": 1.1251816749572754, + "learning_rate": 2.0884584571676217e-05, + "loss": 0.14655494689941406, + "step": 3072 + }, + { + "epoch": 0.41544571187156737, + "grad_norm": 2.418135404586792, + "learning_rate": 2.0878288214984657e-05, + "loss": 0.22464561462402344, + "step": 3073 + }, + { + "epoch": 0.41558090409801435, + "grad_norm": 1.0123423337936401, + "learning_rate": 2.0871990634402147e-05, + "loss": 0.19751358032226562, + "step": 3074 + }, + { + "epoch": 0.4157160963244613, + "grad_norm": 1.1898612976074219, + "learning_rate": 2.0865691831239877e-05, + "loss": 0.18123340606689453, + "step": 3075 + }, + { + "epoch": 0.4158512885509083, + "grad_norm": 1.1554439067840576, + "learning_rate": 2.0859391806809285e-05, + "loss": 0.21947240829467773, + "step": 3076 + }, + { + "epoch": 0.4159864807773553, + "grad_norm": 2.1257498264312744, + "learning_rate": 2.0853090562422072e-05, + "loss": 0.16626596450805664, + "step": 3077 + }, + { + "epoch": 0.41612167300380226, + "grad_norm": 1.8598438501358032, + "learning_rate": 2.084678809939019e-05, + "loss": 0.14995288848876953, + "step": 3078 + }, + { + "epoch": 0.41625686523024924, + "grad_norm": 2.615832805633545, + "learning_rate": 2.084048441902585e-05, + "loss": 0.22571182250976562, + "step": 3079 + }, + { + "epoch": 0.4163920574566962, + "grad_norm": 0.9203280806541443, + "learning_rate": 2.0834179522641508e-05, + "loss": 0.15561556816101074, + "step": 3080 + }, + { + "epoch": 0.4165272496831432, + "grad_norm": 1.5997341871261597, + "learning_rate": 2.0827873411549877e-05, + "loss": 0.21852684020996094, + "step": 3081 + }, + { + "epoch": 0.4166624419095902, + "grad_norm": 1.297373652458191, + "learning_rate": 2.0821566087063926e-05, + "loss": 0.20093536376953125, + "step": 3082 + }, + { + "epoch": 0.41679763413603715, + "grad_norm": 1.8051400184631348, + "learning_rate": 2.081525755049687e-05, + "loss": 0.21544265747070312, + "step": 3083 + }, + { + "epoch": 0.41693282636248413, + "grad_norm": 1.1666992902755737, + "learning_rate": 2.0808947803162182e-05, + "loss": 0.15682530403137207, + "step": 3084 + }, + { + "epoch": 0.4170680185889311, + "grad_norm": 0.9536730051040649, + "learning_rate": 2.0802636846373578e-05, + "loss": 0.13433504104614258, + "step": 3085 + }, + { + "epoch": 0.4172032108153781, + "grad_norm": 1.1640393733978271, + "learning_rate": 2.0796324681445045e-05, + "loss": 0.2030181884765625, + "step": 3086 + }, + { + "epoch": 0.41733840304182507, + "grad_norm": 0.8382679224014282, + "learning_rate": 2.0790011309690806e-05, + "loss": 0.1530303955078125, + "step": 3087 + }, + { + "epoch": 0.4174735952682721, + "grad_norm": 1.199198603630066, + "learning_rate": 2.0783696732425332e-05, + "loss": 0.2301959991455078, + "step": 3088 + }, + { + "epoch": 0.4176087874947191, + "grad_norm": 1.8117302656173706, + "learning_rate": 2.0777380950963355e-05, + "loss": 0.16292476654052734, + "step": 3089 + }, + { + "epoch": 0.41774397972116606, + "grad_norm": 1.130401849746704, + "learning_rate": 2.0771063966619854e-05, + "loss": 0.19435930252075195, + "step": 3090 + }, + { + "epoch": 0.41787917194761304, + "grad_norm": 2.5911643505096436, + "learning_rate": 2.0764745780710065e-05, + "loss": 0.23030471801757812, + "step": 3091 + }, + { + "epoch": 0.41801436417406, + "grad_norm": 0.7333688139915466, + "learning_rate": 2.075842639454946e-05, + "loss": 0.1255178451538086, + "step": 3092 + }, + { + "epoch": 0.418149556400507, + "grad_norm": 1.3307783603668213, + "learning_rate": 2.075210580945378e-05, + "loss": 0.18999814987182617, + "step": 3093 + }, + { + "epoch": 0.418284748626954, + "grad_norm": 0.862221896648407, + "learning_rate": 2.0745784026738984e-05, + "loss": 0.17731380462646484, + "step": 3094 + }, + { + "epoch": 0.41841994085340095, + "grad_norm": 1.3271745443344116, + "learning_rate": 2.073946104772132e-05, + "loss": 0.21765542030334473, + "step": 3095 + }, + { + "epoch": 0.41855513307984793, + "grad_norm": 1.9001739025115967, + "learning_rate": 2.0733136873717258e-05, + "loss": 0.14759445190429688, + "step": 3096 + }, + { + "epoch": 0.4186903253062949, + "grad_norm": 1.3153339624404907, + "learning_rate": 2.0726811506043527e-05, + "loss": 0.18173694610595703, + "step": 3097 + }, + { + "epoch": 0.4188255175327419, + "grad_norm": 1.5945225954055786, + "learning_rate": 2.0720484946017104e-05, + "loss": 0.16118812561035156, + "step": 3098 + }, + { + "epoch": 0.41896070975918887, + "grad_norm": 1.2689669132232666, + "learning_rate": 2.0714157194955202e-05, + "loss": 0.20510482788085938, + "step": 3099 + }, + { + "epoch": 0.41909590198563584, + "grad_norm": 0.6630727648735046, + "learning_rate": 2.070782825417531e-05, + "loss": 0.14899134635925293, + "step": 3100 + }, + { + "epoch": 0.4192310942120828, + "grad_norm": 1.7634410858154297, + "learning_rate": 2.0701498124995127e-05, + "loss": 0.2501850128173828, + "step": 3101 + }, + { + "epoch": 0.4193662864385298, + "grad_norm": 1.1146520376205444, + "learning_rate": 2.069516680873264e-05, + "loss": 0.20702743530273438, + "step": 3102 + }, + { + "epoch": 0.4195014786649768, + "grad_norm": 1.5814372301101685, + "learning_rate": 2.0688834306706047e-05, + "loss": 0.18718814849853516, + "step": 3103 + }, + { + "epoch": 0.41963667089142376, + "grad_norm": 2.1391141414642334, + "learning_rate": 2.0682500620233815e-05, + "loss": 0.21801185607910156, + "step": 3104 + }, + { + "epoch": 0.41977186311787074, + "grad_norm": 2.015735149383545, + "learning_rate": 2.0676165750634656e-05, + "loss": 0.14991378784179688, + "step": 3105 + }, + { + "epoch": 0.4199070553443177, + "grad_norm": 1.3970814943313599, + "learning_rate": 2.0669829699227513e-05, + "loss": 0.1437692642211914, + "step": 3106 + }, + { + "epoch": 0.4200422475707647, + "grad_norm": 1.0428743362426758, + "learning_rate": 2.06634924673316e-05, + "loss": 0.1648116111755371, + "step": 3107 + }, + { + "epoch": 0.42017743979721167, + "grad_norm": 3.6171913146972656, + "learning_rate": 2.0657154056266346e-05, + "loss": 0.31303977966308594, + "step": 3108 + }, + { + "epoch": 0.42031263202365865, + "grad_norm": 1.9231373071670532, + "learning_rate": 2.0650814467351452e-05, + "loss": 0.18438935279846191, + "step": 3109 + }, + { + "epoch": 0.42044782425010563, + "grad_norm": 0.8312044143676758, + "learning_rate": 2.064447370190685e-05, + "loss": 0.1562342643737793, + "step": 3110 + }, + { + "epoch": 0.4205830164765526, + "grad_norm": 1.5872935056686401, + "learning_rate": 2.0638131761252724e-05, + "loss": 0.20205354690551758, + "step": 3111 + }, + { + "epoch": 0.4207182087029996, + "grad_norm": 2.4010915756225586, + "learning_rate": 2.06317886467095e-05, + "loss": 0.24871063232421875, + "step": 3112 + }, + { + "epoch": 0.42085340092944656, + "grad_norm": 1.1522927284240723, + "learning_rate": 2.0625444359597847e-05, + "loss": 0.16045093536376953, + "step": 3113 + }, + { + "epoch": 0.42098859315589354, + "grad_norm": 1.4989211559295654, + "learning_rate": 2.0619098901238684e-05, + "loss": 0.18958330154418945, + "step": 3114 + }, + { + "epoch": 0.4211237853823405, + "grad_norm": 2.8187615871429443, + "learning_rate": 2.0612752272953158e-05, + "loss": 0.1924142837524414, + "step": 3115 + }, + { + "epoch": 0.4212589776087875, + "grad_norm": 1.6889748573303223, + "learning_rate": 2.060640447606268e-05, + "loss": 0.14674735069274902, + "step": 3116 + }, + { + "epoch": 0.4213941698352345, + "grad_norm": 1.0009809732437134, + "learning_rate": 2.0600055511888895e-05, + "loss": 0.16613101959228516, + "step": 3117 + }, + { + "epoch": 0.42152936206168146, + "grad_norm": 0.9665440320968628, + "learning_rate": 2.059370538175369e-05, + "loss": 0.1789846420288086, + "step": 3118 + }, + { + "epoch": 0.42166455428812843, + "grad_norm": 1.3382807970046997, + "learning_rate": 2.0587354086979194e-05, + "loss": 0.13223552703857422, + "step": 3119 + }, + { + "epoch": 0.4217997465145754, + "grad_norm": 2.7905945777893066, + "learning_rate": 2.0581001628887785e-05, + "loss": 0.21348845958709717, + "step": 3120 + }, + { + "epoch": 0.4219349387410224, + "grad_norm": 1.914476990699768, + "learning_rate": 2.057464800880207e-05, + "loss": 0.1931157112121582, + "step": 3121 + }, + { + "epoch": 0.42207013096746937, + "grad_norm": 1.1248724460601807, + "learning_rate": 2.0568293228044914e-05, + "loss": 0.19129371643066406, + "step": 3122 + }, + { + "epoch": 0.42220532319391635, + "grad_norm": 0.9605360627174377, + "learning_rate": 2.0561937287939413e-05, + "loss": 0.13921260833740234, + "step": 3123 + }, + { + "epoch": 0.4223405154203633, + "grad_norm": 1.276961088180542, + "learning_rate": 2.055558018980891e-05, + "loss": 0.1785411834716797, + "step": 3124 + }, + { + "epoch": 0.4224757076468103, + "grad_norm": 1.0074282884597778, + "learning_rate": 2.0549221934976987e-05, + "loss": 0.1901702880859375, + "step": 3125 + }, + { + "epoch": 0.4226108998732573, + "grad_norm": 0.9029681086540222, + "learning_rate": 2.054286252476746e-05, + "loss": 0.1786508560180664, + "step": 3126 + }, + { + "epoch": 0.42274609209970426, + "grad_norm": 1.8974783420562744, + "learning_rate": 2.05365019605044e-05, + "loss": 0.16581153869628906, + "step": 3127 + }, + { + "epoch": 0.42288128432615124, + "grad_norm": 0.8541675806045532, + "learning_rate": 2.053014024351211e-05, + "loss": 0.1798553466796875, + "step": 3128 + }, + { + "epoch": 0.4230164765525982, + "grad_norm": 1.5723378658294678, + "learning_rate": 2.0523777375115133e-05, + "loss": 0.17410755157470703, + "step": 3129 + }, + { + "epoch": 0.4231516687790452, + "grad_norm": 0.8442850112915039, + "learning_rate": 2.0517413356638245e-05, + "loss": 0.1694507598876953, + "step": 3130 + }, + { + "epoch": 0.4232868610054922, + "grad_norm": 1.1846411228179932, + "learning_rate": 2.0511048189406472e-05, + "loss": 0.1890854835510254, + "step": 3131 + }, + { + "epoch": 0.42342205323193916, + "grad_norm": 1.237890601158142, + "learning_rate": 2.0504681874745082e-05, + "loss": 0.17635726928710938, + "step": 3132 + }, + { + "epoch": 0.42355724545838613, + "grad_norm": 1.8898024559020996, + "learning_rate": 2.049831441397957e-05, + "loss": 0.1778106689453125, + "step": 3133 + }, + { + "epoch": 0.4236924376848331, + "grad_norm": 0.9419623613357544, + "learning_rate": 2.0491945808435674e-05, + "loss": 0.21422576904296875, + "step": 3134 + }, + { + "epoch": 0.4238276299112801, + "grad_norm": 1.1858009099960327, + "learning_rate": 2.048557605943938e-05, + "loss": 0.10719728469848633, + "step": 3135 + }, + { + "epoch": 0.42396282213772707, + "grad_norm": 0.794576108455658, + "learning_rate": 2.047920516831689e-05, + "loss": 0.1824202537536621, + "step": 3136 + }, + { + "epoch": 0.42409801436417405, + "grad_norm": 0.9986293315887451, + "learning_rate": 2.047283313639467e-05, + "loss": 0.22907638549804688, + "step": 3137 + }, + { + "epoch": 0.424233206590621, + "grad_norm": 1.4526969194412231, + "learning_rate": 2.0466459964999408e-05, + "loss": 0.16235113143920898, + "step": 3138 + }, + { + "epoch": 0.424368398817068, + "grad_norm": 0.857375979423523, + "learning_rate": 2.0460085655458025e-05, + "loss": 0.15218162536621094, + "step": 3139 + }, + { + "epoch": 0.424503591043515, + "grad_norm": 0.9026086330413818, + "learning_rate": 2.0453710209097697e-05, + "loss": 0.20074081420898438, + "step": 3140 + }, + { + "epoch": 0.42463878326996196, + "grad_norm": 1.767188310623169, + "learning_rate": 2.044733362724582e-05, + "loss": 0.19459891319274902, + "step": 3141 + }, + { + "epoch": 0.42477397549640894, + "grad_norm": 1.2165381908416748, + "learning_rate": 2.0440955911230028e-05, + "loss": 0.19650840759277344, + "step": 3142 + }, + { + "epoch": 0.4249091677228559, + "grad_norm": 1.5772616863250732, + "learning_rate": 2.0434577062378203e-05, + "loss": 0.21356773376464844, + "step": 3143 + }, + { + "epoch": 0.4250443599493029, + "grad_norm": 1.4389405250549316, + "learning_rate": 2.0428197082018458e-05, + "loss": 0.2306346893310547, + "step": 3144 + }, + { + "epoch": 0.4251795521757499, + "grad_norm": 1.2169252634048462, + "learning_rate": 2.042181597147913e-05, + "loss": 0.21268272399902344, + "step": 3145 + }, + { + "epoch": 0.42531474440219685, + "grad_norm": 1.5070998668670654, + "learning_rate": 2.0415433732088806e-05, + "loss": 0.18635624647140503, + "step": 3146 + }, + { + "epoch": 0.42544993662864383, + "grad_norm": 1.115201711654663, + "learning_rate": 2.0409050365176294e-05, + "loss": 0.20785045623779297, + "step": 3147 + }, + { + "epoch": 0.4255851288550908, + "grad_norm": 1.2995914220809937, + "learning_rate": 2.0402665872070656e-05, + "loss": 0.22623825073242188, + "step": 3148 + }, + { + "epoch": 0.4257203210815378, + "grad_norm": 0.7083000540733337, + "learning_rate": 2.0396280254101172e-05, + "loss": 0.11201667785644531, + "step": 3149 + }, + { + "epoch": 0.42585551330798477, + "grad_norm": 2.1817712783813477, + "learning_rate": 2.0389893512597364e-05, + "loss": 0.17748546600341797, + "step": 3150 + }, + { + "epoch": 0.42599070553443175, + "grad_norm": 0.8166563510894775, + "learning_rate": 2.0383505648888986e-05, + "loss": 0.1644129753112793, + "step": 3151 + }, + { + "epoch": 0.4261258977608787, + "grad_norm": 0.7137550115585327, + "learning_rate": 2.037711666430602e-05, + "loss": 0.16689682006835938, + "step": 3152 + }, + { + "epoch": 0.4262610899873257, + "grad_norm": 0.8674426674842834, + "learning_rate": 2.0370726560178693e-05, + "loss": 0.20224666595458984, + "step": 3153 + }, + { + "epoch": 0.4263962822137727, + "grad_norm": 1.0230633020401, + "learning_rate": 2.036433533783745e-05, + "loss": 0.15178608894348145, + "step": 3154 + }, + { + "epoch": 0.42653147444021966, + "grad_norm": 0.762922465801239, + "learning_rate": 2.0357942998612988e-05, + "loss": 0.1956329345703125, + "step": 3155 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 1.6153441667556763, + "learning_rate": 2.0351549543836224e-05, + "loss": 0.1943340301513672, + "step": 3156 + }, + { + "epoch": 0.4268018588931137, + "grad_norm": 1.0588141679763794, + "learning_rate": 2.0345154974838307e-05, + "loss": 0.13901329040527344, + "step": 3157 + }, + { + "epoch": 0.42693705111956065, + "grad_norm": 1.4903565645217896, + "learning_rate": 2.0338759292950618e-05, + "loss": 0.20383358001708984, + "step": 3158 + }, + { + "epoch": 0.42707224334600763, + "grad_norm": 1.0255467891693115, + "learning_rate": 2.033236249950477e-05, + "loss": 0.14542841911315918, + "step": 3159 + }, + { + "epoch": 0.4272074355724546, + "grad_norm": 0.7896550297737122, + "learning_rate": 2.0325964595832618e-05, + "loss": 0.15297317504882812, + "step": 3160 + }, + { + "epoch": 0.4273426277989016, + "grad_norm": 1.841535210609436, + "learning_rate": 2.031956558326624e-05, + "loss": 0.2152252197265625, + "step": 3161 + }, + { + "epoch": 0.42747782002534856, + "grad_norm": 0.9769876599311829, + "learning_rate": 2.0313165463137935e-05, + "loss": 0.16324138641357422, + "step": 3162 + }, + { + "epoch": 0.42761301225179554, + "grad_norm": 1.0629949569702148, + "learning_rate": 2.030676423678025e-05, + "loss": 0.1883869171142578, + "step": 3163 + }, + { + "epoch": 0.4277482044782425, + "grad_norm": 1.3850748538970947, + "learning_rate": 2.030036190552595e-05, + "loss": 0.2057476043701172, + "step": 3164 + }, + { + "epoch": 0.4278833967046895, + "grad_norm": 1.2737865447998047, + "learning_rate": 2.029395847070803e-05, + "loss": 0.21024131774902344, + "step": 3165 + }, + { + "epoch": 0.4280185889311365, + "grad_norm": 1.1816891431808472, + "learning_rate": 2.0287553933659735e-05, + "loss": 0.18680477142333984, + "step": 3166 + }, + { + "epoch": 0.42815378115758346, + "grad_norm": 1.6376953125, + "learning_rate": 2.0281148295714512e-05, + "loss": 0.21474647521972656, + "step": 3167 + }, + { + "epoch": 0.42828897338403044, + "grad_norm": 1.059326171875, + "learning_rate": 2.027474155820605e-05, + "loss": 0.18851947784423828, + "step": 3168 + }, + { + "epoch": 0.4284241656104774, + "grad_norm": 3.019373893737793, + "learning_rate": 2.026833372246827e-05, + "loss": 0.2694053649902344, + "step": 3169 + }, + { + "epoch": 0.4285593578369244, + "grad_norm": 1.0689465999603271, + "learning_rate": 2.026192478983531e-05, + "loss": 0.19356346130371094, + "step": 3170 + }, + { + "epoch": 0.42869455006337137, + "grad_norm": 2.4855387210845947, + "learning_rate": 2.0255514761641555e-05, + "loss": 0.21857261657714844, + "step": 3171 + }, + { + "epoch": 0.42882974228981835, + "grad_norm": 2.4556994438171387, + "learning_rate": 2.0249103639221597e-05, + "loss": 0.15001726150512695, + "step": 3172 + }, + { + "epoch": 0.42896493451626533, + "grad_norm": 2.4733104705810547, + "learning_rate": 2.024269142391027e-05, + "loss": 0.21355438232421875, + "step": 3173 + }, + { + "epoch": 0.4291001267427123, + "grad_norm": 1.1895849704742432, + "learning_rate": 2.023627811704263e-05, + "loss": 0.1783924102783203, + "step": 3174 + }, + { + "epoch": 0.4292353189691593, + "grad_norm": 1.321651816368103, + "learning_rate": 2.0229863719953963e-05, + "loss": 0.2277669906616211, + "step": 3175 + }, + { + "epoch": 0.42937051119560626, + "grad_norm": 1.9732818603515625, + "learning_rate": 2.0223448233979785e-05, + "loss": 0.18292999267578125, + "step": 3176 + }, + { + "epoch": 0.42950570342205324, + "grad_norm": 1.8285123109817505, + "learning_rate": 2.0217031660455825e-05, + "loss": 0.16721820831298828, + "step": 3177 + }, + { + "epoch": 0.4296408956485002, + "grad_norm": 1.2731223106384277, + "learning_rate": 2.0210614000718054e-05, + "loss": 0.1712942123413086, + "step": 3178 + }, + { + "epoch": 0.4297760878749472, + "grad_norm": 2.2243106365203857, + "learning_rate": 2.020419525610266e-05, + "loss": 0.13637590408325195, + "step": 3179 + }, + { + "epoch": 0.4299112801013942, + "grad_norm": 2.3708536624908447, + "learning_rate": 2.0197775427946066e-05, + "loss": 0.19263267517089844, + "step": 3180 + }, + { + "epoch": 0.43004647232784116, + "grad_norm": 0.9503238797187805, + "learning_rate": 2.0191354517584902e-05, + "loss": 0.22425031661987305, + "step": 3181 + }, + { + "epoch": 0.43018166455428813, + "grad_norm": 0.7855859398841858, + "learning_rate": 2.018493252635605e-05, + "loss": 0.14455795288085938, + "step": 3182 + }, + { + "epoch": 0.4303168567807351, + "grad_norm": 1.6721371412277222, + "learning_rate": 2.0178509455596598e-05, + "loss": 0.15391921997070312, + "step": 3183 + }, + { + "epoch": 0.4304520490071821, + "grad_norm": 1.3813170194625854, + "learning_rate": 2.017208530664386e-05, + "loss": 0.14330005645751953, + "step": 3184 + }, + { + "epoch": 0.43058724123362907, + "grad_norm": 0.6997906565666199, + "learning_rate": 2.016566008083538e-05, + "loss": 0.1602010726928711, + "step": 3185 + }, + { + "epoch": 0.43072243346007605, + "grad_norm": 1.4364275932312012, + "learning_rate": 2.0159233779508923e-05, + "loss": 0.11324286460876465, + "step": 3186 + }, + { + "epoch": 0.430857625686523, + "grad_norm": 1.631349802017212, + "learning_rate": 2.0152806404002482e-05, + "loss": 0.1696319580078125, + "step": 3187 + }, + { + "epoch": 0.43099281791297, + "grad_norm": 1.1576733589172363, + "learning_rate": 2.014637795565427e-05, + "loss": 0.17049694061279297, + "step": 3188 + }, + { + "epoch": 0.431128010139417, + "grad_norm": 1.0393377542495728, + "learning_rate": 2.0139948435802722e-05, + "loss": 0.16859054565429688, + "step": 3189 + }, + { + "epoch": 0.43126320236586396, + "grad_norm": 0.9363009333610535, + "learning_rate": 2.0133517845786504e-05, + "loss": 0.17458343505859375, + "step": 3190 + }, + { + "epoch": 0.43139839459231094, + "grad_norm": 1.162930965423584, + "learning_rate": 2.012708618694449e-05, + "loss": 0.22877025604248047, + "step": 3191 + }, + { + "epoch": 0.4315335868187579, + "grad_norm": 1.1705584526062012, + "learning_rate": 2.0120653460615795e-05, + "loss": 0.16747665405273438, + "step": 3192 + }, + { + "epoch": 0.4316687790452049, + "grad_norm": 2.058824300765991, + "learning_rate": 2.011421966813974e-05, + "loss": 0.17766547203063965, + "step": 3193 + }, + { + "epoch": 0.4318039712716519, + "grad_norm": 1.0490683317184448, + "learning_rate": 2.0107784810855882e-05, + "loss": 0.1860666275024414, + "step": 3194 + }, + { + "epoch": 0.43193916349809885, + "grad_norm": 1.3689110279083252, + "learning_rate": 2.0101348890103985e-05, + "loss": 0.2069377899169922, + "step": 3195 + }, + { + "epoch": 0.43207435572454583, + "grad_norm": 1.6334882974624634, + "learning_rate": 2.0094911907224043e-05, + "loss": 0.16409778594970703, + "step": 3196 + }, + { + "epoch": 0.4322095479509928, + "grad_norm": 1.1300592422485352, + "learning_rate": 2.008847386355628e-05, + "loss": 0.19797420501708984, + "step": 3197 + }, + { + "epoch": 0.4323447401774398, + "grad_norm": 1.1207606792449951, + "learning_rate": 2.008203476044112e-05, + "loss": 0.2129497528076172, + "step": 3198 + }, + { + "epoch": 0.43247993240388677, + "grad_norm": 1.706355333328247, + "learning_rate": 2.007559459921922e-05, + "loss": 0.26381683349609375, + "step": 3199 + }, + { + "epoch": 0.43261512463033375, + "grad_norm": 0.8641300201416016, + "learning_rate": 2.0069153381231456e-05, + "loss": 0.17691802978515625, + "step": 3200 + }, + { + "epoch": 0.4327503168567807, + "grad_norm": 2.3879542350769043, + "learning_rate": 2.0062711107818933e-05, + "loss": 0.19820594787597656, + "step": 3201 + }, + { + "epoch": 0.4328855090832277, + "grad_norm": 1.6514135599136353, + "learning_rate": 2.0056267780322953e-05, + "loss": 0.1948375701904297, + "step": 3202 + }, + { + "epoch": 0.4330207013096747, + "grad_norm": 0.7712211608886719, + "learning_rate": 2.004982340008506e-05, + "loss": 0.18509674072265625, + "step": 3203 + }, + { + "epoch": 0.43315589353612166, + "grad_norm": 0.714439332485199, + "learning_rate": 2.004337796844701e-05, + "loss": 0.1577749252319336, + "step": 3204 + }, + { + "epoch": 0.43329108576256864, + "grad_norm": 1.6942423582077026, + "learning_rate": 2.003693148675077e-05, + "loss": 0.1829838752746582, + "step": 3205 + }, + { + "epoch": 0.4334262779890156, + "grad_norm": 1.2012410163879395, + "learning_rate": 2.003048395633853e-05, + "loss": 0.20160293579101562, + "step": 3206 + }, + { + "epoch": 0.4335614702154626, + "grad_norm": 2.221736431121826, + "learning_rate": 2.0024035378552708e-05, + "loss": 0.17109394073486328, + "step": 3207 + }, + { + "epoch": 0.4336966624419096, + "grad_norm": 0.9062662720680237, + "learning_rate": 2.001758575473593e-05, + "loss": 0.13499832153320312, + "step": 3208 + }, + { + "epoch": 0.43383185466835655, + "grad_norm": 1.8224637508392334, + "learning_rate": 2.0011135086231042e-05, + "loss": 0.1153252124786377, + "step": 3209 + }, + { + "epoch": 0.43396704689480353, + "grad_norm": 1.7240568399429321, + "learning_rate": 2.0004683374381104e-05, + "loss": 0.1922140121459961, + "step": 3210 + }, + { + "epoch": 0.4341022391212505, + "grad_norm": 1.0431997776031494, + "learning_rate": 1.9998230620529395e-05, + "loss": 0.12793731689453125, + "step": 3211 + }, + { + "epoch": 0.4342374313476975, + "grad_norm": 1.7353354692459106, + "learning_rate": 1.999177682601942e-05, + "loss": 0.20990562438964844, + "step": 3212 + }, + { + "epoch": 0.43437262357414447, + "grad_norm": 1.041535496711731, + "learning_rate": 1.9985321992194896e-05, + "loss": 0.1896677017211914, + "step": 3213 + }, + { + "epoch": 0.43450781580059145, + "grad_norm": 0.7205056548118591, + "learning_rate": 1.9978866120399746e-05, + "loss": 0.12275075912475586, + "step": 3214 + }, + { + "epoch": 0.4346430080270384, + "grad_norm": 1.9319335222244263, + "learning_rate": 1.9972409211978116e-05, + "loss": 0.20810317993164062, + "step": 3215 + }, + { + "epoch": 0.4347782002534854, + "grad_norm": 0.9305655360221863, + "learning_rate": 1.9965951268274373e-05, + "loss": 0.16515445709228516, + "step": 3216 + }, + { + "epoch": 0.4349133924799324, + "grad_norm": 1.829504132270813, + "learning_rate": 1.9959492290633093e-05, + "loss": 0.20354700088500977, + "step": 3217 + }, + { + "epoch": 0.43504858470637936, + "grad_norm": 1.781173586845398, + "learning_rate": 1.995303228039907e-05, + "loss": 0.24132061004638672, + "step": 3218 + }, + { + "epoch": 0.43518377693282634, + "grad_norm": 1.7699229717254639, + "learning_rate": 1.994657123891732e-05, + "loss": 0.1830425262451172, + "step": 3219 + }, + { + "epoch": 0.4353189691592733, + "grad_norm": 1.302095651626587, + "learning_rate": 1.9940109167533055e-05, + "loss": 0.2108612060546875, + "step": 3220 + }, + { + "epoch": 0.4354541613857203, + "grad_norm": 1.7591477632522583, + "learning_rate": 1.9933646067591716e-05, + "loss": 0.12987709045410156, + "step": 3221 + }, + { + "epoch": 0.4355893536121673, + "grad_norm": 2.4805960655212402, + "learning_rate": 1.992718194043896e-05, + "loss": 0.19744253158569336, + "step": 3222 + }, + { + "epoch": 0.43572454583861425, + "grad_norm": 2.7962231636047363, + "learning_rate": 1.9920716787420643e-05, + "loss": 0.2092113494873047, + "step": 3223 + }, + { + "epoch": 0.4358597380650613, + "grad_norm": 1.1938811540603638, + "learning_rate": 1.9914250609882858e-05, + "loss": 0.20505475997924805, + "step": 3224 + }, + { + "epoch": 0.43599493029150826, + "grad_norm": 1.1170421838760376, + "learning_rate": 1.9907783409171885e-05, + "loss": 0.2086191177368164, + "step": 3225 + }, + { + "epoch": 0.43613012251795524, + "grad_norm": 2.823751449584961, + "learning_rate": 1.990131518663424e-05, + "loss": 0.2563009262084961, + "step": 3226 + }, + { + "epoch": 0.4362653147444022, + "grad_norm": 0.9505105018615723, + "learning_rate": 1.9894845943616632e-05, + "loss": 0.1636509895324707, + "step": 3227 + }, + { + "epoch": 0.4364005069708492, + "grad_norm": 1.3886934518814087, + "learning_rate": 1.988837568146599e-05, + "loss": 0.2254321575164795, + "step": 3228 + }, + { + "epoch": 0.4365356991972962, + "grad_norm": 0.9767918586730957, + "learning_rate": 1.988190440152947e-05, + "loss": 0.1766366958618164, + "step": 3229 + }, + { + "epoch": 0.43667089142374316, + "grad_norm": 2.4880735874176025, + "learning_rate": 1.9875432105154424e-05, + "loss": 0.26550865173339844, + "step": 3230 + }, + { + "epoch": 0.43680608365019014, + "grad_norm": 0.9030943512916565, + "learning_rate": 1.9868958793688412e-05, + "loss": 0.2024822235107422, + "step": 3231 + }, + { + "epoch": 0.4369412758766371, + "grad_norm": 1.3507105112075806, + "learning_rate": 1.9862484468479213e-05, + "loss": 0.16112804412841797, + "step": 3232 + }, + { + "epoch": 0.4370764681030841, + "grad_norm": 0.9378476738929749, + "learning_rate": 1.985600913087482e-05, + "loss": 0.1256561279296875, + "step": 3233 + }, + { + "epoch": 0.43721166032953107, + "grad_norm": 0.9202724099159241, + "learning_rate": 1.9849532782223425e-05, + "loss": 0.1743021011352539, + "step": 3234 + }, + { + "epoch": 0.43734685255597805, + "grad_norm": 0.6298282146453857, + "learning_rate": 1.9843055423873447e-05, + "loss": 0.16308021545410156, + "step": 3235 + }, + { + "epoch": 0.43748204478242503, + "grad_norm": 0.6978808045387268, + "learning_rate": 1.9836577057173507e-05, + "loss": 0.1531229019165039, + "step": 3236 + }, + { + "epoch": 0.437617237008872, + "grad_norm": 1.0858879089355469, + "learning_rate": 1.9830097683472427e-05, + "loss": 0.19670486450195312, + "step": 3237 + }, + { + "epoch": 0.437752429235319, + "grad_norm": 0.8587451577186584, + "learning_rate": 1.9823617304119252e-05, + "loss": 0.16200923919677734, + "step": 3238 + }, + { + "epoch": 0.43788762146176596, + "grad_norm": 1.0380136966705322, + "learning_rate": 1.9817135920463232e-05, + "loss": 0.1998729705810547, + "step": 3239 + }, + { + "epoch": 0.43802281368821294, + "grad_norm": 0.9099397659301758, + "learning_rate": 1.9810653533853826e-05, + "loss": 0.1688380241394043, + "step": 3240 + }, + { + "epoch": 0.4381580059146599, + "grad_norm": 1.3872203826904297, + "learning_rate": 1.9804170145640706e-05, + "loss": 0.2563629150390625, + "step": 3241 + }, + { + "epoch": 0.4382931981411069, + "grad_norm": 0.8556606769561768, + "learning_rate": 1.9797685757173737e-05, + "loss": 0.1521892547607422, + "step": 3242 + }, + { + "epoch": 0.4384283903675539, + "grad_norm": 1.9313888549804688, + "learning_rate": 1.979120036980301e-05, + "loss": 0.1937112808227539, + "step": 3243 + }, + { + "epoch": 0.43856358259400086, + "grad_norm": 0.9642466902732849, + "learning_rate": 1.9784713984878814e-05, + "loss": 0.17397403717041016, + "step": 3244 + }, + { + "epoch": 0.43869877482044783, + "grad_norm": 1.062937617301941, + "learning_rate": 1.9778226603751652e-05, + "loss": 0.18570709228515625, + "step": 3245 + }, + { + "epoch": 0.4388339670468948, + "grad_norm": 2.1490941047668457, + "learning_rate": 1.9771738227772235e-05, + "loss": 0.15308094024658203, + "step": 3246 + }, + { + "epoch": 0.4389691592733418, + "grad_norm": 0.8808411955833435, + "learning_rate": 1.976524885829147e-05, + "loss": 0.16611814498901367, + "step": 3247 + }, + { + "epoch": 0.43910435149978877, + "grad_norm": 1.0755809545516968, + "learning_rate": 1.975875849666048e-05, + "loss": 0.13784503936767578, + "step": 3248 + }, + { + "epoch": 0.43923954372623575, + "grad_norm": 2.4913408756256104, + "learning_rate": 1.9752267144230595e-05, + "loss": 0.27016448974609375, + "step": 3249 + }, + { + "epoch": 0.4393747359526827, + "grad_norm": 1.0091131925582886, + "learning_rate": 1.9745774802353347e-05, + "loss": 0.17917919158935547, + "step": 3250 + }, + { + "epoch": 0.4395099281791297, + "grad_norm": 1.1258560419082642, + "learning_rate": 1.973928147238048e-05, + "loss": 0.1880321502685547, + "step": 3251 + }, + { + "epoch": 0.4396451204055767, + "grad_norm": 2.2914650440216064, + "learning_rate": 1.973278715566394e-05, + "loss": 0.19746017456054688, + "step": 3252 + }, + { + "epoch": 0.43978031263202366, + "grad_norm": 1.6040184497833252, + "learning_rate": 1.972629185355587e-05, + "loss": 0.13663291931152344, + "step": 3253 + }, + { + "epoch": 0.43991550485847064, + "grad_norm": 1.7001338005065918, + "learning_rate": 1.971979556740864e-05, + "loss": 0.19942855834960938, + "step": 3254 + }, + { + "epoch": 0.4400506970849176, + "grad_norm": 0.8530957102775574, + "learning_rate": 1.971329829857479e-05, + "loss": 0.1932353973388672, + "step": 3255 + }, + { + "epoch": 0.4401858893113646, + "grad_norm": 1.4182236194610596, + "learning_rate": 1.9706800048407112e-05, + "loss": 0.24430084228515625, + "step": 3256 + }, + { + "epoch": 0.4403210815378116, + "grad_norm": 1.5841634273529053, + "learning_rate": 1.9700300818258566e-05, + "loss": 0.15078258514404297, + "step": 3257 + }, + { + "epoch": 0.44045627376425855, + "grad_norm": 1.5203839540481567, + "learning_rate": 1.9693800609482318e-05, + "loss": 0.1559772491455078, + "step": 3258 + }, + { + "epoch": 0.44059146599070553, + "grad_norm": 1.0354779958724976, + "learning_rate": 1.9687299423431754e-05, + "loss": 0.19831275939941406, + "step": 3259 + }, + { + "epoch": 0.4407266582171525, + "grad_norm": 0.8177117705345154, + "learning_rate": 1.968079726146045e-05, + "loss": 0.13483619689941406, + "step": 3260 + }, + { + "epoch": 0.4408618504435995, + "grad_norm": 0.9529993534088135, + "learning_rate": 1.9674294124922204e-05, + "loss": 0.16558146476745605, + "step": 3261 + }, + { + "epoch": 0.44099704267004647, + "grad_norm": 0.6532862782478333, + "learning_rate": 1.966779001517099e-05, + "loss": 0.14177227020263672, + "step": 3262 + }, + { + "epoch": 0.44113223489649345, + "grad_norm": 1.7624483108520508, + "learning_rate": 1.9661284933561006e-05, + "loss": 0.2037334442138672, + "step": 3263 + }, + { + "epoch": 0.4412674271229404, + "grad_norm": 1.1440997123718262, + "learning_rate": 1.9654778881446636e-05, + "loss": 0.1915740966796875, + "step": 3264 + }, + { + "epoch": 0.4414026193493874, + "grad_norm": 2.2073144912719727, + "learning_rate": 1.9648271860182487e-05, + "loss": 0.1768202781677246, + "step": 3265 + }, + { + "epoch": 0.4415378115758344, + "grad_norm": 2.678293466567993, + "learning_rate": 1.9641763871123345e-05, + "loss": 0.17505693435668945, + "step": 3266 + }, + { + "epoch": 0.44167300380228136, + "grad_norm": 0.8177661299705505, + "learning_rate": 1.963525491562421e-05, + "loss": 0.2051105499267578, + "step": 3267 + }, + { + "epoch": 0.44180819602872834, + "grad_norm": 1.999330759048462, + "learning_rate": 1.9628744995040287e-05, + "loss": 0.3141021728515625, + "step": 3268 + }, + { + "epoch": 0.4419433882551753, + "grad_norm": 0.7680009603500366, + "learning_rate": 1.9622234110726976e-05, + "loss": 0.15154409408569336, + "step": 3269 + }, + { + "epoch": 0.4420785804816223, + "grad_norm": 1.16696035861969, + "learning_rate": 1.9615722264039868e-05, + "loss": 0.1818697452545166, + "step": 3270 + }, + { + "epoch": 0.4422137727080693, + "grad_norm": 2.3816661834716797, + "learning_rate": 1.9609209456334772e-05, + "loss": 0.14307022094726562, + "step": 3271 + }, + { + "epoch": 0.44234896493451625, + "grad_norm": 2.4581425189971924, + "learning_rate": 1.960269568896769e-05, + "loss": 0.1819601058959961, + "step": 3272 + }, + { + "epoch": 0.44248415716096323, + "grad_norm": 2.0372302532196045, + "learning_rate": 1.9596180963294822e-05, + "loss": 0.19658660888671875, + "step": 3273 + }, + { + "epoch": 0.4426193493874102, + "grad_norm": 1.296618103981018, + "learning_rate": 1.9589665280672564e-05, + "loss": 0.17101097106933594, + "step": 3274 + }, + { + "epoch": 0.4427545416138572, + "grad_norm": 0.9417622685432434, + "learning_rate": 1.958314864245752e-05, + "loss": 0.18425512313842773, + "step": 3275 + }, + { + "epoch": 0.44288973384030417, + "grad_norm": 1.3346128463745117, + "learning_rate": 1.957663105000649e-05, + "loss": 0.21772289276123047, + "step": 3276 + }, + { + "epoch": 0.44302492606675115, + "grad_norm": 2.2654311656951904, + "learning_rate": 1.957011250467647e-05, + "loss": 0.1448230743408203, + "step": 3277 + }, + { + "epoch": 0.4431601182931981, + "grad_norm": 1.141088843345642, + "learning_rate": 1.9563593007824658e-05, + "loss": 0.1553792953491211, + "step": 3278 + }, + { + "epoch": 0.4432953105196451, + "grad_norm": 1.3459566831588745, + "learning_rate": 1.9557072560808442e-05, + "loss": 0.24167442321777344, + "step": 3279 + }, + { + "epoch": 0.4434305027460921, + "grad_norm": 3.7929370403289795, + "learning_rate": 1.9550551164985418e-05, + "loss": 0.191436767578125, + "step": 3280 + }, + { + "epoch": 0.44356569497253906, + "grad_norm": 1.4540438652038574, + "learning_rate": 1.9544028821713372e-05, + "loss": 0.19977283477783203, + "step": 3281 + }, + { + "epoch": 0.44370088719898604, + "grad_norm": 1.1990866661071777, + "learning_rate": 1.9537505532350298e-05, + "loss": 0.21065521240234375, + "step": 3282 + }, + { + "epoch": 0.443836079425433, + "grad_norm": 2.6398627758026123, + "learning_rate": 1.9530981298254376e-05, + "loss": 0.20041465759277344, + "step": 3283 + }, + { + "epoch": 0.44397127165188, + "grad_norm": 1.9614109992980957, + "learning_rate": 1.9524456120783983e-05, + "loss": 0.20159626007080078, + "step": 3284 + }, + { + "epoch": 0.444106463878327, + "grad_norm": 0.8138933181762695, + "learning_rate": 1.95179300012977e-05, + "loss": 0.14339160919189453, + "step": 3285 + }, + { + "epoch": 0.44424165610477395, + "grad_norm": 0.836086094379425, + "learning_rate": 1.9511402941154296e-05, + "loss": 0.1988210678100586, + "step": 3286 + }, + { + "epoch": 0.44437684833122093, + "grad_norm": 3.8873000144958496, + "learning_rate": 1.950487494171274e-05, + "loss": 0.23756694793701172, + "step": 3287 + }, + { + "epoch": 0.4445120405576679, + "grad_norm": 2.2183263301849365, + "learning_rate": 1.9498346004332203e-05, + "loss": 0.14792919158935547, + "step": 3288 + }, + { + "epoch": 0.4446472327841149, + "grad_norm": 2.675966739654541, + "learning_rate": 1.949181613037204e-05, + "loss": 0.2288188934326172, + "step": 3289 + }, + { + "epoch": 0.44478242501056187, + "grad_norm": 0.961983323097229, + "learning_rate": 1.9485285321191804e-05, + "loss": 0.14125490188598633, + "step": 3290 + }, + { + "epoch": 0.44491761723700884, + "grad_norm": 2.292262315750122, + "learning_rate": 1.9478753578151244e-05, + "loss": 0.21262693405151367, + "step": 3291 + }, + { + "epoch": 0.4450528094634559, + "grad_norm": 1.2248820066452026, + "learning_rate": 1.9472220902610304e-05, + "loss": 0.2552909851074219, + "step": 3292 + }, + { + "epoch": 0.44518800168990286, + "grad_norm": 0.7249830365180969, + "learning_rate": 1.9465687295929127e-05, + "loss": 0.126265287399292, + "step": 3293 + }, + { + "epoch": 0.44532319391634984, + "grad_norm": 0.6301270723342896, + "learning_rate": 1.945915275946804e-05, + "loss": 0.13928532600402832, + "step": 3294 + }, + { + "epoch": 0.4454583861427968, + "grad_norm": 1.0418184995651245, + "learning_rate": 1.9452617294587573e-05, + "loss": 0.1963043212890625, + "step": 3295 + }, + { + "epoch": 0.4455935783692438, + "grad_norm": 1.510541558265686, + "learning_rate": 1.9446080902648435e-05, + "loss": 0.14599323272705078, + "step": 3296 + }, + { + "epoch": 0.44572877059569077, + "grad_norm": 0.8976702690124512, + "learning_rate": 1.943954358501154e-05, + "loss": 0.19105148315429688, + "step": 3297 + }, + { + "epoch": 0.44586396282213775, + "grad_norm": 1.173773169517517, + "learning_rate": 1.943300534303801e-05, + "loss": 0.12724590301513672, + "step": 3298 + }, + { + "epoch": 0.44599915504858473, + "grad_norm": 0.8885128498077393, + "learning_rate": 1.9426466178089116e-05, + "loss": 0.16897010803222656, + "step": 3299 + }, + { + "epoch": 0.4461343472750317, + "grad_norm": 1.7829830646514893, + "learning_rate": 1.9419926091526367e-05, + "loss": 0.23958587646484375, + "step": 3300 + }, + { + "epoch": 0.4462695395014787, + "grad_norm": 1.337445855140686, + "learning_rate": 1.9413385084711425e-05, + "loss": 0.2074289321899414, + "step": 3301 + }, + { + "epoch": 0.44640473172792566, + "grad_norm": 1.976184368133545, + "learning_rate": 1.9406843159006183e-05, + "loss": 0.21131229400634766, + "step": 3302 + }, + { + "epoch": 0.44653992395437264, + "grad_norm": 1.662597417831421, + "learning_rate": 1.940030031577269e-05, + "loss": 0.1674041748046875, + "step": 3303 + }, + { + "epoch": 0.4466751161808196, + "grad_norm": 1.4795536994934082, + "learning_rate": 1.9393756556373212e-05, + "loss": 0.21033668518066406, + "step": 3304 + }, + { + "epoch": 0.4468103084072666, + "grad_norm": 0.9193311333656311, + "learning_rate": 1.9387211882170184e-05, + "loss": 0.19032049179077148, + "step": 3305 + }, + { + "epoch": 0.4469455006337136, + "grad_norm": 1.493209719657898, + "learning_rate": 1.9380666294526243e-05, + "loss": 0.19511127471923828, + "step": 3306 + }, + { + "epoch": 0.44708069286016056, + "grad_norm": 2.4407761096954346, + "learning_rate": 1.9374119794804228e-05, + "loss": 0.1898479461669922, + "step": 3307 + }, + { + "epoch": 0.44721588508660753, + "grad_norm": 1.7308591604232788, + "learning_rate": 1.936757238436714e-05, + "loss": 0.17169570922851562, + "step": 3308 + }, + { + "epoch": 0.4473510773130545, + "grad_norm": 1.2203428745269775, + "learning_rate": 1.936102406457819e-05, + "loss": 0.1848452091217041, + "step": 3309 + }, + { + "epoch": 0.4474862695395015, + "grad_norm": 0.9986810684204102, + "learning_rate": 1.935447483680078e-05, + "loss": 0.2286396026611328, + "step": 3310 + }, + { + "epoch": 0.44762146176594847, + "grad_norm": 0.8714733719825745, + "learning_rate": 1.9347924702398484e-05, + "loss": 0.156044602394104, + "step": 3311 + }, + { + "epoch": 0.44775665399239545, + "grad_norm": 2.1321327686309814, + "learning_rate": 1.9341373662735075e-05, + "loss": 0.21015453338623047, + "step": 3312 + }, + { + "epoch": 0.4478918462188424, + "grad_norm": 1.5197192430496216, + "learning_rate": 1.9334821719174524e-05, + "loss": 0.1696155071258545, + "step": 3313 + }, + { + "epoch": 0.4480270384452894, + "grad_norm": 1.585639238357544, + "learning_rate": 1.9328268873080974e-05, + "loss": 0.23131179809570312, + "step": 3314 + }, + { + "epoch": 0.4481622306717364, + "grad_norm": 1.7585625648498535, + "learning_rate": 1.9321715125818765e-05, + "loss": 0.2266082763671875, + "step": 3315 + }, + { + "epoch": 0.44829742289818336, + "grad_norm": 1.6605536937713623, + "learning_rate": 1.931516047875242e-05, + "loss": 0.14570701122283936, + "step": 3316 + }, + { + "epoch": 0.44843261512463034, + "grad_norm": 2.61189866065979, + "learning_rate": 1.930860493324665e-05, + "loss": 0.17184185981750488, + "step": 3317 + }, + { + "epoch": 0.4485678073510773, + "grad_norm": 1.735080599784851, + "learning_rate": 1.9302048490666356e-05, + "loss": 0.1675701141357422, + "step": 3318 + }, + { + "epoch": 0.4487029995775243, + "grad_norm": 1.6295245885849, + "learning_rate": 1.9295491152376623e-05, + "loss": 0.1856555938720703, + "step": 3319 + }, + { + "epoch": 0.4488381918039713, + "grad_norm": 1.884494423866272, + "learning_rate": 1.928893291974273e-05, + "loss": 0.16524791717529297, + "step": 3320 + }, + { + "epoch": 0.44897338403041825, + "grad_norm": 1.0411334037780762, + "learning_rate": 1.9282373794130132e-05, + "loss": 0.1746060848236084, + "step": 3321 + }, + { + "epoch": 0.44910857625686523, + "grad_norm": 0.9504089951515198, + "learning_rate": 1.9275813776904472e-05, + "loss": 0.14795160293579102, + "step": 3322 + }, + { + "epoch": 0.4492437684833122, + "grad_norm": 0.7142082452774048, + "learning_rate": 1.9269252869431582e-05, + "loss": 0.18247127532958984, + "step": 3323 + }, + { + "epoch": 0.4493789607097592, + "grad_norm": 1.5052988529205322, + "learning_rate": 1.9262691073077476e-05, + "loss": 0.18030238151550293, + "step": 3324 + }, + { + "epoch": 0.44951415293620617, + "grad_norm": 0.9043465256690979, + "learning_rate": 1.9256128389208363e-05, + "loss": 0.15996450185775757, + "step": 3325 + }, + { + "epoch": 0.44964934516265315, + "grad_norm": 2.2750658988952637, + "learning_rate": 1.924956481919062e-05, + "loss": 0.1936359405517578, + "step": 3326 + }, + { + "epoch": 0.4497845373891001, + "grad_norm": 1.260048270225525, + "learning_rate": 1.9243000364390825e-05, + "loss": 0.23260053992271423, + "step": 3327 + }, + { + "epoch": 0.4499197296155471, + "grad_norm": 1.9068249464035034, + "learning_rate": 1.9236435026175717e-05, + "loss": 0.1737051010131836, + "step": 3328 + }, + { + "epoch": 0.4500549218419941, + "grad_norm": 2.0965352058410645, + "learning_rate": 1.9229868805912248e-05, + "loss": 0.2030625343322754, + "step": 3329 + }, + { + "epoch": 0.45019011406844106, + "grad_norm": 0.904119074344635, + "learning_rate": 1.9223301704967543e-05, + "loss": 0.16301298141479492, + "step": 3330 + }, + { + "epoch": 0.45032530629488804, + "grad_norm": 1.8079923391342163, + "learning_rate": 1.92167337247089e-05, + "loss": 0.19826650619506836, + "step": 3331 + }, + { + "epoch": 0.450460498521335, + "grad_norm": 0.7676612138748169, + "learning_rate": 1.9210164866503808e-05, + "loss": 0.1650075912475586, + "step": 3332 + }, + { + "epoch": 0.450595690747782, + "grad_norm": 0.7464839816093445, + "learning_rate": 1.9203595131719935e-05, + "loss": 0.14877605438232422, + "step": 3333 + }, + { + "epoch": 0.450730882974229, + "grad_norm": 0.860927402973175, + "learning_rate": 1.9197024521725148e-05, + "loss": 0.24932289123535156, + "step": 3334 + }, + { + "epoch": 0.45086607520067595, + "grad_norm": 1.0704501867294312, + "learning_rate": 1.9190453037887465e-05, + "loss": 0.22809600830078125, + "step": 3335 + }, + { + "epoch": 0.45100126742712293, + "grad_norm": 1.003853440284729, + "learning_rate": 1.918388068157512e-05, + "loss": 0.15314126014709473, + "step": 3336 + }, + { + "epoch": 0.4511364596535699, + "grad_norm": 1.2322697639465332, + "learning_rate": 1.9177307454156507e-05, + "loss": 0.17522716522216797, + "step": 3337 + }, + { + "epoch": 0.4512716518800169, + "grad_norm": 1.9328501224517822, + "learning_rate": 1.9170733357000202e-05, + "loss": 0.20365238189697266, + "step": 3338 + }, + { + "epoch": 0.45140684410646387, + "grad_norm": 0.9317488074302673, + "learning_rate": 1.916415839147497e-05, + "loss": 0.18757057189941406, + "step": 3339 + }, + { + "epoch": 0.45154203633291085, + "grad_norm": 1.2405551671981812, + "learning_rate": 1.9157582558949756e-05, + "loss": 0.12496471405029297, + "step": 3340 + }, + { + "epoch": 0.4516772285593578, + "grad_norm": 1.2460386753082275, + "learning_rate": 1.9151005860793682e-05, + "loss": 0.194488525390625, + "step": 3341 + }, + { + "epoch": 0.4518124207858048, + "grad_norm": 1.045401930809021, + "learning_rate": 1.9144428298376056e-05, + "loss": 0.19829177856445312, + "step": 3342 + }, + { + "epoch": 0.4519476130122518, + "grad_norm": 2.408951759338379, + "learning_rate": 1.9137849873066355e-05, + "loss": 0.20370006561279297, + "step": 3343 + }, + { + "epoch": 0.45208280523869876, + "grad_norm": 1.8167108297348022, + "learning_rate": 1.9131270586234243e-05, + "loss": 0.18300151824951172, + "step": 3344 + }, + { + "epoch": 0.45221799746514574, + "grad_norm": 0.9507236480712891, + "learning_rate": 1.9124690439249564e-05, + "loss": 0.13016855716705322, + "step": 3345 + }, + { + "epoch": 0.4523531896915927, + "grad_norm": 1.3461902141571045, + "learning_rate": 1.9118109433482342e-05, + "loss": 0.16587352752685547, + "step": 3346 + }, + { + "epoch": 0.4524883819180397, + "grad_norm": 0.8641530275344849, + "learning_rate": 1.911152757030278e-05, + "loss": 0.1675124168395996, + "step": 3347 + }, + { + "epoch": 0.4526235741444867, + "grad_norm": 0.699028730392456, + "learning_rate": 1.9104944851081247e-05, + "loss": 0.12414193153381348, + "step": 3348 + }, + { + "epoch": 0.45275876637093365, + "grad_norm": 3.3927464485168457, + "learning_rate": 1.9098361277188303e-05, + "loss": 0.22945332527160645, + "step": 3349 + }, + { + "epoch": 0.45289395859738063, + "grad_norm": 2.2899057865142822, + "learning_rate": 1.909177684999469e-05, + "loss": 0.17086172103881836, + "step": 3350 + }, + { + "epoch": 0.4530291508238276, + "grad_norm": 1.3996446132659912, + "learning_rate": 1.9085191570871316e-05, + "loss": 0.1568918228149414, + "step": 3351 + }, + { + "epoch": 0.4531643430502746, + "grad_norm": 0.8151464462280273, + "learning_rate": 1.9078605441189275e-05, + "loss": 0.14997220039367676, + "step": 3352 + }, + { + "epoch": 0.45329953527672157, + "grad_norm": 0.8316568732261658, + "learning_rate": 1.9072018462319828e-05, + "loss": 0.14484214782714844, + "step": 3353 + }, + { + "epoch": 0.45343472750316854, + "grad_norm": 1.5172685384750366, + "learning_rate": 1.9065430635634422e-05, + "loss": 0.17213410139083862, + "step": 3354 + }, + { + "epoch": 0.4535699197296155, + "grad_norm": 1.184983730316162, + "learning_rate": 1.9058841962504677e-05, + "loss": 0.1961350440979004, + "step": 3355 + }, + { + "epoch": 0.4537051119560625, + "grad_norm": 1.0499653816223145, + "learning_rate": 1.9052252444302394e-05, + "loss": 0.15592074394226074, + "step": 3356 + }, + { + "epoch": 0.4538403041825095, + "grad_norm": 1.0566301345825195, + "learning_rate": 1.904566208239954e-05, + "loss": 0.14187121391296387, + "step": 3357 + }, + { + "epoch": 0.45397549640895646, + "grad_norm": 1.418910026550293, + "learning_rate": 1.903907087816827e-05, + "loss": 0.1613149642944336, + "step": 3358 + }, + { + "epoch": 0.45411068863540344, + "grad_norm": 1.500810980796814, + "learning_rate": 1.9032478832980902e-05, + "loss": 0.24725914001464844, + "step": 3359 + }, + { + "epoch": 0.45424588086185047, + "grad_norm": 2.5046122074127197, + "learning_rate": 1.9025885948209938e-05, + "loss": 0.1636638641357422, + "step": 3360 + }, + { + "epoch": 0.45438107308829745, + "grad_norm": 1.6339011192321777, + "learning_rate": 1.901929222522805e-05, + "loss": 0.21809005737304688, + "step": 3361 + }, + { + "epoch": 0.4545162653147444, + "grad_norm": 1.0242033004760742, + "learning_rate": 1.901269766540809e-05, + "loss": 0.2426128387451172, + "step": 3362 + }, + { + "epoch": 0.4546514575411914, + "grad_norm": 0.7857242226600647, + "learning_rate": 1.9006102270123076e-05, + "loss": 0.11364269256591797, + "step": 3363 + }, + { + "epoch": 0.4547866497676384, + "grad_norm": 1.756072998046875, + "learning_rate": 1.8999506040746208e-05, + "loss": 0.1663675308227539, + "step": 3364 + }, + { + "epoch": 0.45492184199408536, + "grad_norm": 0.8138302564620972, + "learning_rate": 1.899290897865085e-05, + "loss": 0.17125940322875977, + "step": 3365 + }, + { + "epoch": 0.45505703422053234, + "grad_norm": 1.630271315574646, + "learning_rate": 1.898631108521055e-05, + "loss": 0.1776294708251953, + "step": 3366 + }, + { + "epoch": 0.4551922264469793, + "grad_norm": 0.5550186038017273, + "learning_rate": 1.8979712361799027e-05, + "loss": 0.16408538818359375, + "step": 3367 + }, + { + "epoch": 0.4553274186734263, + "grad_norm": 2.5210983753204346, + "learning_rate": 1.8973112809790168e-05, + "loss": 0.2393512725830078, + "step": 3368 + }, + { + "epoch": 0.4554626108998733, + "grad_norm": 0.9126302599906921, + "learning_rate": 1.8966512430558036e-05, + "loss": 0.20035552978515625, + "step": 3369 + }, + { + "epoch": 0.45559780312632026, + "grad_norm": 0.8338290452957153, + "learning_rate": 1.8959911225476858e-05, + "loss": 0.1625652313232422, + "step": 3370 + }, + { + "epoch": 0.45573299535276723, + "grad_norm": 0.9934579133987427, + "learning_rate": 1.895330919592105e-05, + "loss": 0.1514444351196289, + "step": 3371 + }, + { + "epoch": 0.4558681875792142, + "grad_norm": 0.7505517601966858, + "learning_rate": 1.8946706343265187e-05, + "loss": 0.1527557373046875, + "step": 3372 + }, + { + "epoch": 0.4560033798056612, + "grad_norm": 1.4540859460830688, + "learning_rate": 1.8940102668884016e-05, + "loss": 0.20904541015625, + "step": 3373 + }, + { + "epoch": 0.45613857203210817, + "grad_norm": 1.0088428258895874, + "learning_rate": 1.893349817415246e-05, + "loss": 0.17722797393798828, + "step": 3374 + }, + { + "epoch": 0.45627376425855515, + "grad_norm": 0.9405586123466492, + "learning_rate": 1.8926892860445607e-05, + "loss": 0.19090843200683594, + "step": 3375 + }, + { + "epoch": 0.4564089564850021, + "grad_norm": 0.8506881594657898, + "learning_rate": 1.8920286729138718e-05, + "loss": 0.15937280654907227, + "step": 3376 + }, + { + "epoch": 0.4565441487114491, + "grad_norm": 1.9556361436843872, + "learning_rate": 1.8913679781607225e-05, + "loss": 0.22687864303588867, + "step": 3377 + }, + { + "epoch": 0.4566793409378961, + "grad_norm": 2.596022605895996, + "learning_rate": 1.8907072019226734e-05, + "loss": 0.1916961669921875, + "step": 3378 + }, + { + "epoch": 0.45681453316434306, + "grad_norm": 1.1734687089920044, + "learning_rate": 1.8900463443373015e-05, + "loss": 0.20528650283813477, + "step": 3379 + }, + { + "epoch": 0.45694972539079004, + "grad_norm": 0.8709990382194519, + "learning_rate": 1.889385405542201e-05, + "loss": 0.15732574462890625, + "step": 3380 + }, + { + "epoch": 0.457084917617237, + "grad_norm": 1.492586374282837, + "learning_rate": 1.8887243856749816e-05, + "loss": 0.2473926544189453, + "step": 3381 + }, + { + "epoch": 0.457220109843684, + "grad_norm": 1.4273171424865723, + "learning_rate": 1.8880632848732723e-05, + "loss": 0.2291412353515625, + "step": 3382 + }, + { + "epoch": 0.457355302070131, + "grad_norm": 0.8740034103393555, + "learning_rate": 1.8874021032747185e-05, + "loss": 0.12774276733398438, + "step": 3383 + }, + { + "epoch": 0.45749049429657795, + "grad_norm": 1.173427939414978, + "learning_rate": 1.8867408410169803e-05, + "loss": 0.13009929656982422, + "step": 3384 + }, + { + "epoch": 0.45762568652302493, + "grad_norm": 0.7898250222206116, + "learning_rate": 1.886079498237737e-05, + "loss": 0.1951894760131836, + "step": 3385 + }, + { + "epoch": 0.4577608787494719, + "grad_norm": 0.9126966595649719, + "learning_rate": 1.885418075074683e-05, + "loss": 0.18280935287475586, + "step": 3386 + }, + { + "epoch": 0.4578960709759189, + "grad_norm": 1.1672455072402954, + "learning_rate": 1.884756571665531e-05, + "loss": 0.20835494995117188, + "step": 3387 + }, + { + "epoch": 0.45803126320236587, + "grad_norm": 2.5968024730682373, + "learning_rate": 1.8840949881480085e-05, + "loss": 0.21082544326782227, + "step": 3388 + }, + { + "epoch": 0.45816645542881285, + "grad_norm": 1.2569278478622437, + "learning_rate": 1.8834333246598613e-05, + "loss": 0.16714727878570557, + "step": 3389 + }, + { + "epoch": 0.4583016476552598, + "grad_norm": 1.5037062168121338, + "learning_rate": 1.8827715813388514e-05, + "loss": 0.14668774604797363, + "step": 3390 + }, + { + "epoch": 0.4584368398817068, + "grad_norm": 1.1261630058288574, + "learning_rate": 1.8821097583227572e-05, + "loss": 0.18340826034545898, + "step": 3391 + }, + { + "epoch": 0.4585720321081538, + "grad_norm": 0.8690721988677979, + "learning_rate": 1.8814478557493732e-05, + "loss": 0.15972423553466797, + "step": 3392 + }, + { + "epoch": 0.45870722433460076, + "grad_norm": 1.0738197565078735, + "learning_rate": 1.8807858737565118e-05, + "loss": 0.2241535186767578, + "step": 3393 + }, + { + "epoch": 0.45884241656104774, + "grad_norm": 1.2549222707748413, + "learning_rate": 1.880123812482001e-05, + "loss": 0.1848316192626953, + "step": 3394 + }, + { + "epoch": 0.4589776087874947, + "grad_norm": 1.4770159721374512, + "learning_rate": 1.8794616720636853e-05, + "loss": 0.16968345642089844, + "step": 3395 + }, + { + "epoch": 0.4591128010139417, + "grad_norm": 0.9515596628189087, + "learning_rate": 1.8787994526394257e-05, + "loss": 0.22729110717773438, + "step": 3396 + }, + { + "epoch": 0.4592479932403887, + "grad_norm": 0.9397410750389099, + "learning_rate": 1.8781371543471e-05, + "loss": 0.18487882614135742, + "step": 3397 + }, + { + "epoch": 0.45938318546683565, + "grad_norm": 1.3566805124282837, + "learning_rate": 1.8774747773246024e-05, + "loss": 0.11250877380371094, + "step": 3398 + }, + { + "epoch": 0.45951837769328263, + "grad_norm": 0.9017617702484131, + "learning_rate": 1.8768123217098438e-05, + "loss": 0.12335491180419922, + "step": 3399 + }, + { + "epoch": 0.4596535699197296, + "grad_norm": 1.3379122018814087, + "learning_rate": 1.8761497876407496e-05, + "loss": 0.18169236183166504, + "step": 3400 + }, + { + "epoch": 0.4597887621461766, + "grad_norm": 0.795560896396637, + "learning_rate": 1.8754871752552646e-05, + "loss": 0.16091537475585938, + "step": 3401 + }, + { + "epoch": 0.45992395437262357, + "grad_norm": 0.7444160580635071, + "learning_rate": 1.8748244846913463e-05, + "loss": 0.09851551055908203, + "step": 3402 + }, + { + "epoch": 0.46005914659907055, + "grad_norm": 1.6525068283081055, + "learning_rate": 1.874161716086972e-05, + "loss": 0.18190288543701172, + "step": 3403 + }, + { + "epoch": 0.4601943388255175, + "grad_norm": 1.3186357021331787, + "learning_rate": 1.8734988695801333e-05, + "loss": 0.2541618347167969, + "step": 3404 + }, + { + "epoch": 0.4603295310519645, + "grad_norm": 0.966378390789032, + "learning_rate": 1.8728359453088382e-05, + "loss": 0.18045425415039062, + "step": 3405 + }, + { + "epoch": 0.4604647232784115, + "grad_norm": 0.6301789879798889, + "learning_rate": 1.8721729434111108e-05, + "loss": 0.11144638061523438, + "step": 3406 + }, + { + "epoch": 0.46059991550485846, + "grad_norm": 0.8427004814147949, + "learning_rate": 1.871509864024992e-05, + "loss": 0.2093334197998047, + "step": 3407 + }, + { + "epoch": 0.46073510773130544, + "grad_norm": 1.0891212224960327, + "learning_rate": 1.8708467072885385e-05, + "loss": 0.1659259796142578, + "step": 3408 + }, + { + "epoch": 0.4608702999577524, + "grad_norm": 1.1697626113891602, + "learning_rate": 1.8701834733398227e-05, + "loss": 0.21251678466796875, + "step": 3409 + }, + { + "epoch": 0.4610054921841994, + "grad_norm": 0.9912316203117371, + "learning_rate": 1.8695201623169335e-05, + "loss": 0.15319347381591797, + "step": 3410 + }, + { + "epoch": 0.4611406844106464, + "grad_norm": 1.261716365814209, + "learning_rate": 1.868856774357977e-05, + "loss": 0.20406246185302734, + "step": 3411 + }, + { + "epoch": 0.46127587663709335, + "grad_norm": 1.725563645362854, + "learning_rate": 1.868193309601072e-05, + "loss": 0.20885467529296875, + "step": 3412 + }, + { + "epoch": 0.46141106886354033, + "grad_norm": 1.0795154571533203, + "learning_rate": 1.867529768184357e-05, + "loss": 0.13947153091430664, + "step": 3413 + }, + { + "epoch": 0.4615462610899873, + "grad_norm": 1.2873276472091675, + "learning_rate": 1.8668661502459842e-05, + "loss": 0.16528213024139404, + "step": 3414 + }, + { + "epoch": 0.4616814533164343, + "grad_norm": 1.6167263984680176, + "learning_rate": 1.866202455924123e-05, + "loss": 0.17933082580566406, + "step": 3415 + }, + { + "epoch": 0.46181664554288127, + "grad_norm": 1.0261482000350952, + "learning_rate": 1.865538685356957e-05, + "loss": 0.20423603057861328, + "step": 3416 + }, + { + "epoch": 0.46195183776932824, + "grad_norm": 1.0881630182266235, + "learning_rate": 1.864874838682688e-05, + "loss": 0.1706758737564087, + "step": 3417 + }, + { + "epoch": 0.4620870299957752, + "grad_norm": 0.9593113660812378, + "learning_rate": 1.864210916039531e-05, + "loss": 0.1978282928466797, + "step": 3418 + }, + { + "epoch": 0.4622222222222222, + "grad_norm": 1.883331298828125, + "learning_rate": 1.86354691756572e-05, + "loss": 0.16776585578918457, + "step": 3419 + }, + { + "epoch": 0.4623574144486692, + "grad_norm": 1.0900118350982666, + "learning_rate": 1.8628828433995013e-05, + "loss": 0.13504266738891602, + "step": 3420 + }, + { + "epoch": 0.46249260667511616, + "grad_norm": 1.8107829093933105, + "learning_rate": 1.86221869367914e-05, + "loss": 0.2114863395690918, + "step": 3421 + }, + { + "epoch": 0.46262779890156314, + "grad_norm": 0.8562608957290649, + "learning_rate": 1.8615544685429153e-05, + "loss": 0.1545248031616211, + "step": 3422 + }, + { + "epoch": 0.4627629911280101, + "grad_norm": 1.1713788509368896, + "learning_rate": 1.860890168129122e-05, + "loss": 0.1962137222290039, + "step": 3423 + }, + { + "epoch": 0.4628981833544571, + "grad_norm": 0.8066584467887878, + "learning_rate": 1.8602257925760708e-05, + "loss": 0.14803075790405273, + "step": 3424 + }, + { + "epoch": 0.46303337558090407, + "grad_norm": 1.6488244533538818, + "learning_rate": 1.859561342022089e-05, + "loss": 0.15384674072265625, + "step": 3425 + }, + { + "epoch": 0.46316856780735105, + "grad_norm": 1.595786452293396, + "learning_rate": 1.8588968166055185e-05, + "loss": 0.15847110748291016, + "step": 3426 + }, + { + "epoch": 0.4633037600337981, + "grad_norm": 3.0756123065948486, + "learning_rate": 1.8582322164647166e-05, + "loss": 0.1841282844543457, + "step": 3427 + }, + { + "epoch": 0.46343895226024506, + "grad_norm": 0.981618344783783, + "learning_rate": 1.8575675417380568e-05, + "loss": 0.15735626220703125, + "step": 3428 + }, + { + "epoch": 0.46357414448669204, + "grad_norm": 1.1846190690994263, + "learning_rate": 1.856902792563928e-05, + "loss": 0.2256298065185547, + "step": 3429 + }, + { + "epoch": 0.463709336713139, + "grad_norm": 1.0386713743209839, + "learning_rate": 1.856237969080735e-05, + "loss": 0.21704483032226562, + "step": 3430 + }, + { + "epoch": 0.463844528939586, + "grad_norm": 0.9064835906028748, + "learning_rate": 1.8555730714268967e-05, + "loss": 0.1602954864501953, + "step": 3431 + }, + { + "epoch": 0.463979721166033, + "grad_norm": 1.2755295038223267, + "learning_rate": 1.8549080997408492e-05, + "loss": 0.18534564971923828, + "step": 3432 + }, + { + "epoch": 0.46411491339247996, + "grad_norm": 0.9447746276855469, + "learning_rate": 1.8542430541610426e-05, + "loss": 0.16979122161865234, + "step": 3433 + }, + { + "epoch": 0.46425010561892693, + "grad_norm": 1.1349126100540161, + "learning_rate": 1.8535779348259428e-05, + "loss": 0.1866673231124878, + "step": 3434 + }, + { + "epoch": 0.4643852978453739, + "grad_norm": 0.7974268198013306, + "learning_rate": 1.852912741874032e-05, + "loss": 0.1420574188232422, + "step": 3435 + }, + { + "epoch": 0.4645204900718209, + "grad_norm": 1.0906926393508911, + "learning_rate": 1.8522474754438056e-05, + "loss": 0.20548301935195923, + "step": 3436 + }, + { + "epoch": 0.46465568229826787, + "grad_norm": 1.2485060691833496, + "learning_rate": 1.851582135673777e-05, + "loss": 0.1719493865966797, + "step": 3437 + }, + { + "epoch": 0.46479087452471485, + "grad_norm": 1.1147270202636719, + "learning_rate": 1.850916722702473e-05, + "loss": 0.23005104064941406, + "step": 3438 + }, + { + "epoch": 0.4649260667511618, + "grad_norm": 1.42415452003479, + "learning_rate": 1.8502512366684355e-05, + "loss": 0.17417442798614502, + "step": 3439 + }, + { + "epoch": 0.4650612589776088, + "grad_norm": 3.0755507946014404, + "learning_rate": 1.8495856777102232e-05, + "loss": 0.19609451293945312, + "step": 3440 + }, + { + "epoch": 0.4651964512040558, + "grad_norm": 1.1640913486480713, + "learning_rate": 1.848920045966408e-05, + "loss": 0.17813396453857422, + "step": 3441 + }, + { + "epoch": 0.46533164343050276, + "grad_norm": 0.8279566168785095, + "learning_rate": 1.8482543415755797e-05, + "loss": 0.14305484294891357, + "step": 3442 + }, + { + "epoch": 0.46546683565694974, + "grad_norm": 2.2585926055908203, + "learning_rate": 1.8475885646763394e-05, + "loss": 0.22636747360229492, + "step": 3443 + }, + { + "epoch": 0.4656020278833967, + "grad_norm": 0.796954333782196, + "learning_rate": 1.8469227154073064e-05, + "loss": 0.1601715087890625, + "step": 3444 + }, + { + "epoch": 0.4657372201098437, + "grad_norm": 0.6723203659057617, + "learning_rate": 1.8462567939071142e-05, + "loss": 0.14592552185058594, + "step": 3445 + }, + { + "epoch": 0.4658724123362907, + "grad_norm": 2.2775490283966064, + "learning_rate": 1.845590800314411e-05, + "loss": 0.20641326904296875, + "step": 3446 + }, + { + "epoch": 0.46600760456273765, + "grad_norm": 1.12370765209198, + "learning_rate": 1.8449247347678607e-05, + "loss": 0.18240737915039062, + "step": 3447 + }, + { + "epoch": 0.46614279678918463, + "grad_norm": 1.2684212923049927, + "learning_rate": 1.8442585974061405e-05, + "loss": 0.14149951934814453, + "step": 3448 + }, + { + "epoch": 0.4662779890156316, + "grad_norm": 1.2761445045471191, + "learning_rate": 1.8435923883679452e-05, + "loss": 0.1410980224609375, + "step": 3449 + }, + { + "epoch": 0.4664131812420786, + "grad_norm": 0.988216757774353, + "learning_rate": 1.8429261077919818e-05, + "loss": 0.2061624526977539, + "step": 3450 + }, + { + "epoch": 0.46654837346852557, + "grad_norm": 1.1586878299713135, + "learning_rate": 1.8422597558169742e-05, + "loss": 0.14987945556640625, + "step": 3451 + }, + { + "epoch": 0.46668356569497255, + "grad_norm": 2.1076486110687256, + "learning_rate": 1.84159333258166e-05, + "loss": 0.21510791778564453, + "step": 3452 + }, + { + "epoch": 0.4668187579214195, + "grad_norm": 1.7654314041137695, + "learning_rate": 1.8409268382247933e-05, + "loss": 0.18716001510620117, + "step": 3453 + }, + { + "epoch": 0.4669539501478665, + "grad_norm": 0.6629133224487305, + "learning_rate": 1.8402602728851405e-05, + "loss": 0.15287065505981445, + "step": 3454 + }, + { + "epoch": 0.4670891423743135, + "grad_norm": 0.46503645181655884, + "learning_rate": 1.839593636701484e-05, + "loss": 0.09570884704589844, + "step": 3455 + }, + { + "epoch": 0.46722433460076046, + "grad_norm": 1.818081259727478, + "learning_rate": 1.8389269298126214e-05, + "loss": 0.16983318328857422, + "step": 3456 + }, + { + "epoch": 0.46735952682720744, + "grad_norm": 2.8512046337127686, + "learning_rate": 1.838260152357365e-05, + "loss": 0.2278270721435547, + "step": 3457 + }, + { + "epoch": 0.4674947190536544, + "grad_norm": 1.2884511947631836, + "learning_rate": 1.837593304474541e-05, + "loss": 0.19524097442626953, + "step": 3458 + }, + { + "epoch": 0.4676299112801014, + "grad_norm": 1.0646090507507324, + "learning_rate": 1.836926386302991e-05, + "loss": 0.15915775299072266, + "step": 3459 + }, + { + "epoch": 0.4677651035065484, + "grad_norm": 1.881972312927246, + "learning_rate": 1.8362593979815696e-05, + "loss": 0.16900920867919922, + "step": 3460 + }, + { + "epoch": 0.46790029573299535, + "grad_norm": 2.5855352878570557, + "learning_rate": 1.8355923396491496e-05, + "loss": 0.15507221221923828, + "step": 3461 + }, + { + "epoch": 0.46803548795944233, + "grad_norm": 0.6759036183357239, + "learning_rate": 1.8349252114446138e-05, + "loss": 0.10190773010253906, + "step": 3462 + }, + { + "epoch": 0.4681706801858893, + "grad_norm": 3.0451323986053467, + "learning_rate": 1.834258013506864e-05, + "loss": 0.2116556167602539, + "step": 3463 + }, + { + "epoch": 0.4683058724123363, + "grad_norm": 0.7880670428276062, + "learning_rate": 1.833590745974813e-05, + "loss": 0.1902482509613037, + "step": 3464 + }, + { + "epoch": 0.46844106463878327, + "grad_norm": 1.0126115083694458, + "learning_rate": 1.8329234089873892e-05, + "loss": 0.16979169845581055, + "step": 3465 + }, + { + "epoch": 0.46857625686523025, + "grad_norm": 1.4585893154144287, + "learning_rate": 1.8322560026835366e-05, + "loss": 0.19237709045410156, + "step": 3466 + }, + { + "epoch": 0.4687114490916772, + "grad_norm": 1.211708426475525, + "learning_rate": 1.8315885272022125e-05, + "loss": 0.15447306632995605, + "step": 3467 + }, + { + "epoch": 0.4688466413181242, + "grad_norm": 1.0668014287948608, + "learning_rate": 1.830920982682389e-05, + "loss": 0.19243741035461426, + "step": 3468 + }, + { + "epoch": 0.4689818335445712, + "grad_norm": 1.4674735069274902, + "learning_rate": 1.830253369263052e-05, + "loss": 0.14574575424194336, + "step": 3469 + }, + { + "epoch": 0.46911702577101816, + "grad_norm": 0.730385422706604, + "learning_rate": 1.8295856870832024e-05, + "loss": 0.19635009765625, + "step": 3470 + }, + { + "epoch": 0.46925221799746514, + "grad_norm": 1.3035799264907837, + "learning_rate": 1.828917936281855e-05, + "loss": 0.14718365669250488, + "step": 3471 + }, + { + "epoch": 0.4693874102239121, + "grad_norm": 0.5446547865867615, + "learning_rate": 1.8282501169980396e-05, + "loss": 0.10541534423828125, + "step": 3472 + }, + { + "epoch": 0.4695226024503591, + "grad_norm": 1.5150097608566284, + "learning_rate": 1.8275822293707992e-05, + "loss": 0.18842267990112305, + "step": 3473 + }, + { + "epoch": 0.4696577946768061, + "grad_norm": 0.9034777879714966, + "learning_rate": 1.8269142735391917e-05, + "loss": 0.15021824836730957, + "step": 3474 + }, + { + "epoch": 0.46979298690325305, + "grad_norm": 0.8535484671592712, + "learning_rate": 1.8262462496422893e-05, + "loss": 0.13802433013916016, + "step": 3475 + }, + { + "epoch": 0.46992817912970003, + "grad_norm": 1.2840960025787354, + "learning_rate": 1.8255781578191778e-05, + "loss": 0.18625903129577637, + "step": 3476 + }, + { + "epoch": 0.470063371356147, + "grad_norm": 1.8340035676956177, + "learning_rate": 1.824909998208958e-05, + "loss": 0.18075108528137207, + "step": 3477 + }, + { + "epoch": 0.470198563582594, + "grad_norm": 0.8349461555480957, + "learning_rate": 1.8242417709507434e-05, + "loss": 0.1463489532470703, + "step": 3478 + }, + { + "epoch": 0.47033375580904097, + "grad_norm": 0.9402061700820923, + "learning_rate": 1.8235734761836637e-05, + "loss": 0.19009113311767578, + "step": 3479 + }, + { + "epoch": 0.47046894803548794, + "grad_norm": 1.6222805976867676, + "learning_rate": 1.82290511404686e-05, + "loss": 0.1735401153564453, + "step": 3480 + }, + { + "epoch": 0.4706041402619349, + "grad_norm": 1.6301549673080444, + "learning_rate": 1.8222366846794904e-05, + "loss": 0.24803972244262695, + "step": 3481 + }, + { + "epoch": 0.4707393324883819, + "grad_norm": 1.3860433101654053, + "learning_rate": 1.8215681882207238e-05, + "loss": 0.196044921875, + "step": 3482 + }, + { + "epoch": 0.4708745247148289, + "grad_norm": 1.435080647468567, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.15694522857666016, + "step": 3483 + }, + { + "epoch": 0.47100971694127586, + "grad_norm": 0.806428074836731, + "learning_rate": 1.8202309945857557e-05, + "loss": 0.10889911651611328, + "step": 3484 + }, + { + "epoch": 0.47114490916772284, + "grad_norm": 1.4828214645385742, + "learning_rate": 1.8195622976879638e-05, + "loss": 0.12778782844543457, + "step": 3485 + }, + { + "epoch": 0.4712801013941698, + "grad_norm": 1.0211106538772583, + "learning_rate": 1.8188935342555977e-05, + "loss": 0.1554047167301178, + "step": 3486 + }, + { + "epoch": 0.4714152936206168, + "grad_norm": 1.030672311782837, + "learning_rate": 1.818224704427897e-05, + "loss": 0.14796066284179688, + "step": 3487 + }, + { + "epoch": 0.47155048584706377, + "grad_norm": 1.0293545722961426, + "learning_rate": 1.8175558083441162e-05, + "loss": 0.19671058654785156, + "step": 3488 + }, + { + "epoch": 0.47168567807351075, + "grad_norm": 0.7458539009094238, + "learning_rate": 1.8168868461435225e-05, + "loss": 0.18127822875976562, + "step": 3489 + }, + { + "epoch": 0.47182087029995773, + "grad_norm": 0.8370358347892761, + "learning_rate": 1.8162178179653977e-05, + "loss": 0.167605459690094, + "step": 3490 + }, + { + "epoch": 0.4719560625264047, + "grad_norm": 0.582918107509613, + "learning_rate": 1.815548723949037e-05, + "loss": 0.12580633163452148, + "step": 3491 + }, + { + "epoch": 0.4720912547528517, + "grad_norm": 1.2632253170013428, + "learning_rate": 1.814879564233749e-05, + "loss": 0.15083742141723633, + "step": 3492 + }, + { + "epoch": 0.47222644697929866, + "grad_norm": 1.1416912078857422, + "learning_rate": 1.8142103389588567e-05, + "loss": 0.17843055725097656, + "step": 3493 + }, + { + "epoch": 0.47236163920574564, + "grad_norm": 2.356801748275757, + "learning_rate": 1.813541048263696e-05, + "loss": 0.1441950798034668, + "step": 3494 + }, + { + "epoch": 0.4724968314321927, + "grad_norm": 0.8117491602897644, + "learning_rate": 1.8128716922876178e-05, + "loss": 0.19518089294433594, + "step": 3495 + }, + { + "epoch": 0.47263202365863966, + "grad_norm": 0.8480263948440552, + "learning_rate": 1.812202271169984e-05, + "loss": 0.18100261688232422, + "step": 3496 + }, + { + "epoch": 0.47276721588508663, + "grad_norm": 1.5033318996429443, + "learning_rate": 1.8115327850501726e-05, + "loss": 0.20951157808303833, + "step": 3497 + }, + { + "epoch": 0.4729024081115336, + "grad_norm": 1.579942226409912, + "learning_rate": 1.8108632340675746e-05, + "loss": 0.16703033447265625, + "step": 3498 + }, + { + "epoch": 0.4730376003379806, + "grad_norm": 0.76331627368927, + "learning_rate": 1.810193618361593e-05, + "loss": 0.15021705627441406, + "step": 3499 + }, + { + "epoch": 0.47317279256442757, + "grad_norm": 0.8473283648490906, + "learning_rate": 1.8095239380716464e-05, + "loss": 0.20316314697265625, + "step": 3500 + }, + { + "epoch": 0.47330798479087455, + "grad_norm": 0.9171915054321289, + "learning_rate": 1.808854193337165e-05, + "loss": 0.1653122901916504, + "step": 3501 + }, + { + "epoch": 0.4734431770173215, + "grad_norm": 0.8565853834152222, + "learning_rate": 1.8081843842975935e-05, + "loss": 0.1794414520263672, + "step": 3502 + }, + { + "epoch": 0.4735783692437685, + "grad_norm": 1.4016047716140747, + "learning_rate": 1.80751451109239e-05, + "loss": 0.17153263092041016, + "step": 3503 + }, + { + "epoch": 0.4737135614702155, + "grad_norm": 0.8179826140403748, + "learning_rate": 1.806844573861025e-05, + "loss": 0.15096187591552734, + "step": 3504 + }, + { + "epoch": 0.47384875369666246, + "grad_norm": 1.8277356624603271, + "learning_rate": 1.806174572742984e-05, + "loss": 0.1335737705230713, + "step": 3505 + }, + { + "epoch": 0.47398394592310944, + "grad_norm": 2.086305856704712, + "learning_rate": 1.8055045078777634e-05, + "loss": 0.1453789472579956, + "step": 3506 + }, + { + "epoch": 0.4741191381495564, + "grad_norm": 1.1727648973464966, + "learning_rate": 1.8048343794048762e-05, + "loss": 0.18294525146484375, + "step": 3507 + }, + { + "epoch": 0.4742543303760034, + "grad_norm": 0.7482226490974426, + "learning_rate": 1.8041641874638445e-05, + "loss": 0.14932632446289062, + "step": 3508 + }, + { + "epoch": 0.4743895226024504, + "grad_norm": 1.0309417247772217, + "learning_rate": 1.8034939321942077e-05, + "loss": 0.16088390350341797, + "step": 3509 + }, + { + "epoch": 0.47452471482889735, + "grad_norm": 0.9488224387168884, + "learning_rate": 1.8028236137355154e-05, + "loss": 0.15990209579467773, + "step": 3510 + }, + { + "epoch": 0.47465990705534433, + "grad_norm": 1.1417396068572998, + "learning_rate": 1.8021532322273327e-05, + "loss": 0.1642436981201172, + "step": 3511 + }, + { + "epoch": 0.4747950992817913, + "grad_norm": 0.6780730485916138, + "learning_rate": 1.8014827878092347e-05, + "loss": 0.13392972946166992, + "step": 3512 + }, + { + "epoch": 0.4749302915082383, + "grad_norm": 0.7557806372642517, + "learning_rate": 1.800812280620813e-05, + "loss": 0.15101051330566406, + "step": 3513 + }, + { + "epoch": 0.47506548373468527, + "grad_norm": 2.0491278171539307, + "learning_rate": 1.80014171080167e-05, + "loss": 0.2699241638183594, + "step": 3514 + }, + { + "epoch": 0.47520067596113225, + "grad_norm": 1.1695258617401123, + "learning_rate": 1.7994710784914227e-05, + "loss": 0.15940046310424805, + "step": 3515 + }, + { + "epoch": 0.4753358681875792, + "grad_norm": 1.0257755517959595, + "learning_rate": 1.7988003838297e-05, + "loss": 0.15984225273132324, + "step": 3516 + }, + { + "epoch": 0.4754710604140262, + "grad_norm": 1.2689498662948608, + "learning_rate": 1.7981296269561438e-05, + "loss": 0.15655231475830078, + "step": 3517 + }, + { + "epoch": 0.4756062526404732, + "grad_norm": 1.4033067226409912, + "learning_rate": 1.7974588080104095e-05, + "loss": 0.15663385391235352, + "step": 3518 + }, + { + "epoch": 0.47574144486692016, + "grad_norm": 1.3479342460632324, + "learning_rate": 1.7967879271321648e-05, + "loss": 0.15413331985473633, + "step": 3519 + }, + { + "epoch": 0.47587663709336714, + "grad_norm": 1.155164361000061, + "learning_rate": 1.7961169844610913e-05, + "loss": 0.204315185546875, + "step": 3520 + }, + { + "epoch": 0.4760118293198141, + "grad_norm": 0.8277118802070618, + "learning_rate": 1.795445980136883e-05, + "loss": 0.15469026565551758, + "step": 3521 + }, + { + "epoch": 0.4761470215462611, + "grad_norm": 1.588104009628296, + "learning_rate": 1.794774914299245e-05, + "loss": 0.14812660217285156, + "step": 3522 + }, + { + "epoch": 0.4762822137727081, + "grad_norm": 1.9750038385391235, + "learning_rate": 1.794103787087899e-05, + "loss": 0.19992446899414062, + "step": 3523 + }, + { + "epoch": 0.47641740599915505, + "grad_norm": 0.6098859310150146, + "learning_rate": 1.7934325986425755e-05, + "loss": 0.12087976932525635, + "step": 3524 + }, + { + "epoch": 0.47655259822560203, + "grad_norm": 0.879057765007019, + "learning_rate": 1.7927613491030204e-05, + "loss": 0.14919328689575195, + "step": 3525 + }, + { + "epoch": 0.476687790452049, + "grad_norm": 1.123022198677063, + "learning_rate": 1.7920900386089913e-05, + "loss": 0.2040081024169922, + "step": 3526 + }, + { + "epoch": 0.476822982678496, + "grad_norm": 1.0244865417480469, + "learning_rate": 1.7914186673002588e-05, + "loss": 0.21220779418945312, + "step": 3527 + }, + { + "epoch": 0.47695817490494297, + "grad_norm": 2.2303543090820312, + "learning_rate": 1.790747235316605e-05, + "loss": 0.15620040893554688, + "step": 3528 + }, + { + "epoch": 0.47709336713138994, + "grad_norm": 1.6135231256484985, + "learning_rate": 1.790075742797827e-05, + "loss": 0.23848295211791992, + "step": 3529 + }, + { + "epoch": 0.4772285593578369, + "grad_norm": 0.9326907992362976, + "learning_rate": 1.789404189883732e-05, + "loss": 0.1906147003173828, + "step": 3530 + }, + { + "epoch": 0.4773637515842839, + "grad_norm": 1.431780457496643, + "learning_rate": 1.7887325767141415e-05, + "loss": 0.24800872802734375, + "step": 3531 + }, + { + "epoch": 0.4774989438107309, + "grad_norm": 0.8666039109230042, + "learning_rate": 1.7880609034288894e-05, + "loss": 0.1570730209350586, + "step": 3532 + }, + { + "epoch": 0.47763413603717786, + "grad_norm": 1.049560308456421, + "learning_rate": 1.7873891701678208e-05, + "loss": 0.1844654083251953, + "step": 3533 + }, + { + "epoch": 0.47776932826362484, + "grad_norm": 1.7594057321548462, + "learning_rate": 1.786717377070794e-05, + "loss": 0.16901493072509766, + "step": 3534 + }, + { + "epoch": 0.4779045204900718, + "grad_norm": 0.7886475324630737, + "learning_rate": 1.7860455242776803e-05, + "loss": 0.18465614318847656, + "step": 3535 + }, + { + "epoch": 0.4780397127165188, + "grad_norm": 1.2098989486694336, + "learning_rate": 1.7853736119283635e-05, + "loss": 0.19279992580413818, + "step": 3536 + }, + { + "epoch": 0.4781749049429658, + "grad_norm": 0.8562732338905334, + "learning_rate": 1.7847016401627388e-05, + "loss": 0.20071792602539062, + "step": 3537 + }, + { + "epoch": 0.47831009716941275, + "grad_norm": 2.1108808517456055, + "learning_rate": 1.7840296091207144e-05, + "loss": 0.17426657676696777, + "step": 3538 + }, + { + "epoch": 0.47844528939585973, + "grad_norm": 0.8799384236335754, + "learning_rate": 1.7833575189422107e-05, + "loss": 0.17475128173828125, + "step": 3539 + }, + { + "epoch": 0.4785804816223067, + "grad_norm": 1.1155035495758057, + "learning_rate": 1.7826853697671604e-05, + "loss": 0.164093017578125, + "step": 3540 + }, + { + "epoch": 0.4787156738487537, + "grad_norm": 3.17952561378479, + "learning_rate": 1.782013161735509e-05, + "loss": 0.23760032653808594, + "step": 3541 + }, + { + "epoch": 0.47885086607520067, + "grad_norm": 1.0523217916488647, + "learning_rate": 1.781340894987213e-05, + "loss": 0.16167926788330078, + "step": 3542 + }, + { + "epoch": 0.47898605830164764, + "grad_norm": 1.0254875421524048, + "learning_rate": 1.7806685696622426e-05, + "loss": 0.1648855209350586, + "step": 3543 + }, + { + "epoch": 0.4791212505280946, + "grad_norm": 1.4180539846420288, + "learning_rate": 1.779996185900579e-05, + "loss": 0.17851829528808594, + "step": 3544 + }, + { + "epoch": 0.4792564427545416, + "grad_norm": 1.5110746622085571, + "learning_rate": 1.7793237438422165e-05, + "loss": 0.13212299346923828, + "step": 3545 + }, + { + "epoch": 0.4793916349809886, + "grad_norm": 1.038517713546753, + "learning_rate": 1.7786512436271617e-05, + "loss": 0.23049545288085938, + "step": 3546 + }, + { + "epoch": 0.47952682720743556, + "grad_norm": 2.5513124465942383, + "learning_rate": 1.777978685395431e-05, + "loss": 0.21246719360351562, + "step": 3547 + }, + { + "epoch": 0.47966201943388254, + "grad_norm": 2.3631436824798584, + "learning_rate": 1.7773060692870564e-05, + "loss": 0.15679168701171875, + "step": 3548 + }, + { + "epoch": 0.4797972116603295, + "grad_norm": 1.9723563194274902, + "learning_rate": 1.7766333954420794e-05, + "loss": 0.2117905616760254, + "step": 3549 + }, + { + "epoch": 0.4799324038867765, + "grad_norm": 1.1882332563400269, + "learning_rate": 1.775960664000554e-05, + "loss": 0.2038745880126953, + "step": 3550 + }, + { + "epoch": 0.48006759611322347, + "grad_norm": 2.519015073776245, + "learning_rate": 1.7752878751025463e-05, + "loss": 0.2249622344970703, + "step": 3551 + }, + { + "epoch": 0.48020278833967045, + "grad_norm": 0.7844443917274475, + "learning_rate": 1.7746150288881352e-05, + "loss": 0.1720128059387207, + "step": 3552 + }, + { + "epoch": 0.48033798056611743, + "grad_norm": 1.2197054624557495, + "learning_rate": 1.7739421254974114e-05, + "loss": 0.2151031494140625, + "step": 3553 + }, + { + "epoch": 0.4804731727925644, + "grad_norm": 1.1257039308547974, + "learning_rate": 1.7732691650704756e-05, + "loss": 0.16568279266357422, + "step": 3554 + }, + { + "epoch": 0.4806083650190114, + "grad_norm": 1.1142538785934448, + "learning_rate": 1.7725961477474423e-05, + "loss": 0.14854001998901367, + "step": 3555 + }, + { + "epoch": 0.48074355724545836, + "grad_norm": 0.723314642906189, + "learning_rate": 1.7719230736684375e-05, + "loss": 0.12540531158447266, + "step": 3556 + }, + { + "epoch": 0.48087874947190534, + "grad_norm": 0.7580430507659912, + "learning_rate": 1.771249942973599e-05, + "loss": 0.16405725479125977, + "step": 3557 + }, + { + "epoch": 0.4810139416983523, + "grad_norm": 2.516895294189453, + "learning_rate": 1.7705767558030756e-05, + "loss": 0.1693258285522461, + "step": 3558 + }, + { + "epoch": 0.4811491339247993, + "grad_norm": 1.2762389183044434, + "learning_rate": 1.769903512297029e-05, + "loss": 0.20034027099609375, + "step": 3559 + }, + { + "epoch": 0.4812843261512463, + "grad_norm": 2.2060084342956543, + "learning_rate": 1.7692302125956315e-05, + "loss": 0.24058151245117188, + "step": 3560 + }, + { + "epoch": 0.48141951837769326, + "grad_norm": 1.346879243850708, + "learning_rate": 1.768556856839068e-05, + "loss": 0.19856834411621094, + "step": 3561 + }, + { + "epoch": 0.48155471060414023, + "grad_norm": 2.351047992706299, + "learning_rate": 1.767883445167535e-05, + "loss": 0.2039813995361328, + "step": 3562 + }, + { + "epoch": 0.48168990283058727, + "grad_norm": 2.3243563175201416, + "learning_rate": 1.7672099777212398e-05, + "loss": 0.18050861358642578, + "step": 3563 + }, + { + "epoch": 0.48182509505703425, + "grad_norm": 3.619431972503662, + "learning_rate": 1.7665364546404034e-05, + "loss": 0.22363626956939697, + "step": 3564 + }, + { + "epoch": 0.4819602872834812, + "grad_norm": 1.8494421243667603, + "learning_rate": 1.7658628760652548e-05, + "loss": 0.1747303009033203, + "step": 3565 + }, + { + "epoch": 0.4820954795099282, + "grad_norm": 2.077983856201172, + "learning_rate": 1.765189242136038e-05, + "loss": 0.21093487739562988, + "step": 3566 + }, + { + "epoch": 0.4822306717363752, + "grad_norm": 0.6678910851478577, + "learning_rate": 1.7645155529930065e-05, + "loss": 0.14583158493041992, + "step": 3567 + }, + { + "epoch": 0.48236586396282216, + "grad_norm": 1.4192137718200684, + "learning_rate": 1.763841808776426e-05, + "loss": 0.21659469604492188, + "step": 3568 + }, + { + "epoch": 0.48250105618926914, + "grad_norm": 1.13424551486969, + "learning_rate": 1.763168009626575e-05, + "loss": 0.18615365028381348, + "step": 3569 + }, + { + "epoch": 0.4826362484157161, + "grad_norm": 2.837808847427368, + "learning_rate": 1.7624941556837406e-05, + "loss": 0.16075420379638672, + "step": 3570 + }, + { + "epoch": 0.4827714406421631, + "grad_norm": 0.8824772834777832, + "learning_rate": 1.7618202470882233e-05, + "loss": 0.1805715560913086, + "step": 3571 + }, + { + "epoch": 0.4829066328686101, + "grad_norm": 1.3237603902816772, + "learning_rate": 1.7611462839803336e-05, + "loss": 0.14923858642578125, + "step": 3572 + }, + { + "epoch": 0.48304182509505705, + "grad_norm": 2.017673969268799, + "learning_rate": 1.760472266500396e-05, + "loss": 0.22539281845092773, + "step": 3573 + }, + { + "epoch": 0.48317701732150403, + "grad_norm": 1.4548580646514893, + "learning_rate": 1.759798194788743e-05, + "loss": 0.15132999420166016, + "step": 3574 + }, + { + "epoch": 0.483312209547951, + "grad_norm": 1.2383757829666138, + "learning_rate": 1.75912406898572e-05, + "loss": 0.18599319458007812, + "step": 3575 + }, + { + "epoch": 0.483447401774398, + "grad_norm": 0.8775805234909058, + "learning_rate": 1.758449889231685e-05, + "loss": 0.19205284118652344, + "step": 3576 + }, + { + "epoch": 0.48358259400084497, + "grad_norm": 1.3527287244796753, + "learning_rate": 1.757775655667004e-05, + "loss": 0.26311492919921875, + "step": 3577 + }, + { + "epoch": 0.48371778622729195, + "grad_norm": 1.782068133354187, + "learning_rate": 1.757101368432057e-05, + "loss": 0.19730281829833984, + "step": 3578 + }, + { + "epoch": 0.4838529784537389, + "grad_norm": 2.465442180633545, + "learning_rate": 1.7564270276672343e-05, + "loss": 0.19033241271972656, + "step": 3579 + }, + { + "epoch": 0.4839881706801859, + "grad_norm": 1.1994714736938477, + "learning_rate": 1.7557526335129372e-05, + "loss": 0.17396140098571777, + "step": 3580 + }, + { + "epoch": 0.4841233629066329, + "grad_norm": 2.444040298461914, + "learning_rate": 1.7550781861095774e-05, + "loss": 0.20524978637695312, + "step": 3581 + }, + { + "epoch": 0.48425855513307986, + "grad_norm": 0.9775839447975159, + "learning_rate": 1.754403685597579e-05, + "loss": 0.12641441822052002, + "step": 3582 + }, + { + "epoch": 0.48439374735952684, + "grad_norm": 1.0033881664276123, + "learning_rate": 1.7537291321173773e-05, + "loss": 0.20652008056640625, + "step": 3583 + }, + { + "epoch": 0.4845289395859738, + "grad_norm": 1.5384173393249512, + "learning_rate": 1.7530545258094165e-05, + "loss": 0.18323099613189697, + "step": 3584 + }, + { + "epoch": 0.4846641318124208, + "grad_norm": 1.7562642097473145, + "learning_rate": 1.7523798668141548e-05, + "loss": 0.19761085510253906, + "step": 3585 + }, + { + "epoch": 0.4847993240388678, + "grad_norm": 2.245157241821289, + "learning_rate": 1.7517051552720584e-05, + "loss": 0.24962902069091797, + "step": 3586 + }, + { + "epoch": 0.48493451626531475, + "grad_norm": 1.066164255142212, + "learning_rate": 1.7510303913236066e-05, + "loss": 0.18131589889526367, + "step": 3587 + }, + { + "epoch": 0.48506970849176173, + "grad_norm": 0.8821468949317932, + "learning_rate": 1.7503555751092883e-05, + "loss": 0.13342809677124023, + "step": 3588 + }, + { + "epoch": 0.4852049007182087, + "grad_norm": 1.1430426836013794, + "learning_rate": 1.7496807067696046e-05, + "loss": 0.1805424690246582, + "step": 3589 + }, + { + "epoch": 0.4853400929446557, + "grad_norm": 0.9823818802833557, + "learning_rate": 1.7490057864450665e-05, + "loss": 0.20342540740966797, + "step": 3590 + }, + { + "epoch": 0.48547528517110267, + "grad_norm": 0.8003551959991455, + "learning_rate": 1.748330814276195e-05, + "loss": 0.1759796142578125, + "step": 3591 + }, + { + "epoch": 0.48561047739754964, + "grad_norm": 1.0914113521575928, + "learning_rate": 1.7476557904035243e-05, + "loss": 0.17968034744262695, + "step": 3592 + }, + { + "epoch": 0.4857456696239966, + "grad_norm": 1.8646550178527832, + "learning_rate": 1.7469807149675973e-05, + "loss": 0.1932525634765625, + "step": 3593 + }, + { + "epoch": 0.4858808618504436, + "grad_norm": 1.0126187801361084, + "learning_rate": 1.7463055881089685e-05, + "loss": 0.22491276264190674, + "step": 3594 + }, + { + "epoch": 0.4860160540768906, + "grad_norm": 1.840192437171936, + "learning_rate": 1.7456304099682024e-05, + "loss": 0.1873321533203125, + "step": 3595 + }, + { + "epoch": 0.48615124630333756, + "grad_norm": 1.2671908140182495, + "learning_rate": 1.7449551806858756e-05, + "loss": 0.12606430053710938, + "step": 3596 + }, + { + "epoch": 0.48628643852978454, + "grad_norm": 1.216102123260498, + "learning_rate": 1.7442799004025733e-05, + "loss": 0.16867345571517944, + "step": 3597 + }, + { + "epoch": 0.4864216307562315, + "grad_norm": 0.6935027837753296, + "learning_rate": 1.7436045692588934e-05, + "loss": 0.09225940704345703, + "step": 3598 + }, + { + "epoch": 0.4865568229826785, + "grad_norm": 1.234215259552002, + "learning_rate": 1.742929187395443e-05, + "loss": 0.23068809509277344, + "step": 3599 + }, + { + "epoch": 0.4866920152091255, + "grad_norm": 1.2269054651260376, + "learning_rate": 1.7422537549528402e-05, + "loss": 0.21541976928710938, + "step": 3600 + }, + { + "epoch": 0.48682720743557245, + "grad_norm": 0.6758655905723572, + "learning_rate": 1.7415782720717147e-05, + "loss": 0.12720251083374023, + "step": 3601 + }, + { + "epoch": 0.48696239966201943, + "grad_norm": 0.6554602980613708, + "learning_rate": 1.740902738892704e-05, + "loss": 0.15577435493469238, + "step": 3602 + }, + { + "epoch": 0.4870975918884664, + "grad_norm": 1.1283648014068604, + "learning_rate": 1.7402271555564585e-05, + "loss": 0.1436305046081543, + "step": 3603 + }, + { + "epoch": 0.4872327841149134, + "grad_norm": 1.6576296091079712, + "learning_rate": 1.739551522203638e-05, + "loss": 0.18545818328857422, + "step": 3604 + }, + { + "epoch": 0.48736797634136036, + "grad_norm": 1.4805735349655151, + "learning_rate": 1.738875838974913e-05, + "loss": 0.16046762466430664, + "step": 3605 + }, + { + "epoch": 0.48750316856780734, + "grad_norm": 0.9514274001121521, + "learning_rate": 1.7382001060109652e-05, + "loss": 0.17522907257080078, + "step": 3606 + }, + { + "epoch": 0.4876383607942543, + "grad_norm": 1.2427281141281128, + "learning_rate": 1.7375243234524843e-05, + "loss": 0.20197582244873047, + "step": 3607 + }, + { + "epoch": 0.4877735530207013, + "grad_norm": 1.6158555746078491, + "learning_rate": 1.736848491440173e-05, + "loss": 0.1747041940689087, + "step": 3608 + }, + { + "epoch": 0.4879087452471483, + "grad_norm": 0.7927384376525879, + "learning_rate": 1.7361726101147424e-05, + "loss": 0.1530170440673828, + "step": 3609 + }, + { + "epoch": 0.48804393747359526, + "grad_norm": 1.0317902565002441, + "learning_rate": 1.7354966796169157e-05, + "loss": 0.18799781799316406, + "step": 3610 + }, + { + "epoch": 0.48817912970004224, + "grad_norm": 1.3067508935928345, + "learning_rate": 1.734820700087424e-05, + "loss": 0.20744752883911133, + "step": 3611 + }, + { + "epoch": 0.4883143219264892, + "grad_norm": 1.0842506885528564, + "learning_rate": 1.7341446716670103e-05, + "loss": 0.20425748825073242, + "step": 3612 + }, + { + "epoch": 0.4884495141529362, + "grad_norm": 2.8209221363067627, + "learning_rate": 1.7334685944964272e-05, + "loss": 0.19693565368652344, + "step": 3613 + }, + { + "epoch": 0.48858470637938317, + "grad_norm": 1.5135897397994995, + "learning_rate": 1.732792468716438e-05, + "loss": 0.1494002342224121, + "step": 3614 + }, + { + "epoch": 0.48871989860583015, + "grad_norm": 2.901961326599121, + "learning_rate": 1.7321162944678155e-05, + "loss": 0.1833209991455078, + "step": 3615 + }, + { + "epoch": 0.48885509083227713, + "grad_norm": 0.9659711718559265, + "learning_rate": 1.7314400718913425e-05, + "loss": 0.20191383361816406, + "step": 3616 + }, + { + "epoch": 0.4889902830587241, + "grad_norm": 0.9311013221740723, + "learning_rate": 1.7307638011278126e-05, + "loss": 0.18041229248046875, + "step": 3617 + }, + { + "epoch": 0.4891254752851711, + "grad_norm": 0.6251784563064575, + "learning_rate": 1.7300874823180284e-05, + "loss": 0.13950061798095703, + "step": 3618 + }, + { + "epoch": 0.48926066751161806, + "grad_norm": 1.6965829133987427, + "learning_rate": 1.7294111156028034e-05, + "loss": 0.17876195907592773, + "step": 3619 + }, + { + "epoch": 0.48939585973806504, + "grad_norm": 0.8463488221168518, + "learning_rate": 1.7287347011229605e-05, + "loss": 0.20867156982421875, + "step": 3620 + }, + { + "epoch": 0.489531051964512, + "grad_norm": 0.7450462579727173, + "learning_rate": 1.7280582390193333e-05, + "loss": 0.1433115005493164, + "step": 3621 + }, + { + "epoch": 0.489666244190959, + "grad_norm": 2.0672967433929443, + "learning_rate": 1.7273817294327653e-05, + "loss": 0.20038509368896484, + "step": 3622 + }, + { + "epoch": 0.489801436417406, + "grad_norm": 1.4547683000564575, + "learning_rate": 1.726705172504108e-05, + "loss": 0.19504737854003906, + "step": 3623 + }, + { + "epoch": 0.48993662864385296, + "grad_norm": 1.000737190246582, + "learning_rate": 1.7260285683742248e-05, + "loss": 0.17860937118530273, + "step": 3624 + }, + { + "epoch": 0.49007182087029993, + "grad_norm": 0.5829146504402161, + "learning_rate": 1.7253519171839883e-05, + "loss": 0.12136948108673096, + "step": 3625 + }, + { + "epoch": 0.4902070130967469, + "grad_norm": 0.9649888873100281, + "learning_rate": 1.724675219074281e-05, + "loss": 0.1799163818359375, + "step": 3626 + }, + { + "epoch": 0.4903422053231939, + "grad_norm": 1.3372387886047363, + "learning_rate": 1.7239984741859957e-05, + "loss": 0.11334705352783203, + "step": 3627 + }, + { + "epoch": 0.49047739754964087, + "grad_norm": 0.9142506718635559, + "learning_rate": 1.7233216826600324e-05, + "loss": 0.16736221313476562, + "step": 3628 + }, + { + "epoch": 0.49061258977608785, + "grad_norm": 1.42210853099823, + "learning_rate": 1.7226448446373047e-05, + "loss": 0.12766671180725098, + "step": 3629 + }, + { + "epoch": 0.4907477820025348, + "grad_norm": 1.2180275917053223, + "learning_rate": 1.7219679602587326e-05, + "loss": 0.1990499496459961, + "step": 3630 + }, + { + "epoch": 0.49088297422898186, + "grad_norm": 1.6852333545684814, + "learning_rate": 1.7212910296652476e-05, + "loss": 0.16421890258789062, + "step": 3631 + }, + { + "epoch": 0.49101816645542884, + "grad_norm": 0.8113219738006592, + "learning_rate": 1.7206140529977904e-05, + "loss": 0.11036968231201172, + "step": 3632 + }, + { + "epoch": 0.4911533586818758, + "grad_norm": 0.9482988715171814, + "learning_rate": 1.719937030397311e-05, + "loss": 0.1797924041748047, + "step": 3633 + }, + { + "epoch": 0.4912885509083228, + "grad_norm": 0.8661326169967651, + "learning_rate": 1.7192599620047683e-05, + "loss": 0.11438751220703125, + "step": 3634 + }, + { + "epoch": 0.4914237431347698, + "grad_norm": 2.282918691635132, + "learning_rate": 1.718582847961133e-05, + "loss": 0.23289012908935547, + "step": 3635 + }, + { + "epoch": 0.49155893536121675, + "grad_norm": 1.3346805572509766, + "learning_rate": 1.7179056884073826e-05, + "loss": 0.19357538223266602, + "step": 3636 + }, + { + "epoch": 0.49169412758766373, + "grad_norm": 1.8274191617965698, + "learning_rate": 1.717228483484506e-05, + "loss": 0.1923694610595703, + "step": 3637 + }, + { + "epoch": 0.4918293198141107, + "grad_norm": 2.077373743057251, + "learning_rate": 1.7165512333335013e-05, + "loss": 0.17790699005126953, + "step": 3638 + }, + { + "epoch": 0.4919645120405577, + "grad_norm": 0.8957392573356628, + "learning_rate": 1.715873938095374e-05, + "loss": 0.1672954559326172, + "step": 3639 + }, + { + "epoch": 0.49209970426700467, + "grad_norm": 1.4486415386199951, + "learning_rate": 1.7151965979111427e-05, + "loss": 0.1927928924560547, + "step": 3640 + }, + { + "epoch": 0.49223489649345165, + "grad_norm": 1.1866776943206787, + "learning_rate": 1.7145192129218313e-05, + "loss": 0.16849040985107422, + "step": 3641 + }, + { + "epoch": 0.4923700887198986, + "grad_norm": 1.1296336650848389, + "learning_rate": 1.7138417832684763e-05, + "loss": 0.17771363258361816, + "step": 3642 + }, + { + "epoch": 0.4925052809463456, + "grad_norm": 0.9979537725448608, + "learning_rate": 1.7131643090921216e-05, + "loss": 0.1480550765991211, + "step": 3643 + }, + { + "epoch": 0.4926404731727926, + "grad_norm": 2.2543773651123047, + "learning_rate": 1.712486790533821e-05, + "loss": 0.2135171890258789, + "step": 3644 + }, + { + "epoch": 0.49277566539923956, + "grad_norm": 1.6149193048477173, + "learning_rate": 1.7118092277346372e-05, + "loss": 0.18088722229003906, + "step": 3645 + }, + { + "epoch": 0.49291085762568654, + "grad_norm": 1.2460503578186035, + "learning_rate": 1.7111316208356428e-05, + "loss": 0.2027912139892578, + "step": 3646 + }, + { + "epoch": 0.4930460498521335, + "grad_norm": 0.648792564868927, + "learning_rate": 1.7104539699779192e-05, + "loss": 0.12471961975097656, + "step": 3647 + }, + { + "epoch": 0.4931812420785805, + "grad_norm": 0.8805149793624878, + "learning_rate": 1.709776275302557e-05, + "loss": 0.15224552154541016, + "step": 3648 + }, + { + "epoch": 0.4933164343050275, + "grad_norm": 3.027792453765869, + "learning_rate": 1.7090985369506555e-05, + "loss": 0.27625274658203125, + "step": 3649 + }, + { + "epoch": 0.49345162653147445, + "grad_norm": 0.8288435339927673, + "learning_rate": 1.708420755063323e-05, + "loss": 0.1549924612045288, + "step": 3650 + }, + { + "epoch": 0.49358681875792143, + "grad_norm": 0.9256892800331116, + "learning_rate": 1.707742929781678e-05, + "loss": 0.13416099548339844, + "step": 3651 + }, + { + "epoch": 0.4937220109843684, + "grad_norm": 0.6725975275039673, + "learning_rate": 1.707065061246848e-05, + "loss": 0.1769275665283203, + "step": 3652 + }, + { + "epoch": 0.4938572032108154, + "grad_norm": 1.0161848068237305, + "learning_rate": 1.7063871495999677e-05, + "loss": 0.1681079864501953, + "step": 3653 + }, + { + "epoch": 0.49399239543726237, + "grad_norm": 0.9416725635528564, + "learning_rate": 1.705709194982182e-05, + "loss": 0.15160465240478516, + "step": 3654 + }, + { + "epoch": 0.49412758766370934, + "grad_norm": 0.9839097261428833, + "learning_rate": 1.7050311975346447e-05, + "loss": 0.22127151489257812, + "step": 3655 + }, + { + "epoch": 0.4942627798901563, + "grad_norm": 1.8227262496948242, + "learning_rate": 1.704353157398519e-05, + "loss": 0.16324293613433838, + "step": 3656 + }, + { + "epoch": 0.4943979721166033, + "grad_norm": 0.7980266809463501, + "learning_rate": 1.7036750747149764e-05, + "loss": 0.14069843292236328, + "step": 3657 + }, + { + "epoch": 0.4945331643430503, + "grad_norm": 1.1018174886703491, + "learning_rate": 1.702996949625197e-05, + "loss": 0.1280226707458496, + "step": 3658 + }, + { + "epoch": 0.49466835656949726, + "grad_norm": 1.1234852075576782, + "learning_rate": 1.7023187822703702e-05, + "loss": 0.17341399192810059, + "step": 3659 + }, + { + "epoch": 0.49480354879594424, + "grad_norm": 1.6097593307495117, + "learning_rate": 1.7016405727916936e-05, + "loss": 0.21378421783447266, + "step": 3660 + }, + { + "epoch": 0.4949387410223912, + "grad_norm": 1.690238118171692, + "learning_rate": 1.700962321330375e-05, + "loss": 0.18404769897460938, + "step": 3661 + }, + { + "epoch": 0.4950739332488382, + "grad_norm": 1.0678889751434326, + "learning_rate": 1.700284028027629e-05, + "loss": 0.18759942054748535, + "step": 3662 + }, + { + "epoch": 0.4952091254752852, + "grad_norm": 0.9970592260360718, + "learning_rate": 1.6996056930246807e-05, + "loss": 0.18451976776123047, + "step": 3663 + }, + { + "epoch": 0.49534431770173215, + "grad_norm": 2.1461830139160156, + "learning_rate": 1.6989273164627626e-05, + "loss": 0.1855611801147461, + "step": 3664 + }, + { + "epoch": 0.49547950992817913, + "grad_norm": 1.6247203350067139, + "learning_rate": 1.6982488984831163e-05, + "loss": 0.19552898406982422, + "step": 3665 + }, + { + "epoch": 0.4956147021546261, + "grad_norm": 0.6429634690284729, + "learning_rate": 1.697570439226992e-05, + "loss": 0.1354351043701172, + "step": 3666 + }, + { + "epoch": 0.4957498943810731, + "grad_norm": 1.2305651903152466, + "learning_rate": 1.6968919388356486e-05, + "loss": 0.21979141235351562, + "step": 3667 + }, + { + "epoch": 0.49588508660752006, + "grad_norm": 0.9940130710601807, + "learning_rate": 1.696213397450354e-05, + "loss": 0.16416454315185547, + "step": 3668 + }, + { + "epoch": 0.49602027883396704, + "grad_norm": 1.403414249420166, + "learning_rate": 1.695534815212384e-05, + "loss": 0.18146896362304688, + "step": 3669 + }, + { + "epoch": 0.496155471060414, + "grad_norm": 1.5657511949539185, + "learning_rate": 1.6948561922630223e-05, + "loss": 0.1825275421142578, + "step": 3670 + }, + { + "epoch": 0.496290663286861, + "grad_norm": 1.4954921007156372, + "learning_rate": 1.694177528743562e-05, + "loss": 0.19891357421875, + "step": 3671 + }, + { + "epoch": 0.496425855513308, + "grad_norm": 1.6047428846359253, + "learning_rate": 1.6934988247953053e-05, + "loss": 0.1822667121887207, + "step": 3672 + }, + { + "epoch": 0.49656104773975496, + "grad_norm": 0.9843138456344604, + "learning_rate": 1.6928200805595606e-05, + "loss": 0.18076705932617188, + "step": 3673 + }, + { + "epoch": 0.49669623996620194, + "grad_norm": 0.8712040185928345, + "learning_rate": 1.6921412961776475e-05, + "loss": 0.1888713836669922, + "step": 3674 + }, + { + "epoch": 0.4968314321926489, + "grad_norm": 1.2058136463165283, + "learning_rate": 1.6914624717908922e-05, + "loss": 0.16952574253082275, + "step": 3675 + }, + { + "epoch": 0.4969666244190959, + "grad_norm": 0.7816467881202698, + "learning_rate": 1.6907836075406288e-05, + "loss": 0.15458011627197266, + "step": 3676 + }, + { + "epoch": 0.49710181664554287, + "grad_norm": 1.2901582717895508, + "learning_rate": 1.690104703568201e-05, + "loss": 0.15792584419250488, + "step": 3677 + }, + { + "epoch": 0.49723700887198985, + "grad_norm": 1.174155831336975, + "learning_rate": 1.68942576001496e-05, + "loss": 0.15606403350830078, + "step": 3678 + }, + { + "epoch": 0.49737220109843683, + "grad_norm": 1.0145599842071533, + "learning_rate": 1.6887467770222658e-05, + "loss": 0.17368626594543457, + "step": 3679 + }, + { + "epoch": 0.4975073933248838, + "grad_norm": 0.8530884385108948, + "learning_rate": 1.6880677547314865e-05, + "loss": 0.17932939529418945, + "step": 3680 + }, + { + "epoch": 0.4976425855513308, + "grad_norm": 1.446481466293335, + "learning_rate": 1.6873886932839973e-05, + "loss": 0.17049932479858398, + "step": 3681 + }, + { + "epoch": 0.49777777777777776, + "grad_norm": 1.398655891418457, + "learning_rate": 1.686709592821183e-05, + "loss": 0.14878273010253906, + "step": 3682 + }, + { + "epoch": 0.49791297000422474, + "grad_norm": 0.8881034851074219, + "learning_rate": 1.6860304534844355e-05, + "loss": 0.19350051879882812, + "step": 3683 + }, + { + "epoch": 0.4980481622306717, + "grad_norm": 1.6435049772262573, + "learning_rate": 1.6853512754151556e-05, + "loss": 0.23624801635742188, + "step": 3684 + }, + { + "epoch": 0.4981833544571187, + "grad_norm": 0.7483974695205688, + "learning_rate": 1.684672058754752e-05, + "loss": 0.17313575744628906, + "step": 3685 + }, + { + "epoch": 0.4983185466835657, + "grad_norm": 1.5263569355010986, + "learning_rate": 1.6839928036446416e-05, + "loss": 0.2442340850830078, + "step": 3686 + }, + { + "epoch": 0.49845373891001266, + "grad_norm": 1.6226603984832764, + "learning_rate": 1.6833135102262473e-05, + "loss": 0.17629051208496094, + "step": 3687 + }, + { + "epoch": 0.49858893113645963, + "grad_norm": 1.338348388671875, + "learning_rate": 1.682634178641003e-05, + "loss": 0.17438220977783203, + "step": 3688 + }, + { + "epoch": 0.4987241233629066, + "grad_norm": 1.0375688076019287, + "learning_rate": 1.6819548090303485e-05, + "loss": 0.24475574493408203, + "step": 3689 + }, + { + "epoch": 0.4988593155893536, + "grad_norm": 0.9845676422119141, + "learning_rate": 1.6812754015357328e-05, + "loss": 0.20128250122070312, + "step": 3690 + }, + { + "epoch": 0.49899450781580057, + "grad_norm": 0.7193557024002075, + "learning_rate": 1.680595956298612e-05, + "loss": 0.17079925537109375, + "step": 3691 + }, + { + "epoch": 0.49912970004224755, + "grad_norm": 1.9443861246109009, + "learning_rate": 1.6799164734604497e-05, + "loss": 0.18760833144187927, + "step": 3692 + }, + { + "epoch": 0.4992648922686945, + "grad_norm": 1.9532657861709595, + "learning_rate": 1.6792369531627186e-05, + "loss": 0.21140480041503906, + "step": 3693 + }, + { + "epoch": 0.4994000844951415, + "grad_norm": 0.8906942009925842, + "learning_rate": 1.6785573955468974e-05, + "loss": 0.16641902923583984, + "step": 3694 + }, + { + "epoch": 0.4995352767215885, + "grad_norm": 0.9688380360603333, + "learning_rate": 1.6778778007544745e-05, + "loss": 0.12552356719970703, + "step": 3695 + }, + { + "epoch": 0.49967046894803546, + "grad_norm": 0.8391817808151245, + "learning_rate": 1.6771981689269452e-05, + "loss": 0.18170928955078125, + "step": 3696 + }, + { + "epoch": 0.49980566117448244, + "grad_norm": 0.7562984824180603, + "learning_rate": 1.6765185002058123e-05, + "loss": 0.14937639236450195, + "step": 3697 + }, + { + "epoch": 0.4999408534009294, + "grad_norm": 1.4676051139831543, + "learning_rate": 1.6758387947325856e-05, + "loss": 0.20389175415039062, + "step": 3698 + }, + { + "epoch": 0.5000760456273764, + "grad_norm": 0.8659172654151917, + "learning_rate": 1.6751590526487843e-05, + "loss": 0.13585340976715088, + "step": 3699 + }, + { + "epoch": 0.5002112378538234, + "grad_norm": 2.341825485229492, + "learning_rate": 1.6744792740959347e-05, + "loss": 0.21474266052246094, + "step": 3700 + }, + { + "epoch": 0.5003464300802704, + "grad_norm": 1.3531475067138672, + "learning_rate": 1.6737994592155697e-05, + "loss": 0.2002429962158203, + "step": 3701 + }, + { + "epoch": 0.5004816223067173, + "grad_norm": 0.9007554650306702, + "learning_rate": 1.6731196081492307e-05, + "loss": 0.16272640228271484, + "step": 3702 + }, + { + "epoch": 0.5006168145331643, + "grad_norm": 1.2967593669891357, + "learning_rate": 1.6724397210384655e-05, + "loss": 0.13285541534423828, + "step": 3703 + }, + { + "epoch": 0.5007520067596113, + "grad_norm": 1.3202705383300781, + "learning_rate": 1.6717597980248308e-05, + "loss": 0.19582271575927734, + "step": 3704 + }, + { + "epoch": 0.5008871989860583, + "grad_norm": 0.8645565509796143, + "learning_rate": 1.6710798392498904e-05, + "loss": 0.18326187133789062, + "step": 3705 + }, + { + "epoch": 0.5010223912125052, + "grad_norm": 2.8827970027923584, + "learning_rate": 1.6703998448552154e-05, + "loss": 0.2470703125, + "step": 3706 + }, + { + "epoch": 0.5011575834389522, + "grad_norm": 2.1755011081695557, + "learning_rate": 1.669719814982384e-05, + "loss": 0.1576862335205078, + "step": 3707 + }, + { + "epoch": 0.5012927756653992, + "grad_norm": 1.4156084060668945, + "learning_rate": 1.6690397497729818e-05, + "loss": 0.23415184020996094, + "step": 3708 + }, + { + "epoch": 0.5014279678918462, + "grad_norm": 1.570707082748413, + "learning_rate": 1.6683596493686028e-05, + "loss": 0.19475746154785156, + "step": 3709 + }, + { + "epoch": 0.5015631601182932, + "grad_norm": 0.8530634045600891, + "learning_rate": 1.667679513910846e-05, + "loss": 0.17422962188720703, + "step": 3710 + }, + { + "epoch": 0.5016983523447401, + "grad_norm": 1.0972626209259033, + "learning_rate": 1.666999343541321e-05, + "loss": 0.16504573822021484, + "step": 3711 + }, + { + "epoch": 0.5018335445711871, + "grad_norm": 1.0667897462844849, + "learning_rate": 1.6663191384016422e-05, + "loss": 0.19575119018554688, + "step": 3712 + }, + { + "epoch": 0.5019687367976341, + "grad_norm": 2.908559560775757, + "learning_rate": 1.6656388986334315e-05, + "loss": 0.18538391590118408, + "step": 3713 + }, + { + "epoch": 0.5021039290240811, + "grad_norm": 0.9312616586685181, + "learning_rate": 1.6649586243783186e-05, + "loss": 0.21172237396240234, + "step": 3714 + }, + { + "epoch": 0.502239121250528, + "grad_norm": 1.4984121322631836, + "learning_rate": 1.6642783157779405e-05, + "loss": 0.17607545852661133, + "step": 3715 + }, + { + "epoch": 0.502374313476975, + "grad_norm": 0.6843975782394409, + "learning_rate": 1.6635979729739417e-05, + "loss": 0.1334661841392517, + "step": 3716 + }, + { + "epoch": 0.502509505703422, + "grad_norm": 1.4057646989822388, + "learning_rate": 1.662917596107972e-05, + "loss": 0.23779678344726562, + "step": 3717 + }, + { + "epoch": 0.502644697929869, + "grad_norm": 0.9410556554794312, + "learning_rate": 1.6622371853216904e-05, + "loss": 0.17829036712646484, + "step": 3718 + }, + { + "epoch": 0.502779890156316, + "grad_norm": 1.2214949131011963, + "learning_rate": 1.661556740756761e-05, + "loss": 0.1530008316040039, + "step": 3719 + }, + { + "epoch": 0.502915082382763, + "grad_norm": 0.9343261122703552, + "learning_rate": 1.6608762625548572e-05, + "loss": 0.19121551513671875, + "step": 3720 + }, + { + "epoch": 0.5030502746092099, + "grad_norm": 1.7384676933288574, + "learning_rate": 1.6601957508576573e-05, + "loss": 0.21249008178710938, + "step": 3721 + }, + { + "epoch": 0.5031854668356569, + "grad_norm": 1.3669605255126953, + "learning_rate": 1.659515205806848e-05, + "loss": 0.1611955165863037, + "step": 3722 + }, + { + "epoch": 0.5033206590621039, + "grad_norm": 1.129101037979126, + "learning_rate": 1.6588346275441224e-05, + "loss": 0.15634536743164062, + "step": 3723 + }, + { + "epoch": 0.5034558512885509, + "grad_norm": 0.840228259563446, + "learning_rate": 1.65815401621118e-05, + "loss": 0.1559734344482422, + "step": 3724 + }, + { + "epoch": 0.5035910435149978, + "grad_norm": 0.9109746217727661, + "learning_rate": 1.657473371949728e-05, + "loss": 0.14329051971435547, + "step": 3725 + }, + { + "epoch": 0.5037262357414448, + "grad_norm": 0.9348533749580383, + "learning_rate": 1.6567926949014805e-05, + "loss": 0.18874645233154297, + "step": 3726 + }, + { + "epoch": 0.5038614279678918, + "grad_norm": 1.6442244052886963, + "learning_rate": 1.6561119852081574e-05, + "loss": 0.1718158721923828, + "step": 3727 + }, + { + "epoch": 0.5039966201943388, + "grad_norm": 1.7377567291259766, + "learning_rate": 1.6554312430114868e-05, + "loss": 0.16811561584472656, + "step": 3728 + }, + { + "epoch": 0.5041318124207858, + "grad_norm": 0.892093300819397, + "learning_rate": 1.6547504684532026e-05, + "loss": 0.09259796142578125, + "step": 3729 + }, + { + "epoch": 0.5042670046472327, + "grad_norm": 1.0890238285064697, + "learning_rate": 1.6540696616750454e-05, + "loss": 0.14309978485107422, + "step": 3730 + }, + { + "epoch": 0.5044021968736797, + "grad_norm": 1.2063267230987549, + "learning_rate": 1.6533888228187628e-05, + "loss": 0.1577291488647461, + "step": 3731 + }, + { + "epoch": 0.5045373891001267, + "grad_norm": 0.7079459428787231, + "learning_rate": 1.6527079520261103e-05, + "loss": 0.1163473129272461, + "step": 3732 + }, + { + "epoch": 0.5046725813265738, + "grad_norm": 1.76418936252594, + "learning_rate": 1.6520270494388472e-05, + "loss": 0.23038387298583984, + "step": 3733 + }, + { + "epoch": 0.5048077735530208, + "grad_norm": 1.257030725479126, + "learning_rate": 1.6513461151987418e-05, + "loss": 0.1804513931274414, + "step": 3734 + }, + { + "epoch": 0.5049429657794677, + "grad_norm": 2.04951810836792, + "learning_rate": 1.6506651494475678e-05, + "loss": 0.17595577239990234, + "step": 3735 + }, + { + "epoch": 0.5050781580059147, + "grad_norm": 1.092045783996582, + "learning_rate": 1.6499841523271062e-05, + "loss": 0.17078399658203125, + "step": 3736 + }, + { + "epoch": 0.5052133502323617, + "grad_norm": 1.4462261199951172, + "learning_rate": 1.649303123979145e-05, + "loss": 0.1940937042236328, + "step": 3737 + }, + { + "epoch": 0.5053485424588087, + "grad_norm": 1.9332692623138428, + "learning_rate": 1.648622064545477e-05, + "loss": 0.1506175994873047, + "step": 3738 + }, + { + "epoch": 0.5054837346852556, + "grad_norm": 0.7635692358016968, + "learning_rate": 1.6479409741679025e-05, + "loss": 0.17371559143066406, + "step": 3739 + }, + { + "epoch": 0.5056189269117026, + "grad_norm": 1.1756696701049805, + "learning_rate": 1.6472598529882277e-05, + "loss": 0.15775585174560547, + "step": 3740 + }, + { + "epoch": 0.5057541191381496, + "grad_norm": 0.6450092792510986, + "learning_rate": 1.646578701148267e-05, + "loss": 0.1482095718383789, + "step": 3741 + }, + { + "epoch": 0.5058893113645966, + "grad_norm": 1.0582799911499023, + "learning_rate": 1.6458975187898384e-05, + "loss": 0.21964550018310547, + "step": 3742 + }, + { + "epoch": 0.5060245035910436, + "grad_norm": 0.8152438998222351, + "learning_rate": 1.6452163060547687e-05, + "loss": 0.189666748046875, + "step": 3743 + }, + { + "epoch": 0.5061596958174905, + "grad_norm": 0.7217793464660645, + "learning_rate": 1.64453506308489e-05, + "loss": 0.1564791202545166, + "step": 3744 + }, + { + "epoch": 0.5062948880439375, + "grad_norm": 3.440624952316284, + "learning_rate": 1.64385379002204e-05, + "loss": 0.24029541015625, + "step": 3745 + }, + { + "epoch": 0.5064300802703845, + "grad_norm": 0.9713578224182129, + "learning_rate": 1.643172487008064e-05, + "loss": 0.16650390625, + "step": 3746 + }, + { + "epoch": 0.5065652724968315, + "grad_norm": 1.1922633647918701, + "learning_rate": 1.6424911541848124e-05, + "loss": 0.1462726593017578, + "step": 3747 + }, + { + "epoch": 0.5067004647232785, + "grad_norm": 1.1119835376739502, + "learning_rate": 1.641809791694143e-05, + "loss": 0.18700838088989258, + "step": 3748 + }, + { + "epoch": 0.5068356569497254, + "grad_norm": 1.0173624753952026, + "learning_rate": 1.6411283996779184e-05, + "loss": 0.19211769104003906, + "step": 3749 + }, + { + "epoch": 0.5069708491761724, + "grad_norm": 1.719925045967102, + "learning_rate": 1.6404469782780088e-05, + "loss": 0.1684722900390625, + "step": 3750 + }, + { + "epoch": 0.5071060414026194, + "grad_norm": 1.3538093566894531, + "learning_rate": 1.639765527636289e-05, + "loss": 0.2041792869567871, + "step": 3751 + }, + { + "epoch": 0.5072412336290664, + "grad_norm": 0.927183985710144, + "learning_rate": 1.639084047894641e-05, + "loss": 0.19186663627624512, + "step": 3752 + }, + { + "epoch": 0.5073764258555133, + "grad_norm": 1.4545674324035645, + "learning_rate": 1.638402539194953e-05, + "loss": 0.18865203857421875, + "step": 3753 + }, + { + "epoch": 0.5075116180819603, + "grad_norm": 0.9324459433555603, + "learning_rate": 1.6377210016791182e-05, + "loss": 0.15943527221679688, + "step": 3754 + }, + { + "epoch": 0.5076468103084073, + "grad_norm": 1.7523940801620483, + "learning_rate": 1.6370394354890364e-05, + "loss": 0.19582366943359375, + "step": 3755 + }, + { + "epoch": 0.5077820025348543, + "grad_norm": 1.3089617490768433, + "learning_rate": 1.636357840766613e-05, + "loss": 0.2016620635986328, + "step": 3756 + }, + { + "epoch": 0.5079171947613013, + "grad_norm": 0.8876031637191772, + "learning_rate": 1.6356762176537606e-05, + "loss": 0.11628293991088867, + "step": 3757 + }, + { + "epoch": 0.5080523869877482, + "grad_norm": 1.1217848062515259, + "learning_rate": 1.6349945662923953e-05, + "loss": 0.1965618133544922, + "step": 3758 + }, + { + "epoch": 0.5081875792141952, + "grad_norm": 0.9714937806129456, + "learning_rate": 1.634312886824442e-05, + "loss": 0.18343448638916016, + "step": 3759 + }, + { + "epoch": 0.5083227714406422, + "grad_norm": 0.7153787612915039, + "learning_rate": 1.6336311793918298e-05, + "loss": 0.16303634643554688, + "step": 3760 + }, + { + "epoch": 0.5084579636670892, + "grad_norm": 1.2874958515167236, + "learning_rate": 1.6329494441364925e-05, + "loss": 0.21447038650512695, + "step": 3761 + }, + { + "epoch": 0.5085931558935362, + "grad_norm": 0.9138535857200623, + "learning_rate": 1.6322676812003727e-05, + "loss": 0.15356206893920898, + "step": 3762 + }, + { + "epoch": 0.5087283481199831, + "grad_norm": 1.6435976028442383, + "learning_rate": 1.631585890725416e-05, + "loss": 0.17088913917541504, + "step": 3763 + }, + { + "epoch": 0.5088635403464301, + "grad_norm": 1.1555720567703247, + "learning_rate": 1.630904072853575e-05, + "loss": 0.15492010116577148, + "step": 3764 + }, + { + "epoch": 0.5089987325728771, + "grad_norm": 1.2766672372817993, + "learning_rate": 1.6302222277268085e-05, + "loss": 0.2268085479736328, + "step": 3765 + }, + { + "epoch": 0.5091339247993241, + "grad_norm": 0.8496496081352234, + "learning_rate": 1.6295403554870794e-05, + "loss": 0.1459789276123047, + "step": 3766 + }, + { + "epoch": 0.509269117025771, + "grad_norm": 1.3848164081573486, + "learning_rate": 1.6288584562763572e-05, + "loss": 0.2046680450439453, + "step": 3767 + }, + { + "epoch": 0.509404309252218, + "grad_norm": 2.6479380130767822, + "learning_rate": 1.6281765302366176e-05, + "loss": 0.24046897888183594, + "step": 3768 + }, + { + "epoch": 0.509539501478665, + "grad_norm": 1.484397530555725, + "learning_rate": 1.6274945775098412e-05, + "loss": 0.16905546188354492, + "step": 3769 + }, + { + "epoch": 0.509674693705112, + "grad_norm": 0.9125504493713379, + "learning_rate": 1.6268125982380135e-05, + "loss": 0.15143680572509766, + "step": 3770 + }, + { + "epoch": 0.509809885931559, + "grad_norm": 1.1373809576034546, + "learning_rate": 1.626130592563127e-05, + "loss": 0.23040485382080078, + "step": 3771 + }, + { + "epoch": 0.5099450781580059, + "grad_norm": 0.9369140267372131, + "learning_rate": 1.6254485606271778e-05, + "loss": 0.13931798934936523, + "step": 3772 + }, + { + "epoch": 0.5100802703844529, + "grad_norm": 1.2104058265686035, + "learning_rate": 1.6247665025721698e-05, + "loss": 0.2452993392944336, + "step": 3773 + }, + { + "epoch": 0.5102154626108999, + "grad_norm": 1.185890555381775, + "learning_rate": 1.62408441854011e-05, + "loss": 0.2255420684814453, + "step": 3774 + }, + { + "epoch": 0.5103506548373469, + "grad_norm": 0.9688547849655151, + "learning_rate": 1.6234023086730136e-05, + "loss": 0.1433429718017578, + "step": 3775 + }, + { + "epoch": 0.5104858470637939, + "grad_norm": 0.9642335772514343, + "learning_rate": 1.622720173112898e-05, + "loss": 0.16822528839111328, + "step": 3776 + }, + { + "epoch": 0.5106210392902408, + "grad_norm": 1.0118387937545776, + "learning_rate": 1.6220380120017874e-05, + "loss": 0.15408706665039062, + "step": 3777 + }, + { + "epoch": 0.5107562315166878, + "grad_norm": 2.235281467437744, + "learning_rate": 1.6213558254817128e-05, + "loss": 0.1883697509765625, + "step": 3778 + }, + { + "epoch": 0.5108914237431348, + "grad_norm": 1.6634868383407593, + "learning_rate": 1.6206736136947074e-05, + "loss": 0.1699237823486328, + "step": 3779 + }, + { + "epoch": 0.5110266159695818, + "grad_norm": 0.9525468349456787, + "learning_rate": 1.6199913767828126e-05, + "loss": 0.17570972442626953, + "step": 3780 + }, + { + "epoch": 0.5111618081960287, + "grad_norm": 2.252650499343872, + "learning_rate": 1.6193091148880733e-05, + "loss": 0.1870652437210083, + "step": 3781 + }, + { + "epoch": 0.5112970004224757, + "grad_norm": 1.009068489074707, + "learning_rate": 1.61862682815254e-05, + "loss": 0.18193817138671875, + "step": 3782 + }, + { + "epoch": 0.5114321926489227, + "grad_norm": 1.1845040321350098, + "learning_rate": 1.617944516718268e-05, + "loss": 0.17667770385742188, + "step": 3783 + }, + { + "epoch": 0.5115673848753697, + "grad_norm": 0.9172955751419067, + "learning_rate": 1.617262180727319e-05, + "loss": 0.24310684204101562, + "step": 3784 + }, + { + "epoch": 0.5117025771018167, + "grad_norm": 3.2803969383239746, + "learning_rate": 1.6165798203217588e-05, + "loss": 0.17811203002929688, + "step": 3785 + }, + { + "epoch": 0.5118377693282636, + "grad_norm": 0.9216747283935547, + "learning_rate": 1.6158974356436585e-05, + "loss": 0.19390869140625, + "step": 3786 + }, + { + "epoch": 0.5119729615547106, + "grad_norm": 1.0886200666427612, + "learning_rate": 1.6152150268350938e-05, + "loss": 0.2002582550048828, + "step": 3787 + }, + { + "epoch": 0.5121081537811576, + "grad_norm": 0.9912815093994141, + "learning_rate": 1.6145325940381458e-05, + "loss": 0.1157693862915039, + "step": 3788 + }, + { + "epoch": 0.5122433460076046, + "grad_norm": 1.2103458642959595, + "learning_rate": 1.6138501373949018e-05, + "loss": 0.207794189453125, + "step": 3789 + }, + { + "epoch": 0.5123785382340516, + "grad_norm": 0.9280535578727722, + "learning_rate": 1.613167657047451e-05, + "loss": 0.13831615447998047, + "step": 3790 + }, + { + "epoch": 0.5125137304604985, + "grad_norm": 1.5934362411499023, + "learning_rate": 1.612485153137891e-05, + "loss": 0.10633134841918945, + "step": 3791 + }, + { + "epoch": 0.5126489226869455, + "grad_norm": 2.1842994689941406, + "learning_rate": 1.611802625808323e-05, + "loss": 0.17312049865722656, + "step": 3792 + }, + { + "epoch": 0.5127841149133925, + "grad_norm": 2.5150809288024902, + "learning_rate": 1.611120075200851e-05, + "loss": 0.20072698593139648, + "step": 3793 + }, + { + "epoch": 0.5129193071398395, + "grad_norm": 1.4305858612060547, + "learning_rate": 1.610437501457587e-05, + "loss": 0.19089317321777344, + "step": 3794 + }, + { + "epoch": 0.5130544993662864, + "grad_norm": 2.1283395290374756, + "learning_rate": 1.6097549047206464e-05, + "loss": 0.18995332717895508, + "step": 3795 + }, + { + "epoch": 0.5131896915927334, + "grad_norm": 1.5025591850280762, + "learning_rate": 1.6090722851321497e-05, + "loss": 0.15617609024047852, + "step": 3796 + }, + { + "epoch": 0.5133248838191804, + "grad_norm": 1.674791932106018, + "learning_rate": 1.6083896428342213e-05, + "loss": 0.14823579788208008, + "step": 3797 + }, + { + "epoch": 0.5134600760456274, + "grad_norm": 0.7367554903030396, + "learning_rate": 1.6077069779689915e-05, + "loss": 0.14664316177368164, + "step": 3798 + }, + { + "epoch": 0.5135952682720744, + "grad_norm": 0.8756303787231445, + "learning_rate": 1.607024290678594e-05, + "loss": 0.13385021686553955, + "step": 3799 + }, + { + "epoch": 0.5137304604985213, + "grad_norm": 0.7423316836357117, + "learning_rate": 1.6063415811051686e-05, + "loss": 0.17638158798217773, + "step": 3800 + }, + { + "epoch": 0.5138656527249683, + "grad_norm": 1.1345577239990234, + "learning_rate": 1.6056588493908596e-05, + "loss": 0.18978214263916016, + "step": 3801 + }, + { + "epoch": 0.5140008449514153, + "grad_norm": 1.9460718631744385, + "learning_rate": 1.604976095677814e-05, + "loss": 0.23801612854003906, + "step": 3802 + }, + { + "epoch": 0.5141360371778623, + "grad_norm": 0.9300512671470642, + "learning_rate": 1.604293320108186e-05, + "loss": 0.12186884880065918, + "step": 3803 + }, + { + "epoch": 0.5142712294043092, + "grad_norm": 1.2782626152038574, + "learning_rate": 1.603610522824132e-05, + "loss": 0.1538238525390625, + "step": 3804 + }, + { + "epoch": 0.5144064216307562, + "grad_norm": 1.2012690305709839, + "learning_rate": 1.6029277039678153e-05, + "loss": 0.17549800872802734, + "step": 3805 + }, + { + "epoch": 0.5145416138572032, + "grad_norm": 1.1604268550872803, + "learning_rate": 1.602244863681401e-05, + "loss": 0.16648483276367188, + "step": 3806 + }, + { + "epoch": 0.5146768060836502, + "grad_norm": 0.9503032565116882, + "learning_rate": 1.6015620021070613e-05, + "loss": 0.16939926147460938, + "step": 3807 + }, + { + "epoch": 0.5148119983100972, + "grad_norm": 1.2746299505233765, + "learning_rate": 1.6008791193869714e-05, + "loss": 0.14973747730255127, + "step": 3808 + }, + { + "epoch": 0.5149471905365441, + "grad_norm": 1.1287305355072021, + "learning_rate": 1.6001962156633102e-05, + "loss": 0.1815018653869629, + "step": 3809 + }, + { + "epoch": 0.5150823827629911, + "grad_norm": 1.3691349029541016, + "learning_rate": 1.5995132910782632e-05, + "loss": 0.19733047485351562, + "step": 3810 + }, + { + "epoch": 0.5152175749894381, + "grad_norm": 0.9607386589050293, + "learning_rate": 1.5988303457740178e-05, + "loss": 0.15164661407470703, + "step": 3811 + }, + { + "epoch": 0.5153527672158851, + "grad_norm": 1.290601134300232, + "learning_rate": 1.598147379892768e-05, + "loss": 0.12627220153808594, + "step": 3812 + }, + { + "epoch": 0.515487959442332, + "grad_norm": 0.7313801050186157, + "learning_rate": 1.5974643935767098e-05, + "loss": 0.15091419219970703, + "step": 3813 + }, + { + "epoch": 0.515623151668779, + "grad_norm": 1.0458593368530273, + "learning_rate": 1.5967813869680452e-05, + "loss": 0.17737579345703125, + "step": 3814 + }, + { + "epoch": 0.515758343895226, + "grad_norm": 1.1229420900344849, + "learning_rate": 1.59609836020898e-05, + "loss": 0.19245529174804688, + "step": 3815 + }, + { + "epoch": 0.515893536121673, + "grad_norm": 1.4036811590194702, + "learning_rate": 1.5954153134417236e-05, + "loss": 0.201324462890625, + "step": 3816 + }, + { + "epoch": 0.51602872834812, + "grad_norm": 1.4691399335861206, + "learning_rate": 1.59473224680849e-05, + "loss": 0.1512455940246582, + "step": 3817 + }, + { + "epoch": 0.516163920574567, + "grad_norm": 0.9885697960853577, + "learning_rate": 1.5940491604514976e-05, + "loss": 0.15338516235351562, + "step": 3818 + }, + { + "epoch": 0.5162991128010139, + "grad_norm": 2.1241343021392822, + "learning_rate": 1.5933660545129683e-05, + "loss": 0.2154073715209961, + "step": 3819 + }, + { + "epoch": 0.5164343050274609, + "grad_norm": 1.6376478672027588, + "learning_rate": 1.5926829291351288e-05, + "loss": 0.1667041778564453, + "step": 3820 + }, + { + "epoch": 0.5165694972539079, + "grad_norm": 1.0530023574829102, + "learning_rate": 1.591999784460209e-05, + "loss": 0.14536833763122559, + "step": 3821 + }, + { + "epoch": 0.5167046894803549, + "grad_norm": 1.5427544116973877, + "learning_rate": 1.5913166206304435e-05, + "loss": 0.22985076904296875, + "step": 3822 + }, + { + "epoch": 0.5168398817068018, + "grad_norm": 1.008499026298523, + "learning_rate": 1.5906334377880707e-05, + "loss": 0.15213680267333984, + "step": 3823 + }, + { + "epoch": 0.5169750739332488, + "grad_norm": 1.6367738246917725, + "learning_rate": 1.589950236075333e-05, + "loss": 0.2294178009033203, + "step": 3824 + }, + { + "epoch": 0.5171102661596958, + "grad_norm": 1.2452484369277954, + "learning_rate": 1.5892670156344764e-05, + "loss": 0.18032073974609375, + "step": 3825 + }, + { + "epoch": 0.5172454583861428, + "grad_norm": 1.0863773822784424, + "learning_rate": 1.588583776607751e-05, + "loss": 0.16354870796203613, + "step": 3826 + }, + { + "epoch": 0.5173806506125898, + "grad_norm": 2.9368748664855957, + "learning_rate": 1.5879005191374106e-05, + "loss": 0.1796102523803711, + "step": 3827 + }, + { + "epoch": 0.5175158428390367, + "grad_norm": 1.875327467918396, + "learning_rate": 1.587217243365714e-05, + "loss": 0.2396857738494873, + "step": 3828 + }, + { + "epoch": 0.5176510350654837, + "grad_norm": 2.1258962154388428, + "learning_rate": 1.586533949434922e-05, + "loss": 0.14569997787475586, + "step": 3829 + }, + { + "epoch": 0.5177862272919307, + "grad_norm": 2.010382890701294, + "learning_rate": 1.5858506374872998e-05, + "loss": 0.17071533203125, + "step": 3830 + }, + { + "epoch": 0.5179214195183777, + "grad_norm": 1.021146297454834, + "learning_rate": 1.5851673076651178e-05, + "loss": 0.17508220672607422, + "step": 3831 + }, + { + "epoch": 0.5180566117448246, + "grad_norm": 1.0530067682266235, + "learning_rate": 1.5844839601106477e-05, + "loss": 0.2031698226928711, + "step": 3832 + }, + { + "epoch": 0.5181918039712716, + "grad_norm": 0.8794388175010681, + "learning_rate": 1.583800594966167e-05, + "loss": 0.18463897705078125, + "step": 3833 + }, + { + "epoch": 0.5183269961977186, + "grad_norm": 1.0238717794418335, + "learning_rate": 1.583117212373955e-05, + "loss": 0.18951892852783203, + "step": 3834 + }, + { + "epoch": 0.5184621884241656, + "grad_norm": 1.1634663343429565, + "learning_rate": 1.5824338124762967e-05, + "loss": 0.14718055725097656, + "step": 3835 + }, + { + "epoch": 0.5185973806506126, + "grad_norm": 0.9456940293312073, + "learning_rate": 1.581750395415479e-05, + "loss": 0.13039398193359375, + "step": 3836 + }, + { + "epoch": 0.5187325728770595, + "grad_norm": 1.6109614372253418, + "learning_rate": 1.5810669613337922e-05, + "loss": 0.2257823944091797, + "step": 3837 + }, + { + "epoch": 0.5188677651035065, + "grad_norm": 0.9401279091835022, + "learning_rate": 1.5803835103735327e-05, + "loss": 0.15442514419555664, + "step": 3838 + }, + { + "epoch": 0.5190029573299535, + "grad_norm": 1.3372737169265747, + "learning_rate": 1.5797000426769973e-05, + "loss": 0.18842506408691406, + "step": 3839 + }, + { + "epoch": 0.5191381495564005, + "grad_norm": 1.5018569231033325, + "learning_rate": 1.579016558386488e-05, + "loss": 0.2101898193359375, + "step": 3840 + }, + { + "epoch": 0.5192733417828475, + "grad_norm": 2.964165210723877, + "learning_rate": 1.5783330576443096e-05, + "loss": 0.19091320037841797, + "step": 3841 + }, + { + "epoch": 0.5194085340092944, + "grad_norm": 1.3822510242462158, + "learning_rate": 1.5776495405927716e-05, + "loss": 0.1699199676513672, + "step": 3842 + }, + { + "epoch": 0.5195437262357414, + "grad_norm": 1.023774266242981, + "learning_rate": 1.5769660073741844e-05, + "loss": 0.19399261474609375, + "step": 3843 + }, + { + "epoch": 0.5196789184621884, + "grad_norm": 2.095381259918213, + "learning_rate": 1.5762824581308645e-05, + "loss": 0.1503143310546875, + "step": 3844 + }, + { + "epoch": 0.5198141106886354, + "grad_norm": 1.578182578086853, + "learning_rate": 1.5755988930051304e-05, + "loss": 0.16045784950256348, + "step": 3845 + }, + { + "epoch": 0.5199493029150823, + "grad_norm": 0.5900046825408936, + "learning_rate": 1.5749153121393025e-05, + "loss": 0.11719131469726562, + "step": 3846 + }, + { + "epoch": 0.5200844951415293, + "grad_norm": 0.9304651021957397, + "learning_rate": 1.574231715675708e-05, + "loss": 0.1772136688232422, + "step": 3847 + }, + { + "epoch": 0.5202196873679763, + "grad_norm": 0.824239194393158, + "learning_rate": 1.573548103756674e-05, + "loss": 0.14716565608978271, + "step": 3848 + }, + { + "epoch": 0.5203548795944233, + "grad_norm": 0.9432123303413391, + "learning_rate": 1.572864476524533e-05, + "loss": 0.17438125610351562, + "step": 3849 + }, + { + "epoch": 0.5204900718208703, + "grad_norm": 1.3992304801940918, + "learning_rate": 1.5721808341216195e-05, + "loss": 0.11055135726928711, + "step": 3850 + }, + { + "epoch": 0.5206252640473172, + "grad_norm": 0.7463445067405701, + "learning_rate": 1.571497176690271e-05, + "loss": 0.20017051696777344, + "step": 3851 + }, + { + "epoch": 0.5207604562737642, + "grad_norm": 1.0995190143585205, + "learning_rate": 1.570813504372829e-05, + "loss": 0.20088672637939453, + "step": 3852 + }, + { + "epoch": 0.5208956485002112, + "grad_norm": 1.2843900918960571, + "learning_rate": 1.570129817311638e-05, + "loss": 0.19963359832763672, + "step": 3853 + }, + { + "epoch": 0.5210308407266582, + "grad_norm": 1.325605869293213, + "learning_rate": 1.5694461156490452e-05, + "loss": 0.20557022094726562, + "step": 3854 + }, + { + "epoch": 0.5211660329531052, + "grad_norm": 0.8538478016853333, + "learning_rate": 1.5687623995274008e-05, + "loss": 0.11966896057128906, + "step": 3855 + }, + { + "epoch": 0.5213012251795521, + "grad_norm": 0.7925046682357788, + "learning_rate": 1.568078669089058e-05, + "loss": 0.18639755249023438, + "step": 3856 + }, + { + "epoch": 0.5214364174059991, + "grad_norm": 1.9237161874771118, + "learning_rate": 1.567394924476373e-05, + "loss": 0.20045089721679688, + "step": 3857 + }, + { + "epoch": 0.5215716096324461, + "grad_norm": 1.660418152809143, + "learning_rate": 1.5667111658317057e-05, + "loss": 0.1979236602783203, + "step": 3858 + }, + { + "epoch": 0.5217068018588931, + "grad_norm": 0.50294429063797, + "learning_rate": 1.5660273932974177e-05, + "loss": 0.12451362609863281, + "step": 3859 + }, + { + "epoch": 0.52184199408534, + "grad_norm": 2.575995445251465, + "learning_rate": 1.5653436070158743e-05, + "loss": 0.1822052001953125, + "step": 3860 + }, + { + "epoch": 0.521977186311787, + "grad_norm": 1.4888801574707031, + "learning_rate": 1.564659807129444e-05, + "loss": 0.18964385986328125, + "step": 3861 + }, + { + "epoch": 0.522112378538234, + "grad_norm": 0.8506982326507568, + "learning_rate": 1.5639759937804962e-05, + "loss": 0.16748619079589844, + "step": 3862 + }, + { + "epoch": 0.522247570764681, + "grad_norm": 1.2662361860275269, + "learning_rate": 1.5632921671114055e-05, + "loss": 0.20804977416992188, + "step": 3863 + }, + { + "epoch": 0.522382762991128, + "grad_norm": 0.7970473170280457, + "learning_rate": 1.5626083272645485e-05, + "loss": 0.17293357849121094, + "step": 3864 + }, + { + "epoch": 0.5225179552175749, + "grad_norm": 1.7305662631988525, + "learning_rate": 1.5619244743823038e-05, + "loss": 0.1803497076034546, + "step": 3865 + }, + { + "epoch": 0.5226531474440219, + "grad_norm": 1.322920799255371, + "learning_rate": 1.5612406086070534e-05, + "loss": 0.20030593872070312, + "step": 3866 + }, + { + "epoch": 0.5227883396704689, + "grad_norm": 0.6300215125083923, + "learning_rate": 1.560556730081181e-05, + "loss": 0.15634965896606445, + "step": 3867 + }, + { + "epoch": 0.5229235318969159, + "grad_norm": 1.0737369060516357, + "learning_rate": 1.5598728389470754e-05, + "loss": 0.2353343963623047, + "step": 3868 + }, + { + "epoch": 0.523058724123363, + "grad_norm": 1.2187846899032593, + "learning_rate": 1.5591889353471245e-05, + "loss": 0.17796707153320312, + "step": 3869 + }, + { + "epoch": 0.5231939163498099, + "grad_norm": 1.4197511672973633, + "learning_rate": 1.5585050194237226e-05, + "loss": 0.1568613052368164, + "step": 3870 + }, + { + "epoch": 0.5233291085762569, + "grad_norm": 1.05025315284729, + "learning_rate": 1.557821091319263e-05, + "loss": 0.22429275512695312, + "step": 3871 + }, + { + "epoch": 0.5234643008027039, + "grad_norm": 2.2244699001312256, + "learning_rate": 1.5571371511761446e-05, + "loss": 0.20125532150268555, + "step": 3872 + }, + { + "epoch": 0.5235994930291509, + "grad_norm": 1.295419454574585, + "learning_rate": 1.5564531991367658e-05, + "loss": 0.16208553314208984, + "step": 3873 + }, + { + "epoch": 0.5237346852555979, + "grad_norm": 1.144305944442749, + "learning_rate": 1.5557692353435302e-05, + "loss": 0.17605972290039062, + "step": 3874 + }, + { + "epoch": 0.5238698774820448, + "grad_norm": 0.6235769987106323, + "learning_rate": 1.5550852599388424e-05, + "loss": 0.13022327423095703, + "step": 3875 + }, + { + "epoch": 0.5240050697084918, + "grad_norm": 2.545581817626953, + "learning_rate": 1.5544012730651096e-05, + "loss": 0.2225794792175293, + "step": 3876 + }, + { + "epoch": 0.5241402619349388, + "grad_norm": 0.7501353621482849, + "learning_rate": 1.5537172748647422e-05, + "loss": 0.13853168487548828, + "step": 3877 + }, + { + "epoch": 0.5242754541613858, + "grad_norm": 1.1100043058395386, + "learning_rate": 1.553033265480151e-05, + "loss": 0.17278385162353516, + "step": 3878 + }, + { + "epoch": 0.5244106463878327, + "grad_norm": 1.5144108533859253, + "learning_rate": 1.552349245053752e-05, + "loss": 0.2465207576751709, + "step": 3879 + }, + { + "epoch": 0.5245458386142797, + "grad_norm": 1.1437766551971436, + "learning_rate": 1.5516652137279597e-05, + "loss": 0.1776876449584961, + "step": 3880 + }, + { + "epoch": 0.5246810308407267, + "grad_norm": 0.7986635565757751, + "learning_rate": 1.5509811716451955e-05, + "loss": 0.1603860855102539, + "step": 3881 + }, + { + "epoch": 0.5248162230671737, + "grad_norm": 0.7467828392982483, + "learning_rate": 1.550297118947879e-05, + "loss": 0.16000699996948242, + "step": 3882 + }, + { + "epoch": 0.5249514152936207, + "grad_norm": 1.5045711994171143, + "learning_rate": 1.5496130557784343e-05, + "loss": 0.17192697525024414, + "step": 3883 + }, + { + "epoch": 0.5250866075200676, + "grad_norm": 2.0142416954040527, + "learning_rate": 1.5489289822792868e-05, + "loss": 0.15709877014160156, + "step": 3884 + }, + { + "epoch": 0.5252217997465146, + "grad_norm": 1.7170177698135376, + "learning_rate": 1.5482448985928645e-05, + "loss": 0.1925201416015625, + "step": 3885 + }, + { + "epoch": 0.5253569919729616, + "grad_norm": 0.6801953911781311, + "learning_rate": 1.5475608048615964e-05, + "loss": 0.13994693756103516, + "step": 3886 + }, + { + "epoch": 0.5254921841994086, + "grad_norm": 1.4294174909591675, + "learning_rate": 1.546876701227916e-05, + "loss": 0.17708587646484375, + "step": 3887 + }, + { + "epoch": 0.5256273764258556, + "grad_norm": 0.9180756211280823, + "learning_rate": 1.5461925878342558e-05, + "loss": 0.20695972442626953, + "step": 3888 + }, + { + "epoch": 0.5257625686523025, + "grad_norm": 0.8466778993606567, + "learning_rate": 1.5455084648230527e-05, + "loss": 0.20012283325195312, + "step": 3889 + }, + { + "epoch": 0.5258977608787495, + "grad_norm": 1.1864982843399048, + "learning_rate": 1.5448243323367438e-05, + "loss": 0.20632076263427734, + "step": 3890 + }, + { + "epoch": 0.5260329531051965, + "grad_norm": 1.3084194660186768, + "learning_rate": 1.544140190517771e-05, + "loss": 0.2099590301513672, + "step": 3891 + }, + { + "epoch": 0.5261681453316435, + "grad_norm": 0.9205005764961243, + "learning_rate": 1.5434560395085745e-05, + "loss": 0.1709880828857422, + "step": 3892 + }, + { + "epoch": 0.5263033375580904, + "grad_norm": 0.9179989695549011, + "learning_rate": 1.542771879451599e-05, + "loss": 0.17632102966308594, + "step": 3893 + }, + { + "epoch": 0.5264385297845374, + "grad_norm": 0.6690536737442017, + "learning_rate": 1.54208771048929e-05, + "loss": 0.1686573028564453, + "step": 3894 + }, + { + "epoch": 0.5265737220109844, + "grad_norm": 1.3778417110443115, + "learning_rate": 1.5414035327640958e-05, + "loss": 0.21535873413085938, + "step": 3895 + }, + { + "epoch": 0.5267089142374314, + "grad_norm": 0.7535449862480164, + "learning_rate": 1.5407193464184644e-05, + "loss": 0.1034994125366211, + "step": 3896 + }, + { + "epoch": 0.5268441064638784, + "grad_norm": 2.7647576332092285, + "learning_rate": 1.5400351515948485e-05, + "loss": 0.18512439727783203, + "step": 3897 + }, + { + "epoch": 0.5269792986903253, + "grad_norm": 1.2415040731430054, + "learning_rate": 1.5393509484357006e-05, + "loss": 0.21524810791015625, + "step": 3898 + }, + { + "epoch": 0.5271144909167723, + "grad_norm": 1.24103581905365, + "learning_rate": 1.538666737083475e-05, + "loss": 0.18302297592163086, + "step": 3899 + }, + { + "epoch": 0.5272496831432193, + "grad_norm": 0.8943217992782593, + "learning_rate": 1.537982517680629e-05, + "loss": 0.16332626342773438, + "step": 3900 + }, + { + "epoch": 0.5273848753696663, + "grad_norm": 1.4180735349655151, + "learning_rate": 1.5372982903696196e-05, + "loss": 0.1447221338748932, + "step": 3901 + }, + { + "epoch": 0.5275200675961133, + "grad_norm": 1.6521670818328857, + "learning_rate": 1.536614055292908e-05, + "loss": 0.18349266052246094, + "step": 3902 + }, + { + "epoch": 0.5276552598225602, + "grad_norm": 2.2906532287597656, + "learning_rate": 1.535929812592955e-05, + "loss": 0.1721649169921875, + "step": 3903 + }, + { + "epoch": 0.5277904520490072, + "grad_norm": 0.9075860381126404, + "learning_rate": 1.5352455624122227e-05, + "loss": 0.16194629669189453, + "step": 3904 + }, + { + "epoch": 0.5279256442754542, + "grad_norm": 2.6471893787384033, + "learning_rate": 1.5345613048931765e-05, + "loss": 0.207489013671875, + "step": 3905 + }, + { + "epoch": 0.5280608365019012, + "grad_norm": 2.408668279647827, + "learning_rate": 1.5338770401782822e-05, + "loss": 0.1840343475341797, + "step": 3906 + }, + { + "epoch": 0.5281960287283481, + "grad_norm": 2.299936294555664, + "learning_rate": 1.5331927684100077e-05, + "loss": 0.17118358612060547, + "step": 3907 + }, + { + "epoch": 0.5283312209547951, + "grad_norm": 1.0176359415054321, + "learning_rate": 1.5325084897308218e-05, + "loss": 0.1513686180114746, + "step": 3908 + }, + { + "epoch": 0.5284664131812421, + "grad_norm": 0.8170287609100342, + "learning_rate": 1.5318242042831952e-05, + "loss": 0.14023494720458984, + "step": 3909 + }, + { + "epoch": 0.5286016054076891, + "grad_norm": 1.2909241914749146, + "learning_rate": 1.5311399122095992e-05, + "loss": 0.1741032600402832, + "step": 3910 + }, + { + "epoch": 0.5287367976341361, + "grad_norm": 1.2220346927642822, + "learning_rate": 1.5304556136525074e-05, + "loss": 0.2351207733154297, + "step": 3911 + }, + { + "epoch": 0.528871989860583, + "grad_norm": 1.4754970073699951, + "learning_rate": 1.5297713087543948e-05, + "loss": 0.1788029670715332, + "step": 3912 + }, + { + "epoch": 0.52900718208703, + "grad_norm": 1.1380589008331299, + "learning_rate": 1.5290869976577365e-05, + "loss": 0.17387104034423828, + "step": 3913 + }, + { + "epoch": 0.529142374313477, + "grad_norm": 0.7459555864334106, + "learning_rate": 1.5284026805050107e-05, + "loss": 0.17338323593139648, + "step": 3914 + }, + { + "epoch": 0.529277566539924, + "grad_norm": 0.9824241399765015, + "learning_rate": 1.5277183574386947e-05, + "loss": 0.21282148361206055, + "step": 3915 + }, + { + "epoch": 0.529412758766371, + "grad_norm": 1.485780119895935, + "learning_rate": 1.5270340286012694e-05, + "loss": 0.20425796508789062, + "step": 3916 + }, + { + "epoch": 0.5295479509928179, + "grad_norm": 1.4654784202575684, + "learning_rate": 1.526349694135215e-05, + "loss": 0.20690631866455078, + "step": 3917 + }, + { + "epoch": 0.5296831432192649, + "grad_norm": 1.7467727661132812, + "learning_rate": 1.525665354183014e-05, + "loss": 0.17418479919433594, + "step": 3918 + }, + { + "epoch": 0.5298183354457119, + "grad_norm": 2.730034351348877, + "learning_rate": 1.5249810088871493e-05, + "loss": 0.18279647827148438, + "step": 3919 + }, + { + "epoch": 0.5299535276721589, + "grad_norm": 1.9890397787094116, + "learning_rate": 1.5242966583901052e-05, + "loss": 0.1901264190673828, + "step": 3920 + }, + { + "epoch": 0.5300887198986058, + "grad_norm": 1.2000585794448853, + "learning_rate": 1.523612302834367e-05, + "loss": 0.17255020141601562, + "step": 3921 + }, + { + "epoch": 0.5302239121250528, + "grad_norm": 1.0145665407180786, + "learning_rate": 1.5229279423624217e-05, + "loss": 0.18259716033935547, + "step": 3922 + }, + { + "epoch": 0.5303591043514998, + "grad_norm": 0.7533456683158875, + "learning_rate": 1.5222435771167566e-05, + "loss": 0.15386199951171875, + "step": 3923 + }, + { + "epoch": 0.5304942965779468, + "grad_norm": 1.1919670104980469, + "learning_rate": 1.5215592072398602e-05, + "loss": 0.18500709533691406, + "step": 3924 + }, + { + "epoch": 0.5306294888043938, + "grad_norm": 0.9636706709861755, + "learning_rate": 1.520874832874222e-05, + "loss": 0.17650437355041504, + "step": 3925 + }, + { + "epoch": 0.5307646810308407, + "grad_norm": 1.0850481986999512, + "learning_rate": 1.5201904541623318e-05, + "loss": 0.19089877605438232, + "step": 3926 + }, + { + "epoch": 0.5308998732572877, + "grad_norm": 0.6094186902046204, + "learning_rate": 1.5195060712466817e-05, + "loss": 0.11193490028381348, + "step": 3927 + }, + { + "epoch": 0.5310350654837347, + "grad_norm": 1.2321534156799316, + "learning_rate": 1.5188216842697635e-05, + "loss": 0.23956584930419922, + "step": 3928 + }, + { + "epoch": 0.5311702577101817, + "grad_norm": 0.8972972631454468, + "learning_rate": 1.5181372933740703e-05, + "loss": 0.1798996925354004, + "step": 3929 + }, + { + "epoch": 0.5313054499366286, + "grad_norm": 0.752771258354187, + "learning_rate": 1.5174528987020958e-05, + "loss": 0.13164401054382324, + "step": 3930 + }, + { + "epoch": 0.5314406421630756, + "grad_norm": 1.8373531103134155, + "learning_rate": 1.5167685003963345e-05, + "loss": 0.16129350662231445, + "step": 3931 + }, + { + "epoch": 0.5315758343895226, + "grad_norm": 0.9029563665390015, + "learning_rate": 1.5160840985992824e-05, + "loss": 0.1434469223022461, + "step": 3932 + }, + { + "epoch": 0.5317110266159696, + "grad_norm": 1.1272697448730469, + "learning_rate": 1.515399693453435e-05, + "loss": 0.17893600463867188, + "step": 3933 + }, + { + "epoch": 0.5318462188424166, + "grad_norm": 1.0933237075805664, + "learning_rate": 1.5147152851012894e-05, + "loss": 0.1831519603729248, + "step": 3934 + }, + { + "epoch": 0.5319814110688635, + "grad_norm": 1.3981655836105347, + "learning_rate": 1.514030873685343e-05, + "loss": 0.18175792694091797, + "step": 3935 + }, + { + "epoch": 0.5321166032953105, + "grad_norm": 0.9870057702064514, + "learning_rate": 1.513346459348094e-05, + "loss": 0.15972137451171875, + "step": 3936 + }, + { + "epoch": 0.5322517955217575, + "grad_norm": 1.3061848878860474, + "learning_rate": 1.5126620422320405e-05, + "loss": 0.1402750015258789, + "step": 3937 + }, + { + "epoch": 0.5323869877482045, + "grad_norm": 2.1066627502441406, + "learning_rate": 1.5119776224796823e-05, + "loss": 0.14604568481445312, + "step": 3938 + }, + { + "epoch": 0.5325221799746515, + "grad_norm": 0.9654192924499512, + "learning_rate": 1.5112932002335195e-05, + "loss": 0.19241619110107422, + "step": 3939 + }, + { + "epoch": 0.5326573722010984, + "grad_norm": 1.8721652030944824, + "learning_rate": 1.5106087756360524e-05, + "loss": 0.17813873291015625, + "step": 3940 + }, + { + "epoch": 0.5327925644275454, + "grad_norm": 0.9445196390151978, + "learning_rate": 1.5099243488297816e-05, + "loss": 0.187713623046875, + "step": 3941 + }, + { + "epoch": 0.5329277566539924, + "grad_norm": 1.3595408201217651, + "learning_rate": 1.5092399199572083e-05, + "loss": 0.1769847869873047, + "step": 3942 + }, + { + "epoch": 0.5330629488804394, + "grad_norm": 0.720579981803894, + "learning_rate": 1.5085554891608343e-05, + "loss": 0.1292252540588379, + "step": 3943 + }, + { + "epoch": 0.5331981411068863, + "grad_norm": 0.9848388433456421, + "learning_rate": 1.5078710565831616e-05, + "loss": 0.16658592224121094, + "step": 3944 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.2487695217132568, + "learning_rate": 1.5071866223666935e-05, + "loss": 0.1735973358154297, + "step": 3945 + }, + { + "epoch": 0.5334685255597803, + "grad_norm": 1.0072109699249268, + "learning_rate": 1.5065021866539323e-05, + "loss": 0.17850494384765625, + "step": 3946 + }, + { + "epoch": 0.5336037177862273, + "grad_norm": 1.576037049293518, + "learning_rate": 1.5058177495873805e-05, + "loss": 0.17388248443603516, + "step": 3947 + }, + { + "epoch": 0.5337389100126743, + "grad_norm": 2.016763925552368, + "learning_rate": 1.5051333113095429e-05, + "loss": 0.21162033081054688, + "step": 3948 + }, + { + "epoch": 0.5338741022391212, + "grad_norm": 1.004820704460144, + "learning_rate": 1.5044488719629218e-05, + "loss": 0.1365799903869629, + "step": 3949 + }, + { + "epoch": 0.5340092944655682, + "grad_norm": 0.5475895404815674, + "learning_rate": 1.5037644316900227e-05, + "loss": 0.12336695194244385, + "step": 3950 + }, + { + "epoch": 0.5341444866920152, + "grad_norm": 1.0990207195281982, + "learning_rate": 1.5030799906333484e-05, + "loss": 0.2123265266418457, + "step": 3951 + }, + { + "epoch": 0.5342796789184622, + "grad_norm": 0.7240240573883057, + "learning_rate": 1.5023955489354031e-05, + "loss": 0.14586830139160156, + "step": 3952 + }, + { + "epoch": 0.5344148711449092, + "grad_norm": 0.8073884844779968, + "learning_rate": 1.5017111067386927e-05, + "loss": 0.19486570358276367, + "step": 3953 + }, + { + "epoch": 0.5345500633713561, + "grad_norm": 2.3705556392669678, + "learning_rate": 1.50102666418572e-05, + "loss": 0.2147655487060547, + "step": 3954 + }, + { + "epoch": 0.5346852555978031, + "grad_norm": 1.6359292268753052, + "learning_rate": 1.500342221418991e-05, + "loss": 0.20354461669921875, + "step": 3955 + }, + { + "epoch": 0.5348204478242501, + "grad_norm": 2.809063673019409, + "learning_rate": 1.4996577785810094e-05, + "loss": 0.18563270568847656, + "step": 3956 + }, + { + "epoch": 0.5349556400506971, + "grad_norm": 1.2359619140625, + "learning_rate": 1.4989733358142798e-05, + "loss": 0.1841496229171753, + "step": 3957 + }, + { + "epoch": 0.535090832277144, + "grad_norm": 1.5012120008468628, + "learning_rate": 1.498288893261308e-05, + "loss": 0.1871471405029297, + "step": 3958 + }, + { + "epoch": 0.535226024503591, + "grad_norm": 0.83632493019104, + "learning_rate": 1.497604451064597e-05, + "loss": 0.18277502059936523, + "step": 3959 + }, + { + "epoch": 0.535361216730038, + "grad_norm": 1.2533302307128906, + "learning_rate": 1.496920009366652e-05, + "loss": 0.1358942985534668, + "step": 3960 + }, + { + "epoch": 0.535496408956485, + "grad_norm": 0.887109637260437, + "learning_rate": 1.4962355683099777e-05, + "loss": 0.18335533142089844, + "step": 3961 + }, + { + "epoch": 0.535631601182932, + "grad_norm": 0.9484407305717468, + "learning_rate": 1.4955511280370782e-05, + "loss": 0.17981112003326416, + "step": 3962 + }, + { + "epoch": 0.5357667934093789, + "grad_norm": 1.69333815574646, + "learning_rate": 1.4948666886904579e-05, + "loss": 0.17372846603393555, + "step": 3963 + }, + { + "epoch": 0.5359019856358259, + "grad_norm": 1.7836371660232544, + "learning_rate": 1.4941822504126199e-05, + "loss": 0.20348739624023438, + "step": 3964 + }, + { + "epoch": 0.5360371778622729, + "grad_norm": 1.3293485641479492, + "learning_rate": 1.4934978133460681e-05, + "loss": 0.19939088821411133, + "step": 3965 + }, + { + "epoch": 0.5361723700887199, + "grad_norm": 0.968438982963562, + "learning_rate": 1.4928133776333068e-05, + "loss": 0.2183971405029297, + "step": 3966 + }, + { + "epoch": 0.5363075623151669, + "grad_norm": 1.8090099096298218, + "learning_rate": 1.4921289434168386e-05, + "loss": 0.15348024666309357, + "step": 3967 + }, + { + "epoch": 0.5364427545416138, + "grad_norm": 2.4087274074554443, + "learning_rate": 1.4914445108391663e-05, + "loss": 0.18173694610595703, + "step": 3968 + }, + { + "epoch": 0.5365779467680608, + "grad_norm": 1.1351317167282104, + "learning_rate": 1.4907600800427922e-05, + "loss": 0.20950984954833984, + "step": 3969 + }, + { + "epoch": 0.5367131389945078, + "grad_norm": 1.7744807004928589, + "learning_rate": 1.4900756511702188e-05, + "loss": 0.27730560302734375, + "step": 3970 + }, + { + "epoch": 0.5368483312209548, + "grad_norm": 1.2004674673080444, + "learning_rate": 1.4893912243639479e-05, + "loss": 0.18226146697998047, + "step": 3971 + }, + { + "epoch": 0.5369835234474017, + "grad_norm": 0.9677853584289551, + "learning_rate": 1.4887067997664807e-05, + "loss": 0.215972900390625, + "step": 3972 + }, + { + "epoch": 0.5371187156738487, + "grad_norm": 0.9946901202201843, + "learning_rate": 1.488022377520318e-05, + "loss": 0.2423381805419922, + "step": 3973 + }, + { + "epoch": 0.5372539079002957, + "grad_norm": 1.6103684902191162, + "learning_rate": 1.4873379577679599e-05, + "loss": 0.1437664031982422, + "step": 3974 + }, + { + "epoch": 0.5373891001267427, + "grad_norm": 1.8710675239562988, + "learning_rate": 1.4866535406519063e-05, + "loss": 0.1977243423461914, + "step": 3975 + }, + { + "epoch": 0.5375242923531897, + "grad_norm": 1.0028784275054932, + "learning_rate": 1.4859691263146574e-05, + "loss": 0.13887786865234375, + "step": 3976 + }, + { + "epoch": 0.5376594845796366, + "grad_norm": 0.8750423192977905, + "learning_rate": 1.485284714898711e-05, + "loss": 0.139312744140625, + "step": 3977 + }, + { + "epoch": 0.5377946768060836, + "grad_norm": 0.7252755761146545, + "learning_rate": 1.4846003065465653e-05, + "loss": 0.14550018310546875, + "step": 3978 + }, + { + "epoch": 0.5379298690325306, + "grad_norm": 3.0151162147521973, + "learning_rate": 1.4839159014007179e-05, + "loss": 0.23079371452331543, + "step": 3979 + }, + { + "epoch": 0.5380650612589776, + "grad_norm": 2.0465519428253174, + "learning_rate": 1.4832314996036653e-05, + "loss": 0.18709182739257812, + "step": 3980 + }, + { + "epoch": 0.5382002534854246, + "grad_norm": 2.1044864654541016, + "learning_rate": 1.4825471012979048e-05, + "loss": 0.1925182342529297, + "step": 3981 + }, + { + "epoch": 0.5383354457118715, + "grad_norm": 2.3994622230529785, + "learning_rate": 1.4818627066259301e-05, + "loss": 0.17262601852416992, + "step": 3982 + }, + { + "epoch": 0.5384706379383185, + "grad_norm": 0.9226011037826538, + "learning_rate": 1.481178315730237e-05, + "loss": 0.17860889434814453, + "step": 3983 + }, + { + "epoch": 0.5386058301647655, + "grad_norm": 1.1245869398117065, + "learning_rate": 1.4804939287533184e-05, + "loss": 0.21307754516601562, + "step": 3984 + }, + { + "epoch": 0.5387410223912125, + "grad_norm": 2.064107894897461, + "learning_rate": 1.4798095458376682e-05, + "loss": 0.21994924545288086, + "step": 3985 + }, + { + "epoch": 0.5388762146176594, + "grad_norm": 2.0353598594665527, + "learning_rate": 1.4791251671257788e-05, + "loss": 0.20468425750732422, + "step": 3986 + }, + { + "epoch": 0.5390114068441064, + "grad_norm": 1.3710945844650269, + "learning_rate": 1.4784407927601401e-05, + "loss": 0.21075963973999023, + "step": 3987 + }, + { + "epoch": 0.5391465990705534, + "grad_norm": 2.1930136680603027, + "learning_rate": 1.4777564228832436e-05, + "loss": 0.2022418975830078, + "step": 3988 + }, + { + "epoch": 0.5392817912970004, + "grad_norm": 0.5666084289550781, + "learning_rate": 1.4770720576375782e-05, + "loss": 0.10650634765625, + "step": 3989 + }, + { + "epoch": 0.5394169835234474, + "grad_norm": 1.0654162168502808, + "learning_rate": 1.4763876971656334e-05, + "loss": 0.16987371444702148, + "step": 3990 + }, + { + "epoch": 0.5395521757498943, + "grad_norm": 0.8482323884963989, + "learning_rate": 1.4757033416098953e-05, + "loss": 0.2015666961669922, + "step": 3991 + }, + { + "epoch": 0.5396873679763413, + "grad_norm": 3.253553867340088, + "learning_rate": 1.4750189911128511e-05, + "loss": 0.235931396484375, + "step": 3992 + }, + { + "epoch": 0.5398225602027883, + "grad_norm": 1.031988501548767, + "learning_rate": 1.4743346458169863e-05, + "loss": 0.09942924976348877, + "step": 3993 + }, + { + "epoch": 0.5399577524292353, + "grad_norm": 2.237548589706421, + "learning_rate": 1.473650305864785e-05, + "loss": 0.2098541259765625, + "step": 3994 + }, + { + "epoch": 0.5400929446556822, + "grad_norm": 0.927822470664978, + "learning_rate": 1.472965971398731e-05, + "loss": 0.16896724700927734, + "step": 3995 + }, + { + "epoch": 0.5402281368821292, + "grad_norm": 1.737018346786499, + "learning_rate": 1.4722816425613054e-05, + "loss": 0.16241240501403809, + "step": 3996 + }, + { + "epoch": 0.5403633291085762, + "grad_norm": 0.7754279971122742, + "learning_rate": 1.4715973194949895e-05, + "loss": 0.15568161010742188, + "step": 3997 + }, + { + "epoch": 0.5404985213350232, + "grad_norm": 1.9644675254821777, + "learning_rate": 1.4709130023422636e-05, + "loss": 0.18276691436767578, + "step": 3998 + }, + { + "epoch": 0.5406337135614702, + "grad_norm": 0.9808417558670044, + "learning_rate": 1.4702286912456052e-05, + "loss": 0.1619720458984375, + "step": 3999 + }, + { + "epoch": 0.5407689057879171, + "grad_norm": 1.5291649103164673, + "learning_rate": 1.4695443863474928e-05, + "loss": 0.1965045928955078, + "step": 4000 + }, + { + "epoch": 0.5409040980143641, + "grad_norm": 1.6361366510391235, + "learning_rate": 1.4688600877904012e-05, + "loss": 0.16273021697998047, + "step": 4001 + }, + { + "epoch": 0.5410392902408111, + "grad_norm": 1.2697741985321045, + "learning_rate": 1.468175795716805e-05, + "loss": 0.17620277404785156, + "step": 4002 + }, + { + "epoch": 0.5411744824672581, + "grad_norm": 1.1318798065185547, + "learning_rate": 1.4674915102691783e-05, + "loss": 0.19409847259521484, + "step": 4003 + }, + { + "epoch": 0.5413096746937052, + "grad_norm": 0.7105750441551208, + "learning_rate": 1.4668072315899926e-05, + "loss": 0.114227294921875, + "step": 4004 + }, + { + "epoch": 0.5414448669201521, + "grad_norm": 1.1478139162063599, + "learning_rate": 1.466122959821718e-05, + "loss": 0.2017955780029297, + "step": 4005 + }, + { + "epoch": 0.5415800591465991, + "grad_norm": 0.979746401309967, + "learning_rate": 1.4654386951068239e-05, + "loss": 0.1895599365234375, + "step": 4006 + }, + { + "epoch": 0.5417152513730461, + "grad_norm": 1.0143623352050781, + "learning_rate": 1.4647544375877776e-05, + "loss": 0.13962459564208984, + "step": 4007 + }, + { + "epoch": 0.5418504435994931, + "grad_norm": 1.5708460807800293, + "learning_rate": 1.4640701874070457e-05, + "loss": 0.19917583465576172, + "step": 4008 + }, + { + "epoch": 0.5419856358259401, + "grad_norm": 1.867903470993042, + "learning_rate": 1.4633859447070922e-05, + "loss": 0.21210479736328125, + "step": 4009 + }, + { + "epoch": 0.542120828052387, + "grad_norm": 1.0648705959320068, + "learning_rate": 1.4627017096303805e-05, + "loss": 0.21725082397460938, + "step": 4010 + }, + { + "epoch": 0.542256020278834, + "grad_norm": 2.2926185131073, + "learning_rate": 1.4620174823193711e-05, + "loss": 0.2010936737060547, + "step": 4011 + }, + { + "epoch": 0.542391212505281, + "grad_norm": 1.7901463508605957, + "learning_rate": 1.4613332629165249e-05, + "loss": 0.14879417419433594, + "step": 4012 + }, + { + "epoch": 0.542526404731728, + "grad_norm": 1.015810251235962, + "learning_rate": 1.4606490515642998e-05, + "loss": 0.16561293601989746, + "step": 4013 + }, + { + "epoch": 0.542661596958175, + "grad_norm": 1.0519723892211914, + "learning_rate": 1.4599648484051516e-05, + "loss": 0.2098369598388672, + "step": 4014 + }, + { + "epoch": 0.5427967891846219, + "grad_norm": 0.973822295665741, + "learning_rate": 1.4592806535815358e-05, + "loss": 0.18896007537841797, + "step": 4015 + }, + { + "epoch": 0.5429319814110689, + "grad_norm": 1.7192699909210205, + "learning_rate": 1.4585964672359045e-05, + "loss": 0.18070220947265625, + "step": 4016 + }, + { + "epoch": 0.5430671736375159, + "grad_norm": 1.3553988933563232, + "learning_rate": 1.4579122895107098e-05, + "loss": 0.1761951446533203, + "step": 4017 + }, + { + "epoch": 0.5432023658639629, + "grad_norm": 1.4251456260681152, + "learning_rate": 1.4572281205484012e-05, + "loss": 0.2001185417175293, + "step": 4018 + }, + { + "epoch": 0.5433375580904098, + "grad_norm": 0.7896031141281128, + "learning_rate": 1.4565439604914256e-05, + "loss": 0.1615065336227417, + "step": 4019 + }, + { + "epoch": 0.5434727503168568, + "grad_norm": 1.0697122812271118, + "learning_rate": 1.4558598094822294e-05, + "loss": 0.19288349151611328, + "step": 4020 + }, + { + "epoch": 0.5436079425433038, + "grad_norm": 0.9079411029815674, + "learning_rate": 1.455175667663256e-05, + "loss": 0.13894164562225342, + "step": 4021 + }, + { + "epoch": 0.5437431347697508, + "grad_norm": 0.5077937245368958, + "learning_rate": 1.4544915351769476e-05, + "loss": 0.08949291706085205, + "step": 4022 + }, + { + "epoch": 0.5438783269961978, + "grad_norm": 0.8175442218780518, + "learning_rate": 1.4538074121657448e-05, + "loss": 0.13100862503051758, + "step": 4023 + }, + { + "epoch": 0.5440135192226447, + "grad_norm": 1.2403984069824219, + "learning_rate": 1.4531232987720846e-05, + "loss": 0.2016468048095703, + "step": 4024 + }, + { + "epoch": 0.5441487114490917, + "grad_norm": 0.92755526304245, + "learning_rate": 1.4524391951384037e-05, + "loss": 0.19174766540527344, + "step": 4025 + }, + { + "epoch": 0.5442839036755387, + "grad_norm": 2.0467112064361572, + "learning_rate": 1.4517551014071358e-05, + "loss": 0.20962762832641602, + "step": 4026 + }, + { + "epoch": 0.5444190959019857, + "grad_norm": 0.8494007587432861, + "learning_rate": 1.4510710177207137e-05, + "loss": 0.15510821342468262, + "step": 4027 + }, + { + "epoch": 0.5445542881284327, + "grad_norm": 0.6568759679794312, + "learning_rate": 1.450386944221566e-05, + "loss": 0.11837959289550781, + "step": 4028 + }, + { + "epoch": 0.5446894803548796, + "grad_norm": 1.319154143333435, + "learning_rate": 1.449702881052121e-05, + "loss": 0.16934967041015625, + "step": 4029 + }, + { + "epoch": 0.5448246725813266, + "grad_norm": 1.1929186582565308, + "learning_rate": 1.4490188283548048e-05, + "loss": 0.20438766479492188, + "step": 4030 + }, + { + "epoch": 0.5449598648077736, + "grad_norm": 1.232987642288208, + "learning_rate": 1.44833478627204e-05, + "loss": 0.2279338836669922, + "step": 4031 + }, + { + "epoch": 0.5450950570342206, + "grad_norm": 0.8117069602012634, + "learning_rate": 1.447650754946249e-05, + "loss": 0.1895742416381836, + "step": 4032 + }, + { + "epoch": 0.5452302492606675, + "grad_norm": 1.0740529298782349, + "learning_rate": 1.4469667345198492e-05, + "loss": 0.19192028045654297, + "step": 4033 + }, + { + "epoch": 0.5453654414871145, + "grad_norm": 2.671614170074463, + "learning_rate": 1.446282725135258e-05, + "loss": 0.22579193115234375, + "step": 4034 + }, + { + "epoch": 0.5455006337135615, + "grad_norm": 1.099198579788208, + "learning_rate": 1.4455987269348904e-05, + "loss": 0.17916584014892578, + "step": 4035 + }, + { + "epoch": 0.5456358259400085, + "grad_norm": 1.3676178455352783, + "learning_rate": 1.4449147400611578e-05, + "loss": 0.186309814453125, + "step": 4036 + }, + { + "epoch": 0.5457710181664555, + "grad_norm": 0.7914870977401733, + "learning_rate": 1.4442307646564702e-05, + "loss": 0.17490005493164062, + "step": 4037 + }, + { + "epoch": 0.5459062103929024, + "grad_norm": 1.3446155786514282, + "learning_rate": 1.4435468008632345e-05, + "loss": 0.1973438262939453, + "step": 4038 + }, + { + "epoch": 0.5460414026193494, + "grad_norm": 0.9203872084617615, + "learning_rate": 1.4428628488238557e-05, + "loss": 0.17908954620361328, + "step": 4039 + }, + { + "epoch": 0.5461765948457964, + "grad_norm": 1.1400136947631836, + "learning_rate": 1.442178908680737e-05, + "loss": 0.21097850799560547, + "step": 4040 + }, + { + "epoch": 0.5463117870722434, + "grad_norm": 0.7576656937599182, + "learning_rate": 1.4414949805762779e-05, + "loss": 0.11867785453796387, + "step": 4041 + }, + { + "epoch": 0.5464469792986903, + "grad_norm": 0.8881217241287231, + "learning_rate": 1.4408110646528757e-05, + "loss": 0.1385340690612793, + "step": 4042 + }, + { + "epoch": 0.5465821715251373, + "grad_norm": 1.2633638381958008, + "learning_rate": 1.440127161052925e-05, + "loss": 0.19174587726593018, + "step": 4043 + }, + { + "epoch": 0.5467173637515843, + "grad_norm": 0.8522090911865234, + "learning_rate": 1.4394432699188188e-05, + "loss": 0.17963600158691406, + "step": 4044 + }, + { + "epoch": 0.5468525559780313, + "grad_norm": 0.7230494022369385, + "learning_rate": 1.4387593913929472e-05, + "loss": 0.10930228233337402, + "step": 4045 + }, + { + "epoch": 0.5469877482044783, + "grad_norm": 1.814190149307251, + "learning_rate": 1.4380755256176968e-05, + "loss": 0.1976947784423828, + "step": 4046 + }, + { + "epoch": 0.5471229404309252, + "grad_norm": 0.6891637444496155, + "learning_rate": 1.437391672735452e-05, + "loss": 0.13751459121704102, + "step": 4047 + }, + { + "epoch": 0.5472581326573722, + "grad_norm": 0.8325259685516357, + "learning_rate": 1.4367078328885946e-05, + "loss": 0.1578693389892578, + "step": 4048 + }, + { + "epoch": 0.5473933248838192, + "grad_norm": 1.03226900100708, + "learning_rate": 1.4360240062195039e-05, + "loss": 0.12672805786132812, + "step": 4049 + }, + { + "epoch": 0.5475285171102662, + "grad_norm": 0.7664415836334229, + "learning_rate": 1.435340192870557e-05, + "loss": 0.18357086181640625, + "step": 4050 + }, + { + "epoch": 0.5476637093367132, + "grad_norm": 1.0694026947021484, + "learning_rate": 1.434656392984126e-05, + "loss": 0.1797046661376953, + "step": 4051 + }, + { + "epoch": 0.5477989015631601, + "grad_norm": 3.5499792098999023, + "learning_rate": 1.4339726067025828e-05, + "loss": 0.2503662109375, + "step": 4052 + }, + { + "epoch": 0.5479340937896071, + "grad_norm": 1.7963324785232544, + "learning_rate": 1.4332888341682947e-05, + "loss": 0.2182636260986328, + "step": 4053 + }, + { + "epoch": 0.5480692860160541, + "grad_norm": 1.3704324960708618, + "learning_rate": 1.432605075523627e-05, + "loss": 0.24613571166992188, + "step": 4054 + }, + { + "epoch": 0.5482044782425011, + "grad_norm": 0.8698115944862366, + "learning_rate": 1.4319213309109426e-05, + "loss": 0.1654510498046875, + "step": 4055 + }, + { + "epoch": 0.548339670468948, + "grad_norm": 1.5419005155563354, + "learning_rate": 1.4312376004725996e-05, + "loss": 0.1967763900756836, + "step": 4056 + }, + { + "epoch": 0.548474862695395, + "grad_norm": 1.1779868602752686, + "learning_rate": 1.430553884350955e-05, + "loss": 0.1338977813720703, + "step": 4057 + }, + { + "epoch": 0.548610054921842, + "grad_norm": 1.7484750747680664, + "learning_rate": 1.429870182688362e-05, + "loss": 0.12793350219726562, + "step": 4058 + }, + { + "epoch": 0.548745247148289, + "grad_norm": 0.6911334991455078, + "learning_rate": 1.4291864956271713e-05, + "loss": 0.14394855499267578, + "step": 4059 + }, + { + "epoch": 0.548880439374736, + "grad_norm": 2.775631904602051, + "learning_rate": 1.4285028233097293e-05, + "loss": 0.19048500061035156, + "step": 4060 + }, + { + "epoch": 0.5490156316011829, + "grad_norm": 1.3515230417251587, + "learning_rate": 1.4278191658783809e-05, + "loss": 0.22266387939453125, + "step": 4061 + }, + { + "epoch": 0.5491508238276299, + "grad_norm": 1.1564401388168335, + "learning_rate": 1.427135523475467e-05, + "loss": 0.2068958282470703, + "step": 4062 + }, + { + "epoch": 0.5492860160540769, + "grad_norm": 1.4923704862594604, + "learning_rate": 1.4264518962433258e-05, + "loss": 0.2165374755859375, + "step": 4063 + }, + { + "epoch": 0.5494212082805239, + "grad_norm": 0.792876660823822, + "learning_rate": 1.4257682843242925e-05, + "loss": 0.16881990432739258, + "step": 4064 + }, + { + "epoch": 0.5495564005069709, + "grad_norm": 1.3603845834732056, + "learning_rate": 1.4250846878606974e-05, + "loss": 0.16334915161132812, + "step": 4065 + }, + { + "epoch": 0.5496915927334178, + "grad_norm": 0.7456710934638977, + "learning_rate": 1.4244011069948702e-05, + "loss": 0.1499490737915039, + "step": 4066 + }, + { + "epoch": 0.5498267849598648, + "grad_norm": 0.8540663719177246, + "learning_rate": 1.4237175418691357e-05, + "loss": 0.14925193786621094, + "step": 4067 + }, + { + "epoch": 0.5499619771863118, + "grad_norm": 1.8437916040420532, + "learning_rate": 1.4230339926258153e-05, + "loss": 0.1911764144897461, + "step": 4068 + }, + { + "epoch": 0.5500971694127588, + "grad_norm": 1.10478675365448, + "learning_rate": 1.422350459407229e-05, + "loss": 0.16003799438476562, + "step": 4069 + }, + { + "epoch": 0.5502323616392057, + "grad_norm": 1.7669198513031006, + "learning_rate": 1.4216669423556903e-05, + "loss": 0.19570636749267578, + "step": 4070 + }, + { + "epoch": 0.5503675538656527, + "grad_norm": 1.269566297531128, + "learning_rate": 1.420983441613512e-05, + "loss": 0.1048593521118164, + "step": 4071 + }, + { + "epoch": 0.5505027460920997, + "grad_norm": 1.4541712999343872, + "learning_rate": 1.420299957323003e-05, + "loss": 0.13025665283203125, + "step": 4072 + }, + { + "epoch": 0.5506379383185467, + "grad_norm": 2.567148208618164, + "learning_rate": 1.4196164896264679e-05, + "loss": 0.24313735961914062, + "step": 4073 + }, + { + "epoch": 0.5507731305449937, + "grad_norm": 1.2427109479904175, + "learning_rate": 1.418933038666208e-05, + "loss": 0.20119094848632812, + "step": 4074 + }, + { + "epoch": 0.5509083227714406, + "grad_norm": 1.6346986293792725, + "learning_rate": 1.4182496045845217e-05, + "loss": 0.1792583465576172, + "step": 4075 + }, + { + "epoch": 0.5510435149978876, + "grad_norm": 0.8594448566436768, + "learning_rate": 1.4175661875237036e-05, + "loss": 0.12098884582519531, + "step": 4076 + }, + { + "epoch": 0.5511787072243346, + "grad_norm": 1.3619225025177002, + "learning_rate": 1.416882787626045e-05, + "loss": 0.10511493682861328, + "step": 4077 + }, + { + "epoch": 0.5513138994507816, + "grad_norm": 1.0802544355392456, + "learning_rate": 1.4161994050338334e-05, + "loss": 0.15864920616149902, + "step": 4078 + }, + { + "epoch": 0.5514490916772286, + "grad_norm": 2.0858240127563477, + "learning_rate": 1.4155160398893528e-05, + "loss": 0.19161272048950195, + "step": 4079 + }, + { + "epoch": 0.5515842839036755, + "grad_norm": 1.680711030960083, + "learning_rate": 1.4148326923348824e-05, + "loss": 0.22324275970458984, + "step": 4080 + }, + { + "epoch": 0.5517194761301225, + "grad_norm": 1.3554102182388306, + "learning_rate": 1.4141493625127e-05, + "loss": 0.15987396240234375, + "step": 4081 + }, + { + "epoch": 0.5518546683565695, + "grad_norm": 1.3237963914871216, + "learning_rate": 1.4134660505650786e-05, + "loss": 0.19043636322021484, + "step": 4082 + }, + { + "epoch": 0.5519898605830165, + "grad_norm": 1.440773844718933, + "learning_rate": 1.4127827566342864e-05, + "loss": 0.14103984832763672, + "step": 4083 + }, + { + "epoch": 0.5521250528094634, + "grad_norm": 1.2290815114974976, + "learning_rate": 1.4120994808625896e-05, + "loss": 0.15516376495361328, + "step": 4084 + }, + { + "epoch": 0.5522602450359104, + "grad_norm": 0.8551283478736877, + "learning_rate": 1.4114162233922494e-05, + "loss": 0.1876659393310547, + "step": 4085 + }, + { + "epoch": 0.5523954372623574, + "grad_norm": 0.7810075283050537, + "learning_rate": 1.4107329843655238e-05, + "loss": 0.17477798461914062, + "step": 4086 + }, + { + "epoch": 0.5525306294888044, + "grad_norm": 1.6059144735336304, + "learning_rate": 1.4100497639246675e-05, + "loss": 0.1829977035522461, + "step": 4087 + }, + { + "epoch": 0.5526658217152514, + "grad_norm": 1.0176339149475098, + "learning_rate": 1.4093665622119294e-05, + "loss": 0.16252994537353516, + "step": 4088 + }, + { + "epoch": 0.5528010139416983, + "grad_norm": 0.9489703178405762, + "learning_rate": 1.4086833793695566e-05, + "loss": 0.16922378540039062, + "step": 4089 + }, + { + "epoch": 0.5529362061681453, + "grad_norm": 1.301771640777588, + "learning_rate": 1.408000215539791e-05, + "loss": 0.15185487270355225, + "step": 4090 + }, + { + "epoch": 0.5530713983945923, + "grad_norm": 0.7590004801750183, + "learning_rate": 1.4073170708648711e-05, + "loss": 0.1298379898071289, + "step": 4091 + }, + { + "epoch": 0.5532065906210393, + "grad_norm": 1.467963695526123, + "learning_rate": 1.406633945487032e-05, + "loss": 0.21006393432617188, + "step": 4092 + }, + { + "epoch": 0.5533417828474863, + "grad_norm": 1.3346192836761475, + "learning_rate": 1.4059508395485026e-05, + "loss": 0.1307516098022461, + "step": 4093 + }, + { + "epoch": 0.5534769750739332, + "grad_norm": 0.5424954295158386, + "learning_rate": 1.4052677531915102e-05, + "loss": 0.14656639099121094, + "step": 4094 + }, + { + "epoch": 0.5536121673003802, + "grad_norm": 1.3153436183929443, + "learning_rate": 1.4045846865582765e-05, + "loss": 0.2225494384765625, + "step": 4095 + }, + { + "epoch": 0.5537473595268272, + "grad_norm": 1.6059702634811401, + "learning_rate": 1.4039016397910206e-05, + "loss": 0.1785566806793213, + "step": 4096 + }, + { + "epoch": 0.5538825517532742, + "grad_norm": 1.0207138061523438, + "learning_rate": 1.403218613031955e-05, + "loss": 0.1328721046447754, + "step": 4097 + }, + { + "epoch": 0.5540177439797211, + "grad_norm": 0.9160784482955933, + "learning_rate": 1.4025356064232903e-05, + "loss": 0.128265380859375, + "step": 4098 + }, + { + "epoch": 0.5541529362061681, + "grad_norm": 1.027923583984375, + "learning_rate": 1.4018526201072324e-05, + "loss": 0.186886727809906, + "step": 4099 + }, + { + "epoch": 0.5542881284326151, + "grad_norm": 0.7982254028320312, + "learning_rate": 1.4011696542259821e-05, + "loss": 0.17944121360778809, + "step": 4100 + }, + { + "epoch": 0.5544233206590621, + "grad_norm": 1.5268313884735107, + "learning_rate": 1.4004867089217376e-05, + "loss": 0.1860370635986328, + "step": 4101 + }, + { + "epoch": 0.5545585128855091, + "grad_norm": 1.6268715858459473, + "learning_rate": 1.39980378433669e-05, + "loss": 0.15576553344726562, + "step": 4102 + }, + { + "epoch": 0.554693705111956, + "grad_norm": 1.6642669439315796, + "learning_rate": 1.399120880613029e-05, + "loss": 0.19647693634033203, + "step": 4103 + }, + { + "epoch": 0.554828897338403, + "grad_norm": 0.889184296131134, + "learning_rate": 1.3984379978929388e-05, + "loss": 0.12539100646972656, + "step": 4104 + }, + { + "epoch": 0.55496408956485, + "grad_norm": 1.6981139183044434, + "learning_rate": 1.3977551363185995e-05, + "loss": 0.16975784301757812, + "step": 4105 + }, + { + "epoch": 0.555099281791297, + "grad_norm": 0.9955816268920898, + "learning_rate": 1.3970722960321854e-05, + "loss": 0.1915283203125, + "step": 4106 + }, + { + "epoch": 0.555234474017744, + "grad_norm": 1.5031569004058838, + "learning_rate": 1.3963894771758682e-05, + "loss": 0.09496653079986572, + "step": 4107 + }, + { + "epoch": 0.5553696662441909, + "grad_norm": 0.4611392617225647, + "learning_rate": 1.3957066798918143e-05, + "loss": 0.10890722274780273, + "step": 4108 + }, + { + "epoch": 0.5555048584706379, + "grad_norm": 0.6820783615112305, + "learning_rate": 1.3950239043221861e-05, + "loss": 0.1425970196723938, + "step": 4109 + }, + { + "epoch": 0.5556400506970849, + "grad_norm": 1.1353121995925903, + "learning_rate": 1.3943411506091408e-05, + "loss": 0.15273046493530273, + "step": 4110 + }, + { + "epoch": 0.5557752429235319, + "grad_norm": 1.0135598182678223, + "learning_rate": 1.3936584188948313e-05, + "loss": 0.1280224323272705, + "step": 4111 + }, + { + "epoch": 0.5559104351499788, + "grad_norm": 1.030727744102478, + "learning_rate": 1.3929757093214059e-05, + "loss": 0.14476251602172852, + "step": 4112 + }, + { + "epoch": 0.5560456273764258, + "grad_norm": 1.4061270952224731, + "learning_rate": 1.3922930220310085e-05, + "loss": 0.20631122589111328, + "step": 4113 + }, + { + "epoch": 0.5561808196028728, + "grad_norm": 1.3502142429351807, + "learning_rate": 1.3916103571657786e-05, + "loss": 0.2161102294921875, + "step": 4114 + }, + { + "epoch": 0.5563160118293198, + "grad_norm": 3.845872402191162, + "learning_rate": 1.3909277148678504e-05, + "loss": 0.26122093200683594, + "step": 4115 + }, + { + "epoch": 0.5564512040557668, + "grad_norm": 1.0589784383773804, + "learning_rate": 1.3902450952793536e-05, + "loss": 0.20694923400878906, + "step": 4116 + }, + { + "epoch": 0.5565863962822137, + "grad_norm": 1.228968620300293, + "learning_rate": 1.389562498542413e-05, + "loss": 0.1755162477493286, + "step": 4117 + }, + { + "epoch": 0.5567215885086607, + "grad_norm": 1.1686471700668335, + "learning_rate": 1.388879924799149e-05, + "loss": 0.2123098373413086, + "step": 4118 + }, + { + "epoch": 0.5568567807351077, + "grad_norm": 2.5104527473449707, + "learning_rate": 1.388197374191678e-05, + "loss": 0.18030071258544922, + "step": 4119 + }, + { + "epoch": 0.5569919729615547, + "grad_norm": 1.3864128589630127, + "learning_rate": 1.387514846862109e-05, + "loss": 0.19811248779296875, + "step": 4120 + }, + { + "epoch": 0.5571271651880016, + "grad_norm": 1.5137351751327515, + "learning_rate": 1.3868323429525492e-05, + "loss": 0.17752790451049805, + "step": 4121 + }, + { + "epoch": 0.5572623574144486, + "grad_norm": 1.1227834224700928, + "learning_rate": 1.3861498626050986e-05, + "loss": 0.20056915283203125, + "step": 4122 + }, + { + "epoch": 0.5573975496408956, + "grad_norm": 0.5824602842330933, + "learning_rate": 1.385467405961854e-05, + "loss": 0.0982813835144043, + "step": 4123 + }, + { + "epoch": 0.5575327418673426, + "grad_norm": 0.7411953210830688, + "learning_rate": 1.3847849731649066e-05, + "loss": 0.1483306884765625, + "step": 4124 + }, + { + "epoch": 0.5576679340937896, + "grad_norm": 1.3560596704483032, + "learning_rate": 1.3841025643563418e-05, + "loss": 0.17437362670898438, + "step": 4125 + }, + { + "epoch": 0.5578031263202365, + "grad_norm": 1.6405913829803467, + "learning_rate": 1.3834201796782413e-05, + "loss": 0.16826295852661133, + "step": 4126 + }, + { + "epoch": 0.5579383185466835, + "grad_norm": 1.0828807353973389, + "learning_rate": 1.3827378192726808e-05, + "loss": 0.23982620239257812, + "step": 4127 + }, + { + "epoch": 0.5580735107731305, + "grad_norm": 0.6675437688827515, + "learning_rate": 1.3820554832817324e-05, + "loss": 0.13168621063232422, + "step": 4128 + }, + { + "epoch": 0.5582087029995775, + "grad_norm": 1.4097481966018677, + "learning_rate": 1.3813731718474606e-05, + "loss": 0.14971518516540527, + "step": 4129 + }, + { + "epoch": 0.5583438952260245, + "grad_norm": 0.921987771987915, + "learning_rate": 1.380690885111927e-05, + "loss": 0.22249794006347656, + "step": 4130 + }, + { + "epoch": 0.5584790874524714, + "grad_norm": 1.6194636821746826, + "learning_rate": 1.3800086232171877e-05, + "loss": 0.19646787643432617, + "step": 4131 + }, + { + "epoch": 0.5586142796789184, + "grad_norm": 0.6181746125221252, + "learning_rate": 1.3793263863052926e-05, + "loss": 0.1591939926147461, + "step": 4132 + }, + { + "epoch": 0.5587494719053654, + "grad_norm": 0.7877742648124695, + "learning_rate": 1.3786441745182881e-05, + "loss": 0.17911720275878906, + "step": 4133 + }, + { + "epoch": 0.5588846641318124, + "grad_norm": 2.1206464767456055, + "learning_rate": 1.3779619879982127e-05, + "loss": 0.17891883850097656, + "step": 4134 + }, + { + "epoch": 0.5590198563582593, + "grad_norm": 0.9240705966949463, + "learning_rate": 1.3772798268871025e-05, + "loss": 0.1595001220703125, + "step": 4135 + }, + { + "epoch": 0.5591550485847063, + "grad_norm": 0.9859445691108704, + "learning_rate": 1.376597691326987e-05, + "loss": 0.1372842788696289, + "step": 4136 + }, + { + "epoch": 0.5592902408111533, + "grad_norm": 1.3918176889419556, + "learning_rate": 1.3759155814598898e-05, + "loss": 0.1839895248413086, + "step": 4137 + }, + { + "epoch": 0.5594254330376003, + "grad_norm": 2.131326675415039, + "learning_rate": 1.3752334974278308e-05, + "loss": 0.19667625427246094, + "step": 4138 + }, + { + "epoch": 0.5595606252640473, + "grad_norm": 1.300350308418274, + "learning_rate": 1.3745514393728225e-05, + "loss": 0.15147972106933594, + "step": 4139 + }, + { + "epoch": 0.5596958174904944, + "grad_norm": 0.7752305865287781, + "learning_rate": 1.3738694074368735e-05, + "loss": 0.09185075759887695, + "step": 4140 + }, + { + "epoch": 0.5598310097169413, + "grad_norm": 1.0831496715545654, + "learning_rate": 1.3731874017619868e-05, + "loss": 0.16763591766357422, + "step": 4141 + }, + { + "epoch": 0.5599662019433883, + "grad_norm": 0.7103542685508728, + "learning_rate": 1.3725054224901597e-05, + "loss": 0.12990140914916992, + "step": 4142 + }, + { + "epoch": 0.5601013941698353, + "grad_norm": 1.1584399938583374, + "learning_rate": 1.3718234697633826e-05, + "loss": 0.1871471405029297, + "step": 4143 + }, + { + "epoch": 0.5602365863962823, + "grad_norm": 1.0294724702835083, + "learning_rate": 1.3711415437236427e-05, + "loss": 0.1923675537109375, + "step": 4144 + }, + { + "epoch": 0.5603717786227292, + "grad_norm": 0.7195061445236206, + "learning_rate": 1.3704596445129207e-05, + "loss": 0.17370319366455078, + "step": 4145 + }, + { + "epoch": 0.5605069708491762, + "grad_norm": 0.8394173979759216, + "learning_rate": 1.369777772273192e-05, + "loss": 0.15137863159179688, + "step": 4146 + }, + { + "epoch": 0.5606421630756232, + "grad_norm": 1.0501354932785034, + "learning_rate": 1.369095927146425e-05, + "loss": 0.15513992309570312, + "step": 4147 + }, + { + "epoch": 0.5607773553020702, + "grad_norm": 0.7999750375747681, + "learning_rate": 1.3684141092745846e-05, + "loss": 0.1793060302734375, + "step": 4148 + }, + { + "epoch": 0.5609125475285172, + "grad_norm": 1.0264382362365723, + "learning_rate": 1.3677323187996276e-05, + "loss": 0.16245555877685547, + "step": 4149 + }, + { + "epoch": 0.5610477397549641, + "grad_norm": 0.9214684367179871, + "learning_rate": 1.3670505558635074e-05, + "loss": 0.1819000244140625, + "step": 4150 + }, + { + "epoch": 0.5611829319814111, + "grad_norm": 1.9814612865447998, + "learning_rate": 1.366368820608171e-05, + "loss": 0.1803140640258789, + "step": 4151 + }, + { + "epoch": 0.5613181242078581, + "grad_norm": 0.5672913193702698, + "learning_rate": 1.365687113175558e-05, + "loss": 0.12874889373779297, + "step": 4152 + }, + { + "epoch": 0.5614533164343051, + "grad_norm": 0.7467692494392395, + "learning_rate": 1.3650054337076049e-05, + "loss": 0.15250778198242188, + "step": 4153 + }, + { + "epoch": 0.561588508660752, + "grad_norm": 0.9375015497207642, + "learning_rate": 1.3643237823462398e-05, + "loss": 0.15077543258666992, + "step": 4154 + }, + { + "epoch": 0.561723700887199, + "grad_norm": 1.0464015007019043, + "learning_rate": 1.363642159233387e-05, + "loss": 0.18739771842956543, + "step": 4155 + }, + { + "epoch": 0.561858893113646, + "grad_norm": 1.2129905223846436, + "learning_rate": 1.3629605645109642e-05, + "loss": 0.13581228256225586, + "step": 4156 + }, + { + "epoch": 0.561994085340093, + "grad_norm": 1.5026555061340332, + "learning_rate": 1.362278998320882e-05, + "loss": 0.2496814727783203, + "step": 4157 + }, + { + "epoch": 0.56212927756654, + "grad_norm": 1.0084865093231201, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.19599342346191406, + "step": 4158 + }, + { + "epoch": 0.5622644697929869, + "grad_norm": 0.9029825329780579, + "learning_rate": 1.3609159521053588e-05, + "loss": 0.2166290283203125, + "step": 4159 + }, + { + "epoch": 0.5623996620194339, + "grad_norm": 2.849496841430664, + "learning_rate": 1.3602344723637107e-05, + "loss": 0.20038199424743652, + "step": 4160 + }, + { + "epoch": 0.5625348542458809, + "grad_norm": 0.9239959120750427, + "learning_rate": 1.3595530217219916e-05, + "loss": 0.1507434844970703, + "step": 4161 + }, + { + "epoch": 0.5626700464723279, + "grad_norm": 0.9023704528808594, + "learning_rate": 1.3588716003220815e-05, + "loss": 0.19112777709960938, + "step": 4162 + }, + { + "epoch": 0.5628052386987749, + "grad_norm": 0.9706947803497314, + "learning_rate": 1.3581902083058574e-05, + "loss": 0.16707897186279297, + "step": 4163 + }, + { + "epoch": 0.5629404309252218, + "grad_norm": 0.8416209816932678, + "learning_rate": 1.3575088458151877e-05, + "loss": 0.1408233642578125, + "step": 4164 + }, + { + "epoch": 0.5630756231516688, + "grad_norm": 1.327923059463501, + "learning_rate": 1.3568275129919367e-05, + "loss": 0.19009003043174744, + "step": 4165 + }, + { + "epoch": 0.5632108153781158, + "grad_norm": 0.9411091804504395, + "learning_rate": 1.3561462099779604e-05, + "loss": 0.18981170654296875, + "step": 4166 + }, + { + "epoch": 0.5633460076045628, + "grad_norm": 0.9290975332260132, + "learning_rate": 1.3554649369151104e-05, + "loss": 0.18311023712158203, + "step": 4167 + }, + { + "epoch": 0.5634811998310097, + "grad_norm": 1.2058131694793701, + "learning_rate": 1.3547836939452315e-05, + "loss": 0.1944713592529297, + "step": 4168 + }, + { + "epoch": 0.5636163920574567, + "grad_norm": 1.922459363937378, + "learning_rate": 1.3541024812101615e-05, + "loss": 0.18303394317626953, + "step": 4169 + }, + { + "epoch": 0.5637515842839037, + "grad_norm": 0.9371699690818787, + "learning_rate": 1.3534212988517339e-05, + "loss": 0.20040130615234375, + "step": 4170 + }, + { + "epoch": 0.5638867765103507, + "grad_norm": 0.8874692916870117, + "learning_rate": 1.3527401470117726e-05, + "loss": 0.16982078552246094, + "step": 4171 + }, + { + "epoch": 0.5640219687367977, + "grad_norm": 1.2609591484069824, + "learning_rate": 1.3520590258320981e-05, + "loss": 0.12705135345458984, + "step": 4172 + }, + { + "epoch": 0.5641571609632446, + "grad_norm": 0.6744574904441833, + "learning_rate": 1.3513779354545235e-05, + "loss": 0.145172119140625, + "step": 4173 + }, + { + "epoch": 0.5642923531896916, + "grad_norm": 0.9199692010879517, + "learning_rate": 1.3506968760208557e-05, + "loss": 0.18248939514160156, + "step": 4174 + }, + { + "epoch": 0.5644275454161386, + "grad_norm": 1.445523977279663, + "learning_rate": 1.3500158476728938e-05, + "loss": 0.13695749640464783, + "step": 4175 + }, + { + "epoch": 0.5645627376425856, + "grad_norm": 2.014815092086792, + "learning_rate": 1.3493348505524325e-05, + "loss": 0.16543793678283691, + "step": 4176 + }, + { + "epoch": 0.5646979298690326, + "grad_norm": 1.3540127277374268, + "learning_rate": 1.3486538848012586e-05, + "loss": 0.16884231567382812, + "step": 4177 + }, + { + "epoch": 0.5648331220954795, + "grad_norm": 0.8074826598167419, + "learning_rate": 1.3479729505611532e-05, + "loss": 0.18150711059570312, + "step": 4178 + }, + { + "epoch": 0.5649683143219265, + "grad_norm": 1.3512455224990845, + "learning_rate": 1.3472920479738906e-05, + "loss": 0.18186187744140625, + "step": 4179 + }, + { + "epoch": 0.5651035065483735, + "grad_norm": 2.8973007202148438, + "learning_rate": 1.346611177181237e-05, + "loss": 0.18483352661132812, + "step": 4180 + }, + { + "epoch": 0.5652386987748205, + "grad_norm": 1.1826560497283936, + "learning_rate": 1.3459303383249547e-05, + "loss": 0.17414379119873047, + "step": 4181 + }, + { + "epoch": 0.5653738910012674, + "grad_norm": 0.9242448806762695, + "learning_rate": 1.3452495315467975e-05, + "loss": 0.2081432342529297, + "step": 4182 + }, + { + "epoch": 0.5655090832277144, + "grad_norm": 1.034800410270691, + "learning_rate": 1.3445687569885132e-05, + "loss": 0.14751625061035156, + "step": 4183 + }, + { + "epoch": 0.5656442754541614, + "grad_norm": 0.6133968234062195, + "learning_rate": 1.3438880147918429e-05, + "loss": 0.10791802406311035, + "step": 4184 + }, + { + "epoch": 0.5657794676806084, + "grad_norm": 0.7634124159812927, + "learning_rate": 1.3432073050985201e-05, + "loss": 0.1399097442626953, + "step": 4185 + }, + { + "epoch": 0.5659146599070554, + "grad_norm": 1.112557053565979, + "learning_rate": 1.3425266280502721e-05, + "loss": 0.16903066635131836, + "step": 4186 + }, + { + "epoch": 0.5660498521335023, + "grad_norm": 1.5856046676635742, + "learning_rate": 1.3418459837888202e-05, + "loss": 0.20407485961914062, + "step": 4187 + }, + { + "epoch": 0.5661850443599493, + "grad_norm": 1.1973490715026855, + "learning_rate": 1.3411653724558784e-05, + "loss": 0.1922316551208496, + "step": 4188 + }, + { + "epoch": 0.5663202365863963, + "grad_norm": 1.8968784809112549, + "learning_rate": 1.3404847941931523e-05, + "loss": 0.21197509765625, + "step": 4189 + }, + { + "epoch": 0.5664554288128433, + "grad_norm": 0.8597639799118042, + "learning_rate": 1.339804249142343e-05, + "loss": 0.20575904846191406, + "step": 4190 + }, + { + "epoch": 0.5665906210392903, + "grad_norm": 0.7887629866600037, + "learning_rate": 1.3391237374451429e-05, + "loss": 0.13595199584960938, + "step": 4191 + }, + { + "epoch": 0.5667258132657372, + "grad_norm": 1.6754926443099976, + "learning_rate": 1.3384432592432388e-05, + "loss": 0.2043018341064453, + "step": 4192 + }, + { + "epoch": 0.5668610054921842, + "grad_norm": 2.6410346031188965, + "learning_rate": 1.3377628146783102e-05, + "loss": 0.23443031311035156, + "step": 4193 + }, + { + "epoch": 0.5669961977186312, + "grad_norm": 1.1812888383865356, + "learning_rate": 1.3370824038920281e-05, + "loss": 0.1658635139465332, + "step": 4194 + }, + { + "epoch": 0.5671313899450782, + "grad_norm": 1.013667345046997, + "learning_rate": 1.3364020270260586e-05, + "loss": 0.2060832977294922, + "step": 4195 + }, + { + "epoch": 0.5672665821715251, + "grad_norm": 1.507516622543335, + "learning_rate": 1.335721684222059e-05, + "loss": 0.19167089462280273, + "step": 4196 + }, + { + "epoch": 0.5674017743979721, + "grad_norm": 0.7882696390151978, + "learning_rate": 1.3350413756216816e-05, + "loss": 0.13694250583648682, + "step": 4197 + }, + { + "epoch": 0.5675369666244191, + "grad_norm": 1.3457995653152466, + "learning_rate": 1.334361101366569e-05, + "loss": 0.18742942810058594, + "step": 4198 + }, + { + "epoch": 0.5676721588508661, + "grad_norm": 1.2155758142471313, + "learning_rate": 1.3336808615983582e-05, + "loss": 0.16469287872314453, + "step": 4199 + }, + { + "epoch": 0.5678073510773131, + "grad_norm": 2.1494460105895996, + "learning_rate": 1.3330006564586791e-05, + "loss": 0.21450424194335938, + "step": 4200 + }, + { + "epoch": 0.56794254330376, + "grad_norm": 4.148214817047119, + "learning_rate": 1.3323204860891539e-05, + "loss": 0.278839111328125, + "step": 4201 + }, + { + "epoch": 0.568077735530207, + "grad_norm": 0.7228233218193054, + "learning_rate": 1.3316403506313981e-05, + "loss": 0.16372346878051758, + "step": 4202 + }, + { + "epoch": 0.568212927756654, + "grad_norm": 0.6536335945129395, + "learning_rate": 1.3309602502270184e-05, + "loss": 0.13706541061401367, + "step": 4203 + }, + { + "epoch": 0.568348119983101, + "grad_norm": 1.010862112045288, + "learning_rate": 1.3302801850176161e-05, + "loss": 0.14612197875976562, + "step": 4204 + }, + { + "epoch": 0.568483312209548, + "grad_norm": 1.074796199798584, + "learning_rate": 1.3296001551447848e-05, + "loss": 0.1476306915283203, + "step": 4205 + }, + { + "epoch": 0.5686185044359949, + "grad_norm": 2.4724109172821045, + "learning_rate": 1.32892016075011e-05, + "loss": 0.21670913696289062, + "step": 4206 + }, + { + "epoch": 0.5687536966624419, + "grad_norm": 1.6053893566131592, + "learning_rate": 1.3282402019751694e-05, + "loss": 0.1446981430053711, + "step": 4207 + }, + { + "epoch": 0.5688888888888889, + "grad_norm": 0.9083832502365112, + "learning_rate": 1.327560278961535e-05, + "loss": 0.15733730792999268, + "step": 4208 + }, + { + "epoch": 0.5690240811153359, + "grad_norm": 1.1596590280532837, + "learning_rate": 1.3268803918507699e-05, + "loss": 0.16233444213867188, + "step": 4209 + }, + { + "epoch": 0.5691592733417828, + "grad_norm": 1.3215627670288086, + "learning_rate": 1.3262005407844306e-05, + "loss": 0.14342010021209717, + "step": 4210 + }, + { + "epoch": 0.5692944655682298, + "grad_norm": 0.9823212027549744, + "learning_rate": 1.325520725904066e-05, + "loss": 0.15561389923095703, + "step": 4211 + }, + { + "epoch": 0.5694296577946768, + "grad_norm": 0.7310113906860352, + "learning_rate": 1.3248409473512158e-05, + "loss": 0.1578388214111328, + "step": 4212 + }, + { + "epoch": 0.5695648500211238, + "grad_norm": 3.474916696548462, + "learning_rate": 1.3241612052674146e-05, + "loss": 0.2415449619293213, + "step": 4213 + }, + { + "epoch": 0.5697000422475708, + "grad_norm": 1.611437201499939, + "learning_rate": 1.3234814997941883e-05, + "loss": 0.22538185119628906, + "step": 4214 + }, + { + "epoch": 0.5698352344740177, + "grad_norm": 0.7980181574821472, + "learning_rate": 1.322801831073055e-05, + "loss": 0.14082515239715576, + "step": 4215 + }, + { + "epoch": 0.5699704267004647, + "grad_norm": 0.9239475131034851, + "learning_rate": 1.322122199245526e-05, + "loss": 0.16942787170410156, + "step": 4216 + }, + { + "epoch": 0.5701056189269117, + "grad_norm": 0.9296362996101379, + "learning_rate": 1.321442604453103e-05, + "loss": 0.16514015197753906, + "step": 4217 + }, + { + "epoch": 0.5702408111533587, + "grad_norm": 2.047433376312256, + "learning_rate": 1.320763046837282e-05, + "loss": 0.20308876037597656, + "step": 4218 + }, + { + "epoch": 0.5703760033798057, + "grad_norm": 2.0884346961975098, + "learning_rate": 1.3200835265395504e-05, + "loss": 0.18378257751464844, + "step": 4219 + }, + { + "epoch": 0.5705111956062526, + "grad_norm": 1.5572584867477417, + "learning_rate": 1.3194040437013885e-05, + "loss": 0.2159261703491211, + "step": 4220 + }, + { + "epoch": 0.5706463878326996, + "grad_norm": 1.6841769218444824, + "learning_rate": 1.3187245984642673e-05, + "loss": 0.17226409912109375, + "step": 4221 + }, + { + "epoch": 0.5707815800591466, + "grad_norm": 0.8878856897354126, + "learning_rate": 1.3180451909696517e-05, + "loss": 0.134521484375, + "step": 4222 + }, + { + "epoch": 0.5709167722855936, + "grad_norm": 1.563896656036377, + "learning_rate": 1.3173658213589972e-05, + "loss": 0.2036285400390625, + "step": 4223 + }, + { + "epoch": 0.5710519645120405, + "grad_norm": 1.085018277168274, + "learning_rate": 1.3166864897737526e-05, + "loss": 0.15758800506591797, + "step": 4224 + }, + { + "epoch": 0.5711871567384875, + "grad_norm": 0.744596004486084, + "learning_rate": 1.3160071963553593e-05, + "loss": 0.15124034881591797, + "step": 4225 + }, + { + "epoch": 0.5713223489649345, + "grad_norm": 2.013113260269165, + "learning_rate": 1.315327941245248e-05, + "loss": 0.1838216781616211, + "step": 4226 + }, + { + "epoch": 0.5714575411913815, + "grad_norm": 2.0539445877075195, + "learning_rate": 1.3146487245848445e-05, + "loss": 0.1830911636352539, + "step": 4227 + }, + { + "epoch": 0.5715927334178285, + "grad_norm": 2.651798725128174, + "learning_rate": 1.3139695465155645e-05, + "loss": 0.1342296600341797, + "step": 4228 + }, + { + "epoch": 0.5717279256442754, + "grad_norm": 1.0911517143249512, + "learning_rate": 1.3132904071788177e-05, + "loss": 0.17379283905029297, + "step": 4229 + }, + { + "epoch": 0.5718631178707224, + "grad_norm": 1.005952000617981, + "learning_rate": 1.3126113067160031e-05, + "loss": 0.1800060272216797, + "step": 4230 + }, + { + "epoch": 0.5719983100971694, + "grad_norm": 1.9741744995117188, + "learning_rate": 1.3119322452685139e-05, + "loss": 0.2034893035888672, + "step": 4231 + }, + { + "epoch": 0.5721335023236164, + "grad_norm": 1.0766539573669434, + "learning_rate": 1.3112532229777344e-05, + "loss": 0.20059823989868164, + "step": 4232 + }, + { + "epoch": 0.5722686945500634, + "grad_norm": 1.4954265356063843, + "learning_rate": 1.3105742399850399e-05, + "loss": 0.21318721771240234, + "step": 4233 + }, + { + "epoch": 0.5724038867765103, + "grad_norm": 0.8464428186416626, + "learning_rate": 1.3098952964317996e-05, + "loss": 0.12308788299560547, + "step": 4234 + }, + { + "epoch": 0.5725390790029573, + "grad_norm": 0.996900200843811, + "learning_rate": 1.3092163924593717e-05, + "loss": 0.19365215301513672, + "step": 4235 + }, + { + "epoch": 0.5726742712294043, + "grad_norm": 1.1676437854766846, + "learning_rate": 1.308537528209108e-05, + "loss": 0.1657238006591797, + "step": 4236 + }, + { + "epoch": 0.5728094634558513, + "grad_norm": 1.6026939153671265, + "learning_rate": 1.3078587038223525e-05, + "loss": 0.20125532150268555, + "step": 4237 + }, + { + "epoch": 0.5729446556822982, + "grad_norm": 1.07183039188385, + "learning_rate": 1.3071799194404392e-05, + "loss": 0.2249908447265625, + "step": 4238 + }, + { + "epoch": 0.5730798479087452, + "grad_norm": 1.5556108951568604, + "learning_rate": 1.3065011752046955e-05, + "loss": 0.20391273498535156, + "step": 4239 + }, + { + "epoch": 0.5732150401351922, + "grad_norm": 1.349663257598877, + "learning_rate": 1.3058224712564382e-05, + "loss": 0.14986801147460938, + "step": 4240 + }, + { + "epoch": 0.5733502323616392, + "grad_norm": 0.6696560382843018, + "learning_rate": 1.305143807736978e-05, + "loss": 0.17470741271972656, + "step": 4241 + }, + { + "epoch": 0.5734854245880862, + "grad_norm": 1.268415093421936, + "learning_rate": 1.3044651847876163e-05, + "loss": 0.233642578125, + "step": 4242 + }, + { + "epoch": 0.5736206168145331, + "grad_norm": 0.8891600370407104, + "learning_rate": 1.3037866025496466e-05, + "loss": 0.22421646118164062, + "step": 4243 + }, + { + "epoch": 0.5737558090409801, + "grad_norm": 0.9408543705940247, + "learning_rate": 1.3031080611643514e-05, + "loss": 0.1760845184326172, + "step": 4244 + }, + { + "epoch": 0.5738910012674271, + "grad_norm": 1.0780693292617798, + "learning_rate": 1.3024295607730083e-05, + "loss": 0.16676855087280273, + "step": 4245 + }, + { + "epoch": 0.5740261934938741, + "grad_norm": 1.180047631263733, + "learning_rate": 1.301751101516884e-05, + "loss": 0.19536399841308594, + "step": 4246 + }, + { + "epoch": 0.574161385720321, + "grad_norm": 0.9813616275787354, + "learning_rate": 1.3010726835372377e-05, + "loss": 0.17902040481567383, + "step": 4247 + }, + { + "epoch": 0.574296577946768, + "grad_norm": 1.3495491743087769, + "learning_rate": 1.30039430697532e-05, + "loss": 0.23540306091308594, + "step": 4248 + }, + { + "epoch": 0.574431770173215, + "grad_norm": 1.459987998008728, + "learning_rate": 1.2997159719723713e-05, + "loss": 0.16062259674072266, + "step": 4249 + }, + { + "epoch": 0.574566962399662, + "grad_norm": 0.9632294178009033, + "learning_rate": 1.2990376786696254e-05, + "loss": 0.19474411010742188, + "step": 4250 + }, + { + "epoch": 0.574702154626109, + "grad_norm": 1.340670108795166, + "learning_rate": 1.2983594272083063e-05, + "loss": 0.2207050323486328, + "step": 4251 + }, + { + "epoch": 0.5748373468525559, + "grad_norm": 1.134849190711975, + "learning_rate": 1.2976812177296307e-05, + "loss": 0.17090415954589844, + "step": 4252 + }, + { + "epoch": 0.5749725390790029, + "grad_norm": 1.022345781326294, + "learning_rate": 1.2970030503748039e-05, + "loss": 0.1649923324584961, + "step": 4253 + }, + { + "epoch": 0.5751077313054499, + "grad_norm": 0.7901409864425659, + "learning_rate": 1.2963249252850242e-05, + "loss": 0.12773704528808594, + "step": 4254 + }, + { + "epoch": 0.5752429235318969, + "grad_norm": 0.9397100806236267, + "learning_rate": 1.295646842601481e-05, + "loss": 0.1429595947265625, + "step": 4255 + }, + { + "epoch": 0.5753781157583439, + "grad_norm": 2.5093753337860107, + "learning_rate": 1.294968802465355e-05, + "loss": 0.2624092102050781, + "step": 4256 + }, + { + "epoch": 0.5755133079847908, + "grad_norm": 1.3152304887771606, + "learning_rate": 1.2942908050178187e-05, + "loss": 0.15006160736083984, + "step": 4257 + }, + { + "epoch": 0.5756485002112378, + "grad_norm": 1.5723315477371216, + "learning_rate": 1.293612850400033e-05, + "loss": 0.16363239288330078, + "step": 4258 + }, + { + "epoch": 0.5757836924376848, + "grad_norm": 1.3714478015899658, + "learning_rate": 1.2929349387531525e-05, + "loss": 0.24256324768066406, + "step": 4259 + }, + { + "epoch": 0.5759188846641318, + "grad_norm": 2.4423773288726807, + "learning_rate": 1.2922570702183217e-05, + "loss": 0.1466083526611328, + "step": 4260 + }, + { + "epoch": 0.5760540768905787, + "grad_norm": 0.7902779579162598, + "learning_rate": 1.2915792449366768e-05, + "loss": 0.17400836944580078, + "step": 4261 + }, + { + "epoch": 0.5761892691170257, + "grad_norm": 1.0598139762878418, + "learning_rate": 1.2909014630493451e-05, + "loss": 0.15865302085876465, + "step": 4262 + }, + { + "epoch": 0.5763244613434727, + "grad_norm": 1.1641024351119995, + "learning_rate": 1.2902237246974432e-05, + "loss": 0.18436670303344727, + "step": 4263 + }, + { + "epoch": 0.5764596535699197, + "grad_norm": 1.0875691175460815, + "learning_rate": 1.289546030022081e-05, + "loss": 0.16290664672851562, + "step": 4264 + }, + { + "epoch": 0.5765948457963667, + "grad_norm": 0.5964146256446838, + "learning_rate": 1.2888683791643572e-05, + "loss": 0.12008476257324219, + "step": 4265 + }, + { + "epoch": 0.5767300380228136, + "grad_norm": 0.897089958190918, + "learning_rate": 1.2881907722653633e-05, + "loss": 0.144439697265625, + "step": 4266 + }, + { + "epoch": 0.5768652302492606, + "grad_norm": 0.595203697681427, + "learning_rate": 1.2875132094661796e-05, + "loss": 0.1365680694580078, + "step": 4267 + }, + { + "epoch": 0.5770004224757076, + "grad_norm": 2.0556488037109375, + "learning_rate": 1.2868356909078787e-05, + "loss": 0.1913890838623047, + "step": 4268 + }, + { + "epoch": 0.5771356147021546, + "grad_norm": 0.9243132472038269, + "learning_rate": 1.286158216731524e-05, + "loss": 0.16130924224853516, + "step": 4269 + }, + { + "epoch": 0.5772708069286016, + "grad_norm": 0.823891282081604, + "learning_rate": 1.2854807870781686e-05, + "loss": 0.17481136322021484, + "step": 4270 + }, + { + "epoch": 0.5774059991550485, + "grad_norm": 0.9245734214782715, + "learning_rate": 1.284803402088858e-05, + "loss": 0.1632232666015625, + "step": 4271 + }, + { + "epoch": 0.5775411913814955, + "grad_norm": 0.7338927984237671, + "learning_rate": 1.284126061904626e-05, + "loss": 0.15558815002441406, + "step": 4272 + }, + { + "epoch": 0.5776763836079425, + "grad_norm": 1.0215390920639038, + "learning_rate": 1.283448766666499e-05, + "loss": 0.2491474151611328, + "step": 4273 + }, + { + "epoch": 0.5778115758343895, + "grad_norm": 0.8469446301460266, + "learning_rate": 1.282771516515494e-05, + "loss": 0.13725662231445312, + "step": 4274 + }, + { + "epoch": 0.5779467680608364, + "grad_norm": 1.7314738035202026, + "learning_rate": 1.282094311592618e-05, + "loss": 0.1603851318359375, + "step": 4275 + }, + { + "epoch": 0.5780819602872835, + "grad_norm": 0.8443201184272766, + "learning_rate": 1.2814171520388676e-05, + "loss": 0.1697230339050293, + "step": 4276 + }, + { + "epoch": 0.5782171525137305, + "grad_norm": 0.8805035948753357, + "learning_rate": 1.2807400379952318e-05, + "loss": 0.17308807373046875, + "step": 4277 + }, + { + "epoch": 0.5783523447401775, + "grad_norm": 1.3650004863739014, + "learning_rate": 1.2800629696026895e-05, + "loss": 0.2112903594970703, + "step": 4278 + }, + { + "epoch": 0.5784875369666245, + "grad_norm": 2.4215996265411377, + "learning_rate": 1.2793859470022098e-05, + "loss": 0.18595027923583984, + "step": 4279 + }, + { + "epoch": 0.5786227291930715, + "grad_norm": 1.419944167137146, + "learning_rate": 1.278708970334753e-05, + "loss": 0.2301921844482422, + "step": 4280 + }, + { + "epoch": 0.5787579214195184, + "grad_norm": 1.2153390645980835, + "learning_rate": 1.2780320397412678e-05, + "loss": 0.20734024047851562, + "step": 4281 + }, + { + "epoch": 0.5788931136459654, + "grad_norm": 1.2576645612716675, + "learning_rate": 1.2773551553626957e-05, + "loss": 0.13971877098083496, + "step": 4282 + }, + { + "epoch": 0.5790283058724124, + "grad_norm": 0.8711782097816467, + "learning_rate": 1.2766783173399675e-05, + "loss": 0.17128467559814453, + "step": 4283 + }, + { + "epoch": 0.5791634980988594, + "grad_norm": 1.9798860549926758, + "learning_rate": 1.276001525814005e-05, + "loss": 0.20389437675476074, + "step": 4284 + }, + { + "epoch": 0.5792986903253063, + "grad_norm": 1.053961157798767, + "learning_rate": 1.2753247809257192e-05, + "loss": 0.20755290985107422, + "step": 4285 + }, + { + "epoch": 0.5794338825517533, + "grad_norm": 0.9589759707450867, + "learning_rate": 1.2746480828160119e-05, + "loss": 0.19939422607421875, + "step": 4286 + }, + { + "epoch": 0.5795690747782003, + "grad_norm": 1.6030961275100708, + "learning_rate": 1.2739714316257753e-05, + "loss": 0.19925212860107422, + "step": 4287 + }, + { + "epoch": 0.5797042670046473, + "grad_norm": 1.0283443927764893, + "learning_rate": 1.273294827495892e-05, + "loss": 0.16189134120941162, + "step": 4288 + }, + { + "epoch": 0.5798394592310943, + "grad_norm": 0.5068125128746033, + "learning_rate": 1.2726182705672352e-05, + "loss": 0.12534713745117188, + "step": 4289 + }, + { + "epoch": 0.5799746514575412, + "grad_norm": 1.5926272869110107, + "learning_rate": 1.271941760980667e-05, + "loss": 0.22414684295654297, + "step": 4290 + }, + { + "epoch": 0.5801098436839882, + "grad_norm": 1.0084165334701538, + "learning_rate": 1.2712652988770396e-05, + "loss": 0.20436549186706543, + "step": 4291 + }, + { + "epoch": 0.5802450359104352, + "grad_norm": 0.9874210357666016, + "learning_rate": 1.2705888843971967e-05, + "loss": 0.18202590942382812, + "step": 4292 + }, + { + "epoch": 0.5803802281368822, + "grad_norm": 1.4401410818099976, + "learning_rate": 1.2699125176819717e-05, + "loss": 0.20728063583374023, + "step": 4293 + }, + { + "epoch": 0.5805154203633291, + "grad_norm": 0.6970511674880981, + "learning_rate": 1.269236198872188e-05, + "loss": 0.11986136436462402, + "step": 4294 + }, + { + "epoch": 0.5806506125897761, + "grad_norm": 0.7394571900367737, + "learning_rate": 1.2685599281086577e-05, + "loss": 0.16261768341064453, + "step": 4295 + }, + { + "epoch": 0.5807858048162231, + "grad_norm": 0.6774092316627502, + "learning_rate": 1.2678837055321849e-05, + "loss": 0.13828563690185547, + "step": 4296 + }, + { + "epoch": 0.5809209970426701, + "grad_norm": 0.9117124676704407, + "learning_rate": 1.267207531283562e-05, + "loss": 0.19450092315673828, + "step": 4297 + }, + { + "epoch": 0.5810561892691171, + "grad_norm": 0.9277790784835815, + "learning_rate": 1.266531405503573e-05, + "loss": 0.13715744018554688, + "step": 4298 + }, + { + "epoch": 0.581191381495564, + "grad_norm": 1.0747668743133545, + "learning_rate": 1.26585532833299e-05, + "loss": 0.21448707580566406, + "step": 4299 + }, + { + "epoch": 0.581326573722011, + "grad_norm": 1.1016261577606201, + "learning_rate": 1.2651792999125763e-05, + "loss": 0.19028282165527344, + "step": 4300 + }, + { + "epoch": 0.581461765948458, + "grad_norm": 1.1361908912658691, + "learning_rate": 1.2645033203830846e-05, + "loss": 0.1358637809753418, + "step": 4301 + }, + { + "epoch": 0.581596958174905, + "grad_norm": 0.6379583477973938, + "learning_rate": 1.2638273898852573e-05, + "loss": 0.13873910903930664, + "step": 4302 + }, + { + "epoch": 0.581732150401352, + "grad_norm": 1.1414406299591064, + "learning_rate": 1.2631515085598275e-05, + "loss": 0.20029830932617188, + "step": 4303 + }, + { + "epoch": 0.5818673426277989, + "grad_norm": 2.173987627029419, + "learning_rate": 1.262475676547516e-05, + "loss": 0.17449665069580078, + "step": 4304 + }, + { + "epoch": 0.5820025348542459, + "grad_norm": 0.9249931573867798, + "learning_rate": 1.2617998939890352e-05, + "loss": 0.15088510513305664, + "step": 4305 + }, + { + "epoch": 0.5821377270806929, + "grad_norm": 1.3683372735977173, + "learning_rate": 1.261124161025087e-05, + "loss": 0.20980072021484375, + "step": 4306 + }, + { + "epoch": 0.5822729193071399, + "grad_norm": 2.326523780822754, + "learning_rate": 1.260448477796362e-05, + "loss": 0.23107528686523438, + "step": 4307 + }, + { + "epoch": 0.5824081115335868, + "grad_norm": 0.7506234645843506, + "learning_rate": 1.259772844443542e-05, + "loss": 0.12270912528038025, + "step": 4308 + }, + { + "epoch": 0.5825433037600338, + "grad_norm": 1.0304185152053833, + "learning_rate": 1.2590972611072964e-05, + "loss": 0.20026206970214844, + "step": 4309 + }, + { + "epoch": 0.5826784959864808, + "grad_norm": 1.0934737920761108, + "learning_rate": 1.2584217279282855e-05, + "loss": 0.16681957244873047, + "step": 4310 + }, + { + "epoch": 0.5828136882129278, + "grad_norm": 2.0285887718200684, + "learning_rate": 1.2577462450471593e-05, + "loss": 0.16617146134376526, + "step": 4311 + }, + { + "epoch": 0.5829488804393748, + "grad_norm": 1.667677402496338, + "learning_rate": 1.2570708126045574e-05, + "loss": 0.23253631591796875, + "step": 4312 + }, + { + "epoch": 0.5830840726658217, + "grad_norm": 1.3718093633651733, + "learning_rate": 1.256395430741107e-05, + "loss": 0.14468622207641602, + "step": 4313 + }, + { + "epoch": 0.5832192648922687, + "grad_norm": 1.5100034475326538, + "learning_rate": 1.2557200995974268e-05, + "loss": 0.202911376953125, + "step": 4314 + }, + { + "epoch": 0.5833544571187157, + "grad_norm": 1.465610384941101, + "learning_rate": 1.2550448193141248e-05, + "loss": 0.15888404846191406, + "step": 4315 + }, + { + "epoch": 0.5834896493451627, + "grad_norm": 1.3126379251480103, + "learning_rate": 1.2543695900317977e-05, + "loss": 0.2035231590270996, + "step": 4316 + }, + { + "epoch": 0.5836248415716097, + "grad_norm": 1.0890021324157715, + "learning_rate": 1.2536944118910323e-05, + "loss": 0.1485280990600586, + "step": 4317 + }, + { + "epoch": 0.5837600337980566, + "grad_norm": 1.1250413656234741, + "learning_rate": 1.2530192850324032e-05, + "loss": 0.15525531768798828, + "step": 4318 + }, + { + "epoch": 0.5838952260245036, + "grad_norm": 0.991051971912384, + "learning_rate": 1.252344209596476e-05, + "loss": 0.1225278377532959, + "step": 4319 + }, + { + "epoch": 0.5840304182509506, + "grad_norm": 1.389427661895752, + "learning_rate": 1.251669185723805e-05, + "loss": 0.21108627319335938, + "step": 4320 + }, + { + "epoch": 0.5841656104773976, + "grad_norm": 2.044124126434326, + "learning_rate": 1.2509942135549344e-05, + "loss": 0.15939569473266602, + "step": 4321 + }, + { + "epoch": 0.5843008027038445, + "grad_norm": 1.3384172916412354, + "learning_rate": 1.250319293230396e-05, + "loss": 0.1753253936767578, + "step": 4322 + }, + { + "epoch": 0.5844359949302915, + "grad_norm": 1.1628210544586182, + "learning_rate": 1.2496444248907121e-05, + "loss": 0.20353269577026367, + "step": 4323 + }, + { + "epoch": 0.5845711871567385, + "grad_norm": 0.6496607661247253, + "learning_rate": 1.2489696086763939e-05, + "loss": 0.13666749000549316, + "step": 4324 + }, + { + "epoch": 0.5847063793831855, + "grad_norm": 1.9256809949874878, + "learning_rate": 1.2482948447279417e-05, + "loss": 0.20899200439453125, + "step": 4325 + }, + { + "epoch": 0.5848415716096325, + "grad_norm": 1.220988154411316, + "learning_rate": 1.2476201331858458e-05, + "loss": 0.17862725257873535, + "step": 4326 + }, + { + "epoch": 0.5849767638360794, + "grad_norm": 1.3501471281051636, + "learning_rate": 1.2469454741905839e-05, + "loss": 0.24008893966674805, + "step": 4327 + }, + { + "epoch": 0.5851119560625264, + "grad_norm": 1.2645536661148071, + "learning_rate": 1.2462708678826233e-05, + "loss": 0.22155380249023438, + "step": 4328 + }, + { + "epoch": 0.5852471482889734, + "grad_norm": 1.2619504928588867, + "learning_rate": 1.245596314402421e-05, + "loss": 0.2033100128173828, + "step": 4329 + }, + { + "epoch": 0.5853823405154204, + "grad_norm": 0.8149838447570801, + "learning_rate": 1.2449218138904225e-05, + "loss": 0.1532745361328125, + "step": 4330 + }, + { + "epoch": 0.5855175327418674, + "grad_norm": 0.9254529476165771, + "learning_rate": 1.2442473664870636e-05, + "loss": 0.17912769317626953, + "step": 4331 + }, + { + "epoch": 0.5856527249683143, + "grad_norm": 0.7227391600608826, + "learning_rate": 1.2435729723327661e-05, + "loss": 0.15714073181152344, + "step": 4332 + }, + { + "epoch": 0.5857879171947613, + "grad_norm": 1.2180477380752563, + "learning_rate": 1.2428986315679433e-05, + "loss": 0.18432140350341797, + "step": 4333 + }, + { + "epoch": 0.5859231094212083, + "grad_norm": 0.9233143925666809, + "learning_rate": 1.2422243443329962e-05, + "loss": 0.1938343048095703, + "step": 4334 + }, + { + "epoch": 0.5860583016476553, + "grad_norm": 1.5898863077163696, + "learning_rate": 1.241550110768316e-05, + "loss": 0.1848278045654297, + "step": 4335 + }, + { + "epoch": 0.5861934938741022, + "grad_norm": 1.0914925336837769, + "learning_rate": 1.2408759310142803e-05, + "loss": 0.23800277709960938, + "step": 4336 + }, + { + "epoch": 0.5863286861005492, + "grad_norm": 0.9458726048469543, + "learning_rate": 1.2402018052112576e-05, + "loss": 0.1806468963623047, + "step": 4337 + }, + { + "epoch": 0.5864638783269962, + "grad_norm": 0.7406532764434814, + "learning_rate": 1.2395277334996045e-05, + "loss": 0.1611042022705078, + "step": 4338 + }, + { + "epoch": 0.5865990705534432, + "grad_norm": 1.0693137645721436, + "learning_rate": 1.2388537160196663e-05, + "loss": 0.171966552734375, + "step": 4339 + }, + { + "epoch": 0.5867342627798902, + "grad_norm": 1.371740460395813, + "learning_rate": 1.2381797529117776e-05, + "loss": 0.21809077262878418, + "step": 4340 + }, + { + "epoch": 0.5868694550063371, + "grad_norm": 1.7710998058319092, + "learning_rate": 1.23750584431626e-05, + "loss": 0.19530391693115234, + "step": 4341 + }, + { + "epoch": 0.5870046472327841, + "grad_norm": 1.2489817142486572, + "learning_rate": 1.236831990373425e-05, + "loss": 0.17886734008789062, + "step": 4342 + }, + { + "epoch": 0.5871398394592311, + "grad_norm": 1.2524535655975342, + "learning_rate": 1.2361581912235736e-05, + "loss": 0.20679569244384766, + "step": 4343 + }, + { + "epoch": 0.5872750316856781, + "grad_norm": 1.4351321458816528, + "learning_rate": 1.235484447006994e-05, + "loss": 0.16537857055664062, + "step": 4344 + }, + { + "epoch": 0.587410223912125, + "grad_norm": 1.0270174741744995, + "learning_rate": 1.2348107578639627e-05, + "loss": 0.14121007919311523, + "step": 4345 + }, + { + "epoch": 0.587545416138572, + "grad_norm": 0.9509152173995972, + "learning_rate": 1.2341371239347454e-05, + "loss": 0.2061767578125, + "step": 4346 + }, + { + "epoch": 0.587680608365019, + "grad_norm": 0.839234471321106, + "learning_rate": 1.233463545359597e-05, + "loss": 0.21115493774414062, + "step": 4347 + }, + { + "epoch": 0.587815800591466, + "grad_norm": 1.3698605298995972, + "learning_rate": 1.23279002227876e-05, + "loss": 0.1501150131225586, + "step": 4348 + }, + { + "epoch": 0.587950992817913, + "grad_norm": 1.0913116931915283, + "learning_rate": 1.2321165548324655e-05, + "loss": 0.18481826782226562, + "step": 4349 + }, + { + "epoch": 0.5880861850443599, + "grad_norm": 1.304708480834961, + "learning_rate": 1.2314431431609323e-05, + "loss": 0.1565408706665039, + "step": 4350 + }, + { + "epoch": 0.5882213772708069, + "grad_norm": 1.2944470643997192, + "learning_rate": 1.2307697874043687e-05, + "loss": 0.21492767333984375, + "step": 4351 + }, + { + "epoch": 0.5883565694972539, + "grad_norm": 1.3525173664093018, + "learning_rate": 1.2300964877029712e-05, + "loss": 0.16132545471191406, + "step": 4352 + }, + { + "epoch": 0.5884917617237009, + "grad_norm": 0.8785399794578552, + "learning_rate": 1.2294232441969246e-05, + "loss": 0.17656803131103516, + "step": 4353 + }, + { + "epoch": 0.5886269539501479, + "grad_norm": 1.5672249794006348, + "learning_rate": 1.2287500570264017e-05, + "loss": 0.18292903900146484, + "step": 4354 + }, + { + "epoch": 0.5887621461765948, + "grad_norm": 0.6533482670783997, + "learning_rate": 1.2280769263315628e-05, + "loss": 0.13961410522460938, + "step": 4355 + }, + { + "epoch": 0.5888973384030418, + "grad_norm": 1.1324665546417236, + "learning_rate": 1.2274038522525577e-05, + "loss": 0.21323680877685547, + "step": 4356 + }, + { + "epoch": 0.5890325306294888, + "grad_norm": 1.1106525659561157, + "learning_rate": 1.2267308349295246e-05, + "loss": 0.1803441047668457, + "step": 4357 + }, + { + "epoch": 0.5891677228559358, + "grad_norm": 0.6501247882843018, + "learning_rate": 1.2260578745025892e-05, + "loss": 0.11105108261108398, + "step": 4358 + }, + { + "epoch": 0.5893029150823828, + "grad_norm": 1.8832035064697266, + "learning_rate": 1.225384971111865e-05, + "loss": 0.21324539184570312, + "step": 4359 + }, + { + "epoch": 0.5894381073088297, + "grad_norm": 0.7464386820793152, + "learning_rate": 1.224712124897454e-05, + "loss": 0.1534595489501953, + "step": 4360 + }, + { + "epoch": 0.5895732995352767, + "grad_norm": 1.8309890031814575, + "learning_rate": 1.2240393359994466e-05, + "loss": 0.19975852966308594, + "step": 4361 + }, + { + "epoch": 0.5897084917617237, + "grad_norm": 1.187455415725708, + "learning_rate": 1.2233666045579209e-05, + "loss": 0.1963634490966797, + "step": 4362 + }, + { + "epoch": 0.5898436839881707, + "grad_norm": 1.1307337284088135, + "learning_rate": 1.222693930712944e-05, + "loss": 0.22422409057617188, + "step": 4363 + }, + { + "epoch": 0.5899788762146176, + "grad_norm": 1.4619524478912354, + "learning_rate": 1.2220213146045691e-05, + "loss": 0.15769386291503906, + "step": 4364 + }, + { + "epoch": 0.5901140684410646, + "grad_norm": 0.8249306678771973, + "learning_rate": 1.2213487563728389e-05, + "loss": 0.11738276481628418, + "step": 4365 + }, + { + "epoch": 0.5902492606675116, + "grad_norm": 2.279766798019409, + "learning_rate": 1.220676256157783e-05, + "loss": 0.17304039001464844, + "step": 4366 + }, + { + "epoch": 0.5903844528939586, + "grad_norm": 1.2244853973388672, + "learning_rate": 1.2200038140994212e-05, + "loss": 0.16664791107177734, + "step": 4367 + }, + { + "epoch": 0.5905196451204056, + "grad_norm": 0.8576146364212036, + "learning_rate": 1.2193314303377578e-05, + "loss": 0.14099502563476562, + "step": 4368 + }, + { + "epoch": 0.5906548373468525, + "grad_norm": 0.9393298625946045, + "learning_rate": 1.2186591050127874e-05, + "loss": 0.14593619108200073, + "step": 4369 + }, + { + "epoch": 0.5907900295732995, + "grad_norm": 1.0864542722702026, + "learning_rate": 1.2179868382644916e-05, + "loss": 0.1727609634399414, + "step": 4370 + }, + { + "epoch": 0.5909252217997465, + "grad_norm": 1.1678662300109863, + "learning_rate": 1.2173146302328396e-05, + "loss": 0.17487037181854248, + "step": 4371 + }, + { + "epoch": 0.5910604140261935, + "grad_norm": 0.7419041991233826, + "learning_rate": 1.21664248105779e-05, + "loss": 0.15142822265625, + "step": 4372 + }, + { + "epoch": 0.5911956062526404, + "grad_norm": 1.0202295780181885, + "learning_rate": 1.2159703908792858e-05, + "loss": 0.23473358154296875, + "step": 4373 + }, + { + "epoch": 0.5913307984790874, + "grad_norm": 0.9415817856788635, + "learning_rate": 1.2152983598372613e-05, + "loss": 0.1876983642578125, + "step": 4374 + }, + { + "epoch": 0.5914659907055344, + "grad_norm": 0.9046608805656433, + "learning_rate": 1.2146263880716366e-05, + "loss": 0.19054603576660156, + "step": 4375 + }, + { + "epoch": 0.5916011829319814, + "grad_norm": 1.2391260862350464, + "learning_rate": 1.2139544757223194e-05, + "loss": 0.2619590759277344, + "step": 4376 + }, + { + "epoch": 0.5917363751584284, + "grad_norm": 1.1138972043991089, + "learning_rate": 1.2132826229292066e-05, + "loss": 0.1624593734741211, + "step": 4377 + }, + { + "epoch": 0.5918715673848753, + "grad_norm": 2.4832799434661865, + "learning_rate": 1.2126108298321798e-05, + "loss": 0.15798282623291016, + "step": 4378 + }, + { + "epoch": 0.5920067596113223, + "grad_norm": 1.0790153741836548, + "learning_rate": 1.2119390965711107e-05, + "loss": 0.19666290283203125, + "step": 4379 + }, + { + "epoch": 0.5921419518377693, + "grad_norm": 2.42635178565979, + "learning_rate": 1.2112674232858582e-05, + "loss": 0.15904903411865234, + "step": 4380 + }, + { + "epoch": 0.5922771440642163, + "grad_norm": 0.7873253226280212, + "learning_rate": 1.2105958101162684e-05, + "loss": 0.1550908088684082, + "step": 4381 + }, + { + "epoch": 0.5924123362906633, + "grad_norm": 1.2129740715026855, + "learning_rate": 1.2099242572021735e-05, + "loss": 0.16709661483764648, + "step": 4382 + }, + { + "epoch": 0.5925475285171102, + "grad_norm": 1.0263253450393677, + "learning_rate": 1.209252764683395e-05, + "loss": 0.1557321548461914, + "step": 4383 + }, + { + "epoch": 0.5926827207435572, + "grad_norm": 1.5654892921447754, + "learning_rate": 1.2085813326997414e-05, + "loss": 0.1923818588256836, + "step": 4384 + }, + { + "epoch": 0.5928179129700042, + "grad_norm": 1.7086323499679565, + "learning_rate": 1.2079099613910088e-05, + "loss": 0.24108505249023438, + "step": 4385 + }, + { + "epoch": 0.5929531051964512, + "grad_norm": 1.155551552772522, + "learning_rate": 1.20723865089698e-05, + "loss": 0.13469314575195312, + "step": 4386 + }, + { + "epoch": 0.5930882974228981, + "grad_norm": 1.0771546363830566, + "learning_rate": 1.2065674013574248e-05, + "loss": 0.13491582870483398, + "step": 4387 + }, + { + "epoch": 0.5932234896493451, + "grad_norm": 1.1418342590332031, + "learning_rate": 1.2058962129121013e-05, + "loss": 0.14437389373779297, + "step": 4388 + }, + { + "epoch": 0.5933586818757921, + "grad_norm": 1.1176729202270508, + "learning_rate": 1.2052250857007548e-05, + "loss": 0.20442867279052734, + "step": 4389 + }, + { + "epoch": 0.5934938741022391, + "grad_norm": 0.7867580652236938, + "learning_rate": 1.2045540198631177e-05, + "loss": 0.15497398376464844, + "step": 4390 + }, + { + "epoch": 0.5936290663286861, + "grad_norm": 0.716128945350647, + "learning_rate": 1.2038830155389091e-05, + "loss": 0.1474003791809082, + "step": 4391 + }, + { + "epoch": 0.593764258555133, + "grad_norm": 1.2444361448287964, + "learning_rate": 1.2032120728678354e-05, + "loss": 0.15840959548950195, + "step": 4392 + }, + { + "epoch": 0.59389945078158, + "grad_norm": 0.9370972514152527, + "learning_rate": 1.2025411919895907e-05, + "loss": 0.15124869346618652, + "step": 4393 + }, + { + "epoch": 0.594034643008027, + "grad_norm": 1.0189954042434692, + "learning_rate": 1.2018703730438561e-05, + "loss": 0.18697166442871094, + "step": 4394 + }, + { + "epoch": 0.594169835234474, + "grad_norm": 1.2457313537597656, + "learning_rate": 1.2011996161703003e-05, + "loss": 0.1989736557006836, + "step": 4395 + }, + { + "epoch": 0.594305027460921, + "grad_norm": 2.136608600616455, + "learning_rate": 1.2005289215085775e-05, + "loss": 0.21790874004364014, + "step": 4396 + }, + { + "epoch": 0.5944402196873679, + "grad_norm": 0.718353807926178, + "learning_rate": 1.19985828919833e-05, + "loss": 0.13578176498413086, + "step": 4397 + }, + { + "epoch": 0.5945754119138149, + "grad_norm": 0.8081104755401611, + "learning_rate": 1.1991877193791872e-05, + "loss": 0.1490478515625, + "step": 4398 + }, + { + "epoch": 0.5947106041402619, + "grad_norm": 1.5630050897598267, + "learning_rate": 1.1985172121907653e-05, + "loss": 0.20532894134521484, + "step": 4399 + }, + { + "epoch": 0.5948457963667089, + "grad_norm": 1.1921988725662231, + "learning_rate": 1.1978467677726682e-05, + "loss": 0.17661619186401367, + "step": 4400 + }, + { + "epoch": 0.5949809885931558, + "grad_norm": 1.5652927160263062, + "learning_rate": 1.197176386264485e-05, + "loss": 0.21953392028808594, + "step": 4401 + }, + { + "epoch": 0.5951161808196028, + "grad_norm": 1.1749805212020874, + "learning_rate": 1.1965060678057927e-05, + "loss": 0.1577920913696289, + "step": 4402 + }, + { + "epoch": 0.5952513730460498, + "grad_norm": 1.7236204147338867, + "learning_rate": 1.1958358125361554e-05, + "loss": 0.26123809814453125, + "step": 4403 + }, + { + "epoch": 0.5953865652724968, + "grad_norm": 0.6865897178649902, + "learning_rate": 1.1951656205951247e-05, + "loss": 0.14724159240722656, + "step": 4404 + }, + { + "epoch": 0.5955217574989438, + "grad_norm": 1.1327730417251587, + "learning_rate": 1.1944954921222367e-05, + "loss": 0.1978607177734375, + "step": 4405 + }, + { + "epoch": 0.5956569497253907, + "grad_norm": 1.3412612676620483, + "learning_rate": 1.1938254272570167e-05, + "loss": 0.09381437301635742, + "step": 4406 + }, + { + "epoch": 0.5957921419518377, + "grad_norm": 1.0048580169677734, + "learning_rate": 1.1931554261389751e-05, + "loss": 0.1890707015991211, + "step": 4407 + }, + { + "epoch": 0.5959273341782847, + "grad_norm": 1.0373151302337646, + "learning_rate": 1.1924854889076103e-05, + "loss": 0.2082977294921875, + "step": 4408 + }, + { + "epoch": 0.5960625264047317, + "grad_norm": 1.0999053716659546, + "learning_rate": 1.191815615702407e-05, + "loss": 0.1708965301513672, + "step": 4409 + }, + { + "epoch": 0.5961977186311787, + "grad_norm": 1.4999499320983887, + "learning_rate": 1.1911458066628353e-05, + "loss": 0.13686561584472656, + "step": 4410 + }, + { + "epoch": 0.5963329108576257, + "grad_norm": 0.7241935133934021, + "learning_rate": 1.1904760619283537e-05, + "loss": 0.11888504028320312, + "step": 4411 + }, + { + "epoch": 0.5964681030840727, + "grad_norm": 1.7231944799423218, + "learning_rate": 1.1898063816384069e-05, + "loss": 0.24175643920898438, + "step": 4412 + }, + { + "epoch": 0.5966032953105197, + "grad_norm": 1.0629514455795288, + "learning_rate": 1.189136765932426e-05, + "loss": 0.17063546180725098, + "step": 4413 + }, + { + "epoch": 0.5967384875369667, + "grad_norm": 1.0609545707702637, + "learning_rate": 1.1884672149498276e-05, + "loss": 0.20450544357299805, + "step": 4414 + }, + { + "epoch": 0.5968736797634137, + "grad_norm": 1.1088476181030273, + "learning_rate": 1.187797728830016e-05, + "loss": 0.13601970672607422, + "step": 4415 + }, + { + "epoch": 0.5970088719898606, + "grad_norm": 0.9738326668739319, + "learning_rate": 1.1871283077123823e-05, + "loss": 0.15453529357910156, + "step": 4416 + }, + { + "epoch": 0.5971440642163076, + "grad_norm": 0.7930399775505066, + "learning_rate": 1.1864589517363038e-05, + "loss": 0.16332721710205078, + "step": 4417 + }, + { + "epoch": 0.5972792564427546, + "grad_norm": 0.9717445373535156, + "learning_rate": 1.185789661041144e-05, + "loss": 0.16792798042297363, + "step": 4418 + }, + { + "epoch": 0.5974144486692016, + "grad_norm": 1.0329456329345703, + "learning_rate": 1.1851204357662513e-05, + "loss": 0.16846752166748047, + "step": 4419 + }, + { + "epoch": 0.5975496408956485, + "grad_norm": 0.8872194886207581, + "learning_rate": 1.1844512760509634e-05, + "loss": 0.15872907638549805, + "step": 4420 + }, + { + "epoch": 0.5976848331220955, + "grad_norm": 1.2731571197509766, + "learning_rate": 1.1837821820346022e-05, + "loss": 0.19169270992279053, + "step": 4421 + }, + { + "epoch": 0.5978200253485425, + "grad_norm": 1.4863935708999634, + "learning_rate": 1.1831131538564775e-05, + "loss": 0.2197713851928711, + "step": 4422 + }, + { + "epoch": 0.5979552175749895, + "grad_norm": 0.89424729347229, + "learning_rate": 1.1824441916558843e-05, + "loss": 0.12622642517089844, + "step": 4423 + }, + { + "epoch": 0.5980904098014365, + "grad_norm": 1.5346304178237915, + "learning_rate": 1.1817752955721031e-05, + "loss": 0.18735313415527344, + "step": 4424 + }, + { + "epoch": 0.5982256020278834, + "grad_norm": 1.4477431774139404, + "learning_rate": 1.1811064657444023e-05, + "loss": 0.1607341766357422, + "step": 4425 + }, + { + "epoch": 0.5983607942543304, + "grad_norm": 1.0291216373443604, + "learning_rate": 1.1804377023120361e-05, + "loss": 0.17717933654785156, + "step": 4426 + }, + { + "epoch": 0.5984959864807774, + "grad_norm": 1.7882074117660522, + "learning_rate": 1.1797690054142451e-05, + "loss": 0.16795873641967773, + "step": 4427 + }, + { + "epoch": 0.5986311787072244, + "grad_norm": 0.7566695213317871, + "learning_rate": 1.1791003751902542e-05, + "loss": 0.1444687843322754, + "step": 4428 + }, + { + "epoch": 0.5987663709336714, + "grad_norm": 1.025880217552185, + "learning_rate": 1.1784318117792763e-05, + "loss": 0.14234542846679688, + "step": 4429 + }, + { + "epoch": 0.5989015631601183, + "grad_norm": 2.395387887954712, + "learning_rate": 1.17776331532051e-05, + "loss": 0.18979501724243164, + "step": 4430 + }, + { + "epoch": 0.5990367553865653, + "grad_norm": 0.7047892212867737, + "learning_rate": 1.1770948859531397e-05, + "loss": 0.14166879653930664, + "step": 4431 + }, + { + "epoch": 0.5991719476130123, + "grad_norm": 0.931225597858429, + "learning_rate": 1.1764265238163369e-05, + "loss": 0.16495800018310547, + "step": 4432 + }, + { + "epoch": 0.5993071398394593, + "grad_norm": 1.7586725950241089, + "learning_rate": 1.1757582290492568e-05, + "loss": 0.22754478454589844, + "step": 4433 + }, + { + "epoch": 0.5994423320659062, + "grad_norm": 1.6298688650131226, + "learning_rate": 1.1750900017910425e-05, + "loss": 0.25134849548339844, + "step": 4434 + }, + { + "epoch": 0.5995775242923532, + "grad_norm": 0.5789780020713806, + "learning_rate": 1.1744218421808221e-05, + "loss": 0.16058588027954102, + "step": 4435 + }, + { + "epoch": 0.5997127165188002, + "grad_norm": 0.5974397659301758, + "learning_rate": 1.1737537503577112e-05, + "loss": 0.1259450912475586, + "step": 4436 + }, + { + "epoch": 0.5998479087452472, + "grad_norm": 1.842922568321228, + "learning_rate": 1.1730857264608086e-05, + "loss": 0.18219757080078125, + "step": 4437 + }, + { + "epoch": 0.5999831009716942, + "grad_norm": 1.8020470142364502, + "learning_rate": 1.1724177706292013e-05, + "loss": 0.16417217254638672, + "step": 4438 + }, + { + "epoch": 0.6001182931981411, + "grad_norm": 0.6571294665336609, + "learning_rate": 1.1717498830019607e-05, + "loss": 0.1324167251586914, + "step": 4439 + }, + { + "epoch": 0.6002534854245881, + "grad_norm": 0.9516823887825012, + "learning_rate": 1.1710820637181449e-05, + "loss": 0.16005659103393555, + "step": 4440 + }, + { + "epoch": 0.6003886776510351, + "grad_norm": 1.3139857053756714, + "learning_rate": 1.170414312916798e-05, + "loss": 0.18193626403808594, + "step": 4441 + }, + { + "epoch": 0.6005238698774821, + "grad_norm": 0.8724102973937988, + "learning_rate": 1.1697466307369484e-05, + "loss": 0.21126365661621094, + "step": 4442 + }, + { + "epoch": 0.600659062103929, + "grad_norm": 1.7499313354492188, + "learning_rate": 1.1690790173176116e-05, + "loss": 0.1791229248046875, + "step": 4443 + }, + { + "epoch": 0.600794254330376, + "grad_norm": 1.04434072971344, + "learning_rate": 1.1684114727977876e-05, + "loss": 0.12543082237243652, + "step": 4444 + }, + { + "epoch": 0.600929446556823, + "grad_norm": 1.4621626138687134, + "learning_rate": 1.167743997316464e-05, + "loss": 0.19497299194335938, + "step": 4445 + }, + { + "epoch": 0.60106463878327, + "grad_norm": 1.579993486404419, + "learning_rate": 1.1670765910126112e-05, + "loss": 0.2030773162841797, + "step": 4446 + }, + { + "epoch": 0.601199831009717, + "grad_norm": 0.7601368427276611, + "learning_rate": 1.1664092540251877e-05, + "loss": 0.15359234809875488, + "step": 4447 + }, + { + "epoch": 0.601335023236164, + "grad_norm": 2.2608797550201416, + "learning_rate": 1.1657419864931361e-05, + "loss": 0.1562957763671875, + "step": 4448 + }, + { + "epoch": 0.6014702154626109, + "grad_norm": 2.7142677307128906, + "learning_rate": 1.165074788555386e-05, + "loss": 0.2184542417526245, + "step": 4449 + }, + { + "epoch": 0.6016054076890579, + "grad_norm": 1.0566056966781616, + "learning_rate": 1.1644076603508514e-05, + "loss": 0.1809086799621582, + "step": 4450 + }, + { + "epoch": 0.6017405999155049, + "grad_norm": 1.276042103767395, + "learning_rate": 1.1637406020184305e-05, + "loss": 0.12530803680419922, + "step": 4451 + }, + { + "epoch": 0.6018757921419519, + "grad_norm": 1.224887728691101, + "learning_rate": 1.1630736136970097e-05, + "loss": 0.18129348754882812, + "step": 4452 + }, + { + "epoch": 0.6020109843683988, + "grad_norm": 0.8334834575653076, + "learning_rate": 1.162406695525459e-05, + "loss": 0.11057472229003906, + "step": 4453 + }, + { + "epoch": 0.6021461765948458, + "grad_norm": 0.8606310486793518, + "learning_rate": 1.161739847642635e-05, + "loss": 0.16757774353027344, + "step": 4454 + }, + { + "epoch": 0.6022813688212928, + "grad_norm": 2.4583969116210938, + "learning_rate": 1.1610730701873788e-05, + "loss": 0.1543407440185547, + "step": 4455 + }, + { + "epoch": 0.6024165610477398, + "grad_norm": 1.5592225790023804, + "learning_rate": 1.1604063632985163e-05, + "loss": 0.20044517517089844, + "step": 4456 + }, + { + "epoch": 0.6025517532741868, + "grad_norm": 1.4451719522476196, + "learning_rate": 1.1597397271148598e-05, + "loss": 0.2137298583984375, + "step": 4457 + }, + { + "epoch": 0.6026869455006337, + "grad_norm": 1.1640915870666504, + "learning_rate": 1.1590731617752067e-05, + "loss": 0.2288990020751953, + "step": 4458 + }, + { + "epoch": 0.6028221377270807, + "grad_norm": 1.4748117923736572, + "learning_rate": 1.1584066674183398e-05, + "loss": 0.21102356910705566, + "step": 4459 + }, + { + "epoch": 0.6029573299535277, + "grad_norm": 0.6651380062103271, + "learning_rate": 1.1577402441830262e-05, + "loss": 0.12137174606323242, + "step": 4460 + }, + { + "epoch": 0.6030925221799747, + "grad_norm": 1.3792213201522827, + "learning_rate": 1.1570738922080185e-05, + "loss": 0.1801152229309082, + "step": 4461 + }, + { + "epoch": 0.6032277144064216, + "grad_norm": 0.8088310360908508, + "learning_rate": 1.1564076116320552e-05, + "loss": 0.14694571495056152, + "step": 4462 + }, + { + "epoch": 0.6033629066328686, + "grad_norm": 1.3152133226394653, + "learning_rate": 1.1557414025938592e-05, + "loss": 0.1998577117919922, + "step": 4463 + }, + { + "epoch": 0.6034980988593156, + "grad_norm": 1.5952835083007812, + "learning_rate": 1.15507526523214e-05, + "loss": 0.21207046508789062, + "step": 4464 + }, + { + "epoch": 0.6036332910857626, + "grad_norm": 0.8917785882949829, + "learning_rate": 1.1544091996855895e-05, + "loss": 0.14701461791992188, + "step": 4465 + }, + { + "epoch": 0.6037684833122096, + "grad_norm": 1.1096436977386475, + "learning_rate": 1.153743206092886e-05, + "loss": 0.1688985824584961, + "step": 4466 + }, + { + "epoch": 0.6039036755386565, + "grad_norm": 1.0827115774154663, + "learning_rate": 1.1530772845926936e-05, + "loss": 0.17384815216064453, + "step": 4467 + }, + { + "epoch": 0.6040388677651035, + "grad_norm": 0.8188608288764954, + "learning_rate": 1.1524114353236614e-05, + "loss": 0.11463069915771484, + "step": 4468 + }, + { + "epoch": 0.6041740599915505, + "grad_norm": 1.302260160446167, + "learning_rate": 1.151745658424421e-05, + "loss": 0.1435256004333496, + "step": 4469 + }, + { + "epoch": 0.6043092522179975, + "grad_norm": 1.0960304737091064, + "learning_rate": 1.151079954033592e-05, + "loss": 0.2195911407470703, + "step": 4470 + }, + { + "epoch": 0.6044444444444445, + "grad_norm": 0.7673820853233337, + "learning_rate": 1.150414322289777e-05, + "loss": 0.1555652618408203, + "step": 4471 + }, + { + "epoch": 0.6045796366708914, + "grad_norm": 0.9879593253135681, + "learning_rate": 1.1497487633315643e-05, + "loss": 0.2014293670654297, + "step": 4472 + }, + { + "epoch": 0.6047148288973384, + "grad_norm": 1.3240058422088623, + "learning_rate": 1.1490832772975275e-05, + "loss": 0.1659221649169922, + "step": 4473 + }, + { + "epoch": 0.6048500211237854, + "grad_norm": 0.9766626358032227, + "learning_rate": 1.148417864326223e-05, + "loss": 0.1854991912841797, + "step": 4474 + }, + { + "epoch": 0.6049852133502324, + "grad_norm": 0.8749812245368958, + "learning_rate": 1.1477525245561944e-05, + "loss": 0.12531614303588867, + "step": 4475 + }, + { + "epoch": 0.6051204055766793, + "grad_norm": 1.1721289157867432, + "learning_rate": 1.1470872581259684e-05, + "loss": 0.12496829032897949, + "step": 4476 + }, + { + "epoch": 0.6052555978031263, + "grad_norm": 1.0978264808654785, + "learning_rate": 1.146422065174057e-05, + "loss": 0.20592308044433594, + "step": 4477 + }, + { + "epoch": 0.6053907900295733, + "grad_norm": 1.0824264287948608, + "learning_rate": 1.1457569458389578e-05, + "loss": 0.1690044403076172, + "step": 4478 + }, + { + "epoch": 0.6055259822560203, + "grad_norm": 2.1679115295410156, + "learning_rate": 1.145091900259151e-05, + "loss": 0.21513795852661133, + "step": 4479 + }, + { + "epoch": 0.6056611744824673, + "grad_norm": 1.1737805604934692, + "learning_rate": 1.1444269285731032e-05, + "loss": 0.18422222137451172, + "step": 4480 + }, + { + "epoch": 0.6057963667089142, + "grad_norm": 1.9953927993774414, + "learning_rate": 1.1437620309192652e-05, + "loss": 0.20594406127929688, + "step": 4481 + }, + { + "epoch": 0.6059315589353612, + "grad_norm": 1.491796612739563, + "learning_rate": 1.1430972074360722e-05, + "loss": 0.1867208480834961, + "step": 4482 + }, + { + "epoch": 0.6060667511618082, + "grad_norm": 1.4249210357666016, + "learning_rate": 1.1424324582619435e-05, + "loss": 0.2382183074951172, + "step": 4483 + }, + { + "epoch": 0.6062019433882552, + "grad_norm": 0.9030844569206238, + "learning_rate": 1.1417677835352837e-05, + "loss": 0.1659536361694336, + "step": 4484 + }, + { + "epoch": 0.6063371356147021, + "grad_norm": 1.4421099424362183, + "learning_rate": 1.1411031833944816e-05, + "loss": 0.16944313049316406, + "step": 4485 + }, + { + "epoch": 0.6064723278411491, + "grad_norm": 1.5993998050689697, + "learning_rate": 1.1404386579779111e-05, + "loss": 0.21051788330078125, + "step": 4486 + }, + { + "epoch": 0.6066075200675961, + "grad_norm": 1.6755239963531494, + "learning_rate": 1.1397742074239296e-05, + "loss": 0.223663330078125, + "step": 4487 + }, + { + "epoch": 0.6067427122940431, + "grad_norm": 2.1034271717071533, + "learning_rate": 1.1391098318708785e-05, + "loss": 0.20969676971435547, + "step": 4488 + }, + { + "epoch": 0.6068779045204901, + "grad_norm": 1.2152445316314697, + "learning_rate": 1.1384455314570848e-05, + "loss": 0.18887662887573242, + "step": 4489 + }, + { + "epoch": 0.607013096746937, + "grad_norm": 1.2689993381500244, + "learning_rate": 1.1377813063208596e-05, + "loss": 0.20824241638183594, + "step": 4490 + }, + { + "epoch": 0.607148288973384, + "grad_norm": 1.4393547773361206, + "learning_rate": 1.1371171566004986e-05, + "loss": 0.1833667755126953, + "step": 4491 + }, + { + "epoch": 0.607283481199831, + "grad_norm": 2.1438379287719727, + "learning_rate": 1.1364530824342806e-05, + "loss": 0.19746971130371094, + "step": 4492 + }, + { + "epoch": 0.607418673426278, + "grad_norm": 0.6704422235488892, + "learning_rate": 1.1357890839604688e-05, + "loss": 0.1542677879333496, + "step": 4493 + }, + { + "epoch": 0.607553865652725, + "grad_norm": 0.672864556312561, + "learning_rate": 1.1351251613173122e-05, + "loss": 0.16314935684204102, + "step": 4494 + }, + { + "epoch": 0.6076890578791719, + "grad_norm": 1.3669929504394531, + "learning_rate": 1.1344613146430428e-05, + "loss": 0.20238876342773438, + "step": 4495 + }, + { + "epoch": 0.6078242501056189, + "grad_norm": 1.352858304977417, + "learning_rate": 1.1337975440758775e-05, + "loss": 0.18425321578979492, + "step": 4496 + }, + { + "epoch": 0.6079594423320659, + "grad_norm": 1.45911705493927, + "learning_rate": 1.133133849754016e-05, + "loss": 0.1382887363433838, + "step": 4497 + }, + { + "epoch": 0.6080946345585129, + "grad_norm": 0.6741175055503845, + "learning_rate": 1.1324702318156431e-05, + "loss": 0.11459493637084961, + "step": 4498 + }, + { + "epoch": 0.6082298267849598, + "grad_norm": 0.6408833265304565, + "learning_rate": 1.1318066903989279e-05, + "loss": 0.12037158012390137, + "step": 4499 + }, + { + "epoch": 0.6083650190114068, + "grad_norm": 0.9546215534210205, + "learning_rate": 1.1311432256420232e-05, + "loss": 0.18200302124023438, + "step": 4500 + }, + { + "epoch": 0.6085002112378538, + "grad_norm": 2.613563060760498, + "learning_rate": 1.1304798376830664e-05, + "loss": 0.26177215576171875, + "step": 4501 + }, + { + "epoch": 0.6086354034643008, + "grad_norm": 2.309908390045166, + "learning_rate": 1.1298165266601778e-05, + "loss": 0.20312881469726562, + "step": 4502 + }, + { + "epoch": 0.6087705956907478, + "grad_norm": 1.8486065864562988, + "learning_rate": 1.129153292711462e-05, + "loss": 0.20337486267089844, + "step": 4503 + }, + { + "epoch": 0.6089057879171947, + "grad_norm": 1.1266438961029053, + "learning_rate": 1.1284901359750082e-05, + "loss": 0.17210209369659424, + "step": 4504 + }, + { + "epoch": 0.6090409801436417, + "grad_norm": 0.7883851528167725, + "learning_rate": 1.1278270565888897e-05, + "loss": 0.1486492156982422, + "step": 4505 + }, + { + "epoch": 0.6091761723700887, + "grad_norm": 1.0527299642562866, + "learning_rate": 1.1271640546911624e-05, + "loss": 0.20038795471191406, + "step": 4506 + }, + { + "epoch": 0.6093113645965357, + "grad_norm": 1.2081286907196045, + "learning_rate": 1.1265011304198672e-05, + "loss": 0.17427825927734375, + "step": 4507 + }, + { + "epoch": 0.6094465568229827, + "grad_norm": 1.2031521797180176, + "learning_rate": 1.1258382839130282e-05, + "loss": 0.17670536041259766, + "step": 4508 + }, + { + "epoch": 0.6095817490494296, + "grad_norm": 1.716536283493042, + "learning_rate": 1.1251755153086536e-05, + "loss": 0.17140483856201172, + "step": 4509 + }, + { + "epoch": 0.6097169412758766, + "grad_norm": 1.6569764614105225, + "learning_rate": 1.1245128247447362e-05, + "loss": 0.20662498474121094, + "step": 4510 + }, + { + "epoch": 0.6098521335023236, + "grad_norm": 0.9101887941360474, + "learning_rate": 1.1238502123592507e-05, + "loss": 0.17058944702148438, + "step": 4511 + }, + { + "epoch": 0.6099873257287706, + "grad_norm": 1.4276028871536255, + "learning_rate": 1.1231876782901568e-05, + "loss": 0.1376333236694336, + "step": 4512 + }, + { + "epoch": 0.6101225179552175, + "grad_norm": 1.7719577550888062, + "learning_rate": 1.1225252226753975e-05, + "loss": 0.17164087295532227, + "step": 4513 + }, + { + "epoch": 0.6102577101816645, + "grad_norm": 1.115468144416809, + "learning_rate": 1.1218628456529005e-05, + "loss": 0.18319129943847656, + "step": 4514 + }, + { + "epoch": 0.6103929024081115, + "grad_norm": 2.283583879470825, + "learning_rate": 1.1212005473605746e-05, + "loss": 0.1563892364501953, + "step": 4515 + }, + { + "epoch": 0.6105280946345585, + "grad_norm": 1.5471925735473633, + "learning_rate": 1.120538327936315e-05, + "loss": 0.21539688110351562, + "step": 4516 + }, + { + "epoch": 0.6106632868610055, + "grad_norm": 1.5506393909454346, + "learning_rate": 1.1198761875179993e-05, + "loss": 0.19168996810913086, + "step": 4517 + }, + { + "epoch": 0.6107984790874524, + "grad_norm": 2.4129598140716553, + "learning_rate": 1.1192141262434883e-05, + "loss": 0.1796398162841797, + "step": 4518 + }, + { + "epoch": 0.6109336713138994, + "grad_norm": 0.9621261954307556, + "learning_rate": 1.1185521442506272e-05, + "loss": 0.17345809936523438, + "step": 4519 + }, + { + "epoch": 0.6110688635403464, + "grad_norm": 1.6808950901031494, + "learning_rate": 1.1178902416772432e-05, + "loss": 0.1947035789489746, + "step": 4520 + }, + { + "epoch": 0.6112040557667934, + "grad_norm": 1.3471368551254272, + "learning_rate": 1.1172284186611485e-05, + "loss": 0.23317337036132812, + "step": 4521 + }, + { + "epoch": 0.6113392479932404, + "grad_norm": 0.9267129302024841, + "learning_rate": 1.1165666753401384e-05, + "loss": 0.1781902313232422, + "step": 4522 + }, + { + "epoch": 0.6114744402196873, + "grad_norm": 1.2934696674346924, + "learning_rate": 1.1159050118519914e-05, + "loss": 0.1874551773071289, + "step": 4523 + }, + { + "epoch": 0.6116096324461343, + "grad_norm": 0.8290989995002747, + "learning_rate": 1.1152434283344696e-05, + "loss": 0.13314247131347656, + "step": 4524 + }, + { + "epoch": 0.6117448246725813, + "grad_norm": 1.0614969730377197, + "learning_rate": 1.114581924925317e-05, + "loss": 0.19978570938110352, + "step": 4525 + }, + { + "epoch": 0.6118800168990283, + "grad_norm": 1.0362046957015991, + "learning_rate": 1.113920501762263e-05, + "loss": 0.16478729248046875, + "step": 4526 + }, + { + "epoch": 0.6120152091254752, + "grad_norm": 1.1429672241210938, + "learning_rate": 1.1132591589830193e-05, + "loss": 0.19054794311523438, + "step": 4527 + }, + { + "epoch": 0.6121504013519222, + "grad_norm": 1.873545527458191, + "learning_rate": 1.1125978967252818e-05, + "loss": 0.21915864944458008, + "step": 4528 + }, + { + "epoch": 0.6122855935783692, + "grad_norm": 1.033534288406372, + "learning_rate": 1.1119367151267278e-05, + "loss": 0.182769775390625, + "step": 4529 + }, + { + "epoch": 0.6124207858048162, + "grad_norm": 0.9385781288146973, + "learning_rate": 1.1112756143250186e-05, + "loss": 0.1590590476989746, + "step": 4530 + }, + { + "epoch": 0.6125559780312632, + "grad_norm": 1.1570844650268555, + "learning_rate": 1.1106145944577995e-05, + "loss": 0.18547821044921875, + "step": 4531 + }, + { + "epoch": 0.6126911702577101, + "grad_norm": 1.081661581993103, + "learning_rate": 1.1099536556626984e-05, + "loss": 0.1681685447692871, + "step": 4532 + }, + { + "epoch": 0.6128263624841571, + "grad_norm": 0.8565942049026489, + "learning_rate": 1.1092927980773269e-05, + "loss": 0.15418148040771484, + "step": 4533 + }, + { + "epoch": 0.6129615547106041, + "grad_norm": 1.021229863166809, + "learning_rate": 1.1086320218392777e-05, + "loss": 0.19008445739746094, + "step": 4534 + }, + { + "epoch": 0.6130967469370511, + "grad_norm": 2.2541840076446533, + "learning_rate": 1.1079713270861286e-05, + "loss": 0.20617389678955078, + "step": 4535 + }, + { + "epoch": 0.613231939163498, + "grad_norm": 1.0166759490966797, + "learning_rate": 1.1073107139554395e-05, + "loss": 0.1794281005859375, + "step": 4536 + }, + { + "epoch": 0.613367131389945, + "grad_norm": 1.092777967453003, + "learning_rate": 1.1066501825847545e-05, + "loss": 0.1987934112548828, + "step": 4537 + }, + { + "epoch": 0.613502323616392, + "grad_norm": 0.8734856247901917, + "learning_rate": 1.1059897331115985e-05, + "loss": 0.15904521942138672, + "step": 4538 + }, + { + "epoch": 0.613637515842839, + "grad_norm": 1.6738489866256714, + "learning_rate": 1.1053293656734816e-05, + "loss": 0.1717853546142578, + "step": 4539 + }, + { + "epoch": 0.613772708069286, + "grad_norm": 0.768803060054779, + "learning_rate": 1.1046690804078949e-05, + "loss": 0.14902973175048828, + "step": 4540 + }, + { + "epoch": 0.613907900295733, + "grad_norm": 2.584709882736206, + "learning_rate": 1.1040088774523139e-05, + "loss": 0.17228436470031738, + "step": 4541 + }, + { + "epoch": 0.6140430925221799, + "grad_norm": 0.5402894616127014, + "learning_rate": 1.1033487569441971e-05, + "loss": 0.07872390747070312, + "step": 4542 + }, + { + "epoch": 0.6141782847486269, + "grad_norm": 0.8832460045814514, + "learning_rate": 1.1026887190209834e-05, + "loss": 0.1846160888671875, + "step": 4543 + }, + { + "epoch": 0.6143134769750739, + "grad_norm": 1.4036839008331299, + "learning_rate": 1.1020287638200977e-05, + "loss": 0.12274360656738281, + "step": 4544 + }, + { + "epoch": 0.6144486692015209, + "grad_norm": 2.664909601211548, + "learning_rate": 1.1013688914789452e-05, + "loss": 0.2253737449645996, + "step": 4545 + }, + { + "epoch": 0.6145838614279678, + "grad_norm": 1.306464433670044, + "learning_rate": 1.100709102134915e-05, + "loss": 0.19986629486083984, + "step": 4546 + }, + { + "epoch": 0.6147190536544149, + "grad_norm": 1.1906028985977173, + "learning_rate": 1.10004939592538e-05, + "loss": 0.18352508544921875, + "step": 4547 + }, + { + "epoch": 0.6148542458808619, + "grad_norm": 1.2507046461105347, + "learning_rate": 1.0993897729876927e-05, + "loss": 0.14834576845169067, + "step": 4548 + }, + { + "epoch": 0.6149894381073089, + "grad_norm": 1.3494844436645508, + "learning_rate": 1.0987302334591915e-05, + "loss": 0.21949195861816406, + "step": 4549 + }, + { + "epoch": 0.6151246303337559, + "grad_norm": 1.3425730466842651, + "learning_rate": 1.098070777477195e-05, + "loss": 0.13867855072021484, + "step": 4550 + }, + { + "epoch": 0.6152598225602028, + "grad_norm": 0.8596582412719727, + "learning_rate": 1.0974114051790067e-05, + "loss": 0.18651771545410156, + "step": 4551 + }, + { + "epoch": 0.6153950147866498, + "grad_norm": 0.7119019031524658, + "learning_rate": 1.09675211670191e-05, + "loss": 0.13357830047607422, + "step": 4552 + }, + { + "epoch": 0.6155302070130968, + "grad_norm": 1.2550837993621826, + "learning_rate": 1.0960929121831732e-05, + "loss": 0.18177032470703125, + "step": 4553 + }, + { + "epoch": 0.6156653992395438, + "grad_norm": 1.6899585723876953, + "learning_rate": 1.095433791760046e-05, + "loss": 0.19745254516601562, + "step": 4554 + }, + { + "epoch": 0.6158005914659908, + "grad_norm": 1.235771894454956, + "learning_rate": 1.0947747555697609e-05, + "loss": 0.1887187957763672, + "step": 4555 + }, + { + "epoch": 0.6159357836924377, + "grad_norm": 0.8981272578239441, + "learning_rate": 1.0941158037495328e-05, + "loss": 0.1479482650756836, + "step": 4556 + }, + { + "epoch": 0.6160709759188847, + "grad_norm": 1.0570513010025024, + "learning_rate": 1.0934569364365583e-05, + "loss": 0.1391596794128418, + "step": 4557 + }, + { + "epoch": 0.6162061681453317, + "grad_norm": 1.5715014934539795, + "learning_rate": 1.0927981537680176e-05, + "loss": 0.220855712890625, + "step": 4558 + }, + { + "epoch": 0.6163413603717787, + "grad_norm": 1.3209017515182495, + "learning_rate": 1.0921394558810726e-05, + "loss": 0.18042564392089844, + "step": 4559 + }, + { + "epoch": 0.6164765525982256, + "grad_norm": 1.069432020187378, + "learning_rate": 1.0914808429128688e-05, + "loss": 0.15283775329589844, + "step": 4560 + }, + { + "epoch": 0.6166117448246726, + "grad_norm": 1.0885248184204102, + "learning_rate": 1.0908223150005315e-05, + "loss": 0.1771860122680664, + "step": 4561 + }, + { + "epoch": 0.6167469370511196, + "grad_norm": 1.6780335903167725, + "learning_rate": 1.09016387228117e-05, + "loss": 0.1519308090209961, + "step": 4562 + }, + { + "epoch": 0.6168821292775666, + "grad_norm": 1.3289042711257935, + "learning_rate": 1.0895055148918758e-05, + "loss": 0.13169002532958984, + "step": 4563 + }, + { + "epoch": 0.6170173215040136, + "grad_norm": 0.828385591506958, + "learning_rate": 1.0888472429697223e-05, + "loss": 0.12916278839111328, + "step": 4564 + }, + { + "epoch": 0.6171525137304605, + "grad_norm": 1.1332522630691528, + "learning_rate": 1.088189056651766e-05, + "loss": 0.19527626037597656, + "step": 4565 + }, + { + "epoch": 0.6172877059569075, + "grad_norm": 1.0133165121078491, + "learning_rate": 1.0875309560750438e-05, + "loss": 0.1661849021911621, + "step": 4566 + }, + { + "epoch": 0.6174228981833545, + "grad_norm": 0.9010225534439087, + "learning_rate": 1.086872941376576e-05, + "loss": 0.19013690948486328, + "step": 4567 + }, + { + "epoch": 0.6175580904098015, + "grad_norm": 2.1900336742401123, + "learning_rate": 1.0862150126933648e-05, + "loss": 0.20239639282226562, + "step": 4568 + }, + { + "epoch": 0.6176932826362485, + "grad_norm": 1.2862699031829834, + "learning_rate": 1.0855571701623942e-05, + "loss": 0.15459442138671875, + "step": 4569 + }, + { + "epoch": 0.6178284748626954, + "grad_norm": 2.8855538368225098, + "learning_rate": 1.0848994139206317e-05, + "loss": 0.20385360717773438, + "step": 4570 + }, + { + "epoch": 0.6179636670891424, + "grad_norm": 2.246278762817383, + "learning_rate": 1.0842417441050247e-05, + "loss": 0.17681121826171875, + "step": 4571 + }, + { + "epoch": 0.6180988593155894, + "grad_norm": 1.1702877283096313, + "learning_rate": 1.0835841608525031e-05, + "loss": 0.16691112518310547, + "step": 4572 + }, + { + "epoch": 0.6182340515420364, + "grad_norm": 1.3400757312774658, + "learning_rate": 1.08292666429998e-05, + "loss": 0.18357467651367188, + "step": 4573 + }, + { + "epoch": 0.6183692437684833, + "grad_norm": 1.0004241466522217, + "learning_rate": 1.08226925458435e-05, + "loss": 0.15882110595703125, + "step": 4574 + }, + { + "epoch": 0.6185044359949303, + "grad_norm": 0.7759889364242554, + "learning_rate": 1.0816119318424882e-05, + "loss": 0.13988018035888672, + "step": 4575 + }, + { + "epoch": 0.6186396282213773, + "grad_norm": 1.545602560043335, + "learning_rate": 1.0809546962112535e-05, + "loss": 0.1817493438720703, + "step": 4576 + }, + { + "epoch": 0.6187748204478243, + "grad_norm": 0.690873920917511, + "learning_rate": 1.0802975478274856e-05, + "loss": 0.15880346298217773, + "step": 4577 + }, + { + "epoch": 0.6189100126742713, + "grad_norm": 0.995508074760437, + "learning_rate": 1.0796404868280062e-05, + "loss": 0.15073108673095703, + "step": 4578 + }, + { + "epoch": 0.6190452049007182, + "grad_norm": 1.0737624168395996, + "learning_rate": 1.07898351334962e-05, + "loss": 0.188201904296875, + "step": 4579 + }, + { + "epoch": 0.6191803971271652, + "grad_norm": 1.2057877779006958, + "learning_rate": 1.0783266275291103e-05, + "loss": 0.1840190887451172, + "step": 4580 + }, + { + "epoch": 0.6193155893536122, + "grad_norm": 1.266737937927246, + "learning_rate": 1.077669829503246e-05, + "loss": 0.1401052474975586, + "step": 4581 + }, + { + "epoch": 0.6194507815800592, + "grad_norm": 2.0364530086517334, + "learning_rate": 1.077013119408775e-05, + "loss": 0.207000732421875, + "step": 4582 + }, + { + "epoch": 0.6195859738065062, + "grad_norm": 0.8128786683082581, + "learning_rate": 1.0763564973824289e-05, + "loss": 0.1413872241973877, + "step": 4583 + }, + { + "epoch": 0.6197211660329531, + "grad_norm": 1.1461583375930786, + "learning_rate": 1.0756999635609185e-05, + "loss": 0.16029882431030273, + "step": 4584 + }, + { + "epoch": 0.6198563582594001, + "grad_norm": 1.1196434497833252, + "learning_rate": 1.0750435180809381e-05, + "loss": 0.13834571838378906, + "step": 4585 + }, + { + "epoch": 0.6199915504858471, + "grad_norm": 0.9671342372894287, + "learning_rate": 1.074387161079164e-05, + "loss": 0.15489912033081055, + "step": 4586 + }, + { + "epoch": 0.6201267427122941, + "grad_norm": 0.6395117044448853, + "learning_rate": 1.0737308926922521e-05, + "loss": 0.12819957733154297, + "step": 4587 + }, + { + "epoch": 0.620261934938741, + "grad_norm": 0.5339891314506531, + "learning_rate": 1.0730747130568424e-05, + "loss": 0.09637451171875, + "step": 4588 + }, + { + "epoch": 0.620397127165188, + "grad_norm": 0.9782006740570068, + "learning_rate": 1.0724186223095532e-05, + "loss": 0.14755463600158691, + "step": 4589 + }, + { + "epoch": 0.620532319391635, + "grad_norm": 0.717842698097229, + "learning_rate": 1.071762620586987e-05, + "loss": 0.12798690795898438, + "step": 4590 + }, + { + "epoch": 0.620667511618082, + "grad_norm": 0.6801381707191467, + "learning_rate": 1.0711067080257273e-05, + "loss": 0.12725210189819336, + "step": 4591 + }, + { + "epoch": 0.620802703844529, + "grad_norm": 1.1541473865509033, + "learning_rate": 1.0704508847623374e-05, + "loss": 0.16517877578735352, + "step": 4592 + }, + { + "epoch": 0.6209378960709759, + "grad_norm": 1.15793776512146, + "learning_rate": 1.069795150933365e-05, + "loss": 0.16428565979003906, + "step": 4593 + }, + { + "epoch": 0.6210730882974229, + "grad_norm": 1.8597896099090576, + "learning_rate": 1.0691395066753357e-05, + "loss": 0.1530294418334961, + "step": 4594 + }, + { + "epoch": 0.6212082805238699, + "grad_norm": 1.3026847839355469, + "learning_rate": 1.0684839521247584e-05, + "loss": 0.1327953338623047, + "step": 4595 + }, + { + "epoch": 0.6213434727503169, + "grad_norm": 2.3333561420440674, + "learning_rate": 1.0678284874181234e-05, + "loss": 0.19743061065673828, + "step": 4596 + }, + { + "epoch": 0.6214786649767639, + "grad_norm": 1.294751763343811, + "learning_rate": 1.0671731126919028e-05, + "loss": 0.1623539924621582, + "step": 4597 + }, + { + "epoch": 0.6216138572032108, + "grad_norm": 1.0483520030975342, + "learning_rate": 1.066517828082548e-05, + "loss": 0.18813586235046387, + "step": 4598 + }, + { + "epoch": 0.6217490494296578, + "grad_norm": 1.2937839031219482, + "learning_rate": 1.0658626337264926e-05, + "loss": 0.18982410430908203, + "step": 4599 + }, + { + "epoch": 0.6218842416561048, + "grad_norm": 1.4427005052566528, + "learning_rate": 1.0652075297601518e-05, + "loss": 0.1804485321044922, + "step": 4600 + }, + { + "epoch": 0.6220194338825518, + "grad_norm": 1.1247267723083496, + "learning_rate": 1.0645525163199222e-05, + "loss": 0.17714977264404297, + "step": 4601 + }, + { + "epoch": 0.6221546261089987, + "grad_norm": 0.7889978885650635, + "learning_rate": 1.063897593542181e-05, + "loss": 0.13114452362060547, + "step": 4602 + }, + { + "epoch": 0.6222898183354457, + "grad_norm": 1.139345407485962, + "learning_rate": 1.0632427615632864e-05, + "loss": 0.15349483489990234, + "step": 4603 + }, + { + "epoch": 0.6224250105618927, + "grad_norm": 1.434650182723999, + "learning_rate": 1.0625880205195776e-05, + "loss": 0.1331329345703125, + "step": 4604 + }, + { + "epoch": 0.6225602027883397, + "grad_norm": 1.0262975692749023, + "learning_rate": 1.0619333705473754e-05, + "loss": 0.1393299102783203, + "step": 4605 + }, + { + "epoch": 0.6226953950147867, + "grad_norm": 1.4420390129089355, + "learning_rate": 1.0612788117829821e-05, + "loss": 0.18594121932983398, + "step": 4606 + }, + { + "epoch": 0.6228305872412336, + "grad_norm": 1.669246792793274, + "learning_rate": 1.0606243443626792e-05, + "loss": 0.17644691467285156, + "step": 4607 + }, + { + "epoch": 0.6229657794676806, + "grad_norm": 0.8634865283966064, + "learning_rate": 1.0599699684227313e-05, + "loss": 0.12969398498535156, + "step": 4608 + }, + { + "epoch": 0.6231009716941276, + "grad_norm": 1.1777290105819702, + "learning_rate": 1.0593156840993818e-05, + "loss": 0.1725749969482422, + "step": 4609 + }, + { + "epoch": 0.6232361639205746, + "grad_norm": 0.7007025480270386, + "learning_rate": 1.0586614915288571e-05, + "loss": 0.11710739135742188, + "step": 4610 + }, + { + "epoch": 0.6233713561470215, + "grad_norm": 0.9938804507255554, + "learning_rate": 1.0580073908473641e-05, + "loss": 0.16475486755371094, + "step": 4611 + }, + { + "epoch": 0.6235065483734685, + "grad_norm": 1.4000250101089478, + "learning_rate": 1.0573533821910885e-05, + "loss": 0.15743255615234375, + "step": 4612 + }, + { + "epoch": 0.6236417405999155, + "grad_norm": 1.3600869178771973, + "learning_rate": 1.0566994656961997e-05, + "loss": 0.16325855255126953, + "step": 4613 + }, + { + "epoch": 0.6237769328263625, + "grad_norm": 1.289290428161621, + "learning_rate": 1.0560456414988456e-05, + "loss": 0.2019367218017578, + "step": 4614 + }, + { + "epoch": 0.6239121250528095, + "grad_norm": 2.0171430110931396, + "learning_rate": 1.0553919097351564e-05, + "loss": 0.15810012817382812, + "step": 4615 + }, + { + "epoch": 0.6240473172792564, + "grad_norm": 1.670272946357727, + "learning_rate": 1.0547382705412434e-05, + "loss": 0.16502857208251953, + "step": 4616 + }, + { + "epoch": 0.6241825095057034, + "grad_norm": 1.7739546298980713, + "learning_rate": 1.054084724053196e-05, + "loss": 0.21545028686523438, + "step": 4617 + }, + { + "epoch": 0.6243177017321504, + "grad_norm": 1.5325731039047241, + "learning_rate": 1.0534312704070875e-05, + "loss": 0.1570906639099121, + "step": 4618 + }, + { + "epoch": 0.6244528939585974, + "grad_norm": 1.1488697528839111, + "learning_rate": 1.0527779097389695e-05, + "loss": 0.19235801696777344, + "step": 4619 + }, + { + "epoch": 0.6245880861850444, + "grad_norm": 1.7705705165863037, + "learning_rate": 1.0521246421848762e-05, + "loss": 0.22964859008789062, + "step": 4620 + }, + { + "epoch": 0.6247232784114913, + "grad_norm": 0.9314525127410889, + "learning_rate": 1.0514714678808202e-05, + "loss": 0.18545055389404297, + "step": 4621 + }, + { + "epoch": 0.6248584706379383, + "grad_norm": 1.6340751647949219, + "learning_rate": 1.0508183869627962e-05, + "loss": 0.16865485906600952, + "step": 4622 + }, + { + "epoch": 0.6249936628643853, + "grad_norm": 1.7807961702346802, + "learning_rate": 1.0501653995667798e-05, + "loss": 0.16479110717773438, + "step": 4623 + }, + { + "epoch": 0.6251288550908323, + "grad_norm": 1.5432101488113403, + "learning_rate": 1.0495125058287258e-05, + "loss": 0.23844242095947266, + "step": 4624 + }, + { + "epoch": 0.6252640473172792, + "grad_norm": 1.0514414310455322, + "learning_rate": 1.0488597058845708e-05, + "loss": 0.1761188507080078, + "step": 4625 + }, + { + "epoch": 0.6253992395437262, + "grad_norm": 1.1294559240341187, + "learning_rate": 1.0482069998702304e-05, + "loss": 0.18339115381240845, + "step": 4626 + }, + { + "epoch": 0.6255344317701732, + "grad_norm": 0.8533125519752502, + "learning_rate": 1.0475543879216017e-05, + "loss": 0.17749595642089844, + "step": 4627 + }, + { + "epoch": 0.6256696239966202, + "grad_norm": 1.6903786659240723, + "learning_rate": 1.0469018701745626e-05, + "loss": 0.21721458435058594, + "step": 4628 + }, + { + "epoch": 0.6258048162230672, + "grad_norm": 1.4242613315582275, + "learning_rate": 1.0462494467649704e-05, + "loss": 0.196502685546875, + "step": 4629 + }, + { + "epoch": 0.6259400084495141, + "grad_norm": 0.8546478152275085, + "learning_rate": 1.045597117828663e-05, + "loss": 0.13039493560791016, + "step": 4630 + }, + { + "epoch": 0.6260752006759611, + "grad_norm": 0.9507201313972473, + "learning_rate": 1.0449448835014586e-05, + "loss": 0.16056489944458008, + "step": 4631 + }, + { + "epoch": 0.6262103929024081, + "grad_norm": 0.7374429106712341, + "learning_rate": 1.044292743919156e-05, + "loss": 0.1510152816772461, + "step": 4632 + }, + { + "epoch": 0.6263455851288551, + "grad_norm": 0.72164386510849, + "learning_rate": 1.0436406992175343e-05, + "loss": 0.12612628936767578, + "step": 4633 + }, + { + "epoch": 0.626480777355302, + "grad_norm": 1.7215973138809204, + "learning_rate": 1.0429887495323532e-05, + "loss": 0.13476836681365967, + "step": 4634 + }, + { + "epoch": 0.626615969581749, + "grad_norm": 1.4045487642288208, + "learning_rate": 1.0423368949993512e-05, + "loss": 0.1385326385498047, + "step": 4635 + }, + { + "epoch": 0.626751161808196, + "grad_norm": 1.3238190412521362, + "learning_rate": 1.041685135754248e-05, + "loss": 0.20109033584594727, + "step": 4636 + }, + { + "epoch": 0.626886354034643, + "grad_norm": 0.9641658067703247, + "learning_rate": 1.0410334719327435e-05, + "loss": 0.1602001190185547, + "step": 4637 + }, + { + "epoch": 0.62702154626109, + "grad_norm": 0.9967225790023804, + "learning_rate": 1.0403819036705177e-05, + "loss": 0.1710834503173828, + "step": 4638 + }, + { + "epoch": 0.627156738487537, + "grad_norm": 1.0205364227294922, + "learning_rate": 1.0397304311032311e-05, + "loss": 0.1914135217666626, + "step": 4639 + }, + { + "epoch": 0.6272919307139839, + "grad_norm": 1.4185870885849, + "learning_rate": 1.039079054366523e-05, + "loss": 0.15804290771484375, + "step": 4640 + }, + { + "epoch": 0.6274271229404309, + "grad_norm": 1.4510555267333984, + "learning_rate": 1.0384277735960133e-05, + "loss": 0.23080825805664062, + "step": 4641 + }, + { + "epoch": 0.6275623151668779, + "grad_norm": 1.5419342517852783, + "learning_rate": 1.0377765889273025e-05, + "loss": 0.17972993850708008, + "step": 4642 + }, + { + "epoch": 0.6276975073933249, + "grad_norm": 1.2421633005142212, + "learning_rate": 1.0371255004959715e-05, + "loss": 0.19145584106445312, + "step": 4643 + }, + { + "epoch": 0.6278326996197718, + "grad_norm": 1.2397582530975342, + "learning_rate": 1.036474508437579e-05, + "loss": 0.12718677520751953, + "step": 4644 + }, + { + "epoch": 0.6279678918462188, + "grad_norm": 1.7837036848068237, + "learning_rate": 1.035823612887666e-05, + "loss": 0.1256704330444336, + "step": 4645 + }, + { + "epoch": 0.6281030840726658, + "grad_norm": 0.6432226300239563, + "learning_rate": 1.0351728139817517e-05, + "loss": 0.11899089813232422, + "step": 4646 + }, + { + "epoch": 0.6282382762991128, + "grad_norm": 1.33556067943573, + "learning_rate": 1.0345221118553362e-05, + "loss": 0.19678497314453125, + "step": 4647 + }, + { + "epoch": 0.6283734685255598, + "grad_norm": 1.135422706604004, + "learning_rate": 1.0338715066439002e-05, + "loss": 0.1705026626586914, + "step": 4648 + }, + { + "epoch": 0.6285086607520067, + "grad_norm": 1.9654052257537842, + "learning_rate": 1.0332209984829013e-05, + "loss": 0.17806053161621094, + "step": 4649 + }, + { + "epoch": 0.6286438529784537, + "grad_norm": 0.8713955879211426, + "learning_rate": 1.03257058750778e-05, + "loss": 0.19050979614257812, + "step": 4650 + }, + { + "epoch": 0.6287790452049007, + "grad_norm": 0.8905146718025208, + "learning_rate": 1.0319202738539548e-05, + "loss": 0.14739418029785156, + "step": 4651 + }, + { + "epoch": 0.6289142374313477, + "grad_norm": 2.4853031635284424, + "learning_rate": 1.0312700576568253e-05, + "loss": 0.2068653106689453, + "step": 4652 + }, + { + "epoch": 0.6290494296577946, + "grad_norm": 0.8443086743354797, + "learning_rate": 1.0306199390517688e-05, + "loss": 0.13300514221191406, + "step": 4653 + }, + { + "epoch": 0.6291846218842416, + "grad_norm": 2.0395450592041016, + "learning_rate": 1.0299699181741439e-05, + "loss": 0.15018737316131592, + "step": 4654 + }, + { + "epoch": 0.6293198141106886, + "grad_norm": 2.7070350646972656, + "learning_rate": 1.0293199951592889e-05, + "loss": 0.21428871154785156, + "step": 4655 + }, + { + "epoch": 0.6294550063371356, + "grad_norm": 2.3074026107788086, + "learning_rate": 1.0286701701425206e-05, + "loss": 0.20762348175048828, + "step": 4656 + }, + { + "epoch": 0.6295901985635826, + "grad_norm": 2.9708187580108643, + "learning_rate": 1.0280204432591369e-05, + "loss": 0.25496482849121094, + "step": 4657 + }, + { + "epoch": 0.6297253907900295, + "grad_norm": 1.287521481513977, + "learning_rate": 1.0273708146444133e-05, + "loss": 0.20575332641601562, + "step": 4658 + }, + { + "epoch": 0.6298605830164765, + "grad_norm": 1.2072495222091675, + "learning_rate": 1.0267212844336062e-05, + "loss": 0.1679697036743164, + "step": 4659 + }, + { + "epoch": 0.6299957752429235, + "grad_norm": 0.9251106381416321, + "learning_rate": 1.026071852761952e-05, + "loss": 0.18853187561035156, + "step": 4660 + }, + { + "epoch": 0.6301309674693705, + "grad_norm": 1.6173293590545654, + "learning_rate": 1.025422519764665e-05, + "loss": 0.2219095230102539, + "step": 4661 + }, + { + "epoch": 0.6302661596958175, + "grad_norm": 1.0239653587341309, + "learning_rate": 1.024773285576941e-05, + "loss": 0.1710376739501953, + "step": 4662 + }, + { + "epoch": 0.6304013519222644, + "grad_norm": 0.6747888922691345, + "learning_rate": 1.0241241503339524e-05, + "loss": 0.13608551025390625, + "step": 4663 + }, + { + "epoch": 0.6305365441487114, + "grad_norm": 0.7485164999961853, + "learning_rate": 1.023475114170853e-05, + "loss": 0.13349580764770508, + "step": 4664 + }, + { + "epoch": 0.6306717363751584, + "grad_norm": 0.8306536078453064, + "learning_rate": 1.0228261772227768e-05, + "loss": 0.17373108863830566, + "step": 4665 + }, + { + "epoch": 0.6308069286016054, + "grad_norm": 1.2872484922409058, + "learning_rate": 1.0221773396248349e-05, + "loss": 0.20141887664794922, + "step": 4666 + }, + { + "epoch": 0.6309421208280523, + "grad_norm": 1.0703446865081787, + "learning_rate": 1.021528601512119e-05, + "loss": 0.18709754943847656, + "step": 4667 + }, + { + "epoch": 0.6310773130544993, + "grad_norm": 1.3038949966430664, + "learning_rate": 1.0208799630196994e-05, + "loss": 0.1746535301208496, + "step": 4668 + }, + { + "epoch": 0.6312125052809463, + "grad_norm": 0.9164425134658813, + "learning_rate": 1.0202314242826264e-05, + "loss": 0.1842174530029297, + "step": 4669 + }, + { + "epoch": 0.6313476975073933, + "grad_norm": 1.1847476959228516, + "learning_rate": 1.0195829854359299e-05, + "loss": 0.18784749507904053, + "step": 4670 + }, + { + "epoch": 0.6314828897338403, + "grad_norm": 0.886667788028717, + "learning_rate": 1.0189346466146175e-05, + "loss": 0.15140533447265625, + "step": 4671 + }, + { + "epoch": 0.6316180819602872, + "grad_norm": 1.0732970237731934, + "learning_rate": 1.018286407953677e-05, + "loss": 0.16356849670410156, + "step": 4672 + }, + { + "epoch": 0.6317532741867342, + "grad_norm": 1.7637754678726196, + "learning_rate": 1.017638269588075e-05, + "loss": 0.16754722595214844, + "step": 4673 + }, + { + "epoch": 0.6318884664131812, + "grad_norm": 1.2803517580032349, + "learning_rate": 1.0169902316527575e-05, + "loss": 0.11214661598205566, + "step": 4674 + }, + { + "epoch": 0.6320236586396282, + "grad_norm": 2.3433589935302734, + "learning_rate": 1.0163422942826502e-05, + "loss": 0.2077770233154297, + "step": 4675 + }, + { + "epoch": 0.6321588508660752, + "grad_norm": 1.4271533489227295, + "learning_rate": 1.0156944576126555e-05, + "loss": 0.19047927856445312, + "step": 4676 + }, + { + "epoch": 0.6322940430925221, + "grad_norm": 0.8583289980888367, + "learning_rate": 1.0150467217776579e-05, + "loss": 0.1617131233215332, + "step": 4677 + }, + { + "epoch": 0.6324292353189691, + "grad_norm": 0.7472401857376099, + "learning_rate": 1.0143990869125185e-05, + "loss": 0.1176985502243042, + "step": 4678 + }, + { + "epoch": 0.6325644275454161, + "grad_norm": 1.6210994720458984, + "learning_rate": 1.013751553152079e-05, + "loss": 0.18180227279663086, + "step": 4679 + }, + { + "epoch": 0.6326996197718631, + "grad_norm": 1.6664046049118042, + "learning_rate": 1.0131041206311594e-05, + "loss": 0.13129043579101562, + "step": 4680 + }, + { + "epoch": 0.63283481199831, + "grad_norm": 1.600602626800537, + "learning_rate": 1.0124567894845578e-05, + "loss": 0.1798548698425293, + "step": 4681 + }, + { + "epoch": 0.632970004224757, + "grad_norm": 1.4209963083267212, + "learning_rate": 1.0118095598470528e-05, + "loss": 0.15433979034423828, + "step": 4682 + }, + { + "epoch": 0.6331051964512041, + "grad_norm": 1.3669917583465576, + "learning_rate": 1.0111624318534006e-05, + "loss": 0.18619203567504883, + "step": 4683 + }, + { + "epoch": 0.6332403886776511, + "grad_norm": 0.7033229470252991, + "learning_rate": 1.0105154056383377e-05, + "loss": 0.13695907592773438, + "step": 4684 + }, + { + "epoch": 0.6333755809040981, + "grad_norm": 2.507122755050659, + "learning_rate": 1.0098684813365764e-05, + "loss": 0.23016929626464844, + "step": 4685 + }, + { + "epoch": 0.633510773130545, + "grad_norm": 2.0399060249328613, + "learning_rate": 1.0092216590828115e-05, + "loss": 0.20401859283447266, + "step": 4686 + }, + { + "epoch": 0.633645965356992, + "grad_norm": 2.3903968334198, + "learning_rate": 1.0085749390117146e-05, + "loss": 0.17146015167236328, + "step": 4687 + }, + { + "epoch": 0.633781157583439, + "grad_norm": 1.0674290657043457, + "learning_rate": 1.0079283212579354e-05, + "loss": 0.17020606994628906, + "step": 4688 + }, + { + "epoch": 0.633916349809886, + "grad_norm": 1.0933576822280884, + "learning_rate": 1.0072818059561045e-05, + "loss": 0.19450807571411133, + "step": 4689 + }, + { + "epoch": 0.634051542036333, + "grad_norm": 1.3950855731964111, + "learning_rate": 1.0066353932408285e-05, + "loss": 0.2066326141357422, + "step": 4690 + }, + { + "epoch": 0.6341867342627799, + "grad_norm": 0.7375941872596741, + "learning_rate": 1.0059890832466948e-05, + "loss": 0.1607685089111328, + "step": 4691 + }, + { + "epoch": 0.6343219264892269, + "grad_norm": 0.872260570526123, + "learning_rate": 1.0053428761082684e-05, + "loss": 0.16034507751464844, + "step": 4692 + }, + { + "epoch": 0.6344571187156739, + "grad_norm": 1.213697910308838, + "learning_rate": 1.0046967719600927e-05, + "loss": 0.158735990524292, + "step": 4693 + }, + { + "epoch": 0.6345923109421209, + "grad_norm": 0.8235481977462769, + "learning_rate": 1.0040507709366912e-05, + "loss": 0.14668965339660645, + "step": 4694 + }, + { + "epoch": 0.6347275031685679, + "grad_norm": 1.2809909582138062, + "learning_rate": 1.0034048731725631e-05, + "loss": 0.19062519073486328, + "step": 4695 + }, + { + "epoch": 0.6348626953950148, + "grad_norm": 0.9800539612770081, + "learning_rate": 1.0027590788021886e-05, + "loss": 0.13780784606933594, + "step": 4696 + }, + { + "epoch": 0.6349978876214618, + "grad_norm": 1.1619529724121094, + "learning_rate": 1.0021133879600258e-05, + "loss": 0.21240901947021484, + "step": 4697 + }, + { + "epoch": 0.6351330798479088, + "grad_norm": 0.5779402852058411, + "learning_rate": 1.0014678007805108e-05, + "loss": 0.12523365020751953, + "step": 4698 + }, + { + "epoch": 0.6352682720743558, + "grad_norm": 1.340545892715454, + "learning_rate": 1.0008223173980579e-05, + "loss": 0.1750335693359375, + "step": 4699 + }, + { + "epoch": 0.6354034643008027, + "grad_norm": 1.4963045120239258, + "learning_rate": 1.0001769379470604e-05, + "loss": 0.1944108009338379, + "step": 4700 + }, + { + "epoch": 0.6355386565272497, + "grad_norm": 1.1744086742401123, + "learning_rate": 9.995316625618898e-06, + "loss": 0.16965961456298828, + "step": 4701 + }, + { + "epoch": 0.6356738487536967, + "grad_norm": 1.0566115379333496, + "learning_rate": 9.988864913768962e-06, + "loss": 0.1754918098449707, + "step": 4702 + }, + { + "epoch": 0.6358090409801437, + "grad_norm": 0.8660688400268555, + "learning_rate": 9.982414245264071e-06, + "loss": 0.11942672729492188, + "step": 4703 + }, + { + "epoch": 0.6359442332065907, + "grad_norm": 0.7203137874603271, + "learning_rate": 9.975964621447293e-06, + "loss": 0.13349008560180664, + "step": 4704 + }, + { + "epoch": 0.6360794254330376, + "grad_norm": 1.598728895187378, + "learning_rate": 9.96951604366147e-06, + "loss": 0.18683242797851562, + "step": 4705 + }, + { + "epoch": 0.6362146176594846, + "grad_norm": 0.7741524577140808, + "learning_rate": 9.963068513249233e-06, + "loss": 0.1319897174835205, + "step": 4706 + }, + { + "epoch": 0.6363498098859316, + "grad_norm": 0.8531058430671692, + "learning_rate": 9.956622031552996e-06, + "loss": 0.10056877136230469, + "step": 4707 + }, + { + "epoch": 0.6364850021123786, + "grad_norm": 0.8294493556022644, + "learning_rate": 9.950176599914942e-06, + "loss": 0.1370983123779297, + "step": 4708 + }, + { + "epoch": 0.6366201943388256, + "grad_norm": 1.1041579246520996, + "learning_rate": 9.943732219677048e-06, + "loss": 0.18423080444335938, + "step": 4709 + }, + { + "epoch": 0.6367553865652725, + "grad_norm": 1.2175341844558716, + "learning_rate": 9.93728889218107e-06, + "loss": 0.20542526245117188, + "step": 4710 + }, + { + "epoch": 0.6368905787917195, + "grad_norm": 0.9677608609199524, + "learning_rate": 9.930846618768543e-06, + "loss": 0.15720367431640625, + "step": 4711 + }, + { + "epoch": 0.6370257710181665, + "grad_norm": 0.9568702578544617, + "learning_rate": 9.924405400780784e-06, + "loss": 0.1775522232055664, + "step": 4712 + }, + { + "epoch": 0.6371609632446135, + "grad_norm": 0.7599431872367859, + "learning_rate": 9.917965239558885e-06, + "loss": 0.1610431671142578, + "step": 4713 + }, + { + "epoch": 0.6372961554710604, + "grad_norm": 0.9448076486587524, + "learning_rate": 9.911526136443726e-06, + "loss": 0.1650867462158203, + "step": 4714 + }, + { + "epoch": 0.6374313476975074, + "grad_norm": 1.0108996629714966, + "learning_rate": 9.905088092775956e-06, + "loss": 0.1370830535888672, + "step": 4715 + }, + { + "epoch": 0.6375665399239544, + "grad_norm": 0.9641760587692261, + "learning_rate": 9.898651109896015e-06, + "loss": 0.16497325897216797, + "step": 4716 + }, + { + "epoch": 0.6377017321504014, + "grad_norm": 0.9114315509796143, + "learning_rate": 9.892215189144123e-06, + "loss": 0.1178368330001831, + "step": 4717 + }, + { + "epoch": 0.6378369243768484, + "grad_norm": 0.8271059393882751, + "learning_rate": 9.88578033186026e-06, + "loss": 0.14463472366333008, + "step": 4718 + }, + { + "epoch": 0.6379721166032953, + "grad_norm": 1.1193820238113403, + "learning_rate": 9.879346539384207e-06, + "loss": 0.18987798690795898, + "step": 4719 + }, + { + "epoch": 0.6381073088297423, + "grad_norm": 1.1091747283935547, + "learning_rate": 9.87291381305551e-06, + "loss": 0.1528254747390747, + "step": 4720 + }, + { + "epoch": 0.6382425010561893, + "grad_norm": 1.3743292093276978, + "learning_rate": 9.866482154213502e-06, + "loss": 0.16873931884765625, + "step": 4721 + }, + { + "epoch": 0.6383776932826363, + "grad_norm": 1.1861999034881592, + "learning_rate": 9.86005156419728e-06, + "loss": 0.22124958038330078, + "step": 4722 + }, + { + "epoch": 0.6385128855090833, + "grad_norm": 1.2370994091033936, + "learning_rate": 9.853622044345732e-06, + "loss": 0.2176356315612793, + "step": 4723 + }, + { + "epoch": 0.6386480777355302, + "grad_norm": 1.1271567344665527, + "learning_rate": 9.847193595997522e-06, + "loss": 0.17400717735290527, + "step": 4724 + }, + { + "epoch": 0.6387832699619772, + "grad_norm": 1.503694772720337, + "learning_rate": 9.840766220491078e-06, + "loss": 0.24648284912109375, + "step": 4725 + }, + { + "epoch": 0.6389184621884242, + "grad_norm": 0.7546154260635376, + "learning_rate": 9.834339919164625e-06, + "loss": 0.1584264039993286, + "step": 4726 + }, + { + "epoch": 0.6390536544148712, + "grad_norm": 1.7379469871520996, + "learning_rate": 9.827914693356145e-06, + "loss": 0.21053314208984375, + "step": 4727 + }, + { + "epoch": 0.6391888466413181, + "grad_norm": 1.36887526512146, + "learning_rate": 9.821490544403403e-06, + "loss": 0.17522525787353516, + "step": 4728 + }, + { + "epoch": 0.6393240388677651, + "grad_norm": 3.0597784519195557, + "learning_rate": 9.815067473643951e-06, + "loss": 0.15152215957641602, + "step": 4729 + }, + { + "epoch": 0.6394592310942121, + "grad_norm": 1.0458091497421265, + "learning_rate": 9.808645482415097e-06, + "loss": 0.181732177734375, + "step": 4730 + }, + { + "epoch": 0.6395944233206591, + "grad_norm": 0.9721728563308716, + "learning_rate": 9.80222457205394e-06, + "loss": 0.16955041885375977, + "step": 4731 + }, + { + "epoch": 0.6397296155471061, + "grad_norm": 1.3634389638900757, + "learning_rate": 9.795804743897341e-06, + "loss": 0.23503875732421875, + "step": 4732 + }, + { + "epoch": 0.639864807773553, + "grad_norm": 0.8009504675865173, + "learning_rate": 9.789385999281948e-06, + "loss": 0.16812801361083984, + "step": 4733 + }, + { + "epoch": 0.64, + "grad_norm": 0.45024386048316956, + "learning_rate": 9.782968339544179e-06, + "loss": 0.10645103454589844, + "step": 4734 + }, + { + "epoch": 0.640135192226447, + "grad_norm": 1.0312482118606567, + "learning_rate": 9.776551766020219e-06, + "loss": 0.14143896102905273, + "step": 4735 + }, + { + "epoch": 0.640270384452894, + "grad_norm": 0.6227663159370422, + "learning_rate": 9.77013628004604e-06, + "loss": 0.12190437316894531, + "step": 4736 + }, + { + "epoch": 0.640405576679341, + "grad_norm": 0.8853806853294373, + "learning_rate": 9.763721882957371e-06, + "loss": 0.16942214965820312, + "step": 4737 + }, + { + "epoch": 0.6405407689057879, + "grad_norm": 1.0890270471572876, + "learning_rate": 9.757308576089732e-06, + "loss": 0.1676945686340332, + "step": 4738 + }, + { + "epoch": 0.6406759611322349, + "grad_norm": 0.8477455973625183, + "learning_rate": 9.750896360778404e-06, + "loss": 0.14232635498046875, + "step": 4739 + }, + { + "epoch": 0.6408111533586819, + "grad_norm": 1.1622205972671509, + "learning_rate": 9.744485238358448e-06, + "loss": 0.20201444625854492, + "step": 4740 + }, + { + "epoch": 0.6409463455851289, + "grad_norm": 0.91047203540802, + "learning_rate": 9.73807521016469e-06, + "loss": 0.1355438232421875, + "step": 4741 + }, + { + "epoch": 0.6410815378115758, + "grad_norm": 0.7997440695762634, + "learning_rate": 9.731666277531732e-06, + "loss": 0.15801620483398438, + "step": 4742 + }, + { + "epoch": 0.6412167300380228, + "grad_norm": 1.0661485195159912, + "learning_rate": 9.725258441793947e-06, + "loss": 0.1851511001586914, + "step": 4743 + }, + { + "epoch": 0.6413519222644698, + "grad_norm": 0.7698494791984558, + "learning_rate": 9.71885170428549e-06, + "loss": 0.15169906616210938, + "step": 4744 + }, + { + "epoch": 0.6414871144909168, + "grad_norm": 0.5379022359848022, + "learning_rate": 9.712446066340265e-06, + "loss": 0.11092925071716309, + "step": 4745 + }, + { + "epoch": 0.6416223067173638, + "grad_norm": 1.1444133520126343, + "learning_rate": 9.70604152929197e-06, + "loss": 0.1769847869873047, + "step": 4746 + }, + { + "epoch": 0.6417574989438107, + "grad_norm": 1.4192321300506592, + "learning_rate": 9.699638094474054e-06, + "loss": 0.222747802734375, + "step": 4747 + }, + { + "epoch": 0.6418926911702577, + "grad_norm": 2.2647628784179688, + "learning_rate": 9.693235763219752e-06, + "loss": 0.16417694091796875, + "step": 4748 + }, + { + "epoch": 0.6420278833967047, + "grad_norm": 1.2556986808776855, + "learning_rate": 9.68683453686207e-06, + "loss": 0.21502017974853516, + "step": 4749 + }, + { + "epoch": 0.6421630756231517, + "grad_norm": 0.8340602517127991, + "learning_rate": 9.680434416733763e-06, + "loss": 0.14348602294921875, + "step": 4750 + }, + { + "epoch": 0.6422982678495986, + "grad_norm": 1.0318548679351807, + "learning_rate": 9.674035404167381e-06, + "loss": 0.16250038146972656, + "step": 4751 + }, + { + "epoch": 0.6424334600760456, + "grad_norm": 1.3777258396148682, + "learning_rate": 9.66763750049523e-06, + "loss": 0.21093463897705078, + "step": 4752 + }, + { + "epoch": 0.6425686523024926, + "grad_norm": 0.8274357914924622, + "learning_rate": 9.66124070704939e-06, + "loss": 0.13401508331298828, + "step": 4753 + }, + { + "epoch": 0.6427038445289396, + "grad_norm": 0.9177843928337097, + "learning_rate": 9.654845025161699e-06, + "loss": 0.11472606658935547, + "step": 4754 + }, + { + "epoch": 0.6428390367553866, + "grad_norm": 0.9275671243667603, + "learning_rate": 9.648450456163777e-06, + "loss": 0.14565658569335938, + "step": 4755 + }, + { + "epoch": 0.6429742289818335, + "grad_norm": 2.507289171218872, + "learning_rate": 9.64205700138701e-06, + "loss": 0.2075939178466797, + "step": 4756 + }, + { + "epoch": 0.6431094212082805, + "grad_norm": 1.0546643733978271, + "learning_rate": 9.635664662162548e-06, + "loss": 0.21128082275390625, + "step": 4757 + }, + { + "epoch": 0.6432446134347275, + "grad_norm": 1.3004906177520752, + "learning_rate": 9.629273439821315e-06, + "loss": 0.2620086669921875, + "step": 4758 + }, + { + "epoch": 0.6433798056611745, + "grad_norm": 1.4010204076766968, + "learning_rate": 9.622883335693984e-06, + "loss": 0.22907114028930664, + "step": 4759 + }, + { + "epoch": 0.6435149978876215, + "grad_norm": 1.6909691095352173, + "learning_rate": 9.616494351111017e-06, + "loss": 0.17894935607910156, + "step": 4760 + }, + { + "epoch": 0.6436501901140684, + "grad_norm": 2.6781859397888184, + "learning_rate": 9.610106487402637e-06, + "loss": 0.19934654235839844, + "step": 4761 + }, + { + "epoch": 0.6437853823405154, + "grad_norm": 1.2084870338439941, + "learning_rate": 9.603719745898826e-06, + "loss": 0.23883819580078125, + "step": 4762 + }, + { + "epoch": 0.6439205745669624, + "grad_norm": 1.1885716915130615, + "learning_rate": 9.597334127929346e-06, + "loss": 0.18436622619628906, + "step": 4763 + }, + { + "epoch": 0.6440557667934094, + "grad_norm": 0.98525470495224, + "learning_rate": 9.590949634823707e-06, + "loss": 0.1514650583267212, + "step": 4764 + }, + { + "epoch": 0.6441909590198563, + "grad_norm": 0.7663836479187012, + "learning_rate": 9.584566267911198e-06, + "loss": 0.11879348754882812, + "step": 4765 + }, + { + "epoch": 0.6443261512463033, + "grad_norm": 0.8613570332527161, + "learning_rate": 9.578184028520874e-06, + "loss": 0.21599197387695312, + "step": 4766 + }, + { + "epoch": 0.6444613434727503, + "grad_norm": 1.2138065099716187, + "learning_rate": 9.571802917981548e-06, + "loss": 0.14101147651672363, + "step": 4767 + }, + { + "epoch": 0.6445965356991973, + "grad_norm": 1.5802611112594604, + "learning_rate": 9.565422937621798e-06, + "loss": 0.18843746185302734, + "step": 4768 + }, + { + "epoch": 0.6447317279256443, + "grad_norm": 1.049242377281189, + "learning_rate": 9.559044088769971e-06, + "loss": 0.13640689849853516, + "step": 4769 + }, + { + "epoch": 0.6448669201520912, + "grad_norm": 0.9434901475906372, + "learning_rate": 9.552666372754182e-06, + "loss": 0.10306429862976074, + "step": 4770 + }, + { + "epoch": 0.6450021123785382, + "grad_norm": 1.4389883279800415, + "learning_rate": 9.546289790902307e-06, + "loss": 0.16845756769180298, + "step": 4771 + }, + { + "epoch": 0.6451373046049852, + "grad_norm": 0.7540355324745178, + "learning_rate": 9.539914344541976e-06, + "loss": 0.12136662006378174, + "step": 4772 + }, + { + "epoch": 0.6452724968314322, + "grad_norm": 1.3766987323760986, + "learning_rate": 9.533540035000598e-06, + "loss": 0.15412092208862305, + "step": 4773 + }, + { + "epoch": 0.6454076890578792, + "grad_norm": 0.7467913627624512, + "learning_rate": 9.52716686360533e-06, + "loss": 0.19866180419921875, + "step": 4774 + }, + { + "epoch": 0.6455428812843261, + "grad_norm": 1.2792859077453613, + "learning_rate": 9.520794831683108e-06, + "loss": 0.180267333984375, + "step": 4775 + }, + { + "epoch": 0.6456780735107731, + "grad_norm": 0.7078832983970642, + "learning_rate": 9.514423940560627e-06, + "loss": 0.12905120849609375, + "step": 4776 + }, + { + "epoch": 0.6458132657372201, + "grad_norm": 1.7478652000427246, + "learning_rate": 9.508054191564326e-06, + "loss": 0.24469757080078125, + "step": 4777 + }, + { + "epoch": 0.6459484579636671, + "grad_norm": 1.5673394203186035, + "learning_rate": 9.501685586020434e-06, + "loss": 0.15714645385742188, + "step": 4778 + }, + { + "epoch": 0.646083650190114, + "grad_norm": 1.3585560321807861, + "learning_rate": 9.495318125254919e-06, + "loss": 0.21356773376464844, + "step": 4779 + }, + { + "epoch": 0.646218842416561, + "grad_norm": 1.7007721662521362, + "learning_rate": 9.488951810593527e-06, + "loss": 0.14565658569335938, + "step": 4780 + }, + { + "epoch": 0.646354034643008, + "grad_norm": 1.9243805408477783, + "learning_rate": 9.48258664336176e-06, + "loss": 0.1699962615966797, + "step": 4781 + }, + { + "epoch": 0.646489226869455, + "grad_norm": 0.8874245285987854, + "learning_rate": 9.476222624884873e-06, + "loss": 0.14676380157470703, + "step": 4782 + }, + { + "epoch": 0.646624419095902, + "grad_norm": 1.179166316986084, + "learning_rate": 9.469859756487893e-06, + "loss": 0.17524433135986328, + "step": 4783 + }, + { + "epoch": 0.6467596113223489, + "grad_norm": 1.0857069492340088, + "learning_rate": 9.463498039495598e-06, + "loss": 0.13597440719604492, + "step": 4784 + }, + { + "epoch": 0.6468948035487959, + "grad_norm": 1.7896784543991089, + "learning_rate": 9.457137475232537e-06, + "loss": 0.1766681671142578, + "step": 4785 + }, + { + "epoch": 0.6470299957752429, + "grad_norm": 0.9283929467201233, + "learning_rate": 9.450778065023019e-06, + "loss": 0.19150161743164062, + "step": 4786 + }, + { + "epoch": 0.6471651880016899, + "grad_norm": 0.9286468625068665, + "learning_rate": 9.444419810191091e-06, + "loss": 0.1736125946044922, + "step": 4787 + }, + { + "epoch": 0.6473003802281369, + "grad_norm": 1.52741277217865, + "learning_rate": 9.43806271206059e-06, + "loss": 0.1750640869140625, + "step": 4788 + }, + { + "epoch": 0.6474355724545838, + "grad_norm": 0.7907344102859497, + "learning_rate": 9.431706771955089e-06, + "loss": 0.15364623069763184, + "step": 4789 + }, + { + "epoch": 0.6475707646810308, + "grad_norm": 0.9713426828384399, + "learning_rate": 9.425351991197937e-06, + "loss": 0.16255807876586914, + "step": 4790 + }, + { + "epoch": 0.6477059569074778, + "grad_norm": 0.7919119596481323, + "learning_rate": 9.418998371112221e-06, + "loss": 0.16147422790527344, + "step": 4791 + }, + { + "epoch": 0.6478411491339248, + "grad_norm": 1.0449354648590088, + "learning_rate": 9.412645913020807e-06, + "loss": 0.14014887809753418, + "step": 4792 + }, + { + "epoch": 0.6479763413603717, + "grad_norm": 0.8172423243522644, + "learning_rate": 9.406294618246313e-06, + "loss": 0.18064022064208984, + "step": 4793 + }, + { + "epoch": 0.6481115335868187, + "grad_norm": 1.11039137840271, + "learning_rate": 9.399944488111103e-06, + "loss": 0.13756990432739258, + "step": 4794 + }, + { + "epoch": 0.6482467258132657, + "grad_norm": 0.9745544791221619, + "learning_rate": 9.39359552393732e-06, + "loss": 0.16774463653564453, + "step": 4795 + }, + { + "epoch": 0.6483819180397127, + "grad_norm": 0.7310024499893188, + "learning_rate": 9.387247727046845e-06, + "loss": 0.13253021240234375, + "step": 4796 + }, + { + "epoch": 0.6485171102661597, + "grad_norm": 2.6191494464874268, + "learning_rate": 9.380901098761319e-06, + "loss": 0.2328033447265625, + "step": 4797 + }, + { + "epoch": 0.6486523024926066, + "grad_norm": 0.949286937713623, + "learning_rate": 9.374555640402153e-06, + "loss": 0.13180160522460938, + "step": 4798 + }, + { + "epoch": 0.6487874947190536, + "grad_norm": 1.2516103982925415, + "learning_rate": 9.368211353290503e-06, + "loss": 0.14205217361450195, + "step": 4799 + }, + { + "epoch": 0.6489226869455006, + "grad_norm": 1.8843960762023926, + "learning_rate": 9.36186823874728e-06, + "loss": 0.20497512817382812, + "step": 4800 + }, + { + "epoch": 0.6490578791719476, + "grad_norm": 1.146822214126587, + "learning_rate": 9.355526298093152e-06, + "loss": 0.17856502532958984, + "step": 4801 + }, + { + "epoch": 0.6491930713983946, + "grad_norm": 1.0923248529434204, + "learning_rate": 9.34918553264855e-06, + "loss": 0.18214702606201172, + "step": 4802 + }, + { + "epoch": 0.6493282636248415, + "grad_norm": 1.3734220266342163, + "learning_rate": 9.342845943733658e-06, + "loss": 0.2081599235534668, + "step": 4803 + }, + { + "epoch": 0.6494634558512885, + "grad_norm": 1.463240146636963, + "learning_rate": 9.336507532668407e-06, + "loss": 0.196807861328125, + "step": 4804 + }, + { + "epoch": 0.6495986480777355, + "grad_norm": 0.8848334550857544, + "learning_rate": 9.33017030077249e-06, + "loss": 0.17956097424030304, + "step": 4805 + }, + { + "epoch": 0.6497338403041825, + "grad_norm": 1.438395380973816, + "learning_rate": 9.323834249365346e-06, + "loss": 0.1773967742919922, + "step": 4806 + }, + { + "epoch": 0.6498690325306294, + "grad_norm": 1.0371705293655396, + "learning_rate": 9.317499379766183e-06, + "loss": 0.1246175765991211, + "step": 4807 + }, + { + "epoch": 0.6500042247570764, + "grad_norm": 1.0779000520706177, + "learning_rate": 9.311165693293954e-06, + "loss": 0.19726181030273438, + "step": 4808 + }, + { + "epoch": 0.6501394169835234, + "grad_norm": 0.8488254547119141, + "learning_rate": 9.304833191267364e-06, + "loss": 0.13262367248535156, + "step": 4809 + }, + { + "epoch": 0.6502746092099704, + "grad_norm": 0.9494053721427917, + "learning_rate": 9.298501875004874e-06, + "loss": 0.17995429039001465, + "step": 4810 + }, + { + "epoch": 0.6504098014364174, + "grad_norm": 1.0145442485809326, + "learning_rate": 9.292171745824695e-06, + "loss": 0.21031951904296875, + "step": 4811 + }, + { + "epoch": 0.6505449936628643, + "grad_norm": 1.9540719985961914, + "learning_rate": 9.285842805044797e-06, + "loss": 0.22034549713134766, + "step": 4812 + }, + { + "epoch": 0.6506801858893113, + "grad_norm": 0.7179235219955444, + "learning_rate": 9.279515053982905e-06, + "loss": 0.12684202194213867, + "step": 4813 + }, + { + "epoch": 0.6508153781157583, + "grad_norm": 1.0111486911773682, + "learning_rate": 9.273188493956476e-06, + "loss": 0.16587591171264648, + "step": 4814 + }, + { + "epoch": 0.6509505703422053, + "grad_norm": 0.9578974843025208, + "learning_rate": 9.266863126282746e-06, + "loss": 0.14514923095703125, + "step": 4815 + }, + { + "epoch": 0.6510857625686522, + "grad_norm": 1.45926833152771, + "learning_rate": 9.260538952278683e-06, + "loss": 0.1788005828857422, + "step": 4816 + }, + { + "epoch": 0.6512209547950992, + "grad_norm": 1.0004585981369019, + "learning_rate": 9.254215973261014e-06, + "loss": 0.1754007339477539, + "step": 4817 + }, + { + "epoch": 0.6513561470215462, + "grad_norm": 1.0461431741714478, + "learning_rate": 9.247894190546228e-06, + "loss": 0.1740589141845703, + "step": 4818 + }, + { + "epoch": 0.6514913392479933, + "grad_norm": 1.23405122756958, + "learning_rate": 9.241573605450539e-06, + "loss": 0.17757678031921387, + "step": 4819 + }, + { + "epoch": 0.6516265314744403, + "grad_norm": 0.6931892037391663, + "learning_rate": 9.235254219289937e-06, + "loss": 0.12467765808105469, + "step": 4820 + }, + { + "epoch": 0.6517617237008873, + "grad_norm": 1.4731919765472412, + "learning_rate": 9.228936033380143e-06, + "loss": 0.17371177673339844, + "step": 4821 + }, + { + "epoch": 0.6518969159273342, + "grad_norm": 1.4253627061843872, + "learning_rate": 9.222619049036649e-06, + "loss": 0.2474212646484375, + "step": 4822 + }, + { + "epoch": 0.6520321081537812, + "grad_norm": 1.13967764377594, + "learning_rate": 9.216303267574674e-06, + "loss": 0.2549152374267578, + "step": 4823 + }, + { + "epoch": 0.6521673003802282, + "grad_norm": 0.86136394739151, + "learning_rate": 9.209988690309198e-06, + "loss": 0.1515655517578125, + "step": 4824 + }, + { + "epoch": 0.6523024926066752, + "grad_norm": 1.041922688484192, + "learning_rate": 9.203675318554956e-06, + "loss": 0.14273762702941895, + "step": 4825 + }, + { + "epoch": 0.6524376848331221, + "grad_norm": 1.1738505363464355, + "learning_rate": 9.19736315362642e-06, + "loss": 0.16986465454101562, + "step": 4826 + }, + { + "epoch": 0.6525728770595691, + "grad_norm": 1.1166892051696777, + "learning_rate": 9.191052196837825e-06, + "loss": 0.1899890899658203, + "step": 4827 + }, + { + "epoch": 0.6527080692860161, + "grad_norm": 0.7434604167938232, + "learning_rate": 9.184742449503135e-06, + "loss": 0.11754012107849121, + "step": 4828 + }, + { + "epoch": 0.6528432615124631, + "grad_norm": 0.8548848628997803, + "learning_rate": 9.178433912936077e-06, + "loss": 0.18799066543579102, + "step": 4829 + }, + { + "epoch": 0.6529784537389101, + "grad_norm": 1.4570649862289429, + "learning_rate": 9.172126588450125e-06, + "loss": 0.2090167999267578, + "step": 4830 + }, + { + "epoch": 0.653113645965357, + "grad_norm": 1.1536593437194824, + "learning_rate": 9.165820477358491e-06, + "loss": 0.1553363800048828, + "step": 4831 + }, + { + "epoch": 0.653248838191804, + "grad_norm": 2.8031346797943115, + "learning_rate": 9.159515580974154e-06, + "loss": 0.24471917748451233, + "step": 4832 + }, + { + "epoch": 0.653384030418251, + "grad_norm": 0.8729983568191528, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1508469581604004, + "step": 4833 + }, + { + "epoch": 0.653519222644698, + "grad_norm": 0.9529052972793579, + "learning_rate": 9.14690943757793e-06, + "loss": 0.1912670135498047, + "step": 4834 + }, + { + "epoch": 0.653654414871145, + "grad_norm": 1.3464492559432983, + "learning_rate": 9.14060819319072e-06, + "loss": 0.22151947021484375, + "step": 4835 + }, + { + "epoch": 0.6537896070975919, + "grad_norm": 0.8646287322044373, + "learning_rate": 9.134308168760127e-06, + "loss": 0.15400314331054688, + "step": 4836 + }, + { + "epoch": 0.6539247993240389, + "grad_norm": 0.8091931343078613, + "learning_rate": 9.128009365597854e-06, + "loss": 0.15532279014587402, + "step": 4837 + }, + { + "epoch": 0.6540599915504859, + "grad_norm": 2.07232928276062, + "learning_rate": 9.121711785015342e-06, + "loss": 0.18629729747772217, + "step": 4838 + }, + { + "epoch": 0.6541951837769329, + "grad_norm": 1.9756603240966797, + "learning_rate": 9.115415428323787e-06, + "loss": 0.23978805541992188, + "step": 4839 + }, + { + "epoch": 0.6543303760033798, + "grad_norm": 1.6293244361877441, + "learning_rate": 9.109120296834118e-06, + "loss": 0.1784200668334961, + "step": 4840 + }, + { + "epoch": 0.6544655682298268, + "grad_norm": 1.9594029188156128, + "learning_rate": 9.10282639185702e-06, + "loss": 0.17702198028564453, + "step": 4841 + }, + { + "epoch": 0.6546007604562738, + "grad_norm": 1.0720446109771729, + "learning_rate": 9.096533714702913e-06, + "loss": 0.17490577697753906, + "step": 4842 + }, + { + "epoch": 0.6547359526827208, + "grad_norm": 1.2960160970687866, + "learning_rate": 9.090242266681967e-06, + "loss": 0.15349197387695312, + "step": 4843 + }, + { + "epoch": 0.6548711449091678, + "grad_norm": 1.0827581882476807, + "learning_rate": 9.083952049104094e-06, + "loss": 0.1511077880859375, + "step": 4844 + }, + { + "epoch": 0.6550063371356147, + "grad_norm": 0.8267826437950134, + "learning_rate": 9.07766306327896e-06, + "loss": 0.19099807739257812, + "step": 4845 + }, + { + "epoch": 0.6551415293620617, + "grad_norm": 0.8066474199295044, + "learning_rate": 9.071375310515949e-06, + "loss": 0.1575620174407959, + "step": 4846 + }, + { + "epoch": 0.6552767215885087, + "grad_norm": 1.2896349430084229, + "learning_rate": 9.065088792124219e-06, + "loss": 0.1339585781097412, + "step": 4847 + }, + { + "epoch": 0.6554119138149557, + "grad_norm": 2.021620512008667, + "learning_rate": 9.058803509412647e-06, + "loss": 0.19025659561157227, + "step": 4848 + }, + { + "epoch": 0.6555471060414026, + "grad_norm": 1.1733931303024292, + "learning_rate": 9.05251946368987e-06, + "loss": 0.14709186553955078, + "step": 4849 + }, + { + "epoch": 0.6556822982678496, + "grad_norm": 1.1147786378860474, + "learning_rate": 9.046236656264258e-06, + "loss": 0.17572641372680664, + "step": 4850 + }, + { + "epoch": 0.6558174904942966, + "grad_norm": 1.1058241128921509, + "learning_rate": 9.03995508844392e-06, + "loss": 0.14606094360351562, + "step": 4851 + }, + { + "epoch": 0.6559526827207436, + "grad_norm": 0.9976809024810791, + "learning_rate": 9.033674761536718e-06, + "loss": 0.14615631103515625, + "step": 4852 + }, + { + "epoch": 0.6560878749471906, + "grad_norm": 1.7091542482376099, + "learning_rate": 9.027395676850244e-06, + "loss": 0.14660930633544922, + "step": 4853 + }, + { + "epoch": 0.6562230671736375, + "grad_norm": 2.5047571659088135, + "learning_rate": 9.02111783569184e-06, + "loss": 0.2225649356842041, + "step": 4854 + }, + { + "epoch": 0.6563582594000845, + "grad_norm": 0.7046009302139282, + "learning_rate": 9.014841239368591e-06, + "loss": 0.09901881217956543, + "step": 4855 + }, + { + "epoch": 0.6564934516265315, + "grad_norm": 1.403560757637024, + "learning_rate": 9.008565889187308e-06, + "loss": 0.17946624755859375, + "step": 4856 + }, + { + "epoch": 0.6566286438529785, + "grad_norm": 1.2385027408599854, + "learning_rate": 9.00229178645456e-06, + "loss": 0.24045753479003906, + "step": 4857 + }, + { + "epoch": 0.6567638360794255, + "grad_norm": 1.1896494626998901, + "learning_rate": 8.996018932476641e-06, + "loss": 0.19264793395996094, + "step": 4858 + }, + { + "epoch": 0.6568990283058724, + "grad_norm": 1.398219108581543, + "learning_rate": 8.989747328559606e-06, + "loss": 0.2144947052001953, + "step": 4859 + }, + { + "epoch": 0.6570342205323194, + "grad_norm": 1.5924408435821533, + "learning_rate": 8.98347697600922e-06, + "loss": 0.2008814811706543, + "step": 4860 + }, + { + "epoch": 0.6571694127587664, + "grad_norm": 0.7375935316085815, + "learning_rate": 8.977207876131013e-06, + "loss": 0.13837194442749023, + "step": 4861 + }, + { + "epoch": 0.6573046049852134, + "grad_norm": 2.3050174713134766, + "learning_rate": 8.970940030230245e-06, + "loss": 0.2113971710205078, + "step": 4862 + }, + { + "epoch": 0.6574397972116603, + "grad_norm": 1.702325463294983, + "learning_rate": 8.96467343961191e-06, + "loss": 0.17105674743652344, + "step": 4863 + }, + { + "epoch": 0.6575749894381073, + "grad_norm": 1.138919711112976, + "learning_rate": 8.958408105580759e-06, + "loss": 0.1725482940673828, + "step": 4864 + }, + { + "epoch": 0.6577101816645543, + "grad_norm": 1.8034353256225586, + "learning_rate": 8.952144029441248e-06, + "loss": 0.18815898895263672, + "step": 4865 + }, + { + "epoch": 0.6578453738910013, + "grad_norm": 0.9733583927154541, + "learning_rate": 8.945881212497603e-06, + "loss": 0.17118453979492188, + "step": 4866 + }, + { + "epoch": 0.6579805661174483, + "grad_norm": 0.9002045392990112, + "learning_rate": 8.939619656053777e-06, + "loss": 0.15566253662109375, + "step": 4867 + }, + { + "epoch": 0.6581157583438952, + "grad_norm": 1.1197967529296875, + "learning_rate": 8.933359361413456e-06, + "loss": 0.13778018951416016, + "step": 4868 + }, + { + "epoch": 0.6582509505703422, + "grad_norm": 0.8414996266365051, + "learning_rate": 8.92710032988007e-06, + "loss": 0.12005412578582764, + "step": 4869 + }, + { + "epoch": 0.6583861427967892, + "grad_norm": 1.6632550954818726, + "learning_rate": 8.920842562756773e-06, + "loss": 0.14882755279541016, + "step": 4870 + }, + { + "epoch": 0.6585213350232362, + "grad_norm": 1.0913041830062866, + "learning_rate": 8.914586061346474e-06, + "loss": 0.1559741497039795, + "step": 4871 + }, + { + "epoch": 0.6586565272496832, + "grad_norm": 1.557337999343872, + "learning_rate": 8.908330826951811e-06, + "loss": 0.19379496574401855, + "step": 4872 + }, + { + "epoch": 0.6587917194761301, + "grad_norm": 1.44503653049469, + "learning_rate": 8.902076860875155e-06, + "loss": 0.21662044525146484, + "step": 4873 + }, + { + "epoch": 0.6589269117025771, + "grad_norm": 0.9538995027542114, + "learning_rate": 8.895824164418615e-06, + "loss": 0.1930559277534485, + "step": 4874 + }, + { + "epoch": 0.6590621039290241, + "grad_norm": 1.781646966934204, + "learning_rate": 8.889572738884033e-06, + "loss": 0.1397991180419922, + "step": 4875 + }, + { + "epoch": 0.6591972961554711, + "grad_norm": 1.3334068059921265, + "learning_rate": 8.88332258557299e-06, + "loss": 0.09713077545166016, + "step": 4876 + }, + { + "epoch": 0.659332488381918, + "grad_norm": 0.8527290225028992, + "learning_rate": 8.877073705786806e-06, + "loss": 0.16060161590576172, + "step": 4877 + }, + { + "epoch": 0.659467680608365, + "grad_norm": 1.114309549331665, + "learning_rate": 8.870826100826527e-06, + "loss": 0.21113228797912598, + "step": 4878 + }, + { + "epoch": 0.659602872834812, + "grad_norm": 1.1392513513565063, + "learning_rate": 8.86457977199294e-06, + "loss": 0.16429519653320312, + "step": 4879 + }, + { + "epoch": 0.659738065061259, + "grad_norm": 0.9237369298934937, + "learning_rate": 8.85833472058656e-06, + "loss": 0.16629791259765625, + "step": 4880 + }, + { + "epoch": 0.659873257287706, + "grad_norm": 1.204338788986206, + "learning_rate": 8.852090947907643e-06, + "loss": 0.15123748779296875, + "step": 4881 + }, + { + "epoch": 0.6600084495141529, + "grad_norm": 1.6933914422988892, + "learning_rate": 8.84584845525618e-06, + "loss": 0.21339750289916992, + "step": 4882 + }, + { + "epoch": 0.6601436417405999, + "grad_norm": 1.514875054359436, + "learning_rate": 8.83960724393188e-06, + "loss": 0.143829345703125, + "step": 4883 + }, + { + "epoch": 0.6602788339670469, + "grad_norm": 1.519263505935669, + "learning_rate": 8.833367315234206e-06, + "loss": 0.23859786987304688, + "step": 4884 + }, + { + "epoch": 0.6604140261934939, + "grad_norm": 0.8517480492591858, + "learning_rate": 8.82712867046234e-06, + "loss": 0.14067935943603516, + "step": 4885 + }, + { + "epoch": 0.6605492184199409, + "grad_norm": 1.4438402652740479, + "learning_rate": 8.820891310915203e-06, + "loss": 0.14026641845703125, + "step": 4886 + }, + { + "epoch": 0.6606844106463878, + "grad_norm": 1.4862960577011108, + "learning_rate": 8.81465523789145e-06, + "loss": 0.17629718780517578, + "step": 4887 + }, + { + "epoch": 0.6608196028728348, + "grad_norm": 0.6990572214126587, + "learning_rate": 8.808420452689455e-06, + "loss": 0.10553503036499023, + "step": 4888 + }, + { + "epoch": 0.6609547950992818, + "grad_norm": 0.9661646485328674, + "learning_rate": 8.802186956607344e-06, + "loss": 0.15777969360351562, + "step": 4889 + }, + { + "epoch": 0.6610899873257288, + "grad_norm": 1.2454967498779297, + "learning_rate": 8.795954750942954e-06, + "loss": 0.23040008544921875, + "step": 4890 + }, + { + "epoch": 0.6612251795521757, + "grad_norm": 1.3803699016571045, + "learning_rate": 8.789723836993878e-06, + "loss": 0.1743154525756836, + "step": 4891 + }, + { + "epoch": 0.6613603717786227, + "grad_norm": 1.5137220621109009, + "learning_rate": 8.783494216057407e-06, + "loss": 0.20363807678222656, + "step": 4892 + }, + { + "epoch": 0.6614955640050697, + "grad_norm": 1.015931248664856, + "learning_rate": 8.777265889430593e-06, + "loss": 0.18856453895568848, + "step": 4893 + }, + { + "epoch": 0.6616307562315167, + "grad_norm": 0.7822278738021851, + "learning_rate": 8.771038858410206e-06, + "loss": 0.1442861557006836, + "step": 4894 + }, + { + "epoch": 0.6617659484579637, + "grad_norm": 1.3010385036468506, + "learning_rate": 8.764813124292744e-06, + "loss": 0.20556259155273438, + "step": 4895 + }, + { + "epoch": 0.6619011406844106, + "grad_norm": 0.921440064907074, + "learning_rate": 8.758588688374445e-06, + "loss": 0.14670181274414062, + "step": 4896 + }, + { + "epoch": 0.6620363329108576, + "grad_norm": 1.2484965324401855, + "learning_rate": 8.752365551951262e-06, + "loss": 0.20201778411865234, + "step": 4897 + }, + { + "epoch": 0.6621715251373046, + "grad_norm": 1.240796685218811, + "learning_rate": 8.74614371631888e-06, + "loss": 0.15339231491088867, + "step": 4898 + }, + { + "epoch": 0.6623067173637516, + "grad_norm": 1.7703746557235718, + "learning_rate": 8.739923182772732e-06, + "loss": 0.20116877555847168, + "step": 4899 + }, + { + "epoch": 0.6624419095901986, + "grad_norm": 1.1591278314590454, + "learning_rate": 8.733703952607956e-06, + "loss": 0.21904993057250977, + "step": 4900 + }, + { + "epoch": 0.6625771018166455, + "grad_norm": 1.3635591268539429, + "learning_rate": 8.727486027119443e-06, + "loss": 0.19544029235839844, + "step": 4901 + }, + { + "epoch": 0.6627122940430925, + "grad_norm": 1.079126000404358, + "learning_rate": 8.721269407601783e-06, + "loss": 0.15197277069091797, + "step": 4902 + }, + { + "epoch": 0.6628474862695395, + "grad_norm": 1.6907446384429932, + "learning_rate": 8.71505409534931e-06, + "loss": 0.1521596908569336, + "step": 4903 + }, + { + "epoch": 0.6629826784959865, + "grad_norm": 1.5907058715820312, + "learning_rate": 8.708840091656093e-06, + "loss": 0.2548694610595703, + "step": 4904 + }, + { + "epoch": 0.6631178707224334, + "grad_norm": 1.1447182893753052, + "learning_rate": 8.70262739781592e-06, + "loss": 0.17750167846679688, + "step": 4905 + }, + { + "epoch": 0.6632530629488804, + "grad_norm": 0.8437352776527405, + "learning_rate": 8.696416015122302e-06, + "loss": 0.14332294464111328, + "step": 4906 + }, + { + "epoch": 0.6633882551753274, + "grad_norm": 0.9016723036766052, + "learning_rate": 8.690205944868487e-06, + "loss": 0.18363571166992188, + "step": 4907 + }, + { + "epoch": 0.6635234474017744, + "grad_norm": 0.9682764410972595, + "learning_rate": 8.683997188347436e-06, + "loss": 0.151594877243042, + "step": 4908 + }, + { + "epoch": 0.6636586396282214, + "grad_norm": 1.3980848789215088, + "learning_rate": 8.677789746851855e-06, + "loss": 0.12291526794433594, + "step": 4909 + }, + { + "epoch": 0.6637938318546683, + "grad_norm": 0.981627881526947, + "learning_rate": 8.671583621674167e-06, + "loss": 0.18784523010253906, + "step": 4910 + }, + { + "epoch": 0.6639290240811153, + "grad_norm": 0.7934214472770691, + "learning_rate": 8.665378814106512e-06, + "loss": 0.1471269130706787, + "step": 4911 + }, + { + "epoch": 0.6640642163075623, + "grad_norm": 0.8068694472312927, + "learning_rate": 8.65917532544077e-06, + "loss": 0.16522598266601562, + "step": 4912 + }, + { + "epoch": 0.6641994085340093, + "grad_norm": 2.6476728916168213, + "learning_rate": 8.652973156968532e-06, + "loss": 0.18506383895874023, + "step": 4913 + }, + { + "epoch": 0.6643346007604563, + "grad_norm": 1.2200329303741455, + "learning_rate": 8.646772309981141e-06, + "loss": 0.1431293487548828, + "step": 4914 + }, + { + "epoch": 0.6644697929869032, + "grad_norm": 0.896338939666748, + "learning_rate": 8.640572785769624e-06, + "loss": 0.1825408935546875, + "step": 4915 + }, + { + "epoch": 0.6646049852133502, + "grad_norm": 1.2054084539413452, + "learning_rate": 8.63437458562477e-06, + "loss": 0.19852542877197266, + "step": 4916 + }, + { + "epoch": 0.6647401774397972, + "grad_norm": 0.9763993620872498, + "learning_rate": 8.628177710837068e-06, + "loss": 0.15083742141723633, + "step": 4917 + }, + { + "epoch": 0.6648753696662442, + "grad_norm": 1.5249390602111816, + "learning_rate": 8.621982162696752e-06, + "loss": 0.19697189331054688, + "step": 4918 + }, + { + "epoch": 0.6650105618926911, + "grad_norm": 1.2886157035827637, + "learning_rate": 8.615787942493766e-06, + "loss": 0.1869983673095703, + "step": 4919 + }, + { + "epoch": 0.6651457541191381, + "grad_norm": 0.8438150882720947, + "learning_rate": 8.609595051517765e-06, + "loss": 0.16390323638916016, + "step": 4920 + }, + { + "epoch": 0.6652809463455851, + "grad_norm": 1.4547886848449707, + "learning_rate": 8.603403491058157e-06, + "loss": 0.2007460594177246, + "step": 4921 + }, + { + "epoch": 0.6654161385720321, + "grad_norm": 1.0295220613479614, + "learning_rate": 8.597213262404046e-06, + "loss": 0.1322331428527832, + "step": 4922 + }, + { + "epoch": 0.6655513307984791, + "grad_norm": 2.981210470199585, + "learning_rate": 8.591024366844291e-06, + "loss": 0.2532529830932617, + "step": 4923 + }, + { + "epoch": 0.665686523024926, + "grad_norm": 2.019127607345581, + "learning_rate": 8.584836805667434e-06, + "loss": 0.18584585189819336, + "step": 4924 + }, + { + "epoch": 0.665821715251373, + "grad_norm": 0.7941632270812988, + "learning_rate": 8.578650580161754e-06, + "loss": 0.18793773651123047, + "step": 4925 + }, + { + "epoch": 0.66595690747782, + "grad_norm": 1.9173439741134644, + "learning_rate": 8.572465691615275e-06, + "loss": 0.2226409912109375, + "step": 4926 + }, + { + "epoch": 0.666092099704267, + "grad_norm": 0.9492316246032715, + "learning_rate": 8.56628214131571e-06, + "loss": 0.10794901847839355, + "step": 4927 + }, + { + "epoch": 0.666227291930714, + "grad_norm": 1.4376837015151978, + "learning_rate": 8.560099930550523e-06, + "loss": 0.17420196533203125, + "step": 4928 + }, + { + "epoch": 0.6663624841571609, + "grad_norm": 2.9954895973205566, + "learning_rate": 8.553919060606866e-06, + "loss": 0.19881820678710938, + "step": 4929 + }, + { + "epoch": 0.6664976763836079, + "grad_norm": 1.1781864166259766, + "learning_rate": 8.54773953277163e-06, + "loss": 0.20312881469726562, + "step": 4930 + }, + { + "epoch": 0.6666328686100549, + "grad_norm": 0.8118711709976196, + "learning_rate": 8.541561348331433e-06, + "loss": 0.11457443237304688, + "step": 4931 + }, + { + "epoch": 0.6667680608365019, + "grad_norm": 0.7794628143310547, + "learning_rate": 8.535384508572603e-06, + "loss": 0.15274116396903992, + "step": 4932 + }, + { + "epoch": 0.6669032530629488, + "grad_norm": 1.098728895187378, + "learning_rate": 8.529209014781202e-06, + "loss": 0.1983509063720703, + "step": 4933 + }, + { + "epoch": 0.6670384452893958, + "grad_norm": 2.0450127124786377, + "learning_rate": 8.523034868242984e-06, + "loss": 0.25242042541503906, + "step": 4934 + }, + { + "epoch": 0.6671736375158428, + "grad_norm": 1.6885724067687988, + "learning_rate": 8.51686207024344e-06, + "loss": 0.13765239715576172, + "step": 4935 + }, + { + "epoch": 0.6673088297422898, + "grad_norm": 1.2371320724487305, + "learning_rate": 8.510690622067792e-06, + "loss": 0.13949227333068848, + "step": 4936 + }, + { + "epoch": 0.6674440219687368, + "grad_norm": 1.3392064571380615, + "learning_rate": 8.50452052500096e-06, + "loss": 0.18907546997070312, + "step": 4937 + }, + { + "epoch": 0.6675792141951837, + "grad_norm": 1.7679469585418701, + "learning_rate": 8.498351780327594e-06, + "loss": 0.16900354623794556, + "step": 4938 + }, + { + "epoch": 0.6677144064216307, + "grad_norm": 0.8894747495651245, + "learning_rate": 8.492184389332061e-06, + "loss": 0.16308212280273438, + "step": 4939 + }, + { + "epoch": 0.6678495986480777, + "grad_norm": 1.581634521484375, + "learning_rate": 8.486018353298432e-06, + "loss": 0.18794822692871094, + "step": 4940 + }, + { + "epoch": 0.6679847908745247, + "grad_norm": 1.3632307052612305, + "learning_rate": 8.479853673510528e-06, + "loss": 0.1606616973876953, + "step": 4941 + }, + { + "epoch": 0.6681199831009716, + "grad_norm": 1.3677546977996826, + "learning_rate": 8.473690351251855e-06, + "loss": 0.22364234924316406, + "step": 4942 + }, + { + "epoch": 0.6682551753274186, + "grad_norm": 1.4845232963562012, + "learning_rate": 8.467528387805656e-06, + "loss": 0.14280271530151367, + "step": 4943 + }, + { + "epoch": 0.6683903675538656, + "grad_norm": 1.0892442464828491, + "learning_rate": 8.461367784454881e-06, + "loss": 0.1562657356262207, + "step": 4944 + }, + { + "epoch": 0.6685255597803126, + "grad_norm": 0.8801248669624329, + "learning_rate": 8.455208542482195e-06, + "loss": 0.18584251403808594, + "step": 4945 + }, + { + "epoch": 0.6686607520067596, + "grad_norm": 0.8785949945449829, + "learning_rate": 8.449050663170004e-06, + "loss": 0.1080223023891449, + "step": 4946 + }, + { + "epoch": 0.6687959442332065, + "grad_norm": 1.0281901359558105, + "learning_rate": 8.442894147800387e-06, + "loss": 0.13111495971679688, + "step": 4947 + }, + { + "epoch": 0.6689311364596535, + "grad_norm": 1.1611011028289795, + "learning_rate": 8.436738997655184e-06, + "loss": 0.17256391048431396, + "step": 4948 + }, + { + "epoch": 0.6690663286861005, + "grad_norm": 1.9226800203323364, + "learning_rate": 8.430585214015918e-06, + "loss": 0.1781473159790039, + "step": 4949 + }, + { + "epoch": 0.6692015209125475, + "grad_norm": 1.2277345657348633, + "learning_rate": 8.424432798163838e-06, + "loss": 0.1698395013809204, + "step": 4950 + }, + { + "epoch": 0.6693367131389945, + "grad_norm": 1.5692715644836426, + "learning_rate": 8.418281751379926e-06, + "loss": 0.15980815887451172, + "step": 4951 + }, + { + "epoch": 0.6694719053654414, + "grad_norm": 1.5422580242156982, + "learning_rate": 8.41213207494484e-06, + "loss": 0.17629623413085938, + "step": 4952 + }, + { + "epoch": 0.6696070975918884, + "grad_norm": 1.3754093647003174, + "learning_rate": 8.405983770138992e-06, + "loss": 0.11031877994537354, + "step": 4953 + }, + { + "epoch": 0.6697422898183355, + "grad_norm": 1.1165724992752075, + "learning_rate": 8.399836838242479e-06, + "loss": 0.16813087463378906, + "step": 4954 + }, + { + "epoch": 0.6698774820447825, + "grad_norm": 1.586594581604004, + "learning_rate": 8.393691280535143e-06, + "loss": 0.19069862365722656, + "step": 4955 + }, + { + "epoch": 0.6700126742712295, + "grad_norm": 1.0211690664291382, + "learning_rate": 8.387547098296516e-06, + "loss": 0.14371871948242188, + "step": 4956 + }, + { + "epoch": 0.6701478664976764, + "grad_norm": 0.7847884297370911, + "learning_rate": 8.38140429280583e-06, + "loss": 0.1388864517211914, + "step": 4957 + }, + { + "epoch": 0.6702830587241234, + "grad_norm": 2.0592474937438965, + "learning_rate": 8.375262865342073e-06, + "loss": 0.21479415893554688, + "step": 4958 + }, + { + "epoch": 0.6704182509505704, + "grad_norm": 0.9477532505989075, + "learning_rate": 8.36912281718391e-06, + "loss": 0.18639659881591797, + "step": 4959 + }, + { + "epoch": 0.6705534431770174, + "grad_norm": 0.7580112218856812, + "learning_rate": 8.362984149609748e-06, + "loss": 0.1933574676513672, + "step": 4960 + }, + { + "epoch": 0.6706886354034644, + "grad_norm": 2.17968487739563, + "learning_rate": 8.356846863897672e-06, + "loss": 0.18117523193359375, + "step": 4961 + }, + { + "epoch": 0.6708238276299113, + "grad_norm": 1.4100232124328613, + "learning_rate": 8.350710961325498e-06, + "loss": 0.22339344024658203, + "step": 4962 + }, + { + "epoch": 0.6709590198563583, + "grad_norm": 0.8292788863182068, + "learning_rate": 8.344576443170768e-06, + "loss": 0.1196737289428711, + "step": 4963 + }, + { + "epoch": 0.6710942120828053, + "grad_norm": 1.2195991277694702, + "learning_rate": 8.338443310710708e-06, + "loss": 0.1936655044555664, + "step": 4964 + }, + { + "epoch": 0.6712294043092523, + "grad_norm": 2.9747374057769775, + "learning_rate": 8.332311565222284e-06, + "loss": 0.24501800537109375, + "step": 4965 + }, + { + "epoch": 0.6713645965356992, + "grad_norm": 2.13307523727417, + "learning_rate": 8.326181207982145e-06, + "loss": 0.22800636291503906, + "step": 4966 + }, + { + "epoch": 0.6714997887621462, + "grad_norm": 1.2976917028427124, + "learning_rate": 8.32005224026666e-06, + "loss": 0.14375638961791992, + "step": 4967 + }, + { + "epoch": 0.6716349809885932, + "grad_norm": 1.534257411956787, + "learning_rate": 8.313924663351927e-06, + "loss": 0.2206401824951172, + "step": 4968 + }, + { + "epoch": 0.6717701732150402, + "grad_norm": 0.9613286852836609, + "learning_rate": 8.307798478513733e-06, + "loss": 0.16698837280273438, + "step": 4969 + }, + { + "epoch": 0.6719053654414872, + "grad_norm": 0.8958679437637329, + "learning_rate": 8.301673687027583e-06, + "loss": 0.20035743713378906, + "step": 4970 + }, + { + "epoch": 0.6720405576679341, + "grad_norm": 0.5598917603492737, + "learning_rate": 8.295550290168692e-06, + "loss": 0.08476781845092773, + "step": 4971 + }, + { + "epoch": 0.6721757498943811, + "grad_norm": 1.2285544872283936, + "learning_rate": 8.289428289211977e-06, + "loss": 0.15395641326904297, + "step": 4972 + }, + { + "epoch": 0.6723109421208281, + "grad_norm": 2.167975425720215, + "learning_rate": 8.283307685432083e-06, + "loss": 0.1150202751159668, + "step": 4973 + }, + { + "epoch": 0.6724461343472751, + "grad_norm": 0.7897242903709412, + "learning_rate": 8.277188480103348e-06, + "loss": 0.1756892204284668, + "step": 4974 + }, + { + "epoch": 0.672581326573722, + "grad_norm": 0.9730179309844971, + "learning_rate": 8.271070674499821e-06, + "loss": 0.1560632884502411, + "step": 4975 + }, + { + "epoch": 0.672716518800169, + "grad_norm": 1.4615154266357422, + "learning_rate": 8.264954269895262e-06, + "loss": 0.17297935485839844, + "step": 4976 + }, + { + "epoch": 0.672851711026616, + "grad_norm": 2.755807876586914, + "learning_rate": 8.258839267563134e-06, + "loss": 0.23811721801757812, + "step": 4977 + }, + { + "epoch": 0.672986903253063, + "grad_norm": 1.1392247676849365, + "learning_rate": 8.252725668776623e-06, + "loss": 0.16143178939819336, + "step": 4978 + }, + { + "epoch": 0.67312209547951, + "grad_norm": 1.0451160669326782, + "learning_rate": 8.24661347480861e-06, + "loss": 0.19554519653320312, + "step": 4979 + }, + { + "epoch": 0.6732572877059569, + "grad_norm": 1.4334434270858765, + "learning_rate": 8.240502686931686e-06, + "loss": 0.2019824981689453, + "step": 4980 + }, + { + "epoch": 0.6733924799324039, + "grad_norm": 1.269405484199524, + "learning_rate": 8.234393306418148e-06, + "loss": 0.15655136108398438, + "step": 4981 + }, + { + "epoch": 0.6735276721588509, + "grad_norm": 1.2012618780136108, + "learning_rate": 8.228285334539995e-06, + "loss": 0.14350223541259766, + "step": 4982 + }, + { + "epoch": 0.6736628643852979, + "grad_norm": 0.9708613753318787, + "learning_rate": 8.22217877256896e-06, + "loss": 0.1885991096496582, + "step": 4983 + }, + { + "epoch": 0.6737980566117449, + "grad_norm": 1.4719375371932983, + "learning_rate": 8.216073621776436e-06, + "loss": 0.16818499565124512, + "step": 4984 + }, + { + "epoch": 0.6739332488381918, + "grad_norm": 0.8668748736381531, + "learning_rate": 8.209969883433566e-06, + "loss": 0.1496891975402832, + "step": 4985 + }, + { + "epoch": 0.6740684410646388, + "grad_norm": 0.7099021673202515, + "learning_rate": 8.203867558811177e-06, + "loss": 0.15021228790283203, + "step": 4986 + }, + { + "epoch": 0.6742036332910858, + "grad_norm": 2.1604654788970947, + "learning_rate": 8.197766649179795e-06, + "loss": 0.1982102394104004, + "step": 4987 + }, + { + "epoch": 0.6743388255175328, + "grad_norm": 1.5797946453094482, + "learning_rate": 8.191667155809684e-06, + "loss": 0.22808456420898438, + "step": 4988 + }, + { + "epoch": 0.6744740177439797, + "grad_norm": 1.4254366159439087, + "learning_rate": 8.185569079970764e-06, + "loss": 0.13309884071350098, + "step": 4989 + }, + { + "epoch": 0.6746092099704267, + "grad_norm": 1.9066100120544434, + "learning_rate": 8.179472422932709e-06, + "loss": 0.18539905548095703, + "step": 4990 + }, + { + "epoch": 0.6747444021968737, + "grad_norm": 0.628756046295166, + "learning_rate": 8.17337718596486e-06, + "loss": 0.11668860912322998, + "step": 4991 + }, + { + "epoch": 0.6748795944233207, + "grad_norm": 0.9980939030647278, + "learning_rate": 8.167283370336295e-06, + "loss": 0.1680002212524414, + "step": 4992 + }, + { + "epoch": 0.6750147866497677, + "grad_norm": 1.0438536405563354, + "learning_rate": 8.161190977315766e-06, + "loss": 0.18843841552734375, + "step": 4993 + }, + { + "epoch": 0.6751499788762146, + "grad_norm": 0.9829400181770325, + "learning_rate": 8.155100008171736e-06, + "loss": 0.1763577163219452, + "step": 4994 + }, + { + "epoch": 0.6752851711026616, + "grad_norm": 0.8536770939826965, + "learning_rate": 8.149010464172392e-06, + "loss": 0.14682388305664062, + "step": 4995 + }, + { + "epoch": 0.6754203633291086, + "grad_norm": 1.2554261684417725, + "learning_rate": 8.142922346585597e-06, + "loss": 0.2022233009338379, + "step": 4996 + }, + { + "epoch": 0.6755555555555556, + "grad_norm": 0.8678463697433472, + "learning_rate": 8.13683565667895e-06, + "loss": 0.18359661102294922, + "step": 4997 + }, + { + "epoch": 0.6756907477820026, + "grad_norm": 1.1854984760284424, + "learning_rate": 8.13075039571971e-06, + "loss": 0.1778567135334015, + "step": 4998 + }, + { + "epoch": 0.6758259400084495, + "grad_norm": 1.0105594396591187, + "learning_rate": 8.124666564974864e-06, + "loss": 0.12804698944091797, + "step": 4999 + }, + { + "epoch": 0.6759611322348965, + "grad_norm": 2.407771587371826, + "learning_rate": 8.11858416571111e-06, + "loss": 0.20404052734375, + "step": 5000 + }, + { + "epoch": 0.6760963244613435, + "grad_norm": 0.7561890482902527, + "learning_rate": 8.112503199194821e-06, + "loss": 0.15114164352416992, + "step": 5001 + }, + { + "epoch": 0.6762315166877905, + "grad_norm": 1.409066081047058, + "learning_rate": 8.106423666692108e-06, + "loss": 0.1584477424621582, + "step": 5002 + }, + { + "epoch": 0.6763667089142374, + "grad_norm": 1.0179040431976318, + "learning_rate": 8.100345569468742e-06, + "loss": 0.1906733512878418, + "step": 5003 + }, + { + "epoch": 0.6765019011406844, + "grad_norm": 0.9477241635322571, + "learning_rate": 8.094268908790215e-06, + "loss": 0.19038772583007812, + "step": 5004 + }, + { + "epoch": 0.6766370933671314, + "grad_norm": 1.7646867036819458, + "learning_rate": 8.088193685921733e-06, + "loss": 0.2060375213623047, + "step": 5005 + }, + { + "epoch": 0.6767722855935784, + "grad_norm": 0.7761819362640381, + "learning_rate": 8.082119902128185e-06, + "loss": 0.12443733215332031, + "step": 5006 + }, + { + "epoch": 0.6769074778200254, + "grad_norm": 0.9273766875267029, + "learning_rate": 8.076047558674164e-06, + "loss": 0.1891918182373047, + "step": 5007 + }, + { + "epoch": 0.6770426700464723, + "grad_norm": 1.034024953842163, + "learning_rate": 8.069976656823964e-06, + "loss": 0.1491626501083374, + "step": 5008 + }, + { + "epoch": 0.6771778622729193, + "grad_norm": 1.2532517910003662, + "learning_rate": 8.063907197841574e-06, + "loss": 0.1720867156982422, + "step": 5009 + }, + { + "epoch": 0.6773130544993663, + "grad_norm": 0.8703910708427429, + "learning_rate": 8.057839182990698e-06, + "loss": 0.1313270926475525, + "step": 5010 + }, + { + "epoch": 0.6774482467258133, + "grad_norm": 2.6156110763549805, + "learning_rate": 8.051772613534725e-06, + "loss": 0.18322277069091797, + "step": 5011 + }, + { + "epoch": 0.6775834389522603, + "grad_norm": 0.9662993550300598, + "learning_rate": 8.045707490736745e-06, + "loss": 0.1251966953277588, + "step": 5012 + }, + { + "epoch": 0.6777186311787072, + "grad_norm": 2.5010697841644287, + "learning_rate": 8.039643815859552e-06, + "loss": 0.253537654876709, + "step": 5013 + }, + { + "epoch": 0.6778538234051542, + "grad_norm": 1.1776468753814697, + "learning_rate": 8.033581590165627e-06, + "loss": 0.19998162984848022, + "step": 5014 + }, + { + "epoch": 0.6779890156316012, + "grad_norm": 1.942829966545105, + "learning_rate": 8.027520814917175e-06, + "loss": 0.24843978881835938, + "step": 5015 + }, + { + "epoch": 0.6781242078580482, + "grad_norm": 1.1005936861038208, + "learning_rate": 8.021461491376064e-06, + "loss": 0.21546363830566406, + "step": 5016 + }, + { + "epoch": 0.6782594000844951, + "grad_norm": 0.8666654229164124, + "learning_rate": 8.015403620803885e-06, + "loss": 0.14625072479248047, + "step": 5017 + }, + { + "epoch": 0.6783945923109421, + "grad_norm": 0.8627327680587769, + "learning_rate": 8.009347204461922e-06, + "loss": 0.20458602905273438, + "step": 5018 + }, + { + "epoch": 0.6785297845373891, + "grad_norm": 1.170483112335205, + "learning_rate": 8.003292243611143e-06, + "loss": 0.17746615409851074, + "step": 5019 + }, + { + "epoch": 0.6786649767638361, + "grad_norm": 1.3867892026901245, + "learning_rate": 7.99723873951224e-06, + "loss": 0.18867969512939453, + "step": 5020 + }, + { + "epoch": 0.6788001689902831, + "grad_norm": 1.124878168106079, + "learning_rate": 7.991186693425563e-06, + "loss": 0.13965702056884766, + "step": 5021 + }, + { + "epoch": 0.67893536121673, + "grad_norm": 0.8524463772773743, + "learning_rate": 7.9851361066112e-06, + "loss": 0.17600560188293457, + "step": 5022 + }, + { + "epoch": 0.679070553443177, + "grad_norm": 0.7314344048500061, + "learning_rate": 7.979086980328907e-06, + "loss": 0.10720205307006836, + "step": 5023 + }, + { + "epoch": 0.679205745669624, + "grad_norm": 0.6285949349403381, + "learning_rate": 7.973039315838137e-06, + "loss": 0.08309197425842285, + "step": 5024 + }, + { + "epoch": 0.679340937896071, + "grad_norm": 1.6278624534606934, + "learning_rate": 7.966993114398067e-06, + "loss": 0.19066143035888672, + "step": 5025 + }, + { + "epoch": 0.679476130122518, + "grad_norm": 1.0072762966156006, + "learning_rate": 7.960948377267524e-06, + "loss": 0.18010520935058594, + "step": 5026 + }, + { + "epoch": 0.6796113223489649, + "grad_norm": 1.5092215538024902, + "learning_rate": 7.954905105705071e-06, + "loss": 0.16632843017578125, + "step": 5027 + }, + { + "epoch": 0.6797465145754119, + "grad_norm": 1.8192689418792725, + "learning_rate": 7.948863300968938e-06, + "loss": 0.2074413299560547, + "step": 5028 + }, + { + "epoch": 0.6798817068018589, + "grad_norm": 1.9579755067825317, + "learning_rate": 7.942822964317078e-06, + "loss": 0.1721668243408203, + "step": 5029 + }, + { + "epoch": 0.6800168990283059, + "grad_norm": 1.0593005418777466, + "learning_rate": 7.936784097007105e-06, + "loss": 0.1670999526977539, + "step": 5030 + }, + { + "epoch": 0.6801520912547528, + "grad_norm": 1.8515827655792236, + "learning_rate": 7.930746700296344e-06, + "loss": 0.20845794677734375, + "step": 5031 + }, + { + "epoch": 0.6802872834811998, + "grad_norm": 0.953555703163147, + "learning_rate": 7.924710775441822e-06, + "loss": 0.1791372299194336, + "step": 5032 + }, + { + "epoch": 0.6804224757076468, + "grad_norm": 0.9557312726974487, + "learning_rate": 7.918676323700241e-06, + "loss": 0.15536212921142578, + "step": 5033 + }, + { + "epoch": 0.6805576679340938, + "grad_norm": 1.7691465616226196, + "learning_rate": 7.912643346328023e-06, + "loss": 0.15507960319519043, + "step": 5034 + }, + { + "epoch": 0.6806928601605408, + "grad_norm": 1.0645300149917603, + "learning_rate": 7.906611844581251e-06, + "loss": 0.10835909843444824, + "step": 5035 + }, + { + "epoch": 0.6808280523869877, + "grad_norm": 1.416193962097168, + "learning_rate": 7.900581819715713e-06, + "loss": 0.181915283203125, + "step": 5036 + }, + { + "epoch": 0.6809632446134347, + "grad_norm": 1.3164618015289307, + "learning_rate": 7.894553272986901e-06, + "loss": 0.19794172048568726, + "step": 5037 + }, + { + "epoch": 0.6810984368398817, + "grad_norm": 0.9206435680389404, + "learning_rate": 7.888526205649993e-06, + "loss": 0.17533397674560547, + "step": 5038 + }, + { + "epoch": 0.6812336290663287, + "grad_norm": 1.720955491065979, + "learning_rate": 7.882500618959849e-06, + "loss": 0.20714569091796875, + "step": 5039 + }, + { + "epoch": 0.6813688212927757, + "grad_norm": 0.8436741828918457, + "learning_rate": 7.876476514171033e-06, + "loss": 0.21134567260742188, + "step": 5040 + }, + { + "epoch": 0.6815040135192226, + "grad_norm": 1.3161191940307617, + "learning_rate": 7.870453892537788e-06, + "loss": 0.15209579467773438, + "step": 5041 + }, + { + "epoch": 0.6816392057456696, + "grad_norm": 1.0485806465148926, + "learning_rate": 7.864432755314068e-06, + "loss": 0.13231611251831055, + "step": 5042 + }, + { + "epoch": 0.6817743979721166, + "grad_norm": 1.1092981100082397, + "learning_rate": 7.858413103753499e-06, + "loss": 0.17936325073242188, + "step": 5043 + }, + { + "epoch": 0.6819095901985636, + "grad_norm": 1.2274843454360962, + "learning_rate": 7.852394939109408e-06, + "loss": 0.176383376121521, + "step": 5044 + }, + { + "epoch": 0.6820447824250105, + "grad_norm": 1.3140506744384766, + "learning_rate": 7.846378262634803e-06, + "loss": 0.16873395442962646, + "step": 5045 + }, + { + "epoch": 0.6821799746514575, + "grad_norm": 0.6792768836021423, + "learning_rate": 7.840363075582385e-06, + "loss": 0.15387141704559326, + "step": 5046 + }, + { + "epoch": 0.6823151668779045, + "grad_norm": 0.7771419286727905, + "learning_rate": 7.834349379204565e-06, + "loss": 0.1563504934310913, + "step": 5047 + }, + { + "epoch": 0.6824503591043515, + "grad_norm": 0.9997833967208862, + "learning_rate": 7.828337174753411e-06, + "loss": 0.14500713348388672, + "step": 5048 + }, + { + "epoch": 0.6825855513307985, + "grad_norm": 2.215217351913452, + "learning_rate": 7.822326463480703e-06, + "loss": 0.23760569095611572, + "step": 5049 + }, + { + "epoch": 0.6827207435572454, + "grad_norm": 1.3574687242507935, + "learning_rate": 7.816317246637901e-06, + "loss": 0.19251084327697754, + "step": 5050 + }, + { + "epoch": 0.6828559357836924, + "grad_norm": 2.2461235523223877, + "learning_rate": 7.810309525476152e-06, + "loss": 0.23035430908203125, + "step": 5051 + }, + { + "epoch": 0.6829911280101394, + "grad_norm": 0.5272597670555115, + "learning_rate": 7.804303301246311e-06, + "loss": 0.1177072525024414, + "step": 5052 + }, + { + "epoch": 0.6831263202365864, + "grad_norm": 1.1031173467636108, + "learning_rate": 7.798298575198884e-06, + "loss": 0.18041610717773438, + "step": 5053 + }, + { + "epoch": 0.6832615124630333, + "grad_norm": 0.86440509557724, + "learning_rate": 7.792295348584103e-06, + "loss": 0.19728469848632812, + "step": 5054 + }, + { + "epoch": 0.6833967046894803, + "grad_norm": 1.4030793905258179, + "learning_rate": 7.786293622651866e-06, + "loss": 0.21994972229003906, + "step": 5055 + }, + { + "epoch": 0.6835318969159273, + "grad_norm": 1.1506909132003784, + "learning_rate": 7.78029339865176e-06, + "loss": 0.1763622760772705, + "step": 5056 + }, + { + "epoch": 0.6836670891423743, + "grad_norm": 1.2515126466751099, + "learning_rate": 7.774294677833078e-06, + "loss": 0.1659860610961914, + "step": 5057 + }, + { + "epoch": 0.6838022813688213, + "grad_norm": 1.3858851194381714, + "learning_rate": 7.768297461444766e-06, + "loss": 0.19391918182373047, + "step": 5058 + }, + { + "epoch": 0.6839374735952682, + "grad_norm": 2.3579061031341553, + "learning_rate": 7.762301750735494e-06, + "loss": 0.1669478416442871, + "step": 5059 + }, + { + "epoch": 0.6840726658217152, + "grad_norm": 1.0830591917037964, + "learning_rate": 7.756307546953592e-06, + "loss": 0.1611347198486328, + "step": 5060 + }, + { + "epoch": 0.6842078580481622, + "grad_norm": 1.2107855081558228, + "learning_rate": 7.750314851347087e-06, + "loss": 0.21772003173828125, + "step": 5061 + }, + { + "epoch": 0.6843430502746092, + "grad_norm": 1.972565770149231, + "learning_rate": 7.74432366516369e-06, + "loss": 0.1912527084350586, + "step": 5062 + }, + { + "epoch": 0.6844782425010562, + "grad_norm": 1.1781563758850098, + "learning_rate": 7.738333989650794e-06, + "loss": 0.13153600692749023, + "step": 5063 + }, + { + "epoch": 0.6846134347275031, + "grad_norm": 0.6033469438552856, + "learning_rate": 7.732345826055487e-06, + "loss": 0.1273174285888672, + "step": 5064 + }, + { + "epoch": 0.6847486269539501, + "grad_norm": 0.9970125555992126, + "learning_rate": 7.726359175624537e-06, + "loss": 0.1446322202682495, + "step": 5065 + }, + { + "epoch": 0.6848838191803971, + "grad_norm": 1.2162835597991943, + "learning_rate": 7.720374039604395e-06, + "loss": 0.1799755096435547, + "step": 5066 + }, + { + "epoch": 0.6850190114068441, + "grad_norm": 2.279236078262329, + "learning_rate": 7.714390419241198e-06, + "loss": 0.18764285743236542, + "step": 5067 + }, + { + "epoch": 0.685154203633291, + "grad_norm": 1.2881633043289185, + "learning_rate": 7.70840831578076e-06, + "loss": 0.17615699768066406, + "step": 5068 + }, + { + "epoch": 0.685289395859738, + "grad_norm": 0.8145703673362732, + "learning_rate": 7.702427730468601e-06, + "loss": 0.1748189926147461, + "step": 5069 + }, + { + "epoch": 0.685424588086185, + "grad_norm": 0.9014973640441895, + "learning_rate": 7.696448664549898e-06, + "loss": 0.20568609237670898, + "step": 5070 + }, + { + "epoch": 0.685559780312632, + "grad_norm": 2.335993766784668, + "learning_rate": 7.690471119269541e-06, + "loss": 0.19422391057014465, + "step": 5071 + }, + { + "epoch": 0.685694972539079, + "grad_norm": 1.0950287580490112, + "learning_rate": 7.684495095872073e-06, + "loss": 0.17508697509765625, + "step": 5072 + }, + { + "epoch": 0.6858301647655259, + "grad_norm": 1.2751617431640625, + "learning_rate": 7.678520595601728e-06, + "loss": 0.16922283172607422, + "step": 5073 + }, + { + "epoch": 0.6859653569919729, + "grad_norm": 2.023818254470825, + "learning_rate": 7.672547619702445e-06, + "loss": 0.18618106842041016, + "step": 5074 + }, + { + "epoch": 0.6861005492184199, + "grad_norm": 1.076277494430542, + "learning_rate": 7.666576169417823e-06, + "loss": 0.16872596740722656, + "step": 5075 + }, + { + "epoch": 0.6862357414448669, + "grad_norm": 1.1945850849151611, + "learning_rate": 7.660606245991147e-06, + "loss": 0.13384151458740234, + "step": 5076 + }, + { + "epoch": 0.6863709336713139, + "grad_norm": 1.4496071338653564, + "learning_rate": 7.654637850665393e-06, + "loss": 0.18155241012573242, + "step": 5077 + }, + { + "epoch": 0.6865061258977608, + "grad_norm": 1.0253264904022217, + "learning_rate": 7.648670984683199e-06, + "loss": 0.16184139251708984, + "step": 5078 + }, + { + "epoch": 0.6866413181242078, + "grad_norm": 0.8470093607902527, + "learning_rate": 7.642705649286916e-06, + "loss": 0.1437368392944336, + "step": 5079 + }, + { + "epoch": 0.6867765103506548, + "grad_norm": 0.615933358669281, + "learning_rate": 7.63674184571855e-06, + "loss": 0.1031641960144043, + "step": 5080 + }, + { + "epoch": 0.6869117025771018, + "grad_norm": 0.9475538730621338, + "learning_rate": 7.630779575219797e-06, + "loss": 0.18244290351867676, + "step": 5081 + }, + { + "epoch": 0.6870468948035487, + "grad_norm": 0.6023814082145691, + "learning_rate": 7.6248188390320344e-06, + "loss": 0.11127090454101562, + "step": 5082 + }, + { + "epoch": 0.6871820870299957, + "grad_norm": 0.9950649738311768, + "learning_rate": 7.6188596383963135e-06, + "loss": 0.12942171096801758, + "step": 5083 + }, + { + "epoch": 0.6873172792564427, + "grad_norm": 1.4469207525253296, + "learning_rate": 7.612901974553388e-06, + "loss": 0.1913074254989624, + "step": 5084 + }, + { + "epoch": 0.6874524714828897, + "grad_norm": 1.0802984237670898, + "learning_rate": 7.606945848743653e-06, + "loss": 0.14243316650390625, + "step": 5085 + }, + { + "epoch": 0.6875876637093367, + "grad_norm": 1.1903811693191528, + "learning_rate": 7.600991262207221e-06, + "loss": 0.18616104125976562, + "step": 5086 + }, + { + "epoch": 0.6877228559357836, + "grad_norm": 1.2925255298614502, + "learning_rate": 7.595038216183867e-06, + "loss": 0.15991497039794922, + "step": 5087 + }, + { + "epoch": 0.6878580481622306, + "grad_norm": 0.7834618091583252, + "learning_rate": 7.589086711913037e-06, + "loss": 0.11574745178222656, + "step": 5088 + }, + { + "epoch": 0.6879932403886776, + "grad_norm": 0.7726534008979797, + "learning_rate": 7.583136750633885e-06, + "loss": 0.15797996520996094, + "step": 5089 + }, + { + "epoch": 0.6881284326151247, + "grad_norm": 1.7054098844528198, + "learning_rate": 7.577188333585202e-06, + "loss": 0.18839263916015625, + "step": 5090 + }, + { + "epoch": 0.6882636248415717, + "grad_norm": 1.546455979347229, + "learning_rate": 7.5712414620054975e-06, + "loss": 0.18819141387939453, + "step": 5091 + }, + { + "epoch": 0.6883988170680186, + "grad_norm": 3.0678529739379883, + "learning_rate": 7.565296137132935e-06, + "loss": 0.2936263084411621, + "step": 5092 + }, + { + "epoch": 0.6885340092944656, + "grad_norm": 1.0985461473464966, + "learning_rate": 7.559352360205357e-06, + "loss": 0.20450496673583984, + "step": 5093 + }, + { + "epoch": 0.6886692015209126, + "grad_norm": 0.9503136873245239, + "learning_rate": 7.553410132460308e-06, + "loss": 0.20096588134765625, + "step": 5094 + }, + { + "epoch": 0.6888043937473596, + "grad_norm": 0.8361325263977051, + "learning_rate": 7.547469455134968e-06, + "loss": 0.1712038516998291, + "step": 5095 + }, + { + "epoch": 0.6889395859738066, + "grad_norm": 1.18292236328125, + "learning_rate": 7.541530329466236e-06, + "loss": 0.19519996643066406, + "step": 5096 + }, + { + "epoch": 0.6890747782002535, + "grad_norm": 1.4044870138168335, + "learning_rate": 7.535592756690661e-06, + "loss": 0.15503311157226562, + "step": 5097 + }, + { + "epoch": 0.6892099704267005, + "grad_norm": 0.6633076667785645, + "learning_rate": 7.52965673804448e-06, + "loss": 0.1333456039428711, + "step": 5098 + }, + { + "epoch": 0.6893451626531475, + "grad_norm": 0.8061087131500244, + "learning_rate": 7.5237222747636025e-06, + "loss": 0.13283157348632812, + "step": 5099 + }, + { + "epoch": 0.6894803548795945, + "grad_norm": 1.84687077999115, + "learning_rate": 7.517789368083611e-06, + "loss": 0.16704010963439941, + "step": 5100 + }, + { + "epoch": 0.6896155471060414, + "grad_norm": 0.8452058434486389, + "learning_rate": 7.511858019239778e-06, + "loss": 0.17380380630493164, + "step": 5101 + }, + { + "epoch": 0.6897507393324884, + "grad_norm": 1.1292773485183716, + "learning_rate": 7.505928229467038e-06, + "loss": 0.20114898681640625, + "step": 5102 + }, + { + "epoch": 0.6898859315589354, + "grad_norm": 1.2943975925445557, + "learning_rate": 7.500000000000004e-06, + "loss": 0.17506837844848633, + "step": 5103 + }, + { + "epoch": 0.6900211237853824, + "grad_norm": 1.6584535837173462, + "learning_rate": 7.494073332072963e-06, + "loss": 0.18539047241210938, + "step": 5104 + }, + { + "epoch": 0.6901563160118294, + "grad_norm": 1.3279589414596558, + "learning_rate": 7.488148226919877e-06, + "loss": 0.21048736572265625, + "step": 5105 + }, + { + "epoch": 0.6902915082382763, + "grad_norm": 1.2370537519454956, + "learning_rate": 7.482224685774393e-06, + "loss": 0.1621685028076172, + "step": 5106 + }, + { + "epoch": 0.6904267004647233, + "grad_norm": 1.1495203971862793, + "learning_rate": 7.4763027098698184e-06, + "loss": 0.2266845703125, + "step": 5107 + }, + { + "epoch": 0.6905618926911703, + "grad_norm": 2.1210896968841553, + "learning_rate": 7.470382300439143e-06, + "loss": 0.1802058219909668, + "step": 5108 + }, + { + "epoch": 0.6906970849176173, + "grad_norm": 0.7871782779693604, + "learning_rate": 7.4644634587150225e-06, + "loss": 0.17002034187316895, + "step": 5109 + }, + { + "epoch": 0.6908322771440643, + "grad_norm": 0.8943871259689331, + "learning_rate": 7.4585461859297906e-06, + "loss": 0.1412220001220703, + "step": 5110 + }, + { + "epoch": 0.6909674693705112, + "grad_norm": 0.8312017917633057, + "learning_rate": 7.452630483315463e-06, + "loss": 0.16393661499023438, + "step": 5111 + }, + { + "epoch": 0.6911026615969582, + "grad_norm": 0.9605817794799805, + "learning_rate": 7.4467163521037186e-06, + "loss": 0.18793749809265137, + "step": 5112 + }, + { + "epoch": 0.6912378538234052, + "grad_norm": 2.580165147781372, + "learning_rate": 7.440803793525907e-06, + "loss": 0.19863653182983398, + "step": 5113 + }, + { + "epoch": 0.6913730460498522, + "grad_norm": 1.0944455862045288, + "learning_rate": 7.434892808813056e-06, + "loss": 0.1680774688720703, + "step": 5114 + }, + { + "epoch": 0.6915082382762991, + "grad_norm": 2.1677603721618652, + "learning_rate": 7.42898339919586e-06, + "loss": 0.16836369037628174, + "step": 5115 + }, + { + "epoch": 0.6916434305027461, + "grad_norm": 1.1497037410736084, + "learning_rate": 7.423075565904698e-06, + "loss": 0.2027263641357422, + "step": 5116 + }, + { + "epoch": 0.6917786227291931, + "grad_norm": 1.6549288034439087, + "learning_rate": 7.417169310169609e-06, + "loss": 0.18097591400146484, + "step": 5117 + }, + { + "epoch": 0.6919138149556401, + "grad_norm": 0.9023087620735168, + "learning_rate": 7.411264633220305e-06, + "loss": 0.13146638870239258, + "step": 5118 + }, + { + "epoch": 0.6920490071820871, + "grad_norm": 1.3073419332504272, + "learning_rate": 7.405361536286174e-06, + "loss": 0.16814422607421875, + "step": 5119 + }, + { + "epoch": 0.692184199408534, + "grad_norm": 1.0796542167663574, + "learning_rate": 7.399460020596266e-06, + "loss": 0.16030216217041016, + "step": 5120 + }, + { + "epoch": 0.692319391634981, + "grad_norm": 2.0547728538513184, + "learning_rate": 7.393560087379322e-06, + "loss": 0.21028709411621094, + "step": 5121 + }, + { + "epoch": 0.692454583861428, + "grad_norm": 1.0796337127685547, + "learning_rate": 7.3876617378637195e-06, + "loss": 0.16209030151367188, + "step": 5122 + }, + { + "epoch": 0.692589776087875, + "grad_norm": 0.9031347036361694, + "learning_rate": 7.381764973277543e-06, + "loss": 0.16449451446533203, + "step": 5123 + }, + { + "epoch": 0.692724968314322, + "grad_norm": 2.1749205589294434, + "learning_rate": 7.375869794848525e-06, + "loss": 0.16182827949523926, + "step": 5124 + }, + { + "epoch": 0.6928601605407689, + "grad_norm": 0.8099848031997681, + "learning_rate": 7.3699762038040654e-06, + "loss": 0.14237475395202637, + "step": 5125 + }, + { + "epoch": 0.6929953527672159, + "grad_norm": 1.0861082077026367, + "learning_rate": 7.364084201371261e-06, + "loss": 0.1667957305908203, + "step": 5126 + }, + { + "epoch": 0.6931305449936629, + "grad_norm": 1.9657409191131592, + "learning_rate": 7.3581937887768334e-06, + "loss": 0.24158883094787598, + "step": 5127 + }, + { + "epoch": 0.6932657372201099, + "grad_norm": 0.9278636574745178, + "learning_rate": 7.352304967247217e-06, + "loss": 0.16916131973266602, + "step": 5128 + }, + { + "epoch": 0.6934009294465568, + "grad_norm": 0.8497210144996643, + "learning_rate": 7.346417738008487e-06, + "loss": 0.15870952606201172, + "step": 5129 + }, + { + "epoch": 0.6935361216730038, + "grad_norm": 2.4723055362701416, + "learning_rate": 7.340532102286399e-06, + "loss": 0.19055747985839844, + "step": 5130 + }, + { + "epoch": 0.6936713138994508, + "grad_norm": 0.7040371894836426, + "learning_rate": 7.3346480613063725e-06, + "loss": 0.12305355072021484, + "step": 5131 + }, + { + "epoch": 0.6938065061258978, + "grad_norm": 1.3402516841888428, + "learning_rate": 7.328765616293491e-06, + "loss": 0.17723703384399414, + "step": 5132 + }, + { + "epoch": 0.6939416983523448, + "grad_norm": 0.7311825156211853, + "learning_rate": 7.322884768472521e-06, + "loss": 0.11639022827148438, + "step": 5133 + }, + { + "epoch": 0.6940768905787917, + "grad_norm": 1.3027299642562866, + "learning_rate": 7.317005519067881e-06, + "loss": 0.1964874267578125, + "step": 5134 + }, + { + "epoch": 0.6942120828052387, + "grad_norm": 1.1012269258499146, + "learning_rate": 7.311127869303665e-06, + "loss": 0.17229747772216797, + "step": 5135 + }, + { + "epoch": 0.6943472750316857, + "grad_norm": 1.2746553421020508, + "learning_rate": 7.305251820403628e-06, + "loss": 0.16065621376037598, + "step": 5136 + }, + { + "epoch": 0.6944824672581327, + "grad_norm": 1.0306593179702759, + "learning_rate": 7.299377373591188e-06, + "loss": 0.15151715278625488, + "step": 5137 + }, + { + "epoch": 0.6946176594845797, + "grad_norm": 1.45720636844635, + "learning_rate": 7.29350453008945e-06, + "loss": 0.21277475357055664, + "step": 5138 + }, + { + "epoch": 0.6947528517110266, + "grad_norm": 0.8701133728027344, + "learning_rate": 7.287633291121166e-06, + "loss": 0.188720703125, + "step": 5139 + }, + { + "epoch": 0.6948880439374736, + "grad_norm": 0.6854133009910583, + "learning_rate": 7.281763657908756e-06, + "loss": 0.15523910522460938, + "step": 5140 + }, + { + "epoch": 0.6950232361639206, + "grad_norm": 0.8548727035522461, + "learning_rate": 7.275895631674313e-06, + "loss": 0.155379056930542, + "step": 5141 + }, + { + "epoch": 0.6951584283903676, + "grad_norm": 0.8193026185035706, + "learning_rate": 7.2700292136395826e-06, + "loss": 0.10057544708251953, + "step": 5142 + }, + { + "epoch": 0.6952936206168145, + "grad_norm": 0.7833569049835205, + "learning_rate": 7.264164405025997e-06, + "loss": 0.1626291275024414, + "step": 5143 + }, + { + "epoch": 0.6954288128432615, + "grad_norm": 2.0511326789855957, + "learning_rate": 7.2583012070546364e-06, + "loss": 0.26996612548828125, + "step": 5144 + }, + { + "epoch": 0.6955640050697085, + "grad_norm": 0.8838818669319153, + "learning_rate": 7.252439620946247e-06, + "loss": 0.16716909408569336, + "step": 5145 + }, + { + "epoch": 0.6956991972961555, + "grad_norm": 0.7456916570663452, + "learning_rate": 7.246579647921243e-06, + "loss": 0.1417551040649414, + "step": 5146 + }, + { + "epoch": 0.6958343895226025, + "grad_norm": 1.6904188394546509, + "learning_rate": 7.240721289199699e-06, + "loss": 0.19864225387573242, + "step": 5147 + }, + { + "epoch": 0.6959695817490494, + "grad_norm": 1.2677772045135498, + "learning_rate": 7.234864546001364e-06, + "loss": 0.15386009216308594, + "step": 5148 + }, + { + "epoch": 0.6961047739754964, + "grad_norm": 1.5002886056900024, + "learning_rate": 7.229009419545638e-06, + "loss": 0.18121910095214844, + "step": 5149 + }, + { + "epoch": 0.6962399662019434, + "grad_norm": 1.368133783340454, + "learning_rate": 7.223155911051593e-06, + "loss": 0.1557598114013672, + "step": 5150 + }, + { + "epoch": 0.6963751584283904, + "grad_norm": 1.5539847612380981, + "learning_rate": 7.2173040217379575e-06, + "loss": 0.16486740112304688, + "step": 5151 + }, + { + "epoch": 0.6965103506548374, + "grad_norm": 0.887169361114502, + "learning_rate": 7.211453752823122e-06, + "loss": 0.15547466278076172, + "step": 5152 + }, + { + "epoch": 0.6966455428812843, + "grad_norm": 0.8338747620582581, + "learning_rate": 7.205605105525161e-06, + "loss": 0.12071585655212402, + "step": 5153 + }, + { + "epoch": 0.6967807351077313, + "grad_norm": 1.4051493406295776, + "learning_rate": 7.19975808106177e-06, + "loss": 0.20055007934570312, + "step": 5154 + }, + { + "epoch": 0.6969159273341783, + "grad_norm": 0.7138229012489319, + "learning_rate": 7.193912680650346e-06, + "loss": 0.1400771141052246, + "step": 5155 + }, + { + "epoch": 0.6970511195606253, + "grad_norm": 0.8744693398475647, + "learning_rate": 7.188068905507931e-06, + "loss": 0.17476320266723633, + "step": 5156 + }, + { + "epoch": 0.6971863117870722, + "grad_norm": 1.7933357954025269, + "learning_rate": 7.182226756851223e-06, + "loss": 0.20525169372558594, + "step": 5157 + }, + { + "epoch": 0.6973215040135192, + "grad_norm": 1.2666850090026855, + "learning_rate": 7.176386235896603e-06, + "loss": 0.16199016571044922, + "step": 5158 + }, + { + "epoch": 0.6974566962399662, + "grad_norm": 1.23150634765625, + "learning_rate": 7.170547343860079e-06, + "loss": 0.16907596588134766, + "step": 5159 + }, + { + "epoch": 0.6975918884664132, + "grad_norm": 1.0402631759643555, + "learning_rate": 7.164710081957355e-06, + "loss": 0.17164039611816406, + "step": 5160 + }, + { + "epoch": 0.6977270806928602, + "grad_norm": 0.7841763496398926, + "learning_rate": 7.158874451403777e-06, + "loss": 0.13807106018066406, + "step": 5161 + }, + { + "epoch": 0.6978622729193071, + "grad_norm": 2.068512201309204, + "learning_rate": 7.15304045341435e-06, + "loss": 0.18516921997070312, + "step": 5162 + }, + { + "epoch": 0.6979974651457541, + "grad_norm": 1.0427666902542114, + "learning_rate": 7.147208089203745e-06, + "loss": 0.1490764617919922, + "step": 5163 + }, + { + "epoch": 0.6981326573722011, + "grad_norm": 1.014174461364746, + "learning_rate": 7.141377359986288e-06, + "loss": 0.15070199966430664, + "step": 5164 + }, + { + "epoch": 0.6982678495986481, + "grad_norm": 0.6417869925498962, + "learning_rate": 7.135548266975978e-06, + "loss": 0.12025153636932373, + "step": 5165 + }, + { + "epoch": 0.698403041825095, + "grad_norm": 1.6225730180740356, + "learning_rate": 7.129720811386456e-06, + "loss": 0.21914386749267578, + "step": 5166 + }, + { + "epoch": 0.698538234051542, + "grad_norm": 2.340458869934082, + "learning_rate": 7.12389499443103e-06, + "loss": 0.1927943229675293, + "step": 5167 + }, + { + "epoch": 0.698673426277989, + "grad_norm": 1.1614012718200684, + "learning_rate": 7.118070817322668e-06, + "loss": 0.17183303833007812, + "step": 5168 + }, + { + "epoch": 0.698808618504436, + "grad_norm": 1.1580455303192139, + "learning_rate": 7.1122482812739885e-06, + "loss": 0.17609024047851562, + "step": 5169 + }, + { + "epoch": 0.698943810730883, + "grad_norm": 0.8738818764686584, + "learning_rate": 7.106427387497283e-06, + "loss": 0.14845871925354004, + "step": 5170 + }, + { + "epoch": 0.6990790029573299, + "grad_norm": 1.3449870347976685, + "learning_rate": 7.10060813720449e-06, + "loss": 0.17636489868164062, + "step": 5171 + }, + { + "epoch": 0.6992141951837769, + "grad_norm": 1.9404939413070679, + "learning_rate": 7.094790531607207e-06, + "loss": 0.2740974426269531, + "step": 5172 + }, + { + "epoch": 0.6993493874102239, + "grad_norm": 1.31975519657135, + "learning_rate": 7.088974571916692e-06, + "loss": 0.17537879943847656, + "step": 5173 + }, + { + "epoch": 0.6994845796366709, + "grad_norm": 0.7084823250770569, + "learning_rate": 7.0831602593438515e-06, + "loss": 0.12566089630126953, + "step": 5174 + }, + { + "epoch": 0.6996197718631179, + "grad_norm": 1.1030217409133911, + "learning_rate": 7.077347595099269e-06, + "loss": 0.15218579769134521, + "step": 5175 + }, + { + "epoch": 0.6997549640895648, + "grad_norm": 1.0435913801193237, + "learning_rate": 7.071536580393166e-06, + "loss": 0.1305384635925293, + "step": 5176 + }, + { + "epoch": 0.6998901563160118, + "grad_norm": 1.9203248023986816, + "learning_rate": 7.065727216435426e-06, + "loss": 0.11629164218902588, + "step": 5177 + }, + { + "epoch": 0.7000253485424588, + "grad_norm": 2.108100175857544, + "learning_rate": 7.05991950443559e-06, + "loss": 0.18920421600341797, + "step": 5178 + }, + { + "epoch": 0.7001605407689058, + "grad_norm": 0.8961849212646484, + "learning_rate": 7.05411344560285e-06, + "loss": 0.13516521453857422, + "step": 5179 + }, + { + "epoch": 0.7002957329953527, + "grad_norm": 1.6319124698638916, + "learning_rate": 7.048309041146069e-06, + "loss": 0.19341468811035156, + "step": 5180 + }, + { + "epoch": 0.7004309252217997, + "grad_norm": 0.7201474905014038, + "learning_rate": 7.0425062922737495e-06, + "loss": 0.1359405517578125, + "step": 5181 + }, + { + "epoch": 0.7005661174482467, + "grad_norm": 0.9397055506706238, + "learning_rate": 7.036705200194053e-06, + "loss": 0.1709786057472229, + "step": 5182 + }, + { + "epoch": 0.7007013096746937, + "grad_norm": 1.546152114868164, + "learning_rate": 7.0309057661148e-06, + "loss": 0.19952011108398438, + "step": 5183 + }, + { + "epoch": 0.7008365019011407, + "grad_norm": 1.1358003616333008, + "learning_rate": 7.0251079912434565e-06, + "loss": 0.15515518188476562, + "step": 5184 + }, + { + "epoch": 0.7009716941275876, + "grad_norm": 1.0617985725402832, + "learning_rate": 7.019311876787169e-06, + "loss": 0.16869735717773438, + "step": 5185 + }, + { + "epoch": 0.7011068863540346, + "grad_norm": 1.3697569370269775, + "learning_rate": 7.013517423952696e-06, + "loss": 0.14263129234313965, + "step": 5186 + }, + { + "epoch": 0.7012420785804816, + "grad_norm": 1.6162440776824951, + "learning_rate": 7.0077246339464904e-06, + "loss": 0.24807357788085938, + "step": 5187 + }, + { + "epoch": 0.7013772708069286, + "grad_norm": 1.3330588340759277, + "learning_rate": 7.001933507974635e-06, + "loss": 0.15120649337768555, + "step": 5188 + }, + { + "epoch": 0.7015124630333756, + "grad_norm": 1.3398762941360474, + "learning_rate": 6.996144047242868e-06, + "loss": 0.20532011985778809, + "step": 5189 + }, + { + "epoch": 0.7016476552598225, + "grad_norm": 0.8158812522888184, + "learning_rate": 6.9903562529566044e-06, + "loss": 0.10885810852050781, + "step": 5190 + }, + { + "epoch": 0.7017828474862695, + "grad_norm": 1.7342039346694946, + "learning_rate": 6.984570126320869e-06, + "loss": 0.17320013046264648, + "step": 5191 + }, + { + "epoch": 0.7019180397127165, + "grad_norm": 1.240268588066101, + "learning_rate": 6.978785668540384e-06, + "loss": 0.15944337844848633, + "step": 5192 + }, + { + "epoch": 0.7020532319391635, + "grad_norm": 1.0960705280303955, + "learning_rate": 6.973002880819496e-06, + "loss": 0.20203948020935059, + "step": 5193 + }, + { + "epoch": 0.7021884241656104, + "grad_norm": 3.224013566970825, + "learning_rate": 6.96722176436221e-06, + "loss": 0.2921295166015625, + "step": 5194 + }, + { + "epoch": 0.7023236163920574, + "grad_norm": 1.1440491676330566, + "learning_rate": 6.9614423203721975e-06, + "loss": 0.12075090408325195, + "step": 5195 + }, + { + "epoch": 0.7024588086185044, + "grad_norm": 2.2682206630706787, + "learning_rate": 6.955664550052749e-06, + "loss": 0.2271726131439209, + "step": 5196 + }, + { + "epoch": 0.7025940008449514, + "grad_norm": 1.2267061471939087, + "learning_rate": 6.949888454606847e-06, + "loss": 0.1933155059814453, + "step": 5197 + }, + { + "epoch": 0.7027291930713984, + "grad_norm": 1.8831148147583008, + "learning_rate": 6.944114035237095e-06, + "loss": 0.20353317260742188, + "step": 5198 + }, + { + "epoch": 0.7028643852978453, + "grad_norm": 1.0325020551681519, + "learning_rate": 6.93834129314576e-06, + "loss": 0.16477584838867188, + "step": 5199 + }, + { + "epoch": 0.7029995775242923, + "grad_norm": 1.0077366828918457, + "learning_rate": 6.932570229534759e-06, + "loss": 0.16625213623046875, + "step": 5200 + }, + { + "epoch": 0.7031347697507393, + "grad_norm": 1.5973337888717651, + "learning_rate": 6.9268008456056505e-06, + "loss": 0.1799304485321045, + "step": 5201 + }, + { + "epoch": 0.7032699619771863, + "grad_norm": 1.5032504796981812, + "learning_rate": 6.921033142559664e-06, + "loss": 0.1950300931930542, + "step": 5202 + }, + { + "epoch": 0.7034051542036333, + "grad_norm": 1.4120783805847168, + "learning_rate": 6.915267121597659e-06, + "loss": 0.15298080444335938, + "step": 5203 + }, + { + "epoch": 0.7035403464300802, + "grad_norm": 1.5226410627365112, + "learning_rate": 6.909502783920153e-06, + "loss": 0.162506103515625, + "step": 5204 + }, + { + "epoch": 0.7036755386565272, + "grad_norm": 1.3698943853378296, + "learning_rate": 6.903740130727312e-06, + "loss": 0.1988658905029297, + "step": 5205 + }, + { + "epoch": 0.7038107308829742, + "grad_norm": 0.7702086567878723, + "learning_rate": 6.8979791632189425e-06, + "loss": 0.12569665908813477, + "step": 5206 + }, + { + "epoch": 0.7039459231094212, + "grad_norm": 1.0782551765441895, + "learning_rate": 6.892219882594523e-06, + "loss": 0.1438922882080078, + "step": 5207 + }, + { + "epoch": 0.7040811153358681, + "grad_norm": 0.8102098107337952, + "learning_rate": 6.886462290053159e-06, + "loss": 0.14880084991455078, + "step": 5208 + }, + { + "epoch": 0.7042163075623151, + "grad_norm": 0.5309339165687561, + "learning_rate": 6.880706386793614e-06, + "loss": 0.10606718063354492, + "step": 5209 + }, + { + "epoch": 0.7043514997887621, + "grad_norm": 1.839189887046814, + "learning_rate": 6.874952174014298e-06, + "loss": 0.14774513244628906, + "step": 5210 + }, + { + "epoch": 0.7044866920152091, + "grad_norm": 1.2432509660720825, + "learning_rate": 6.8691996529132585e-06, + "loss": 0.18129825592041016, + "step": 5211 + }, + { + "epoch": 0.7046218842416561, + "grad_norm": 1.2936617136001587, + "learning_rate": 6.863448824688217e-06, + "loss": 0.1758289337158203, + "step": 5212 + }, + { + "epoch": 0.704757076468103, + "grad_norm": 1.2065259218215942, + "learning_rate": 6.857699690536521e-06, + "loss": 0.19231557846069336, + "step": 5213 + }, + { + "epoch": 0.70489226869455, + "grad_norm": 1.1868269443511963, + "learning_rate": 6.8519522516551685e-06, + "loss": 0.14506006240844727, + "step": 5214 + }, + { + "epoch": 0.705027460920997, + "grad_norm": 2.1638193130493164, + "learning_rate": 6.846206509240807e-06, + "loss": 0.2016735076904297, + "step": 5215 + }, + { + "epoch": 0.705162653147444, + "grad_norm": 2.3246593475341797, + "learning_rate": 6.840462464489726e-06, + "loss": 0.2314605712890625, + "step": 5216 + }, + { + "epoch": 0.705297845373891, + "grad_norm": 1.1544358730316162, + "learning_rate": 6.834720118597879e-06, + "loss": 0.1596202850341797, + "step": 5217 + }, + { + "epoch": 0.7054330376003379, + "grad_norm": 1.245569109916687, + "learning_rate": 6.828979472760846e-06, + "loss": 0.21908187866210938, + "step": 5218 + }, + { + "epoch": 0.7055682298267849, + "grad_norm": 0.7290953397750854, + "learning_rate": 6.823240528173858e-06, + "loss": 0.13218283653259277, + "step": 5219 + }, + { + "epoch": 0.7057034220532319, + "grad_norm": 1.1778936386108398, + "learning_rate": 6.817503286031797e-06, + "loss": 0.1854863166809082, + "step": 5220 + }, + { + "epoch": 0.7058386142796789, + "grad_norm": 1.1369704008102417, + "learning_rate": 6.811767747529181e-06, + "loss": 0.1791229248046875, + "step": 5221 + }, + { + "epoch": 0.7059738065061258, + "grad_norm": 1.5306711196899414, + "learning_rate": 6.806033913860195e-06, + "loss": 0.21477317810058594, + "step": 5222 + }, + { + "epoch": 0.7061089987325728, + "grad_norm": 3.785911798477173, + "learning_rate": 6.800301786218634e-06, + "loss": 0.23604393005371094, + "step": 5223 + }, + { + "epoch": 0.7062441909590198, + "grad_norm": 1.475637435913086, + "learning_rate": 6.794571365797971e-06, + "loss": 0.18343877792358398, + "step": 5224 + }, + { + "epoch": 0.7063793831854668, + "grad_norm": 1.4161368608474731, + "learning_rate": 6.788842653791308e-06, + "loss": 0.17431139945983887, + "step": 5225 + }, + { + "epoch": 0.7065145754119139, + "grad_norm": 2.020963668823242, + "learning_rate": 6.7831156513913864e-06, + "loss": 0.1591472625732422, + "step": 5226 + }, + { + "epoch": 0.7066497676383608, + "grad_norm": 1.020849347114563, + "learning_rate": 6.777390359790614e-06, + "loss": 0.14255046844482422, + "step": 5227 + }, + { + "epoch": 0.7067849598648078, + "grad_norm": 1.9783674478530884, + "learning_rate": 6.771666780181004e-06, + "loss": 0.20640087127685547, + "step": 5228 + }, + { + "epoch": 0.7069201520912548, + "grad_norm": 0.7509810924530029, + "learning_rate": 6.765944913754258e-06, + "loss": 0.10895109176635742, + "step": 5229 + }, + { + "epoch": 0.7070553443177018, + "grad_norm": 1.8413426876068115, + "learning_rate": 6.7602247617016885e-06, + "loss": 0.22287940979003906, + "step": 5230 + }, + { + "epoch": 0.7071905365441488, + "grad_norm": 0.9900017380714417, + "learning_rate": 6.754506325214265e-06, + "loss": 0.1849372386932373, + "step": 5231 + }, + { + "epoch": 0.7073257287705957, + "grad_norm": 1.370452642440796, + "learning_rate": 6.748789605482593e-06, + "loss": 0.20341110229492188, + "step": 5232 + }, + { + "epoch": 0.7074609209970427, + "grad_norm": 0.938466489315033, + "learning_rate": 6.743074603696922e-06, + "loss": 0.12316513061523438, + "step": 5233 + }, + { + "epoch": 0.7075961132234897, + "grad_norm": 2.760768175125122, + "learning_rate": 6.737361321047155e-06, + "loss": 0.17108726501464844, + "step": 5234 + }, + { + "epoch": 0.7077313054499367, + "grad_norm": 1.2002313137054443, + "learning_rate": 6.731649758722823e-06, + "loss": 0.22731781005859375, + "step": 5235 + }, + { + "epoch": 0.7078664976763837, + "grad_norm": 0.6055838465690613, + "learning_rate": 6.725939917913102e-06, + "loss": 0.11905288696289062, + "step": 5236 + }, + { + "epoch": 0.7080016899028306, + "grad_norm": 0.7012165188789368, + "learning_rate": 6.720231799806814e-06, + "loss": 0.1473172903060913, + "step": 5237 + }, + { + "epoch": 0.7081368821292776, + "grad_norm": 0.5644224286079407, + "learning_rate": 6.7145254055924136e-06, + "loss": 0.11808204650878906, + "step": 5238 + }, + { + "epoch": 0.7082720743557246, + "grad_norm": 0.7486729621887207, + "learning_rate": 6.70882073645801e-06, + "loss": 0.12996768951416016, + "step": 5239 + }, + { + "epoch": 0.7084072665821716, + "grad_norm": 0.8076403737068176, + "learning_rate": 6.703117793591346e-06, + "loss": 0.13739728927612305, + "step": 5240 + }, + { + "epoch": 0.7085424588086185, + "grad_norm": 0.963392972946167, + "learning_rate": 6.6974165781798e-06, + "loss": 0.16089248657226562, + "step": 5241 + }, + { + "epoch": 0.7086776510350655, + "grad_norm": 0.965478241443634, + "learning_rate": 6.691717091410398e-06, + "loss": 0.1577134132385254, + "step": 5242 + }, + { + "epoch": 0.7088128432615125, + "grad_norm": 0.8361654877662659, + "learning_rate": 6.686019334469797e-06, + "loss": 0.14540749788284302, + "step": 5243 + }, + { + "epoch": 0.7089480354879595, + "grad_norm": 1.4129126071929932, + "learning_rate": 6.680323308544312e-06, + "loss": 0.1485309600830078, + "step": 5244 + }, + { + "epoch": 0.7090832277144065, + "grad_norm": 1.2083866596221924, + "learning_rate": 6.674629014819879e-06, + "loss": 0.1945018768310547, + "step": 5245 + }, + { + "epoch": 0.7092184199408534, + "grad_norm": 1.7138794660568237, + "learning_rate": 6.668936454482082e-06, + "loss": 0.19281005859375, + "step": 5246 + }, + { + "epoch": 0.7093536121673004, + "grad_norm": 1.8031753301620483, + "learning_rate": 6.6632456287161426e-06, + "loss": 0.18366622924804688, + "step": 5247 + }, + { + "epoch": 0.7094888043937474, + "grad_norm": 1.0596013069152832, + "learning_rate": 6.657556538706914e-06, + "loss": 0.1574840545654297, + "step": 5248 + }, + { + "epoch": 0.7096239966201944, + "grad_norm": 1.1774144172668457, + "learning_rate": 6.651869185638907e-06, + "loss": 0.19628477096557617, + "step": 5249 + }, + { + "epoch": 0.7097591888466414, + "grad_norm": 1.7380789518356323, + "learning_rate": 6.646183570696253e-06, + "loss": 0.2093358039855957, + "step": 5250 + }, + { + "epoch": 0.7098943810730883, + "grad_norm": 0.9401379227638245, + "learning_rate": 6.6404996950627275e-06, + "loss": 0.1821298599243164, + "step": 5251 + }, + { + "epoch": 0.7100295732995353, + "grad_norm": 1.5139966011047363, + "learning_rate": 6.634817559921744e-06, + "loss": 0.16340315341949463, + "step": 5252 + }, + { + "epoch": 0.7101647655259823, + "grad_norm": 1.1462069749832153, + "learning_rate": 6.629137166456348e-06, + "loss": 0.1474614143371582, + "step": 5253 + }, + { + "epoch": 0.7102999577524293, + "grad_norm": 0.8312404751777649, + "learning_rate": 6.623458515849244e-06, + "loss": 0.13142013549804688, + "step": 5254 + }, + { + "epoch": 0.7104351499788762, + "grad_norm": 1.0690691471099854, + "learning_rate": 6.6177816092827354e-06, + "loss": 0.2099456787109375, + "step": 5255 + }, + { + "epoch": 0.7105703422053232, + "grad_norm": 1.587024211883545, + "learning_rate": 6.6121064479388e-06, + "loss": 0.19501399993896484, + "step": 5256 + }, + { + "epoch": 0.7107055344317702, + "grad_norm": 0.8954169750213623, + "learning_rate": 6.606433032999031e-06, + "loss": 0.17804574966430664, + "step": 5257 + }, + { + "epoch": 0.7108407266582172, + "grad_norm": 0.8076308369636536, + "learning_rate": 6.60076136564466e-06, + "loss": 0.16410017013549805, + "step": 5258 + }, + { + "epoch": 0.7109759188846642, + "grad_norm": 1.0554468631744385, + "learning_rate": 6.595091447056574e-06, + "loss": 0.21441853046417236, + "step": 5259 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 1.2991645336151123, + "learning_rate": 6.589423278415259e-06, + "loss": 0.2372903823852539, + "step": 5260 + }, + { + "epoch": 0.7112463033375581, + "grad_norm": 1.1390784978866577, + "learning_rate": 6.583756860900872e-06, + "loss": 0.13801372051239014, + "step": 5261 + }, + { + "epoch": 0.7113814955640051, + "grad_norm": 1.022175669670105, + "learning_rate": 6.578092195693187e-06, + "loss": 0.1260509490966797, + "step": 5262 + }, + { + "epoch": 0.7115166877904521, + "grad_norm": 0.7669751048088074, + "learning_rate": 6.572429283971614e-06, + "loss": 0.12987041473388672, + "step": 5263 + }, + { + "epoch": 0.711651880016899, + "grad_norm": 0.8804888725280762, + "learning_rate": 6.566768126915215e-06, + "loss": 0.15361404418945312, + "step": 5264 + }, + { + "epoch": 0.711787072243346, + "grad_norm": 1.2320399284362793, + "learning_rate": 6.561108725702653e-06, + "loss": 0.146209716796875, + "step": 5265 + }, + { + "epoch": 0.711922264469793, + "grad_norm": 1.347063422203064, + "learning_rate": 6.555451081512262e-06, + "loss": 0.2016773223876953, + "step": 5266 + }, + { + "epoch": 0.71205745669624, + "grad_norm": 1.02095627784729, + "learning_rate": 6.549795195521988e-06, + "loss": 0.1999359130859375, + "step": 5267 + }, + { + "epoch": 0.712192648922687, + "grad_norm": 0.8457715511322021, + "learning_rate": 6.544141068909416e-06, + "loss": 0.11187124252319336, + "step": 5268 + }, + { + "epoch": 0.712327841149134, + "grad_norm": 0.8879594206809998, + "learning_rate": 6.5384887028517645e-06, + "loss": 0.12949371337890625, + "step": 5269 + }, + { + "epoch": 0.7124630333755809, + "grad_norm": 1.1260361671447754, + "learning_rate": 6.532838098525883e-06, + "loss": 0.1906595230102539, + "step": 5270 + }, + { + "epoch": 0.7125982256020279, + "grad_norm": 0.8502476215362549, + "learning_rate": 6.5271892571082655e-06, + "loss": 0.14899611473083496, + "step": 5271 + }, + { + "epoch": 0.7127334178284749, + "grad_norm": 1.2261719703674316, + "learning_rate": 6.521542179775029e-06, + "loss": 0.16855430603027344, + "step": 5272 + }, + { + "epoch": 0.7128686100549219, + "grad_norm": 1.238086223602295, + "learning_rate": 6.515896867701924e-06, + "loss": 0.15983033180236816, + "step": 5273 + }, + { + "epoch": 0.7130038022813688, + "grad_norm": 0.9716398119926453, + "learning_rate": 6.510253322064333e-06, + "loss": 0.1437387466430664, + "step": 5274 + }, + { + "epoch": 0.7131389945078158, + "grad_norm": 1.5535765886306763, + "learning_rate": 6.504611544037267e-06, + "loss": 0.21181392669677734, + "step": 5275 + }, + { + "epoch": 0.7132741867342628, + "grad_norm": 1.6955326795578003, + "learning_rate": 6.498971534795387e-06, + "loss": 0.21105480194091797, + "step": 5276 + }, + { + "epoch": 0.7134093789607098, + "grad_norm": 1.180829405784607, + "learning_rate": 6.493333295512965e-06, + "loss": 0.14973068237304688, + "step": 5277 + }, + { + "epoch": 0.7135445711871568, + "grad_norm": 0.9586646556854248, + "learning_rate": 6.487696827363916e-06, + "loss": 0.1754283905029297, + "step": 5278 + }, + { + "epoch": 0.7136797634136037, + "grad_norm": 0.7057746648788452, + "learning_rate": 6.48206213152178e-06, + "loss": 0.14139747619628906, + "step": 5279 + }, + { + "epoch": 0.7138149556400507, + "grad_norm": 1.4581705331802368, + "learning_rate": 6.476429209159725e-06, + "loss": 0.17615032196044922, + "step": 5280 + }, + { + "epoch": 0.7139501478664977, + "grad_norm": 1.102221131324768, + "learning_rate": 6.470798061450568e-06, + "loss": 0.15878593921661377, + "step": 5281 + }, + { + "epoch": 0.7140853400929447, + "grad_norm": 1.3147752285003662, + "learning_rate": 6.465168689566738e-06, + "loss": 0.1579265594482422, + "step": 5282 + }, + { + "epoch": 0.7142205323193916, + "grad_norm": 1.3335950374603271, + "learning_rate": 6.4595410946803e-06, + "loss": 0.19703292846679688, + "step": 5283 + }, + { + "epoch": 0.7143557245458386, + "grad_norm": 2.142792224884033, + "learning_rate": 6.453915277962948e-06, + "loss": 0.21161460876464844, + "step": 5284 + }, + { + "epoch": 0.7144909167722856, + "grad_norm": 0.9373217225074768, + "learning_rate": 6.4482912405860055e-06, + "loss": 0.19946861267089844, + "step": 5285 + }, + { + "epoch": 0.7146261089987326, + "grad_norm": 3.7490131855010986, + "learning_rate": 6.442668983720434e-06, + "loss": 0.22241973876953125, + "step": 5286 + }, + { + "epoch": 0.7147613012251796, + "grad_norm": 1.23158860206604, + "learning_rate": 6.437048508536813e-06, + "loss": 0.1703634262084961, + "step": 5287 + }, + { + "epoch": 0.7148964934516265, + "grad_norm": 1.6232367753982544, + "learning_rate": 6.431429816205357e-06, + "loss": 0.1179962158203125, + "step": 5288 + }, + { + "epoch": 0.7150316856780735, + "grad_norm": 1.2622445821762085, + "learning_rate": 6.425812907895904e-06, + "loss": 0.2069411277770996, + "step": 5289 + }, + { + "epoch": 0.7151668779045205, + "grad_norm": 0.8563898205757141, + "learning_rate": 6.420197784777925e-06, + "loss": 0.15022850036621094, + "step": 5290 + }, + { + "epoch": 0.7153020701309675, + "grad_norm": 1.122961401939392, + "learning_rate": 6.414584448020528e-06, + "loss": 0.15265941619873047, + "step": 5291 + }, + { + "epoch": 0.7154372623574144, + "grad_norm": 0.9871898293495178, + "learning_rate": 6.408972898792423e-06, + "loss": 0.21117401123046875, + "step": 5292 + }, + { + "epoch": 0.7155724545838614, + "grad_norm": 1.1413291692733765, + "learning_rate": 6.4033631382619766e-06, + "loss": 0.14159274101257324, + "step": 5293 + }, + { + "epoch": 0.7157076468103084, + "grad_norm": 1.3430914878845215, + "learning_rate": 6.397755167597171e-06, + "loss": 0.18158745765686035, + "step": 5294 + }, + { + "epoch": 0.7158428390367554, + "grad_norm": 0.7743534445762634, + "learning_rate": 6.392148987965603e-06, + "loss": 0.12764322757720947, + "step": 5295 + }, + { + "epoch": 0.7159780312632024, + "grad_norm": 1.141188144683838, + "learning_rate": 6.386544600534532e-06, + "loss": 0.17114418745040894, + "step": 5296 + }, + { + "epoch": 0.7161132234896493, + "grad_norm": 0.94621741771698, + "learning_rate": 6.3809420064707965e-06, + "loss": 0.13990259170532227, + "step": 5297 + }, + { + "epoch": 0.7162484157160963, + "grad_norm": 1.3892114162445068, + "learning_rate": 6.375341206940902e-06, + "loss": 0.1618633270263672, + "step": 5298 + }, + { + "epoch": 0.7163836079425433, + "grad_norm": 1.5031366348266602, + "learning_rate": 6.369742203110962e-06, + "loss": 0.16081321239471436, + "step": 5299 + }, + { + "epoch": 0.7165188001689903, + "grad_norm": 1.1159032583236694, + "learning_rate": 6.364144996146716e-06, + "loss": 0.12784147262573242, + "step": 5300 + }, + { + "epoch": 0.7166539923954373, + "grad_norm": 1.0115638971328735, + "learning_rate": 6.358549587213534e-06, + "loss": 0.14824330806732178, + "step": 5301 + }, + { + "epoch": 0.7167891846218842, + "grad_norm": 1.6801844835281372, + "learning_rate": 6.352955977476405e-06, + "loss": 0.14242029190063477, + "step": 5302 + }, + { + "epoch": 0.7169243768483312, + "grad_norm": 0.7700613737106323, + "learning_rate": 6.347364168099959e-06, + "loss": 0.1633777618408203, + "step": 5303 + }, + { + "epoch": 0.7170595690747782, + "grad_norm": 0.6836645603179932, + "learning_rate": 6.341774160248435e-06, + "loss": 0.10064876079559326, + "step": 5304 + }, + { + "epoch": 0.7171947613012252, + "grad_norm": 1.1566351652145386, + "learning_rate": 6.3361859550857e-06, + "loss": 0.1804943084716797, + "step": 5305 + }, + { + "epoch": 0.7173299535276721, + "grad_norm": 1.269128441810608, + "learning_rate": 6.330599553775252e-06, + "loss": 0.1790175437927246, + "step": 5306 + }, + { + "epoch": 0.7174651457541191, + "grad_norm": 1.2210664749145508, + "learning_rate": 6.325014957480203e-06, + "loss": 0.16953563690185547, + "step": 5307 + }, + { + "epoch": 0.7176003379805661, + "grad_norm": 1.5633882284164429, + "learning_rate": 6.319432167363305e-06, + "loss": 0.24365615844726562, + "step": 5308 + }, + { + "epoch": 0.7177355302070131, + "grad_norm": 1.3419339656829834, + "learning_rate": 6.313851184586918e-06, + "loss": 0.1390066146850586, + "step": 5309 + }, + { + "epoch": 0.7178707224334601, + "grad_norm": 1.3377631902694702, + "learning_rate": 6.308272010313037e-06, + "loss": 0.22277069091796875, + "step": 5310 + }, + { + "epoch": 0.718005914659907, + "grad_norm": 1.1070847511291504, + "learning_rate": 6.302694645703273e-06, + "loss": 0.245086669921875, + "step": 5311 + }, + { + "epoch": 0.718141106886354, + "grad_norm": 1.1244513988494873, + "learning_rate": 6.297119091918857e-06, + "loss": 0.19168567657470703, + "step": 5312 + }, + { + "epoch": 0.718276299112801, + "grad_norm": 1.4264655113220215, + "learning_rate": 6.2915453501206634e-06, + "loss": 0.18460631370544434, + "step": 5313 + }, + { + "epoch": 0.718411491339248, + "grad_norm": 1.0178738832473755, + "learning_rate": 6.285973421469166e-06, + "loss": 0.18571043014526367, + "step": 5314 + }, + { + "epoch": 0.718546683565695, + "grad_norm": 1.9263534545898438, + "learning_rate": 6.28040330712447e-06, + "loss": 0.1701972484588623, + "step": 5315 + }, + { + "epoch": 0.7186818757921419, + "grad_norm": 0.9028226137161255, + "learning_rate": 6.274835008246304e-06, + "loss": 0.15158700942993164, + "step": 5316 + }, + { + "epoch": 0.7188170680185889, + "grad_norm": 0.9210264086723328, + "learning_rate": 6.269268525994013e-06, + "loss": 0.16845488548278809, + "step": 5317 + }, + { + "epoch": 0.7189522602450359, + "grad_norm": 0.6532940864562988, + "learning_rate": 6.263703861526578e-06, + "loss": 0.12207603454589844, + "step": 5318 + }, + { + "epoch": 0.7190874524714829, + "grad_norm": 2.0820014476776123, + "learning_rate": 6.258141016002587e-06, + "loss": 0.18713855743408203, + "step": 5319 + }, + { + "epoch": 0.7192226446979298, + "grad_norm": 1.1615533828735352, + "learning_rate": 6.252579990580254e-06, + "loss": 0.17844200134277344, + "step": 5320 + }, + { + "epoch": 0.7193578369243768, + "grad_norm": 1.5662070512771606, + "learning_rate": 6.247020786417412e-06, + "loss": 0.1857318878173828, + "step": 5321 + }, + { + "epoch": 0.7194930291508238, + "grad_norm": 1.068244218826294, + "learning_rate": 6.241463404671516e-06, + "loss": 0.15768051147460938, + "step": 5322 + }, + { + "epoch": 0.7196282213772708, + "grad_norm": 1.0775821208953857, + "learning_rate": 6.235907846499655e-06, + "loss": 0.1813983917236328, + "step": 5323 + }, + { + "epoch": 0.7197634136037178, + "grad_norm": 0.9361177086830139, + "learning_rate": 6.230354113058505e-06, + "loss": 0.19098472595214844, + "step": 5324 + }, + { + "epoch": 0.7198986058301647, + "grad_norm": 1.3774340152740479, + "learning_rate": 6.2248022055044e-06, + "loss": 0.16189992427825928, + "step": 5325 + }, + { + "epoch": 0.7200337980566117, + "grad_norm": 1.1104826927185059, + "learning_rate": 6.219252124993271e-06, + "loss": 0.1779160499572754, + "step": 5326 + }, + { + "epoch": 0.7201689902830587, + "grad_norm": 1.1793369054794312, + "learning_rate": 6.213703872680668e-06, + "loss": 0.23790359497070312, + "step": 5327 + }, + { + "epoch": 0.7203041825095057, + "grad_norm": 1.657008409500122, + "learning_rate": 6.208157449721785e-06, + "loss": 0.15612578392028809, + "step": 5328 + }, + { + "epoch": 0.7204393747359527, + "grad_norm": 0.9390886425971985, + "learning_rate": 6.202612857271393e-06, + "loss": 0.1592578887939453, + "step": 5329 + }, + { + "epoch": 0.7205745669623996, + "grad_norm": 1.925319790840149, + "learning_rate": 6.197070096483923e-06, + "loss": 0.15016651153564453, + "step": 5330 + }, + { + "epoch": 0.7207097591888466, + "grad_norm": 1.0671675205230713, + "learning_rate": 6.191529168513403e-06, + "loss": 0.16497421264648438, + "step": 5331 + }, + { + "epoch": 0.7208449514152936, + "grad_norm": 0.883821964263916, + "learning_rate": 6.1859900745134755e-06, + "loss": 0.1655750274658203, + "step": 5332 + }, + { + "epoch": 0.7209801436417406, + "grad_norm": 0.8217856287956238, + "learning_rate": 6.180452815637429e-06, + "loss": 0.1514291763305664, + "step": 5333 + }, + { + "epoch": 0.7211153358681875, + "grad_norm": 1.1794695854187012, + "learning_rate": 6.174917393038126e-06, + "loss": 0.1785411834716797, + "step": 5334 + }, + { + "epoch": 0.7212505280946345, + "grad_norm": 0.8648750185966492, + "learning_rate": 6.169383807868088e-06, + "loss": 0.19240760803222656, + "step": 5335 + }, + { + "epoch": 0.7213857203210815, + "grad_norm": 0.7522581815719604, + "learning_rate": 6.163852061279432e-06, + "loss": 0.15709686279296875, + "step": 5336 + }, + { + "epoch": 0.7215209125475285, + "grad_norm": 0.8271566033363342, + "learning_rate": 6.158322154423897e-06, + "loss": 0.15279579162597656, + "step": 5337 + }, + { + "epoch": 0.7216561047739755, + "grad_norm": 0.8708582520484924, + "learning_rate": 6.15279408845284e-06, + "loss": 0.12851381301879883, + "step": 5338 + }, + { + "epoch": 0.7217912970004224, + "grad_norm": 0.6457862257957458, + "learning_rate": 6.147267864517226e-06, + "loss": 0.12972640991210938, + "step": 5339 + }, + { + "epoch": 0.7219264892268694, + "grad_norm": 0.8093400001525879, + "learning_rate": 6.141743483767658e-06, + "loss": 0.20019912719726562, + "step": 5340 + }, + { + "epoch": 0.7220616814533164, + "grad_norm": 1.407281756401062, + "learning_rate": 6.136220947354333e-06, + "loss": 0.20703887939453125, + "step": 5341 + }, + { + "epoch": 0.7221968736797634, + "grad_norm": 1.152104377746582, + "learning_rate": 6.130700256427075e-06, + "loss": 0.2013249397277832, + "step": 5342 + }, + { + "epoch": 0.7223320659062104, + "grad_norm": 1.2698312997817993, + "learning_rate": 6.1251814121353204e-06, + "loss": 0.2011566162109375, + "step": 5343 + }, + { + "epoch": 0.7224672581326573, + "grad_norm": 0.7777379155158997, + "learning_rate": 6.1196644156281175e-06, + "loss": 0.133453369140625, + "step": 5344 + }, + { + "epoch": 0.7226024503591043, + "grad_norm": 0.9934561848640442, + "learning_rate": 6.114149268054143e-06, + "loss": 0.18868422508239746, + "step": 5345 + }, + { + "epoch": 0.7227376425855513, + "grad_norm": 1.0047225952148438, + "learning_rate": 6.108635970561679e-06, + "loss": 0.12573719024658203, + "step": 5346 + }, + { + "epoch": 0.7228728348119983, + "grad_norm": 1.8367822170257568, + "learning_rate": 6.103124524298617e-06, + "loss": 0.2581939697265625, + "step": 5347 + }, + { + "epoch": 0.7230080270384452, + "grad_norm": 0.6864392161369324, + "learning_rate": 6.097614930412475e-06, + "loss": 0.13956880569458008, + "step": 5348 + }, + { + "epoch": 0.7231432192648922, + "grad_norm": 1.3736300468444824, + "learning_rate": 6.092107190050371e-06, + "loss": 0.14592409133911133, + "step": 5349 + }, + { + "epoch": 0.7232784114913392, + "grad_norm": 0.898743212223053, + "learning_rate": 6.086601304359059e-06, + "loss": 0.16562843322753906, + "step": 5350 + }, + { + "epoch": 0.7234136037177862, + "grad_norm": 1.059808611869812, + "learning_rate": 6.081097274484887e-06, + "loss": 0.1921253204345703, + "step": 5351 + }, + { + "epoch": 0.7235487959442332, + "grad_norm": 0.6184583306312561, + "learning_rate": 6.075595101573825e-06, + "loss": 0.10637474060058594, + "step": 5352 + }, + { + "epoch": 0.7236839881706801, + "grad_norm": 0.9661160111427307, + "learning_rate": 6.070094786771451e-06, + "loss": 0.14311715960502625, + "step": 5353 + }, + { + "epoch": 0.7238191803971271, + "grad_norm": 1.2027264833450317, + "learning_rate": 6.06459633122296e-06, + "loss": 0.15629911422729492, + "step": 5354 + }, + { + "epoch": 0.7239543726235741, + "grad_norm": 1.3895877599716187, + "learning_rate": 6.059099736073166e-06, + "loss": 0.1889948844909668, + "step": 5355 + }, + { + "epoch": 0.7240895648500211, + "grad_norm": 1.1352018117904663, + "learning_rate": 6.0536050024664865e-06, + "loss": 0.1892547607421875, + "step": 5356 + }, + { + "epoch": 0.724224757076468, + "grad_norm": 0.8232440948486328, + "learning_rate": 6.048112131546953e-06, + "loss": 0.16222572326660156, + "step": 5357 + }, + { + "epoch": 0.724359949302915, + "grad_norm": 1.675679087638855, + "learning_rate": 6.0426211244582105e-06, + "loss": 0.16073906421661377, + "step": 5358 + }, + { + "epoch": 0.724495141529362, + "grad_norm": 1.1714844703674316, + "learning_rate": 6.03713198234351e-06, + "loss": 0.16106367111206055, + "step": 5359 + }, + { + "epoch": 0.724630333755809, + "grad_norm": 0.9745199680328369, + "learning_rate": 6.0316447063457395e-06, + "loss": 0.19371986389160156, + "step": 5360 + }, + { + "epoch": 0.724765525982256, + "grad_norm": 1.5840797424316406, + "learning_rate": 6.026159297607356e-06, + "loss": 0.17846298217773438, + "step": 5361 + }, + { + "epoch": 0.724900718208703, + "grad_norm": 1.2856674194335938, + "learning_rate": 6.020675757270466e-06, + "loss": 0.17676793038845062, + "step": 5362 + }, + { + "epoch": 0.72503591043515, + "grad_norm": 1.3327083587646484, + "learning_rate": 6.015194086476766e-06, + "loss": 0.1458110809326172, + "step": 5363 + }, + { + "epoch": 0.725171102661597, + "grad_norm": 1.190673828125, + "learning_rate": 6.009714286367565e-06, + "loss": 0.1539926528930664, + "step": 5364 + }, + { + "epoch": 0.725306294888044, + "grad_norm": 0.9986264109611511, + "learning_rate": 6.004236358083802e-06, + "loss": 0.16712522506713867, + "step": 5365 + }, + { + "epoch": 0.725441487114491, + "grad_norm": 1.6283403635025024, + "learning_rate": 5.998760302765989e-06, + "loss": 0.15686607360839844, + "step": 5366 + }, + { + "epoch": 0.725576679340938, + "grad_norm": 1.0782270431518555, + "learning_rate": 5.993286121554289e-06, + "loss": 0.1532679796218872, + "step": 5367 + }, + { + "epoch": 0.7257118715673849, + "grad_norm": 0.8782466053962708, + "learning_rate": 5.987813815588447e-06, + "loss": 0.2120189666748047, + "step": 5368 + }, + { + "epoch": 0.7258470637938319, + "grad_norm": 1.1710044145584106, + "learning_rate": 5.982343386007827e-06, + "loss": 0.19725322723388672, + "step": 5369 + }, + { + "epoch": 0.7259822560202789, + "grad_norm": 0.7435158491134644, + "learning_rate": 5.976874833951404e-06, + "loss": 0.1588430404663086, + "step": 5370 + }, + { + "epoch": 0.7261174482467259, + "grad_norm": 0.8678314685821533, + "learning_rate": 5.971408160557751e-06, + "loss": 0.14610815048217773, + "step": 5371 + }, + { + "epoch": 0.7262526404731728, + "grad_norm": 1.2908798456192017, + "learning_rate": 5.965943366965069e-06, + "loss": 0.19222164154052734, + "step": 5372 + }, + { + "epoch": 0.7263878326996198, + "grad_norm": 0.930377185344696, + "learning_rate": 5.960480454311155e-06, + "loss": 0.15697479248046875, + "step": 5373 + }, + { + "epoch": 0.7265230249260668, + "grad_norm": 1.8857017755508423, + "learning_rate": 5.955019423733416e-06, + "loss": 0.2737865447998047, + "step": 5374 + }, + { + "epoch": 0.7266582171525138, + "grad_norm": 0.9728448390960693, + "learning_rate": 5.949560276368866e-06, + "loss": 0.20526504516601562, + "step": 5375 + }, + { + "epoch": 0.7267934093789608, + "grad_norm": 1.3432884216308594, + "learning_rate": 5.9441030133541235e-06, + "loss": 0.21331787109375, + "step": 5376 + }, + { + "epoch": 0.7269286016054077, + "grad_norm": 0.8436256051063538, + "learning_rate": 5.938647635825432e-06, + "loss": 0.15552330017089844, + "step": 5377 + }, + { + "epoch": 0.7270637938318547, + "grad_norm": 0.9984455108642578, + "learning_rate": 5.933194144918623e-06, + "loss": 0.19208359718322754, + "step": 5378 + }, + { + "epoch": 0.7271989860583017, + "grad_norm": 1.8108103275299072, + "learning_rate": 5.927742541769142e-06, + "loss": 0.1502552032470703, + "step": 5379 + }, + { + "epoch": 0.7273341782847487, + "grad_norm": 2.7279884815216064, + "learning_rate": 5.9222928275120445e-06, + "loss": 0.17171192169189453, + "step": 5380 + }, + { + "epoch": 0.7274693705111956, + "grad_norm": 0.799777090549469, + "learning_rate": 5.916845003281983e-06, + "loss": 0.17359447479248047, + "step": 5381 + }, + { + "epoch": 0.7276045627376426, + "grad_norm": 1.5356882810592651, + "learning_rate": 5.911399070213234e-06, + "loss": 0.207733154296875, + "step": 5382 + }, + { + "epoch": 0.7277397549640896, + "grad_norm": 0.6983720064163208, + "learning_rate": 5.905955029439665e-06, + "loss": 0.16014456748962402, + "step": 5383 + }, + { + "epoch": 0.7278749471905366, + "grad_norm": 1.4003392457962036, + "learning_rate": 5.900512882094754e-06, + "loss": 0.2197399139404297, + "step": 5384 + }, + { + "epoch": 0.7280101394169836, + "grad_norm": 0.9805797934532166, + "learning_rate": 5.8950726293115855e-06, + "loss": 0.16019654273986816, + "step": 5385 + }, + { + "epoch": 0.7281453316434305, + "grad_norm": 1.2284855842590332, + "learning_rate": 5.889634272222844e-06, + "loss": 0.18422317504882812, + "step": 5386 + }, + { + "epoch": 0.7282805238698775, + "grad_norm": 0.5891017317771912, + "learning_rate": 5.8841978119608345e-06, + "loss": 0.10374271869659424, + "step": 5387 + }, + { + "epoch": 0.7284157160963245, + "grad_norm": 0.9230350852012634, + "learning_rate": 5.878763249657452e-06, + "loss": 0.11990642547607422, + "step": 5388 + }, + { + "epoch": 0.7285509083227715, + "grad_norm": 2.1666762828826904, + "learning_rate": 5.873330586444202e-06, + "loss": 0.1858212947845459, + "step": 5389 + }, + { + "epoch": 0.7286861005492185, + "grad_norm": 1.917571783065796, + "learning_rate": 5.867899823452193e-06, + "loss": 0.19831299781799316, + "step": 5390 + }, + { + "epoch": 0.7288212927756654, + "grad_norm": 2.017155647277832, + "learning_rate": 5.862470961812133e-06, + "loss": 0.15149211883544922, + "step": 5391 + }, + { + "epoch": 0.7289564850021124, + "grad_norm": 0.8554419875144958, + "learning_rate": 5.857044002654357e-06, + "loss": 0.16566067934036255, + "step": 5392 + }, + { + "epoch": 0.7290916772285594, + "grad_norm": 1.1382036209106445, + "learning_rate": 5.851618947108764e-06, + "loss": 0.1287059783935547, + "step": 5393 + }, + { + "epoch": 0.7292268694550064, + "grad_norm": 1.8647043704986572, + "learning_rate": 5.8461957963048984e-06, + "loss": 0.18945884704589844, + "step": 5394 + }, + { + "epoch": 0.7293620616814533, + "grad_norm": 1.6169652938842773, + "learning_rate": 5.840774551371882e-06, + "loss": 0.1940155029296875, + "step": 5395 + }, + { + "epoch": 0.7294972539079003, + "grad_norm": 1.0978327989578247, + "learning_rate": 5.8353552134384405e-06, + "loss": 0.20725250244140625, + "step": 5396 + }, + { + "epoch": 0.7296324461343473, + "grad_norm": 1.4625581502914429, + "learning_rate": 5.829937783632926e-06, + "loss": 0.21203231811523438, + "step": 5397 + }, + { + "epoch": 0.7297676383607943, + "grad_norm": 0.7802647948265076, + "learning_rate": 5.824522263083256e-06, + "loss": 0.1077108383178711, + "step": 5398 + }, + { + "epoch": 0.7299028305872413, + "grad_norm": 1.3631001710891724, + "learning_rate": 5.8191086529169855e-06, + "loss": 0.14869403839111328, + "step": 5399 + }, + { + "epoch": 0.7300380228136882, + "grad_norm": 0.9750107526779175, + "learning_rate": 5.813696954261253e-06, + "loss": 0.12857818603515625, + "step": 5400 + }, + { + "epoch": 0.7301732150401352, + "grad_norm": 1.142553448677063, + "learning_rate": 5.8082871682428e-06, + "loss": 0.20977020263671875, + "step": 5401 + }, + { + "epoch": 0.7303084072665822, + "grad_norm": 1.1021348237991333, + "learning_rate": 5.802879295987975e-06, + "loss": 0.18714427947998047, + "step": 5402 + }, + { + "epoch": 0.7304435994930292, + "grad_norm": 1.195054292678833, + "learning_rate": 5.797473338622722e-06, + "loss": 0.1609203815460205, + "step": 5403 + }, + { + "epoch": 0.7305787917194762, + "grad_norm": 2.185063600540161, + "learning_rate": 5.792069297272599e-06, + "loss": 0.20419692993164062, + "step": 5404 + }, + { + "epoch": 0.7307139839459231, + "grad_norm": 0.6816750168800354, + "learning_rate": 5.7866671730627485e-06, + "loss": 0.1489429473876953, + "step": 5405 + }, + { + "epoch": 0.7308491761723701, + "grad_norm": 1.3252195119857788, + "learning_rate": 5.781266967117925e-06, + "loss": 0.16443443298339844, + "step": 5406 + }, + { + "epoch": 0.7309843683988171, + "grad_norm": 1.0631413459777832, + "learning_rate": 5.7758686805624815e-06, + "loss": 0.15423917770385742, + "step": 5407 + }, + { + "epoch": 0.7311195606252641, + "grad_norm": 0.7996984124183655, + "learning_rate": 5.7704723145203605e-06, + "loss": 0.17241859436035156, + "step": 5408 + }, + { + "epoch": 0.731254752851711, + "grad_norm": 0.981478214263916, + "learning_rate": 5.765077870115126e-06, + "loss": 0.16557073593139648, + "step": 5409 + }, + { + "epoch": 0.731389945078158, + "grad_norm": 1.3903049230575562, + "learning_rate": 5.759685348469928e-06, + "loss": 0.19651222229003906, + "step": 5410 + }, + { + "epoch": 0.731525137304605, + "grad_norm": 1.4133092164993286, + "learning_rate": 5.754294750707514e-06, + "loss": 0.17502403259277344, + "step": 5411 + }, + { + "epoch": 0.731660329531052, + "grad_norm": 0.7892844080924988, + "learning_rate": 5.748906077950237e-06, + "loss": 0.14845561981201172, + "step": 5412 + }, + { + "epoch": 0.731795521757499, + "grad_norm": 1.1618221998214722, + "learning_rate": 5.743519331320042e-06, + "loss": 0.20782470703125, + "step": 5413 + }, + { + "epoch": 0.7319307139839459, + "grad_norm": 1.258887529373169, + "learning_rate": 5.73813451193849e-06, + "loss": 0.16991233825683594, + "step": 5414 + }, + { + "epoch": 0.7320659062103929, + "grad_norm": 1.2043417692184448, + "learning_rate": 5.7327516209267225e-06, + "loss": 0.18732059001922607, + "step": 5415 + }, + { + "epoch": 0.7322010984368399, + "grad_norm": 0.8529903292655945, + "learning_rate": 5.727370659405486e-06, + "loss": 0.14250469207763672, + "step": 5416 + }, + { + "epoch": 0.7323362906632869, + "grad_norm": 1.8589668273925781, + "learning_rate": 5.7219916284951265e-06, + "loss": 0.1817009449005127, + "step": 5417 + }, + { + "epoch": 0.7324714828897338, + "grad_norm": 1.0302187204360962, + "learning_rate": 5.716614529315582e-06, + "loss": 0.18629693984985352, + "step": 5418 + }, + { + "epoch": 0.7326066751161808, + "grad_norm": 1.132738709449768, + "learning_rate": 5.711239362986401e-06, + "loss": 0.19417476654052734, + "step": 5419 + }, + { + "epoch": 0.7327418673426278, + "grad_norm": 1.0355405807495117, + "learning_rate": 5.705866130626719e-06, + "loss": 0.13148212432861328, + "step": 5420 + }, + { + "epoch": 0.7328770595690748, + "grad_norm": 1.6669100522994995, + "learning_rate": 5.700494833355271e-06, + "loss": 0.20334434509277344, + "step": 5421 + }, + { + "epoch": 0.7330122517955218, + "grad_norm": 1.911421775817871, + "learning_rate": 5.69512547229039e-06, + "loss": 0.2167491912841797, + "step": 5422 + }, + { + "epoch": 0.7331474440219687, + "grad_norm": 1.1529169082641602, + "learning_rate": 5.689758048550001e-06, + "loss": 0.16358566284179688, + "step": 5423 + }, + { + "epoch": 0.7332826362484157, + "grad_norm": 1.0827101469039917, + "learning_rate": 5.684392563251644e-06, + "loss": 0.14029693603515625, + "step": 5424 + }, + { + "epoch": 0.7334178284748627, + "grad_norm": 1.3239370584487915, + "learning_rate": 5.679029017512422e-06, + "loss": 0.21293163299560547, + "step": 5425 + }, + { + "epoch": 0.7335530207013097, + "grad_norm": 1.7510124444961548, + "learning_rate": 5.6736674124490684e-06, + "loss": 0.22458267211914062, + "step": 5426 + }, + { + "epoch": 0.7336882129277567, + "grad_norm": 1.5140736103057861, + "learning_rate": 5.6683077491778935e-06, + "loss": 0.22754859924316406, + "step": 5427 + }, + { + "epoch": 0.7338234051542036, + "grad_norm": 0.621523916721344, + "learning_rate": 5.6629500288148044e-06, + "loss": 0.10659146308898926, + "step": 5428 + }, + { + "epoch": 0.7339585973806506, + "grad_norm": 0.4150683581829071, + "learning_rate": 5.657594252475319e-06, + "loss": 0.08076173067092896, + "step": 5429 + }, + { + "epoch": 0.7340937896070976, + "grad_norm": 1.5244630575180054, + "learning_rate": 5.652240421274521e-06, + "loss": 0.198591947555542, + "step": 5430 + }, + { + "epoch": 0.7342289818335446, + "grad_norm": 2.136486530303955, + "learning_rate": 5.646888536327121e-06, + "loss": 0.21309804916381836, + "step": 5431 + }, + { + "epoch": 0.7343641740599915, + "grad_norm": 1.9946231842041016, + "learning_rate": 5.641538598747403e-06, + "loss": 0.17765235900878906, + "step": 5432 + }, + { + "epoch": 0.7344993662864385, + "grad_norm": 1.6058526039123535, + "learning_rate": 5.6361906096492495e-06, + "loss": 0.19843292236328125, + "step": 5433 + }, + { + "epoch": 0.7346345585128855, + "grad_norm": 0.9471537470817566, + "learning_rate": 5.630844570146157e-06, + "loss": 0.1451871544122696, + "step": 5434 + }, + { + "epoch": 0.7347697507393325, + "grad_norm": 1.1687220335006714, + "learning_rate": 5.625500481351176e-06, + "loss": 0.16038846969604492, + "step": 5435 + }, + { + "epoch": 0.7349049429657795, + "grad_norm": 2.6340956687927246, + "learning_rate": 5.6201583443769895e-06, + "loss": 0.18080902099609375, + "step": 5436 + }, + { + "epoch": 0.7350401351922264, + "grad_norm": 1.7166969776153564, + "learning_rate": 5.614818160335857e-06, + "loss": 0.15173912048339844, + "step": 5437 + }, + { + "epoch": 0.7351753274186734, + "grad_norm": 1.537317156791687, + "learning_rate": 5.6094799303396315e-06, + "loss": 0.1852703094482422, + "step": 5438 + }, + { + "epoch": 0.7353105196451204, + "grad_norm": 1.0906593799591064, + "learning_rate": 5.6041436554997595e-06, + "loss": 0.19263076782226562, + "step": 5439 + }, + { + "epoch": 0.7354457118715674, + "grad_norm": 0.6516191363334656, + "learning_rate": 5.598809336927278e-06, + "loss": 0.12322711944580078, + "step": 5440 + }, + { + "epoch": 0.7355809040980144, + "grad_norm": 2.866525173187256, + "learning_rate": 5.5934769757328325e-06, + "loss": 0.21516799926757812, + "step": 5441 + }, + { + "epoch": 0.7357160963244613, + "grad_norm": 0.9940685033798218, + "learning_rate": 5.588146573026642e-06, + "loss": 0.19419193267822266, + "step": 5442 + }, + { + "epoch": 0.7358512885509083, + "grad_norm": 1.601098656654358, + "learning_rate": 5.582818129918525e-06, + "loss": 0.2529716491699219, + "step": 5443 + }, + { + "epoch": 0.7359864807773553, + "grad_norm": 1.7732347249984741, + "learning_rate": 5.5774916475178915e-06, + "loss": 0.183624267578125, + "step": 5444 + }, + { + "epoch": 0.7361216730038023, + "grad_norm": 1.0748921632766724, + "learning_rate": 5.572167126933738e-06, + "loss": 0.1806321144104004, + "step": 5445 + }, + { + "epoch": 0.7362568652302492, + "grad_norm": 1.2453570365905762, + "learning_rate": 5.566844569274669e-06, + "loss": 0.2239093780517578, + "step": 5446 + }, + { + "epoch": 0.7363920574566962, + "grad_norm": 2.179720163345337, + "learning_rate": 5.5615239756488665e-06, + "loss": 0.19482421875, + "step": 5447 + }, + { + "epoch": 0.7365272496831432, + "grad_norm": 2.343989610671997, + "learning_rate": 5.556205347164104e-06, + "loss": 0.18737125396728516, + "step": 5448 + }, + { + "epoch": 0.7366624419095902, + "grad_norm": 1.1184184551239014, + "learning_rate": 5.550888684927746e-06, + "loss": 0.12966585159301758, + "step": 5449 + }, + { + "epoch": 0.7367976341360372, + "grad_norm": 1.3188831806182861, + "learning_rate": 5.545573990046752e-06, + "loss": 0.18623828887939453, + "step": 5450 + }, + { + "epoch": 0.7369328263624841, + "grad_norm": 0.8549461960792542, + "learning_rate": 5.540261263627672e-06, + "loss": 0.15744781494140625, + "step": 5451 + }, + { + "epoch": 0.7370680185889311, + "grad_norm": 0.8998275399208069, + "learning_rate": 5.534950506776644e-06, + "loss": 0.1818866729736328, + "step": 5452 + }, + { + "epoch": 0.7372032108153781, + "grad_norm": 0.7639499306678772, + "learning_rate": 5.529641720599393e-06, + "loss": 0.15549564361572266, + "step": 5453 + }, + { + "epoch": 0.7373384030418251, + "grad_norm": 1.4306678771972656, + "learning_rate": 5.52433490620124e-06, + "loss": 0.1531050205230713, + "step": 5454 + }, + { + "epoch": 0.737473595268272, + "grad_norm": 1.0781877040863037, + "learning_rate": 5.519030064687082e-06, + "loss": 0.15582275390625, + "step": 5455 + }, + { + "epoch": 0.737608787494719, + "grad_norm": 1.0096534490585327, + "learning_rate": 5.51372719716143e-06, + "loss": 0.153289794921875, + "step": 5456 + }, + { + "epoch": 0.737743979721166, + "grad_norm": 2.3367340564727783, + "learning_rate": 5.508426304728363e-06, + "loss": 0.20807456970214844, + "step": 5457 + }, + { + "epoch": 0.737879171947613, + "grad_norm": 1.458406686782837, + "learning_rate": 5.503127388491552e-06, + "loss": 0.1856670379638672, + "step": 5458 + }, + { + "epoch": 0.73801436417406, + "grad_norm": 1.2269357442855835, + "learning_rate": 5.497830449554266e-06, + "loss": 0.25836181640625, + "step": 5459 + }, + { + "epoch": 0.738149556400507, + "grad_norm": 0.8984341621398926, + "learning_rate": 5.492535489019344e-06, + "loss": 0.15860557556152344, + "step": 5460 + }, + { + "epoch": 0.7382847486269539, + "grad_norm": 1.0002944469451904, + "learning_rate": 5.4872425079892454e-06, + "loss": 0.20991134643554688, + "step": 5461 + }, + { + "epoch": 0.7384199408534009, + "grad_norm": 1.2617703676223755, + "learning_rate": 5.481951507565973e-06, + "loss": 0.18722152709960938, + "step": 5462 + }, + { + "epoch": 0.7385551330798479, + "grad_norm": 1.349399209022522, + "learning_rate": 5.476662488851159e-06, + "loss": 0.1929912567138672, + "step": 5463 + }, + { + "epoch": 0.7386903253062949, + "grad_norm": 0.7708116769790649, + "learning_rate": 5.471375452946e-06, + "loss": 0.1670513153076172, + "step": 5464 + }, + { + "epoch": 0.7388255175327418, + "grad_norm": 1.4045013189315796, + "learning_rate": 5.466090400951279e-06, + "loss": 0.18651437759399414, + "step": 5465 + }, + { + "epoch": 0.7389607097591888, + "grad_norm": 1.3405613899230957, + "learning_rate": 5.460807333967387e-06, + "loss": 0.17983055114746094, + "step": 5466 + }, + { + "epoch": 0.7390959019856358, + "grad_norm": 1.4084433317184448, + "learning_rate": 5.455526253094267e-06, + "loss": 0.19733238220214844, + "step": 5467 + }, + { + "epoch": 0.7392310942120828, + "grad_norm": 1.6750537157058716, + "learning_rate": 5.450247159431486e-06, + "loss": 0.11149340867996216, + "step": 5468 + }, + { + "epoch": 0.7393662864385298, + "grad_norm": 0.8786253333091736, + "learning_rate": 5.44497005407817e-06, + "loss": 0.1979541778564453, + "step": 5469 + }, + { + "epoch": 0.7395014786649767, + "grad_norm": 2.5073506832122803, + "learning_rate": 5.439694938133042e-06, + "loss": 0.2315061092376709, + "step": 5470 + }, + { + "epoch": 0.7396366708914237, + "grad_norm": 2.5064053535461426, + "learning_rate": 5.434421812694409e-06, + "loss": 0.1924424171447754, + "step": 5471 + }, + { + "epoch": 0.7397718631178707, + "grad_norm": 0.9299335479736328, + "learning_rate": 5.4291506788601624e-06, + "loss": 0.13190746307373047, + "step": 5472 + }, + { + "epoch": 0.7399070553443177, + "grad_norm": 1.2495826482772827, + "learning_rate": 5.423881537727785e-06, + "loss": 0.20519351959228516, + "step": 5473 + }, + { + "epoch": 0.7400422475707646, + "grad_norm": 1.0828267335891724, + "learning_rate": 5.418614390394338e-06, + "loss": 0.17133331298828125, + "step": 5474 + }, + { + "epoch": 0.7401774397972116, + "grad_norm": 1.8061549663543701, + "learning_rate": 5.413349237956469e-06, + "loss": 0.27376604080200195, + "step": 5475 + }, + { + "epoch": 0.7403126320236586, + "grad_norm": 1.3505018949508667, + "learning_rate": 5.4080860815104125e-06, + "loss": 0.21478939056396484, + "step": 5476 + }, + { + "epoch": 0.7404478242501056, + "grad_norm": 1.9907888174057007, + "learning_rate": 5.402824922151977e-06, + "loss": 0.2262735366821289, + "step": 5477 + }, + { + "epoch": 0.7405830164765526, + "grad_norm": 1.3529293537139893, + "learning_rate": 5.397565760976577e-06, + "loss": 0.19457340240478516, + "step": 5478 + }, + { + "epoch": 0.7407182087029995, + "grad_norm": 0.6982883810997009, + "learning_rate": 5.392308599079193e-06, + "loss": 0.17594337463378906, + "step": 5479 + }, + { + "epoch": 0.7408534009294465, + "grad_norm": 0.89969402551651, + "learning_rate": 5.3870534375543916e-06, + "loss": 0.15802907943725586, + "step": 5480 + }, + { + "epoch": 0.7409885931558935, + "grad_norm": 1.5381609201431274, + "learning_rate": 5.381800277496328e-06, + "loss": 0.20641517639160156, + "step": 5481 + }, + { + "epoch": 0.7411237853823405, + "grad_norm": 1.4240254163742065, + "learning_rate": 5.376549119998731e-06, + "loss": 0.22413253784179688, + "step": 5482 + }, + { + "epoch": 0.7412589776087875, + "grad_norm": 1.7257921695709229, + "learning_rate": 5.3712999661549314e-06, + "loss": 0.22533750534057617, + "step": 5483 + }, + { + "epoch": 0.7413941698352344, + "grad_norm": 2.5512783527374268, + "learning_rate": 5.366052817057826e-06, + "loss": 0.20793819427490234, + "step": 5484 + }, + { + "epoch": 0.7415293620616814, + "grad_norm": 1.4617986679077148, + "learning_rate": 5.360807673799899e-06, + "loss": 0.1730213165283203, + "step": 5485 + }, + { + "epoch": 0.7416645542881284, + "grad_norm": 1.292579174041748, + "learning_rate": 5.355564537473214e-06, + "loss": 0.13782978057861328, + "step": 5486 + }, + { + "epoch": 0.7417997465145754, + "grad_norm": 0.9110461473464966, + "learning_rate": 5.35032340916942e-06, + "loss": 0.18610572814941406, + "step": 5487 + }, + { + "epoch": 0.7419349387410223, + "grad_norm": 1.025336742401123, + "learning_rate": 5.345084289979755e-06, + "loss": 0.16460609436035156, + "step": 5488 + }, + { + "epoch": 0.7420701309674693, + "grad_norm": 0.8170027732849121, + "learning_rate": 5.339847180995026e-06, + "loss": 0.10986661911010742, + "step": 5489 + }, + { + "epoch": 0.7422053231939163, + "grad_norm": 1.35186767578125, + "learning_rate": 5.33461208330563e-06, + "loss": 0.16333770751953125, + "step": 5490 + }, + { + "epoch": 0.7423405154203633, + "grad_norm": 1.5800249576568604, + "learning_rate": 5.32937899800154e-06, + "loss": 0.18089580535888672, + "step": 5491 + }, + { + "epoch": 0.7424757076468103, + "grad_norm": 1.617856502532959, + "learning_rate": 5.324147926172307e-06, + "loss": 0.16485190391540527, + "step": 5492 + }, + { + "epoch": 0.7426108998732572, + "grad_norm": 1.5219545364379883, + "learning_rate": 5.318918868907084e-06, + "loss": 0.1864166259765625, + "step": 5493 + }, + { + "epoch": 0.7427460920997042, + "grad_norm": 0.9913140535354614, + "learning_rate": 5.313691827294568e-06, + "loss": 0.1461009979248047, + "step": 5494 + }, + { + "epoch": 0.7428812843261512, + "grad_norm": 1.6396845579147339, + "learning_rate": 5.308466802423072e-06, + "loss": 0.1610097885131836, + "step": 5495 + }, + { + "epoch": 0.7430164765525982, + "grad_norm": 1.3406883478164673, + "learning_rate": 5.303243795380471e-06, + "loss": 0.18573760986328125, + "step": 5496 + }, + { + "epoch": 0.7431516687790453, + "grad_norm": 1.2215561866760254, + "learning_rate": 5.298022807254215e-06, + "loss": 0.1607666015625, + "step": 5497 + }, + { + "epoch": 0.7432868610054922, + "grad_norm": 0.896152675151825, + "learning_rate": 5.292803839131358e-06, + "loss": 0.12671756744384766, + "step": 5498 + }, + { + "epoch": 0.7434220532319392, + "grad_norm": 1.4494041204452515, + "learning_rate": 5.287586892098496e-06, + "loss": 0.179473876953125, + "step": 5499 + }, + { + "epoch": 0.7435572454583862, + "grad_norm": 1.46336829662323, + "learning_rate": 5.282371967241842e-06, + "loss": 0.2173004150390625, + "step": 5500 + }, + { + "epoch": 0.7436924376848332, + "grad_norm": 0.8535895347595215, + "learning_rate": 5.277159065647164e-06, + "loss": 0.15633583068847656, + "step": 5501 + }, + { + "epoch": 0.7438276299112802, + "grad_norm": 1.6958073377609253, + "learning_rate": 5.271948188399814e-06, + "loss": 0.2004108428955078, + "step": 5502 + }, + { + "epoch": 0.7439628221377271, + "grad_norm": 1.1025596857070923, + "learning_rate": 5.266739336584735e-06, + "loss": 0.18210983276367188, + "step": 5503 + }, + { + "epoch": 0.7440980143641741, + "grad_norm": 0.9344438910484314, + "learning_rate": 5.261532511286422e-06, + "loss": 0.16583919525146484, + "step": 5504 + }, + { + "epoch": 0.7442332065906211, + "grad_norm": 1.0008279085159302, + "learning_rate": 5.256327713588977e-06, + "loss": 0.15412235260009766, + "step": 5505 + }, + { + "epoch": 0.7443683988170681, + "grad_norm": 2.138267755508423, + "learning_rate": 5.25112494457606e-06, + "loss": 0.2547874450683594, + "step": 5506 + }, + { + "epoch": 0.744503591043515, + "grad_norm": 1.4364880323410034, + "learning_rate": 5.245924205330919e-06, + "loss": 0.21915864944458008, + "step": 5507 + }, + { + "epoch": 0.744638783269962, + "grad_norm": 1.0808571577072144, + "learning_rate": 5.240725496936373e-06, + "loss": 0.17722320556640625, + "step": 5508 + }, + { + "epoch": 0.744773975496409, + "grad_norm": 0.8131818771362305, + "learning_rate": 5.2355288204748145e-06, + "loss": 0.1521282196044922, + "step": 5509 + }, + { + "epoch": 0.744909167722856, + "grad_norm": 1.7834761142730713, + "learning_rate": 5.230334177028233e-06, + "loss": 0.1450948715209961, + "step": 5510 + }, + { + "epoch": 0.745044359949303, + "grad_norm": 1.3590015172958374, + "learning_rate": 5.2251415676781726e-06, + "loss": 0.18535304069519043, + "step": 5511 + }, + { + "epoch": 0.7451795521757499, + "grad_norm": 0.8459165692329407, + "learning_rate": 5.2199509935057655e-06, + "loss": 0.1409931182861328, + "step": 5512 + }, + { + "epoch": 0.7453147444021969, + "grad_norm": 0.848420262336731, + "learning_rate": 5.214762455591713e-06, + "loss": 0.11742591857910156, + "step": 5513 + }, + { + "epoch": 0.7454499366286439, + "grad_norm": 1.4901010990142822, + "learning_rate": 5.209575955016295e-06, + "loss": 0.20101547241210938, + "step": 5514 + }, + { + "epoch": 0.7455851288550909, + "grad_norm": 1.5747272968292236, + "learning_rate": 5.204391492859377e-06, + "loss": 0.17344218492507935, + "step": 5515 + }, + { + "epoch": 0.7457203210815379, + "grad_norm": 1.7182445526123047, + "learning_rate": 5.199209070200388e-06, + "loss": 0.19905948638916016, + "step": 5516 + }, + { + "epoch": 0.7458555133079848, + "grad_norm": 0.8937649130821228, + "learning_rate": 5.194028688118332e-06, + "loss": 0.15001583099365234, + "step": 5517 + }, + { + "epoch": 0.7459907055344318, + "grad_norm": 0.9096662402153015, + "learning_rate": 5.188850347691797e-06, + "loss": 0.14841651916503906, + "step": 5518 + }, + { + "epoch": 0.7461258977608788, + "grad_norm": 0.9075912833213806, + "learning_rate": 5.183674049998934e-06, + "loss": 0.15425682067871094, + "step": 5519 + }, + { + "epoch": 0.7462610899873258, + "grad_norm": 1.0316675901412964, + "learning_rate": 5.178499796117485e-06, + "loss": 0.15800857543945312, + "step": 5520 + }, + { + "epoch": 0.7463962822137727, + "grad_norm": 1.031766414642334, + "learning_rate": 5.173327587124753e-06, + "loss": 0.1940668821334839, + "step": 5521 + }, + { + "epoch": 0.7465314744402197, + "grad_norm": 1.390950083732605, + "learning_rate": 5.16815742409762e-06, + "loss": 0.20208454132080078, + "step": 5522 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 0.7603386640548706, + "learning_rate": 5.16298930811254e-06, + "loss": 0.17708683013916016, + "step": 5523 + }, + { + "epoch": 0.7468018588931137, + "grad_norm": 0.8726809620857239, + "learning_rate": 5.15782324024554e-06, + "loss": 0.1359729766845703, + "step": 5524 + }, + { + "epoch": 0.7469370511195607, + "grad_norm": 1.8092880249023438, + "learning_rate": 5.152659221572231e-06, + "loss": 0.19325733184814453, + "step": 5525 + }, + { + "epoch": 0.7470722433460076, + "grad_norm": 1.0619183778762817, + "learning_rate": 5.147497253167784e-06, + "loss": 0.16878509521484375, + "step": 5526 + }, + { + "epoch": 0.7472074355724546, + "grad_norm": 1.340755820274353, + "learning_rate": 5.142337336106948e-06, + "loss": 0.23514556884765625, + "step": 5527 + }, + { + "epoch": 0.7473426277989016, + "grad_norm": 1.4047523736953735, + "learning_rate": 5.137179471464047e-06, + "loss": 0.19392192363739014, + "step": 5528 + }, + { + "epoch": 0.7474778200253486, + "grad_norm": 0.9587230682373047, + "learning_rate": 5.13202366031297e-06, + "loss": 0.15888595581054688, + "step": 5529 + }, + { + "epoch": 0.7476130122517956, + "grad_norm": 1.0565791130065918, + "learning_rate": 5.1268699037272e-06, + "loss": 0.17792701721191406, + "step": 5530 + }, + { + "epoch": 0.7477482044782425, + "grad_norm": 1.264702320098877, + "learning_rate": 5.121718202779756e-06, + "loss": 0.18007469177246094, + "step": 5531 + }, + { + "epoch": 0.7478833967046895, + "grad_norm": 1.278017520904541, + "learning_rate": 5.116568558543264e-06, + "loss": 0.18833351135253906, + "step": 5532 + }, + { + "epoch": 0.7480185889311365, + "grad_norm": 1.3293447494506836, + "learning_rate": 5.1114209720899025e-06, + "loss": 0.1842479705810547, + "step": 5533 + }, + { + "epoch": 0.7481537811575835, + "grad_norm": 0.468590646982193, + "learning_rate": 5.106275444491423e-06, + "loss": 0.06927096843719482, + "step": 5534 + }, + { + "epoch": 0.7482889733840304, + "grad_norm": 1.4397578239440918, + "learning_rate": 5.101131976819165e-06, + "loss": 0.2094583511352539, + "step": 5535 + }, + { + "epoch": 0.7484241656104774, + "grad_norm": 1.2509934902191162, + "learning_rate": 5.095990570144008e-06, + "loss": 0.17056751251220703, + "step": 5536 + }, + { + "epoch": 0.7485593578369244, + "grad_norm": 0.981372594833374, + "learning_rate": 5.090851225536432e-06, + "loss": 0.1465930938720703, + "step": 5537 + }, + { + "epoch": 0.7486945500633714, + "grad_norm": 0.8076090216636658, + "learning_rate": 5.085713944066474e-06, + "loss": 0.1373729705810547, + "step": 5538 + }, + { + "epoch": 0.7488297422898184, + "grad_norm": 1.0660492181777954, + "learning_rate": 5.080578726803741e-06, + "loss": 0.15929794311523438, + "step": 5539 + }, + { + "epoch": 0.7489649345162653, + "grad_norm": 1.3466428518295288, + "learning_rate": 5.075445574817415e-06, + "loss": 0.19535255432128906, + "step": 5540 + }, + { + "epoch": 0.7491001267427123, + "grad_norm": 1.931800127029419, + "learning_rate": 5.07031448917624e-06, + "loss": 0.1823577880859375, + "step": 5541 + }, + { + "epoch": 0.7492353189691593, + "grad_norm": 1.2878483533859253, + "learning_rate": 5.065185470948544e-06, + "loss": 0.21713924407958984, + "step": 5542 + }, + { + "epoch": 0.7493705111956063, + "grad_norm": 1.5037792921066284, + "learning_rate": 5.060058521202211e-06, + "loss": 0.18640804290771484, + "step": 5543 + }, + { + "epoch": 0.7495057034220532, + "grad_norm": 1.9052491188049316, + "learning_rate": 5.054933641004703e-06, + "loss": 0.19440412521362305, + "step": 5544 + }, + { + "epoch": 0.7496408956485002, + "grad_norm": 1.7354813814163208, + "learning_rate": 5.0498108314230425e-06, + "loss": 0.153099924325943, + "step": 5545 + }, + { + "epoch": 0.7497760878749472, + "grad_norm": 0.8646870851516724, + "learning_rate": 5.044690093523823e-06, + "loss": 0.1392502784729004, + "step": 5546 + }, + { + "epoch": 0.7499112801013942, + "grad_norm": 1.132386326789856, + "learning_rate": 5.039571428373219e-06, + "loss": 0.1456890106201172, + "step": 5547 + }, + { + "epoch": 0.7500464723278412, + "grad_norm": 1.4150546789169312, + "learning_rate": 5.034454837036959e-06, + "loss": 0.13692450523376465, + "step": 5548 + }, + { + "epoch": 0.7501816645542881, + "grad_norm": 1.4570070505142212, + "learning_rate": 5.0293403205803455e-06, + "loss": 0.18346023559570312, + "step": 5549 + }, + { + "epoch": 0.7503168567807351, + "grad_norm": 1.0183249711990356, + "learning_rate": 5.024227880068247e-06, + "loss": 0.14780044555664062, + "step": 5550 + }, + { + "epoch": 0.7504520490071821, + "grad_norm": 0.8708744049072266, + "learning_rate": 5.019117516565096e-06, + "loss": 0.13231420516967773, + "step": 5551 + }, + { + "epoch": 0.7505872412336291, + "grad_norm": 0.8811061978340149, + "learning_rate": 5.014009231134908e-06, + "loss": 0.15431594848632812, + "step": 5552 + }, + { + "epoch": 0.750722433460076, + "grad_norm": 1.0152461528778076, + "learning_rate": 5.008903024841248e-06, + "loss": 0.18190956115722656, + "step": 5553 + }, + { + "epoch": 0.750857625686523, + "grad_norm": 0.8767868876457214, + "learning_rate": 5.0037988987472595e-06, + "loss": 0.18207740783691406, + "step": 5554 + }, + { + "epoch": 0.75099281791297, + "grad_norm": 1.1492546796798706, + "learning_rate": 4.998696853915646e-06, + "loss": 0.16100740432739258, + "step": 5555 + }, + { + "epoch": 0.751128010139417, + "grad_norm": 1.1635277271270752, + "learning_rate": 4.993596891408676e-06, + "loss": 0.17439889907836914, + "step": 5556 + }, + { + "epoch": 0.751263202365864, + "grad_norm": 1.0445656776428223, + "learning_rate": 4.988499012288198e-06, + "loss": 0.15757465362548828, + "step": 5557 + }, + { + "epoch": 0.751398394592311, + "grad_norm": 0.5957070589065552, + "learning_rate": 4.983403217615614e-06, + "loss": 0.11692547798156738, + "step": 5558 + }, + { + "epoch": 0.7515335868187579, + "grad_norm": 1.072064757347107, + "learning_rate": 4.978309508451896e-06, + "loss": 0.17076969146728516, + "step": 5559 + }, + { + "epoch": 0.7516687790452049, + "grad_norm": 1.1318572759628296, + "learning_rate": 4.973217885857578e-06, + "loss": 0.15820837020874023, + "step": 5560 + }, + { + "epoch": 0.7518039712716519, + "grad_norm": 1.1528007984161377, + "learning_rate": 4.968128350892763e-06, + "loss": 0.1442272663116455, + "step": 5561 + }, + { + "epoch": 0.7519391634980989, + "grad_norm": 1.620975136756897, + "learning_rate": 4.963040904617131e-06, + "loss": 0.21622848510742188, + "step": 5562 + }, + { + "epoch": 0.7520743557245458, + "grad_norm": 1.2600561380386353, + "learning_rate": 4.9579555480898955e-06, + "loss": 0.14247512817382812, + "step": 5563 + }, + { + "epoch": 0.7522095479509928, + "grad_norm": 1.1414223909378052, + "learning_rate": 4.952872282369873e-06, + "loss": 0.14894485473632812, + "step": 5564 + }, + { + "epoch": 0.7523447401774398, + "grad_norm": 0.5991482138633728, + "learning_rate": 4.947791108515417e-06, + "loss": 0.12276840209960938, + "step": 5565 + }, + { + "epoch": 0.7524799324038868, + "grad_norm": 0.7365414500236511, + "learning_rate": 4.942712027584453e-06, + "loss": 0.14475154876708984, + "step": 5566 + }, + { + "epoch": 0.7526151246303338, + "grad_norm": 1.5363119840621948, + "learning_rate": 4.937635040634485e-06, + "loss": 0.21039390563964844, + "step": 5567 + }, + { + "epoch": 0.7527503168567807, + "grad_norm": 0.965306282043457, + "learning_rate": 4.9325601487225545e-06, + "loss": 0.14312076568603516, + "step": 5568 + }, + { + "epoch": 0.7528855090832277, + "grad_norm": 2.654219150543213, + "learning_rate": 4.927487352905289e-06, + "loss": 0.22712326049804688, + "step": 5569 + }, + { + "epoch": 0.7530207013096747, + "grad_norm": 1.0947959423065186, + "learning_rate": 4.92241665423887e-06, + "loss": 0.16634368896484375, + "step": 5570 + }, + { + "epoch": 0.7531558935361217, + "grad_norm": 1.4040368795394897, + "learning_rate": 4.917348053779039e-06, + "loss": 0.15804576873779297, + "step": 5571 + }, + { + "epoch": 0.7532910857625686, + "grad_norm": 1.2418389320373535, + "learning_rate": 4.912281552581122e-06, + "loss": 0.17496204376220703, + "step": 5572 + }, + { + "epoch": 0.7534262779890156, + "grad_norm": 0.7636131048202515, + "learning_rate": 4.907217151699969e-06, + "loss": 0.1504077911376953, + "step": 5573 + }, + { + "epoch": 0.7535614702154626, + "grad_norm": 0.887273907661438, + "learning_rate": 4.9021548521900305e-06, + "loss": 0.13809704780578613, + "step": 5574 + }, + { + "epoch": 0.7536966624419096, + "grad_norm": 2.542372703552246, + "learning_rate": 4.8970946551053005e-06, + "loss": 0.23099708557128906, + "step": 5575 + }, + { + "epoch": 0.7538318546683566, + "grad_norm": 1.084800124168396, + "learning_rate": 4.892036561499339e-06, + "loss": 0.14268946647644043, + "step": 5576 + }, + { + "epoch": 0.7539670468948035, + "grad_norm": 0.9548205733299255, + "learning_rate": 4.8869805724252675e-06, + "loss": 0.18102121353149414, + "step": 5577 + }, + { + "epoch": 0.7541022391212505, + "grad_norm": 1.2947176694869995, + "learning_rate": 4.8819266889357665e-06, + "loss": 0.19460105895996094, + "step": 5578 + }, + { + "epoch": 0.7542374313476975, + "grad_norm": 1.579666256904602, + "learning_rate": 4.876874912083088e-06, + "loss": 0.18504762649536133, + "step": 5579 + }, + { + "epoch": 0.7543726235741445, + "grad_norm": 1.6235867738723755, + "learning_rate": 4.871825242919037e-06, + "loss": 0.24318885803222656, + "step": 5580 + }, + { + "epoch": 0.7545078158005915, + "grad_norm": 1.0481802225112915, + "learning_rate": 4.866777682494978e-06, + "loss": 0.2065591812133789, + "step": 5581 + }, + { + "epoch": 0.7546430080270384, + "grad_norm": 1.6847590208053589, + "learning_rate": 4.861732231861845e-06, + "loss": 0.2007908821105957, + "step": 5582 + }, + { + "epoch": 0.7547782002534854, + "grad_norm": 0.75111323595047, + "learning_rate": 4.85668889207012e-06, + "loss": 0.1385669708251953, + "step": 5583 + }, + { + "epoch": 0.7549133924799324, + "grad_norm": 1.0754163265228271, + "learning_rate": 4.851647664169862e-06, + "loss": 0.17052078247070312, + "step": 5584 + }, + { + "epoch": 0.7550485847063794, + "grad_norm": 1.0119633674621582, + "learning_rate": 4.846608549210679e-06, + "loss": 0.17748618125915527, + "step": 5585 + }, + { + "epoch": 0.7551837769328263, + "grad_norm": 0.653028130531311, + "learning_rate": 4.841571548241741e-06, + "loss": 0.11672306060791016, + "step": 5586 + }, + { + "epoch": 0.7553189691592733, + "grad_norm": 1.180534839630127, + "learning_rate": 4.836536662311777e-06, + "loss": 0.2061138153076172, + "step": 5587 + }, + { + "epoch": 0.7554541613857203, + "grad_norm": 1.6200429201126099, + "learning_rate": 4.8315038924690745e-06, + "loss": 0.2112889289855957, + "step": 5588 + }, + { + "epoch": 0.7555893536121673, + "grad_norm": 1.1325408220291138, + "learning_rate": 4.82647323976149e-06, + "loss": 0.20076751708984375, + "step": 5589 + }, + { + "epoch": 0.7557245458386143, + "grad_norm": 0.7634033560752869, + "learning_rate": 4.821444705236429e-06, + "loss": 0.16547656059265137, + "step": 5590 + }, + { + "epoch": 0.7558597380650612, + "grad_norm": 0.8454173803329468, + "learning_rate": 4.81641828994086e-06, + "loss": 0.15250205993652344, + "step": 5591 + }, + { + "epoch": 0.7559949302915082, + "grad_norm": 1.1030910015106201, + "learning_rate": 4.811393994921308e-06, + "loss": 0.17107868194580078, + "step": 5592 + }, + { + "epoch": 0.7561301225179552, + "grad_norm": 1.185733437538147, + "learning_rate": 4.806371821223854e-06, + "loss": 0.18511343002319336, + "step": 5593 + }, + { + "epoch": 0.7562653147444022, + "grad_norm": 0.7229312062263489, + "learning_rate": 4.801351769894151e-06, + "loss": 0.14079052209854126, + "step": 5594 + }, + { + "epoch": 0.7564005069708492, + "grad_norm": 1.2574173212051392, + "learning_rate": 4.796333841977394e-06, + "loss": 0.15498089790344238, + "step": 5595 + }, + { + "epoch": 0.7565356991972961, + "grad_norm": 1.4316736459732056, + "learning_rate": 4.791318038518345e-06, + "loss": 0.17536544799804688, + "step": 5596 + }, + { + "epoch": 0.7566708914237431, + "grad_norm": 0.9596560001373291, + "learning_rate": 4.7863043605613185e-06, + "loss": 0.16167068481445312, + "step": 5597 + }, + { + "epoch": 0.7568060836501901, + "grad_norm": 1.0597124099731445, + "learning_rate": 4.7812928091501865e-06, + "loss": 0.16962623596191406, + "step": 5598 + }, + { + "epoch": 0.7569412758766371, + "grad_norm": 0.8384588956832886, + "learning_rate": 4.7762833853283935e-06, + "loss": 0.12042665481567383, + "step": 5599 + }, + { + "epoch": 0.757076468103084, + "grad_norm": 1.059099555015564, + "learning_rate": 4.77127609013891e-06, + "loss": 0.1955275535583496, + "step": 5600 + }, + { + "epoch": 0.757211660329531, + "grad_norm": 0.9685809016227722, + "learning_rate": 4.766270924624295e-06, + "loss": 0.2124786376953125, + "step": 5601 + }, + { + "epoch": 0.757346852555978, + "grad_norm": 1.4474679231643677, + "learning_rate": 4.761267889826647e-06, + "loss": 0.2092428207397461, + "step": 5602 + }, + { + "epoch": 0.757482044782425, + "grad_norm": 1.664612054824829, + "learning_rate": 4.756266986787619e-06, + "loss": 0.21875, + "step": 5603 + }, + { + "epoch": 0.757617237008872, + "grad_norm": 0.8639447689056396, + "learning_rate": 4.751268216548439e-06, + "loss": 0.14753055572509766, + "step": 5604 + }, + { + "epoch": 0.7577524292353189, + "grad_norm": 0.5144842863082886, + "learning_rate": 4.746271580149861e-06, + "loss": 0.11172723770141602, + "step": 5605 + }, + { + "epoch": 0.7578876214617659, + "grad_norm": 1.1795637607574463, + "learning_rate": 4.7412770786322244e-06, + "loss": 0.1476426124572754, + "step": 5606 + }, + { + "epoch": 0.7580228136882129, + "grad_norm": 0.9664493799209595, + "learning_rate": 4.736284713035406e-06, + "loss": 0.14348816871643066, + "step": 5607 + }, + { + "epoch": 0.7581580059146599, + "grad_norm": 1.530418038368225, + "learning_rate": 4.731294484398843e-06, + "loss": 0.16219043731689453, + "step": 5608 + }, + { + "epoch": 0.7582931981411069, + "grad_norm": 0.9234780073165894, + "learning_rate": 4.726306393761526e-06, + "loss": 0.13222122192382812, + "step": 5609 + }, + { + "epoch": 0.7584283903675538, + "grad_norm": 0.952521562576294, + "learning_rate": 4.721320442162001e-06, + "loss": 0.19065523147583008, + "step": 5610 + }, + { + "epoch": 0.7585635825940008, + "grad_norm": 1.0002142190933228, + "learning_rate": 4.716336630638378e-06, + "loss": 0.1819133758544922, + "step": 5611 + }, + { + "epoch": 0.7586987748204478, + "grad_norm": 1.343812108039856, + "learning_rate": 4.711354960228306e-06, + "loss": 0.201995849609375, + "step": 5612 + }, + { + "epoch": 0.7588339670468948, + "grad_norm": 1.2567311525344849, + "learning_rate": 4.706375431968998e-06, + "loss": 0.1607494354248047, + "step": 5613 + }, + { + "epoch": 0.7589691592733417, + "grad_norm": 1.004839539527893, + "learning_rate": 4.701398046897218e-06, + "loss": 0.17444348335266113, + "step": 5614 + }, + { + "epoch": 0.7591043514997887, + "grad_norm": 1.7555283308029175, + "learning_rate": 4.696422806049277e-06, + "loss": 0.20669078826904297, + "step": 5615 + }, + { + "epoch": 0.7592395437262357, + "grad_norm": 1.1199944019317627, + "learning_rate": 4.69144971046106e-06, + "loss": 0.15883255004882812, + "step": 5616 + }, + { + "epoch": 0.7593747359526827, + "grad_norm": 1.5909534692764282, + "learning_rate": 4.686478761167984e-06, + "loss": 0.1797027587890625, + "step": 5617 + }, + { + "epoch": 0.7595099281791297, + "grad_norm": 1.7642052173614502, + "learning_rate": 4.681509959205028e-06, + "loss": 0.2161731719970703, + "step": 5618 + }, + { + "epoch": 0.7596451204055766, + "grad_norm": 0.7950900197029114, + "learning_rate": 4.676543305606724e-06, + "loss": 0.15172624588012695, + "step": 5619 + }, + { + "epoch": 0.7597803126320236, + "grad_norm": 1.3509894609451294, + "learning_rate": 4.67157880140715e-06, + "loss": 0.18468694388866425, + "step": 5620 + }, + { + "epoch": 0.7599155048584706, + "grad_norm": 1.6210018396377563, + "learning_rate": 4.666616447639952e-06, + "loss": 0.1890707015991211, + "step": 5621 + }, + { + "epoch": 0.7600506970849176, + "grad_norm": 0.9629420042037964, + "learning_rate": 4.661656245338314e-06, + "loss": 0.14961004257202148, + "step": 5622 + }, + { + "epoch": 0.7601858893113645, + "grad_norm": 0.8086307644844055, + "learning_rate": 4.656698195534978e-06, + "loss": 0.14900684356689453, + "step": 5623 + }, + { + "epoch": 0.7603210815378115, + "grad_norm": 1.3364981412887573, + "learning_rate": 4.651742299262233e-06, + "loss": 0.1511223316192627, + "step": 5624 + }, + { + "epoch": 0.7604562737642585, + "grad_norm": 0.7157567143440247, + "learning_rate": 4.646788557551921e-06, + "loss": 0.13143327832221985, + "step": 5625 + }, + { + "epoch": 0.7605914659907055, + "grad_norm": 1.0600775480270386, + "learning_rate": 4.641836971435445e-06, + "loss": 0.18670892715454102, + "step": 5626 + }, + { + "epoch": 0.7607266582171525, + "grad_norm": 1.2083684206008911, + "learning_rate": 4.63688754194375e-06, + "loss": 0.1737537384033203, + "step": 5627 + }, + { + "epoch": 0.7608618504435994, + "grad_norm": 1.340206503868103, + "learning_rate": 4.6319402701073295e-06, + "loss": 0.18640708923339844, + "step": 5628 + }, + { + "epoch": 0.7609970426700464, + "grad_norm": 2.08260440826416, + "learning_rate": 4.6269951569562355e-06, + "loss": 0.1842184066772461, + "step": 5629 + }, + { + "epoch": 0.7611322348964934, + "grad_norm": 1.2912315130233765, + "learning_rate": 4.622052203520061e-06, + "loss": 0.17406892776489258, + "step": 5630 + }, + { + "epoch": 0.7612674271229404, + "grad_norm": 0.8089340925216675, + "learning_rate": 4.617111410827968e-06, + "loss": 0.18362855911254883, + "step": 5631 + }, + { + "epoch": 0.7614026193493874, + "grad_norm": 0.9377435445785522, + "learning_rate": 4.612172779908639e-06, + "loss": 0.14360904693603516, + "step": 5632 + }, + { + "epoch": 0.7615378115758344, + "grad_norm": 1.583442211151123, + "learning_rate": 4.607236311790335e-06, + "loss": 0.2171487808227539, + "step": 5633 + }, + { + "epoch": 0.7616730038022814, + "grad_norm": 1.7319791316986084, + "learning_rate": 4.602302007500854e-06, + "loss": 0.1705636978149414, + "step": 5634 + }, + { + "epoch": 0.7618081960287284, + "grad_norm": 0.9414796233177185, + "learning_rate": 4.597369868067537e-06, + "loss": 0.14108514785766602, + "step": 5635 + }, + { + "epoch": 0.7619433882551754, + "grad_norm": 0.777948260307312, + "learning_rate": 4.592439894517296e-06, + "loss": 0.14987754821777344, + "step": 5636 + }, + { + "epoch": 0.7620785804816224, + "grad_norm": 1.052364468574524, + "learning_rate": 4.587512087876559e-06, + "loss": 0.15215209126472473, + "step": 5637 + }, + { + "epoch": 0.7622137727080693, + "grad_norm": 0.7150318622589111, + "learning_rate": 4.582586449171336e-06, + "loss": 0.12601280212402344, + "step": 5638 + }, + { + "epoch": 0.7623489649345163, + "grad_norm": 0.9592683911323547, + "learning_rate": 4.577662979427168e-06, + "loss": 0.14050960540771484, + "step": 5639 + }, + { + "epoch": 0.7624841571609633, + "grad_norm": 1.3156485557556152, + "learning_rate": 4.572741679669147e-06, + "loss": 0.23079752922058105, + "step": 5640 + }, + { + "epoch": 0.7626193493874103, + "grad_norm": 0.8563727140426636, + "learning_rate": 4.567822550921912e-06, + "loss": 0.1833357810974121, + "step": 5641 + }, + { + "epoch": 0.7627545416138573, + "grad_norm": 1.1649134159088135, + "learning_rate": 4.562905594209647e-06, + "loss": 0.15782999992370605, + "step": 5642 + }, + { + "epoch": 0.7628897338403042, + "grad_norm": 1.074506163597107, + "learning_rate": 4.557990810556102e-06, + "loss": 0.20490646362304688, + "step": 5643 + }, + { + "epoch": 0.7630249260667512, + "grad_norm": 0.7917906641960144, + "learning_rate": 4.553078200984553e-06, + "loss": 0.18158650398254395, + "step": 5644 + }, + { + "epoch": 0.7631601182931982, + "grad_norm": 1.0676697492599487, + "learning_rate": 4.548167766517832e-06, + "loss": 0.18944549560546875, + "step": 5645 + }, + { + "epoch": 0.7632953105196452, + "grad_norm": 0.7175119519233704, + "learning_rate": 4.543259508178318e-06, + "loss": 0.15016746520996094, + "step": 5646 + }, + { + "epoch": 0.7634305027460921, + "grad_norm": 1.0355457067489624, + "learning_rate": 4.538353426987931e-06, + "loss": 0.16494178771972656, + "step": 5647 + }, + { + "epoch": 0.7635656949725391, + "grad_norm": 1.1480623483657837, + "learning_rate": 4.533449523968154e-06, + "loss": 0.1848607063293457, + "step": 5648 + }, + { + "epoch": 0.7637008871989861, + "grad_norm": 0.9228355884552002, + "learning_rate": 4.528547800140001e-06, + "loss": 0.11853981018066406, + "step": 5649 + }, + { + "epoch": 0.7638360794254331, + "grad_norm": 1.0169438123703003, + "learning_rate": 4.523648256524037e-06, + "loss": 0.15368366241455078, + "step": 5650 + }, + { + "epoch": 0.7639712716518801, + "grad_norm": 1.1107758283615112, + "learning_rate": 4.518750894140372e-06, + "loss": 0.21843719482421875, + "step": 5651 + }, + { + "epoch": 0.764106463878327, + "grad_norm": 2.2256572246551514, + "learning_rate": 4.513855714008659e-06, + "loss": 0.24049663543701172, + "step": 5652 + }, + { + "epoch": 0.764241656104774, + "grad_norm": 1.0370551347732544, + "learning_rate": 4.508962717148111e-06, + "loss": 0.15035676956176758, + "step": 5653 + }, + { + "epoch": 0.764376848331221, + "grad_norm": 0.9271912574768066, + "learning_rate": 4.504071904577469e-06, + "loss": 0.1890249252319336, + "step": 5654 + }, + { + "epoch": 0.764512040557668, + "grad_norm": 1.1165226697921753, + "learning_rate": 4.499183277315027e-06, + "loss": 0.11548709869384766, + "step": 5655 + }, + { + "epoch": 0.764647232784115, + "grad_norm": 0.9335180521011353, + "learning_rate": 4.494296836378625e-06, + "loss": 0.13154125213623047, + "step": 5656 + }, + { + "epoch": 0.7647824250105619, + "grad_norm": 1.8017005920410156, + "learning_rate": 4.4894125827856415e-06, + "loss": 0.16046714782714844, + "step": 5657 + }, + { + "epoch": 0.7649176172370089, + "grad_norm": 0.7010825276374817, + "learning_rate": 4.4845305175530105e-06, + "loss": 0.1520557403564453, + "step": 5658 + }, + { + "epoch": 0.7650528094634559, + "grad_norm": 2.0297791957855225, + "learning_rate": 4.479650641697201e-06, + "loss": 0.21923065185546875, + "step": 5659 + }, + { + "epoch": 0.7651880016899029, + "grad_norm": 1.4055614471435547, + "learning_rate": 4.4747729562342305e-06, + "loss": 0.18424701690673828, + "step": 5660 + }, + { + "epoch": 0.7653231939163498, + "grad_norm": 0.8998335003852844, + "learning_rate": 4.469897462179656e-06, + "loss": 0.16823577880859375, + "step": 5661 + }, + { + "epoch": 0.7654583861427968, + "grad_norm": 1.0070158243179321, + "learning_rate": 4.46502416054858e-06, + "loss": 0.1489400863647461, + "step": 5662 + }, + { + "epoch": 0.7655935783692438, + "grad_norm": 0.9272902011871338, + "learning_rate": 4.460153052355663e-06, + "loss": 0.16038799285888672, + "step": 5663 + }, + { + "epoch": 0.7657287705956908, + "grad_norm": 1.0019853115081787, + "learning_rate": 4.455284138615074e-06, + "loss": 0.14382219314575195, + "step": 5664 + }, + { + "epoch": 0.7658639628221378, + "grad_norm": 1.6276488304138184, + "learning_rate": 4.4504174203405656e-06, + "loss": 0.17556190490722656, + "step": 5665 + }, + { + "epoch": 0.7659991550485847, + "grad_norm": 0.9283202290534973, + "learning_rate": 4.445552898545407e-06, + "loss": 0.1423492431640625, + "step": 5666 + }, + { + "epoch": 0.7661343472750317, + "grad_norm": 0.8269470930099487, + "learning_rate": 4.440690574242413e-06, + "loss": 0.17202186584472656, + "step": 5667 + }, + { + "epoch": 0.7662695395014787, + "grad_norm": 0.8893545269966125, + "learning_rate": 4.435830448443961e-06, + "loss": 0.1474905014038086, + "step": 5668 + }, + { + "epoch": 0.7664047317279257, + "grad_norm": 1.6766085624694824, + "learning_rate": 4.430972522161934e-06, + "loss": 0.2121429443359375, + "step": 5669 + }, + { + "epoch": 0.7665399239543726, + "grad_norm": 1.9850035905838013, + "learning_rate": 4.426116796407794e-06, + "loss": 0.1836223602294922, + "step": 5670 + }, + { + "epoch": 0.7666751161808196, + "grad_norm": 1.414304494857788, + "learning_rate": 4.421263272192523e-06, + "loss": 0.21085453033447266, + "step": 5671 + }, + { + "epoch": 0.7668103084072666, + "grad_norm": 1.202636480331421, + "learning_rate": 4.416411950526648e-06, + "loss": 0.1845874786376953, + "step": 5672 + }, + { + "epoch": 0.7669455006337136, + "grad_norm": 0.9374517798423767, + "learning_rate": 4.411562832420252e-06, + "loss": 0.13102245330810547, + "step": 5673 + }, + { + "epoch": 0.7670806928601606, + "grad_norm": 1.0864344835281372, + "learning_rate": 4.406715918882929e-06, + "loss": 0.20935440063476562, + "step": 5674 + }, + { + "epoch": 0.7672158850866075, + "grad_norm": 1.1287201642990112, + "learning_rate": 4.4018712109238475e-06, + "loss": 0.1835486888885498, + "step": 5675 + }, + { + "epoch": 0.7673510773130545, + "grad_norm": 0.9374114871025085, + "learning_rate": 4.3970287095516965e-06, + "loss": 0.1298377513885498, + "step": 5676 + }, + { + "epoch": 0.7674862695395015, + "grad_norm": 0.9655289053916931, + "learning_rate": 4.39218841577471e-06, + "loss": 0.18006229400634766, + "step": 5677 + }, + { + "epoch": 0.7676214617659485, + "grad_norm": 2.1124372482299805, + "learning_rate": 4.387350330600662e-06, + "loss": 0.20349884033203125, + "step": 5678 + }, + { + "epoch": 0.7677566539923955, + "grad_norm": 0.9207296371459961, + "learning_rate": 4.382514455036864e-06, + "loss": 0.1527242660522461, + "step": 5679 + }, + { + "epoch": 0.7678918462188424, + "grad_norm": 0.9362502694129944, + "learning_rate": 4.377680790090182e-06, + "loss": 0.1410350799560547, + "step": 5680 + }, + { + "epoch": 0.7680270384452894, + "grad_norm": 1.3551695346832275, + "learning_rate": 4.372849336767004e-06, + "loss": 0.20341205596923828, + "step": 5681 + }, + { + "epoch": 0.7681622306717364, + "grad_norm": 1.017533302307129, + "learning_rate": 4.3680200960732645e-06, + "loss": 0.19510364532470703, + "step": 5682 + }, + { + "epoch": 0.7682974228981834, + "grad_norm": 1.1069456338882446, + "learning_rate": 4.363193069014439e-06, + "loss": 0.1566905975341797, + "step": 5683 + }, + { + "epoch": 0.7684326151246303, + "grad_norm": 1.5834105014801025, + "learning_rate": 4.3583682565955325e-06, + "loss": 0.26218605041503906, + "step": 5684 + }, + { + "epoch": 0.7685678073510773, + "grad_norm": 1.1706461906433105, + "learning_rate": 4.3535456598211074e-06, + "loss": 0.15036487579345703, + "step": 5685 + }, + { + "epoch": 0.7687029995775243, + "grad_norm": 1.4391405582427979, + "learning_rate": 4.348725279695251e-06, + "loss": 0.22117233276367188, + "step": 5686 + }, + { + "epoch": 0.7688381918039713, + "grad_norm": 0.9555748701095581, + "learning_rate": 4.343907117221591e-06, + "loss": 0.11361606419086456, + "step": 5687 + }, + { + "epoch": 0.7689733840304183, + "grad_norm": 0.9524842500686646, + "learning_rate": 4.339091173403294e-06, + "loss": 0.17129135131835938, + "step": 5688 + }, + { + "epoch": 0.7691085762568652, + "grad_norm": 1.2796114683151245, + "learning_rate": 4.334277449243061e-06, + "loss": 0.1484670639038086, + "step": 5689 + }, + { + "epoch": 0.7692437684833122, + "grad_norm": 1.1945613622665405, + "learning_rate": 4.329465945743144e-06, + "loss": 0.18136310577392578, + "step": 5690 + }, + { + "epoch": 0.7693789607097592, + "grad_norm": 1.8413891792297363, + "learning_rate": 4.32465666390532e-06, + "loss": 0.156646728515625, + "step": 5691 + }, + { + "epoch": 0.7695141529362062, + "grad_norm": 1.3953511714935303, + "learning_rate": 4.319849604730905e-06, + "loss": 0.1966233253479004, + "step": 5692 + }, + { + "epoch": 0.7696493451626532, + "grad_norm": 0.9326554536819458, + "learning_rate": 4.315044769220758e-06, + "loss": 0.16521549224853516, + "step": 5693 + }, + { + "epoch": 0.7697845373891001, + "grad_norm": 1.5161690711975098, + "learning_rate": 4.310242158375264e-06, + "loss": 0.22219562530517578, + "step": 5694 + }, + { + "epoch": 0.7699197296155471, + "grad_norm": 1.4437106847763062, + "learning_rate": 4.30544177319436e-06, + "loss": 0.15215396881103516, + "step": 5695 + }, + { + "epoch": 0.7700549218419941, + "grad_norm": 0.8638103604316711, + "learning_rate": 4.300643614677511e-06, + "loss": 0.13219094276428223, + "step": 5696 + }, + { + "epoch": 0.7701901140684411, + "grad_norm": 1.5494129657745361, + "learning_rate": 4.2958476838237165e-06, + "loss": 0.17708861827850342, + "step": 5697 + }, + { + "epoch": 0.770325306294888, + "grad_norm": 0.8461418747901917, + "learning_rate": 4.2910539816315166e-06, + "loss": 0.174560546875, + "step": 5698 + }, + { + "epoch": 0.770460498521335, + "grad_norm": 0.9515027403831482, + "learning_rate": 4.286262509098979e-06, + "loss": 0.12755346298217773, + "step": 5699 + }, + { + "epoch": 0.770595690747782, + "grad_norm": 1.2471880912780762, + "learning_rate": 4.28147326722373e-06, + "loss": 0.20840883255004883, + "step": 5700 + }, + { + "epoch": 0.770730882974229, + "grad_norm": 0.9678155183792114, + "learning_rate": 4.2766862570028965e-06, + "loss": 0.13154226541519165, + "step": 5701 + }, + { + "epoch": 0.770866075200676, + "grad_norm": 1.1689928770065308, + "learning_rate": 4.2719014794331715e-06, + "loss": 0.1676788330078125, + "step": 5702 + }, + { + "epoch": 0.7710012674271229, + "grad_norm": 1.0286637544631958, + "learning_rate": 4.267118935510767e-06, + "loss": 0.14365673065185547, + "step": 5703 + }, + { + "epoch": 0.7711364596535699, + "grad_norm": 0.7536453604698181, + "learning_rate": 4.2623386262314306e-06, + "loss": 0.1428537368774414, + "step": 5704 + }, + { + "epoch": 0.7712716518800169, + "grad_norm": 1.5972416400909424, + "learning_rate": 4.257560552590461e-06, + "loss": 0.18857979774475098, + "step": 5705 + }, + { + "epoch": 0.7714068441064639, + "grad_norm": 1.0247960090637207, + "learning_rate": 4.252784715582661e-06, + "loss": 0.1702425479888916, + "step": 5706 + }, + { + "epoch": 0.7715420363329109, + "grad_norm": 1.3569600582122803, + "learning_rate": 4.2480111162024e-06, + "loss": 0.1625213623046875, + "step": 5707 + }, + { + "epoch": 0.7716772285593578, + "grad_norm": 0.9905298948287964, + "learning_rate": 4.243239755443561e-06, + "loss": 0.20572853088378906, + "step": 5708 + }, + { + "epoch": 0.7718124207858048, + "grad_norm": 1.0633753538131714, + "learning_rate": 4.238470634299567e-06, + "loss": 0.1307048797607422, + "step": 5709 + }, + { + "epoch": 0.7719476130122518, + "grad_norm": 1.004807949066162, + "learning_rate": 4.233703753763375e-06, + "loss": 0.16947460174560547, + "step": 5710 + }, + { + "epoch": 0.7720828052386988, + "grad_norm": 1.113210678100586, + "learning_rate": 4.228939114827469e-06, + "loss": 0.22174644470214844, + "step": 5711 + }, + { + "epoch": 0.7722179974651457, + "grad_norm": 0.952313244342804, + "learning_rate": 4.224176718483881e-06, + "loss": 0.16562700271606445, + "step": 5712 + }, + { + "epoch": 0.7723531896915927, + "grad_norm": 0.7830891609191895, + "learning_rate": 4.219416565724165e-06, + "loss": 0.11083650588989258, + "step": 5713 + }, + { + "epoch": 0.7724883819180397, + "grad_norm": 0.7530669569969177, + "learning_rate": 4.21465865753941e-06, + "loss": 0.14617443084716797, + "step": 5714 + }, + { + "epoch": 0.7726235741444867, + "grad_norm": 1.1028395891189575, + "learning_rate": 4.209902994920236e-06, + "loss": 0.1929454803466797, + "step": 5715 + }, + { + "epoch": 0.7727587663709337, + "grad_norm": 0.8484943509101868, + "learning_rate": 4.205149578856794e-06, + "loss": 0.1592416763305664, + "step": 5716 + }, + { + "epoch": 0.7728939585973806, + "grad_norm": 1.2756197452545166, + "learning_rate": 4.200398410338779e-06, + "loss": 0.16272687911987305, + "step": 5717 + }, + { + "epoch": 0.7730291508238276, + "grad_norm": 1.0048065185546875, + "learning_rate": 4.1956494903554056e-06, + "loss": 0.17531967163085938, + "step": 5718 + }, + { + "epoch": 0.7731643430502746, + "grad_norm": 0.774115264415741, + "learning_rate": 4.190902819895425e-06, + "loss": 0.15831279754638672, + "step": 5719 + }, + { + "epoch": 0.7732995352767216, + "grad_norm": 1.5515379905700684, + "learning_rate": 4.186158399947118e-06, + "loss": 0.17557334899902344, + "step": 5720 + }, + { + "epoch": 0.7734347275031686, + "grad_norm": 1.4792540073394775, + "learning_rate": 4.181416231498292e-06, + "loss": 0.1820697784423828, + "step": 5721 + }, + { + "epoch": 0.7735699197296155, + "grad_norm": 1.9483230113983154, + "learning_rate": 4.176676315536306e-06, + "loss": 0.18980026245117188, + "step": 5722 + }, + { + "epoch": 0.7737051119560625, + "grad_norm": 1.2648887634277344, + "learning_rate": 4.171938653048027e-06, + "loss": 0.1960453987121582, + "step": 5723 + }, + { + "epoch": 0.7738403041825095, + "grad_norm": 1.0147806406021118, + "learning_rate": 4.1672032450198616e-06, + "loss": 0.17206907272338867, + "step": 5724 + }, + { + "epoch": 0.7739754964089565, + "grad_norm": 2.1715707778930664, + "learning_rate": 4.16247009243775e-06, + "loss": 0.2610957622528076, + "step": 5725 + }, + { + "epoch": 0.7741106886354034, + "grad_norm": 1.7615704536437988, + "learning_rate": 4.1577391962871504e-06, + "loss": 0.1836376190185547, + "step": 5726 + }, + { + "epoch": 0.7742458808618504, + "grad_norm": 1.677809715270996, + "learning_rate": 4.153010557553076e-06, + "loss": 0.2107372283935547, + "step": 5727 + }, + { + "epoch": 0.7743810730882974, + "grad_norm": 0.9997429251670837, + "learning_rate": 4.148284177220045e-06, + "loss": 0.13818359375, + "step": 5728 + }, + { + "epoch": 0.7745162653147444, + "grad_norm": 1.3612557649612427, + "learning_rate": 4.143560056272117e-06, + "loss": 0.21035194396972656, + "step": 5729 + }, + { + "epoch": 0.7746514575411914, + "grad_norm": 1.3863136768341064, + "learning_rate": 4.1388381956928796e-06, + "loss": 0.158616304397583, + "step": 5730 + }, + { + "epoch": 0.7747866497676383, + "grad_norm": 1.1184004545211792, + "learning_rate": 4.134118596465443e-06, + "loss": 0.1649494171142578, + "step": 5731 + }, + { + "epoch": 0.7749218419940853, + "grad_norm": 1.102777123451233, + "learning_rate": 4.1294012595724675e-06, + "loss": 0.1613903045654297, + "step": 5732 + }, + { + "epoch": 0.7750570342205323, + "grad_norm": 2.2198445796966553, + "learning_rate": 4.1246861859961114e-06, + "loss": 0.2568550109863281, + "step": 5733 + }, + { + "epoch": 0.7751922264469793, + "grad_norm": 1.2297929525375366, + "learning_rate": 4.119973376718089e-06, + "loss": 0.19216108322143555, + "step": 5734 + }, + { + "epoch": 0.7753274186734262, + "grad_norm": 1.603895664215088, + "learning_rate": 4.115262832719628e-06, + "loss": 0.1389636993408203, + "step": 5735 + }, + { + "epoch": 0.7754626108998732, + "grad_norm": 0.9889018535614014, + "learning_rate": 4.110554554981486e-06, + "loss": 0.14987993240356445, + "step": 5736 + }, + { + "epoch": 0.7755978031263202, + "grad_norm": 0.8055657148361206, + "learning_rate": 4.1058485444839655e-06, + "loss": 0.14275503158569336, + "step": 5737 + }, + { + "epoch": 0.7757329953527672, + "grad_norm": 1.2474499940872192, + "learning_rate": 4.101144802206862e-06, + "loss": 0.1623058319091797, + "step": 5738 + }, + { + "epoch": 0.7758681875792142, + "grad_norm": 0.9579083919525146, + "learning_rate": 4.096443329129535e-06, + "loss": 0.14908599853515625, + "step": 5739 + }, + { + "epoch": 0.7760033798056611, + "grad_norm": 1.1723302602767944, + "learning_rate": 4.091744126230853e-06, + "loss": 0.17624664306640625, + "step": 5740 + }, + { + "epoch": 0.7761385720321081, + "grad_norm": 0.8712233304977417, + "learning_rate": 4.08704719448921e-06, + "loss": 0.17962646484375, + "step": 5741 + }, + { + "epoch": 0.7762737642585551, + "grad_norm": 0.7037849426269531, + "learning_rate": 4.082352534882543e-06, + "loss": 0.09752249717712402, + "step": 5742 + }, + { + "epoch": 0.7764089564850021, + "grad_norm": 1.0782477855682373, + "learning_rate": 4.07766014838829e-06, + "loss": 0.1750040054321289, + "step": 5743 + }, + { + "epoch": 0.7765441487114491, + "grad_norm": 1.2373197078704834, + "learning_rate": 4.072970035983443e-06, + "loss": 0.15466690063476562, + "step": 5744 + }, + { + "epoch": 0.776679340937896, + "grad_norm": 1.7748299837112427, + "learning_rate": 4.068282198644505e-06, + "loss": 0.1977221965789795, + "step": 5745 + }, + { + "epoch": 0.776814533164343, + "grad_norm": 0.9341349005699158, + "learning_rate": 4.06359663734751e-06, + "loss": 0.125213623046875, + "step": 5746 + }, + { + "epoch": 0.77694972539079, + "grad_norm": 1.1644492149353027, + "learning_rate": 4.058913353068013e-06, + "loss": 0.1862165927886963, + "step": 5747 + }, + { + "epoch": 0.777084917617237, + "grad_norm": 2.433779001235962, + "learning_rate": 4.0542323467810985e-06, + "loss": 0.18503284454345703, + "step": 5748 + }, + { + "epoch": 0.777220109843684, + "grad_norm": 0.7797772884368896, + "learning_rate": 4.049553619461381e-06, + "loss": 0.1127614974975586, + "step": 5749 + }, + { + "epoch": 0.7773553020701309, + "grad_norm": 1.1835607290267944, + "learning_rate": 4.044877172082997e-06, + "loss": 0.18398284912109375, + "step": 5750 + }, + { + "epoch": 0.7774904942965779, + "grad_norm": 0.6466657519340515, + "learning_rate": 4.040203005619604e-06, + "loss": 0.13517379760742188, + "step": 5751 + }, + { + "epoch": 0.7776256865230249, + "grad_norm": 1.3917794227600098, + "learning_rate": 4.035531121044392e-06, + "loss": 0.23613929748535156, + "step": 5752 + }, + { + "epoch": 0.7777608787494719, + "grad_norm": 1.2126444578170776, + "learning_rate": 4.030861519330065e-06, + "loss": 0.21366500854492188, + "step": 5753 + }, + { + "epoch": 0.7778960709759188, + "grad_norm": 1.790773868560791, + "learning_rate": 4.026194201448868e-06, + "loss": 0.1931896209716797, + "step": 5754 + }, + { + "epoch": 0.7780312632023658, + "grad_norm": 1.0487462282180786, + "learning_rate": 4.021529168372558e-06, + "loss": 0.194183349609375, + "step": 5755 + }, + { + "epoch": 0.7781664554288128, + "grad_norm": 1.098608374595642, + "learning_rate": 4.01686642107242e-06, + "loss": 0.15494155883789062, + "step": 5756 + }, + { + "epoch": 0.7783016476552598, + "grad_norm": 1.1798722743988037, + "learning_rate": 4.0122059605192624e-06, + "loss": 0.16982269287109375, + "step": 5757 + }, + { + "epoch": 0.7784368398817068, + "grad_norm": 0.8994905352592468, + "learning_rate": 4.007547787683412e-06, + "loss": 0.15837860107421875, + "step": 5758 + }, + { + "epoch": 0.7785720321081537, + "grad_norm": 0.9738286733627319, + "learning_rate": 4.002891903534736e-06, + "loss": 0.19238758087158203, + "step": 5759 + }, + { + "epoch": 0.7787072243346007, + "grad_norm": 3.014173746109009, + "learning_rate": 3.998238309042611e-06, + "loss": 0.225982666015625, + "step": 5760 + }, + { + "epoch": 0.7788424165610477, + "grad_norm": 2.397707223892212, + "learning_rate": 3.993587005175937e-06, + "loss": 0.2344341278076172, + "step": 5761 + }, + { + "epoch": 0.7789776087874947, + "grad_norm": 1.6218763589859009, + "learning_rate": 3.988937992903144e-06, + "loss": 0.19930267333984375, + "step": 5762 + }, + { + "epoch": 0.7791128010139416, + "grad_norm": 1.187485933303833, + "learning_rate": 3.9842912731921716e-06, + "loss": 0.15949058532714844, + "step": 5763 + }, + { + "epoch": 0.7792479932403886, + "grad_norm": 1.2378003597259521, + "learning_rate": 3.979646847010506e-06, + "loss": 0.19077301025390625, + "step": 5764 + }, + { + "epoch": 0.7793831854668356, + "grad_norm": 1.2841618061065674, + "learning_rate": 3.975004715325134e-06, + "loss": 0.20267200469970703, + "step": 5765 + }, + { + "epoch": 0.7795183776932826, + "grad_norm": 1.336878776550293, + "learning_rate": 3.970364879102572e-06, + "loss": 0.15315532684326172, + "step": 5766 + }, + { + "epoch": 0.7796535699197296, + "grad_norm": 0.6835128664970398, + "learning_rate": 3.96572733930886e-06, + "loss": 0.11800765991210938, + "step": 5767 + }, + { + "epoch": 0.7797887621461765, + "grad_norm": 1.255144715309143, + "learning_rate": 3.961092096909552e-06, + "loss": 0.19077491760253906, + "step": 5768 + }, + { + "epoch": 0.7799239543726236, + "grad_norm": 0.7086952924728394, + "learning_rate": 3.9564591528697455e-06, + "loss": 0.14722347259521484, + "step": 5769 + }, + { + "epoch": 0.7800591465990706, + "grad_norm": 1.504512906074524, + "learning_rate": 3.9518285081540275e-06, + "loss": 0.21566295623779297, + "step": 5770 + }, + { + "epoch": 0.7801943388255176, + "grad_norm": 0.9993622303009033, + "learning_rate": 3.947200163726534e-06, + "loss": 0.1497974395751953, + "step": 5771 + }, + { + "epoch": 0.7803295310519646, + "grad_norm": 0.7543260455131531, + "learning_rate": 3.9425741205509055e-06, + "loss": 0.11919450759887695, + "step": 5772 + }, + { + "epoch": 0.7804647232784115, + "grad_norm": 1.0005760192871094, + "learning_rate": 3.9379503795903065e-06, + "loss": 0.1514129638671875, + "step": 5773 + }, + { + "epoch": 0.7805999155048585, + "grad_norm": 1.7753955125808716, + "learning_rate": 3.933328941807439e-06, + "loss": 0.18676376342773438, + "step": 5774 + }, + { + "epoch": 0.7807351077313055, + "grad_norm": 1.4304224252700806, + "learning_rate": 3.928709808164491e-06, + "loss": 0.20827674865722656, + "step": 5775 + }, + { + "epoch": 0.7808702999577525, + "grad_norm": 1.8002541065216064, + "learning_rate": 3.924092979623203e-06, + "loss": 0.2202911376953125, + "step": 5776 + }, + { + "epoch": 0.7810054921841995, + "grad_norm": 1.176750898361206, + "learning_rate": 3.919478457144824e-06, + "loss": 0.17110055685043335, + "step": 5777 + }, + { + "epoch": 0.7811406844106464, + "grad_norm": 0.9747717380523682, + "learning_rate": 3.914866241690115e-06, + "loss": 0.18190264701843262, + "step": 5778 + }, + { + "epoch": 0.7812758766370934, + "grad_norm": 0.9151124954223633, + "learning_rate": 3.9102563342193695e-06, + "loss": 0.14024591445922852, + "step": 5779 + }, + { + "epoch": 0.7814110688635404, + "grad_norm": 0.781014084815979, + "learning_rate": 3.905648735692389e-06, + "loss": 0.11922860145568848, + "step": 5780 + }, + { + "epoch": 0.7815462610899874, + "grad_norm": 1.206429123878479, + "learning_rate": 3.901043447068508e-06, + "loss": 0.17983722686767578, + "step": 5781 + }, + { + "epoch": 0.7816814533164343, + "grad_norm": 1.2806367874145508, + "learning_rate": 3.896440469306567e-06, + "loss": 0.22631263732910156, + "step": 5782 + }, + { + "epoch": 0.7818166455428813, + "grad_norm": 0.8554794192314148, + "learning_rate": 3.891839803364934e-06, + "loss": 0.13141345977783203, + "step": 5783 + }, + { + "epoch": 0.7819518377693283, + "grad_norm": 0.9105827212333679, + "learning_rate": 3.887241450201487e-06, + "loss": 0.11873197555541992, + "step": 5784 + }, + { + "epoch": 0.7820870299957753, + "grad_norm": 0.8203549981117249, + "learning_rate": 3.882645410773629e-06, + "loss": 0.1869983673095703, + "step": 5785 + }, + { + "epoch": 0.7822222222222223, + "grad_norm": 0.7337989211082458, + "learning_rate": 3.878051686038284e-06, + "loss": 0.15885210037231445, + "step": 5786 + }, + { + "epoch": 0.7823574144486692, + "grad_norm": 1.308215856552124, + "learning_rate": 3.873460276951889e-06, + "loss": 0.17203521728515625, + "step": 5787 + }, + { + "epoch": 0.7824926066751162, + "grad_norm": 1.3012659549713135, + "learning_rate": 3.868871184470397e-06, + "loss": 0.17644882202148438, + "step": 5788 + }, + { + "epoch": 0.7826277989015632, + "grad_norm": 0.7731307744979858, + "learning_rate": 3.864284409549282e-06, + "loss": 0.13414859771728516, + "step": 5789 + }, + { + "epoch": 0.7827629911280102, + "grad_norm": 0.9234111905097961, + "learning_rate": 3.859699953143532e-06, + "loss": 0.1620645523071289, + "step": 5790 + }, + { + "epoch": 0.7828981833544572, + "grad_norm": 1.0429589748382568, + "learning_rate": 3.855117816207665e-06, + "loss": 0.1784496307373047, + "step": 5791 + }, + { + "epoch": 0.7830333755809041, + "grad_norm": 0.9223520159721375, + "learning_rate": 3.850537999695699e-06, + "loss": 0.14377450942993164, + "step": 5792 + }, + { + "epoch": 0.7831685678073511, + "grad_norm": 1.3123719692230225, + "learning_rate": 3.845960504561179e-06, + "loss": 0.19254302978515625, + "step": 5793 + }, + { + "epoch": 0.7833037600337981, + "grad_norm": 2.292695999145508, + "learning_rate": 3.841385331757161e-06, + "loss": 0.1718158721923828, + "step": 5794 + }, + { + "epoch": 0.7834389522602451, + "grad_norm": 1.375365972518921, + "learning_rate": 3.8368124822362184e-06, + "loss": 0.16459369659423828, + "step": 5795 + }, + { + "epoch": 0.783574144486692, + "grad_norm": 0.7936541438102722, + "learning_rate": 3.832241956950449e-06, + "loss": 0.1504192352294922, + "step": 5796 + }, + { + "epoch": 0.783709336713139, + "grad_norm": 1.1918771266937256, + "learning_rate": 3.82767375685146e-06, + "loss": 0.20840835571289062, + "step": 5797 + }, + { + "epoch": 0.783844528939586, + "grad_norm": 1.1423534154891968, + "learning_rate": 3.823107882890373e-06, + "loss": 0.20076847076416016, + "step": 5798 + }, + { + "epoch": 0.783979721166033, + "grad_norm": 1.7537564039230347, + "learning_rate": 3.8185443360178265e-06, + "loss": 0.17306888103485107, + "step": 5799 + }, + { + "epoch": 0.78411491339248, + "grad_norm": 0.6891571283340454, + "learning_rate": 3.813983117183973e-06, + "loss": 0.14136600494384766, + "step": 5800 + }, + { + "epoch": 0.7842501056189269, + "grad_norm": 1.4191315174102783, + "learning_rate": 3.8094242273384932e-06, + "loss": 0.21343612670898438, + "step": 5801 + }, + { + "epoch": 0.7843852978453739, + "grad_norm": 1.7443660497665405, + "learning_rate": 3.804867667430555e-06, + "loss": 0.2536964416503906, + "step": 5802 + }, + { + "epoch": 0.7845204900718209, + "grad_norm": 0.6830030679702759, + "learning_rate": 3.800313438408874e-06, + "loss": 0.11420917510986328, + "step": 5803 + }, + { + "epoch": 0.7846556822982679, + "grad_norm": 0.9493191838264465, + "learning_rate": 3.7957615412216582e-06, + "loss": 0.18427276611328125, + "step": 5804 + }, + { + "epoch": 0.7847908745247149, + "grad_norm": 1.298243761062622, + "learning_rate": 3.791211976816634e-06, + "loss": 0.2222881317138672, + "step": 5805 + }, + { + "epoch": 0.7849260667511618, + "grad_norm": 1.9392026662826538, + "learning_rate": 3.786664746141057e-06, + "loss": 0.1992359161376953, + "step": 5806 + }, + { + "epoch": 0.7850612589776088, + "grad_norm": 0.9334809184074402, + "learning_rate": 3.782119850141669e-06, + "loss": 0.16501259803771973, + "step": 5807 + }, + { + "epoch": 0.7851964512040558, + "grad_norm": 1.539667010307312, + "learning_rate": 3.777577289764752e-06, + "loss": 0.18382835388183594, + "step": 5808 + }, + { + "epoch": 0.7853316434305028, + "grad_norm": 1.213761806488037, + "learning_rate": 3.7730370659560904e-06, + "loss": 0.11244463920593262, + "step": 5809 + }, + { + "epoch": 0.7854668356569497, + "grad_norm": 0.9012846350669861, + "learning_rate": 3.7684991796609746e-06, + "loss": 0.1271834373474121, + "step": 5810 + }, + { + "epoch": 0.7856020278833967, + "grad_norm": 0.7157869935035706, + "learning_rate": 3.7639636318242344e-06, + "loss": 0.1372213363647461, + "step": 5811 + }, + { + "epoch": 0.7857372201098437, + "grad_norm": 1.1840213537216187, + "learning_rate": 3.7594304233901738e-06, + "loss": 0.1892223358154297, + "step": 5812 + }, + { + "epoch": 0.7858724123362907, + "grad_norm": 1.004406452178955, + "learning_rate": 3.754899555302645e-06, + "loss": 0.12111473083496094, + "step": 5813 + }, + { + "epoch": 0.7860076045627377, + "grad_norm": 1.1647124290466309, + "learning_rate": 3.7503710285049964e-06, + "loss": 0.1362166404724121, + "step": 5814 + }, + { + "epoch": 0.7861427967891846, + "grad_norm": 1.4857046604156494, + "learning_rate": 3.7458448439400888e-06, + "loss": 0.2019481658935547, + "step": 5815 + }, + { + "epoch": 0.7862779890156316, + "grad_norm": 1.1760765314102173, + "learning_rate": 3.7413210025502985e-06, + "loss": 0.1202692985534668, + "step": 5816 + }, + { + "epoch": 0.7864131812420786, + "grad_norm": 0.8019460439682007, + "learning_rate": 3.7367995052775123e-06, + "loss": 0.14469313621520996, + "step": 5817 + }, + { + "epoch": 0.7865483734685256, + "grad_norm": 1.664031744003296, + "learning_rate": 3.732280353063133e-06, + "loss": 0.18428564071655273, + "step": 5818 + }, + { + "epoch": 0.7866835656949726, + "grad_norm": 1.1546237468719482, + "learning_rate": 3.727763546848074e-06, + "loss": 0.1697826385498047, + "step": 5819 + }, + { + "epoch": 0.7868187579214195, + "grad_norm": 1.524002194404602, + "learning_rate": 3.7232490875727544e-06, + "loss": 0.1154184341430664, + "step": 5820 + }, + { + "epoch": 0.7869539501478665, + "grad_norm": 2.606360673904419, + "learning_rate": 3.718736976177108e-06, + "loss": 0.20606350898742676, + "step": 5821 + }, + { + "epoch": 0.7870891423743135, + "grad_norm": 1.4419660568237305, + "learning_rate": 3.71422721360058e-06, + "loss": 0.17506885528564453, + "step": 5822 + }, + { + "epoch": 0.7872243346007605, + "grad_norm": 1.0727072954177856, + "learning_rate": 3.709719800782133e-06, + "loss": 0.13677263259887695, + "step": 5823 + }, + { + "epoch": 0.7873595268272074, + "grad_norm": 0.8261385560035706, + "learning_rate": 3.7052147386602304e-06, + "loss": 0.10388565063476562, + "step": 5824 + }, + { + "epoch": 0.7874947190536544, + "grad_norm": 0.9895406365394592, + "learning_rate": 3.700712028172851e-06, + "loss": 0.13189315795898438, + "step": 5825 + }, + { + "epoch": 0.7876299112801014, + "grad_norm": 1.8887265920639038, + "learning_rate": 3.696211670257481e-06, + "loss": 0.2043914794921875, + "step": 5826 + }, + { + "epoch": 0.7877651035065484, + "grad_norm": 1.599543571472168, + "learning_rate": 3.691713665851117e-06, + "loss": 0.21718215942382812, + "step": 5827 + }, + { + "epoch": 0.7879002957329954, + "grad_norm": 1.4330143928527832, + "learning_rate": 3.6872180158902764e-06, + "loss": 0.1817312240600586, + "step": 5828 + }, + { + "epoch": 0.7880354879594423, + "grad_norm": 1.2297337055206299, + "learning_rate": 3.6827247213109705e-06, + "loss": 0.1617746353149414, + "step": 5829 + }, + { + "epoch": 0.7881706801858893, + "grad_norm": 0.9841012358665466, + "learning_rate": 3.6782337830487294e-06, + "loss": 0.16829204559326172, + "step": 5830 + }, + { + "epoch": 0.7883058724123363, + "grad_norm": 1.082634687423706, + "learning_rate": 3.6737452020385886e-06, + "loss": 0.14481878280639648, + "step": 5831 + }, + { + "epoch": 0.7884410646387833, + "grad_norm": 1.3710473775863647, + "learning_rate": 3.6692589792150923e-06, + "loss": 0.19174480438232422, + "step": 5832 + }, + { + "epoch": 0.7885762568652303, + "grad_norm": 1.2013518810272217, + "learning_rate": 3.6647751155123026e-06, + "loss": 0.16481590270996094, + "step": 5833 + }, + { + "epoch": 0.7887114490916772, + "grad_norm": 1.1895521879196167, + "learning_rate": 3.660293611863782e-06, + "loss": 0.1885700225830078, + "step": 5834 + }, + { + "epoch": 0.7888466413181242, + "grad_norm": 2.2827346324920654, + "learning_rate": 3.655814469202602e-06, + "loss": 0.2006072998046875, + "step": 5835 + }, + { + "epoch": 0.7889818335445712, + "grad_norm": 2.6859853267669678, + "learning_rate": 3.6513376884613446e-06, + "loss": 0.2439422607421875, + "step": 5836 + }, + { + "epoch": 0.7891170257710182, + "grad_norm": 1.476603388786316, + "learning_rate": 3.6468632705720934e-06, + "loss": 0.20268790423870087, + "step": 5837 + }, + { + "epoch": 0.7892522179974651, + "grad_norm": 1.3803499937057495, + "learning_rate": 3.6423912164664606e-06, + "loss": 0.18988323211669922, + "step": 5838 + }, + { + "epoch": 0.7893874102239121, + "grad_norm": 1.1412652730941772, + "learning_rate": 3.637921527075534e-06, + "loss": 0.1586771011352539, + "step": 5839 + }, + { + "epoch": 0.7895226024503591, + "grad_norm": 0.9427790641784668, + "learning_rate": 3.63345420332994e-06, + "loss": 0.1585707664489746, + "step": 5840 + }, + { + "epoch": 0.7896577946768061, + "grad_norm": 1.3868263959884644, + "learning_rate": 3.628989246159795e-06, + "loss": 0.158416748046875, + "step": 5841 + }, + { + "epoch": 0.7897929869032531, + "grad_norm": 1.1198753118515015, + "learning_rate": 3.6245266564947205e-06, + "loss": 0.2070155143737793, + "step": 5842 + }, + { + "epoch": 0.7899281791297, + "grad_norm": 0.8191492557525635, + "learning_rate": 3.620066435263868e-06, + "loss": 0.18553447723388672, + "step": 5843 + }, + { + "epoch": 0.790063371356147, + "grad_norm": 1.6474485397338867, + "learning_rate": 3.6156085833958596e-06, + "loss": 0.17853546142578125, + "step": 5844 + }, + { + "epoch": 0.790198563582594, + "grad_norm": 1.2241733074188232, + "learning_rate": 3.6111531018188584e-06, + "loss": 0.2231006622314453, + "step": 5845 + }, + { + "epoch": 0.790333755809041, + "grad_norm": 1.1772730350494385, + "learning_rate": 3.606699991460513e-06, + "loss": 0.13770484924316406, + "step": 5846 + }, + { + "epoch": 0.790468948035488, + "grad_norm": 1.581953763961792, + "learning_rate": 3.602249253247986e-06, + "loss": 0.2326803207397461, + "step": 5847 + }, + { + "epoch": 0.7906041402619349, + "grad_norm": 1.3109188079833984, + "learning_rate": 3.5978008881079445e-06, + "loss": 0.1599719524383545, + "step": 5848 + }, + { + "epoch": 0.7907393324883819, + "grad_norm": 0.7251803278923035, + "learning_rate": 3.5933548969665587e-06, + "loss": 0.13519883155822754, + "step": 5849 + }, + { + "epoch": 0.7908745247148289, + "grad_norm": 0.7502581477165222, + "learning_rate": 3.5889112807495152e-06, + "loss": 0.1480565071105957, + "step": 5850 + }, + { + "epoch": 0.7910097169412759, + "grad_norm": 1.209169864654541, + "learning_rate": 3.5844700403819935e-06, + "loss": 0.22179794311523438, + "step": 5851 + }, + { + "epoch": 0.7911449091677228, + "grad_norm": 1.134719967842102, + "learning_rate": 3.5800311767886847e-06, + "loss": 0.15722942352294922, + "step": 5852 + }, + { + "epoch": 0.7912801013941698, + "grad_norm": 0.9985283613204956, + "learning_rate": 3.575594690893784e-06, + "loss": 0.2027425765991211, + "step": 5853 + }, + { + "epoch": 0.7914152936206168, + "grad_norm": 0.8400408625602722, + "learning_rate": 3.5711605836209853e-06, + "loss": 0.12825965881347656, + "step": 5854 + }, + { + "epoch": 0.7915504858470638, + "grad_norm": 2.0692615509033203, + "learning_rate": 3.566728855893505e-06, + "loss": 0.20149993896484375, + "step": 5855 + }, + { + "epoch": 0.7916856780735108, + "grad_norm": 1.2247815132141113, + "learning_rate": 3.5622995086340466e-06, + "loss": 0.24745559692382812, + "step": 5856 + }, + { + "epoch": 0.7918208702999577, + "grad_norm": 1.2648489475250244, + "learning_rate": 3.5578725427648233e-06, + "loss": 0.1477351188659668, + "step": 5857 + }, + { + "epoch": 0.7919560625264047, + "grad_norm": 0.8537172675132751, + "learning_rate": 3.553447959207553e-06, + "loss": 0.14914178848266602, + "step": 5858 + }, + { + "epoch": 0.7920912547528517, + "grad_norm": 1.338640570640564, + "learning_rate": 3.5490257588834552e-06, + "loss": 0.15317249298095703, + "step": 5859 + }, + { + "epoch": 0.7922264469792987, + "grad_norm": 0.8530309200286865, + "learning_rate": 3.5446059427132615e-06, + "loss": 0.15430736541748047, + "step": 5860 + }, + { + "epoch": 0.7923616392057456, + "grad_norm": 2.4116532802581787, + "learning_rate": 3.5401885116171977e-06, + "loss": 0.19091796875, + "step": 5861 + }, + { + "epoch": 0.7924968314321926, + "grad_norm": 1.244110345840454, + "learning_rate": 3.5357734665149983e-06, + "loss": 0.1914815902709961, + "step": 5862 + }, + { + "epoch": 0.7926320236586396, + "grad_norm": 0.9299548864364624, + "learning_rate": 3.5313608083258975e-06, + "loss": 0.15876483917236328, + "step": 5863 + }, + { + "epoch": 0.7927672158850866, + "grad_norm": 1.0739024877548218, + "learning_rate": 3.526950537968629e-06, + "loss": 0.1807384490966797, + "step": 5864 + }, + { + "epoch": 0.7929024081115336, + "grad_norm": 1.2986563444137573, + "learning_rate": 3.5225426563614466e-06, + "loss": 0.15717315673828125, + "step": 5865 + }, + { + "epoch": 0.7930376003379805, + "grad_norm": 0.9040958881378174, + "learning_rate": 3.518137164422088e-06, + "loss": 0.13258934020996094, + "step": 5866 + }, + { + "epoch": 0.7931727925644275, + "grad_norm": 1.8466547727584839, + "learning_rate": 3.513734063067799e-06, + "loss": 0.1913738250732422, + "step": 5867 + }, + { + "epoch": 0.7933079847908745, + "grad_norm": 0.6558601260185242, + "learning_rate": 3.5093333532153316e-06, + "loss": 0.12159445881843567, + "step": 5868 + }, + { + "epoch": 0.7934431770173215, + "grad_norm": 0.8004727959632874, + "learning_rate": 3.504935035780931e-06, + "loss": 0.15655040740966797, + "step": 5869 + }, + { + "epoch": 0.7935783692437685, + "grad_norm": 1.3584675788879395, + "learning_rate": 3.500539111680364e-06, + "loss": 0.16486310958862305, + "step": 5870 + }, + { + "epoch": 0.7937135614702154, + "grad_norm": 1.7554868459701538, + "learning_rate": 3.4961455818288683e-06, + "loss": 0.207000732421875, + "step": 5871 + }, + { + "epoch": 0.7938487536966624, + "grad_norm": 0.8711773157119751, + "learning_rate": 3.491754447141212e-06, + "loss": 0.1572742462158203, + "step": 5872 + }, + { + "epoch": 0.7939839459231094, + "grad_norm": 1.2152760028839111, + "learning_rate": 3.4873657085316504e-06, + "loss": 0.17601966857910156, + "step": 5873 + }, + { + "epoch": 0.7941191381495564, + "grad_norm": 0.8813444375991821, + "learning_rate": 3.482979366913935e-06, + "loss": 0.17440176010131836, + "step": 5874 + }, + { + "epoch": 0.7942543303760033, + "grad_norm": 0.8859923481941223, + "learning_rate": 3.4785954232013423e-06, + "loss": 0.1443626880645752, + "step": 5875 + }, + { + "epoch": 0.7943895226024503, + "grad_norm": 2.0067145824432373, + "learning_rate": 3.4742138783066122e-06, + "loss": 0.21198463439941406, + "step": 5876 + }, + { + "epoch": 0.7945247148288973, + "grad_norm": 1.0756579637527466, + "learning_rate": 3.4698347331420206e-06, + "loss": 0.20216941833496094, + "step": 5877 + }, + { + "epoch": 0.7946599070553443, + "grad_norm": 0.9025580286979675, + "learning_rate": 3.4654579886193223e-06, + "loss": 0.1793837547302246, + "step": 5878 + }, + { + "epoch": 0.7947950992817913, + "grad_norm": 1.5246014595031738, + "learning_rate": 3.461083645649782e-06, + "loss": 0.17731475830078125, + "step": 5879 + }, + { + "epoch": 0.7949302915082382, + "grad_norm": 1.0727031230926514, + "learning_rate": 3.4567117051441594e-06, + "loss": 0.1420574188232422, + "step": 5880 + }, + { + "epoch": 0.7950654837346852, + "grad_norm": 0.850818932056427, + "learning_rate": 3.4523421680127115e-06, + "loss": 0.15140771865844727, + "step": 5881 + }, + { + "epoch": 0.7952006759611322, + "grad_norm": 1.2207752466201782, + "learning_rate": 3.447975035165209e-06, + "loss": 0.16934967041015625, + "step": 5882 + }, + { + "epoch": 0.7953358681875792, + "grad_norm": 1.8987658023834229, + "learning_rate": 3.4436103075109076e-06, + "loss": 0.19762897491455078, + "step": 5883 + }, + { + "epoch": 0.7954710604140262, + "grad_norm": 1.1975834369659424, + "learning_rate": 3.4392479859585642e-06, + "loss": 0.16284751892089844, + "step": 5884 + }, + { + "epoch": 0.7956062526404731, + "grad_norm": 1.592936396598816, + "learning_rate": 3.4348880714164416e-06, + "loss": 0.19690322875976562, + "step": 5885 + }, + { + "epoch": 0.7957414448669201, + "grad_norm": 2.1984376907348633, + "learning_rate": 3.430530564792289e-06, + "loss": 0.18921947479248047, + "step": 5886 + }, + { + "epoch": 0.7958766370933671, + "grad_norm": 1.0649752616882324, + "learning_rate": 3.426175466993374e-06, + "loss": 0.17252230644226074, + "step": 5887 + }, + { + "epoch": 0.7960118293198141, + "grad_norm": 1.1380372047424316, + "learning_rate": 3.4218227789264468e-06, + "loss": 0.13836383819580078, + "step": 5888 + }, + { + "epoch": 0.796147021546261, + "grad_norm": 0.7978634834289551, + "learning_rate": 3.417472501497758e-06, + "loss": 0.14134740829467773, + "step": 5889 + }, + { + "epoch": 0.796282213772708, + "grad_norm": 2.3068878650665283, + "learning_rate": 3.413124635613061e-06, + "loss": 0.21338367462158203, + "step": 5890 + }, + { + "epoch": 0.796417405999155, + "grad_norm": 1.2715401649475098, + "learning_rate": 3.4087791821775986e-06, + "loss": 0.18868911266326904, + "step": 5891 + }, + { + "epoch": 0.796552598225602, + "grad_norm": 0.8652893304824829, + "learning_rate": 3.4044361420961285e-06, + "loss": 0.14075469970703125, + "step": 5892 + }, + { + "epoch": 0.796687790452049, + "grad_norm": 1.0169157981872559, + "learning_rate": 3.4000955162728866e-06, + "loss": 0.13190746307373047, + "step": 5893 + }, + { + "epoch": 0.7968229826784959, + "grad_norm": 1.3094630241394043, + "learning_rate": 3.3957573056116164e-06, + "loss": 0.1589794158935547, + "step": 5894 + }, + { + "epoch": 0.7969581749049429, + "grad_norm": 0.6766445636749268, + "learning_rate": 3.391421511015558e-06, + "loss": 0.11551189422607422, + "step": 5895 + }, + { + "epoch": 0.7970933671313899, + "grad_norm": 0.6453374028205872, + "learning_rate": 3.38708813338744e-06, + "loss": 0.12232255935668945, + "step": 5896 + }, + { + "epoch": 0.7972285593578369, + "grad_norm": 1.168291449546814, + "learning_rate": 3.382757173629506e-06, + "loss": 0.1675577163696289, + "step": 5897 + }, + { + "epoch": 0.7973637515842839, + "grad_norm": 1.5562000274658203, + "learning_rate": 3.378428632643478e-06, + "loss": 0.19261741638183594, + "step": 5898 + }, + { + "epoch": 0.7974989438107308, + "grad_norm": 0.8348447680473328, + "learning_rate": 3.3741025113305825e-06, + "loss": 0.19612598419189453, + "step": 5899 + }, + { + "epoch": 0.7976341360371778, + "grad_norm": 0.890941321849823, + "learning_rate": 3.369778810591541e-06, + "loss": 0.1629781723022461, + "step": 5900 + }, + { + "epoch": 0.7977693282636248, + "grad_norm": 0.9431806802749634, + "learning_rate": 3.3654575313265664e-06, + "loss": 0.17294979095458984, + "step": 5901 + }, + { + "epoch": 0.7979045204900718, + "grad_norm": 0.7037155032157898, + "learning_rate": 3.361138674435386e-06, + "loss": 0.1250828504562378, + "step": 5902 + }, + { + "epoch": 0.7980397127165187, + "grad_norm": 1.0607848167419434, + "learning_rate": 3.35682224081719e-06, + "loss": 0.18647003173828125, + "step": 5903 + }, + { + "epoch": 0.7981749049429658, + "grad_norm": 0.9927728772163391, + "learning_rate": 3.352508231370699e-06, + "loss": 0.1597309112548828, + "step": 5904 + }, + { + "epoch": 0.7983100971694128, + "grad_norm": 0.7160606384277344, + "learning_rate": 3.3481966469941044e-06, + "loss": 0.1300565004348755, + "step": 5905 + }, + { + "epoch": 0.7984452893958598, + "grad_norm": 1.1032862663269043, + "learning_rate": 3.3438874885850984e-06, + "loss": 0.21190643310546875, + "step": 5906 + }, + { + "epoch": 0.7985804816223068, + "grad_norm": 1.1787217855453491, + "learning_rate": 3.3395807570408847e-06, + "loss": 0.18780803680419922, + "step": 5907 + }, + { + "epoch": 0.7987156738487537, + "grad_norm": 1.0600337982177734, + "learning_rate": 3.33527645325813e-06, + "loss": 0.18407821655273438, + "step": 5908 + }, + { + "epoch": 0.7988508660752007, + "grad_norm": 2.2151525020599365, + "learning_rate": 3.3309745781330247e-06, + "loss": 0.1832275390625, + "step": 5909 + }, + { + "epoch": 0.7989860583016477, + "grad_norm": 1.0850056409835815, + "learning_rate": 3.32667513256124e-06, + "loss": 0.18148040771484375, + "step": 5910 + }, + { + "epoch": 0.7991212505280947, + "grad_norm": 0.7696892023086548, + "learning_rate": 3.3223781174379375e-06, + "loss": 0.1491374969482422, + "step": 5911 + }, + { + "epoch": 0.7992564427545417, + "grad_norm": 1.0843309164047241, + "learning_rate": 3.3180835336577917e-06, + "loss": 0.1965789794921875, + "step": 5912 + }, + { + "epoch": 0.7993916349809886, + "grad_norm": 1.2669497728347778, + "learning_rate": 3.313791382114943e-06, + "loss": 0.2366180419921875, + "step": 5913 + }, + { + "epoch": 0.7995268272074356, + "grad_norm": 1.4543774127960205, + "learning_rate": 3.3095016637030505e-06, + "loss": 0.22435855865478516, + "step": 5914 + }, + { + "epoch": 0.7996620194338826, + "grad_norm": 1.7445255517959595, + "learning_rate": 3.3052143793152524e-06, + "loss": 0.1688370704650879, + "step": 5915 + }, + { + "epoch": 0.7997972116603296, + "grad_norm": 0.7679470181465149, + "learning_rate": 3.3009295298441855e-06, + "loss": 0.14403915405273438, + "step": 5916 + }, + { + "epoch": 0.7999324038867766, + "grad_norm": 0.9925470948219299, + "learning_rate": 3.2966471161819767e-06, + "loss": 0.1600818634033203, + "step": 5917 + }, + { + "epoch": 0.8000675961132235, + "grad_norm": 1.1217235326766968, + "learning_rate": 3.292367139220246e-06, + "loss": 0.13979387283325195, + "step": 5918 + }, + { + "epoch": 0.8002027883396705, + "grad_norm": 0.6252045631408691, + "learning_rate": 3.288089599850112e-06, + "loss": 0.10034584999084473, + "step": 5919 + }, + { + "epoch": 0.8003379805661175, + "grad_norm": 1.0051994323730469, + "learning_rate": 3.2838144989621795e-06, + "loss": 0.1896648406982422, + "step": 5920 + }, + { + "epoch": 0.8004731727925645, + "grad_norm": 2.191072463989258, + "learning_rate": 3.2795418374465458e-06, + "loss": 0.1831340789794922, + "step": 5921 + }, + { + "epoch": 0.8006083650190114, + "grad_norm": 0.9865363240242004, + "learning_rate": 3.275271616192803e-06, + "loss": 0.17546606063842773, + "step": 5922 + }, + { + "epoch": 0.8007435572454584, + "grad_norm": 1.162110686302185, + "learning_rate": 3.2710038360900303e-06, + "loss": 0.2068004608154297, + "step": 5923 + }, + { + "epoch": 0.8008787494719054, + "grad_norm": 1.0392760038375854, + "learning_rate": 3.266738498026808e-06, + "loss": 0.20862388610839844, + "step": 5924 + }, + { + "epoch": 0.8010139416983524, + "grad_norm": 0.8232437372207642, + "learning_rate": 3.2624756028912005e-06, + "loss": 0.1715259552001953, + "step": 5925 + }, + { + "epoch": 0.8011491339247994, + "grad_norm": 1.089142918586731, + "learning_rate": 3.2582151515707655e-06, + "loss": 0.18799686431884766, + "step": 5926 + }, + { + "epoch": 0.8012843261512463, + "grad_norm": 1.280590295791626, + "learning_rate": 3.253957144952551e-06, + "loss": 0.19170284271240234, + "step": 5927 + }, + { + "epoch": 0.8014195183776933, + "grad_norm": 1.770272135734558, + "learning_rate": 3.249701583923091e-06, + "loss": 0.2247486114501953, + "step": 5928 + }, + { + "epoch": 0.8015547106041403, + "grad_norm": 1.223771572113037, + "learning_rate": 3.2454484693684257e-06, + "loss": 0.18434619903564453, + "step": 5929 + }, + { + "epoch": 0.8016899028305873, + "grad_norm": 0.7650221586227417, + "learning_rate": 3.2411978021740727e-06, + "loss": 0.15496540069580078, + "step": 5930 + }, + { + "epoch": 0.8018250950570343, + "grad_norm": 0.9063459038734436, + "learning_rate": 3.2369495832250434e-06, + "loss": 0.16783761978149414, + "step": 5931 + }, + { + "epoch": 0.8019602872834812, + "grad_norm": 0.6971040368080139, + "learning_rate": 3.2327038134058378e-06, + "loss": 0.1528301239013672, + "step": 5932 + }, + { + "epoch": 0.8020954795099282, + "grad_norm": 1.5162559747695923, + "learning_rate": 3.228460493600446e-06, + "loss": 0.2302417755126953, + "step": 5933 + }, + { + "epoch": 0.8022306717363752, + "grad_norm": 1.0452804565429688, + "learning_rate": 3.2242196246923554e-06, + "loss": 0.16797232627868652, + "step": 5934 + }, + { + "epoch": 0.8023658639628222, + "grad_norm": 1.0449769496917725, + "learning_rate": 3.2199812075645375e-06, + "loss": 0.2094583511352539, + "step": 5935 + }, + { + "epoch": 0.8025010561892691, + "grad_norm": 1.8267536163330078, + "learning_rate": 3.2157452430994487e-06, + "loss": 0.15792083740234375, + "step": 5936 + }, + { + "epoch": 0.8026362484157161, + "grad_norm": 1.158838152885437, + "learning_rate": 3.2115117321790427e-06, + "loss": 0.1681222915649414, + "step": 5937 + }, + { + "epoch": 0.8027714406421631, + "grad_norm": 0.9939912557601929, + "learning_rate": 3.207280675684754e-06, + "loss": 0.17552566528320312, + "step": 5938 + }, + { + "epoch": 0.8029066328686101, + "grad_norm": 1.7586792707443237, + "learning_rate": 3.203052074497523e-06, + "loss": 0.15172886848449707, + "step": 5939 + }, + { + "epoch": 0.8030418250950571, + "grad_norm": 2.5150694847106934, + "learning_rate": 3.198825929497752e-06, + "loss": 0.17313742637634277, + "step": 5940 + }, + { + "epoch": 0.803177017321504, + "grad_norm": 0.8184636831283569, + "learning_rate": 3.194602241565357e-06, + "loss": 0.15375947952270508, + "step": 5941 + }, + { + "epoch": 0.803312209547951, + "grad_norm": 0.9839374423027039, + "learning_rate": 3.1903810115797282e-06, + "loss": 0.1146245002746582, + "step": 5942 + }, + { + "epoch": 0.803447401774398, + "grad_norm": 1.1804472208023071, + "learning_rate": 3.1861622404197475e-06, + "loss": 0.15810644626617432, + "step": 5943 + }, + { + "epoch": 0.803582594000845, + "grad_norm": 0.9766131639480591, + "learning_rate": 3.181945928963794e-06, + "loss": 0.14694786071777344, + "step": 5944 + }, + { + "epoch": 0.803717786227292, + "grad_norm": 1.7506136894226074, + "learning_rate": 3.1777320780897124e-06, + "loss": 0.15791082382202148, + "step": 5945 + }, + { + "epoch": 0.8038529784537389, + "grad_norm": 0.8934969305992126, + "learning_rate": 3.1735206886748602e-06, + "loss": 0.16957378387451172, + "step": 5946 + }, + { + "epoch": 0.8039881706801859, + "grad_norm": 1.1023931503295898, + "learning_rate": 3.1693117615960665e-06, + "loss": 0.19230270385742188, + "step": 5947 + }, + { + "epoch": 0.8041233629066329, + "grad_norm": 1.4080901145935059, + "learning_rate": 3.1651052977296537e-06, + "loss": 0.20988082885742188, + "step": 5948 + }, + { + "epoch": 0.8042585551330799, + "grad_norm": 0.9943472146987915, + "learning_rate": 3.1609012979514273e-06, + "loss": 0.17920851707458496, + "step": 5949 + }, + { + "epoch": 0.8043937473595268, + "grad_norm": 1.4543040990829468, + "learning_rate": 3.156699763136683e-06, + "loss": 0.18654441833496094, + "step": 5950 + }, + { + "epoch": 0.8045289395859738, + "grad_norm": 1.0371907949447632, + "learning_rate": 3.152500694160207e-06, + "loss": 0.1560354232788086, + "step": 5951 + }, + { + "epoch": 0.8046641318124208, + "grad_norm": 2.1326818466186523, + "learning_rate": 3.148304091896265e-06, + "loss": 0.1775798797607422, + "step": 5952 + }, + { + "epoch": 0.8047993240388678, + "grad_norm": 2.1662580966949463, + "learning_rate": 3.144109957218612e-06, + "loss": 0.16460657119750977, + "step": 5953 + }, + { + "epoch": 0.8049345162653148, + "grad_norm": 1.733975887298584, + "learning_rate": 3.1399182910004893e-06, + "loss": 0.18264341354370117, + "step": 5954 + }, + { + "epoch": 0.8050697084917617, + "grad_norm": 0.7742398381233215, + "learning_rate": 3.1357290941146215e-06, + "loss": 0.13188135623931885, + "step": 5955 + }, + { + "epoch": 0.8052049007182087, + "grad_norm": 1.1824049949645996, + "learning_rate": 3.1315423674332265e-06, + "loss": 0.17107105255126953, + "step": 5956 + }, + { + "epoch": 0.8053400929446557, + "grad_norm": 0.8766518831253052, + "learning_rate": 3.127358111828002e-06, + "loss": 0.17285728454589844, + "step": 5957 + }, + { + "epoch": 0.8054752851711027, + "grad_norm": 0.6217026710510254, + "learning_rate": 3.123176328170131e-06, + "loss": 0.1420135498046875, + "step": 5958 + }, + { + "epoch": 0.8056104773975497, + "grad_norm": 1.439857006072998, + "learning_rate": 3.1189970173302816e-06, + "loss": 0.22020769119262695, + "step": 5959 + }, + { + "epoch": 0.8057456696239966, + "grad_norm": 1.166935682296753, + "learning_rate": 3.1148201801786085e-06, + "loss": 0.18059158325195312, + "step": 5960 + }, + { + "epoch": 0.8058808618504436, + "grad_norm": 1.0186827182769775, + "learning_rate": 3.1106458175847572e-06, + "loss": 0.13172435760498047, + "step": 5961 + }, + { + "epoch": 0.8060160540768906, + "grad_norm": 0.811034619808197, + "learning_rate": 3.106473930417848e-06, + "loss": 0.10042047500610352, + "step": 5962 + }, + { + "epoch": 0.8061512463033376, + "grad_norm": 1.2436472177505493, + "learning_rate": 3.1023045195464903e-06, + "loss": 0.1500415802001953, + "step": 5963 + }, + { + "epoch": 0.8062864385297845, + "grad_norm": 0.7157169580459595, + "learning_rate": 3.098137585838779e-06, + "loss": 0.17971539497375488, + "step": 5964 + }, + { + "epoch": 0.8064216307562315, + "grad_norm": 1.1864300966262817, + "learning_rate": 3.093973130162286e-06, + "loss": 0.11483001708984375, + "step": 5965 + }, + { + "epoch": 0.8065568229826785, + "grad_norm": 1.507524013519287, + "learning_rate": 3.089811153384083e-06, + "loss": 0.13365426659584045, + "step": 5966 + }, + { + "epoch": 0.8066920152091255, + "grad_norm": 1.3271453380584717, + "learning_rate": 3.08565165637071e-06, + "loss": 0.21441078186035156, + "step": 5967 + }, + { + "epoch": 0.8068272074355725, + "grad_norm": 1.6617037057876587, + "learning_rate": 3.081494639988196e-06, + "loss": 0.15883731842041016, + "step": 5968 + }, + { + "epoch": 0.8069623996620194, + "grad_norm": 1.2097936868667603, + "learning_rate": 3.077340105102057e-06, + "loss": 0.17082786560058594, + "step": 5969 + }, + { + "epoch": 0.8070975918884664, + "grad_norm": 0.9197073578834534, + "learning_rate": 3.0731880525772817e-06, + "loss": 0.17792129516601562, + "step": 5970 + }, + { + "epoch": 0.8072327841149134, + "grad_norm": 1.4240236282348633, + "learning_rate": 3.069038483278364e-06, + "loss": 0.18358325958251953, + "step": 5971 + }, + { + "epoch": 0.8073679763413604, + "grad_norm": 1.0109297037124634, + "learning_rate": 3.0648913980692505e-06, + "loss": 0.21124935150146484, + "step": 5972 + }, + { + "epoch": 0.8075031685678074, + "grad_norm": 0.9199767112731934, + "learning_rate": 3.0607467978133985e-06, + "loss": 0.13824462890625, + "step": 5973 + }, + { + "epoch": 0.8076383607942543, + "grad_norm": 1.2320756912231445, + "learning_rate": 3.0566046833737294e-06, + "loss": 0.18898355960845947, + "step": 5974 + }, + { + "epoch": 0.8077735530207013, + "grad_norm": 0.8958389163017273, + "learning_rate": 3.0524650556126517e-06, + "loss": 0.15454483032226562, + "step": 5975 + }, + { + "epoch": 0.8079087452471483, + "grad_norm": 0.7117584347724915, + "learning_rate": 3.048327915392069e-06, + "loss": 0.11751937866210938, + "step": 5976 + }, + { + "epoch": 0.8080439374735953, + "grad_norm": 1.5684980154037476, + "learning_rate": 3.044193263573341e-06, + "loss": 0.18653488159179688, + "step": 5977 + }, + { + "epoch": 0.8081791297000422, + "grad_norm": 0.633672833442688, + "learning_rate": 3.0400611010173355e-06, + "loss": 0.12723708152770996, + "step": 5978 + }, + { + "epoch": 0.8083143219264892, + "grad_norm": 1.435807466506958, + "learning_rate": 3.0359314285843863e-06, + "loss": 0.15733051300048828, + "step": 5979 + }, + { + "epoch": 0.8084495141529362, + "grad_norm": 1.5499682426452637, + "learning_rate": 3.0318042471343104e-06, + "loss": 0.18578052520751953, + "step": 5980 + }, + { + "epoch": 0.8085847063793832, + "grad_norm": 0.8469221591949463, + "learning_rate": 3.027679557526422e-06, + "loss": 0.17724227905273438, + "step": 5981 + }, + { + "epoch": 0.8087198986058302, + "grad_norm": 0.9022312760353088, + "learning_rate": 3.0235573606194844e-06, + "loss": 0.1400747299194336, + "step": 5982 + }, + { + "epoch": 0.8088550908322771, + "grad_norm": 1.1368786096572876, + "learning_rate": 3.0194376572717743e-06, + "loss": 0.2078723907470703, + "step": 5983 + }, + { + "epoch": 0.8089902830587241, + "grad_norm": 1.1913106441497803, + "learning_rate": 3.0153204483410318e-06, + "loss": 0.18519115447998047, + "step": 5984 + }, + { + "epoch": 0.8091254752851711, + "grad_norm": 1.5260909795761108, + "learning_rate": 3.0112057346844834e-06, + "loss": 0.19892311096191406, + "step": 5985 + }, + { + "epoch": 0.8092606675116181, + "grad_norm": 1.2321239709854126, + "learning_rate": 3.007093517158832e-06, + "loss": 0.15843772888183594, + "step": 5986 + }, + { + "epoch": 0.809395859738065, + "grad_norm": 0.6808137893676758, + "learning_rate": 3.002983796620261e-06, + "loss": 0.14597702026367188, + "step": 5987 + }, + { + "epoch": 0.809531051964512, + "grad_norm": 1.8292533159255981, + "learning_rate": 2.9988765739244427e-06, + "loss": 0.19481205940246582, + "step": 5988 + }, + { + "epoch": 0.809666244190959, + "grad_norm": 1.192221999168396, + "learning_rate": 2.9947718499265197e-06, + "loss": 0.217620849609375, + "step": 5989 + }, + { + "epoch": 0.809801436417406, + "grad_norm": 0.7766583561897278, + "learning_rate": 2.9906696254811184e-06, + "loss": 0.1564464569091797, + "step": 5990 + }, + { + "epoch": 0.809936628643853, + "grad_norm": 1.8011471033096313, + "learning_rate": 2.9865699014423404e-06, + "loss": 0.1441364288330078, + "step": 5991 + }, + { + "epoch": 0.8100718208702999, + "grad_norm": 1.290523648262024, + "learning_rate": 2.9824726786637698e-06, + "loss": 0.1641385555267334, + "step": 5992 + }, + { + "epoch": 0.8102070130967469, + "grad_norm": 1.0178121328353882, + "learning_rate": 2.978377957998477e-06, + "loss": 0.18931198120117188, + "step": 5993 + }, + { + "epoch": 0.8103422053231939, + "grad_norm": 0.8530979752540588, + "learning_rate": 2.974285740299001e-06, + "loss": 0.14047670364379883, + "step": 5994 + }, + { + "epoch": 0.8104773975496409, + "grad_norm": 1.6549816131591797, + "learning_rate": 2.9701960264173612e-06, + "loss": 0.16742515563964844, + "step": 5995 + }, + { + "epoch": 0.8106125897760879, + "grad_norm": 1.6105870008468628, + "learning_rate": 2.96610881720506e-06, + "loss": 0.1378917694091797, + "step": 5996 + }, + { + "epoch": 0.8107477820025348, + "grad_norm": 1.0834912061691284, + "learning_rate": 2.9620241135130715e-06, + "loss": 0.2146596908569336, + "step": 5997 + }, + { + "epoch": 0.8108829742289818, + "grad_norm": 2.49206805229187, + "learning_rate": 2.9579419161918607e-06, + "loss": 0.20630550384521484, + "step": 5998 + }, + { + "epoch": 0.8110181664554288, + "grad_norm": 1.0012738704681396, + "learning_rate": 2.9538622260913595e-06, + "loss": 0.15961742401123047, + "step": 5999 + }, + { + "epoch": 0.8111533586818758, + "grad_norm": 1.3934578895568848, + "learning_rate": 2.9497850440609814e-06, + "loss": 0.16727781295776367, + "step": 6000 + }, + { + "epoch": 0.8112885509083227, + "grad_norm": 0.9119014739990234, + "learning_rate": 2.945710370949616e-06, + "loss": 0.14727783203125, + "step": 6001 + }, + { + "epoch": 0.8114237431347697, + "grad_norm": 0.6402777433395386, + "learning_rate": 2.941638207605629e-06, + "loss": 0.11113357543945312, + "step": 6002 + }, + { + "epoch": 0.8115589353612167, + "grad_norm": 1.40786874294281, + "learning_rate": 2.937568554876873e-06, + "loss": 0.1712799072265625, + "step": 6003 + }, + { + "epoch": 0.8116941275876637, + "grad_norm": 1.195235252380371, + "learning_rate": 2.9335014136106704e-06, + "loss": 0.16188287734985352, + "step": 6004 + }, + { + "epoch": 0.8118293198141107, + "grad_norm": 1.154290795326233, + "learning_rate": 2.929436784653818e-06, + "loss": 0.16428565979003906, + "step": 6005 + }, + { + "epoch": 0.8119645120405576, + "grad_norm": 1.3054907321929932, + "learning_rate": 2.925374668852597e-06, + "loss": 0.19732093811035156, + "step": 6006 + }, + { + "epoch": 0.8120997042670046, + "grad_norm": 0.768100917339325, + "learning_rate": 2.921315067052754e-06, + "loss": 0.14121365547180176, + "step": 6007 + }, + { + "epoch": 0.8122348964934516, + "grad_norm": 0.9616193771362305, + "learning_rate": 2.917257980099535e-06, + "loss": 0.16252517700195312, + "step": 6008 + }, + { + "epoch": 0.8123700887198986, + "grad_norm": 0.6918960809707642, + "learning_rate": 2.913203408837629e-06, + "loss": 0.16309165954589844, + "step": 6009 + }, + { + "epoch": 0.8125052809463456, + "grad_norm": 0.8309774994850159, + "learning_rate": 2.909151354111232e-06, + "loss": 0.1505732536315918, + "step": 6010 + }, + { + "epoch": 0.8126404731727925, + "grad_norm": 1.5441323518753052, + "learning_rate": 2.905101816763998e-06, + "loss": 0.1932506561279297, + "step": 6011 + }, + { + "epoch": 0.8127756653992395, + "grad_norm": 1.2129780054092407, + "learning_rate": 2.9010547976390617e-06, + "loss": 0.1472775936126709, + "step": 6012 + }, + { + "epoch": 0.8129108576256865, + "grad_norm": 1.1292622089385986, + "learning_rate": 2.897010297579042e-06, + "loss": 0.20280075073242188, + "step": 6013 + }, + { + "epoch": 0.8130460498521335, + "grad_norm": 0.9956125617027283, + "learning_rate": 2.8929683174260133e-06, + "loss": 0.1752185821533203, + "step": 6014 + }, + { + "epoch": 0.8131812420785804, + "grad_norm": 1.07522714138031, + "learning_rate": 2.8889288580215467e-06, + "loss": 0.10916900634765625, + "step": 6015 + }, + { + "epoch": 0.8133164343050274, + "grad_norm": 1.2742761373519897, + "learning_rate": 2.8848919202066752e-06, + "loss": 0.182281494140625, + "step": 6016 + }, + { + "epoch": 0.8134516265314744, + "grad_norm": 1.8565168380737305, + "learning_rate": 2.8808575048219123e-06, + "loss": 0.21141576766967773, + "step": 6017 + }, + { + "epoch": 0.8135868187579214, + "grad_norm": 0.964576780796051, + "learning_rate": 2.8768256127072436e-06, + "loss": 0.12996721267700195, + "step": 6018 + }, + { + "epoch": 0.8137220109843684, + "grad_norm": 1.0616918802261353, + "learning_rate": 2.872796244702128e-06, + "loss": 0.14802956581115723, + "step": 6019 + }, + { + "epoch": 0.8138572032108153, + "grad_norm": 0.9760144352912903, + "learning_rate": 2.8687694016455075e-06, + "loss": 0.19278335571289062, + "step": 6020 + }, + { + "epoch": 0.8139923954372623, + "grad_norm": 0.7736731767654419, + "learning_rate": 2.86474508437579e-06, + "loss": 0.17914915084838867, + "step": 6021 + }, + { + "epoch": 0.8141275876637093, + "grad_norm": 1.0802083015441895, + "learning_rate": 2.8607232937308587e-06, + "loss": 0.11943113803863525, + "step": 6022 + }, + { + "epoch": 0.8142627798901563, + "grad_norm": 1.279369831085205, + "learning_rate": 2.856704030548072e-06, + "loss": 0.1644296646118164, + "step": 6023 + }, + { + "epoch": 0.8143979721166033, + "grad_norm": 0.8400365114212036, + "learning_rate": 2.8526872956642568e-06, + "loss": 0.12800359725952148, + "step": 6024 + }, + { + "epoch": 0.8145331643430502, + "grad_norm": 0.911558985710144, + "learning_rate": 2.84867308991573e-06, + "loss": 0.14480876922607422, + "step": 6025 + }, + { + "epoch": 0.8146683565694972, + "grad_norm": 1.1778076887130737, + "learning_rate": 2.8446614141382638e-06, + "loss": 0.191680908203125, + "step": 6026 + }, + { + "epoch": 0.8148035487959442, + "grad_norm": 1.4221285581588745, + "learning_rate": 2.8406522691671104e-06, + "loss": 0.1989307403564453, + "step": 6027 + }, + { + "epoch": 0.8149387410223912, + "grad_norm": 0.9350261092185974, + "learning_rate": 2.8366456558369975e-06, + "loss": 0.1621246337890625, + "step": 6028 + }, + { + "epoch": 0.8150739332488381, + "grad_norm": 0.7575251460075378, + "learning_rate": 2.8326415749821186e-06, + "loss": 0.14951801300048828, + "step": 6029 + }, + { + "epoch": 0.8152091254752851, + "grad_norm": 1.2740938663482666, + "learning_rate": 2.828640027436151e-06, + "loss": 0.18076324462890625, + "step": 6030 + }, + { + "epoch": 0.8153443177017321, + "grad_norm": 1.2639656066894531, + "learning_rate": 2.824641014032235e-06, + "loss": 0.16839075088500977, + "step": 6031 + }, + { + "epoch": 0.8154795099281791, + "grad_norm": 1.9373390674591064, + "learning_rate": 2.820644535602987e-06, + "loss": 0.20013999938964844, + "step": 6032 + }, + { + "epoch": 0.8156147021546261, + "grad_norm": 1.3487224578857422, + "learning_rate": 2.8166505929804953e-06, + "loss": 0.22852134704589844, + "step": 6033 + }, + { + "epoch": 0.815749894381073, + "grad_norm": 0.8397018909454346, + "learning_rate": 2.8126591869963163e-06, + "loss": 0.12119770050048828, + "step": 6034 + }, + { + "epoch": 0.81588508660752, + "grad_norm": 0.9708743095397949, + "learning_rate": 2.8086703184814887e-06, + "loss": 0.17267417907714844, + "step": 6035 + }, + { + "epoch": 0.816020278833967, + "grad_norm": 0.8887884616851807, + "learning_rate": 2.8046839882665134e-06, + "loss": 0.19072723388671875, + "step": 6036 + }, + { + "epoch": 0.816155471060414, + "grad_norm": 1.8707904815673828, + "learning_rate": 2.800700197181364e-06, + "loss": 0.231536865234375, + "step": 6037 + }, + { + "epoch": 0.816290663286861, + "grad_norm": 1.3133318424224854, + "learning_rate": 2.7967189460554876e-06, + "loss": 0.13998031616210938, + "step": 6038 + }, + { + "epoch": 0.8164258555133079, + "grad_norm": 1.241249680519104, + "learning_rate": 2.792740235717801e-06, + "loss": 0.17536038160324097, + "step": 6039 + }, + { + "epoch": 0.816561047739755, + "grad_norm": 1.308943748474121, + "learning_rate": 2.7887640669967e-06, + "loss": 0.1636800765991211, + "step": 6040 + }, + { + "epoch": 0.816696239966202, + "grad_norm": 1.4974147081375122, + "learning_rate": 2.7847904407200327e-06, + "loss": 0.17994928359985352, + "step": 6041 + }, + { + "epoch": 0.816831432192649, + "grad_norm": 1.1783897876739502, + "learning_rate": 2.7808193577151363e-06, + "loss": 0.16935157775878906, + "step": 6042 + }, + { + "epoch": 0.816966624419096, + "grad_norm": 1.6643469333648682, + "learning_rate": 2.776850818808812e-06, + "loss": 0.21500921249389648, + "step": 6043 + }, + { + "epoch": 0.8171018166455429, + "grad_norm": 0.9193552732467651, + "learning_rate": 2.772884824827325e-06, + "loss": 0.1640491485595703, + "step": 6044 + }, + { + "epoch": 0.8172370088719899, + "grad_norm": 1.05073082447052, + "learning_rate": 2.768921376596429e-06, + "loss": 0.17467784881591797, + "step": 6045 + }, + { + "epoch": 0.8173722010984369, + "grad_norm": 3.345153570175171, + "learning_rate": 2.7649604749413176e-06, + "loss": 0.22596174478530884, + "step": 6046 + }, + { + "epoch": 0.8175073933248839, + "grad_norm": 1.515853762626648, + "learning_rate": 2.7610021206866837e-06, + "loss": 0.21549415588378906, + "step": 6047 + }, + { + "epoch": 0.8176425855513308, + "grad_norm": 2.041374444961548, + "learning_rate": 2.757046314656676e-06, + "loss": 0.21407747268676758, + "step": 6048 + }, + { + "epoch": 0.8177777777777778, + "grad_norm": 1.1523512601852417, + "learning_rate": 2.753093057674909e-06, + "loss": 0.2127547264099121, + "step": 6049 + }, + { + "epoch": 0.8179129700042248, + "grad_norm": 1.790549397468567, + "learning_rate": 2.749142350564483e-06, + "loss": 0.18041372299194336, + "step": 6050 + }, + { + "epoch": 0.8180481622306718, + "grad_norm": 0.684222936630249, + "learning_rate": 2.7451941941479414e-06, + "loss": 0.13886356353759766, + "step": 6051 + }, + { + "epoch": 0.8181833544571188, + "grad_norm": 2.2031030654907227, + "learning_rate": 2.741248589247323e-06, + "loss": 0.1802968978881836, + "step": 6052 + }, + { + "epoch": 0.8183185466835657, + "grad_norm": 0.9762936234474182, + "learning_rate": 2.73730553668412e-06, + "loss": 0.1410665512084961, + "step": 6053 + }, + { + "epoch": 0.8184537389100127, + "grad_norm": 1.05668306350708, + "learning_rate": 2.7333650372792978e-06, + "loss": 0.1737685203552246, + "step": 6054 + }, + { + "epoch": 0.8185889311364597, + "grad_norm": 1.5536688566207886, + "learning_rate": 2.7294270918532876e-06, + "loss": 0.13031005859375, + "step": 6055 + }, + { + "epoch": 0.8187241233629067, + "grad_norm": 0.892826497554779, + "learning_rate": 2.7254917012259882e-06, + "loss": 0.12874698638916016, + "step": 6056 + }, + { + "epoch": 0.8188593155893537, + "grad_norm": 0.9968985915184021, + "learning_rate": 2.721558866216776e-06, + "loss": 0.17169427871704102, + "step": 6057 + }, + { + "epoch": 0.8189945078158006, + "grad_norm": 1.5622568130493164, + "learning_rate": 2.7176285876444846e-06, + "loss": 0.15861892700195312, + "step": 6058 + }, + { + "epoch": 0.8191297000422476, + "grad_norm": 0.9793596863746643, + "learning_rate": 2.713700866327417e-06, + "loss": 0.1743389368057251, + "step": 6059 + }, + { + "epoch": 0.8192648922686946, + "grad_norm": 1.0789310932159424, + "learning_rate": 2.7097757030833497e-06, + "loss": 0.14250314235687256, + "step": 6060 + }, + { + "epoch": 0.8194000844951416, + "grad_norm": 1.404322624206543, + "learning_rate": 2.705853098729517e-06, + "loss": 0.16754817962646484, + "step": 6061 + }, + { + "epoch": 0.8195352767215885, + "grad_norm": 0.8507217764854431, + "learning_rate": 2.7019330540826325e-06, + "loss": 0.15524959564208984, + "step": 6062 + }, + { + "epoch": 0.8196704689480355, + "grad_norm": 1.3565703630447388, + "learning_rate": 2.6980155699588666e-06, + "loss": 0.19153404235839844, + "step": 6063 + }, + { + "epoch": 0.8198056611744825, + "grad_norm": 1.4479483366012573, + "learning_rate": 2.6941006471738633e-06, + "loss": 0.1690673828125, + "step": 6064 + }, + { + "epoch": 0.8199408534009295, + "grad_norm": 1.060706377029419, + "learning_rate": 2.690188286542726e-06, + "loss": 0.14524078369140625, + "step": 6065 + }, + { + "epoch": 0.8200760456273765, + "grad_norm": 0.795319676399231, + "learning_rate": 2.686278488880029e-06, + "loss": 0.12415313720703125, + "step": 6066 + }, + { + "epoch": 0.8202112378538234, + "grad_norm": 0.9854193329811096, + "learning_rate": 2.6823712549998187e-06, + "loss": 0.1701974868774414, + "step": 6067 + }, + { + "epoch": 0.8203464300802704, + "grad_norm": 1.7769702672958374, + "learning_rate": 2.678466585715599e-06, + "loss": 0.18006324768066406, + "step": 6068 + }, + { + "epoch": 0.8204816223067174, + "grad_norm": 1.349875569343567, + "learning_rate": 2.6745644818403426e-06, + "loss": 0.2193450927734375, + "step": 6069 + }, + { + "epoch": 0.8206168145331644, + "grad_norm": 1.6767956018447876, + "learning_rate": 2.6706649441864883e-06, + "loss": 0.17186546325683594, + "step": 6070 + }, + { + "epoch": 0.8207520067596114, + "grad_norm": 0.9098408222198486, + "learning_rate": 2.666767973565937e-06, + "loss": 0.15672779083251953, + "step": 6071 + }, + { + "epoch": 0.8208871989860583, + "grad_norm": 0.880479097366333, + "learning_rate": 2.6628735707900653e-06, + "loss": 0.16680526733398438, + "step": 6072 + }, + { + "epoch": 0.8210223912125053, + "grad_norm": 1.240790843963623, + "learning_rate": 2.658981736669707e-06, + "loss": 0.25571441650390625, + "step": 6073 + }, + { + "epoch": 0.8211575834389523, + "grad_norm": 1.0422959327697754, + "learning_rate": 2.655092472015161e-06, + "loss": 0.15816307067871094, + "step": 6074 + }, + { + "epoch": 0.8212927756653993, + "grad_norm": 1.1392889022827148, + "learning_rate": 2.6512057776361935e-06, + "loss": 0.20930880308151245, + "step": 6075 + }, + { + "epoch": 0.8214279678918462, + "grad_norm": 1.7220994234085083, + "learning_rate": 2.64732165434203e-06, + "loss": 0.18173789978027344, + "step": 6076 + }, + { + "epoch": 0.8215631601182932, + "grad_norm": 1.9753469228744507, + "learning_rate": 2.6434401029413792e-06, + "loss": 0.1540207862854004, + "step": 6077 + }, + { + "epoch": 0.8216983523447402, + "grad_norm": 1.1543912887573242, + "learning_rate": 2.639561124242385e-06, + "loss": 0.20812129974365234, + "step": 6078 + }, + { + "epoch": 0.8218335445711872, + "grad_norm": 0.6986100077629089, + "learning_rate": 2.635684719052682e-06, + "loss": 0.14097929000854492, + "step": 6079 + }, + { + "epoch": 0.8219687367976342, + "grad_norm": 1.185673713684082, + "learning_rate": 2.631810888179355e-06, + "loss": 0.1335620880126953, + "step": 6080 + }, + { + "epoch": 0.8221039290240811, + "grad_norm": 0.9642646908760071, + "learning_rate": 2.627939632428952e-06, + "loss": 0.13232135772705078, + "step": 6081 + }, + { + "epoch": 0.8222391212505281, + "grad_norm": 1.2406675815582275, + "learning_rate": 2.624070952607502e-06, + "loss": 0.13703632354736328, + "step": 6082 + }, + { + "epoch": 0.8223743134769751, + "grad_norm": 1.6374294757843018, + "learning_rate": 2.620204849520468e-06, + "loss": 0.18126296997070312, + "step": 6083 + }, + { + "epoch": 0.8225095057034221, + "grad_norm": 1.0054970979690552, + "learning_rate": 2.616341323972806e-06, + "loss": 0.1741647720336914, + "step": 6084 + }, + { + "epoch": 0.822644697929869, + "grad_norm": 1.076847791671753, + "learning_rate": 2.612480376768917e-06, + "loss": 0.1520226001739502, + "step": 6085 + }, + { + "epoch": 0.822779890156316, + "grad_norm": 1.0962084531784058, + "learning_rate": 2.608622008712672e-06, + "loss": 0.11036491394042969, + "step": 6086 + }, + { + "epoch": 0.822915082382763, + "grad_norm": 2.8500871658325195, + "learning_rate": 2.6047662206074034e-06, + "loss": 0.20782816410064697, + "step": 6087 + }, + { + "epoch": 0.82305027460921, + "grad_norm": 1.0612248182296753, + "learning_rate": 2.600913013255904e-06, + "loss": 0.17698431015014648, + "step": 6088 + }, + { + "epoch": 0.823185466835657, + "grad_norm": 1.733834981918335, + "learning_rate": 2.59706238746044e-06, + "loss": 0.1436328887939453, + "step": 6089 + }, + { + "epoch": 0.8233206590621039, + "grad_norm": 1.6506434679031372, + "learning_rate": 2.593214344022725e-06, + "loss": 0.23116111755371094, + "step": 6090 + }, + { + "epoch": 0.8234558512885509, + "grad_norm": 1.213346242904663, + "learning_rate": 2.5893688837439474e-06, + "loss": 0.2115011215209961, + "step": 6091 + }, + { + "epoch": 0.8235910435149979, + "grad_norm": 0.8036051392555237, + "learning_rate": 2.5855260074247473e-06, + "loss": 0.126739501953125, + "step": 6092 + }, + { + "epoch": 0.8237262357414449, + "grad_norm": 1.0845261812210083, + "learning_rate": 2.581685715865232e-06, + "loss": 0.1801142692565918, + "step": 6093 + }, + { + "epoch": 0.8238614279678919, + "grad_norm": 1.4276034832000732, + "learning_rate": 2.5778480098649766e-06, + "loss": 0.2077932357788086, + "step": 6094 + }, + { + "epoch": 0.8239966201943388, + "grad_norm": 1.3080353736877441, + "learning_rate": 2.5740128902230087e-06, + "loss": 0.1738567352294922, + "step": 6095 + }, + { + "epoch": 0.8241318124207858, + "grad_norm": 1.7174516916275024, + "learning_rate": 2.5701803577378214e-06, + "loss": 0.18425846099853516, + "step": 6096 + }, + { + "epoch": 0.8242670046472328, + "grad_norm": 0.8913487792015076, + "learning_rate": 2.566350413207366e-06, + "loss": 0.16863059997558594, + "step": 6097 + }, + { + "epoch": 0.8244021968736798, + "grad_norm": 0.9576337933540344, + "learning_rate": 2.5625230574290554e-06, + "loss": 0.16823863983154297, + "step": 6098 + }, + { + "epoch": 0.8245373891001267, + "grad_norm": 0.8997681140899658, + "learning_rate": 2.558698291199773e-06, + "loss": 0.15287399291992188, + "step": 6099 + }, + { + "epoch": 0.8246725813265737, + "grad_norm": 1.208045244216919, + "learning_rate": 2.5548761153158524e-06, + "loss": 0.17196178436279297, + "step": 6100 + }, + { + "epoch": 0.8248077735530207, + "grad_norm": 0.7089408040046692, + "learning_rate": 2.55105653057309e-06, + "loss": 0.1575608253479004, + "step": 6101 + }, + { + "epoch": 0.8249429657794677, + "grad_norm": 1.524511456489563, + "learning_rate": 2.547239537766743e-06, + "loss": 0.17682933807373047, + "step": 6102 + }, + { + "epoch": 0.8250781580059147, + "grad_norm": 0.8569207787513733, + "learning_rate": 2.543425137691526e-06, + "loss": 0.15643692016601562, + "step": 6103 + }, + { + "epoch": 0.8252133502323616, + "grad_norm": 1.1182936429977417, + "learning_rate": 2.5396133311416264e-06, + "loss": 0.18325233459472656, + "step": 6104 + }, + { + "epoch": 0.8253485424588086, + "grad_norm": 0.9564006328582764, + "learning_rate": 2.5358041189106784e-06, + "loss": 0.17651939392089844, + "step": 6105 + }, + { + "epoch": 0.8254837346852556, + "grad_norm": 0.8630929589271545, + "learning_rate": 2.531997501791779e-06, + "loss": 0.15137434005737305, + "step": 6106 + }, + { + "epoch": 0.8256189269117026, + "grad_norm": 0.90911465883255, + "learning_rate": 2.528193480577489e-06, + "loss": 0.11822032928466797, + "step": 6107 + }, + { + "epoch": 0.8257541191381496, + "grad_norm": 0.8955482244491577, + "learning_rate": 2.5243920560598186e-06, + "loss": 0.1212911605834961, + "step": 6108 + }, + { + "epoch": 0.8258893113645965, + "grad_norm": 2.1968774795532227, + "learning_rate": 2.5205932290302598e-06, + "loss": 0.22354888916015625, + "step": 6109 + }, + { + "epoch": 0.8260245035910435, + "grad_norm": 0.7046879529953003, + "learning_rate": 2.516797000279729e-06, + "loss": 0.10719013214111328, + "step": 6110 + }, + { + "epoch": 0.8261596958174905, + "grad_norm": 1.467839002609253, + "learning_rate": 2.513003370598637e-06, + "loss": 0.2000129222869873, + "step": 6111 + }, + { + "epoch": 0.8262948880439375, + "grad_norm": 0.7647542953491211, + "learning_rate": 2.509212340776832e-06, + "loss": 0.0921945571899414, + "step": 6112 + }, + { + "epoch": 0.8264300802703844, + "grad_norm": 1.3513394594192505, + "learning_rate": 2.505423911603622e-06, + "loss": 0.1749129295349121, + "step": 6113 + }, + { + "epoch": 0.8265652724968314, + "grad_norm": 0.861473560333252, + "learning_rate": 2.501638083867789e-06, + "loss": 0.14263617992401123, + "step": 6114 + }, + { + "epoch": 0.8267004647232784, + "grad_norm": 0.88294517993927, + "learning_rate": 2.497854858357552e-06, + "loss": 0.14968490600585938, + "step": 6115 + }, + { + "epoch": 0.8268356569497254, + "grad_norm": 2.7444021701812744, + "learning_rate": 2.494074235860604e-06, + "loss": 0.17145919799804688, + "step": 6116 + }, + { + "epoch": 0.8269708491761724, + "grad_norm": 0.8517177700996399, + "learning_rate": 2.4902962171640913e-06, + "loss": 0.14659547805786133, + "step": 6117 + }, + { + "epoch": 0.8271060414026193, + "grad_norm": 0.9189021587371826, + "learning_rate": 2.4865208030546167e-06, + "loss": 0.15937137603759766, + "step": 6118 + }, + { + "epoch": 0.8272412336290663, + "grad_norm": 0.9920191764831543, + "learning_rate": 2.482747994318239e-06, + "loss": 0.11202430725097656, + "step": 6119 + }, + { + "epoch": 0.8273764258555133, + "grad_norm": 1.146591305732727, + "learning_rate": 2.478977791740477e-06, + "loss": 0.12406206130981445, + "step": 6120 + }, + { + "epoch": 0.8275116180819603, + "grad_norm": 1.4638158082962036, + "learning_rate": 2.475210196106313e-06, + "loss": 0.2104511260986328, + "step": 6121 + }, + { + "epoch": 0.8276468103084073, + "grad_norm": 1.3976386785507202, + "learning_rate": 2.4714452082001753e-06, + "loss": 0.14893722534179688, + "step": 6122 + }, + { + "epoch": 0.8277820025348542, + "grad_norm": 1.0303772687911987, + "learning_rate": 2.467682828805956e-06, + "loss": 0.1817913055419922, + "step": 6123 + }, + { + "epoch": 0.8279171947613012, + "grad_norm": 1.6236698627471924, + "learning_rate": 2.4639230587070017e-06, + "loss": 0.146897554397583, + "step": 6124 + }, + { + "epoch": 0.8280523869877482, + "grad_norm": 1.711416482925415, + "learning_rate": 2.460165898686114e-06, + "loss": 0.18636691570281982, + "step": 6125 + }, + { + "epoch": 0.8281875792141952, + "grad_norm": 3.414409875869751, + "learning_rate": 2.4564113495255597e-06, + "loss": 0.2081054449081421, + "step": 6126 + }, + { + "epoch": 0.8283227714406421, + "grad_norm": 1.0230519771575928, + "learning_rate": 2.4526594120070545e-06, + "loss": 0.17973709106445312, + "step": 6127 + }, + { + "epoch": 0.8284579636670891, + "grad_norm": 0.8750633597373962, + "learning_rate": 2.4489100869117686e-06, + "loss": 0.13215065002441406, + "step": 6128 + }, + { + "epoch": 0.8285931558935361, + "grad_norm": 1.528674602508545, + "learning_rate": 2.4451633750203344e-06, + "loss": 0.22925639152526855, + "step": 6129 + }, + { + "epoch": 0.8287283481199831, + "grad_norm": 1.441159725189209, + "learning_rate": 2.441419277112831e-06, + "loss": 0.19792938232421875, + "step": 6130 + }, + { + "epoch": 0.8288635403464301, + "grad_norm": 1.6094666719436646, + "learning_rate": 2.4376777939688107e-06, + "loss": 0.2055816650390625, + "step": 6131 + }, + { + "epoch": 0.828998732572877, + "grad_norm": 1.185416340827942, + "learning_rate": 2.4339389263672625e-06, + "loss": 0.18233442306518555, + "step": 6132 + }, + { + "epoch": 0.829133924799324, + "grad_norm": 0.498168021440506, + "learning_rate": 2.4302026750866406e-06, + "loss": 0.09771871566772461, + "step": 6133 + }, + { + "epoch": 0.829269117025771, + "grad_norm": 1.0136938095092773, + "learning_rate": 2.4264690409048517e-06, + "loss": 0.12264084815979004, + "step": 6134 + }, + { + "epoch": 0.829404309252218, + "grad_norm": 1.0527483224868774, + "learning_rate": 2.4227380245992555e-06, + "loss": 0.15552902221679688, + "step": 6135 + }, + { + "epoch": 0.829539501478665, + "grad_norm": 0.9546632170677185, + "learning_rate": 2.4190096269466767e-06, + "loss": 0.174957275390625, + "step": 6136 + }, + { + "epoch": 0.8296746937051119, + "grad_norm": 1.0038312673568726, + "learning_rate": 2.415283848723383e-06, + "loss": 0.1708831787109375, + "step": 6137 + }, + { + "epoch": 0.8298098859315589, + "grad_norm": 1.6940914392471313, + "learning_rate": 2.411560690705101e-06, + "loss": 0.22352981567382812, + "step": 6138 + }, + { + "epoch": 0.8299450781580059, + "grad_norm": 1.003494143486023, + "learning_rate": 2.4078401536670146e-06, + "loss": 0.18477821350097656, + "step": 6139 + }, + { + "epoch": 0.8300802703844529, + "grad_norm": 0.8504602313041687, + "learning_rate": 2.4041222383837538e-06, + "loss": 0.1630370318889618, + "step": 6140 + }, + { + "epoch": 0.8302154626108998, + "grad_norm": 1.0547864437103271, + "learning_rate": 2.400406945629418e-06, + "loss": 0.15450400114059448, + "step": 6141 + }, + { + "epoch": 0.8303506548373468, + "grad_norm": 1.3355293273925781, + "learning_rate": 2.3966942761775396e-06, + "loss": 0.13461029529571533, + "step": 6142 + }, + { + "epoch": 0.8304858470637938, + "grad_norm": 1.4127442836761475, + "learning_rate": 2.3929842308011263e-06, + "loss": 0.13334119319915771, + "step": 6143 + }, + { + "epoch": 0.8306210392902408, + "grad_norm": 1.4277772903442383, + "learning_rate": 2.3892768102726236e-06, + "loss": 0.18231201171875, + "step": 6144 + }, + { + "epoch": 0.8307562315166878, + "grad_norm": 1.2124049663543701, + "learning_rate": 2.3855720153639344e-06, + "loss": 0.1561412811279297, + "step": 6145 + }, + { + "epoch": 0.8308914237431347, + "grad_norm": 1.3672864437103271, + "learning_rate": 2.381869846846428e-06, + "loss": 0.22016239166259766, + "step": 6146 + }, + { + "epoch": 0.8310266159695817, + "grad_norm": 1.3312278985977173, + "learning_rate": 2.3781703054908993e-06, + "loss": 0.16948366165161133, + "step": 6147 + }, + { + "epoch": 0.8311618081960287, + "grad_norm": 2.371581792831421, + "learning_rate": 2.374473392067624e-06, + "loss": 0.23529160022735596, + "step": 6148 + }, + { + "epoch": 0.8312970004224757, + "grad_norm": 1.0126550197601318, + "learning_rate": 2.370779107346317e-06, + "loss": 0.1361989974975586, + "step": 6149 + }, + { + "epoch": 0.8314321926489227, + "grad_norm": 1.1454592943191528, + "learning_rate": 2.3670874520961437e-06, + "loss": 0.1357412338256836, + "step": 6150 + }, + { + "epoch": 0.8315673848753696, + "grad_norm": 1.0357590913772583, + "learning_rate": 2.3633984270857367e-06, + "loss": 0.1480344831943512, + "step": 6151 + }, + { + "epoch": 0.8317025771018166, + "grad_norm": 1.1969467401504517, + "learning_rate": 2.359712033083156e-06, + "loss": 0.15659022331237793, + "step": 6152 + }, + { + "epoch": 0.8318377693282636, + "grad_norm": 1.244457721710205, + "learning_rate": 2.35602827085594e-06, + "loss": 0.18448418378829956, + "step": 6153 + }, + { + "epoch": 0.8319729615547106, + "grad_norm": 1.0550355911254883, + "learning_rate": 2.3523471411710644e-06, + "loss": 0.18457984924316406, + "step": 6154 + }, + { + "epoch": 0.8321081537811575, + "grad_norm": 1.0347785949707031, + "learning_rate": 2.3486686447949585e-06, + "loss": 0.15082788467407227, + "step": 6155 + }, + { + "epoch": 0.8322433460076045, + "grad_norm": 2.2821013927459717, + "learning_rate": 2.3449927824935075e-06, + "loss": 0.1829671859741211, + "step": 6156 + }, + { + "epoch": 0.8323785382340515, + "grad_norm": 0.9535189270973206, + "learning_rate": 2.3413195550320393e-06, + "loss": 0.18484878540039062, + "step": 6157 + }, + { + "epoch": 0.8325137304604985, + "grad_norm": 1.0555816888809204, + "learning_rate": 2.3376489631753474e-06, + "loss": 0.15626144409179688, + "step": 6158 + }, + { + "epoch": 0.8326489226869455, + "grad_norm": 0.7003810405731201, + "learning_rate": 2.3339810076876665e-06, + "loss": 0.11058981716632843, + "step": 6159 + }, + { + "epoch": 0.8327841149133924, + "grad_norm": 1.2119359970092773, + "learning_rate": 2.3303156893326815e-06, + "loss": 0.1506023406982422, + "step": 6160 + }, + { + "epoch": 0.8329193071398394, + "grad_norm": 1.0864394903182983, + "learning_rate": 2.326653008873535e-06, + "loss": 0.19234848022460938, + "step": 6161 + }, + { + "epoch": 0.8330544993662864, + "grad_norm": 0.9930852055549622, + "learning_rate": 2.3229929670728085e-06, + "loss": 0.14966583251953125, + "step": 6162 + }, + { + "epoch": 0.8331896915927334, + "grad_norm": 2.0420889854431152, + "learning_rate": 2.319335564692554e-06, + "loss": 0.19245147705078125, + "step": 6163 + }, + { + "epoch": 0.8333248838191804, + "grad_norm": 0.7486175298690796, + "learning_rate": 2.315680802494256e-06, + "loss": 0.1457509994506836, + "step": 6164 + }, + { + "epoch": 0.8334600760456273, + "grad_norm": 1.3638601303100586, + "learning_rate": 2.312028681238856e-06, + "loss": 0.19734573364257812, + "step": 6165 + }, + { + "epoch": 0.8335952682720743, + "grad_norm": 2.4501829147338867, + "learning_rate": 2.3083792016867434e-06, + "loss": 0.1871786117553711, + "step": 6166 + }, + { + "epoch": 0.8337304604985213, + "grad_norm": 1.2597883939743042, + "learning_rate": 2.304732364597759e-06, + "loss": 0.09703421592712402, + "step": 6167 + }, + { + "epoch": 0.8338656527249683, + "grad_norm": 2.0859267711639404, + "learning_rate": 2.3010881707311994e-06, + "loss": 0.20683956146240234, + "step": 6168 + }, + { + "epoch": 0.8340008449514152, + "grad_norm": 1.5158740282058716, + "learning_rate": 2.2974466208458017e-06, + "loss": 0.1488513946533203, + "step": 6169 + }, + { + "epoch": 0.8341360371778622, + "grad_norm": 1.1211365461349487, + "learning_rate": 2.293807715699755e-06, + "loss": 0.180938720703125, + "step": 6170 + }, + { + "epoch": 0.8342712294043092, + "grad_norm": 1.8995894193649292, + "learning_rate": 2.2901714560507e-06, + "loss": 0.22339916229248047, + "step": 6171 + }, + { + "epoch": 0.8344064216307562, + "grad_norm": 0.8575047850608826, + "learning_rate": 2.286537842655722e-06, + "loss": 0.14801931381225586, + "step": 6172 + }, + { + "epoch": 0.8345416138572032, + "grad_norm": 0.933614194393158, + "learning_rate": 2.2829068762713633e-06, + "loss": 0.15410709381103516, + "step": 6173 + }, + { + "epoch": 0.8346768060836501, + "grad_norm": 1.2840702533721924, + "learning_rate": 2.279278557653611e-06, + "loss": 0.22905540466308594, + "step": 6174 + }, + { + "epoch": 0.8348119983100971, + "grad_norm": 2.2746710777282715, + "learning_rate": 2.2756528875578965e-06, + "loss": 0.24266767501831055, + "step": 6175 + }, + { + "epoch": 0.8349471905365442, + "grad_norm": 1.6231721639633179, + "learning_rate": 2.2720298667391067e-06, + "loss": 0.15422916412353516, + "step": 6176 + }, + { + "epoch": 0.8350823827629912, + "grad_norm": 1.193681240081787, + "learning_rate": 2.268409495951568e-06, + "loss": 0.23371315002441406, + "step": 6177 + }, + { + "epoch": 0.8352175749894382, + "grad_norm": 0.7155642509460449, + "learning_rate": 2.2647917759490723e-06, + "loss": 0.1354689598083496, + "step": 6178 + }, + { + "epoch": 0.8353527672158851, + "grad_norm": 1.0123320817947388, + "learning_rate": 2.261176707484834e-06, + "loss": 0.11757993698120117, + "step": 6179 + }, + { + "epoch": 0.8354879594423321, + "grad_norm": 2.0679335594177246, + "learning_rate": 2.2575642913115408e-06, + "loss": 0.2002277374267578, + "step": 6180 + }, + { + "epoch": 0.8356231516687791, + "grad_norm": 1.0954053401947021, + "learning_rate": 2.253954528181313e-06, + "loss": 0.20328426361083984, + "step": 6181 + }, + { + "epoch": 0.8357583438952261, + "grad_norm": 0.945042610168457, + "learning_rate": 2.2503474188457206e-06, + "loss": 0.20763778686523438, + "step": 6182 + }, + { + "epoch": 0.835893536121673, + "grad_norm": 1.3575836420059204, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.1425628662109375, + "step": 6183 + }, + { + "epoch": 0.83602872834812, + "grad_norm": 0.8715459108352661, + "learning_rate": 2.2431411645619776e-06, + "loss": 0.14175176620483398, + "step": 6184 + }, + { + "epoch": 0.836163920574567, + "grad_norm": 1.1249775886535645, + "learning_rate": 2.239542021114205e-06, + "loss": 0.21918106079101562, + "step": 6185 + }, + { + "epoch": 0.836299112801014, + "grad_norm": 1.7404025793075562, + "learning_rate": 2.2359455344618306e-06, + "loss": 0.20702362060546875, + "step": 6186 + }, + { + "epoch": 0.836434305027461, + "grad_norm": 1.311781406402588, + "learning_rate": 2.232351705353663e-06, + "loss": 0.2141704559326172, + "step": 6187 + }, + { + "epoch": 0.836569497253908, + "grad_norm": 1.3623697757720947, + "learning_rate": 2.228760534537955e-06, + "loss": 0.18335247039794922, + "step": 6188 + }, + { + "epoch": 0.8367046894803549, + "grad_norm": 1.0828830003738403, + "learning_rate": 2.2251720227624044e-06, + "loss": 0.17909908294677734, + "step": 6189 + }, + { + "epoch": 0.8368398817068019, + "grad_norm": 1.3389908075332642, + "learning_rate": 2.2215861707741666e-06, + "loss": 0.2291707992553711, + "step": 6190 + }, + { + "epoch": 0.8369750739332489, + "grad_norm": 1.6758027076721191, + "learning_rate": 2.2180029793198313e-06, + "loss": 0.1393446922302246, + "step": 6191 + }, + { + "epoch": 0.8371102661596959, + "grad_norm": 1.672338604927063, + "learning_rate": 2.2144224491454363e-06, + "loss": 0.14776533842086792, + "step": 6192 + }, + { + "epoch": 0.8372454583861428, + "grad_norm": 1.1904263496398926, + "learning_rate": 2.2108445809964695e-06, + "loss": 0.15963387489318848, + "step": 6193 + }, + { + "epoch": 0.8373806506125898, + "grad_norm": 1.2583891153335571, + "learning_rate": 2.2072693756178567e-06, + "loss": 0.1912822723388672, + "step": 6194 + }, + { + "epoch": 0.8375158428390368, + "grad_norm": 0.9383134245872498, + "learning_rate": 2.203696833753983e-06, + "loss": 0.20670700073242188, + "step": 6195 + }, + { + "epoch": 0.8376510350654838, + "grad_norm": 1.3985265493392944, + "learning_rate": 2.200126956148668e-06, + "loss": 0.18425893783569336, + "step": 6196 + }, + { + "epoch": 0.8377862272919308, + "grad_norm": 0.9472099542617798, + "learning_rate": 2.196559743545177e-06, + "loss": 0.17144203186035156, + "step": 6197 + }, + { + "epoch": 0.8379214195183777, + "grad_norm": 1.364683747291565, + "learning_rate": 2.1929951966862233e-06, + "loss": 0.18219280242919922, + "step": 6198 + }, + { + "epoch": 0.8380566117448247, + "grad_norm": 1.1948028802871704, + "learning_rate": 2.1894333163139607e-06, + "loss": 0.2162628173828125, + "step": 6199 + }, + { + "epoch": 0.8381918039712717, + "grad_norm": 1.3858624696731567, + "learning_rate": 2.1858741031700015e-06, + "loss": 0.18250656127929688, + "step": 6200 + }, + { + "epoch": 0.8383269961977187, + "grad_norm": 0.5892735719680786, + "learning_rate": 2.1823175579953856e-06, + "loss": 0.10857677459716797, + "step": 6201 + }, + { + "epoch": 0.8384621884241656, + "grad_norm": 1.2920548915863037, + "learning_rate": 2.1787636815306065e-06, + "loss": 0.19202709197998047, + "step": 6202 + }, + { + "epoch": 0.8385973806506126, + "grad_norm": 1.199432611465454, + "learning_rate": 2.1752124745156005e-06, + "loss": 0.21104812622070312, + "step": 6203 + }, + { + "epoch": 0.8387325728770596, + "grad_norm": 0.9232925176620483, + "learning_rate": 2.171663937689744e-06, + "loss": 0.14895915985107422, + "step": 6204 + }, + { + "epoch": 0.8388677651035066, + "grad_norm": 0.7447475790977478, + "learning_rate": 2.168118071791868e-06, + "loss": 0.14146804809570312, + "step": 6205 + }, + { + "epoch": 0.8390029573299536, + "grad_norm": 1.361911654472351, + "learning_rate": 2.164574877560237e-06, + "loss": 0.21272659301757812, + "step": 6206 + }, + { + "epoch": 0.8391381495564005, + "grad_norm": 1.462789535522461, + "learning_rate": 2.161034355732564e-06, + "loss": 0.19586515426635742, + "step": 6207 + }, + { + "epoch": 0.8392733417828475, + "grad_norm": 0.9570719003677368, + "learning_rate": 2.1574965070460047e-06, + "loss": 0.1771411895751953, + "step": 6208 + }, + { + "epoch": 0.8394085340092945, + "grad_norm": 1.6840550899505615, + "learning_rate": 2.1539613322371527e-06, + "loss": 0.16612529754638672, + "step": 6209 + }, + { + "epoch": 0.8395437262357415, + "grad_norm": 0.851108193397522, + "learning_rate": 2.1504288320420613e-06, + "loss": 0.1774444580078125, + "step": 6210 + }, + { + "epoch": 0.8396789184621885, + "grad_norm": 1.069047451019287, + "learning_rate": 2.1468990071962038e-06, + "loss": 0.1613612174987793, + "step": 6211 + }, + { + "epoch": 0.8398141106886354, + "grad_norm": 0.6757184267044067, + "learning_rate": 2.143371858434515e-06, + "loss": 0.1392993927001953, + "step": 6212 + }, + { + "epoch": 0.8399493029150824, + "grad_norm": 1.664408564567566, + "learning_rate": 2.139847386491367e-06, + "loss": 0.17747879028320312, + "step": 6213 + }, + { + "epoch": 0.8400844951415294, + "grad_norm": 2.1912841796875, + "learning_rate": 2.1363255921005685e-06, + "loss": 0.19891071319580078, + "step": 6214 + }, + { + "epoch": 0.8402196873679764, + "grad_norm": 1.742181658744812, + "learning_rate": 2.1328064759953853e-06, + "loss": 0.1901998519897461, + "step": 6215 + }, + { + "epoch": 0.8403548795944233, + "grad_norm": 1.0470956563949585, + "learning_rate": 2.129290038908504e-06, + "loss": 0.15223264694213867, + "step": 6216 + }, + { + "epoch": 0.8404900718208703, + "grad_norm": 0.8628921508789062, + "learning_rate": 2.1257762815720745e-06, + "loss": 0.12753534317016602, + "step": 6217 + }, + { + "epoch": 0.8406252640473173, + "grad_norm": 2.58061146736145, + "learning_rate": 2.122265204717678e-06, + "loss": 0.1978759765625, + "step": 6218 + }, + { + "epoch": 0.8407604562737643, + "grad_norm": 1.2342422008514404, + "learning_rate": 2.1187568090763328e-06, + "loss": 0.18824481964111328, + "step": 6219 + }, + { + "epoch": 0.8408956485002113, + "grad_norm": 0.5594731569290161, + "learning_rate": 2.1152510953785196e-06, + "loss": 0.10934829711914062, + "step": 6220 + }, + { + "epoch": 0.8410308407266582, + "grad_norm": 1.1864529848098755, + "learning_rate": 2.1117480643541304e-06, + "loss": 0.153900146484375, + "step": 6221 + }, + { + "epoch": 0.8411660329531052, + "grad_norm": 1.358526349067688, + "learning_rate": 2.1082477167325275e-06, + "loss": 0.16078662872314453, + "step": 6222 + }, + { + "epoch": 0.8413012251795522, + "grad_norm": 1.1038748025894165, + "learning_rate": 2.1047500532424968e-06, + "loss": 0.17917442321777344, + "step": 6223 + }, + { + "epoch": 0.8414364174059992, + "grad_norm": 1.5504013299942017, + "learning_rate": 2.1012550746122705e-06, + "loss": 0.19980669021606445, + "step": 6224 + }, + { + "epoch": 0.8415716096324461, + "grad_norm": 1.0121597051620483, + "learning_rate": 2.0977627815695217e-06, + "loss": 0.15400314331054688, + "step": 6225 + }, + { + "epoch": 0.8417068018588931, + "grad_norm": 1.0103116035461426, + "learning_rate": 2.094273174841362e-06, + "loss": 0.17439568042755127, + "step": 6226 + }, + { + "epoch": 0.8418419940853401, + "grad_norm": 1.112388253211975, + "learning_rate": 2.0907862551543516e-06, + "loss": 0.15419578552246094, + "step": 6227 + }, + { + "epoch": 0.8419771863117871, + "grad_norm": 1.314086675643921, + "learning_rate": 2.087302023234485e-06, + "loss": 0.13413846492767334, + "step": 6228 + }, + { + "epoch": 0.8421123785382341, + "grad_norm": 0.651127278804779, + "learning_rate": 2.083820479807194e-06, + "loss": 0.11928582191467285, + "step": 6229 + }, + { + "epoch": 0.842247570764681, + "grad_norm": 1.2742419242858887, + "learning_rate": 2.0803416255973585e-06, + "loss": 0.19922637939453125, + "step": 6230 + }, + { + "epoch": 0.842382762991128, + "grad_norm": 1.8111321926116943, + "learning_rate": 2.0768654613292887e-06, + "loss": 0.16463732719421387, + "step": 6231 + }, + { + "epoch": 0.842517955217575, + "grad_norm": 1.633489727973938, + "learning_rate": 2.0733919877267477e-06, + "loss": 0.18404459953308105, + "step": 6232 + }, + { + "epoch": 0.842653147444022, + "grad_norm": 1.2448697090148926, + "learning_rate": 2.0699212055129268e-06, + "loss": 0.1781761646270752, + "step": 6233 + }, + { + "epoch": 0.842788339670469, + "grad_norm": 1.147660493850708, + "learning_rate": 2.066453115410463e-06, + "loss": 0.15729331970214844, + "step": 6234 + }, + { + "epoch": 0.8429235318969159, + "grad_norm": 1.0645372867584229, + "learning_rate": 2.062987718141431e-06, + "loss": 0.20954132080078125, + "step": 6235 + }, + { + "epoch": 0.8430587241233629, + "grad_norm": 1.390980839729309, + "learning_rate": 2.0595250144273423e-06, + "loss": 0.13446331024169922, + "step": 6236 + }, + { + "epoch": 0.8431939163498099, + "grad_norm": 0.9093831181526184, + "learning_rate": 2.056065004989155e-06, + "loss": 0.13841819763183594, + "step": 6237 + }, + { + "epoch": 0.8433291085762569, + "grad_norm": 1.0498404502868652, + "learning_rate": 2.0526076905472585e-06, + "loss": 0.1700420379638672, + "step": 6238 + }, + { + "epoch": 0.8434643008027038, + "grad_norm": 2.439100503921509, + "learning_rate": 2.0491530718214855e-06, + "loss": 0.20040130615234375, + "step": 6239 + }, + { + "epoch": 0.8435994930291508, + "grad_norm": 0.948650598526001, + "learning_rate": 2.0457011495311045e-06, + "loss": 0.14566409587860107, + "step": 6240 + }, + { + "epoch": 0.8437346852555978, + "grad_norm": 1.1841480731964111, + "learning_rate": 2.0422519243948232e-06, + "loss": 0.14728641510009766, + "step": 6241 + }, + { + "epoch": 0.8438698774820448, + "grad_norm": 1.4580824375152588, + "learning_rate": 2.0388053971307927e-06, + "loss": 0.2090930938720703, + "step": 6242 + }, + { + "epoch": 0.8440050697084918, + "grad_norm": 1.3309868574142456, + "learning_rate": 2.0353615684565956e-06, + "loss": 0.1404285430908203, + "step": 6243 + }, + { + "epoch": 0.8441402619349387, + "grad_norm": 0.9833971858024597, + "learning_rate": 2.0319204390892566e-06, + "loss": 0.17285728454589844, + "step": 6244 + }, + { + "epoch": 0.8442754541613857, + "grad_norm": 1.2245672941207886, + "learning_rate": 2.0284820097452374e-06, + "loss": 0.13866472244262695, + "step": 6245 + }, + { + "epoch": 0.8444106463878327, + "grad_norm": 0.6681135296821594, + "learning_rate": 2.02504628114043e-06, + "loss": 0.1077723503112793, + "step": 6246 + }, + { + "epoch": 0.8445458386142797, + "grad_norm": 0.888547420501709, + "learning_rate": 2.0216132539901865e-06, + "loss": 0.16910552978515625, + "step": 6247 + }, + { + "epoch": 0.8446810308407267, + "grad_norm": 1.6373014450073242, + "learning_rate": 2.0181829290092663e-06, + "loss": 0.17827892303466797, + "step": 6248 + }, + { + "epoch": 0.8448162230671736, + "grad_norm": 1.2827008962631226, + "learning_rate": 2.014755306911891e-06, + "loss": 0.1621088981628418, + "step": 6249 + }, + { + "epoch": 0.8449514152936206, + "grad_norm": 1.2814399003982544, + "learning_rate": 2.0113303884117057e-06, + "loss": 0.1277916431427002, + "step": 6250 + }, + { + "epoch": 0.8450866075200676, + "grad_norm": 0.7529605627059937, + "learning_rate": 2.0079081742217957e-06, + "loss": 0.11689400672912598, + "step": 6251 + }, + { + "epoch": 0.8452217997465146, + "grad_norm": 1.8506730794906616, + "learning_rate": 2.0044886650546915e-06, + "loss": 0.17331218719482422, + "step": 6252 + }, + { + "epoch": 0.8453569919729615, + "grad_norm": 1.1317185163497925, + "learning_rate": 2.0010718616223406e-06, + "loss": 0.17982101440429688, + "step": 6253 + }, + { + "epoch": 0.8454921841994085, + "grad_norm": 1.1462839841842651, + "learning_rate": 1.9976577646361514e-06, + "loss": 0.14794492721557617, + "step": 6254 + }, + { + "epoch": 0.8456273764258555, + "grad_norm": 0.7859115600585938, + "learning_rate": 1.994246374806953e-06, + "loss": 0.1321239471435547, + "step": 6255 + }, + { + "epoch": 0.8457625686523025, + "grad_norm": 2.170652389526367, + "learning_rate": 1.9908376928450128e-06, + "loss": 0.1707468032836914, + "step": 6256 + }, + { + "epoch": 0.8458977608787495, + "grad_norm": 0.9859130382537842, + "learning_rate": 1.987431719460039e-06, + "loss": 0.15401244163513184, + "step": 6257 + }, + { + "epoch": 0.8460329531051964, + "grad_norm": 1.1299784183502197, + "learning_rate": 1.9840284553611706e-06, + "loss": 0.14007854461669922, + "step": 6258 + }, + { + "epoch": 0.8461681453316434, + "grad_norm": 1.0514180660247803, + "learning_rate": 1.980627901256989e-06, + "loss": 0.17079877853393555, + "step": 6259 + }, + { + "epoch": 0.8463033375580904, + "grad_norm": 0.9567099809646606, + "learning_rate": 1.9772300578555062e-06, + "loss": 0.18895888328552246, + "step": 6260 + }, + { + "epoch": 0.8464385297845374, + "grad_norm": 1.3666324615478516, + "learning_rate": 1.973834925864172e-06, + "loss": 0.18380475044250488, + "step": 6261 + }, + { + "epoch": 0.8465737220109844, + "grad_norm": 1.0582427978515625, + "learning_rate": 1.97044250598987e-06, + "loss": 0.1831674575805664, + "step": 6262 + }, + { + "epoch": 0.8467089142374313, + "grad_norm": 1.0290175676345825, + "learning_rate": 1.9670527989389177e-06, + "loss": 0.1715412139892578, + "step": 6263 + }, + { + "epoch": 0.8468441064638783, + "grad_norm": 1.3548446893692017, + "learning_rate": 1.9636658054170747e-06, + "loss": 0.20255088806152344, + "step": 6264 + }, + { + "epoch": 0.8469792986903253, + "grad_norm": 0.9062769412994385, + "learning_rate": 1.960281526129531e-06, + "loss": 0.193084716796875, + "step": 6265 + }, + { + "epoch": 0.8471144909167723, + "grad_norm": 1.1642833948135376, + "learning_rate": 1.9568999617809077e-06, + "loss": 0.19539260864257812, + "step": 6266 + }, + { + "epoch": 0.8472496831432192, + "grad_norm": 1.0204161405563354, + "learning_rate": 1.9535211130752676e-06, + "loss": 0.18901634216308594, + "step": 6267 + }, + { + "epoch": 0.8473848753696662, + "grad_norm": 1.5740911960601807, + "learning_rate": 1.950144980716101e-06, + "loss": 0.2320117950439453, + "step": 6268 + }, + { + "epoch": 0.8475200675961132, + "grad_norm": 0.8457884192466736, + "learning_rate": 1.9467715654063444e-06, + "loss": 0.15311002731323242, + "step": 6269 + }, + { + "epoch": 0.8476552598225602, + "grad_norm": 1.1248319149017334, + "learning_rate": 1.9434008678483532e-06, + "loss": 0.1924905776977539, + "step": 6270 + }, + { + "epoch": 0.8477904520490072, + "grad_norm": 1.0604660511016846, + "learning_rate": 1.9400328887439295e-06, + "loss": 0.1890702247619629, + "step": 6271 + }, + { + "epoch": 0.8479256442754541, + "grad_norm": 2.2242255210876465, + "learning_rate": 1.9366676287943038e-06, + "loss": 0.2086944580078125, + "step": 6272 + }, + { + "epoch": 0.8480608365019011, + "grad_norm": 1.0713787078857422, + "learning_rate": 1.9333050887001337e-06, + "loss": 0.16119706630706787, + "step": 6273 + }, + { + "epoch": 0.8481960287283481, + "grad_norm": 1.2495838403701782, + "learning_rate": 1.9299452691615293e-06, + "loss": 0.11509251594543457, + "step": 6274 + }, + { + "epoch": 0.8483312209547951, + "grad_norm": 1.0761128664016724, + "learning_rate": 1.9265881708780182e-06, + "loss": 0.18350791931152344, + "step": 6275 + }, + { + "epoch": 0.848466413181242, + "grad_norm": 1.191149115562439, + "learning_rate": 1.9232337945485657e-06, + "loss": 0.16768741607666016, + "step": 6276 + }, + { + "epoch": 0.848601605407689, + "grad_norm": 0.9965090155601501, + "learning_rate": 1.91988214087157e-06, + "loss": 0.1889941692352295, + "step": 6277 + }, + { + "epoch": 0.848736797634136, + "grad_norm": 0.984058678150177, + "learning_rate": 1.9165332105448613e-06, + "loss": 0.20703887939453125, + "step": 6278 + }, + { + "epoch": 0.848871989860583, + "grad_norm": 1.2002536058425903, + "learning_rate": 1.913187004265715e-06, + "loss": 0.21549177169799805, + "step": 6279 + }, + { + "epoch": 0.84900718208703, + "grad_norm": 0.8504394888877869, + "learning_rate": 1.909843522730814e-06, + "loss": 0.16881728172302246, + "step": 6280 + }, + { + "epoch": 0.8491423743134769, + "grad_norm": 0.7221499085426331, + "learning_rate": 1.9065027666363017e-06, + "loss": 0.11715841293334961, + "step": 6281 + }, + { + "epoch": 0.8492775665399239, + "grad_norm": 1.7518599033355713, + "learning_rate": 1.903164736677736e-06, + "loss": 0.2264871597290039, + "step": 6282 + }, + { + "epoch": 0.8494127587663709, + "grad_norm": 0.812059223651886, + "learning_rate": 1.8998294335501082e-06, + "loss": 0.15494966506958008, + "step": 6283 + }, + { + "epoch": 0.8495479509928179, + "grad_norm": 0.8587742447853088, + "learning_rate": 1.8964968579478592e-06, + "loss": 0.15098953247070312, + "step": 6284 + }, + { + "epoch": 0.8496831432192649, + "grad_norm": 0.9456426501274109, + "learning_rate": 1.893167010564834e-06, + "loss": 0.14714765548706055, + "step": 6285 + }, + { + "epoch": 0.8498183354457118, + "grad_norm": 1.1235036849975586, + "learning_rate": 1.8898398920943349e-06, + "loss": 0.17379093170166016, + "step": 6286 + }, + { + "epoch": 0.8499535276721588, + "grad_norm": 0.9256793260574341, + "learning_rate": 1.886515503229081e-06, + "loss": 0.15215396881103516, + "step": 6287 + }, + { + "epoch": 0.8500887198986058, + "grad_norm": 0.6435022354125977, + "learning_rate": 1.8831938446612269e-06, + "loss": 0.11315274238586426, + "step": 6288 + }, + { + "epoch": 0.8502239121250528, + "grad_norm": 1.5932315587997437, + "learning_rate": 1.8798749170823676e-06, + "loss": 0.14092445373535156, + "step": 6289 + }, + { + "epoch": 0.8503591043514998, + "grad_norm": 2.051391839981079, + "learning_rate": 1.8765587211835089e-06, + "loss": 0.15987014770507812, + "step": 6290 + }, + { + "epoch": 0.8504942965779467, + "grad_norm": 1.1069653034210205, + "learning_rate": 1.8732452576551102e-06, + "loss": 0.18801546096801758, + "step": 6291 + }, + { + "epoch": 0.8506294888043937, + "grad_norm": 1.1818568706512451, + "learning_rate": 1.8699345271870493e-06, + "loss": 0.14302223920822144, + "step": 6292 + }, + { + "epoch": 0.8507646810308407, + "grad_norm": 1.100877046585083, + "learning_rate": 1.8666265304686387e-06, + "loss": 0.14019489288330078, + "step": 6293 + }, + { + "epoch": 0.8508998732572877, + "grad_norm": 1.1298385858535767, + "learning_rate": 1.8633212681886203e-06, + "loss": 0.15425443649291992, + "step": 6294 + }, + { + "epoch": 0.8510350654837346, + "grad_norm": 1.2238614559173584, + "learning_rate": 1.8600187410351621e-06, + "loss": 0.18645191192626953, + "step": 6295 + }, + { + "epoch": 0.8511702577101816, + "grad_norm": 0.8080711960792542, + "learning_rate": 1.8567189496958776e-06, + "loss": 0.14484024047851562, + "step": 6296 + }, + { + "epoch": 0.8513054499366286, + "grad_norm": 0.9121830463409424, + "learning_rate": 1.853421894857797e-06, + "loss": 0.12390804290771484, + "step": 6297 + }, + { + "epoch": 0.8514406421630756, + "grad_norm": 1.0221834182739258, + "learning_rate": 1.8501275772073827e-06, + "loss": 0.15497827529907227, + "step": 6298 + }, + { + "epoch": 0.8515758343895226, + "grad_norm": 1.3868948221206665, + "learning_rate": 1.8468359974305315e-06, + "loss": 0.1701679229736328, + "step": 6299 + }, + { + "epoch": 0.8517110266159695, + "grad_norm": 1.0798214673995972, + "learning_rate": 1.8435471562125633e-06, + "loss": 0.13428783416748047, + "step": 6300 + }, + { + "epoch": 0.8518462188424165, + "grad_norm": 0.8273772597312927, + "learning_rate": 1.8402610542382386e-06, + "loss": 0.14809608459472656, + "step": 6301 + }, + { + "epoch": 0.8519814110688635, + "grad_norm": 1.5770983695983887, + "learning_rate": 1.836977692191742e-06, + "loss": 0.1824333667755127, + "step": 6302 + }, + { + "epoch": 0.8521166032953105, + "grad_norm": 1.2388843297958374, + "learning_rate": 1.8336970707566781e-06, + "loss": 0.22007465362548828, + "step": 6303 + }, + { + "epoch": 0.8522517955217574, + "grad_norm": 0.9671942591667175, + "learning_rate": 1.8304191906160973e-06, + "loss": 0.19976234436035156, + "step": 6304 + }, + { + "epoch": 0.8523869877482044, + "grad_norm": 1.1258398294448853, + "learning_rate": 1.8271440524524668e-06, + "loss": 0.21798467636108398, + "step": 6305 + }, + { + "epoch": 0.8525221799746514, + "grad_norm": 1.3445639610290527, + "learning_rate": 1.8238716569476949e-06, + "loss": 0.1522226333618164, + "step": 6306 + }, + { + "epoch": 0.8526573722010984, + "grad_norm": 1.0746725797653198, + "learning_rate": 1.8206020047831078e-06, + "loss": 0.1488351821899414, + "step": 6307 + }, + { + "epoch": 0.8527925644275454, + "grad_norm": 1.4821486473083496, + "learning_rate": 1.8173350966394648e-06, + "loss": 0.15462207794189453, + "step": 6308 + }, + { + "epoch": 0.8529277566539923, + "grad_norm": 0.8736923933029175, + "learning_rate": 1.8140709331969513e-06, + "loss": 0.16199684143066406, + "step": 6309 + }, + { + "epoch": 0.8530629488804393, + "grad_norm": 1.3552755117416382, + "learning_rate": 1.810809515135184e-06, + "loss": 0.1948223114013672, + "step": 6310 + }, + { + "epoch": 0.8531981411068863, + "grad_norm": 1.867465853691101, + "learning_rate": 1.8075508431332111e-06, + "loss": 0.18097877502441406, + "step": 6311 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 0.9573332071304321, + "learning_rate": 1.8042949178695034e-06, + "loss": 0.11316967010498047, + "step": 6312 + }, + { + "epoch": 0.8534685255597804, + "grad_norm": 0.9344730973243713, + "learning_rate": 1.8010417400219636e-06, + "loss": 0.15883541107177734, + "step": 6313 + }, + { + "epoch": 0.8536037177862273, + "grad_norm": 1.6662367582321167, + "learning_rate": 1.7977913102679167e-06, + "loss": 0.1753377914428711, + "step": 6314 + }, + { + "epoch": 0.8537389100126743, + "grad_norm": 1.8869081735610962, + "learning_rate": 1.7945436292841193e-06, + "loss": 0.19003963470458984, + "step": 6315 + }, + { + "epoch": 0.8538741022391213, + "grad_norm": 1.0469653606414795, + "learning_rate": 1.791298697746766e-06, + "loss": 0.13212895393371582, + "step": 6316 + }, + { + "epoch": 0.8540092944655683, + "grad_norm": 0.8228566646575928, + "learning_rate": 1.7880565163314545e-06, + "loss": 0.1853322982788086, + "step": 6317 + }, + { + "epoch": 0.8541444866920153, + "grad_norm": 1.4089813232421875, + "learning_rate": 1.784817085713233e-06, + "loss": 0.1348482370376587, + "step": 6318 + }, + { + "epoch": 0.8542796789184622, + "grad_norm": 0.9777628183364868, + "learning_rate": 1.7815804065665669e-06, + "loss": 0.1686868667602539, + "step": 6319 + }, + { + "epoch": 0.8544148711449092, + "grad_norm": 1.4728889465332031, + "learning_rate": 1.778346479565346e-06, + "loss": 0.1647968292236328, + "step": 6320 + }, + { + "epoch": 0.8545500633713562, + "grad_norm": 1.193688988685608, + "learning_rate": 1.7751153053829011e-06, + "loss": 0.1815328598022461, + "step": 6321 + }, + { + "epoch": 0.8546852555978032, + "grad_norm": 1.304884672164917, + "learning_rate": 1.7718868846919662e-06, + "loss": 0.1832122802734375, + "step": 6322 + }, + { + "epoch": 0.8548204478242502, + "grad_norm": 2.5404052734375, + "learning_rate": 1.7686612181647266e-06, + "loss": 0.1812753677368164, + "step": 6323 + }, + { + "epoch": 0.8549556400506971, + "grad_norm": 1.100369930267334, + "learning_rate": 1.7654383064727802e-06, + "loss": 0.19495582580566406, + "step": 6324 + }, + { + "epoch": 0.8550908322771441, + "grad_norm": 1.0349186658859253, + "learning_rate": 1.762218150287152e-06, + "loss": 0.11691141128540039, + "step": 6325 + }, + { + "epoch": 0.8552260245035911, + "grad_norm": 1.4328306913375854, + "learning_rate": 1.759000750278299e-06, + "loss": 0.22269058227539062, + "step": 6326 + }, + { + "epoch": 0.8553612167300381, + "grad_norm": 1.3923872709274292, + "learning_rate": 1.7557861071160953e-06, + "loss": 0.1661449670791626, + "step": 6327 + }, + { + "epoch": 0.855496408956485, + "grad_norm": 0.6199512481689453, + "learning_rate": 1.7525742214698538e-06, + "loss": 0.09385907649993896, + "step": 6328 + }, + { + "epoch": 0.855631601182932, + "grad_norm": 0.7496179938316345, + "learning_rate": 1.7493650940083045e-06, + "loss": 0.13285160064697266, + "step": 6329 + }, + { + "epoch": 0.855766793409379, + "grad_norm": 0.6946294903755188, + "learning_rate": 1.746158725399603e-06, + "loss": 0.13773822784423828, + "step": 6330 + }, + { + "epoch": 0.855901985635826, + "grad_norm": 1.548938274383545, + "learning_rate": 1.7429551163113322e-06, + "loss": 0.2080669403076172, + "step": 6331 + }, + { + "epoch": 0.856037177862273, + "grad_norm": 1.1919760704040527, + "learning_rate": 1.7397542674105e-06, + "loss": 0.19496631622314453, + "step": 6332 + }, + { + "epoch": 0.8561723700887199, + "grad_norm": 1.0689605474472046, + "learning_rate": 1.7365561793635431e-06, + "loss": 0.18716049194335938, + "step": 6333 + }, + { + "epoch": 0.8563075623151669, + "grad_norm": 1.1003448963165283, + "learning_rate": 1.7333608528363227e-06, + "loss": 0.1591663360595703, + "step": 6334 + }, + { + "epoch": 0.8564427545416139, + "grad_norm": 0.8463094830513, + "learning_rate": 1.7301682884941128e-06, + "loss": 0.16757440567016602, + "step": 6335 + }, + { + "epoch": 0.8565779467680609, + "grad_norm": 0.9306474924087524, + "learning_rate": 1.726978487001632e-06, + "loss": 0.14037609100341797, + "step": 6336 + }, + { + "epoch": 0.8567131389945079, + "grad_norm": 1.7993676662445068, + "learning_rate": 1.7237914490230072e-06, + "loss": 0.20835113525390625, + "step": 6337 + }, + { + "epoch": 0.8568483312209548, + "grad_norm": 1.5667762756347656, + "learning_rate": 1.7206071752218027e-06, + "loss": 0.15067839622497559, + "step": 6338 + }, + { + "epoch": 0.8569835234474018, + "grad_norm": 0.9301304817199707, + "learning_rate": 1.7174256662610032e-06, + "loss": 0.1498403549194336, + "step": 6339 + }, + { + "epoch": 0.8571187156738488, + "grad_norm": 1.698898434638977, + "learning_rate": 1.714246922803004e-06, + "loss": 0.19235920906066895, + "step": 6340 + }, + { + "epoch": 0.8572539079002958, + "grad_norm": 0.6782569885253906, + "learning_rate": 1.7110709455096468e-06, + "loss": 0.12676215171813965, + "step": 6341 + }, + { + "epoch": 0.8573891001267427, + "grad_norm": 1.2585948705673218, + "learning_rate": 1.7078977350421815e-06, + "loss": 0.16432571411132812, + "step": 6342 + }, + { + "epoch": 0.8575242923531897, + "grad_norm": 0.7204188704490662, + "learning_rate": 1.7047272920612926e-06, + "loss": 0.1386280059814453, + "step": 6343 + }, + { + "epoch": 0.8576594845796367, + "grad_norm": 0.8334446549415588, + "learning_rate": 1.7015596172270841e-06, + "loss": 0.1485891342163086, + "step": 6344 + }, + { + "epoch": 0.8577946768060837, + "grad_norm": 1.424103856086731, + "learning_rate": 1.6983947111990717e-06, + "loss": 0.2587437629699707, + "step": 6345 + }, + { + "epoch": 0.8579298690325307, + "grad_norm": 0.9113597273826599, + "learning_rate": 1.695232574636218e-06, + "loss": 0.15562820434570312, + "step": 6346 + }, + { + "epoch": 0.8580650612589776, + "grad_norm": 0.861763060092926, + "learning_rate": 1.6920732081968882e-06, + "loss": 0.1543712615966797, + "step": 6347 + }, + { + "epoch": 0.8582002534854246, + "grad_norm": 1.4242202043533325, + "learning_rate": 1.6889166125388878e-06, + "loss": 0.1682729721069336, + "step": 6348 + }, + { + "epoch": 0.8583354457118716, + "grad_norm": 0.7612265348434448, + "learning_rate": 1.6857627883194277e-06, + "loss": 0.1378183364868164, + "step": 6349 + }, + { + "epoch": 0.8584706379383186, + "grad_norm": 1.0747435092926025, + "learning_rate": 1.6826117361951577e-06, + "loss": 0.19345355033874512, + "step": 6350 + }, + { + "epoch": 0.8586058301647655, + "grad_norm": 1.1082537174224854, + "learning_rate": 1.6794634568221412e-06, + "loss": 0.1585688591003418, + "step": 6351 + }, + { + "epoch": 0.8587410223912125, + "grad_norm": 0.8508347272872925, + "learning_rate": 1.676317950855864e-06, + "loss": 0.14651095867156982, + "step": 6352 + }, + { + "epoch": 0.8588762146176595, + "grad_norm": 1.3187150955200195, + "learning_rate": 1.6731752189512456e-06, + "loss": 0.1440267562866211, + "step": 6353 + }, + { + "epoch": 0.8590114068441065, + "grad_norm": 1.6386632919311523, + "learning_rate": 1.6700352617626092e-06, + "loss": 0.1860048770904541, + "step": 6354 + }, + { + "epoch": 0.8591465990705535, + "grad_norm": 0.823549211025238, + "learning_rate": 1.6668980799437167e-06, + "loss": 0.1343517303466797, + "step": 6355 + }, + { + "epoch": 0.8592817912970004, + "grad_norm": 0.6790689826011658, + "learning_rate": 1.6637636741477458e-06, + "loss": 0.1320209503173828, + "step": 6356 + }, + { + "epoch": 0.8594169835234474, + "grad_norm": 1.2886433601379395, + "learning_rate": 1.6606320450272943e-06, + "loss": 0.1545734405517578, + "step": 6357 + }, + { + "epoch": 0.8595521757498944, + "grad_norm": 0.7061427235603333, + "learning_rate": 1.657503193234386e-06, + "loss": 0.15484619140625, + "step": 6358 + }, + { + "epoch": 0.8596873679763414, + "grad_norm": 0.8254783153533936, + "learning_rate": 1.654377119420461e-06, + "loss": 0.17655372619628906, + "step": 6359 + }, + { + "epoch": 0.8598225602027884, + "grad_norm": 1.260725736618042, + "learning_rate": 1.6512538242363889e-06, + "loss": 0.1584911346435547, + "step": 6360 + }, + { + "epoch": 0.8599577524292353, + "grad_norm": 1.5559543371200562, + "learning_rate": 1.6481333083324563e-06, + "loss": 0.1739358901977539, + "step": 6361 + }, + { + "epoch": 0.8600929446556823, + "grad_norm": 0.6541129350662231, + "learning_rate": 1.6450155723583698e-06, + "loss": 0.12384319305419922, + "step": 6362 + }, + { + "epoch": 0.8602281368821293, + "grad_norm": 0.8542926907539368, + "learning_rate": 1.6419006169632573e-06, + "loss": 0.12067985534667969, + "step": 6363 + }, + { + "epoch": 0.8603633291085763, + "grad_norm": 1.2117305994033813, + "learning_rate": 1.638788442795668e-06, + "loss": 0.21179485321044922, + "step": 6364 + }, + { + "epoch": 0.8604985213350232, + "grad_norm": 1.0029221773147583, + "learning_rate": 1.6356790505035785e-06, + "loss": 0.16630077362060547, + "step": 6365 + }, + { + "epoch": 0.8606337135614702, + "grad_norm": 1.0357182025909424, + "learning_rate": 1.6325724407343795e-06, + "loss": 0.18329143524169922, + "step": 6366 + }, + { + "epoch": 0.8607689057879172, + "grad_norm": 1.0740844011306763, + "learning_rate": 1.6294686141348801e-06, + "loss": 0.1861743927001953, + "step": 6367 + }, + { + "epoch": 0.8609040980143642, + "grad_norm": 1.1387689113616943, + "learning_rate": 1.626367571351317e-06, + "loss": 0.17217063903808594, + "step": 6368 + }, + { + "epoch": 0.8610392902408112, + "grad_norm": 1.981236219406128, + "learning_rate": 1.6232693130293386e-06, + "loss": 0.20763683319091797, + "step": 6369 + }, + { + "epoch": 0.8611744824672581, + "grad_norm": 1.0935992002487183, + "learning_rate": 1.6201738398140254e-06, + "loss": 0.11727619171142578, + "step": 6370 + }, + { + "epoch": 0.8613096746937051, + "grad_norm": 0.7181010246276855, + "learning_rate": 1.6170811523498718e-06, + "loss": 0.131159245967865, + "step": 6371 + }, + { + "epoch": 0.8614448669201521, + "grad_norm": 1.4967948198318481, + "learning_rate": 1.613991251280783e-06, + "loss": 0.18373870849609375, + "step": 6372 + }, + { + "epoch": 0.8615800591465991, + "grad_norm": 1.1380367279052734, + "learning_rate": 1.6109041372501028e-06, + "loss": 0.1880960464477539, + "step": 6373 + }, + { + "epoch": 0.861715251373046, + "grad_norm": 1.5309343338012695, + "learning_rate": 1.6078198109005766e-06, + "loss": 0.1677541732788086, + "step": 6374 + }, + { + "epoch": 0.861850443599493, + "grad_norm": 1.2975012063980103, + "learning_rate": 1.6047382728743843e-06, + "loss": 0.176544189453125, + "step": 6375 + }, + { + "epoch": 0.86198563582594, + "grad_norm": 1.6805533170700073, + "learning_rate": 1.6016595238131176e-06, + "loss": 0.17225134372711182, + "step": 6376 + }, + { + "epoch": 0.862120828052387, + "grad_norm": 1.078234314918518, + "learning_rate": 1.5985835643577824e-06, + "loss": 0.17897415161132812, + "step": 6377 + }, + { + "epoch": 0.862256020278834, + "grad_norm": 0.7642520070075989, + "learning_rate": 1.5955103951488177e-06, + "loss": 0.1291799545288086, + "step": 6378 + }, + { + "epoch": 0.862391212505281, + "grad_norm": 1.1915998458862305, + "learning_rate": 1.5924400168260666e-06, + "loss": 0.1720409393310547, + "step": 6379 + }, + { + "epoch": 0.8625264047317279, + "grad_norm": 1.4987159967422485, + "learning_rate": 1.5893724300288064e-06, + "loss": 0.16414451599121094, + "step": 6380 + }, + { + "epoch": 0.8626615969581749, + "grad_norm": 1.294561743736267, + "learning_rate": 1.5863076353957196e-06, + "loss": 0.1965770721435547, + "step": 6381 + }, + { + "epoch": 0.8627967891846219, + "grad_norm": 1.7166186571121216, + "learning_rate": 1.5832456335649104e-06, + "loss": 0.21533203125, + "step": 6382 + }, + { + "epoch": 0.8629319814110689, + "grad_norm": 2.1693785190582275, + "learning_rate": 1.580186425173909e-06, + "loss": 0.22189879417419434, + "step": 6383 + }, + { + "epoch": 0.8630671736375158, + "grad_norm": 0.876650869846344, + "learning_rate": 1.5771300108596543e-06, + "loss": 0.1353088617324829, + "step": 6384 + }, + { + "epoch": 0.8632023658639628, + "grad_norm": 1.4678584337234497, + "learning_rate": 1.5740763912585171e-06, + "loss": 0.15218353271484375, + "step": 6385 + }, + { + "epoch": 0.8633375580904098, + "grad_norm": 3.0997366905212402, + "learning_rate": 1.5710255670062657e-06, + "loss": 0.18373775482177734, + "step": 6386 + }, + { + "epoch": 0.8634727503168568, + "grad_norm": 1.0350871086120605, + "learning_rate": 1.567977538738105e-06, + "loss": 0.1682291030883789, + "step": 6387 + }, + { + "epoch": 0.8636079425433038, + "grad_norm": 1.1952470541000366, + "learning_rate": 1.5649323070886494e-06, + "loss": 0.1619739532470703, + "step": 6388 + }, + { + "epoch": 0.8637431347697507, + "grad_norm": 0.9888564348220825, + "learning_rate": 1.5618898726919284e-06, + "loss": 0.15361618995666504, + "step": 6389 + }, + { + "epoch": 0.8638783269961977, + "grad_norm": 1.0632531642913818, + "learning_rate": 1.5588502361814032e-06, + "loss": 0.13387584686279297, + "step": 6390 + }, + { + "epoch": 0.8640135192226447, + "grad_norm": 1.0776424407958984, + "learning_rate": 1.5558133981899314e-06, + "loss": 0.19541168212890625, + "step": 6391 + }, + { + "epoch": 0.8641487114490917, + "grad_norm": 1.0624291896820068, + "learning_rate": 1.5527793593498053e-06, + "loss": 0.19646549224853516, + "step": 6392 + }, + { + "epoch": 0.8642839036755386, + "grad_norm": 0.993002712726593, + "learning_rate": 1.5497481202927244e-06, + "loss": 0.1404862105846405, + "step": 6393 + }, + { + "epoch": 0.8644190959019856, + "grad_norm": 1.8081856966018677, + "learning_rate": 1.5467196816498107e-06, + "loss": 0.18233346939086914, + "step": 6394 + }, + { + "epoch": 0.8645542881284326, + "grad_norm": 0.9291538596153259, + "learning_rate": 1.5436940440516018e-06, + "loss": 0.10775184631347656, + "step": 6395 + }, + { + "epoch": 0.8646894803548796, + "grad_norm": 1.2466799020767212, + "learning_rate": 1.5406712081280484e-06, + "loss": 0.1950855255126953, + "step": 6396 + }, + { + "epoch": 0.8648246725813266, + "grad_norm": 1.6430858373641968, + "learning_rate": 1.5376511745085254e-06, + "loss": 0.17039108276367188, + "step": 6397 + }, + { + "epoch": 0.8649598648077735, + "grad_norm": 1.1904093027114868, + "learning_rate": 1.5346339438218181e-06, + "loss": 0.16352558135986328, + "step": 6398 + }, + { + "epoch": 0.8650950570342205, + "grad_norm": 1.0221774578094482, + "learning_rate": 1.5316195166961295e-06, + "loss": 0.14184236526489258, + "step": 6399 + }, + { + "epoch": 0.8652302492606675, + "grad_norm": 0.7791646122932434, + "learning_rate": 1.5286078937590802e-06, + "loss": 0.16111791133880615, + "step": 6400 + }, + { + "epoch": 0.8653654414871145, + "grad_norm": 1.0860053300857544, + "learning_rate": 1.5255990756377025e-06, + "loss": 0.1878659725189209, + "step": 6401 + }, + { + "epoch": 0.8655006337135615, + "grad_norm": 1.0716074705123901, + "learning_rate": 1.5225930629584534e-06, + "loss": 0.14123046398162842, + "step": 6402 + }, + { + "epoch": 0.8656358259400084, + "grad_norm": 2.6023716926574707, + "learning_rate": 1.5195898563472038e-06, + "loss": 0.23031234741210938, + "step": 6403 + }, + { + "epoch": 0.8657710181664554, + "grad_norm": 0.9334373474121094, + "learning_rate": 1.5165894564292254e-06, + "loss": 0.11715030670166016, + "step": 6404 + }, + { + "epoch": 0.8659062103929024, + "grad_norm": 1.602604627609253, + "learning_rate": 1.5135918638292269e-06, + "loss": 0.1645113229751587, + "step": 6405 + }, + { + "epoch": 0.8660414026193494, + "grad_norm": 0.7436071634292603, + "learning_rate": 1.5105970791713186e-06, + "loss": 0.1606283187866211, + "step": 6406 + }, + { + "epoch": 0.8661765948457963, + "grad_norm": 1.6294381618499756, + "learning_rate": 1.5076051030790355e-06, + "loss": 0.19243431091308594, + "step": 6407 + }, + { + "epoch": 0.8663117870722433, + "grad_norm": 1.2523728609085083, + "learning_rate": 1.5046159361753226e-06, + "loss": 0.21007823944091797, + "step": 6408 + }, + { + "epoch": 0.8664469792986903, + "grad_norm": 0.728074312210083, + "learning_rate": 1.5016295790825336e-06, + "loss": 0.13447272777557373, + "step": 6409 + }, + { + "epoch": 0.8665821715251373, + "grad_norm": 1.6927303075790405, + "learning_rate": 1.4986460324224493e-06, + "loss": 0.17896175384521484, + "step": 6410 + }, + { + "epoch": 0.8667173637515843, + "grad_norm": 1.1252261400222778, + "learning_rate": 1.4956652968162582e-06, + "loss": 0.17017865180969238, + "step": 6411 + }, + { + "epoch": 0.8668525559780312, + "grad_norm": 0.7999898791313171, + "learning_rate": 1.492687372884567e-06, + "loss": 0.16033077239990234, + "step": 6412 + }, + { + "epoch": 0.8669877482044782, + "grad_norm": 1.3143024444580078, + "learning_rate": 1.4897122612473978e-06, + "loss": 0.2348613739013672, + "step": 6413 + }, + { + "epoch": 0.8671229404309252, + "grad_norm": 2.4359519481658936, + "learning_rate": 1.4867399625241772e-06, + "loss": 0.1888413429260254, + "step": 6414 + }, + { + "epoch": 0.8672581326573722, + "grad_norm": 0.8997766971588135, + "learning_rate": 1.4837704773337602e-06, + "loss": 0.14679336547851562, + "step": 6415 + }, + { + "epoch": 0.8673933248838192, + "grad_norm": 1.0707746744155884, + "learning_rate": 1.4808038062944036e-06, + "loss": 0.19666290283203125, + "step": 6416 + }, + { + "epoch": 0.8675285171102661, + "grad_norm": 0.675529956817627, + "learning_rate": 1.4778399500237933e-06, + "loss": 0.141806960105896, + "step": 6417 + }, + { + "epoch": 0.8676637093367131, + "grad_norm": 0.8770679235458374, + "learning_rate": 1.4748789091390124e-06, + "loss": 0.15349960327148438, + "step": 6418 + }, + { + "epoch": 0.8677989015631601, + "grad_norm": 1.5107576847076416, + "learning_rate": 1.471920684256563e-06, + "loss": 0.18856382369995117, + "step": 6419 + }, + { + "epoch": 0.8679340937896071, + "grad_norm": 0.7999342083930969, + "learning_rate": 1.4689652759923721e-06, + "loss": 0.14658582210540771, + "step": 6420 + }, + { + "epoch": 0.868069286016054, + "grad_norm": 0.9932968020439148, + "learning_rate": 1.4660126849617645e-06, + "loss": 0.1845836639404297, + "step": 6421 + }, + { + "epoch": 0.868204478242501, + "grad_norm": 1.276864767074585, + "learning_rate": 1.4630629117794914e-06, + "loss": 0.2045001983642578, + "step": 6422 + }, + { + "epoch": 0.868339670468948, + "grad_norm": 0.9430343508720398, + "learning_rate": 1.4601159570597033e-06, + "loss": 0.16817855834960938, + "step": 6423 + }, + { + "epoch": 0.868474862695395, + "grad_norm": 1.0615726709365845, + "learning_rate": 1.4571718214159795e-06, + "loss": 0.1956474781036377, + "step": 6424 + }, + { + "epoch": 0.868610054921842, + "grad_norm": 1.1551867723464966, + "learning_rate": 1.454230505461303e-06, + "loss": 0.1643831729888916, + "step": 6425 + }, + { + "epoch": 0.8687452471482889, + "grad_norm": 1.5699961185455322, + "learning_rate": 1.4512920098080672e-06, + "loss": 0.21731948852539062, + "step": 6426 + }, + { + "epoch": 0.8688804393747359, + "grad_norm": 1.7338488101959229, + "learning_rate": 1.4483563350680878e-06, + "loss": 0.1677560806274414, + "step": 6427 + }, + { + "epoch": 0.8690156316011829, + "grad_norm": 1.758483648300171, + "learning_rate": 1.4454234818525824e-06, + "loss": 0.2407245635986328, + "step": 6428 + }, + { + "epoch": 0.8691508238276299, + "grad_norm": 1.1380667686462402, + "learning_rate": 1.4424934507721926e-06, + "loss": 0.15755844116210938, + "step": 6429 + }, + { + "epoch": 0.8692860160540768, + "grad_norm": 1.7364165782928467, + "learning_rate": 1.4395662424369622e-06, + "loss": 0.19521808624267578, + "step": 6430 + }, + { + "epoch": 0.8694212082805238, + "grad_norm": 0.9872238636016846, + "learning_rate": 1.436641857456355e-06, + "loss": 0.12627077102661133, + "step": 6431 + }, + { + "epoch": 0.8695564005069708, + "grad_norm": 0.833183765411377, + "learning_rate": 1.4337202964392409e-06, + "loss": 0.18872451782226562, + "step": 6432 + }, + { + "epoch": 0.8696915927334178, + "grad_norm": 0.7500179409980774, + "learning_rate": 1.4308015599939033e-06, + "loss": 0.13712453842163086, + "step": 6433 + }, + { + "epoch": 0.8698267849598648, + "grad_norm": 1.0205035209655762, + "learning_rate": 1.4278856487280428e-06, + "loss": 0.2464752197265625, + "step": 6434 + }, + { + "epoch": 0.8699619771863117, + "grad_norm": 0.7680830359458923, + "learning_rate": 1.4249725632487653e-06, + "loss": 0.13248729705810547, + "step": 6435 + }, + { + "epoch": 0.8700971694127587, + "grad_norm": 0.9121467471122742, + "learning_rate": 1.4220623041625924e-06, + "loss": 0.122802734375, + "step": 6436 + }, + { + "epoch": 0.8702323616392057, + "grad_norm": 0.8302256464958191, + "learning_rate": 1.4191548720754527e-06, + "loss": 0.13457965850830078, + "step": 6437 + }, + { + "epoch": 0.8703675538656527, + "grad_norm": 1.1469273567199707, + "learning_rate": 1.4162502675926887e-06, + "loss": 0.1412487030029297, + "step": 6438 + }, + { + "epoch": 0.8705027460920997, + "grad_norm": 1.5004311800003052, + "learning_rate": 1.4133484913190596e-06, + "loss": 0.2181873321533203, + "step": 6439 + }, + { + "epoch": 0.8706379383185466, + "grad_norm": 0.7366278767585754, + "learning_rate": 1.4104495438587295e-06, + "loss": 0.13414216041564941, + "step": 6440 + }, + { + "epoch": 0.8707731305449936, + "grad_norm": 0.7611256241798401, + "learning_rate": 1.4075534258152667e-06, + "loss": 0.12714362144470215, + "step": 6441 + }, + { + "epoch": 0.8709083227714406, + "grad_norm": 0.927427351474762, + "learning_rate": 1.4046601377916673e-06, + "loss": 0.18413352966308594, + "step": 6442 + }, + { + "epoch": 0.8710435149978876, + "grad_norm": 1.1897666454315186, + "learning_rate": 1.4017696803903246e-06, + "loss": 0.1521916389465332, + "step": 6443 + }, + { + "epoch": 0.8711787072243345, + "grad_norm": 1.5434967279434204, + "learning_rate": 1.3988820542130504e-06, + "loss": 0.13733363151550293, + "step": 6444 + }, + { + "epoch": 0.8713138994507815, + "grad_norm": 0.9383054375648499, + "learning_rate": 1.395997259861067e-06, + "loss": 0.15607070922851562, + "step": 6445 + }, + { + "epoch": 0.8714490916772285, + "grad_norm": 0.9628801941871643, + "learning_rate": 1.3931152979349926e-06, + "loss": 0.11391592025756836, + "step": 6446 + }, + { + "epoch": 0.8715842839036756, + "grad_norm": 1.1624178886413574, + "learning_rate": 1.3902361690348769e-06, + "loss": 0.18275737762451172, + "step": 6447 + }, + { + "epoch": 0.8717194761301226, + "grad_norm": 1.4009239673614502, + "learning_rate": 1.3873598737601639e-06, + "loss": 0.21906280517578125, + "step": 6448 + }, + { + "epoch": 0.8718546683565696, + "grad_norm": 2.043870687484741, + "learning_rate": 1.3844864127097229e-06, + "loss": 0.1984386444091797, + "step": 6449 + }, + { + "epoch": 0.8719898605830165, + "grad_norm": 1.173125147819519, + "learning_rate": 1.3816157864818151e-06, + "loss": 0.18511009216308594, + "step": 6450 + }, + { + "epoch": 0.8721250528094635, + "grad_norm": 2.294313669204712, + "learning_rate": 1.3787479956741194e-06, + "loss": 0.2332448959350586, + "step": 6451 + }, + { + "epoch": 0.8722602450359105, + "grad_norm": 1.191086769104004, + "learning_rate": 1.3758830408837314e-06, + "loss": 0.17751431465148926, + "step": 6452 + }, + { + "epoch": 0.8723954372623575, + "grad_norm": 1.0550912618637085, + "learning_rate": 1.3730209227071439e-06, + "loss": 0.16355609893798828, + "step": 6453 + }, + { + "epoch": 0.8725306294888044, + "grad_norm": 0.7120527625083923, + "learning_rate": 1.3701616417402734e-06, + "loss": 0.14161574840545654, + "step": 6454 + }, + { + "epoch": 0.8726658217152514, + "grad_norm": 1.025841474533081, + "learning_rate": 1.367305198578429e-06, + "loss": 0.17511940002441406, + "step": 6455 + }, + { + "epoch": 0.8728010139416984, + "grad_norm": 0.7685655355453491, + "learning_rate": 1.36445159381634e-06, + "loss": 0.15527749061584473, + "step": 6456 + }, + { + "epoch": 0.8729362061681454, + "grad_norm": 0.9519075751304626, + "learning_rate": 1.361600828048144e-06, + "loss": 0.15294861793518066, + "step": 6457 + }, + { + "epoch": 0.8730713983945924, + "grad_norm": 1.369391679763794, + "learning_rate": 1.3587529018673816e-06, + "loss": 0.17380046844482422, + "step": 6458 + }, + { + "epoch": 0.8732065906210393, + "grad_norm": 1.509925127029419, + "learning_rate": 1.3559078158670152e-06, + "loss": 0.14191436767578125, + "step": 6459 + }, + { + "epoch": 0.8733417828474863, + "grad_norm": 0.9091055989265442, + "learning_rate": 1.353065570639394e-06, + "loss": 0.18527793884277344, + "step": 6460 + }, + { + "epoch": 0.8734769750739333, + "grad_norm": 1.8208065032958984, + "learning_rate": 1.3502261667763e-06, + "loss": 0.17343950271606445, + "step": 6461 + }, + { + "epoch": 0.8736121673003803, + "grad_norm": 1.1622000932693481, + "learning_rate": 1.3473896048689067e-06, + "loss": 0.1895294189453125, + "step": 6462 + }, + { + "epoch": 0.8737473595268272, + "grad_norm": 1.209180235862732, + "learning_rate": 1.3445558855078017e-06, + "loss": 0.22981643676757812, + "step": 6463 + }, + { + "epoch": 0.8738825517532742, + "grad_norm": 1.369468092918396, + "learning_rate": 1.3417250092829814e-06, + "loss": 0.1377144455909729, + "step": 6464 + }, + { + "epoch": 0.8740177439797212, + "grad_norm": 0.9143245816230774, + "learning_rate": 1.338896976783846e-06, + "loss": 0.10422086715698242, + "step": 6465 + }, + { + "epoch": 0.8741529362061682, + "grad_norm": 1.2839452028274536, + "learning_rate": 1.336071788599213e-06, + "loss": 0.14771032333374023, + "step": 6466 + }, + { + "epoch": 0.8742881284326152, + "grad_norm": 0.6701331734657288, + "learning_rate": 1.3332494453172982e-06, + "loss": 0.16073846817016602, + "step": 6467 + }, + { + "epoch": 0.8744233206590621, + "grad_norm": 2.085589647293091, + "learning_rate": 1.3304299475257287e-06, + "loss": 0.22716903686523438, + "step": 6468 + }, + { + "epoch": 0.8745585128855091, + "grad_norm": 1.4576971530914307, + "learning_rate": 1.3276132958115394e-06, + "loss": 0.14553475379943848, + "step": 6469 + }, + { + "epoch": 0.8746937051119561, + "grad_norm": 0.853722333908081, + "learning_rate": 1.32479949076117e-06, + "loss": 0.18283796310424805, + "step": 6470 + }, + { + "epoch": 0.8748288973384031, + "grad_norm": 0.9312347173690796, + "learning_rate": 1.3219885329604747e-06, + "loss": 0.1799945831298828, + "step": 6471 + }, + { + "epoch": 0.8749640895648501, + "grad_norm": 1.914143443107605, + "learning_rate": 1.319180422994709e-06, + "loss": 0.17462730407714844, + "step": 6472 + }, + { + "epoch": 0.875099281791297, + "grad_norm": 0.905396580696106, + "learning_rate": 1.3163751614485287e-06, + "loss": 0.14744949340820312, + "step": 6473 + }, + { + "epoch": 0.875234474017744, + "grad_norm": 0.9209718108177185, + "learning_rate": 1.3135727489060113e-06, + "loss": 0.12139225006103516, + "step": 6474 + }, + { + "epoch": 0.875369666244191, + "grad_norm": 0.7193505764007568, + "learning_rate": 1.3107731859506317e-06, + "loss": 0.1452922821044922, + "step": 6475 + }, + { + "epoch": 0.875504858470638, + "grad_norm": 1.2635722160339355, + "learning_rate": 1.3079764731652772e-06, + "loss": 0.15636277198791504, + "step": 6476 + }, + { + "epoch": 0.875640050697085, + "grad_norm": 1.4347875118255615, + "learning_rate": 1.3051826111322368e-06, + "loss": 0.22640037536621094, + "step": 6477 + }, + { + "epoch": 0.8757752429235319, + "grad_norm": 0.9692603945732117, + "learning_rate": 1.3023916004332021e-06, + "loss": 0.16889095306396484, + "step": 6478 + }, + { + "epoch": 0.8759104351499789, + "grad_norm": 0.5560367703437805, + "learning_rate": 1.2996034416492847e-06, + "loss": 0.12021636962890625, + "step": 6479 + }, + { + "epoch": 0.8760456273764259, + "grad_norm": 1.7467150688171387, + "learning_rate": 1.2968181353609854e-06, + "loss": 0.20093071460723877, + "step": 6480 + }, + { + "epoch": 0.8761808196028729, + "grad_norm": 0.7838498950004578, + "learning_rate": 1.2940356821482285e-06, + "loss": 0.15776348114013672, + "step": 6481 + }, + { + "epoch": 0.8763160118293198, + "grad_norm": 1.67617666721344, + "learning_rate": 1.291256082590334e-06, + "loss": 0.17545604705810547, + "step": 6482 + }, + { + "epoch": 0.8764512040557668, + "grad_norm": 0.7530940175056458, + "learning_rate": 1.2884793372660208e-06, + "loss": 0.13154888153076172, + "step": 6483 + }, + { + "epoch": 0.8765863962822138, + "grad_norm": 1.8822190761566162, + "learning_rate": 1.285705446753433e-06, + "loss": 0.14623165130615234, + "step": 6484 + }, + { + "epoch": 0.8767215885086608, + "grad_norm": 1.1215318441390991, + "learning_rate": 1.2829344116301e-06, + "loss": 0.13733911514282227, + "step": 6485 + }, + { + "epoch": 0.8768567807351078, + "grad_norm": 1.067826271057129, + "learning_rate": 1.2801662324729774e-06, + "loss": 0.1979503631591797, + "step": 6486 + }, + { + "epoch": 0.8769919729615547, + "grad_norm": 2.797069787979126, + "learning_rate": 1.2774009098584055e-06, + "loss": 0.19020843505859375, + "step": 6487 + }, + { + "epoch": 0.8771271651880017, + "grad_norm": 1.5129469633102417, + "learning_rate": 1.274638444362139e-06, + "loss": 0.1667652130126953, + "step": 6488 + }, + { + "epoch": 0.8772623574144487, + "grad_norm": 1.2476168870925903, + "learning_rate": 1.2718788365593443e-06, + "loss": 0.20147371292114258, + "step": 6489 + }, + { + "epoch": 0.8773975496408957, + "grad_norm": 0.6584613919258118, + "learning_rate": 1.26912208702458e-06, + "loss": 0.10057687759399414, + "step": 6490 + }, + { + "epoch": 0.8775327418673426, + "grad_norm": 0.9104622006416321, + "learning_rate": 1.2663681963318242e-06, + "loss": 0.14842987060546875, + "step": 6491 + }, + { + "epoch": 0.8776679340937896, + "grad_norm": 1.123921513557434, + "learning_rate": 1.2636171650544443e-06, + "loss": 0.12040328979492188, + "step": 6492 + }, + { + "epoch": 0.8778031263202366, + "grad_norm": 1.5896222591400146, + "learning_rate": 1.260868993765219e-06, + "loss": 0.1627359390258789, + "step": 6493 + }, + { + "epoch": 0.8779383185466836, + "grad_norm": 0.8061502575874329, + "learning_rate": 1.258123683036339e-06, + "loss": 0.14883995056152344, + "step": 6494 + }, + { + "epoch": 0.8780735107731306, + "grad_norm": 1.2326322793960571, + "learning_rate": 1.2553812334393872e-06, + "loss": 0.1844642162322998, + "step": 6495 + }, + { + "epoch": 0.8782087029995775, + "grad_norm": 0.980921745300293, + "learning_rate": 1.2526416455453582e-06, + "loss": 0.18453216552734375, + "step": 6496 + }, + { + "epoch": 0.8783438952260245, + "grad_norm": 1.2596570253372192, + "learning_rate": 1.249904919924646e-06, + "loss": 0.14360570907592773, + "step": 6497 + }, + { + "epoch": 0.8784790874524715, + "grad_norm": 1.179748296737671, + "learning_rate": 1.2471710571470579e-06, + "loss": 0.18545126914978027, + "step": 6498 + }, + { + "epoch": 0.8786142796789185, + "grad_norm": 1.4292460680007935, + "learning_rate": 1.2444400577817922e-06, + "loss": 0.1529979705810547, + "step": 6499 + }, + { + "epoch": 0.8787494719053655, + "grad_norm": 0.7834233641624451, + "learning_rate": 1.2417119223974621e-06, + "loss": 0.1285996437072754, + "step": 6500 + }, + { + "epoch": 0.8788846641318124, + "grad_norm": 1.0200555324554443, + "learning_rate": 1.2389866515620768e-06, + "loss": 0.1552438735961914, + "step": 6501 + }, + { + "epoch": 0.8790198563582594, + "grad_norm": 1.1155558824539185, + "learning_rate": 1.2362642458430505e-06, + "loss": 0.1605844497680664, + "step": 6502 + }, + { + "epoch": 0.8791550485847064, + "grad_norm": 1.4928619861602783, + "learning_rate": 1.2335447058072103e-06, + "loss": 0.1800251007080078, + "step": 6503 + }, + { + "epoch": 0.8792902408111534, + "grad_norm": 0.7532255053520203, + "learning_rate": 1.230828032020771e-06, + "loss": 0.15865063667297363, + "step": 6504 + }, + { + "epoch": 0.8794254330376003, + "grad_norm": 1.2514050006866455, + "learning_rate": 1.2281142250493638e-06, + "loss": 0.18199777603149414, + "step": 6505 + }, + { + "epoch": 0.8795606252640473, + "grad_norm": 1.4715584516525269, + "learning_rate": 1.225403285458015e-06, + "loss": 0.23967552185058594, + "step": 6506 + }, + { + "epoch": 0.8796958174904943, + "grad_norm": 1.1212742328643799, + "learning_rate": 1.2226952138111546e-06, + "loss": 0.1686382293701172, + "step": 6507 + }, + { + "epoch": 0.8798310097169413, + "grad_norm": 0.8724827170372009, + "learning_rate": 1.219990010672622e-06, + "loss": 0.1712191104888916, + "step": 6508 + }, + { + "epoch": 0.8799662019433883, + "grad_norm": 1.4848971366882324, + "learning_rate": 1.2172876766056562e-06, + "loss": 0.1912059783935547, + "step": 6509 + }, + { + "epoch": 0.8801013941698352, + "grad_norm": 1.0990840196609497, + "learning_rate": 1.2145882121728906e-06, + "loss": 0.15616416931152344, + "step": 6510 + }, + { + "epoch": 0.8802365863962822, + "grad_norm": 0.9132286906242371, + "learning_rate": 1.2118916179363727e-06, + "loss": 0.09194135665893555, + "step": 6511 + }, + { + "epoch": 0.8803717786227292, + "grad_norm": 0.6504778265953064, + "learning_rate": 1.209197894457546e-06, + "loss": 0.1567840576171875, + "step": 6512 + }, + { + "epoch": 0.8805069708491762, + "grad_norm": 1.0683306455612183, + "learning_rate": 1.2065070422972606e-06, + "loss": 0.1783415675163269, + "step": 6513 + }, + { + "epoch": 0.8806421630756232, + "grad_norm": 1.5259732007980347, + "learning_rate": 1.2038190620157685e-06, + "loss": 0.1909313201904297, + "step": 6514 + }, + { + "epoch": 0.8807773553020701, + "grad_norm": 1.4287910461425781, + "learning_rate": 1.2011339541727117e-06, + "loss": 0.20134520530700684, + "step": 6515 + }, + { + "epoch": 0.8809125475285171, + "grad_norm": 1.361433506011963, + "learning_rate": 1.198451719327155e-06, + "loss": 0.131264328956604, + "step": 6516 + }, + { + "epoch": 0.8810477397549641, + "grad_norm": 0.9544827938079834, + "learning_rate": 1.1957723580375447e-06, + "loss": 0.20270538330078125, + "step": 6517 + }, + { + "epoch": 0.8811829319814111, + "grad_norm": 1.1416447162628174, + "learning_rate": 1.193095870861748e-06, + "loss": 0.22045516967773438, + "step": 6518 + }, + { + "epoch": 0.881318124207858, + "grad_norm": 1.3852561712265015, + "learning_rate": 1.1904222583570156e-06, + "loss": 0.14323043823242188, + "step": 6519 + }, + { + "epoch": 0.881453316434305, + "grad_norm": 1.7089171409606934, + "learning_rate": 1.1877515210800077e-06, + "loss": 0.1660609245300293, + "step": 6520 + }, + { + "epoch": 0.881588508660752, + "grad_norm": 0.8545126914978027, + "learning_rate": 1.1850836595867925e-06, + "loss": 0.15291047096252441, + "step": 6521 + }, + { + "epoch": 0.881723700887199, + "grad_norm": 1.2741386890411377, + "learning_rate": 1.1824186744328259e-06, + "loss": 0.18982505798339844, + "step": 6522 + }, + { + "epoch": 0.881858893113646, + "grad_norm": 0.9314596056938171, + "learning_rate": 1.179756566172982e-06, + "loss": 0.1536264419555664, + "step": 6523 + }, + { + "epoch": 0.8819940853400929, + "grad_norm": 1.5855191946029663, + "learning_rate": 1.177097335361516e-06, + "loss": 0.2144012451171875, + "step": 6524 + }, + { + "epoch": 0.8821292775665399, + "grad_norm": 0.6500311493873596, + "learning_rate": 1.1744409825520969e-06, + "loss": 0.10776782035827637, + "step": 6525 + }, + { + "epoch": 0.8822644697929869, + "grad_norm": 0.8030337691307068, + "learning_rate": 1.171787508297792e-06, + "loss": 0.15600013732910156, + "step": 6526 + }, + { + "epoch": 0.8823996620194339, + "grad_norm": 1.2796467542648315, + "learning_rate": 1.1691369131510676e-06, + "loss": 0.15424847602844238, + "step": 6527 + }, + { + "epoch": 0.8825348542458809, + "grad_norm": 0.7039836645126343, + "learning_rate": 1.1664891976637992e-06, + "loss": 0.1290445327758789, + "step": 6528 + }, + { + "epoch": 0.8826700464723278, + "grad_norm": 1.9667648077011108, + "learning_rate": 1.1638443623872442e-06, + "loss": 0.22120535373687744, + "step": 6529 + }, + { + "epoch": 0.8828052386987748, + "grad_norm": 0.7460547089576721, + "learning_rate": 1.1612024078720752e-06, + "loss": 0.13184571266174316, + "step": 6530 + }, + { + "epoch": 0.8829404309252218, + "grad_norm": 0.9625377655029297, + "learning_rate": 1.1585633346683655e-06, + "loss": 0.14002609252929688, + "step": 6531 + }, + { + "epoch": 0.8830756231516688, + "grad_norm": 1.9665478467941284, + "learning_rate": 1.155927143325579e-06, + "loss": 0.16969597339630127, + "step": 6532 + }, + { + "epoch": 0.8832108153781157, + "grad_norm": 1.0385044813156128, + "learning_rate": 1.1532938343925887e-06, + "loss": 0.17267227172851562, + "step": 6533 + }, + { + "epoch": 0.8833460076045627, + "grad_norm": 1.2857882976531982, + "learning_rate": 1.1506634084176587e-06, + "loss": 0.162506103515625, + "step": 6534 + }, + { + "epoch": 0.8834811998310097, + "grad_norm": 0.7613010406494141, + "learning_rate": 1.148035865948463e-06, + "loss": 0.13284587860107422, + "step": 6535 + }, + { + "epoch": 0.8836163920574567, + "grad_norm": 1.1295785903930664, + "learning_rate": 1.1454112075320688e-06, + "loss": 0.16387176513671875, + "step": 6536 + }, + { + "epoch": 0.8837515842839037, + "grad_norm": 0.8276877999305725, + "learning_rate": 1.1427894337149426e-06, + "loss": 0.1426563262939453, + "step": 6537 + }, + { + "epoch": 0.8838867765103506, + "grad_norm": 1.4408518075942993, + "learning_rate": 1.1401705450429506e-06, + "loss": 0.1465773582458496, + "step": 6538 + }, + { + "epoch": 0.8840219687367976, + "grad_norm": 1.148671269416809, + "learning_rate": 1.1375545420613586e-06, + "loss": 0.1928114891052246, + "step": 6539 + }, + { + "epoch": 0.8841571609632446, + "grad_norm": 0.7789602875709534, + "learning_rate": 1.1349414253148377e-06, + "loss": 0.17882466316223145, + "step": 6540 + }, + { + "epoch": 0.8842923531896916, + "grad_norm": 1.3094373941421509, + "learning_rate": 1.1323311953474524e-06, + "loss": 0.23018646240234375, + "step": 6541 + }, + { + "epoch": 0.8844275454161385, + "grad_norm": 1.290117859840393, + "learning_rate": 1.1297238527026582e-06, + "loss": 0.20974111557006836, + "step": 6542 + }, + { + "epoch": 0.8845627376425855, + "grad_norm": 0.8406637907028198, + "learning_rate": 1.1271193979233258e-06, + "loss": 0.14974021911621094, + "step": 6543 + }, + { + "epoch": 0.8846979298690325, + "grad_norm": 1.1002711057662964, + "learning_rate": 1.1245178315517113e-06, + "loss": 0.19750595092773438, + "step": 6544 + }, + { + "epoch": 0.8848331220954795, + "grad_norm": 0.8967341184616089, + "learning_rate": 1.1219191541294798e-06, + "loss": 0.13075494766235352, + "step": 6545 + }, + { + "epoch": 0.8849683143219265, + "grad_norm": 0.8628976345062256, + "learning_rate": 1.1193233661976887e-06, + "loss": 0.1522972583770752, + "step": 6546 + }, + { + "epoch": 0.8851035065483734, + "grad_norm": 2.2426846027374268, + "learning_rate": 1.1167304682967904e-06, + "loss": 0.22336578369140625, + "step": 6547 + }, + { + "epoch": 0.8852386987748204, + "grad_norm": 0.8924522995948792, + "learning_rate": 1.114140460966645e-06, + "loss": 0.16767597198486328, + "step": 6548 + }, + { + "epoch": 0.8853738910012674, + "grad_norm": 0.9396560788154602, + "learning_rate": 1.111553344746501e-06, + "loss": 0.17200660705566406, + "step": 6549 + }, + { + "epoch": 0.8855090832277144, + "grad_norm": 0.8493363857269287, + "learning_rate": 1.1089691201750174e-06, + "loss": 0.14303302764892578, + "step": 6550 + }, + { + "epoch": 0.8856442754541614, + "grad_norm": 1.7896114587783813, + "learning_rate": 1.106387787790239e-06, + "loss": 0.19734787940979004, + "step": 6551 + }, + { + "epoch": 0.8857794676806083, + "grad_norm": 1.8230129480361938, + "learning_rate": 1.1038093481296091e-06, + "loss": 0.24407577514648438, + "step": 6552 + }, + { + "epoch": 0.8859146599070553, + "grad_norm": 1.031961441040039, + "learning_rate": 1.10123380172998e-06, + "loss": 0.1483926773071289, + "step": 6553 + }, + { + "epoch": 0.8860498521335023, + "grad_norm": 1.0547319650650024, + "learning_rate": 1.098661149127586e-06, + "loss": 0.16070556640625, + "step": 6554 + }, + { + "epoch": 0.8861850443599493, + "grad_norm": 0.7637943625450134, + "learning_rate": 1.0960913908580788e-06, + "loss": 0.12938213348388672, + "step": 6555 + }, + { + "epoch": 0.8863202365863962, + "grad_norm": 1.0752573013305664, + "learning_rate": 1.0935245274564852e-06, + "loss": 0.16737616062164307, + "step": 6556 + }, + { + "epoch": 0.8864554288128432, + "grad_norm": 0.7607343196868896, + "learning_rate": 1.0909605594572413e-06, + "loss": 0.1563572883605957, + "step": 6557 + }, + { + "epoch": 0.8865906210392902, + "grad_norm": 0.940025806427002, + "learning_rate": 1.0883994873941816e-06, + "loss": 0.17044353485107422, + "step": 6558 + }, + { + "epoch": 0.8867258132657372, + "grad_norm": 1.5754863023757935, + "learning_rate": 1.0858413118005345e-06, + "loss": 0.1802845001220703, + "step": 6559 + }, + { + "epoch": 0.8868610054921842, + "grad_norm": 0.9095927476882935, + "learning_rate": 1.0832860332089288e-06, + "loss": 0.1260828971862793, + "step": 6560 + }, + { + "epoch": 0.8869961977186311, + "grad_norm": 2.0084259510040283, + "learning_rate": 1.0807336521513828e-06, + "loss": 0.17546653747558594, + "step": 6561 + }, + { + "epoch": 0.8871313899450781, + "grad_norm": 1.382297158241272, + "learning_rate": 1.0781841691593142e-06, + "loss": 0.20831632614135742, + "step": 6562 + }, + { + "epoch": 0.8872665821715251, + "grad_norm": 0.7710789442062378, + "learning_rate": 1.0756375847635435e-06, + "loss": 0.13666105270385742, + "step": 6563 + }, + { + "epoch": 0.8874017743979721, + "grad_norm": 0.8217912912368774, + "learning_rate": 1.0730938994942818e-06, + "loss": 0.11766195297241211, + "step": 6564 + }, + { + "epoch": 0.887536966624419, + "grad_norm": 1.270566701889038, + "learning_rate": 1.070553113881137e-06, + "loss": 0.11794900894165039, + "step": 6565 + }, + { + "epoch": 0.887672158850866, + "grad_norm": 1.6255724430084229, + "learning_rate": 1.0680152284531158e-06, + "loss": 0.19935989379882812, + "step": 6566 + }, + { + "epoch": 0.887807351077313, + "grad_norm": 1.008938193321228, + "learning_rate": 1.0654802437386157e-06, + "loss": 0.2162036895751953, + "step": 6567 + }, + { + "epoch": 0.88794254330376, + "grad_norm": 2.335869312286377, + "learning_rate": 1.062948160265438e-06, + "loss": 0.17975997924804688, + "step": 6568 + }, + { + "epoch": 0.888077735530207, + "grad_norm": 1.128427267074585, + "learning_rate": 1.0604189785607772e-06, + "loss": 0.17583847045898438, + "step": 6569 + }, + { + "epoch": 0.888212927756654, + "grad_norm": 1.059628963470459, + "learning_rate": 1.0578926991512171e-06, + "loss": 0.14550495147705078, + "step": 6570 + }, + { + "epoch": 0.8883481199831009, + "grad_norm": 0.9011848568916321, + "learning_rate": 1.0553693225627458e-06, + "loss": 0.15854835510253906, + "step": 6571 + }, + { + "epoch": 0.8884833122095479, + "grad_norm": 0.9133105874061584, + "learning_rate": 1.0528488493207444e-06, + "loss": 0.1428055763244629, + "step": 6572 + }, + { + "epoch": 0.8886185044359949, + "grad_norm": 1.562174677848816, + "learning_rate": 1.0503312799499898e-06, + "loss": 0.18050241470336914, + "step": 6573 + }, + { + "epoch": 0.8887536966624419, + "grad_norm": 1.0124595165252686, + "learning_rate": 1.0478166149746476e-06, + "loss": 0.17652177810668945, + "step": 6574 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 1.5915160179138184, + "learning_rate": 1.0453048549182892e-06, + "loss": 0.1987318992614746, + "step": 6575 + }, + { + "epoch": 0.8890240811153358, + "grad_norm": 1.004435658454895, + "learning_rate": 1.0427960003038744e-06, + "loss": 0.14862060546875, + "step": 6576 + }, + { + "epoch": 0.8891592733417828, + "grad_norm": 0.7279787659645081, + "learning_rate": 1.040290051653764e-06, + "loss": 0.12130200862884521, + "step": 6577 + }, + { + "epoch": 0.8892944655682298, + "grad_norm": 1.5734901428222656, + "learning_rate": 1.0377870094897085e-06, + "loss": 0.19864702224731445, + "step": 6578 + }, + { + "epoch": 0.8894296577946768, + "grad_norm": 1.0018874406814575, + "learning_rate": 1.0352868743328497e-06, + "loss": 0.12802064418792725, + "step": 6579 + }, + { + "epoch": 0.8895648500211237, + "grad_norm": 2.0887367725372314, + "learning_rate": 1.032789646703733e-06, + "loss": 0.18796443939208984, + "step": 6580 + }, + { + "epoch": 0.8897000422475707, + "grad_norm": 0.7076752185821533, + "learning_rate": 1.0302953271222938e-06, + "loss": 0.13414430618286133, + "step": 6581 + }, + { + "epoch": 0.8898352344740177, + "grad_norm": 0.9663779735565186, + "learning_rate": 1.0278039161078634e-06, + "loss": 0.21762943267822266, + "step": 6582 + }, + { + "epoch": 0.8899704267004648, + "grad_norm": 0.9245215058326721, + "learning_rate": 1.0253154141791705e-06, + "loss": 0.16146814823150635, + "step": 6583 + }, + { + "epoch": 0.8901056189269118, + "grad_norm": 0.6266096234321594, + "learning_rate": 1.0228298218543253e-06, + "loss": 0.12328863143920898, + "step": 6584 + }, + { + "epoch": 0.8902408111533587, + "grad_norm": 0.831498384475708, + "learning_rate": 1.020347139650849e-06, + "loss": 0.15243816375732422, + "step": 6585 + }, + { + "epoch": 0.8903760033798057, + "grad_norm": 0.944320023059845, + "learning_rate": 1.0178673680856448e-06, + "loss": 0.1877889633178711, + "step": 6586 + }, + { + "epoch": 0.8905111956062527, + "grad_norm": 0.9225878715515137, + "learning_rate": 1.0153905076750196e-06, + "loss": 0.1655750274658203, + "step": 6587 + }, + { + "epoch": 0.8906463878326997, + "grad_norm": 0.7733673453330994, + "learning_rate": 1.0129165589346644e-06, + "loss": 0.17371702194213867, + "step": 6588 + }, + { + "epoch": 0.8907815800591466, + "grad_norm": 1.5543419122695923, + "learning_rate": 1.0104455223796688e-06, + "loss": 0.16976165771484375, + "step": 6589 + }, + { + "epoch": 0.8909167722855936, + "grad_norm": 1.9064877033233643, + "learning_rate": 1.0079773985245178e-06, + "loss": 0.1680774688720703, + "step": 6590 + }, + { + "epoch": 0.8910519645120406, + "grad_norm": 1.4115545749664307, + "learning_rate": 1.0055121878830837e-06, + "loss": 0.2322063446044922, + "step": 6591 + }, + { + "epoch": 0.8911871567384876, + "grad_norm": 1.4145376682281494, + "learning_rate": 1.0030498909686458e-06, + "loss": 0.20521163940429688, + "step": 6592 + }, + { + "epoch": 0.8913223489649346, + "grad_norm": 1.4466968774795532, + "learning_rate": 1.0005905082938593e-06, + "loss": 0.17391395568847656, + "step": 6593 + }, + { + "epoch": 0.8914575411913815, + "grad_norm": 0.7907772660255432, + "learning_rate": 9.981340403707794e-07, + "loss": 0.14064311981201172, + "step": 6594 + }, + { + "epoch": 0.8915927334178285, + "grad_norm": 0.6483770608901978, + "learning_rate": 9.956804877108638e-07, + "loss": 0.08921170234680176, + "step": 6595 + }, + { + "epoch": 0.8917279256442755, + "grad_norm": 1.345627784729004, + "learning_rate": 9.932298508249488e-07, + "loss": 0.15694713592529297, + "step": 6596 + }, + { + "epoch": 0.8918631178707225, + "grad_norm": 0.9512044787406921, + "learning_rate": 9.907821302232729e-07, + "loss": 0.18037033081054688, + "step": 6597 + }, + { + "epoch": 0.8919983100971695, + "grad_norm": 0.7375819087028503, + "learning_rate": 9.883373264154633e-07, + "loss": 0.15340042114257812, + "step": 6598 + }, + { + "epoch": 0.8921335023236164, + "grad_norm": 1.1011661291122437, + "learning_rate": 9.858954399105397e-07, + "loss": 0.16709041595458984, + "step": 6599 + }, + { + "epoch": 0.8922686945500634, + "grad_norm": 0.9979453682899475, + "learning_rate": 9.834564712169202e-07, + "loss": 0.19518184661865234, + "step": 6600 + }, + { + "epoch": 0.8924038867765104, + "grad_norm": 1.0354499816894531, + "learning_rate": 9.81020420842409e-07, + "loss": 0.16480469703674316, + "step": 6601 + }, + { + "epoch": 0.8925390790029574, + "grad_norm": 0.8557064533233643, + "learning_rate": 9.785872892942033e-07, + "loss": 0.15523147583007812, + "step": 6602 + }, + { + "epoch": 0.8926742712294043, + "grad_norm": 0.6182724833488464, + "learning_rate": 9.761570770788964e-07, + "loss": 0.1368236541748047, + "step": 6603 + }, + { + "epoch": 0.8928094634558513, + "grad_norm": 1.4018851518630981, + "learning_rate": 9.737297847024685e-07, + "loss": 0.20059490203857422, + "step": 6604 + }, + { + "epoch": 0.8929446556822983, + "grad_norm": 1.0296344757080078, + "learning_rate": 9.713054126702968e-07, + "loss": 0.19029617309570312, + "step": 6605 + }, + { + "epoch": 0.8930798479087453, + "grad_norm": 1.1691275835037231, + "learning_rate": 9.688839614871497e-07, + "loss": 0.1652843952178955, + "step": 6606 + }, + { + "epoch": 0.8932150401351923, + "grad_norm": 1.9045733213424683, + "learning_rate": 9.664654316571852e-07, + "loss": 0.23373985290527344, + "step": 6607 + }, + { + "epoch": 0.8933502323616392, + "grad_norm": 1.368884563446045, + "learning_rate": 9.640498236839507e-07, + "loss": 0.1771697998046875, + "step": 6608 + }, + { + "epoch": 0.8934854245880862, + "grad_norm": 0.8743281960487366, + "learning_rate": 9.616371380703953e-07, + "loss": 0.18822669982910156, + "step": 6609 + }, + { + "epoch": 0.8936206168145332, + "grad_norm": 0.8311845660209656, + "learning_rate": 9.592273753188507e-07, + "loss": 0.14622116088867188, + "step": 6610 + }, + { + "epoch": 0.8937558090409802, + "grad_norm": 1.4363242387771606, + "learning_rate": 9.568205359310372e-07, + "loss": 0.15888357162475586, + "step": 6611 + }, + { + "epoch": 0.8938910012674272, + "grad_norm": 1.1034234762191772, + "learning_rate": 9.544166204080772e-07, + "loss": 0.1808767318725586, + "step": 6612 + }, + { + "epoch": 0.8940261934938741, + "grad_norm": 0.9408245086669922, + "learning_rate": 9.520156292504739e-07, + "loss": 0.1525440216064453, + "step": 6613 + }, + { + "epoch": 0.8941613857203211, + "grad_norm": 2.0564417839050293, + "learning_rate": 9.496175629581322e-07, + "loss": 0.17179840803146362, + "step": 6614 + }, + { + "epoch": 0.8942965779467681, + "grad_norm": 1.7813953161239624, + "learning_rate": 9.472224220303427e-07, + "loss": 0.19536590576171875, + "step": 6615 + }, + { + "epoch": 0.8944317701732151, + "grad_norm": 0.8127740621566772, + "learning_rate": 9.448302069657799e-07, + "loss": 0.1358175277709961, + "step": 6616 + }, + { + "epoch": 0.894566962399662, + "grad_norm": 1.2911267280578613, + "learning_rate": 9.424409182625205e-07, + "loss": 0.15685749053955078, + "step": 6617 + }, + { + "epoch": 0.894702154626109, + "grad_norm": 1.1219089031219482, + "learning_rate": 9.40054556418023e-07, + "loss": 0.22308731079101562, + "step": 6618 + }, + { + "epoch": 0.894837346852556, + "grad_norm": 1.0438556671142578, + "learning_rate": 9.376711219291483e-07, + "loss": 0.1798267364501953, + "step": 6619 + }, + { + "epoch": 0.894972539079003, + "grad_norm": 1.3127247095108032, + "learning_rate": 9.352906152921348e-07, + "loss": 0.16666698455810547, + "step": 6620 + }, + { + "epoch": 0.89510773130545, + "grad_norm": 0.877829909324646, + "learning_rate": 9.32913037002614e-07, + "loss": 0.1724621057510376, + "step": 6621 + }, + { + "epoch": 0.8952429235318969, + "grad_norm": 0.7444975972175598, + "learning_rate": 9.30538387555615e-07, + "loss": 0.1267385482788086, + "step": 6622 + }, + { + "epoch": 0.8953781157583439, + "grad_norm": 2.1655256748199463, + "learning_rate": 9.281666674455508e-07, + "loss": 0.2221851348876953, + "step": 6623 + }, + { + "epoch": 0.8955133079847909, + "grad_norm": 1.118656873703003, + "learning_rate": 9.257978771662295e-07, + "loss": 0.17228031158447266, + "step": 6624 + }, + { + "epoch": 0.8956485002112379, + "grad_norm": 1.2261126041412354, + "learning_rate": 9.234320172108418e-07, + "loss": 0.17986488342285156, + "step": 6625 + }, + { + "epoch": 0.8957836924376849, + "grad_norm": 0.998991847038269, + "learning_rate": 9.210690880719719e-07, + "loss": 0.17502212524414062, + "step": 6626 + }, + { + "epoch": 0.8959188846641318, + "grad_norm": 1.0733058452606201, + "learning_rate": 9.187090902415962e-07, + "loss": 0.19030380249023438, + "step": 6627 + }, + { + "epoch": 0.8960540768905788, + "grad_norm": 1.1971570253372192, + "learning_rate": 9.163520242110784e-07, + "loss": 0.16012287139892578, + "step": 6628 + }, + { + "epoch": 0.8961892691170258, + "grad_norm": 1.0565327405929565, + "learning_rate": 9.13997890471176e-07, + "loss": 0.1644878387451172, + "step": 6629 + }, + { + "epoch": 0.8963244613434728, + "grad_norm": 0.9064598083496094, + "learning_rate": 9.116466895120251e-07, + "loss": 0.1627044677734375, + "step": 6630 + }, + { + "epoch": 0.8964596535699197, + "grad_norm": 0.8583405017852783, + "learning_rate": 9.092984218231609e-07, + "loss": 0.15228843688964844, + "step": 6631 + }, + { + "epoch": 0.8965948457963667, + "grad_norm": 1.5336501598358154, + "learning_rate": 9.069530878935072e-07, + "loss": 0.22105026245117188, + "step": 6632 + }, + { + "epoch": 0.8967300380228137, + "grad_norm": 1.000156044960022, + "learning_rate": 9.046106882113753e-07, + "loss": 0.1772022247314453, + "step": 6633 + }, + { + "epoch": 0.8968652302492607, + "grad_norm": 0.9172965288162231, + "learning_rate": 9.022712232644631e-07, + "loss": 0.1481032371520996, + "step": 6634 + }, + { + "epoch": 0.8970004224757077, + "grad_norm": 0.7959843873977661, + "learning_rate": 8.999346935398611e-07, + "loss": 0.15102195739746094, + "step": 6635 + }, + { + "epoch": 0.8971356147021546, + "grad_norm": 1.2164933681488037, + "learning_rate": 8.976010995240436e-07, + "loss": 0.14605331420898438, + "step": 6636 + }, + { + "epoch": 0.8972708069286016, + "grad_norm": 1.1046253442764282, + "learning_rate": 8.952704417028818e-07, + "loss": 0.1568288803100586, + "step": 6637 + }, + { + "epoch": 0.8974059991550486, + "grad_norm": 1.0943857431411743, + "learning_rate": 8.929427205616308e-07, + "loss": 0.1493082046508789, + "step": 6638 + }, + { + "epoch": 0.8975411913814956, + "grad_norm": 1.0855458974838257, + "learning_rate": 8.906179365849332e-07, + "loss": 0.2113513946533203, + "step": 6639 + }, + { + "epoch": 0.8976763836079426, + "grad_norm": 1.7264057397842407, + "learning_rate": 8.882960902568216e-07, + "loss": 0.19024419784545898, + "step": 6640 + }, + { + "epoch": 0.8978115758343895, + "grad_norm": 1.164091944694519, + "learning_rate": 8.85977182060716e-07, + "loss": 0.18418312072753906, + "step": 6641 + }, + { + "epoch": 0.8979467680608365, + "grad_norm": 2.2955360412597656, + "learning_rate": 8.836612124794285e-07, + "loss": 0.20495223999023438, + "step": 6642 + }, + { + "epoch": 0.8980819602872835, + "grad_norm": 0.7050706744194031, + "learning_rate": 8.813481819951502e-07, + "loss": 0.15507221221923828, + "step": 6643 + }, + { + "epoch": 0.8982171525137305, + "grad_norm": 0.8720549941062927, + "learning_rate": 8.790380910894724e-07, + "loss": 0.18284130096435547, + "step": 6644 + }, + { + "epoch": 0.8983523447401774, + "grad_norm": 1.4186044931411743, + "learning_rate": 8.767309402433671e-07, + "loss": 0.1862649917602539, + "step": 6645 + }, + { + "epoch": 0.8984875369666244, + "grad_norm": 1.249140739440918, + "learning_rate": 8.744267299371917e-07, + "loss": 0.22250747680664062, + "step": 6646 + }, + { + "epoch": 0.8986227291930714, + "grad_norm": 1.2052971124649048, + "learning_rate": 8.721254606507023e-07, + "loss": 0.23142433166503906, + "step": 6647 + }, + { + "epoch": 0.8987579214195184, + "grad_norm": 1.3691017627716064, + "learning_rate": 8.698271328630275e-07, + "loss": 0.14511680603027344, + "step": 6648 + }, + { + "epoch": 0.8988931136459654, + "grad_norm": 2.337817668914795, + "learning_rate": 8.675317470526961e-07, + "loss": 0.20157241821289062, + "step": 6649 + }, + { + "epoch": 0.8990283058724123, + "grad_norm": 1.4767838716506958, + "learning_rate": 8.652393036976159e-07, + "loss": 0.20978212356567383, + "step": 6650 + }, + { + "epoch": 0.8991634980988593, + "grad_norm": 0.9995988607406616, + "learning_rate": 8.629498032750916e-07, + "loss": 0.17191696166992188, + "step": 6651 + }, + { + "epoch": 0.8992986903253063, + "grad_norm": 1.2044475078582764, + "learning_rate": 8.606632462618069e-07, + "loss": 0.1571044921875, + "step": 6652 + }, + { + "epoch": 0.8994338825517533, + "grad_norm": 1.2899765968322754, + "learning_rate": 8.583796331338311e-07, + "loss": 0.16674983501434326, + "step": 6653 + }, + { + "epoch": 0.8995690747782003, + "grad_norm": 1.4176361560821533, + "learning_rate": 8.560989643666306e-07, + "loss": 0.215118408203125, + "step": 6654 + }, + { + "epoch": 0.8997042670046472, + "grad_norm": 0.7933415174484253, + "learning_rate": 8.538212404350471e-07, + "loss": 0.1616649627685547, + "step": 6655 + }, + { + "epoch": 0.8998394592310942, + "grad_norm": 1.0810478925704956, + "learning_rate": 8.515464618133228e-07, + "loss": 0.18869686126708984, + "step": 6656 + }, + { + "epoch": 0.8999746514575412, + "grad_norm": 1.2443526983261108, + "learning_rate": 8.492746289750725e-07, + "loss": 0.19379818439483643, + "step": 6657 + }, + { + "epoch": 0.9001098436839882, + "grad_norm": 0.8801183104515076, + "learning_rate": 8.470057423933026e-07, + "loss": 0.1304638385772705, + "step": 6658 + }, + { + "epoch": 0.9002450359104351, + "grad_norm": 1.096293568611145, + "learning_rate": 8.447398025404118e-07, + "loss": 0.15602248907089233, + "step": 6659 + }, + { + "epoch": 0.9003802281368821, + "grad_norm": 1.0055476427078247, + "learning_rate": 8.42476809888178e-07, + "loss": 0.17836523056030273, + "step": 6660 + }, + { + "epoch": 0.9005154203633291, + "grad_norm": 1.208188772201538, + "learning_rate": 8.402167649077725e-07, + "loss": 0.17304229736328125, + "step": 6661 + }, + { + "epoch": 0.9006506125897761, + "grad_norm": 1.6339181661605835, + "learning_rate": 8.379596680697454e-07, + "loss": 0.18050909042358398, + "step": 6662 + }, + { + "epoch": 0.9007858048162231, + "grad_norm": 0.986391007900238, + "learning_rate": 8.357055198440328e-07, + "loss": 0.1651301383972168, + "step": 6663 + }, + { + "epoch": 0.90092099704267, + "grad_norm": 1.594878077507019, + "learning_rate": 8.334543206999673e-07, + "loss": 0.1588139533996582, + "step": 6664 + }, + { + "epoch": 0.901056189269117, + "grad_norm": 1.9142035245895386, + "learning_rate": 8.312060711062558e-07, + "loss": 0.23285150527954102, + "step": 6665 + }, + { + "epoch": 0.901191381495564, + "grad_norm": 1.1975116729736328, + "learning_rate": 8.289607715309988e-07, + "loss": 0.15429186820983887, + "step": 6666 + }, + { + "epoch": 0.901326573722011, + "grad_norm": 1.0255736112594604, + "learning_rate": 8.267184224416791e-07, + "loss": 0.18127059936523438, + "step": 6667 + }, + { + "epoch": 0.901461765948458, + "grad_norm": 1.7517188787460327, + "learning_rate": 8.244790243051614e-07, + "loss": 0.19010353088378906, + "step": 6668 + }, + { + "epoch": 0.9015969581749049, + "grad_norm": 1.193472146987915, + "learning_rate": 8.222425775877079e-07, + "loss": 0.2161407470703125, + "step": 6669 + }, + { + "epoch": 0.9017321504013519, + "grad_norm": 1.3372970819473267, + "learning_rate": 8.200090827549527e-07, + "loss": 0.2160816192626953, + "step": 6670 + }, + { + "epoch": 0.9018673426277989, + "grad_norm": 0.7680690288543701, + "learning_rate": 8.17778540271924e-07, + "loss": 0.14060938358306885, + "step": 6671 + }, + { + "epoch": 0.9020025348542459, + "grad_norm": 1.1983314752578735, + "learning_rate": 8.155509506030334e-07, + "loss": 0.20907354354858398, + "step": 6672 + }, + { + "epoch": 0.9021377270806928, + "grad_norm": 0.630081057548523, + "learning_rate": 8.133263142120717e-07, + "loss": 0.12068510055541992, + "step": 6673 + }, + { + "epoch": 0.9022729193071398, + "grad_norm": 1.5676183700561523, + "learning_rate": 8.111046315622284e-07, + "loss": 0.17276239395141602, + "step": 6674 + }, + { + "epoch": 0.9024081115335868, + "grad_norm": 1.4925576448440552, + "learning_rate": 8.088859031160633e-07, + "loss": 0.19556236267089844, + "step": 6675 + }, + { + "epoch": 0.9025433037600338, + "grad_norm": 1.6685117483139038, + "learning_rate": 8.066701293355288e-07, + "loss": 0.18534636497497559, + "step": 6676 + }, + { + "epoch": 0.9026784959864808, + "grad_norm": 1.430798888206482, + "learning_rate": 8.044573106819625e-07, + "loss": 0.21676015853881836, + "step": 6677 + }, + { + "epoch": 0.9028136882129277, + "grad_norm": 0.7649757862091064, + "learning_rate": 8.022474476160824e-07, + "loss": 0.15969347953796387, + "step": 6678 + }, + { + "epoch": 0.9029488804393747, + "grad_norm": 1.0281206369400024, + "learning_rate": 8.000405405979988e-07, + "loss": 0.14366722106933594, + "step": 6679 + }, + { + "epoch": 0.9030840726658217, + "grad_norm": 1.120094895362854, + "learning_rate": 7.978365900871943e-07, + "loss": 0.1974048614501953, + "step": 6680 + }, + { + "epoch": 0.9032192648922687, + "grad_norm": 1.030008316040039, + "learning_rate": 7.956355965425482e-07, + "loss": 0.1503148078918457, + "step": 6681 + }, + { + "epoch": 0.9033544571187156, + "grad_norm": 1.462011694908142, + "learning_rate": 7.934375604223193e-07, + "loss": 0.2095794677734375, + "step": 6682 + }, + { + "epoch": 0.9034896493451626, + "grad_norm": 1.064103364944458, + "learning_rate": 7.912424821841463e-07, + "loss": 0.1659870147705078, + "step": 6683 + }, + { + "epoch": 0.9036248415716096, + "grad_norm": 0.8238223195075989, + "learning_rate": 7.89050362285062e-07, + "loss": 0.14467620849609375, + "step": 6684 + }, + { + "epoch": 0.9037600337980566, + "grad_norm": 1.45100998878479, + "learning_rate": 7.868612011814713e-07, + "loss": 0.16307497024536133, + "step": 6685 + }, + { + "epoch": 0.9038952260245036, + "grad_norm": 0.8868352174758911, + "learning_rate": 7.846749993291746e-07, + "loss": 0.16602325439453125, + "step": 6686 + }, + { + "epoch": 0.9040304182509505, + "grad_norm": 1.0470459461212158, + "learning_rate": 7.824917571833445e-07, + "loss": 0.11875343322753906, + "step": 6687 + }, + { + "epoch": 0.9041656104773975, + "grad_norm": 1.3385058641433716, + "learning_rate": 7.80311475198554e-07, + "loss": 0.1608753204345703, + "step": 6688 + }, + { + "epoch": 0.9043008027038445, + "grad_norm": 0.8118037581443787, + "learning_rate": 7.781341538287384e-07, + "loss": 0.14539504051208496, + "step": 6689 + }, + { + "epoch": 0.9044359949302915, + "grad_norm": 1.8691107034683228, + "learning_rate": 7.759597935272316e-07, + "loss": 0.14951753616333008, + "step": 6690 + }, + { + "epoch": 0.9045711871567385, + "grad_norm": 1.0722888708114624, + "learning_rate": 7.7378839474675e-07, + "loss": 0.1387958526611328, + "step": 6691 + }, + { + "epoch": 0.9047063793831854, + "grad_norm": 0.7438547611236572, + "learning_rate": 7.716199579393851e-07, + "loss": 0.15665017068386078, + "step": 6692 + }, + { + "epoch": 0.9048415716096324, + "grad_norm": 0.7806492447853088, + "learning_rate": 7.694544835566259e-07, + "loss": 0.12917137145996094, + "step": 6693 + }, + { + "epoch": 0.9049767638360794, + "grad_norm": 1.0045160055160522, + "learning_rate": 7.672919720493249e-07, + "loss": 0.18025779724121094, + "step": 6694 + }, + { + "epoch": 0.9051119560625264, + "grad_norm": 0.9888438582420349, + "learning_rate": 7.651324238677338e-07, + "loss": 0.16955792903900146, + "step": 6695 + }, + { + "epoch": 0.9052471482889733, + "grad_norm": 0.858755350112915, + "learning_rate": 7.629758394614828e-07, + "loss": 0.14068222045898438, + "step": 6696 + }, + { + "epoch": 0.9053823405154203, + "grad_norm": 0.884871780872345, + "learning_rate": 7.608222192795794e-07, + "loss": 0.15554046630859375, + "step": 6697 + }, + { + "epoch": 0.9055175327418673, + "grad_norm": 1.1258841753005981, + "learning_rate": 7.586715637704284e-07, + "loss": 0.1649169921875, + "step": 6698 + }, + { + "epoch": 0.9056527249683143, + "grad_norm": 0.910371720790863, + "learning_rate": 7.565238733817998e-07, + "loss": 0.1685476303100586, + "step": 6699 + }, + { + "epoch": 0.9057879171947613, + "grad_norm": 0.8648293614387512, + "learning_rate": 7.543791485608542e-07, + "loss": 0.2098095417022705, + "step": 6700 + }, + { + "epoch": 0.9059231094212082, + "grad_norm": 1.2737137079238892, + "learning_rate": 7.52237389754138e-07, + "loss": 0.17931652069091797, + "step": 6701 + }, + { + "epoch": 0.9060583016476552, + "grad_norm": 1.134995460510254, + "learning_rate": 7.500985974075758e-07, + "loss": 0.14721202850341797, + "step": 6702 + }, + { + "epoch": 0.9061934938741022, + "grad_norm": 0.9695498943328857, + "learning_rate": 7.479627719664767e-07, + "loss": 0.17166423797607422, + "step": 6703 + }, + { + "epoch": 0.9063286861005492, + "grad_norm": 1.0928118228912354, + "learning_rate": 7.458299138755281e-07, + "loss": 0.1693439483642578, + "step": 6704 + }, + { + "epoch": 0.9064638783269962, + "grad_norm": 0.9881254434585571, + "learning_rate": 7.437000235788033e-07, + "loss": 0.12952327728271484, + "step": 6705 + }, + { + "epoch": 0.9065990705534431, + "grad_norm": 1.5902758836746216, + "learning_rate": 7.415731015197575e-07, + "loss": 0.1794424057006836, + "step": 6706 + }, + { + "epoch": 0.9067342627798901, + "grad_norm": 0.8967025876045227, + "learning_rate": 7.39449148141228e-07, + "loss": 0.11917352676391602, + "step": 6707 + }, + { + "epoch": 0.9068694550063371, + "grad_norm": 0.9916856288909912, + "learning_rate": 7.373281638854329e-07, + "loss": 0.17327308654785156, + "step": 6708 + }, + { + "epoch": 0.9070046472327841, + "grad_norm": 2.049166679382324, + "learning_rate": 7.352101491939722e-07, + "loss": 0.17474365234375, + "step": 6709 + }, + { + "epoch": 0.907139839459231, + "grad_norm": 1.0560553073883057, + "learning_rate": 7.330951045078249e-07, + "loss": 0.14926815032958984, + "step": 6710 + }, + { + "epoch": 0.907275031685678, + "grad_norm": 0.7818946838378906, + "learning_rate": 7.309830302673621e-07, + "loss": 0.17352962493896484, + "step": 6711 + }, + { + "epoch": 0.907410223912125, + "grad_norm": 1.199280858039856, + "learning_rate": 7.288739269123184e-07, + "loss": 0.15593397617340088, + "step": 6712 + }, + { + "epoch": 0.907545416138572, + "grad_norm": 1.6214392185211182, + "learning_rate": 7.267677948818296e-07, + "loss": 0.15970993041992188, + "step": 6713 + }, + { + "epoch": 0.907680608365019, + "grad_norm": 0.7071395516395569, + "learning_rate": 7.246646346143997e-07, + "loss": 0.11743593215942383, + "step": 6714 + }, + { + "epoch": 0.9078158005914659, + "grad_norm": 0.8203786015510559, + "learning_rate": 7.225644465479153e-07, + "loss": 0.1452326774597168, + "step": 6715 + }, + { + "epoch": 0.9079509928179129, + "grad_norm": 1.303985357284546, + "learning_rate": 7.204672311196547e-07, + "loss": 0.1757822036743164, + "step": 6716 + }, + { + "epoch": 0.9080861850443599, + "grad_norm": 1.1265748739242554, + "learning_rate": 7.183729887662604e-07, + "loss": 0.09971332550048828, + "step": 6717 + }, + { + "epoch": 0.9082213772708069, + "grad_norm": 1.178935170173645, + "learning_rate": 7.162817199237703e-07, + "loss": 0.19397926330566406, + "step": 6718 + }, + { + "epoch": 0.908356569497254, + "grad_norm": 1.5768262147903442, + "learning_rate": 7.141934250275978e-07, + "loss": 0.1585092544555664, + "step": 6719 + }, + { + "epoch": 0.9084917617237009, + "grad_norm": 2.1778602600097656, + "learning_rate": 7.121081045125316e-07, + "loss": 0.18851089477539062, + "step": 6720 + }, + { + "epoch": 0.9086269539501479, + "grad_norm": 1.0620629787445068, + "learning_rate": 7.100257588127545e-07, + "loss": 0.13008403778076172, + "step": 6721 + }, + { + "epoch": 0.9087621461765949, + "grad_norm": 1.0338125228881836, + "learning_rate": 7.079463883618148e-07, + "loss": 0.1543283462524414, + "step": 6722 + }, + { + "epoch": 0.9088973384030419, + "grad_norm": 1.0764707326889038, + "learning_rate": 7.058699935926527e-07, + "loss": 0.18457603454589844, + "step": 6723 + }, + { + "epoch": 0.9090325306294889, + "grad_norm": 1.1665832996368408, + "learning_rate": 7.037965749375808e-07, + "loss": 0.16879498958587646, + "step": 6724 + }, + { + "epoch": 0.9091677228559358, + "grad_norm": 1.127356767654419, + "learning_rate": 7.017261328283037e-07, + "loss": 0.16724681854248047, + "step": 6725 + }, + { + "epoch": 0.9093029150823828, + "grad_norm": 1.3785279989242554, + "learning_rate": 6.996586676958916e-07, + "loss": 0.16535234451293945, + "step": 6726 + }, + { + "epoch": 0.9094381073088298, + "grad_norm": 0.7778226733207703, + "learning_rate": 6.975941799708019e-07, + "loss": 0.149641752243042, + "step": 6727 + }, + { + "epoch": 0.9095732995352768, + "grad_norm": 0.706840455532074, + "learning_rate": 6.955326700828757e-07, + "loss": 0.12535953521728516, + "step": 6728 + }, + { + "epoch": 0.9097084917617237, + "grad_norm": 0.8697673082351685, + "learning_rate": 6.934741384613246e-07, + "loss": 0.1455981731414795, + "step": 6729 + }, + { + "epoch": 0.9098436839881707, + "grad_norm": 0.8157376646995544, + "learning_rate": 6.91418585534756e-07, + "loss": 0.16133975982666016, + "step": 6730 + }, + { + "epoch": 0.9099788762146177, + "grad_norm": 1.2191591262817383, + "learning_rate": 6.893660117311373e-07, + "loss": 0.22642278671264648, + "step": 6731 + }, + { + "epoch": 0.9101140684410647, + "grad_norm": 0.9939918518066406, + "learning_rate": 6.873164174778252e-07, + "loss": 0.1807088851928711, + "step": 6732 + }, + { + "epoch": 0.9102492606675117, + "grad_norm": 1.0536137819290161, + "learning_rate": 6.852698032015631e-07, + "loss": 0.14378440380096436, + "step": 6733 + }, + { + "epoch": 0.9103844528939586, + "grad_norm": 0.7766066789627075, + "learning_rate": 6.832261693284636e-07, + "loss": 0.12801015377044678, + "step": 6734 + }, + { + "epoch": 0.9105196451204056, + "grad_norm": 1.4431747198104858, + "learning_rate": 6.811855162840214e-07, + "loss": 0.16617298126220703, + "step": 6735 + }, + { + "epoch": 0.9106548373468526, + "grad_norm": 2.011122941970825, + "learning_rate": 6.791478444931132e-07, + "loss": 0.22838783264160156, + "step": 6736 + }, + { + "epoch": 0.9107900295732996, + "grad_norm": 1.3376126289367676, + "learning_rate": 6.77113154379988e-07, + "loss": 0.21860504150390625, + "step": 6737 + }, + { + "epoch": 0.9109252217997466, + "grad_norm": 1.7054890394210815, + "learning_rate": 6.75081446368287e-07, + "loss": 0.17700958251953125, + "step": 6738 + }, + { + "epoch": 0.9110604140261935, + "grad_norm": 1.1025233268737793, + "learning_rate": 6.730527208810166e-07, + "loss": 0.16390371322631836, + "step": 6739 + }, + { + "epoch": 0.9111956062526405, + "grad_norm": 1.2439676523208618, + "learning_rate": 6.710269783405709e-07, + "loss": 0.20368516445159912, + "step": 6740 + }, + { + "epoch": 0.9113307984790875, + "grad_norm": 1.4852169752120972, + "learning_rate": 6.690042191687206e-07, + "loss": 0.20545828342437744, + "step": 6741 + }, + { + "epoch": 0.9114659907055345, + "grad_norm": 0.8305413126945496, + "learning_rate": 6.669844437866124e-07, + "loss": 0.15709972381591797, + "step": 6742 + }, + { + "epoch": 0.9116011829319814, + "grad_norm": 1.0936546325683594, + "learning_rate": 6.649676526147764e-07, + "loss": 0.1474313735961914, + "step": 6743 + }, + { + "epoch": 0.9117363751584284, + "grad_norm": 0.9522294998168945, + "learning_rate": 6.629538460731199e-07, + "loss": 0.19360923767089844, + "step": 6744 + }, + { + "epoch": 0.9118715673848754, + "grad_norm": 0.8566400408744812, + "learning_rate": 6.609430245809261e-07, + "loss": 0.15583860874176025, + "step": 6745 + }, + { + "epoch": 0.9120067596113224, + "grad_norm": 1.0703624486923218, + "learning_rate": 6.589351885568617e-07, + "loss": 0.15053081512451172, + "step": 6746 + }, + { + "epoch": 0.9121419518377694, + "grad_norm": 1.214871883392334, + "learning_rate": 6.569303384189624e-07, + "loss": 0.2011098861694336, + "step": 6747 + }, + { + "epoch": 0.9122771440642163, + "grad_norm": 1.5464457273483276, + "learning_rate": 6.54928474584659e-07, + "loss": 0.2445850372314453, + "step": 6748 + }, + { + "epoch": 0.9124123362906633, + "grad_norm": 1.616714596748352, + "learning_rate": 6.5292959747074e-07, + "loss": 0.237945556640625, + "step": 6749 + }, + { + "epoch": 0.9125475285171103, + "grad_norm": 1.1570183038711548, + "learning_rate": 6.509337074933891e-07, + "loss": 0.19701099395751953, + "step": 6750 + }, + { + "epoch": 0.9126827207435573, + "grad_norm": 0.8465988636016846, + "learning_rate": 6.489408050681589e-07, + "loss": 0.11767005920410156, + "step": 6751 + }, + { + "epoch": 0.9128179129700043, + "grad_norm": 1.2097126245498657, + "learning_rate": 6.469508906099792e-07, + "loss": 0.21654891967773438, + "step": 6752 + }, + { + "epoch": 0.9129531051964512, + "grad_norm": 1.7918184995651245, + "learning_rate": 6.449639645331684e-07, + "loss": 0.13945382833480835, + "step": 6753 + }, + { + "epoch": 0.9130882974228982, + "grad_norm": 1.8902429342269897, + "learning_rate": 6.429800272514058e-07, + "loss": 0.19729042053222656, + "step": 6754 + }, + { + "epoch": 0.9132234896493452, + "grad_norm": 0.7039175629615784, + "learning_rate": 6.409990791777659e-07, + "loss": 0.1213526725769043, + "step": 6755 + }, + { + "epoch": 0.9133586818757922, + "grad_norm": 1.102613925933838, + "learning_rate": 6.390211207246888e-07, + "loss": 0.20709228515625, + "step": 6756 + }, + { + "epoch": 0.9134938741022391, + "grad_norm": 1.1705427169799805, + "learning_rate": 6.370461523039967e-07, + "loss": 0.19670867919921875, + "step": 6757 + }, + { + "epoch": 0.9136290663286861, + "grad_norm": 0.9059383869171143, + "learning_rate": 6.350741743268873e-07, + "loss": 0.16883468627929688, + "step": 6758 + }, + { + "epoch": 0.9137642585551331, + "grad_norm": 1.2360154390335083, + "learning_rate": 6.331051872039373e-07, + "loss": 0.15971755981445312, + "step": 6759 + }, + { + "epoch": 0.9138994507815801, + "grad_norm": 0.6795998811721802, + "learning_rate": 6.31139191345102e-07, + "loss": 0.12293505668640137, + "step": 6760 + }, + { + "epoch": 0.9140346430080271, + "grad_norm": 1.7758582830429077, + "learning_rate": 6.291761871597091e-07, + "loss": 0.17943763732910156, + "step": 6761 + }, + { + "epoch": 0.914169835234474, + "grad_norm": 0.931975781917572, + "learning_rate": 6.272161750564731e-07, + "loss": 0.1481645107269287, + "step": 6762 + }, + { + "epoch": 0.914305027460921, + "grad_norm": 0.7977692484855652, + "learning_rate": 6.252591554434728e-07, + "loss": 0.13846158981323242, + "step": 6763 + }, + { + "epoch": 0.914440219687368, + "grad_norm": 0.9667164087295532, + "learning_rate": 6.233051287281688e-07, + "loss": 0.1804065704345703, + "step": 6764 + }, + { + "epoch": 0.914575411913815, + "grad_norm": 1.8307242393493652, + "learning_rate": 6.213540953174057e-07, + "loss": 0.1817340850830078, + "step": 6765 + }, + { + "epoch": 0.914710604140262, + "grad_norm": 1.6505370140075684, + "learning_rate": 6.194060556173953e-07, + "loss": 0.18779540061950684, + "step": 6766 + }, + { + "epoch": 0.9148457963667089, + "grad_norm": 0.9660489559173584, + "learning_rate": 6.17461010033733e-07, + "loss": 0.1647796630859375, + "step": 6767 + }, + { + "epoch": 0.9149809885931559, + "grad_norm": 0.92435622215271, + "learning_rate": 6.155189589713833e-07, + "loss": 0.17723655700683594, + "step": 6768 + }, + { + "epoch": 0.9151161808196029, + "grad_norm": 0.8326014876365662, + "learning_rate": 6.135799028346928e-07, + "loss": 0.1484508514404297, + "step": 6769 + }, + { + "epoch": 0.9152513730460499, + "grad_norm": 1.0861568450927734, + "learning_rate": 6.116438420273868e-07, + "loss": 0.17220067977905273, + "step": 6770 + }, + { + "epoch": 0.9153865652724968, + "grad_norm": 1.156847357749939, + "learning_rate": 6.097107769525595e-07, + "loss": 0.13736557960510254, + "step": 6771 + }, + { + "epoch": 0.9155217574989438, + "grad_norm": 1.4594306945800781, + "learning_rate": 6.077807080126873e-07, + "loss": 0.17350053787231445, + "step": 6772 + }, + { + "epoch": 0.9156569497253908, + "grad_norm": 1.4144175052642822, + "learning_rate": 6.058536356096206e-07, + "loss": 0.2276449203491211, + "step": 6773 + }, + { + "epoch": 0.9157921419518378, + "grad_norm": 1.4107614755630493, + "learning_rate": 6.039295601445833e-07, + "loss": 0.21044921875, + "step": 6774 + }, + { + "epoch": 0.9159273341782848, + "grad_norm": 1.195586919784546, + "learning_rate": 6.020084820181831e-07, + "loss": 0.1856060028076172, + "step": 6775 + }, + { + "epoch": 0.9160625264047317, + "grad_norm": 1.1761809587478638, + "learning_rate": 6.000904016303971e-07, + "loss": 0.141021728515625, + "step": 6776 + }, + { + "epoch": 0.9161977186311787, + "grad_norm": 1.7974510192871094, + "learning_rate": 5.981753193805789e-07, + "loss": 0.14397656917572021, + "step": 6777 + }, + { + "epoch": 0.9163329108576257, + "grad_norm": 1.5928044319152832, + "learning_rate": 5.962632356674597e-07, + "loss": 0.18073272705078125, + "step": 6778 + }, + { + "epoch": 0.9164681030840727, + "grad_norm": 0.6539821028709412, + "learning_rate": 5.94354150889141e-07, + "loss": 0.12350940704345703, + "step": 6779 + }, + { + "epoch": 0.9166032953105197, + "grad_norm": 0.9880049824714661, + "learning_rate": 5.924480654431147e-07, + "loss": 0.1483381986618042, + "step": 6780 + }, + { + "epoch": 0.9167384875369666, + "grad_norm": 1.2179538011550903, + "learning_rate": 5.905449797262252e-07, + "loss": 0.14670944213867188, + "step": 6781 + }, + { + "epoch": 0.9168736797634136, + "grad_norm": 0.8636088967323303, + "learning_rate": 5.886448941347156e-07, + "loss": 0.13743972778320312, + "step": 6782 + }, + { + "epoch": 0.9170088719898606, + "grad_norm": 1.7533119916915894, + "learning_rate": 5.867478090641892e-07, + "loss": 0.1954803466796875, + "step": 6783 + }, + { + "epoch": 0.9171440642163076, + "grad_norm": 0.8614572286605835, + "learning_rate": 5.848537249096269e-07, + "loss": 0.15858745574951172, + "step": 6784 + }, + { + "epoch": 0.9172792564427545, + "grad_norm": 1.195934772491455, + "learning_rate": 5.829626420653949e-07, + "loss": 0.18611717224121094, + "step": 6785 + }, + { + "epoch": 0.9174144486692015, + "grad_norm": 1.6256399154663086, + "learning_rate": 5.810745609252166e-07, + "loss": 0.21474456787109375, + "step": 6786 + }, + { + "epoch": 0.9175496408956485, + "grad_norm": 1.0646802186965942, + "learning_rate": 5.791894818822091e-07, + "loss": 0.16489005088806152, + "step": 6787 + }, + { + "epoch": 0.9176848331220955, + "grad_norm": 0.9180534482002258, + "learning_rate": 5.773074053288519e-07, + "loss": 0.15557479858398438, + "step": 6788 + }, + { + "epoch": 0.9178200253485425, + "grad_norm": 2.328115701675415, + "learning_rate": 5.75428331657003e-07, + "loss": 0.1453409194946289, + "step": 6789 + }, + { + "epoch": 0.9179552175749894, + "grad_norm": 0.7374373078346252, + "learning_rate": 5.735522612578998e-07, + "loss": 0.09610319137573242, + "step": 6790 + }, + { + "epoch": 0.9180904098014364, + "grad_norm": 1.513714075088501, + "learning_rate": 5.716791945221444e-07, + "loss": 0.15625974535942078, + "step": 6791 + }, + { + "epoch": 0.9182256020278834, + "grad_norm": 0.7924400568008423, + "learning_rate": 5.698091318397219e-07, + "loss": 0.15407180786132812, + "step": 6792 + }, + { + "epoch": 0.9183607942543304, + "grad_norm": 1.5105005502700806, + "learning_rate": 5.679420735999908e-07, + "loss": 0.23344850540161133, + "step": 6793 + }, + { + "epoch": 0.9184959864807773, + "grad_norm": 1.113030195236206, + "learning_rate": 5.660780201916799e-07, + "loss": 0.14873027801513672, + "step": 6794 + }, + { + "epoch": 0.9186311787072243, + "grad_norm": 1.2140769958496094, + "learning_rate": 5.642169720028973e-07, + "loss": 0.2314774990081787, + "step": 6795 + }, + { + "epoch": 0.9187663709336713, + "grad_norm": 1.0213863849639893, + "learning_rate": 5.623589294211196e-07, + "loss": 0.16526174545288086, + "step": 6796 + }, + { + "epoch": 0.9189015631601183, + "grad_norm": 1.5017553567886353, + "learning_rate": 5.605038928332057e-07, + "loss": 0.19676971435546875, + "step": 6797 + }, + { + "epoch": 0.9190367553865653, + "grad_norm": 1.120961308479309, + "learning_rate": 5.586518626253817e-07, + "loss": 0.14935043454170227, + "step": 6798 + }, + { + "epoch": 0.9191719476130122, + "grad_norm": 0.8091015815734863, + "learning_rate": 5.568028391832524e-07, + "loss": 0.1257791519165039, + "step": 6799 + }, + { + "epoch": 0.9193071398394592, + "grad_norm": 1.3048439025878906, + "learning_rate": 5.549568228917917e-07, + "loss": 0.19629764556884766, + "step": 6800 + }, + { + "epoch": 0.9194423320659062, + "grad_norm": 1.2796249389648438, + "learning_rate": 5.531138141353486e-07, + "loss": 0.2021503448486328, + "step": 6801 + }, + { + "epoch": 0.9195775242923532, + "grad_norm": 0.7782748341560364, + "learning_rate": 5.512738132976514e-07, + "loss": 0.1341695785522461, + "step": 6802 + }, + { + "epoch": 0.9197127165188002, + "grad_norm": 0.8553540110588074, + "learning_rate": 5.49436820761795e-07, + "loss": 0.15029644966125488, + "step": 6803 + }, + { + "epoch": 0.9198479087452471, + "grad_norm": 0.9512537121772766, + "learning_rate": 5.476028369102537e-07, + "loss": 0.16156387329101562, + "step": 6804 + }, + { + "epoch": 0.9199831009716941, + "grad_norm": 0.9684756398200989, + "learning_rate": 5.45771862124872e-07, + "loss": 0.12393474578857422, + "step": 6805 + }, + { + "epoch": 0.9201182931981411, + "grad_norm": 2.228598117828369, + "learning_rate": 5.439438967868649e-07, + "loss": 0.16321754455566406, + "step": 6806 + }, + { + "epoch": 0.9202534854245881, + "grad_norm": 0.8086915016174316, + "learning_rate": 5.421189412768296e-07, + "loss": 0.13257122039794922, + "step": 6807 + }, + { + "epoch": 0.920388677651035, + "grad_norm": 1.1834681034088135, + "learning_rate": 5.402969959747306e-07, + "loss": 0.2162151336669922, + "step": 6808 + }, + { + "epoch": 0.920523869877482, + "grad_norm": 0.8241886496543884, + "learning_rate": 5.384780612599044e-07, + "loss": 0.18962574005126953, + "step": 6809 + }, + { + "epoch": 0.920659062103929, + "grad_norm": 0.7904039025306702, + "learning_rate": 5.366621375110647e-07, + "loss": 0.09569859504699707, + "step": 6810 + }, + { + "epoch": 0.920794254330376, + "grad_norm": 0.8951303362846375, + "learning_rate": 5.348492251062942e-07, + "loss": 0.16202640533447266, + "step": 6811 + }, + { + "epoch": 0.920929446556823, + "grad_norm": 1.418837070465088, + "learning_rate": 5.330393244230558e-07, + "loss": 0.2121572494506836, + "step": 6812 + }, + { + "epoch": 0.9210646387832699, + "grad_norm": 1.4238643646240234, + "learning_rate": 5.312324358381731e-07, + "loss": 0.1611948013305664, + "step": 6813 + }, + { + "epoch": 0.9211998310097169, + "grad_norm": 1.1536599397659302, + "learning_rate": 5.29428559727857e-07, + "loss": 0.16081500053405762, + "step": 6814 + }, + { + "epoch": 0.9213350232361639, + "grad_norm": 1.6921736001968384, + "learning_rate": 5.276276964676802e-07, + "loss": 0.16599369049072266, + "step": 6815 + }, + { + "epoch": 0.9214702154626109, + "grad_norm": 0.86725914478302, + "learning_rate": 5.258298464325928e-07, + "loss": 0.15939807891845703, + "step": 6816 + }, + { + "epoch": 0.9216054076890579, + "grad_norm": 1.0355339050292969, + "learning_rate": 5.240350099969204e-07, + "loss": 0.1880812644958496, + "step": 6817 + }, + { + "epoch": 0.9217405999155048, + "grad_norm": 0.771996021270752, + "learning_rate": 5.222431875343492e-07, + "loss": 0.13859176635742188, + "step": 6818 + }, + { + "epoch": 0.9218757921419518, + "grad_norm": 1.039351463317871, + "learning_rate": 5.204543794179539e-07, + "loss": 0.15136241912841797, + "step": 6819 + }, + { + "epoch": 0.9220109843683988, + "grad_norm": 1.3492240905761719, + "learning_rate": 5.186685860201717e-07, + "loss": 0.15941619873046875, + "step": 6820 + }, + { + "epoch": 0.9221461765948458, + "grad_norm": 0.9099437594413757, + "learning_rate": 5.16885807712812e-07, + "loss": 0.15189552307128906, + "step": 6821 + }, + { + "epoch": 0.9222813688212927, + "grad_norm": 0.8142754435539246, + "learning_rate": 5.151060448670625e-07, + "loss": 0.11348915100097656, + "step": 6822 + }, + { + "epoch": 0.9224165610477397, + "grad_norm": 1.0997073650360107, + "learning_rate": 5.133292978534754e-07, + "loss": 0.15671825408935547, + "step": 6823 + }, + { + "epoch": 0.9225517532741867, + "grad_norm": 0.8878243565559387, + "learning_rate": 5.115555670419814e-07, + "loss": 0.16170263290405273, + "step": 6824 + }, + { + "epoch": 0.9226869455006337, + "grad_norm": 1.1478452682495117, + "learning_rate": 5.097848528018817e-07, + "loss": 0.13878536224365234, + "step": 6825 + }, + { + "epoch": 0.9228221377270807, + "grad_norm": 1.011464238166809, + "learning_rate": 5.080171555018448e-07, + "loss": 0.14964675903320312, + "step": 6826 + }, + { + "epoch": 0.9229573299535276, + "grad_norm": 1.0709928274154663, + "learning_rate": 5.06252475509918e-07, + "loss": 0.18352794647216797, + "step": 6827 + }, + { + "epoch": 0.9230925221799746, + "grad_norm": 1.724845051765442, + "learning_rate": 5.044908131935139e-07, + "loss": 0.23067855834960938, + "step": 6828 + }, + { + "epoch": 0.9232277144064216, + "grad_norm": 0.921457827091217, + "learning_rate": 5.027321689194242e-07, + "loss": 0.189239501953125, + "step": 6829 + }, + { + "epoch": 0.9233629066328686, + "grad_norm": 1.485518217086792, + "learning_rate": 5.009765430538061e-07, + "loss": 0.16341376304626465, + "step": 6830 + }, + { + "epoch": 0.9234980988593156, + "grad_norm": 0.8229610323905945, + "learning_rate": 4.992239359621886e-07, + "loss": 0.13027381896972656, + "step": 6831 + }, + { + "epoch": 0.9236332910857625, + "grad_norm": 1.1949256658554077, + "learning_rate": 4.974743480094767e-07, + "loss": 0.14664077758789062, + "step": 6832 + }, + { + "epoch": 0.9237684833122095, + "grad_norm": 0.8907371759414673, + "learning_rate": 4.957277795599407e-07, + "loss": 0.15859603881835938, + "step": 6833 + }, + { + "epoch": 0.9239036755386565, + "grad_norm": 1.3830283880233765, + "learning_rate": 4.93984230977228e-07, + "loss": 0.14602303504943848, + "step": 6834 + }, + { + "epoch": 0.9240388677651035, + "grad_norm": 0.9414574503898621, + "learning_rate": 4.922437026243531e-07, + "loss": 0.170318603515625, + "step": 6835 + }, + { + "epoch": 0.9241740599915504, + "grad_norm": 1.9344474077224731, + "learning_rate": 4.905061948637063e-07, + "loss": 0.24384307861328125, + "step": 6836 + }, + { + "epoch": 0.9243092522179974, + "grad_norm": 1.0105434656143188, + "learning_rate": 4.887717080570431e-07, + "loss": 0.1478862762451172, + "step": 6837 + }, + { + "epoch": 0.9244444444444444, + "grad_norm": 0.9474513530731201, + "learning_rate": 4.870402425654913e-07, + "loss": 0.14140701293945312, + "step": 6838 + }, + { + "epoch": 0.9245796366708914, + "grad_norm": 1.2620896100997925, + "learning_rate": 4.853117987495542e-07, + "loss": 0.19680285453796387, + "step": 6839 + }, + { + "epoch": 0.9247148288973384, + "grad_norm": 1.124436616897583, + "learning_rate": 4.83586376969104e-07, + "loss": 0.21566402912139893, + "step": 6840 + }, + { + "epoch": 0.9248500211237853, + "grad_norm": 1.7103341817855835, + "learning_rate": 4.818639775833816e-07, + "loss": 0.1856060028076172, + "step": 6841 + }, + { + "epoch": 0.9249852133502323, + "grad_norm": 0.8405003547668457, + "learning_rate": 4.801446009509969e-07, + "loss": 0.15198802947998047, + "step": 6842 + }, + { + "epoch": 0.9251204055766793, + "grad_norm": 1.268671989440918, + "learning_rate": 4.784282474299367e-07, + "loss": 0.15775728225708008, + "step": 6843 + }, + { + "epoch": 0.9252555978031263, + "grad_norm": 0.8011864423751831, + "learning_rate": 4.767149173775537e-07, + "loss": 0.15517902374267578, + "step": 6844 + }, + { + "epoch": 0.9253907900295733, + "grad_norm": 0.9571229815483093, + "learning_rate": 4.750046111505724e-07, + "loss": 0.14945721626281738, + "step": 6845 + }, + { + "epoch": 0.9255259822560202, + "grad_norm": 1.2321099042892456, + "learning_rate": 4.732973291050896e-07, + "loss": 0.22460174560546875, + "step": 6846 + }, + { + "epoch": 0.9256611744824672, + "grad_norm": 0.8086537718772888, + "learning_rate": 4.7159307159656607e-07, + "loss": 0.14993280172348022, + "step": 6847 + }, + { + "epoch": 0.9257963667089142, + "grad_norm": 0.9530975222587585, + "learning_rate": 4.6989183897983954e-07, + "loss": 0.13495564460754395, + "step": 6848 + }, + { + "epoch": 0.9259315589353612, + "grad_norm": 0.8083940148353577, + "learning_rate": 4.681936316091201e-07, + "loss": 0.17575359344482422, + "step": 6849 + }, + { + "epoch": 0.9260667511618081, + "grad_norm": 1.0287210941314697, + "learning_rate": 4.664984498379765e-07, + "loss": 0.1811199188232422, + "step": 6850 + }, + { + "epoch": 0.9262019433882551, + "grad_norm": 1.3945170640945435, + "learning_rate": 4.6480629401935814e-07, + "loss": 0.17000555992126465, + "step": 6851 + }, + { + "epoch": 0.9263371356147021, + "grad_norm": 0.8777503371238708, + "learning_rate": 4.631171645055815e-07, + "loss": 0.15983819961547852, + "step": 6852 + }, + { + "epoch": 0.9264723278411491, + "grad_norm": 1.2790606021881104, + "learning_rate": 4.614310616483286e-07, + "loss": 0.1291027069091797, + "step": 6853 + }, + { + "epoch": 0.9266075200675962, + "grad_norm": 2.5518715381622314, + "learning_rate": 4.5974798579866193e-07, + "loss": 0.2043776512145996, + "step": 6854 + }, + { + "epoch": 0.9267427122940431, + "grad_norm": 1.289115071296692, + "learning_rate": 4.580679373069996e-07, + "loss": 0.2124267965555191, + "step": 6855 + }, + { + "epoch": 0.9268779045204901, + "grad_norm": 2.0251758098602295, + "learning_rate": 4.5639091652314e-07, + "loss": 0.21028053760528564, + "step": 6856 + }, + { + "epoch": 0.9270130967469371, + "grad_norm": 1.1990656852722168, + "learning_rate": 4.54716923796249e-07, + "loss": 0.19442427158355713, + "step": 6857 + }, + { + "epoch": 0.9271482889733841, + "grad_norm": 0.8964511752128601, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.1645498275756836, + "step": 6858 + }, + { + "epoch": 0.9272834811998311, + "grad_norm": 2.4061243534088135, + "learning_rate": 4.5137802390687433e-07, + "loss": 0.17140674591064453, + "step": 6859 + }, + { + "epoch": 0.927418673426278, + "grad_norm": 2.120358467102051, + "learning_rate": 4.497131174395663e-07, + "loss": 0.18662548065185547, + "step": 6860 + }, + { + "epoch": 0.927553865652725, + "grad_norm": 1.4562383890151978, + "learning_rate": 4.4805124041957967e-07, + "loss": 0.14902305603027344, + "step": 6861 + }, + { + "epoch": 0.927689057879172, + "grad_norm": 1.9436578750610352, + "learning_rate": 4.463923931929259e-07, + "loss": 0.22417545318603516, + "step": 6862 + }, + { + "epoch": 0.927824250105619, + "grad_norm": 0.8414587378501892, + "learning_rate": 4.4473657610498377e-07, + "loss": 0.18558979034423828, + "step": 6863 + }, + { + "epoch": 0.927959442332066, + "grad_norm": 1.178419589996338, + "learning_rate": 4.430837895005058e-07, + "loss": 0.1932048797607422, + "step": 6864 + }, + { + "epoch": 0.9280946345585129, + "grad_norm": 0.7472342252731323, + "learning_rate": 4.4143403372360836e-07, + "loss": 0.1239774227142334, + "step": 6865 + }, + { + "epoch": 0.9282298267849599, + "grad_norm": 1.2048664093017578, + "learning_rate": 4.3978730911778176e-07, + "loss": 0.19588279724121094, + "step": 6866 + }, + { + "epoch": 0.9283650190114069, + "grad_norm": 1.6109174489974976, + "learning_rate": 4.381436160258834e-07, + "loss": 0.1859283447265625, + "step": 6867 + }, + { + "epoch": 0.9285002112378539, + "grad_norm": 1.0783982276916504, + "learning_rate": 4.3650295479013615e-07, + "loss": 0.2029552459716797, + "step": 6868 + }, + { + "epoch": 0.9286354034643008, + "grad_norm": 1.533647060394287, + "learning_rate": 4.348653257521351e-07, + "loss": 0.21853065490722656, + "step": 6869 + }, + { + "epoch": 0.9287705956907478, + "grad_norm": 1.643060326576233, + "learning_rate": 4.332307292528442e-07, + "loss": 0.1819465160369873, + "step": 6870 + }, + { + "epoch": 0.9289057879171948, + "grad_norm": 0.7947250008583069, + "learning_rate": 4.315991656325946e-07, + "loss": 0.1918010711669922, + "step": 6871 + }, + { + "epoch": 0.9290409801436418, + "grad_norm": 1.1351237297058105, + "learning_rate": 4.299706352310895e-07, + "loss": 0.1490306854248047, + "step": 6872 + }, + { + "epoch": 0.9291761723700888, + "grad_norm": 1.0748343467712402, + "learning_rate": 4.283451383873926e-07, + "loss": 0.22548198699951172, + "step": 6873 + }, + { + "epoch": 0.9293113645965357, + "grad_norm": 1.0095285177230835, + "learning_rate": 4.26722675439945e-07, + "loss": 0.14268875122070312, + "step": 6874 + }, + { + "epoch": 0.9294465568229827, + "grad_norm": 1.441441535949707, + "learning_rate": 4.251032467265481e-07, + "loss": 0.18883323669433594, + "step": 6875 + }, + { + "epoch": 0.9295817490494297, + "grad_norm": 0.9878628253936768, + "learning_rate": 4.234868525843805e-07, + "loss": 0.1687760353088379, + "step": 6876 + }, + { + "epoch": 0.9297169412758767, + "grad_norm": 1.282901644706726, + "learning_rate": 4.218734933499796e-07, + "loss": 0.20367050170898438, + "step": 6877 + }, + { + "epoch": 0.9298521335023237, + "grad_norm": 0.6648201942443848, + "learning_rate": 4.202631693592601e-07, + "loss": 0.12532281875610352, + "step": 6878 + }, + { + "epoch": 0.9299873257287706, + "grad_norm": 1.0283968448638916, + "learning_rate": 4.186558809474955e-07, + "loss": 0.2093348503112793, + "step": 6879 + }, + { + "epoch": 0.9301225179552176, + "grad_norm": 1.1400434970855713, + "learning_rate": 4.170516284493331e-07, + "loss": 0.12535810470581055, + "step": 6880 + }, + { + "epoch": 0.9302577101816646, + "grad_norm": 1.2748878002166748, + "learning_rate": 4.1545041219879063e-07, + "loss": 0.15709900856018066, + "step": 6881 + }, + { + "epoch": 0.9303929024081116, + "grad_norm": 1.8680342435836792, + "learning_rate": 4.138522325292432e-07, + "loss": 0.1587967872619629, + "step": 6882 + }, + { + "epoch": 0.9305280946345585, + "grad_norm": 0.8770149946212769, + "learning_rate": 4.1225708977344457e-07, + "loss": 0.1810312271118164, + "step": 6883 + }, + { + "epoch": 0.9306632868610055, + "grad_norm": 1.538745403289795, + "learning_rate": 4.106649842635124e-07, + "loss": 0.1918349266052246, + "step": 6884 + }, + { + "epoch": 0.9307984790874525, + "grad_norm": 1.6287195682525635, + "learning_rate": 4.090759163309282e-07, + "loss": 0.23414039611816406, + "step": 6885 + }, + { + "epoch": 0.9309336713138995, + "grad_norm": 1.0517938137054443, + "learning_rate": 4.07489886306549e-07, + "loss": 0.15229272842407227, + "step": 6886 + }, + { + "epoch": 0.9310688635403465, + "grad_norm": 0.6034350991249084, + "learning_rate": 4.059068945205907e-07, + "loss": 0.10676407814025879, + "step": 6887 + }, + { + "epoch": 0.9312040557667934, + "grad_norm": 1.1849470138549805, + "learning_rate": 4.043269413026429e-07, + "loss": 0.17846012115478516, + "step": 6888 + }, + { + "epoch": 0.9313392479932404, + "grad_norm": 1.0466327667236328, + "learning_rate": 4.027500269816592e-07, + "loss": 0.17405414581298828, + "step": 6889 + }, + { + "epoch": 0.9314744402196874, + "grad_norm": 0.778199315071106, + "learning_rate": 4.011761518859619e-07, + "loss": 0.14313697814941406, + "step": 6890 + }, + { + "epoch": 0.9316096324461344, + "grad_norm": 0.9889121055603027, + "learning_rate": 3.996053163432406e-07, + "loss": 0.15319538116455078, + "step": 6891 + }, + { + "epoch": 0.9317448246725814, + "grad_norm": 1.3294932842254639, + "learning_rate": 3.980375206805503e-07, + "loss": 0.1484537124633789, + "step": 6892 + }, + { + "epoch": 0.9318800168990283, + "grad_norm": 1.1328479051589966, + "learning_rate": 3.9647276522431664e-07, + "loss": 0.10921239852905273, + "step": 6893 + }, + { + "epoch": 0.9320152091254753, + "grad_norm": 1.9791101217269897, + "learning_rate": 3.949110503003289e-07, + "loss": 0.18129920959472656, + "step": 6894 + }, + { + "epoch": 0.9321504013519223, + "grad_norm": 0.8612326979637146, + "learning_rate": 3.9335237623374377e-07, + "loss": 0.15658773481845856, + "step": 6895 + }, + { + "epoch": 0.9322855935783693, + "grad_norm": 2.137049913406372, + "learning_rate": 3.917967433490849e-07, + "loss": 0.17886686325073242, + "step": 6896 + }, + { + "epoch": 0.9324207858048162, + "grad_norm": 1.0243951082229614, + "learning_rate": 3.902441519702449e-07, + "loss": 0.156585693359375, + "step": 6897 + }, + { + "epoch": 0.9325559780312632, + "grad_norm": 1.082966923713684, + "learning_rate": 3.886946024204818e-07, + "loss": 0.1297922134399414, + "step": 6898 + }, + { + "epoch": 0.9326911702577102, + "grad_norm": 2.3474693298339844, + "learning_rate": 3.871480950224193e-07, + "loss": 0.19859769940376282, + "step": 6899 + }, + { + "epoch": 0.9328263624841572, + "grad_norm": 1.4735304117202759, + "learning_rate": 3.856046300980498e-07, + "loss": 0.15506011247634888, + "step": 6900 + }, + { + "epoch": 0.9329615547106042, + "grad_norm": 0.8837671875953674, + "learning_rate": 3.8406420796872953e-07, + "loss": 0.12552547454833984, + "step": 6901 + }, + { + "epoch": 0.9330967469370511, + "grad_norm": 0.7841081023216248, + "learning_rate": 3.825268289551803e-07, + "loss": 0.164825439453125, + "step": 6902 + }, + { + "epoch": 0.9332319391634981, + "grad_norm": 1.6778563261032104, + "learning_rate": 3.8099249337749777e-07, + "loss": 0.20982837677001953, + "step": 6903 + }, + { + "epoch": 0.9333671313899451, + "grad_norm": 0.6978492140769958, + "learning_rate": 3.7946120155513465e-07, + "loss": 0.13621997833251953, + "step": 6904 + }, + { + "epoch": 0.9335023236163921, + "grad_norm": 0.9136191606521606, + "learning_rate": 3.7793295380691595e-07, + "loss": 0.15282034873962402, + "step": 6905 + }, + { + "epoch": 0.933637515842839, + "grad_norm": 0.9879100322723389, + "learning_rate": 3.7640775045103214e-07, + "loss": 0.1868581771850586, + "step": 6906 + }, + { + "epoch": 0.933772708069286, + "grad_norm": 0.6956599950790405, + "learning_rate": 3.7488559180503423e-07, + "loss": 0.1422288417816162, + "step": 6907 + }, + { + "epoch": 0.933907900295733, + "grad_norm": 0.9054815769195557, + "learning_rate": 3.7336647818584866e-07, + "loss": 0.1433730125427246, + "step": 6908 + }, + { + "epoch": 0.93404309252218, + "grad_norm": 1.2635170221328735, + "learning_rate": 3.718504099097625e-07, + "loss": 0.1242523193359375, + "step": 6909 + }, + { + "epoch": 0.934178284748627, + "grad_norm": 1.3305695056915283, + "learning_rate": 3.703373872924265e-07, + "loss": 0.1690056324005127, + "step": 6910 + }, + { + "epoch": 0.9343134769750739, + "grad_norm": 1.3940216302871704, + "learning_rate": 3.688274106488604e-07, + "loss": 0.18387436866760254, + "step": 6911 + }, + { + "epoch": 0.9344486692015209, + "grad_norm": 1.4567819833755493, + "learning_rate": 3.67320480293451e-07, + "loss": 0.15241384506225586, + "step": 6912 + }, + { + "epoch": 0.9345838614279679, + "grad_norm": 1.1121695041656494, + "learning_rate": 3.6581659653994736e-07, + "loss": 0.15810799598693848, + "step": 6913 + }, + { + "epoch": 0.9347190536544149, + "grad_norm": 0.8731660842895508, + "learning_rate": 3.64315759701469e-07, + "loss": 0.15412139892578125, + "step": 6914 + }, + { + "epoch": 0.9348542458808619, + "grad_norm": 0.8984377384185791, + "learning_rate": 3.6281797009049765e-07, + "loss": 0.14575576782226562, + "step": 6915 + }, + { + "epoch": 0.9349894381073088, + "grad_norm": 1.1138936281204224, + "learning_rate": 3.613232280188772e-07, + "loss": 0.19501054286956787, + "step": 6916 + }, + { + "epoch": 0.9351246303337558, + "grad_norm": 0.7889968752861023, + "learning_rate": 3.5983153379782363e-07, + "loss": 0.15529441833496094, + "step": 6917 + }, + { + "epoch": 0.9352598225602028, + "grad_norm": 0.9597473740577698, + "learning_rate": 3.5834288773791854e-07, + "loss": 0.1800384521484375, + "step": 6918 + }, + { + "epoch": 0.9353950147866498, + "grad_norm": 1.3094351291656494, + "learning_rate": 3.568572901491007e-07, + "loss": 0.15926361083984375, + "step": 6919 + }, + { + "epoch": 0.9355302070130967, + "grad_norm": 2.3606109619140625, + "learning_rate": 3.553747413406827e-07, + "loss": 0.16363811492919922, + "step": 6920 + }, + { + "epoch": 0.9356653992395437, + "grad_norm": 1.3630280494689941, + "learning_rate": 3.538952416213376e-07, + "loss": 0.184647798538208, + "step": 6921 + }, + { + "epoch": 0.9358005914659907, + "grad_norm": 1.2892872095108032, + "learning_rate": 3.524187912991056e-07, + "loss": 0.2171459197998047, + "step": 6922 + }, + { + "epoch": 0.9359357836924377, + "grad_norm": 1.436084270477295, + "learning_rate": 3.5094539068139254e-07, + "loss": 0.14622116088867188, + "step": 6923 + }, + { + "epoch": 0.9360709759188847, + "grad_norm": 1.0650582313537598, + "learning_rate": 3.494750400749663e-07, + "loss": 0.11447733640670776, + "step": 6924 + }, + { + "epoch": 0.9362061681453316, + "grad_norm": 1.2595068216323853, + "learning_rate": 3.480077397859638e-07, + "loss": 0.14632892608642578, + "step": 6925 + }, + { + "epoch": 0.9363413603717786, + "grad_norm": 1.3274850845336914, + "learning_rate": 3.4654349011988384e-07, + "loss": 0.16489505767822266, + "step": 6926 + }, + { + "epoch": 0.9364765525982256, + "grad_norm": 0.9624202847480774, + "learning_rate": 3.4508229138159095e-07, + "loss": 0.13409948348999023, + "step": 6927 + }, + { + "epoch": 0.9366117448246726, + "grad_norm": 0.6399134993553162, + "learning_rate": 3.4362414387531516e-07, + "loss": 0.10895943641662598, + "step": 6928 + }, + { + "epoch": 0.9367469370511196, + "grad_norm": 1.2405040264129639, + "learning_rate": 3.4216904790464854e-07, + "loss": 0.1999492645263672, + "step": 6929 + }, + { + "epoch": 0.9368821292775665, + "grad_norm": 1.3759093284606934, + "learning_rate": 3.407170037725521e-07, + "loss": 0.1988992691040039, + "step": 6930 + }, + { + "epoch": 0.9370173215040135, + "grad_norm": 1.4552645683288574, + "learning_rate": 3.3926801178134737e-07, + "loss": 0.16596126556396484, + "step": 6931 + }, + { + "epoch": 0.9371525137304605, + "grad_norm": 0.9265978336334229, + "learning_rate": 3.3782207223272467e-07, + "loss": 0.15142905712127686, + "step": 6932 + }, + { + "epoch": 0.9372877059569075, + "grad_norm": 0.8365117907524109, + "learning_rate": 3.363791854277348e-07, + "loss": 0.19535446166992188, + "step": 6933 + }, + { + "epoch": 0.9374228981833544, + "grad_norm": 1.929330587387085, + "learning_rate": 3.349393516667926e-07, + "loss": 0.1852121353149414, + "step": 6934 + }, + { + "epoch": 0.9375580904098014, + "grad_norm": 1.2405568361282349, + "learning_rate": 3.335025712496814e-07, + "loss": 0.15087223052978516, + "step": 6935 + }, + { + "epoch": 0.9376932826362484, + "grad_norm": 1.422148585319519, + "learning_rate": 3.320688444755471e-07, + "loss": 0.21785545349121094, + "step": 6936 + }, + { + "epoch": 0.9378284748626954, + "grad_norm": 1.6526321172714233, + "learning_rate": 3.306381716428991e-07, + "loss": 0.18062400817871094, + "step": 6937 + }, + { + "epoch": 0.9379636670891424, + "grad_norm": 1.3130121231079102, + "learning_rate": 3.2921055304960925e-07, + "loss": 0.2098846435546875, + "step": 6938 + }, + { + "epoch": 0.9380988593155893, + "grad_norm": 0.9979737997055054, + "learning_rate": 3.277859889929147e-07, + "loss": 0.1981792449951172, + "step": 6939 + }, + { + "epoch": 0.9382340515420363, + "grad_norm": 0.7161824107170105, + "learning_rate": 3.263644797694215e-07, + "loss": 0.14830735325813293, + "step": 6940 + }, + { + "epoch": 0.9383692437684833, + "grad_norm": 0.7012309432029724, + "learning_rate": 3.2494602567509303e-07, + "loss": 0.12056827545166016, + "step": 6941 + }, + { + "epoch": 0.9385044359949303, + "grad_norm": 1.0711491107940674, + "learning_rate": 3.2353062700525794e-07, + "loss": 0.17443561553955078, + "step": 6942 + }, + { + "epoch": 0.9386396282213773, + "grad_norm": 0.8885208368301392, + "learning_rate": 3.221182840546122e-07, + "loss": 0.17954778671264648, + "step": 6943 + }, + { + "epoch": 0.9387748204478242, + "grad_norm": 0.6597723364830017, + "learning_rate": 3.207089971172089e-07, + "loss": 0.1304483413696289, + "step": 6944 + }, + { + "epoch": 0.9389100126742712, + "grad_norm": 1.173721432685852, + "learning_rate": 3.1930276648647504e-07, + "loss": 0.18240642547607422, + "step": 6945 + }, + { + "epoch": 0.9390452049007182, + "grad_norm": 1.2117289304733276, + "learning_rate": 3.178995924551914e-07, + "loss": 0.19396591186523438, + "step": 6946 + }, + { + "epoch": 0.9391803971271652, + "grad_norm": 0.8468600511550903, + "learning_rate": 3.164994753155059e-07, + "loss": 0.16847282648086548, + "step": 6947 + }, + { + "epoch": 0.9393155893536121, + "grad_norm": 1.629804253578186, + "learning_rate": 3.1510241535893215e-07, + "loss": 0.17116355895996094, + "step": 6948 + }, + { + "epoch": 0.9394507815800591, + "grad_norm": 0.8811553716659546, + "learning_rate": 3.1370841287634567e-07, + "loss": 0.1329355239868164, + "step": 6949 + }, + { + "epoch": 0.9395859738065061, + "grad_norm": 2.0105812549591064, + "learning_rate": 3.1231746815798436e-07, + "loss": 0.1999340057373047, + "step": 6950 + }, + { + "epoch": 0.9397211660329531, + "grad_norm": 1.2636001110076904, + "learning_rate": 3.1092958149344985e-07, + "loss": 0.13026142120361328, + "step": 6951 + }, + { + "epoch": 0.9398563582594001, + "grad_norm": 0.8365142941474915, + "learning_rate": 3.095447531717077e-07, + "loss": 0.12633132934570312, + "step": 6952 + }, + { + "epoch": 0.939991550485847, + "grad_norm": 2.3109328746795654, + "learning_rate": 3.08162983481089e-07, + "loss": 0.19128036499023438, + "step": 6953 + }, + { + "epoch": 0.940126742712294, + "grad_norm": 0.7146958708763123, + "learning_rate": 3.067842727092801e-07, + "loss": 0.09265774488449097, + "step": 6954 + }, + { + "epoch": 0.940261934938741, + "grad_norm": 1.276873230934143, + "learning_rate": 3.0540862114334323e-07, + "loss": 0.1888561248779297, + "step": 6955 + }, + { + "epoch": 0.940397127165188, + "grad_norm": 0.6841320395469666, + "learning_rate": 3.0403602906969086e-07, + "loss": 0.11055788397789001, + "step": 6956 + }, + { + "epoch": 0.940532319391635, + "grad_norm": 1.168628215789795, + "learning_rate": 3.0266649677410605e-07, + "loss": 0.17499637603759766, + "step": 6957 + }, + { + "epoch": 0.9406675116180819, + "grad_norm": 1.724021077156067, + "learning_rate": 3.0130002454173243e-07, + "loss": 0.1623673439025879, + "step": 6958 + }, + { + "epoch": 0.9408027038445289, + "grad_norm": 1.3459324836730957, + "learning_rate": 2.9993661265707407e-07, + "loss": 0.20406723022460938, + "step": 6959 + }, + { + "epoch": 0.9409378960709759, + "grad_norm": 1.0251187086105347, + "learning_rate": 2.985762614040072e-07, + "loss": 0.178466796875, + "step": 6960 + }, + { + "epoch": 0.9410730882974229, + "grad_norm": 1.9301649332046509, + "learning_rate": 2.972189710657586e-07, + "loss": 0.14935904741287231, + "step": 6961 + }, + { + "epoch": 0.9412082805238698, + "grad_norm": 0.9159307479858398, + "learning_rate": 2.958647419249255e-07, + "loss": 0.12334012985229492, + "step": 6962 + }, + { + "epoch": 0.9413434727503168, + "grad_norm": 1.1499208211898804, + "learning_rate": 2.9451357426346415e-07, + "loss": 0.20439720153808594, + "step": 6963 + }, + { + "epoch": 0.9414786649767638, + "grad_norm": 1.91754949092865, + "learning_rate": 2.9316546836269776e-07, + "loss": 0.1833571195602417, + "step": 6964 + }, + { + "epoch": 0.9416138572032108, + "grad_norm": 1.253158450126648, + "learning_rate": 2.9182042450330516e-07, + "loss": 0.17911529541015625, + "step": 6965 + }, + { + "epoch": 0.9417490494296578, + "grad_norm": 1.1508615016937256, + "learning_rate": 2.9047844296533397e-07, + "loss": 0.1601315140724182, + "step": 6966 + }, + { + "epoch": 0.9418842416561047, + "grad_norm": 0.967922568321228, + "learning_rate": 2.8913952402819246e-07, + "loss": 0.16477394104003906, + "step": 6967 + }, + { + "epoch": 0.9420194338825517, + "grad_norm": 1.3220723867416382, + "learning_rate": 2.878036679706492e-07, + "loss": 0.2159714698791504, + "step": 6968 + }, + { + "epoch": 0.9421546261089987, + "grad_norm": 1.2639391422271729, + "learning_rate": 2.8647087507083837e-07, + "loss": 0.19647550582885742, + "step": 6969 + }, + { + "epoch": 0.9422898183354457, + "grad_norm": 1.6839158535003662, + "learning_rate": 2.8514114560625303e-07, + "loss": 0.17318344116210938, + "step": 6970 + }, + { + "epoch": 0.9424250105618927, + "grad_norm": 1.3257887363433838, + "learning_rate": 2.8381447985375007e-07, + "loss": 0.18767929077148438, + "step": 6971 + }, + { + "epoch": 0.9425602027883396, + "grad_norm": 0.9894129037857056, + "learning_rate": 2.8249087808954853e-07, + "loss": 0.2026081085205078, + "step": 6972 + }, + { + "epoch": 0.9426953950147866, + "grad_norm": 1.1003007888793945, + "learning_rate": 2.811703405892296e-07, + "loss": 0.19527816772460938, + "step": 6973 + }, + { + "epoch": 0.9428305872412336, + "grad_norm": 0.6857394576072693, + "learning_rate": 2.798528676277368e-07, + "loss": 0.12031960487365723, + "step": 6974 + }, + { + "epoch": 0.9429657794676806, + "grad_norm": 0.9882878661155701, + "learning_rate": 2.785384594793738e-07, + "loss": 0.1722729206085205, + "step": 6975 + }, + { + "epoch": 0.9431009716941275, + "grad_norm": 2.102351427078247, + "learning_rate": 2.772271164178086e-07, + "loss": 0.1469125747680664, + "step": 6976 + }, + { + "epoch": 0.9432361639205745, + "grad_norm": 1.4030895233154297, + "learning_rate": 2.759188387160677e-07, + "loss": 0.1977672576904297, + "step": 6977 + }, + { + "epoch": 0.9433713561470215, + "grad_norm": 0.9867737293243408, + "learning_rate": 2.746136266465449e-07, + "loss": 0.1395587921142578, + "step": 6978 + }, + { + "epoch": 0.9435065483734685, + "grad_norm": 0.7916335463523865, + "learning_rate": 2.7331148048098943e-07, + "loss": 0.13576698303222656, + "step": 6979 + }, + { + "epoch": 0.9436417405999155, + "grad_norm": 2.0794079303741455, + "learning_rate": 2.7201240049051613e-07, + "loss": 0.16881781816482544, + "step": 6980 + }, + { + "epoch": 0.9437769328263624, + "grad_norm": 1.2684155702590942, + "learning_rate": 2.707163869455986e-07, + "loss": 0.16578775644302368, + "step": 6981 + }, + { + "epoch": 0.9439121250528094, + "grad_norm": 1.126895546913147, + "learning_rate": 2.694234401160778e-07, + "loss": 0.2003873586654663, + "step": 6982 + }, + { + "epoch": 0.9440473172792564, + "grad_norm": 1.1429036855697632, + "learning_rate": 2.6813356027114986e-07, + "loss": 0.195068359375, + "step": 6983 + }, + { + "epoch": 0.9441825095057034, + "grad_norm": 1.7646294832229614, + "learning_rate": 2.6684674767937346e-07, + "loss": 0.15339183807373047, + "step": 6984 + }, + { + "epoch": 0.9443177017321503, + "grad_norm": 1.6443897485733032, + "learning_rate": 2.655630026086708e-07, + "loss": 0.21820354461669922, + "step": 6985 + }, + { + "epoch": 0.9444528939585973, + "grad_norm": 1.0942819118499756, + "learning_rate": 2.642823253263249e-07, + "loss": 0.17298316955566406, + "step": 6986 + }, + { + "epoch": 0.9445880861850443, + "grad_norm": 0.9483981728553772, + "learning_rate": 2.630047160989807e-07, + "loss": 0.14149093627929688, + "step": 6987 + }, + { + "epoch": 0.9447232784114913, + "grad_norm": 1.279844045639038, + "learning_rate": 2.6173017519263875e-07, + "loss": 0.19954967498779297, + "step": 6988 + }, + { + "epoch": 0.9448584706379383, + "grad_norm": 0.7546291351318359, + "learning_rate": 2.6045870287267014e-07, + "loss": 0.11913084983825684, + "step": 6989 + }, + { + "epoch": 0.9449936628643854, + "grad_norm": 1.097650170326233, + "learning_rate": 2.5919029940380147e-07, + "loss": 0.21255111694335938, + "step": 6990 + }, + { + "epoch": 0.9451288550908323, + "grad_norm": 0.8942933678627014, + "learning_rate": 2.5792496505011807e-07, + "loss": 0.1363382339477539, + "step": 6991 + }, + { + "epoch": 0.9452640473172793, + "grad_norm": 0.9604797959327698, + "learning_rate": 2.5666270007507266e-07, + "loss": 0.1538677215576172, + "step": 6992 + }, + { + "epoch": 0.9453992395437263, + "grad_norm": 0.8983729481697083, + "learning_rate": 2.5540350474147324e-07, + "loss": 0.14554214477539062, + "step": 6993 + }, + { + "epoch": 0.9455344317701733, + "grad_norm": 0.7184409499168396, + "learning_rate": 2.5414737931149346e-07, + "loss": 0.10421323776245117, + "step": 6994 + }, + { + "epoch": 0.9456696239966202, + "grad_norm": 0.8119010329246521, + "learning_rate": 2.5289432404666246e-07, + "loss": 0.1484060287475586, + "step": 6995 + }, + { + "epoch": 0.9458048162230672, + "grad_norm": 1.395830750465393, + "learning_rate": 2.5164433920787487e-07, + "loss": 0.25280189514160156, + "step": 6996 + }, + { + "epoch": 0.9459400084495142, + "grad_norm": 0.8624078631401062, + "learning_rate": 2.503974250553842e-07, + "loss": 0.15318012237548828, + "step": 6997 + }, + { + "epoch": 0.9460752006759612, + "grad_norm": 0.9680809378623962, + "learning_rate": 2.491535818488011e-07, + "loss": 0.1606903076171875, + "step": 6998 + }, + { + "epoch": 0.9462103929024082, + "grad_norm": 1.1840519905090332, + "learning_rate": 2.479128098471067e-07, + "loss": 0.2083454132080078, + "step": 6999 + }, + { + "epoch": 0.9463455851288551, + "grad_norm": 1.8465065956115723, + "learning_rate": 2.466751093086328e-07, + "loss": 0.16587352752685547, + "step": 7000 + }, + { + "epoch": 0.9464807773553021, + "grad_norm": 1.0893745422363281, + "learning_rate": 2.454404804910748e-07, + "loss": 0.19550800323486328, + "step": 7001 + }, + { + "epoch": 0.9466159695817491, + "grad_norm": 1.111434817314148, + "learning_rate": 2.442089236514888e-07, + "loss": 0.16739463806152344, + "step": 7002 + }, + { + "epoch": 0.9467511618081961, + "grad_norm": 0.9888833165168762, + "learning_rate": 2.429804390462931e-07, + "loss": 0.1490001678466797, + "step": 7003 + }, + { + "epoch": 0.946886354034643, + "grad_norm": 1.4196141958236694, + "learning_rate": 2.4175502693126293e-07, + "loss": 0.18605244159698486, + "step": 7004 + }, + { + "epoch": 0.94702154626109, + "grad_norm": 0.9099364876747131, + "learning_rate": 2.4053268756153933e-07, + "loss": 0.14433574676513672, + "step": 7005 + }, + { + "epoch": 0.947156738487537, + "grad_norm": 1.4102363586425781, + "learning_rate": 2.393134211916154e-07, + "loss": 0.18846988677978516, + "step": 7006 + }, + { + "epoch": 0.947291930713984, + "grad_norm": 1.0599256753921509, + "learning_rate": 2.3809722807535128e-07, + "loss": 0.19267940521240234, + "step": 7007 + }, + { + "epoch": 0.947427122940431, + "grad_norm": 1.169120192527771, + "learning_rate": 2.3688410846596287e-07, + "loss": 0.19522762298583984, + "step": 7008 + }, + { + "epoch": 0.9475623151668779, + "grad_norm": 1.2129591703414917, + "learning_rate": 2.3567406261603143e-07, + "loss": 0.15459728240966797, + "step": 7009 + }, + { + "epoch": 0.9476975073933249, + "grad_norm": 1.0208072662353516, + "learning_rate": 2.3446709077749206e-07, + "loss": 0.16237735748291016, + "step": 7010 + }, + { + "epoch": 0.9478326996197719, + "grad_norm": 0.8853017091751099, + "learning_rate": 2.3326319320164546e-07, + "loss": 0.169769287109375, + "step": 7011 + }, + { + "epoch": 0.9479678918462189, + "grad_norm": 0.9158598184585571, + "learning_rate": 2.320623701391461e-07, + "loss": 0.15769469738006592, + "step": 7012 + }, + { + "epoch": 0.9481030840726659, + "grad_norm": 1.2379778623580933, + "learning_rate": 2.30864621840014e-07, + "loss": 0.1318216323852539, + "step": 7013 + }, + { + "epoch": 0.9482382762991128, + "grad_norm": 0.8993387222290039, + "learning_rate": 2.2966994855362633e-07, + "loss": 0.1631094217300415, + "step": 7014 + }, + { + "epoch": 0.9483734685255598, + "grad_norm": 1.266741394996643, + "learning_rate": 2.2847835052872079e-07, + "loss": 0.1709613800048828, + "step": 7015 + }, + { + "epoch": 0.9485086607520068, + "grad_norm": 1.3037835359573364, + "learning_rate": 2.2728982801339392e-07, + "loss": 0.12365007400512695, + "step": 7016 + }, + { + "epoch": 0.9486438529784538, + "grad_norm": 1.358773946762085, + "learning_rate": 2.261043812551028e-07, + "loss": 0.1684703826904297, + "step": 7017 + }, + { + "epoch": 0.9487790452049008, + "grad_norm": 1.4487115144729614, + "learning_rate": 2.249220105006633e-07, + "loss": 0.1723041534423828, + "step": 7018 + }, + { + "epoch": 0.9489142374313477, + "grad_norm": 1.2705066204071045, + "learning_rate": 2.2374271599625185e-07, + "loss": 0.17004108428955078, + "step": 7019 + }, + { + "epoch": 0.9490494296577947, + "grad_norm": 1.2343331575393677, + "learning_rate": 2.2256649798740204e-07, + "loss": 0.2066946029663086, + "step": 7020 + }, + { + "epoch": 0.9491846218842417, + "grad_norm": 1.3259086608886719, + "learning_rate": 2.2139335671901294e-07, + "loss": 0.14203453063964844, + "step": 7021 + }, + { + "epoch": 0.9493198141106887, + "grad_norm": 0.778133749961853, + "learning_rate": 2.2022329243533422e-07, + "loss": 0.1772785186767578, + "step": 7022 + }, + { + "epoch": 0.9494550063371356, + "grad_norm": 0.9714994430541992, + "learning_rate": 2.19056305379981e-07, + "loss": 0.2127361297607422, + "step": 7023 + }, + { + "epoch": 0.9495901985635826, + "grad_norm": 1.2761167287826538, + "learning_rate": 2.178923957959289e-07, + "loss": 0.15991878509521484, + "step": 7024 + }, + { + "epoch": 0.9497253907900296, + "grad_norm": 1.0379520654678345, + "learning_rate": 2.1673156392550408e-07, + "loss": 0.1804487705230713, + "step": 7025 + }, + { + "epoch": 0.9498605830164766, + "grad_norm": 0.8538023829460144, + "learning_rate": 2.155738100104049e-07, + "loss": 0.169907808303833, + "step": 7026 + }, + { + "epoch": 0.9499957752429236, + "grad_norm": 1.181728482246399, + "learning_rate": 2.1441913429167682e-07, + "loss": 0.17593955993652344, + "step": 7027 + }, + { + "epoch": 0.9501309674693705, + "grad_norm": 0.7111400961875916, + "learning_rate": 2.1326753700973256e-07, + "loss": 0.1240682601928711, + "step": 7028 + }, + { + "epoch": 0.9502661596958175, + "grad_norm": 1.1719928979873657, + "learning_rate": 2.1211901840434034e-07, + "loss": 0.15153980255126953, + "step": 7029 + }, + { + "epoch": 0.9504013519222645, + "grad_norm": 2.7940778732299805, + "learning_rate": 2.1097357871462386e-07, + "loss": 0.19905662536621094, + "step": 7030 + }, + { + "epoch": 0.9505365441487115, + "grad_norm": 1.7728191614151, + "learning_rate": 2.098312181790757e-07, + "loss": 0.20144343376159668, + "step": 7031 + }, + { + "epoch": 0.9506717363751584, + "grad_norm": 0.6830088496208191, + "learning_rate": 2.086919370355389e-07, + "loss": 0.11834859848022461, + "step": 7032 + }, + { + "epoch": 0.9508069286016054, + "grad_norm": 0.8876307606697083, + "learning_rate": 2.075557355212171e-07, + "loss": 0.1480649709701538, + "step": 7033 + }, + { + "epoch": 0.9509421208280524, + "grad_norm": 1.082811951637268, + "learning_rate": 2.0642261387267268e-07, + "loss": 0.17164230346679688, + "step": 7034 + }, + { + "epoch": 0.9510773130544994, + "grad_norm": 0.9651477336883545, + "learning_rate": 2.0529257232583033e-07, + "loss": 0.20316505432128906, + "step": 7035 + }, + { + "epoch": 0.9512125052809464, + "grad_norm": 1.1677285432815552, + "learning_rate": 2.0416561111596844e-07, + "loss": 0.14597034454345703, + "step": 7036 + }, + { + "epoch": 0.9513476975073933, + "grad_norm": 0.881619930267334, + "learning_rate": 2.0304173047772933e-07, + "loss": 0.18599891662597656, + "step": 7037 + }, + { + "epoch": 0.9514828897338403, + "grad_norm": 0.9835766553878784, + "learning_rate": 2.0192093064510753e-07, + "loss": 0.16470098495483398, + "step": 7038 + }, + { + "epoch": 0.9516180819602873, + "grad_norm": 1.1271790266036987, + "learning_rate": 2.0080321185146134e-07, + "loss": 0.17893218994140625, + "step": 7039 + }, + { + "epoch": 0.9517532741867343, + "grad_norm": 1.0424292087554932, + "learning_rate": 1.996885743295046e-07, + "loss": 0.17154884338378906, + "step": 7040 + }, + { + "epoch": 0.9518884664131813, + "grad_norm": 1.6355482339859009, + "learning_rate": 1.985770183113117e-07, + "loss": 0.2030085325241089, + "step": 7041 + }, + { + "epoch": 0.9520236586396282, + "grad_norm": 1.4516222476959229, + "learning_rate": 1.9746854402831583e-07, + "loss": 0.15929412841796875, + "step": 7042 + }, + { + "epoch": 0.9521588508660752, + "grad_norm": 0.7079383730888367, + "learning_rate": 1.963631517113057e-07, + "loss": 0.1104116439819336, + "step": 7043 + }, + { + "epoch": 0.9522940430925222, + "grad_norm": 0.9599607586860657, + "learning_rate": 1.952608415904289e-07, + "loss": 0.16903924942016602, + "step": 7044 + }, + { + "epoch": 0.9524292353189692, + "grad_norm": 1.5272794961929321, + "learning_rate": 1.9416161389519348e-07, + "loss": 0.1675567626953125, + "step": 7045 + }, + { + "epoch": 0.9525644275454161, + "grad_norm": 1.2193652391433716, + "learning_rate": 1.9306546885446475e-07, + "loss": 0.155792236328125, + "step": 7046 + }, + { + "epoch": 0.9526996197718631, + "grad_norm": 0.8074981570243835, + "learning_rate": 1.919724066964651e-07, + "loss": 0.1313788890838623, + "step": 7047 + }, + { + "epoch": 0.9528348119983101, + "grad_norm": 1.130308985710144, + "learning_rate": 1.908824276487775e-07, + "loss": 0.14759445190429688, + "step": 7048 + }, + { + "epoch": 0.9529700042247571, + "grad_norm": 1.1680020093917847, + "learning_rate": 1.8979553193833876e-07, + "loss": 0.16346263885498047, + "step": 7049 + }, + { + "epoch": 0.9531051964512041, + "grad_norm": 0.8579732775688171, + "learning_rate": 1.8871171979144786e-07, + "loss": 0.14668750762939453, + "step": 7050 + }, + { + "epoch": 0.953240388677651, + "grad_norm": 0.9967208504676819, + "learning_rate": 1.8763099143376262e-07, + "loss": 0.14379024505615234, + "step": 7051 + }, + { + "epoch": 0.953375580904098, + "grad_norm": 1.1646144390106201, + "learning_rate": 1.8655334709029303e-07, + "loss": 0.22216796875, + "step": 7052 + }, + { + "epoch": 0.953510773130545, + "grad_norm": 0.7213800549507141, + "learning_rate": 1.8547878698541132e-07, + "loss": 0.16598081588745117, + "step": 7053 + }, + { + "epoch": 0.953645965356992, + "grad_norm": 1.0283057689666748, + "learning_rate": 1.8440731134284684e-07, + "loss": 0.13375091552734375, + "step": 7054 + }, + { + "epoch": 0.953781157583439, + "grad_norm": 1.6930747032165527, + "learning_rate": 1.833389203856861e-07, + "loss": 0.19726181030273438, + "step": 7055 + }, + { + "epoch": 0.9539163498098859, + "grad_norm": 1.482744574546814, + "learning_rate": 1.8227361433637625e-07, + "loss": 0.19539260864257812, + "step": 7056 + }, + { + "epoch": 0.9540515420363329, + "grad_norm": 1.0703012943267822, + "learning_rate": 1.812113934167148e-07, + "loss": 0.1871337890625, + "step": 7057 + }, + { + "epoch": 0.9541867342627799, + "grad_norm": 1.0114115476608276, + "learning_rate": 1.8015225784786483e-07, + "loss": 0.17520053684711456, + "step": 7058 + }, + { + "epoch": 0.9543219264892269, + "grad_norm": 1.0584514141082764, + "learning_rate": 1.7909620785034663e-07, + "loss": 0.19161224365234375, + "step": 7059 + }, + { + "epoch": 0.9544571187156738, + "grad_norm": 0.8739388585090637, + "learning_rate": 1.7804324364402936e-07, + "loss": 0.17091941833496094, + "step": 7060 + }, + { + "epoch": 0.9545923109421208, + "grad_norm": 1.7398759126663208, + "learning_rate": 1.769933654481526e-07, + "loss": 0.16160964965820312, + "step": 7061 + }, + { + "epoch": 0.9547275031685678, + "grad_norm": 1.3912725448608398, + "learning_rate": 1.7594657348129984e-07, + "loss": 0.18796837329864502, + "step": 7062 + }, + { + "epoch": 0.9548626953950148, + "grad_norm": 0.8760247826576233, + "learning_rate": 1.749028679614234e-07, + "loss": 0.16924095153808594, + "step": 7063 + }, + { + "epoch": 0.9549978876214618, + "grad_norm": 1.1377896070480347, + "learning_rate": 1.7386224910582615e-07, + "loss": 0.11742448806762695, + "step": 7064 + }, + { + "epoch": 0.9551330798479087, + "grad_norm": 1.3483519554138184, + "learning_rate": 1.728247171311731e-07, + "loss": 0.16912841796875, + "step": 7065 + }, + { + "epoch": 0.9552682720743557, + "grad_norm": 1.2951308488845825, + "learning_rate": 1.7179027225348142e-07, + "loss": 0.19443154335021973, + "step": 7066 + }, + { + "epoch": 0.9554034643008027, + "grad_norm": 1.1068569421768188, + "learning_rate": 1.7075891468812722e-07, + "loss": 0.18080520629882812, + "step": 7067 + }, + { + "epoch": 0.9555386565272497, + "grad_norm": 1.0774205923080444, + "learning_rate": 1.69730644649847e-07, + "loss": 0.15363025665283203, + "step": 7068 + }, + { + "epoch": 0.9556738487536967, + "grad_norm": 1.6942801475524902, + "learning_rate": 1.687054623527312e-07, + "loss": 0.19241046905517578, + "step": 7069 + }, + { + "epoch": 0.9558090409801436, + "grad_norm": 0.8343832492828369, + "learning_rate": 1.676833680102291e-07, + "loss": 0.10416269302368164, + "step": 7070 + }, + { + "epoch": 0.9559442332065906, + "grad_norm": 0.8511726260185242, + "learning_rate": 1.6666436183514378e-07, + "loss": 0.10283756256103516, + "step": 7071 + }, + { + "epoch": 0.9560794254330376, + "grad_norm": 1.1140782833099365, + "learning_rate": 1.6564844403964053e-07, + "loss": 0.1880664825439453, + "step": 7072 + }, + { + "epoch": 0.9562146176594846, + "grad_norm": 0.8259708285331726, + "learning_rate": 1.6463561483523682e-07, + "loss": 0.15117835998535156, + "step": 7073 + }, + { + "epoch": 0.9563498098859315, + "grad_norm": 1.044789433479309, + "learning_rate": 1.6362587443281063e-07, + "loss": 0.1439371109008789, + "step": 7074 + }, + { + "epoch": 0.9564850021123785, + "grad_norm": 3.2706148624420166, + "learning_rate": 1.626192230425938e-07, + "loss": 0.20502901077270508, + "step": 7075 + }, + { + "epoch": 0.9566201943388255, + "grad_norm": 1.0384197235107422, + "learning_rate": 1.6161566087417868e-07, + "loss": 0.18082523345947266, + "step": 7076 + }, + { + "epoch": 0.9567553865652725, + "grad_norm": 0.7943189740180969, + "learning_rate": 1.6061518813650977e-07, + "loss": 0.16203641891479492, + "step": 7077 + }, + { + "epoch": 0.9568905787917195, + "grad_norm": 0.878852903842926, + "learning_rate": 1.5961780503789215e-07, + "loss": 0.1276082992553711, + "step": 7078 + }, + { + "epoch": 0.9570257710181664, + "grad_norm": 1.022603154182434, + "learning_rate": 1.5862351178598633e-07, + "loss": 0.15902996063232422, + "step": 7079 + }, + { + "epoch": 0.9571609632446134, + "grad_norm": 1.2049932479858398, + "learning_rate": 1.5763230858781008e-07, + "loss": 0.1198415756225586, + "step": 7080 + }, + { + "epoch": 0.9572961554710604, + "grad_norm": 0.9522535800933838, + "learning_rate": 1.5664419564973497e-07, + "loss": 0.16373729705810547, + "step": 7081 + }, + { + "epoch": 0.9574313476975074, + "grad_norm": 1.0250359773635864, + "learning_rate": 1.5565917317749146e-07, + "loss": 0.1851975917816162, + "step": 7082 + }, + { + "epoch": 0.9575665399239544, + "grad_norm": 1.3224772214889526, + "learning_rate": 1.5467724137617046e-07, + "loss": 0.18883132934570312, + "step": 7083 + }, + { + "epoch": 0.9577017321504013, + "grad_norm": 1.297202229499817, + "learning_rate": 1.5369840045021178e-07, + "loss": 0.1912250518798828, + "step": 7084 + }, + { + "epoch": 0.9578369243768483, + "grad_norm": 0.9278029203414917, + "learning_rate": 1.5272265060341572e-07, + "loss": 0.19639205932617188, + "step": 7085 + }, + { + "epoch": 0.9579721166032953, + "grad_norm": 1.536542534828186, + "learning_rate": 1.517499920389398e-07, + "loss": 0.19723987579345703, + "step": 7086 + }, + { + "epoch": 0.9581073088297423, + "grad_norm": 0.9885773062705994, + "learning_rate": 1.5078042495929534e-07, + "loss": 0.19787216186523438, + "step": 7087 + }, + { + "epoch": 0.9582425010561892, + "grad_norm": 1.3699674606323242, + "learning_rate": 1.498139495663542e-07, + "loss": 0.15988171100616455, + "step": 7088 + }, + { + "epoch": 0.9583776932826362, + "grad_norm": 1.084795355796814, + "learning_rate": 1.4885056606133707e-07, + "loss": 0.19570159912109375, + "step": 7089 + }, + { + "epoch": 0.9585128855090832, + "grad_norm": 0.6768075227737427, + "learning_rate": 1.478902746448302e-07, + "loss": 0.13908278942108154, + "step": 7090 + }, + { + "epoch": 0.9586480777355302, + "grad_norm": 1.4892845153808594, + "learning_rate": 1.469330755167686e-07, + "loss": 0.22181129455566406, + "step": 7091 + }, + { + "epoch": 0.9587832699619772, + "grad_norm": 1.0576359033584595, + "learning_rate": 1.4597896887644458e-07, + "loss": 0.1736459732055664, + "step": 7092 + }, + { + "epoch": 0.9589184621884241, + "grad_norm": 0.7634274959564209, + "learning_rate": 1.4502795492251418e-07, + "loss": 0.1763286590576172, + "step": 7093 + }, + { + "epoch": 0.9590536544148711, + "grad_norm": 0.7180197238922119, + "learning_rate": 1.4408003385297742e-07, + "loss": 0.1296900510787964, + "step": 7094 + }, + { + "epoch": 0.9591888466413181, + "grad_norm": 0.7676934599876404, + "learning_rate": 1.4313520586519968e-07, + "loss": 0.11440253257751465, + "step": 7095 + }, + { + "epoch": 0.9593240388677651, + "grad_norm": 0.7943113446235657, + "learning_rate": 1.4219347115589863e-07, + "loss": 0.14129972457885742, + "step": 7096 + }, + { + "epoch": 0.959459231094212, + "grad_norm": 1.0915900468826294, + "learning_rate": 1.4125482992114914e-07, + "loss": 0.14510822296142578, + "step": 7097 + }, + { + "epoch": 0.959594423320659, + "grad_norm": 2.141817092895508, + "learning_rate": 1.403192823563798e-07, + "loss": 0.15626931190490723, + "step": 7098 + }, + { + "epoch": 0.959729615547106, + "grad_norm": 1.0602796077728271, + "learning_rate": 1.3938682865637654e-07, + "loss": 0.16187810897827148, + "step": 7099 + }, + { + "epoch": 0.959864807773553, + "grad_norm": 0.9447826743125916, + "learning_rate": 1.38457469015284e-07, + "loss": 0.16381263732910156, + "step": 7100 + }, + { + "epoch": 0.96, + "grad_norm": 1.0039433240890503, + "learning_rate": 1.3753120362659576e-07, + "loss": 0.1378326416015625, + "step": 7101 + }, + { + "epoch": 0.9601351922264469, + "grad_norm": 1.00853431224823, + "learning_rate": 1.3660803268316925e-07, + "loss": 0.18384933471679688, + "step": 7102 + }, + { + "epoch": 0.9602703844528939, + "grad_norm": 0.7060810923576355, + "learning_rate": 1.3568795637721065e-07, + "loss": 0.15129804611206055, + "step": 7103 + }, + { + "epoch": 0.9604055766793409, + "grad_norm": 1.0206860303878784, + "learning_rate": 1.347709749002851e-07, + "loss": 0.16194725036621094, + "step": 7104 + }, + { + "epoch": 0.9605407689057879, + "grad_norm": 1.98849618434906, + "learning_rate": 1.338570884433149e-07, + "loss": 0.2380657196044922, + "step": 7105 + }, + { + "epoch": 0.9606759611322349, + "grad_norm": 0.991416871547699, + "learning_rate": 1.3294629719657448e-07, + "loss": 0.19721364974975586, + "step": 7106 + }, + { + "epoch": 0.9608111533586818, + "grad_norm": 1.5264153480529785, + "learning_rate": 1.3203860134969548e-07, + "loss": 0.178314208984375, + "step": 7107 + }, + { + "epoch": 0.9609463455851288, + "grad_norm": 0.9761550426483154, + "learning_rate": 1.3113400109166508e-07, + "loss": 0.14107108116149902, + "step": 7108 + }, + { + "epoch": 0.9610815378115758, + "grad_norm": 2.3769993782043457, + "learning_rate": 1.3023249661082592e-07, + "loss": 0.19447320699691772, + "step": 7109 + }, + { + "epoch": 0.9612167300380228, + "grad_norm": 0.9111101031303406, + "learning_rate": 1.2933408809487623e-07, + "loss": 0.1765880584716797, + "step": 7110 + }, + { + "epoch": 0.9613519222644697, + "grad_norm": 1.793071985244751, + "learning_rate": 1.2843877573086972e-07, + "loss": 0.1785578727722168, + "step": 7111 + }, + { + "epoch": 0.9614871144909167, + "grad_norm": 0.776685357093811, + "learning_rate": 1.2754655970521556e-07, + "loss": 0.11192750930786133, + "step": 7112 + }, + { + "epoch": 0.9616223067173637, + "grad_norm": 1.4859867095947266, + "learning_rate": 1.2665744020367686e-07, + "loss": 0.17167648673057556, + "step": 7113 + }, + { + "epoch": 0.9617574989438107, + "grad_norm": 1.095131516456604, + "learning_rate": 1.2577141741137388e-07, + "loss": 0.1838674545288086, + "step": 7114 + }, + { + "epoch": 0.9618926911702577, + "grad_norm": 1.3015220165252686, + "learning_rate": 1.248884915127807e-07, + "loss": 0.1648005247116089, + "step": 7115 + }, + { + "epoch": 0.9620278833967046, + "grad_norm": 0.9335525631904602, + "learning_rate": 1.2400866269172694e-07, + "loss": 0.12241888046264648, + "step": 7116 + }, + { + "epoch": 0.9621630756231516, + "grad_norm": 2.4253151416778564, + "learning_rate": 1.2313193113139777e-07, + "loss": 0.1744241714477539, + "step": 7117 + }, + { + "epoch": 0.9622982678495986, + "grad_norm": 1.8318414688110352, + "learning_rate": 1.2225829701433545e-07, + "loss": 0.14675283432006836, + "step": 7118 + }, + { + "epoch": 0.9624334600760456, + "grad_norm": 0.8328853845596313, + "learning_rate": 1.2138776052243116e-07, + "loss": 0.1313028335571289, + "step": 7119 + }, + { + "epoch": 0.9625686523024926, + "grad_norm": 1.4564807415008545, + "learning_rate": 1.2052032183693996e-07, + "loss": 0.2045459747314453, + "step": 7120 + }, + { + "epoch": 0.9627038445289395, + "grad_norm": 1.504341959953308, + "learning_rate": 1.196559811384623e-07, + "loss": 0.2007904052734375, + "step": 7121 + }, + { + "epoch": 0.9628390367553865, + "grad_norm": 1.3762198686599731, + "learning_rate": 1.1879473860696266e-07, + "loss": 0.17700862884521484, + "step": 7122 + }, + { + "epoch": 0.9629742289818335, + "grad_norm": 1.278596043586731, + "learning_rate": 1.179365944217542e-07, + "loss": 0.22965621948242188, + "step": 7123 + }, + { + "epoch": 0.9631094212082805, + "grad_norm": 0.8342136144638062, + "learning_rate": 1.1708154876150735e-07, + "loss": 0.16316509246826172, + "step": 7124 + }, + { + "epoch": 0.9632446134347274, + "grad_norm": 0.8797103762626648, + "learning_rate": 1.1622960180424801e-07, + "loss": 0.1547832489013672, + "step": 7125 + }, + { + "epoch": 0.9633798056611745, + "grad_norm": 1.561728596687317, + "learning_rate": 1.1538075372735435e-07, + "loss": 0.17431354522705078, + "step": 7126 + }, + { + "epoch": 0.9635149978876215, + "grad_norm": 1.3765701055526733, + "learning_rate": 1.1453500470756328e-07, + "loss": 0.1888265609741211, + "step": 7127 + }, + { + "epoch": 0.9636501901140685, + "grad_norm": 0.6069510579109192, + "learning_rate": 1.1369235492096397e-07, + "loss": 0.12566661834716797, + "step": 7128 + }, + { + "epoch": 0.9637853823405155, + "grad_norm": 1.071848750114441, + "learning_rate": 1.1285280454299774e-07, + "loss": 0.12290287017822266, + "step": 7129 + }, + { + "epoch": 0.9639205745669625, + "grad_norm": 1.1194480657577515, + "learning_rate": 1.1201635374846808e-07, + "loss": 0.14417016506195068, + "step": 7130 + }, + { + "epoch": 0.9640557667934094, + "grad_norm": 1.3878682851791382, + "learning_rate": 1.1118300271152404e-07, + "loss": 0.1517190933227539, + "step": 7131 + }, + { + "epoch": 0.9641909590198564, + "grad_norm": 1.0549837350845337, + "learning_rate": 1.1035275160567682e-07, + "loss": 0.13437843322753906, + "step": 7132 + }, + { + "epoch": 0.9643261512463034, + "grad_norm": 0.8931710124015808, + "learning_rate": 1.0952560060378813e-07, + "loss": 0.16132497787475586, + "step": 7133 + }, + { + "epoch": 0.9644613434727504, + "grad_norm": 2.0878305435180664, + "learning_rate": 1.0870154987807523e-07, + "loss": 0.18332195281982422, + "step": 7134 + }, + { + "epoch": 0.9645965356991973, + "grad_norm": 1.961843729019165, + "learning_rate": 1.0788059960010921e-07, + "loss": 0.21039772033691406, + "step": 7135 + }, + { + "epoch": 0.9647317279256443, + "grad_norm": 2.4643235206604004, + "learning_rate": 1.0706274994081499e-07, + "loss": 0.202484130859375, + "step": 7136 + }, + { + "epoch": 0.9648669201520913, + "grad_norm": 1.414941668510437, + "learning_rate": 1.0624800107047805e-07, + "loss": 0.17751026153564453, + "step": 7137 + }, + { + "epoch": 0.9650021123785383, + "grad_norm": 1.6314334869384766, + "learning_rate": 1.0543635315872934e-07, + "loss": 0.1760258674621582, + "step": 7138 + }, + { + "epoch": 0.9651373046049853, + "grad_norm": 1.513124704360962, + "learning_rate": 1.0462780637455871e-07, + "loss": 0.16096115112304688, + "step": 7139 + }, + { + "epoch": 0.9652724968314322, + "grad_norm": 1.3170726299285889, + "learning_rate": 1.0382236088631148e-07, + "loss": 0.19763565063476562, + "step": 7140 + }, + { + "epoch": 0.9654076890578792, + "grad_norm": 1.1763368844985962, + "learning_rate": 1.0302001686168349e-07, + "loss": 0.19106268882751465, + "step": 7141 + }, + { + "epoch": 0.9655428812843262, + "grad_norm": 1.1916087865829468, + "learning_rate": 1.0222077446772949e-07, + "loss": 0.22853660583496094, + "step": 7142 + }, + { + "epoch": 0.9656780735107732, + "grad_norm": 0.9290386438369751, + "learning_rate": 1.0142463387085465e-07, + "loss": 0.1544055938720703, + "step": 7143 + }, + { + "epoch": 0.9658132657372202, + "grad_norm": 1.145377516746521, + "learning_rate": 1.0063159523682142e-07, + "loss": 0.1629772186279297, + "step": 7144 + }, + { + "epoch": 0.9659484579636671, + "grad_norm": 0.5970140099525452, + "learning_rate": 9.984165873074102e-08, + "loss": 0.12464714050292969, + "step": 7145 + }, + { + "epoch": 0.9660836501901141, + "grad_norm": 1.0250977277755737, + "learning_rate": 9.905482451708526e-08, + "loss": 0.15589046478271484, + "step": 7146 + }, + { + "epoch": 0.9662188424165611, + "grad_norm": 1.4684053659439087, + "learning_rate": 9.827109275967638e-08, + "loss": 0.18293946981430054, + "step": 7147 + }, + { + "epoch": 0.9663540346430081, + "grad_norm": 2.2026636600494385, + "learning_rate": 9.749046362169223e-08, + "loss": 0.18991661071777344, + "step": 7148 + }, + { + "epoch": 0.966489226869455, + "grad_norm": 0.7151421308517456, + "learning_rate": 9.671293726566443e-08, + "loss": 0.13555145263671875, + "step": 7149 + }, + { + "epoch": 0.966624419095902, + "grad_norm": 1.5888656377792358, + "learning_rate": 9.593851385347518e-08, + "loss": 0.17270278930664062, + "step": 7150 + }, + { + "epoch": 0.966759611322349, + "grad_norm": 1.7988790273666382, + "learning_rate": 9.516719354636716e-08, + "loss": 0.13352978229522705, + "step": 7151 + }, + { + "epoch": 0.966894803548796, + "grad_norm": 0.8385295271873474, + "learning_rate": 9.439897650493024e-08, + "loss": 0.14668607711791992, + "step": 7152 + }, + { + "epoch": 0.967029995775243, + "grad_norm": 1.4584004878997803, + "learning_rate": 9.363386288911313e-08, + "loss": 0.17164230346679688, + "step": 7153 + }, + { + "epoch": 0.9671651880016899, + "grad_norm": 1.8320735692977905, + "learning_rate": 9.287185285821675e-08, + "loss": 0.1378650665283203, + "step": 7154 + }, + { + "epoch": 0.9673003802281369, + "grad_norm": 1.011025071144104, + "learning_rate": 9.211294657089587e-08, + "loss": 0.17245864868164062, + "step": 7155 + }, + { + "epoch": 0.9674355724545839, + "grad_norm": 1.7312713861465454, + "learning_rate": 9.135714418515573e-08, + "loss": 0.19532012939453125, + "step": 7156 + }, + { + "epoch": 0.9675707646810309, + "grad_norm": 1.5306978225708008, + "learning_rate": 9.060444585836381e-08, + "loss": 0.18760108947753906, + "step": 7157 + }, + { + "epoch": 0.9677059569074778, + "grad_norm": 1.1531139612197876, + "learning_rate": 8.985485174722974e-08, + "loss": 0.2074298858642578, + "step": 7158 + }, + { + "epoch": 0.9678411491339248, + "grad_norm": 0.9318092465400696, + "learning_rate": 8.910836200782868e-08, + "loss": 0.16977548599243164, + "step": 7159 + }, + { + "epoch": 0.9679763413603718, + "grad_norm": 0.8461886048316956, + "learning_rate": 8.836497679557964e-08, + "loss": 0.1353921890258789, + "step": 7160 + }, + { + "epoch": 0.9681115335868188, + "grad_norm": 1.3059446811676025, + "learning_rate": 8.762469626526048e-08, + "loss": 0.19249725341796875, + "step": 7161 + }, + { + "epoch": 0.9682467258132658, + "grad_norm": 0.7001742124557495, + "learning_rate": 8.688752057100457e-08, + "loss": 0.13103771209716797, + "step": 7162 + }, + { + "epoch": 0.9683819180397127, + "grad_norm": 0.8404279351234436, + "learning_rate": 8.615344986629082e-08, + "loss": 0.17218804359436035, + "step": 7163 + }, + { + "epoch": 0.9685171102661597, + "grad_norm": 0.9690999984741211, + "learning_rate": 8.542248430396027e-08, + "loss": 0.18452072143554688, + "step": 7164 + }, + { + "epoch": 0.9686523024926067, + "grad_norm": 1.1719168424606323, + "learning_rate": 8.469462403620287e-08, + "loss": 0.1506209373474121, + "step": 7165 + }, + { + "epoch": 0.9687874947190537, + "grad_norm": 1.027123212814331, + "learning_rate": 8.39698692145624e-08, + "loss": 0.1435985565185547, + "step": 7166 + }, + { + "epoch": 0.9689226869455007, + "grad_norm": 1.2666568756103516, + "learning_rate": 8.324821998993648e-08, + "loss": 0.16504454612731934, + "step": 7167 + }, + { + "epoch": 0.9690578791719476, + "grad_norm": 0.8207797408103943, + "learning_rate": 8.252967651257826e-08, + "loss": 0.15892601013183594, + "step": 7168 + }, + { + "epoch": 0.9691930713983946, + "grad_norm": 1.1917369365692139, + "learning_rate": 8.181423893208973e-08, + "loss": 0.11492204666137695, + "step": 7169 + }, + { + "epoch": 0.9693282636248416, + "grad_norm": 1.136677622795105, + "learning_rate": 8.110190739743172e-08, + "loss": 0.1826953887939453, + "step": 7170 + }, + { + "epoch": 0.9694634558512886, + "grad_norm": 1.513115644454956, + "learning_rate": 8.03926820569123e-08, + "loss": 0.2111072540283203, + "step": 7171 + }, + { + "epoch": 0.9695986480777355, + "grad_norm": 1.2681777477264404, + "learning_rate": 7.968656305819833e-08, + "loss": 0.17836666107177734, + "step": 7172 + }, + { + "epoch": 0.9697338403041825, + "grad_norm": 1.457101583480835, + "learning_rate": 7.898355054830719e-08, + "loss": 0.147369384765625, + "step": 7173 + }, + { + "epoch": 0.9698690325306295, + "grad_norm": 1.0182133913040161, + "learning_rate": 7.828364467360849e-08, + "loss": 0.14681339263916016, + "step": 7174 + }, + { + "epoch": 0.9700042247570765, + "grad_norm": 1.2612043619155884, + "learning_rate": 7.758684557982731e-08, + "loss": 0.1560688018798828, + "step": 7175 + }, + { + "epoch": 0.9701394169835235, + "grad_norm": 1.7324298620224, + "learning_rate": 7.689315341204262e-08, + "loss": 0.23563766479492188, + "step": 7176 + }, + { + "epoch": 0.9702746092099704, + "grad_norm": 0.8930456042289734, + "learning_rate": 7.62025683146822e-08, + "loss": 0.17041826248168945, + "step": 7177 + }, + { + "epoch": 0.9704098014364174, + "grad_norm": 0.8613118529319763, + "learning_rate": 7.551509043152937e-08, + "loss": 0.1299428939819336, + "step": 7178 + }, + { + "epoch": 0.9705449936628644, + "grad_norm": 0.7200577259063721, + "learning_rate": 7.483071990572132e-08, + "loss": 0.14456462860107422, + "step": 7179 + }, + { + "epoch": 0.9706801858893114, + "grad_norm": 0.6957589387893677, + "learning_rate": 7.414945687975072e-08, + "loss": 0.12284493446350098, + "step": 7180 + }, + { + "epoch": 0.9708153781157584, + "grad_norm": 0.8150241374969482, + "learning_rate": 7.347130149545578e-08, + "loss": 0.1579742431640625, + "step": 7181 + }, + { + "epoch": 0.9709505703422053, + "grad_norm": 2.2492763996124268, + "learning_rate": 7.279625389403355e-08, + "loss": 0.18596220016479492, + "step": 7182 + }, + { + "epoch": 0.9710857625686523, + "grad_norm": 0.8839974403381348, + "learning_rate": 7.212431421603327e-08, + "loss": 0.19951319694519043, + "step": 7183 + }, + { + "epoch": 0.9712209547950993, + "grad_norm": 1.9972691535949707, + "learning_rate": 7.145548260135638e-08, + "loss": 0.22493743896484375, + "step": 7184 + }, + { + "epoch": 0.9713561470215463, + "grad_norm": 1.2882407903671265, + "learning_rate": 7.078975918925645e-08, + "loss": 0.17240333557128906, + "step": 7185 + }, + { + "epoch": 0.9714913392479932, + "grad_norm": 1.1543464660644531, + "learning_rate": 7.012714411834098e-08, + "loss": 0.200042724609375, + "step": 7186 + }, + { + "epoch": 0.9716265314744402, + "grad_norm": 0.7761035561561584, + "learning_rate": 6.946763752656959e-08, + "loss": 0.16349124908447266, + "step": 7187 + }, + { + "epoch": 0.9717617237008872, + "grad_norm": 2.1249611377716064, + "learning_rate": 6.881123955125579e-08, + "loss": 0.1902303695678711, + "step": 7188 + }, + { + "epoch": 0.9718969159273342, + "grad_norm": 1.2013185024261475, + "learning_rate": 6.815795032906524e-08, + "loss": 0.15337467193603516, + "step": 7189 + }, + { + "epoch": 0.9720321081537812, + "grad_norm": 1.1208775043487549, + "learning_rate": 6.750776999601415e-08, + "loss": 0.13518810272216797, + "step": 7190 + }, + { + "epoch": 0.9721673003802281, + "grad_norm": 1.5856140851974487, + "learning_rate": 6.68606986874759e-08, + "loss": 0.17936277389526367, + "step": 7191 + }, + { + "epoch": 0.9723024926066751, + "grad_norm": 0.8141245245933533, + "learning_rate": 6.62167365381744e-08, + "loss": 0.1470470428466797, + "step": 7192 + }, + { + "epoch": 0.9724376848331221, + "grad_norm": 1.0699129104614258, + "learning_rate": 6.557588368218237e-08, + "loss": 0.16811370849609375, + "step": 7193 + }, + { + "epoch": 0.9725728770595691, + "grad_norm": 1.0268573760986328, + "learning_rate": 6.493814025293476e-08, + "loss": 0.20555877685546875, + "step": 7194 + }, + { + "epoch": 0.972708069286016, + "grad_norm": 1.0829578638076782, + "learning_rate": 6.430350638320704e-08, + "loss": 0.19777393341064453, + "step": 7195 + }, + { + "epoch": 0.972843261512463, + "grad_norm": 1.1269197463989258, + "learning_rate": 6.367198220513848e-08, + "loss": 0.1859452724456787, + "step": 7196 + }, + { + "epoch": 0.97297845373891, + "grad_norm": 1.023902416229248, + "learning_rate": 6.304356785021226e-08, + "loss": 0.16260242462158203, + "step": 7197 + }, + { + "epoch": 0.973113645965357, + "grad_norm": 2.685072183609009, + "learning_rate": 6.241826344926704e-08, + "loss": 0.2359628677368164, + "step": 7198 + }, + { + "epoch": 0.973248838191804, + "grad_norm": 1.1743239164352417, + "learning_rate": 6.17960691324987e-08, + "loss": 0.21769332885742188, + "step": 7199 + }, + { + "epoch": 0.973384030418251, + "grad_norm": 0.9565960168838501, + "learning_rate": 6.117698502944857e-08, + "loss": 0.1842660903930664, + "step": 7200 + }, + { + "epoch": 0.9735192226446979, + "grad_norm": 0.8898730874061584, + "learning_rate": 6.056101126901358e-08, + "loss": 0.15668296813964844, + "step": 7201 + }, + { + "epoch": 0.9736544148711449, + "grad_norm": 1.8366475105285645, + "learning_rate": 5.994814797944281e-08, + "loss": 0.2002243995666504, + "step": 7202 + }, + { + "epoch": 0.9737896070975919, + "grad_norm": 0.6003488898277283, + "learning_rate": 5.933839528833751e-08, + "loss": 0.07465946674346924, + "step": 7203 + }, + { + "epoch": 0.9739247993240389, + "grad_norm": 0.8286743760108948, + "learning_rate": 5.873175332265279e-08, + "loss": 0.14000606536865234, + "step": 7204 + }, + { + "epoch": 0.9740599915504858, + "grad_norm": 1.0218167304992676, + "learning_rate": 5.812822220869096e-08, + "loss": 0.15097808837890625, + "step": 7205 + }, + { + "epoch": 0.9741951837769328, + "grad_norm": 1.3449265956878662, + "learning_rate": 5.752780207211483e-08, + "loss": 0.1745452880859375, + "step": 7206 + }, + { + "epoch": 0.9743303760033798, + "grad_norm": 0.7474161982536316, + "learning_rate": 5.693049303793274e-08, + "loss": 0.13282322883605957, + "step": 7207 + }, + { + "epoch": 0.9744655682298268, + "grad_norm": 1.85740327835083, + "learning_rate": 5.6336295230508536e-08, + "loss": 0.1816849708557129, + "step": 7208 + }, + { + "epoch": 0.9746007604562738, + "grad_norm": 1.1863350868225098, + "learning_rate": 5.5745208773558266e-08, + "loss": 0.18543028831481934, + "step": 7209 + }, + { + "epoch": 0.9747359526827207, + "grad_norm": 0.8628146648406982, + "learning_rate": 5.515723379014681e-08, + "loss": 0.17764568328857422, + "step": 7210 + }, + { + "epoch": 0.9748711449091677, + "grad_norm": 1.4264284372329712, + "learning_rate": 5.4572370402694583e-08, + "loss": 0.19461441040039062, + "step": 7211 + }, + { + "epoch": 0.9750063371356147, + "grad_norm": 2.3023037910461426, + "learning_rate": 5.399061873297417e-08, + "loss": 0.1838665008544922, + "step": 7212 + }, + { + "epoch": 0.9751415293620617, + "grad_norm": 0.8093187212944031, + "learning_rate": 5.341197890210869e-08, + "loss": 0.1583251953125, + "step": 7213 + }, + { + "epoch": 0.9752767215885086, + "grad_norm": 0.8853726983070374, + "learning_rate": 5.283645103057344e-08, + "loss": 0.11314535140991211, + "step": 7214 + }, + { + "epoch": 0.9754119138149556, + "grad_norm": 1.486255407333374, + "learning_rate": 5.226403523819756e-08, + "loss": 0.20304203033447266, + "step": 7215 + }, + { + "epoch": 0.9755471060414026, + "grad_norm": 0.9995924830436707, + "learning_rate": 5.169473164416072e-08, + "loss": 0.17139625549316406, + "step": 7216 + }, + { + "epoch": 0.9756822982678496, + "grad_norm": 0.8910306096076965, + "learning_rate": 5.112854036699477e-08, + "loss": 0.1562633514404297, + "step": 7217 + }, + { + "epoch": 0.9758174904942966, + "grad_norm": 1.2105767726898193, + "learning_rate": 5.0565461524583745e-08, + "loss": 0.17580699920654297, + "step": 7218 + }, + { + "epoch": 0.9759526827207435, + "grad_norm": 1.0213998556137085, + "learning_rate": 5.0005495234163865e-08, + "loss": 0.1726093292236328, + "step": 7219 + }, + { + "epoch": 0.9760878749471905, + "grad_norm": 1.1177624464035034, + "learning_rate": 4.9448641612321874e-08, + "loss": 0.1695718765258789, + "step": 7220 + }, + { + "epoch": 0.9762230671736375, + "grad_norm": 0.9595202803611755, + "learning_rate": 4.889490077500003e-08, + "loss": 0.1505718231201172, + "step": 7221 + }, + { + "epoch": 0.9763582594000845, + "grad_norm": 1.8065110445022583, + "learning_rate": 4.8344272837489434e-08, + "loss": 0.19502532482147217, + "step": 7222 + }, + { + "epoch": 0.9764934516265315, + "grad_norm": 1.5631041526794434, + "learning_rate": 4.779675791443172e-08, + "loss": 0.13937902450561523, + "step": 7223 + }, + { + "epoch": 0.9766286438529784, + "grad_norm": 0.8443604707717896, + "learning_rate": 4.72523561198257e-08, + "loss": 0.16242873668670654, + "step": 7224 + }, + { + "epoch": 0.9767638360794254, + "grad_norm": 1.0379021167755127, + "learning_rate": 4.6711067567014044e-08, + "loss": 0.18265342712402344, + "step": 7225 + }, + { + "epoch": 0.9768990283058724, + "grad_norm": 0.9588872790336609, + "learning_rate": 4.6172892368701595e-08, + "loss": 0.1867837905883789, + "step": 7226 + }, + { + "epoch": 0.9770342205323194, + "grad_norm": 1.001046061515808, + "learning_rate": 4.5637830636935385e-08, + "loss": 0.1444079875946045, + "step": 7227 + }, + { + "epoch": 0.9771694127587663, + "grad_norm": 1.2527439594268799, + "learning_rate": 4.5105882483119643e-08, + "loss": 0.18381083011627197, + "step": 7228 + }, + { + "epoch": 0.9773046049852133, + "grad_norm": 1.439324140548706, + "learning_rate": 4.4577048018007436e-08, + "loss": 0.16228103637695312, + "step": 7229 + }, + { + "epoch": 0.9774397972116603, + "grad_norm": 1.0737818479537964, + "learning_rate": 4.405132735170569e-08, + "loss": 0.15839767456054688, + "step": 7230 + }, + { + "epoch": 0.9775749894381073, + "grad_norm": 0.8232232928276062, + "learning_rate": 4.3528720593675184e-08, + "loss": 0.1758575439453125, + "step": 7231 + }, + { + "epoch": 0.9777101816645543, + "grad_norm": 0.9403512477874756, + "learning_rate": 4.300922785271888e-08, + "loss": 0.1376628875732422, + "step": 7232 + }, + { + "epoch": 0.9778453738910012, + "grad_norm": 1.2808226346969604, + "learning_rate": 4.249284923700358e-08, + "loss": 0.16298246383666992, + "step": 7233 + }, + { + "epoch": 0.9779805661174482, + "grad_norm": 1.59022855758667, + "learning_rate": 4.197958485404163e-08, + "loss": 0.1289353370666504, + "step": 7234 + }, + { + "epoch": 0.9781157583438952, + "grad_norm": 1.9270899295806885, + "learning_rate": 4.1469434810694206e-08, + "loss": 0.1957225799560547, + "step": 7235 + }, + { + "epoch": 0.9782509505703422, + "grad_norm": 1.4817548990249634, + "learning_rate": 4.096239921317968e-08, + "loss": 0.16260910034179688, + "step": 7236 + }, + { + "epoch": 0.9783861427967891, + "grad_norm": 1.0501106977462769, + "learning_rate": 4.045847816706361e-08, + "loss": 0.15535354614257812, + "step": 7237 + }, + { + "epoch": 0.9785213350232361, + "grad_norm": 1.6144909858703613, + "learning_rate": 3.9957671777268724e-08, + "loss": 0.23519515991210938, + "step": 7238 + }, + { + "epoch": 0.9786565272496831, + "grad_norm": 0.9325268268585205, + "learning_rate": 3.945998014806163e-08, + "loss": 0.18716812133789062, + "step": 7239 + }, + { + "epoch": 0.9787917194761301, + "grad_norm": 1.1416516304016113, + "learning_rate": 3.896540338306609e-08, + "loss": 0.16619491577148438, + "step": 7240 + }, + { + "epoch": 0.9789269117025771, + "grad_norm": 1.0785635709762573, + "learning_rate": 3.847394158525641e-08, + "loss": 0.17587709426879883, + "step": 7241 + }, + { + "epoch": 0.979062103929024, + "grad_norm": 1.9263311624526978, + "learning_rate": 3.798559485695574e-08, + "loss": 0.23343753814697266, + "step": 7242 + }, + { + "epoch": 0.979197296155471, + "grad_norm": 0.9645626544952393, + "learning_rate": 3.7500363299842746e-08, + "loss": 0.16544723510742188, + "step": 7243 + }, + { + "epoch": 0.979332488381918, + "grad_norm": 1.0040361881256104, + "learning_rate": 3.701824701494327e-08, + "loss": 0.1776714324951172, + "step": 7244 + }, + { + "epoch": 0.979467680608365, + "grad_norm": 1.1052600145339966, + "learning_rate": 3.653924610263703e-08, + "loss": 0.17157649993896484, + "step": 7245 + }, + { + "epoch": 0.979602872834812, + "grad_norm": 0.9224521517753601, + "learning_rate": 3.6063360662654255e-08, + "loss": 0.15214157104492188, + "step": 7246 + }, + { + "epoch": 0.9797380650612589, + "grad_norm": 0.9142236113548279, + "learning_rate": 3.559059079407734e-08, + "loss": 0.14783668518066406, + "step": 7247 + }, + { + "epoch": 0.9798732572877059, + "grad_norm": 0.9299970865249634, + "learning_rate": 3.512093659533922e-08, + "loss": 0.20923995971679688, + "step": 7248 + }, + { + "epoch": 0.9800084495141529, + "grad_norm": 0.7759482264518738, + "learning_rate": 3.4654398164225e-08, + "loss": 0.1537771224975586, + "step": 7249 + }, + { + "epoch": 0.9801436417405999, + "grad_norm": 2.05537748336792, + "learning_rate": 3.4190975597870325e-08, + "loss": 0.18368864059448242, + "step": 7250 + }, + { + "epoch": 0.9802788339670468, + "grad_norm": 0.983456015586853, + "learning_rate": 3.373066899276134e-08, + "loss": 0.15195655822753906, + "step": 7251 + }, + { + "epoch": 0.9804140261934938, + "grad_norm": 0.7887934446334839, + "learning_rate": 3.3273478444736386e-08, + "loss": 0.15648460388183594, + "step": 7252 + }, + { + "epoch": 0.9805492184199408, + "grad_norm": 0.9016557931900024, + "learning_rate": 3.281940404898764e-08, + "loss": 0.1652379035949707, + "step": 7253 + }, + { + "epoch": 0.9806844106463878, + "grad_norm": 1.712913155555725, + "learning_rate": 3.236844590005117e-08, + "loss": 0.194793701171875, + "step": 7254 + }, + { + "epoch": 0.9808196028728348, + "grad_norm": 1.3363252878189087, + "learning_rate": 3.192060409182351e-08, + "loss": 0.19053077697753906, + "step": 7255 + }, + { + "epoch": 0.9809547950992817, + "grad_norm": 2.1128203868865967, + "learning_rate": 3.147587871754509e-08, + "loss": 0.2031574249267578, + "step": 7256 + }, + { + "epoch": 0.9810899873257287, + "grad_norm": 1.0806351900100708, + "learning_rate": 3.1034269869810174e-08, + "loss": 0.19896411895751953, + "step": 7257 + }, + { + "epoch": 0.9812251795521757, + "grad_norm": 1.3647725582122803, + "learning_rate": 3.05957776405652e-08, + "loss": 0.1773386001586914, + "step": 7258 + }, + { + "epoch": 0.9813603717786227, + "grad_norm": 0.8439529538154602, + "learning_rate": 3.016040212110549e-08, + "loss": 0.1278771162033081, + "step": 7259 + }, + { + "epoch": 0.9814955640050697, + "grad_norm": 1.5146340131759644, + "learning_rate": 2.9728143402078522e-08, + "loss": 0.18013668060302734, + "step": 7260 + }, + { + "epoch": 0.9816307562315166, + "grad_norm": 0.9241484999656677, + "learning_rate": 2.9299001573483975e-08, + "loss": 0.13370704650878906, + "step": 7261 + }, + { + "epoch": 0.9817659484579637, + "grad_norm": 1.0415635108947754, + "learning_rate": 2.8872976724670375e-08, + "loss": 0.20318269729614258, + "step": 7262 + }, + { + "epoch": 0.9819011406844107, + "grad_norm": 0.9046740531921387, + "learning_rate": 2.8450068944338436e-08, + "loss": 0.1349802017211914, + "step": 7263 + }, + { + "epoch": 0.9820363329108577, + "grad_norm": 0.9697754979133606, + "learning_rate": 2.803027832054106e-08, + "loss": 0.1750507354736328, + "step": 7264 + }, + { + "epoch": 0.9821715251373047, + "grad_norm": 1.0696742534637451, + "learning_rate": 2.7613604940679995e-08, + "loss": 0.17043828964233398, + "step": 7265 + }, + { + "epoch": 0.9823067173637516, + "grad_norm": 0.9562708139419556, + "learning_rate": 2.7200048891509176e-08, + "loss": 0.19631004333496094, + "step": 7266 + }, + { + "epoch": 0.9824419095901986, + "grad_norm": 1.1618362665176392, + "learning_rate": 2.67896102591314e-08, + "loss": 0.19598841667175293, + "step": 7267 + }, + { + "epoch": 0.9825771018166456, + "grad_norm": 1.6189159154891968, + "learning_rate": 2.6382289129004978e-08, + "loss": 0.17659282684326172, + "step": 7268 + }, + { + "epoch": 0.9827122940430926, + "grad_norm": 1.9550522565841675, + "learning_rate": 2.5978085585935395e-08, + "loss": 0.17226266860961914, + "step": 7269 + }, + { + "epoch": 0.9828474862695395, + "grad_norm": 0.9543769955635071, + "learning_rate": 2.5576999714078676e-08, + "loss": 0.17045879364013672, + "step": 7270 + }, + { + "epoch": 0.9829826784959865, + "grad_norm": 0.6764682531356812, + "learning_rate": 2.517903159694468e-08, + "loss": 0.12017536163330078, + "step": 7271 + }, + { + "epoch": 0.9831178707224335, + "grad_norm": 2.183420419692993, + "learning_rate": 2.4784181317390465e-08, + "loss": 0.2104175090789795, + "step": 7272 + }, + { + "epoch": 0.9832530629488805, + "grad_norm": 1.640756607055664, + "learning_rate": 2.4392448957628598e-08, + "loss": 0.21809673309326172, + "step": 7273 + }, + { + "epoch": 0.9833882551753275, + "grad_norm": 1.1838451623916626, + "learning_rate": 2.4003834599217177e-08, + "loss": 0.19672393798828125, + "step": 7274 + }, + { + "epoch": 0.9835234474017744, + "grad_norm": 0.7297896146774292, + "learning_rate": 2.3618338323071474e-08, + "loss": 0.1278705596923828, + "step": 7275 + }, + { + "epoch": 0.9836586396282214, + "grad_norm": 0.5232936143875122, + "learning_rate": 2.3235960209448958e-08, + "loss": 0.07738608121871948, + "step": 7276 + }, + { + "epoch": 0.9837938318546684, + "grad_norm": 1.0427874326705933, + "learning_rate": 2.2856700337967606e-08, + "loss": 0.1937999725341797, + "step": 7277 + }, + { + "epoch": 0.9839290240811154, + "grad_norm": 1.6218242645263672, + "learning_rate": 2.2480558787587592e-08, + "loss": 0.1422128677368164, + "step": 7278 + }, + { + "epoch": 0.9840642163075624, + "grad_norm": 1.1342490911483765, + "learning_rate": 2.2107535636626263e-08, + "loss": 0.18131446838378906, + "step": 7279 + }, + { + "epoch": 0.9841994085340093, + "grad_norm": 0.8911510109901428, + "learning_rate": 2.1737630962746502e-08, + "loss": 0.14415740966796875, + "step": 7280 + }, + { + "epoch": 0.9843346007604563, + "grad_norm": 1.264528512954712, + "learning_rate": 2.1370844842966696e-08, + "loss": 0.1870269775390625, + "step": 7281 + }, + { + "epoch": 0.9844697929869033, + "grad_norm": 1.0244718790054321, + "learning_rate": 2.100717735365243e-08, + "loss": 0.13822078704833984, + "step": 7282 + }, + { + "epoch": 0.9846049852133503, + "grad_norm": 1.2269244194030762, + "learning_rate": 2.0646628570521464e-08, + "loss": 0.17969632148742676, + "step": 7283 + }, + { + "epoch": 0.9847401774397972, + "grad_norm": 1.7875611782073975, + "learning_rate": 2.028919856864375e-08, + "loss": 0.20510482788085938, + "step": 7284 + }, + { + "epoch": 0.9848753696662442, + "grad_norm": 0.8839919567108154, + "learning_rate": 1.9934887422434766e-08, + "loss": 0.1232914924621582, + "step": 7285 + }, + { + "epoch": 0.9850105618926912, + "grad_norm": 0.9244188666343689, + "learning_rate": 1.9583695205665496e-08, + "loss": 0.1779327392578125, + "step": 7286 + }, + { + "epoch": 0.9851457541191382, + "grad_norm": 0.8528125286102295, + "learning_rate": 1.9235621991457454e-08, + "loss": 0.12193822860717773, + "step": 7287 + }, + { + "epoch": 0.9852809463455852, + "grad_norm": 0.8231453895568848, + "learning_rate": 1.889066785227933e-08, + "loss": 0.17607498168945312, + "step": 7288 + }, + { + "epoch": 0.9854161385720321, + "grad_norm": 1.1122676134109497, + "learning_rate": 1.854883285995368e-08, + "loss": 0.1445636749267578, + "step": 7289 + }, + { + "epoch": 0.9855513307984791, + "grad_norm": 0.9929426908493042, + "learning_rate": 1.8210117085651902e-08, + "loss": 0.18935394287109375, + "step": 7290 + }, + { + "epoch": 0.9856865230249261, + "grad_norm": 0.9319225549697876, + "learning_rate": 1.7874520599894252e-08, + "loss": 0.1456918716430664, + "step": 7291 + }, + { + "epoch": 0.9858217152513731, + "grad_norm": 1.7456135749816895, + "learning_rate": 1.7542043472558166e-08, + "loss": 0.18245935440063477, + "step": 7292 + }, + { + "epoch": 0.98595690747782, + "grad_norm": 0.8514389395713806, + "learning_rate": 1.7212685772864945e-08, + "loss": 0.16852951049804688, + "step": 7293 + }, + { + "epoch": 0.986092099704267, + "grad_norm": 1.0665686130523682, + "learning_rate": 1.68864475693864e-08, + "loss": 0.17768096923828125, + "step": 7294 + }, + { + "epoch": 0.986227291930714, + "grad_norm": 1.8206572532653809, + "learning_rate": 1.6563328930051526e-08, + "loss": 0.21712517738342285, + "step": 7295 + }, + { + "epoch": 0.986362484157161, + "grad_norm": 1.1156790256500244, + "learning_rate": 1.624332992213151e-08, + "loss": 0.1737537384033203, + "step": 7296 + }, + { + "epoch": 0.986497676383608, + "grad_norm": 1.063873529434204, + "learning_rate": 1.5926450612254728e-08, + "loss": 0.18525314331054688, + "step": 7297 + }, + { + "epoch": 0.986632868610055, + "grad_norm": 0.9279559254646301, + "learning_rate": 1.5612691066395068e-08, + "loss": 0.1679532527923584, + "step": 7298 + }, + { + "epoch": 0.9867680608365019, + "grad_norm": 0.6950719952583313, + "learning_rate": 1.530205134987861e-08, + "loss": 0.1355583667755127, + "step": 7299 + }, + { + "epoch": 0.9869032530629489, + "grad_norm": 1.4669368267059326, + "learning_rate": 1.499453152738528e-08, + "loss": 0.1821460723876953, + "step": 7300 + }, + { + "epoch": 0.9870384452893959, + "grad_norm": 0.9478243589401245, + "learning_rate": 1.4690131662938866e-08, + "loss": 0.19354939460754395, + "step": 7301 + }, + { + "epoch": 0.9871736375158429, + "grad_norm": 1.6360996961593628, + "learning_rate": 1.438885181991867e-08, + "loss": 0.15348577499389648, + "step": 7302 + }, + { + "epoch": 0.9873088297422898, + "grad_norm": 0.8750420808792114, + "learning_rate": 1.4090692061052846e-08, + "loss": 0.13043498992919922, + "step": 7303 + }, + { + "epoch": 0.9874440219687368, + "grad_norm": 0.9935842156410217, + "learning_rate": 1.3795652448420071e-08, + "loss": 0.16595458984375, + "step": 7304 + }, + { + "epoch": 0.9875792141951838, + "grad_norm": 1.0260628461837769, + "learning_rate": 1.3503733043447874e-08, + "loss": 0.14501237869262695, + "step": 7305 + }, + { + "epoch": 0.9877144064216308, + "grad_norm": 1.2830389738082886, + "learning_rate": 1.3214933906915971e-08, + "loss": 0.18228816986083984, + "step": 7306 + }, + { + "epoch": 0.9878495986480778, + "grad_norm": 1.0005398988723755, + "learning_rate": 1.2929255098954596e-08, + "loss": 0.16384315490722656, + "step": 7307 + }, + { + "epoch": 0.9879847908745247, + "grad_norm": 1.0565052032470703, + "learning_rate": 1.2646696679042835e-08, + "loss": 0.12819170951843262, + "step": 7308 + }, + { + "epoch": 0.9881199831009717, + "grad_norm": 1.379349946975708, + "learning_rate": 1.2367258706010298e-08, + "loss": 0.1906871795654297, + "step": 7309 + }, + { + "epoch": 0.9882551753274187, + "grad_norm": 1.4749375581741333, + "learning_rate": 1.2090941238040443e-08, + "loss": 0.1494770050048828, + "step": 7310 + }, + { + "epoch": 0.9883903675538657, + "grad_norm": 1.1573714017868042, + "learning_rate": 1.1817744332660584e-08, + "loss": 0.19374942779541016, + "step": 7311 + }, + { + "epoch": 0.9885255597803126, + "grad_norm": 1.9752477407455444, + "learning_rate": 1.1547668046751891e-08, + "loss": 0.19591617584228516, + "step": 7312 + }, + { + "epoch": 0.9886607520067596, + "grad_norm": 1.1509065628051758, + "learning_rate": 1.1280712436549379e-08, + "loss": 0.17366409301757812, + "step": 7313 + }, + { + "epoch": 0.9887959442332066, + "grad_norm": 0.8882109522819519, + "learning_rate": 1.1016877557630257e-08, + "loss": 0.1535041332244873, + "step": 7314 + }, + { + "epoch": 0.9889311364596536, + "grad_norm": 1.4429450035095215, + "learning_rate": 1.0756163464928915e-08, + "loss": 0.15332555770874023, + "step": 7315 + }, + { + "epoch": 0.9890663286861006, + "grad_norm": 1.075042486190796, + "learning_rate": 1.0498570212726932e-08, + "loss": 0.0976266860961914, + "step": 7316 + }, + { + "epoch": 0.9892015209125475, + "grad_norm": 1.0847547054290771, + "learning_rate": 1.024409785465641e-08, + "loss": 0.2151951789855957, + "step": 7317 + }, + { + "epoch": 0.9893367131389945, + "grad_norm": 2.1262550354003906, + "learning_rate": 9.992746443699962e-09, + "loss": 0.17905807495117188, + "step": 7318 + }, + { + "epoch": 0.9894719053654415, + "grad_norm": 0.7050594091415405, + "learning_rate": 9.744516032190731e-09, + "loss": 0.12417125701904297, + "step": 7319 + }, + { + "epoch": 0.9896070975918885, + "grad_norm": 0.7700551152229309, + "learning_rate": 9.499406671809041e-09, + "loss": 0.14566659927368164, + "step": 7320 + }, + { + "epoch": 0.9897422898183355, + "grad_norm": 1.0600937604904175, + "learning_rate": 9.2574184135924e-09, + "loss": 0.16457796096801758, + "step": 7321 + }, + { + "epoch": 0.9898774820447824, + "grad_norm": 0.9225361347198486, + "learning_rate": 9.018551307920508e-09, + "loss": 0.15133380889892578, + "step": 7322 + }, + { + "epoch": 0.9900126742712294, + "grad_norm": 0.8620610237121582, + "learning_rate": 8.782805404526917e-09, + "loss": 0.12780380249023438, + "step": 7323 + }, + { + "epoch": 0.9901478664976764, + "grad_norm": 0.9073818922042847, + "learning_rate": 8.55018075249736e-09, + "loss": 0.18050765991210938, + "step": 7324 + }, + { + "epoch": 0.9902830587241234, + "grad_norm": 1.0237106084823608, + "learning_rate": 8.320677400264764e-09, + "loss": 0.1272258758544922, + "step": 7325 + }, + { + "epoch": 0.9904182509505703, + "grad_norm": 2.0932223796844482, + "learning_rate": 8.094295395610906e-09, + "loss": 0.17127227783203125, + "step": 7326 + }, + { + "epoch": 0.9905534431770173, + "grad_norm": 0.7614780068397522, + "learning_rate": 7.87103478567308e-09, + "loss": 0.1362626552581787, + "step": 7327 + }, + { + "epoch": 0.9906886354034643, + "grad_norm": 2.6018054485321045, + "learning_rate": 7.65089561693244e-09, + "loss": 0.20467662811279297, + "step": 7328 + }, + { + "epoch": 0.9908238276299113, + "grad_norm": 1.8599770069122314, + "learning_rate": 7.433877935225652e-09, + "loss": 0.23937749862670898, + "step": 7329 + }, + { + "epoch": 0.9909590198563583, + "grad_norm": 1.6535546779632568, + "learning_rate": 7.219981785733243e-09, + "loss": 0.2488689422607422, + "step": 7330 + }, + { + "epoch": 0.9910942120828052, + "grad_norm": 1.333464503288269, + "learning_rate": 7.009207212992919e-09, + "loss": 0.1871333122253418, + "step": 7331 + }, + { + "epoch": 0.9912294043092522, + "grad_norm": 1.3936303853988647, + "learning_rate": 6.801554260889575e-09, + "loss": 0.18279457092285156, + "step": 7332 + }, + { + "epoch": 0.9913645965356992, + "grad_norm": 2.6115469932556152, + "learning_rate": 6.5970229726552976e-09, + "loss": 0.17894554138183594, + "step": 7333 + }, + { + "epoch": 0.9914997887621462, + "grad_norm": 1.036321997642517, + "learning_rate": 6.3956133908743556e-09, + "loss": 0.1586000919342041, + "step": 7334 + }, + { + "epoch": 0.9916349809885932, + "grad_norm": 0.9874232411384583, + "learning_rate": 6.197325557483202e-09, + "loss": 0.18906879425048828, + "step": 7335 + }, + { + "epoch": 0.9917701732150401, + "grad_norm": 1.2339227199554443, + "learning_rate": 6.002159513765482e-09, + "loss": 0.20660972595214844, + "step": 7336 + }, + { + "epoch": 0.9919053654414871, + "grad_norm": 1.4209257364273071, + "learning_rate": 5.810115300355357e-09, + "loss": 0.1447368860244751, + "step": 7337 + }, + { + "epoch": 0.9920405576679341, + "grad_norm": 1.4106571674346924, + "learning_rate": 5.621192957239174e-09, + "loss": 0.17977237701416016, + "step": 7338 + }, + { + "epoch": 0.9921757498943811, + "grad_norm": 0.9243113398551941, + "learning_rate": 5.435392523748806e-09, + "loss": 0.1879138946533203, + "step": 7339 + }, + { + "epoch": 0.992310942120828, + "grad_norm": 1.0342358350753784, + "learning_rate": 5.252714038571638e-09, + "loss": 0.1633462905883789, + "step": 7340 + }, + { + "epoch": 0.992446134347275, + "grad_norm": 0.8588201999664307, + "learning_rate": 5.073157539742246e-09, + "loss": 0.2013254165649414, + "step": 7341 + }, + { + "epoch": 0.992581326573722, + "grad_norm": 1.3490233421325684, + "learning_rate": 4.896723064642394e-09, + "loss": 0.19267654418945312, + "step": 7342 + }, + { + "epoch": 0.992716518800169, + "grad_norm": 1.363053321838379, + "learning_rate": 4.723410650009363e-09, + "loss": 0.13930273056030273, + "step": 7343 + }, + { + "epoch": 0.992851711026616, + "grad_norm": 0.7633225321769714, + "learning_rate": 4.553220331925956e-09, + "loss": 0.13668537139892578, + "step": 7344 + }, + { + "epoch": 0.9929869032530629, + "grad_norm": 1.1288535594940186, + "learning_rate": 4.38615214582716e-09, + "loss": 0.18528461456298828, + "step": 7345 + }, + { + "epoch": 0.9931220954795099, + "grad_norm": 1.898842692375183, + "learning_rate": 4.2222061265001496e-09, + "loss": 0.11673331260681152, + "step": 7346 + }, + { + "epoch": 0.9932572877059569, + "grad_norm": 1.1446244716644287, + "learning_rate": 4.0613823080742905e-09, + "loss": 0.14681482315063477, + "step": 7347 + }, + { + "epoch": 0.9933924799324039, + "grad_norm": 1.2303898334503174, + "learning_rate": 3.903680724037795e-09, + "loss": 0.1594851016998291, + "step": 7348 + }, + { + "epoch": 0.9935276721588508, + "grad_norm": 0.6970008611679077, + "learning_rate": 3.749101407224398e-09, + "loss": 0.13020730018615723, + "step": 7349 + }, + { + "epoch": 0.9936628643852978, + "grad_norm": 1.0794814825057983, + "learning_rate": 3.597644389818355e-09, + "loss": 0.16993427276611328, + "step": 7350 + }, + { + "epoch": 0.9937980566117448, + "grad_norm": 1.0079402923583984, + "learning_rate": 3.4493097033527767e-09, + "loss": 0.18355178833007812, + "step": 7351 + }, + { + "epoch": 0.9939332488381918, + "grad_norm": 1.695164442062378, + "learning_rate": 3.3040973787112904e-09, + "loss": 0.22621726989746094, + "step": 7352 + }, + { + "epoch": 0.9940684410646388, + "grad_norm": 0.8182611465454102, + "learning_rate": 3.162007446129711e-09, + "loss": 0.15974998474121094, + "step": 7353 + }, + { + "epoch": 0.9942036332910857, + "grad_norm": 1.092155933380127, + "learning_rate": 3.023039935191041e-09, + "loss": 0.12267303466796875, + "step": 7354 + }, + { + "epoch": 0.9943388255175327, + "grad_norm": 1.8967422246932983, + "learning_rate": 2.887194874830468e-09, + "loss": 0.203948974609375, + "step": 7355 + }, + { + "epoch": 0.9944740177439797, + "grad_norm": 1.5373715162277222, + "learning_rate": 2.7544722933287026e-09, + "loss": 0.168548583984375, + "step": 7356 + }, + { + "epoch": 0.9946092099704267, + "grad_norm": 3.407031774520874, + "learning_rate": 2.6248722183203066e-09, + "loss": 0.26572322845458984, + "step": 7357 + }, + { + "epoch": 0.9947444021968737, + "grad_norm": 1.1482545137405396, + "learning_rate": 2.498394676790361e-09, + "loss": 0.18760061264038086, + "step": 7358 + }, + { + "epoch": 0.9948795944233206, + "grad_norm": 1.3253059387207031, + "learning_rate": 2.375039695071135e-09, + "loss": 0.184722900390625, + "step": 7359 + }, + { + "epoch": 0.9950147866497676, + "grad_norm": 1.8004224300384521, + "learning_rate": 2.2548072988454184e-09, + "loss": 0.18215274810791016, + "step": 7360 + }, + { + "epoch": 0.9951499788762146, + "grad_norm": 1.150065302848816, + "learning_rate": 2.1376975131465194e-09, + "loss": 0.18532133102416992, + "step": 7361 + }, + { + "epoch": 0.9952851711026616, + "grad_norm": 1.2456303834915161, + "learning_rate": 2.023710362356601e-09, + "loss": 0.21260452270507812, + "step": 7362 + }, + { + "epoch": 0.9954203633291085, + "grad_norm": 0.7877089977264404, + "learning_rate": 1.9128458702100117e-09, + "loss": 0.11085927486419678, + "step": 7363 + }, + { + "epoch": 0.9955555555555555, + "grad_norm": 0.7680411338806152, + "learning_rate": 1.8051040597882873e-09, + "loss": 0.14110727608203888, + "step": 7364 + }, + { + "epoch": 0.9956907477820025, + "grad_norm": 1.2317640781402588, + "learning_rate": 1.70048495352515e-09, + "loss": 0.1412665843963623, + "step": 7365 + }, + { + "epoch": 0.9958259400084495, + "grad_norm": 1.1753548383712769, + "learning_rate": 1.5989885731998443e-09, + "loss": 0.2248525619506836, + "step": 7366 + }, + { + "epoch": 0.9959611322348965, + "grad_norm": 1.157978892326355, + "learning_rate": 1.5006149399487966e-09, + "loss": 0.1808757781982422, + "step": 7367 + }, + { + "epoch": 0.9960963244613434, + "grad_norm": 1.24444580078125, + "learning_rate": 1.4053640742489604e-09, + "loss": 0.1722240447998047, + "step": 7368 + }, + { + "epoch": 0.9962315166877904, + "grad_norm": 0.7689839601516724, + "learning_rate": 1.3132359959361351e-09, + "loss": 0.13801860809326172, + "step": 7369 + }, + { + "epoch": 0.9963667089142374, + "grad_norm": 0.7986721992492676, + "learning_rate": 1.2242307241899787e-09, + "loss": 0.13493633270263672, + "step": 7370 + }, + { + "epoch": 0.9965019011406844, + "grad_norm": 1.176010012626648, + "learning_rate": 1.1383482775406685e-09, + "loss": 0.17741775512695312, + "step": 7371 + }, + { + "epoch": 0.9966370933671314, + "grad_norm": 0.6402312517166138, + "learning_rate": 1.0555886738738973e-09, + "loss": 0.1308603286743164, + "step": 7372 + }, + { + "epoch": 0.9967722855935783, + "grad_norm": 1.0092192888259888, + "learning_rate": 9.75951930415886e-10, + "loss": 0.15879487991333008, + "step": 7373 + }, + { + "epoch": 0.9969074778200253, + "grad_norm": 0.9983760714530945, + "learning_rate": 8.994380637483701e-10, + "loss": 0.12014353275299072, + "step": 7374 + }, + { + "epoch": 0.9970426700464723, + "grad_norm": 1.4045286178588867, + "learning_rate": 8.260470898036054e-10, + "loss": 0.1888408660888672, + "step": 7375 + }, + { + "epoch": 0.9971778622729193, + "grad_norm": 0.9965384602546692, + "learning_rate": 7.557790238627016e-10, + "loss": 0.12786483764648438, + "step": 7376 + }, + { + "epoch": 0.9973130544993662, + "grad_norm": 1.1297028064727783, + "learning_rate": 6.886338805522918e-10, + "loss": 0.1680002212524414, + "step": 7377 + }, + { + "epoch": 0.9974482467258132, + "grad_norm": 0.8662614226341248, + "learning_rate": 6.246116738561903e-10, + "loss": 0.13668346405029297, + "step": 7378 + }, + { + "epoch": 0.9975834389522602, + "grad_norm": 1.2495498657226562, + "learning_rate": 5.637124171004038e-10, + "loss": 0.17917275428771973, + "step": 7379 + }, + { + "epoch": 0.9977186311787072, + "grad_norm": 1.2119765281677246, + "learning_rate": 5.059361229681203e-10, + "loss": 0.2170848846435547, + "step": 7380 + }, + { + "epoch": 0.9978538234051542, + "grad_norm": 1.5703673362731934, + "learning_rate": 4.5128280348638583e-10, + "loss": 0.199737548828125, + "step": 7381 + }, + { + "epoch": 0.9979890156316011, + "grad_norm": 1.292392373085022, + "learning_rate": 3.9975247003443127e-10, + "loss": 0.18749713897705078, + "step": 7382 + }, + { + "epoch": 0.9981242078580481, + "grad_norm": 0.9077715277671814, + "learning_rate": 3.51345133342007e-10, + "loss": 0.15091705322265625, + "step": 7383 + }, + { + "epoch": 0.9982594000844951, + "grad_norm": 0.7391771078109741, + "learning_rate": 3.060608034877177e-10, + "loss": 0.12168383598327637, + "step": 7384 + }, + { + "epoch": 0.9983945923109421, + "grad_norm": 1.1722162961959839, + "learning_rate": 2.638994898990221e-10, + "loss": 0.1868276596069336, + "step": 7385 + }, + { + "epoch": 0.998529784537389, + "grad_norm": 1.9503419399261475, + "learning_rate": 2.2486120135556398e-10, + "loss": 0.14784622192382812, + "step": 7386 + }, + { + "epoch": 0.998664976763836, + "grad_norm": 1.0859471559524536, + "learning_rate": 1.889459459841758e-10, + "loss": 0.1971149444580078, + "step": 7387 + }, + { + "epoch": 0.998800168990283, + "grad_norm": 1.0214329957962036, + "learning_rate": 1.56153731263875e-10, + "loss": 0.1439189910888672, + "step": 7388 + }, + { + "epoch": 0.99893536121673, + "grad_norm": 0.9618058800697327, + "learning_rate": 1.2648456402086784e-10, + "loss": 0.15452098846435547, + "step": 7389 + }, + { + "epoch": 0.999070553443177, + "grad_norm": 1.1153603792190552, + "learning_rate": 9.99384504318801e-11, + "loss": 0.14294958114624023, + "step": 7390 + }, + { + "epoch": 0.999205745669624, + "grad_norm": 1.242556095123291, + "learning_rate": 7.651539602582247e-11, + "loss": 0.16652870178222656, + "step": 7391 + }, + { + "epoch": 0.9993409378960709, + "grad_norm": 1.1816502809524536, + "learning_rate": 5.6215405678794464e-11, + "loss": 0.21294212341308594, + "step": 7392 + }, + { + "epoch": 0.9994761301225179, + "grad_norm": 0.8464372754096985, + "learning_rate": 3.9038483615749795e-11, + "loss": 0.15835332870483398, + "step": 7393 + }, + { + "epoch": 0.9996113223489649, + "grad_norm": 0.7513839602470398, + "learning_rate": 2.4984633415492398e-11, + "loss": 0.1408405303955078, + "step": 7394 + }, + { + "epoch": 0.9997465145754119, + "grad_norm": 0.8847243189811707, + "learning_rate": 1.4053858004015041e-11, + "loss": 0.17638516426086426, + "step": 7395 + }, + { + "epoch": 0.9998817068018588, + "grad_norm": 1.8010793924331665, + "learning_rate": 6.246159654499373e-12, + "loss": 0.19793701171875, + "step": 7396 + }, + { + "epoch": 1.0, + "grad_norm": 1.0269384384155273, + "learning_rate": 1.561539995642569e-12, + "loss": 0.2227954864501953, + "step": 7397 + }, + { + "epoch": 1.0, + "step": 7397, + "total_flos": 8.015953600697703e+19, + "train_loss": 0.1922166269180403, + "train_runtime": 105556.7771, + "train_samples_per_second": 17.939, + "train_steps_per_second": 0.07 + } + ], + "logging_steps": 1.0, + "max_steps": 7397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 8.015953600697703e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}