| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 327, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0030581039755351682, |
| "grad_norm": 0.443359375, |
| "learning_rate": 1e-05, |
| "loss": 1.9943, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0061162079510703364, |
| "grad_norm": 0.423828125, |
| "learning_rate": 9.96941896024465e-06, |
| "loss": 1.9359, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.009174311926605505, |
| "grad_norm": 0.435546875, |
| "learning_rate": 9.938837920489298e-06, |
| "loss": 1.9327, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.012232415902140673, |
| "grad_norm": 0.4609375, |
| "learning_rate": 9.908256880733946e-06, |
| "loss": 2.0686, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01529051987767584, |
| "grad_norm": 0.419921875, |
| "learning_rate": 9.877675840978595e-06, |
| "loss": 1.9825, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01834862385321101, |
| "grad_norm": 0.412109375, |
| "learning_rate": 9.847094801223243e-06, |
| "loss": 1.9199, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.021406727828746176, |
| "grad_norm": 0.375, |
| "learning_rate": 9.81651376146789e-06, |
| "loss": 1.8093, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.024464831804281346, |
| "grad_norm": 0.412109375, |
| "learning_rate": 9.785932721712539e-06, |
| "loss": 2.1014, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.027522935779816515, |
| "grad_norm": 0.3828125, |
| "learning_rate": 9.755351681957187e-06, |
| "loss": 1.9116, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03058103975535168, |
| "grad_norm": 0.36328125, |
| "learning_rate": 9.724770642201836e-06, |
| "loss": 1.9779, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03363914373088685, |
| "grad_norm": 0.34375, |
| "learning_rate": 9.694189602446484e-06, |
| "loss": 1.9248, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03669724770642202, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.663608562691133e-06, |
| "loss": 1.8547, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.039755351681957186, |
| "grad_norm": 0.3359375, |
| "learning_rate": 9.633027522935781e-06, |
| "loss": 1.9319, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04281345565749235, |
| "grad_norm": 0.341796875, |
| "learning_rate": 9.602446483180428e-06, |
| "loss": 1.966, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.045871559633027525, |
| "grad_norm": 0.333984375, |
| "learning_rate": 9.571865443425077e-06, |
| "loss": 1.9488, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04892966360856269, |
| "grad_norm": 0.4140625, |
| "learning_rate": 9.541284403669727e-06, |
| "loss": 1.8087, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05198776758409786, |
| "grad_norm": 0.2890625, |
| "learning_rate": 9.510703363914374e-06, |
| "loss": 1.8776, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05504587155963303, |
| "grad_norm": 0.259765625, |
| "learning_rate": 9.480122324159022e-06, |
| "loss": 1.7371, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0581039755351682, |
| "grad_norm": 0.24609375, |
| "learning_rate": 9.44954128440367e-06, |
| "loss": 1.7353, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06116207951070336, |
| "grad_norm": 0.267578125, |
| "learning_rate": 9.41896024464832e-06, |
| "loss": 1.8114, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06422018348623854, |
| "grad_norm": 0.26171875, |
| "learning_rate": 9.388379204892966e-06, |
| "loss": 1.7347, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0672782874617737, |
| "grad_norm": 0.2734375, |
| "learning_rate": 9.357798165137616e-06, |
| "loss": 1.7692, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07033639143730887, |
| "grad_norm": 0.255859375, |
| "learning_rate": 9.327217125382265e-06, |
| "loss": 1.7877, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07339449541284404, |
| "grad_norm": 0.296875, |
| "learning_rate": 9.296636085626912e-06, |
| "loss": 1.8301, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0764525993883792, |
| "grad_norm": 0.224609375, |
| "learning_rate": 9.26605504587156e-06, |
| "loss": 1.658, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07951070336391437, |
| "grad_norm": 0.240234375, |
| "learning_rate": 9.235474006116209e-06, |
| "loss": 1.7265, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08256880733944955, |
| "grad_norm": 0.232421875, |
| "learning_rate": 9.204892966360857e-06, |
| "loss": 1.7334, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0856269113149847, |
| "grad_norm": 0.244140625, |
| "learning_rate": 9.174311926605506e-06, |
| "loss": 1.8412, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08868501529051988, |
| "grad_norm": 0.22265625, |
| "learning_rate": 9.143730886850154e-06, |
| "loss": 1.6743, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 9.113149847094803e-06, |
| "loss": 1.7155, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09480122324159021, |
| "grad_norm": 0.20703125, |
| "learning_rate": 9.08256880733945e-06, |
| "loss": 1.6282, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09785932721712538, |
| "grad_norm": 0.232421875, |
| "learning_rate": 9.051987767584098e-06, |
| "loss": 1.7725, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10091743119266056, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 9.021406727828746e-06, |
| "loss": 1.6011, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.10397553516819572, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 8.990825688073395e-06, |
| "loss": 1.6585, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.10703363914373089, |
| "grad_norm": 0.20703125, |
| "learning_rate": 8.960244648318043e-06, |
| "loss": 1.6251, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11009174311926606, |
| "grad_norm": 0.21875, |
| "learning_rate": 8.929663608562692e-06, |
| "loss": 1.6919, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.11314984709480122, |
| "grad_norm": 0.2109375, |
| "learning_rate": 8.89908256880734e-06, |
| "loss": 1.6721, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1162079510703364, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 8.868501529051989e-06, |
| "loss": 1.5971, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.11926605504587157, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 8.837920489296636e-06, |
| "loss": 1.6569, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.12232415902140673, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 8.807339449541286e-06, |
| "loss": 1.5487, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12538226299694188, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 8.776758409785935e-06, |
| "loss": 1.6258, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.12844036697247707, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 8.746177370030581e-06, |
| "loss": 1.6342, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.13149847094801223, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 8.71559633027523e-06, |
| "loss": 1.5386, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1345565749235474, |
| "grad_norm": 0.19140625, |
| "learning_rate": 8.685015290519878e-06, |
| "loss": 1.6483, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.13761467889908258, |
| "grad_norm": 0.1875, |
| "learning_rate": 8.654434250764527e-06, |
| "loss": 1.6573, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.14067278287461774, |
| "grad_norm": 0.193359375, |
| "learning_rate": 8.623853211009175e-06, |
| "loss": 1.6196, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1437308868501529, |
| "grad_norm": 0.17578125, |
| "learning_rate": 8.593272171253824e-06, |
| "loss": 1.5876, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.14678899082568808, |
| "grad_norm": 0.16796875, |
| "learning_rate": 8.562691131498472e-06, |
| "loss": 1.4985, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.14984709480122324, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 8.53211009174312e-06, |
| "loss": 1.5836, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1529051987767584, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 8.501529051987768e-06, |
| "loss": 1.5866, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1559633027522936, |
| "grad_norm": 0.171875, |
| "learning_rate": 8.470948012232416e-06, |
| "loss": 1.5418, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.15902140672782875, |
| "grad_norm": 0.154296875, |
| "learning_rate": 8.440366972477065e-06, |
| "loss": 1.4874, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1620795107033639, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 8.409785932721713e-06, |
| "loss": 1.5738, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1651376146788991, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 8.379204892966362e-06, |
| "loss": 1.5435, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.16819571865443425, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 8.34862385321101e-06, |
| "loss": 1.5404, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1712538226299694, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 8.318042813455657e-06, |
| "loss": 1.603, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1743119266055046, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 8.287461773700306e-06, |
| "loss": 1.5362, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.17737003058103976, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.256880733944956e-06, |
| "loss": 1.553, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.18042813455657492, |
| "grad_norm": 0.1640625, |
| "learning_rate": 8.226299694189603e-06, |
| "loss": 1.5814, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 0.17578125, |
| "learning_rate": 8.195718654434251e-06, |
| "loss": 1.5952, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18654434250764526, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.1651376146789e-06, |
| "loss": 1.5422, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.18960244648318042, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 8.134556574923548e-06, |
| "loss": 1.5216, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1926605504587156, |
| "grad_norm": 0.15234375, |
| "learning_rate": 8.103975535168197e-06, |
| "loss": 1.5198, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.19571865443425077, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 8.073394495412845e-06, |
| "loss": 1.5289, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.19877675840978593, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 8.042813455657494e-06, |
| "loss": 1.4866, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2018348623853211, |
| "grad_norm": 0.271484375, |
| "learning_rate": 8.01223241590214e-06, |
| "loss": 1.5346, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.20489296636085627, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 7.981651376146789e-06, |
| "loss": 1.4209, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.20795107033639143, |
| "grad_norm": 0.1875, |
| "learning_rate": 7.951070336391438e-06, |
| "loss": 1.458, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.21100917431192662, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 7.920489296636086e-06, |
| "loss": 1.5318, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.21406727828746178, |
| "grad_norm": 0.146484375, |
| "learning_rate": 7.889908256880735e-06, |
| "loss": 1.4342, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.21712538226299694, |
| "grad_norm": 0.150390625, |
| "learning_rate": 7.859327217125383e-06, |
| "loss": 1.477, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.22018348623853212, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 7.828746177370032e-06, |
| "loss": 1.5185, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.22324159021406728, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 7.79816513761468e-06, |
| "loss": 1.4906, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.22629969418960244, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 7.767584097859327e-06, |
| "loss": 1.4896, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.22935779816513763, |
| "grad_norm": 0.1640625, |
| "learning_rate": 7.737003058103975e-06, |
| "loss": 1.4208, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2324159021406728, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 7.706422018348626e-06, |
| "loss": 1.4615, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.23547400611620795, |
| "grad_norm": 0.1640625, |
| "learning_rate": 7.675840978593273e-06, |
| "loss": 1.535, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.23853211009174313, |
| "grad_norm": 0.255859375, |
| "learning_rate": 7.645259938837921e-06, |
| "loss": 1.5198, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2415902140672783, |
| "grad_norm": 0.171875, |
| "learning_rate": 7.6146788990825695e-06, |
| "loss": 1.4999, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.24464831804281345, |
| "grad_norm": 0.158203125, |
| "learning_rate": 7.584097859327217e-06, |
| "loss": 1.4931, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.24770642201834864, |
| "grad_norm": 0.181640625, |
| "learning_rate": 7.553516819571866e-06, |
| "loss": 1.4184, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.25076452599388377, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 7.522935779816515e-06, |
| "loss": 1.4276, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.25382262996941896, |
| "grad_norm": 0.158203125, |
| "learning_rate": 7.492354740061163e-06, |
| "loss": 1.4851, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.25688073394495414, |
| "grad_norm": 0.181640625, |
| "learning_rate": 7.461773700305811e-06, |
| "loss": 1.4652, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2599388379204893, |
| "grad_norm": 0.16015625, |
| "learning_rate": 7.431192660550459e-06, |
| "loss": 1.3763, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.26299694189602446, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 7.4006116207951074e-06, |
| "loss": 1.3851, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.26605504587155965, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 7.370030581039755e-06, |
| "loss": 1.4771, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2691131498470948, |
| "grad_norm": 0.150390625, |
| "learning_rate": 7.3394495412844045e-06, |
| "loss": 1.4717, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.27217125382262997, |
| "grad_norm": 0.14453125, |
| "learning_rate": 7.308868501529053e-06, |
| "loss": 1.379, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 0.2578125, |
| "learning_rate": 7.278287461773701e-06, |
| "loss": 1.4997, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2782874617737003, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.247706422018349e-06, |
| "loss": 1.4874, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.28134556574923547, |
| "grad_norm": 0.16015625, |
| "learning_rate": 7.217125382262997e-06, |
| "loss": 1.395, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.28440366972477066, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.186544342507645e-06, |
| "loss": 1.4112, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.2874617737003058, |
| "grad_norm": 0.154296875, |
| "learning_rate": 7.155963302752295e-06, |
| "loss": 1.4487, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.290519877675841, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.125382262996942e-06, |
| "loss": 1.4645, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 7.094801223241591e-06, |
| "loss": 1.4871, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2966360856269113, |
| "grad_norm": 0.146484375, |
| "learning_rate": 7.0642201834862385e-06, |
| "loss": 1.3655, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2996941896024465, |
| "grad_norm": 0.154296875, |
| "learning_rate": 7.033639143730887e-06, |
| "loss": 1.3332, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.30275229357798167, |
| "grad_norm": 0.1484375, |
| "learning_rate": 7.0030581039755356e-06, |
| "loss": 1.4129, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.3058103975535168, |
| "grad_norm": 0.146484375, |
| "learning_rate": 6.972477064220184e-06, |
| "loss": 1.3736, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.308868501529052, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 6.941896024464833e-06, |
| "loss": 1.3838, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3119266055045872, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 6.911314984709481e-06, |
| "loss": 1.3569, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3149847094801223, |
| "grad_norm": 0.140625, |
| "learning_rate": 6.880733944954129e-06, |
| "loss": 1.3893, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3180428134556575, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.850152905198777e-06, |
| "loss": 1.4268, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3211009174311927, |
| "grad_norm": 0.1875, |
| "learning_rate": 6.819571865443425e-06, |
| "loss": 1.3937, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3241590214067278, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.788990825688074e-06, |
| "loss": 1.4216, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.327217125382263, |
| "grad_norm": 0.154296875, |
| "learning_rate": 6.758409785932723e-06, |
| "loss": 1.3811, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3302752293577982, |
| "grad_norm": 0.150390625, |
| "learning_rate": 6.7278287461773705e-06, |
| "loss": 1.3724, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.19140625, |
| "learning_rate": 6.697247706422019e-06, |
| "loss": 1.3512, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3363914373088685, |
| "grad_norm": 0.21484375, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.3825, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3394495412844037, |
| "grad_norm": 0.197265625, |
| "learning_rate": 6.636085626911316e-06, |
| "loss": 1.368, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3425076452599388, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.6055045871559645e-06, |
| "loss": 1.3675, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.345565749235474, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 6.574923547400612e-06, |
| "loss": 1.4697, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3486238532110092, |
| "grad_norm": 0.181640625, |
| "learning_rate": 6.544342507645261e-06, |
| "loss": 1.3567, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3516819571865443, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 6.513761467889908e-06, |
| "loss": 1.4582, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3547400611620795, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.483180428134557e-06, |
| "loss": 1.3841, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3577981651376147, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 6.452599388379206e-06, |
| "loss": 1.3835, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.36085626911314983, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 6.422018348623854e-06, |
| "loss": 1.296, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.363914373088685, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 6.391437308868502e-06, |
| "loss": 1.4054, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 0.166015625, |
| "learning_rate": 6.36085626911315e-06, |
| "loss": 1.4426, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.37003058103975534, |
| "grad_norm": 0.19921875, |
| "learning_rate": 6.330275229357799e-06, |
| "loss": 1.3692, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3730886850152905, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.299694189602446e-06, |
| "loss": 1.4525, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3761467889908257, |
| "grad_norm": 0.158203125, |
| "learning_rate": 6.269113149847096e-06, |
| "loss": 1.3568, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.37920489296636084, |
| "grad_norm": 0.201171875, |
| "learning_rate": 6.238532110091744e-06, |
| "loss": 1.3903, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.382262996941896, |
| "grad_norm": 0.154296875, |
| "learning_rate": 6.207951070336392e-06, |
| "loss": 1.2537, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3853211009174312, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 6.17737003058104e-06, |
| "loss": 1.3744, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.38837920489296635, |
| "grad_norm": 0.21484375, |
| "learning_rate": 6.146788990825688e-06, |
| "loss": 1.4813, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.39143730886850153, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 6.1162079510703365e-06, |
| "loss": 1.4173, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3944954128440367, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.085626911314986e-06, |
| "loss": 1.3833, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.39755351681957185, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 6.0550458715596335e-06, |
| "loss": 1.445, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.40061162079510704, |
| "grad_norm": 0.173828125, |
| "learning_rate": 6.024464831804282e-06, |
| "loss": 1.3309, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4036697247706422, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 5.9938837920489305e-06, |
| "loss": 1.3953, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.40672782874617736, |
| "grad_norm": 0.1953125, |
| "learning_rate": 5.963302752293578e-06, |
| "loss": 1.4004, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.40978593272171254, |
| "grad_norm": 0.22265625, |
| "learning_rate": 5.932721712538227e-06, |
| "loss": 1.4212, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.41284403669724773, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.902140672782875e-06, |
| "loss": 1.457, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.41590214067278286, |
| "grad_norm": 0.1640625, |
| "learning_rate": 5.871559633027524e-06, |
| "loss": 1.4102, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.41896024464831805, |
| "grad_norm": 0.162109375, |
| "learning_rate": 5.840978593272172e-06, |
| "loss": 1.3688, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.42201834862385323, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.81039755351682e-06, |
| "loss": 1.3866, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.42507645259938837, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 5.7798165137614684e-06, |
| "loss": 1.4175, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.42813455657492355, |
| "grad_norm": 0.171875, |
| "learning_rate": 5.749235474006116e-06, |
| "loss": 1.4204, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.43119266055045874, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 5.7186544342507654e-06, |
| "loss": 1.2843, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.43425076452599387, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.688073394495414e-06, |
| "loss": 1.2509, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.43730886850152906, |
| "grad_norm": 0.154296875, |
| "learning_rate": 5.657492354740062e-06, |
| "loss": 1.3239, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.44036697247706424, |
| "grad_norm": 0.1484375, |
| "learning_rate": 5.62691131498471e-06, |
| "loss": 1.3339, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4434250764525994, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 5.596330275229358e-06, |
| "loss": 1.3634, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.44648318042813456, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 5.565749235474006e-06, |
| "loss": 1.4016, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.44954128440366975, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 5.535168195718656e-06, |
| "loss": 1.3122, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4525993883792049, |
| "grad_norm": 0.22265625, |
| "learning_rate": 5.504587155963303e-06, |
| "loss": 1.4062, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.45565749235474007, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 5.474006116207952e-06, |
| "loss": 1.3255, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 0.150390625, |
| "learning_rate": 5.4434250764525995e-06, |
| "loss": 1.3093, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4617737003058104, |
| "grad_norm": 0.158203125, |
| "learning_rate": 5.412844036697248e-06, |
| "loss": 1.2434, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.4648318042813456, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 5.382262996941896e-06, |
| "loss": 1.2982, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.46788990825688076, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 5.351681957186545e-06, |
| "loss": 1.3628, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4709480122324159, |
| "grad_norm": 0.1484375, |
| "learning_rate": 5.3211009174311936e-06, |
| "loss": 1.3182, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4740061162079511, |
| "grad_norm": 0.154296875, |
| "learning_rate": 5.290519877675841e-06, |
| "loss": 1.3639, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.47706422018348627, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 5.25993883792049e-06, |
| "loss": 1.2816, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4801223241590214, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 5.229357798165137e-06, |
| "loss": 1.3766, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.4831804281345566, |
| "grad_norm": 0.162109375, |
| "learning_rate": 5.198776758409786e-06, |
| "loss": 1.363, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.48623853211009177, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 5.168195718654435e-06, |
| "loss": 1.3383, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.4892966360856269, |
| "grad_norm": 0.158203125, |
| "learning_rate": 5.137614678899083e-06, |
| "loss": 1.3568, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4923547400611621, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 5.1070336391437315e-06, |
| "loss": 1.4505, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.4954128440366973, |
| "grad_norm": 0.1640625, |
| "learning_rate": 5.076452599388379e-06, |
| "loss": 1.3725, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.4984709480122324, |
| "grad_norm": 0.267578125, |
| "learning_rate": 5.045871559633028e-06, |
| "loss": 1.4881, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5015290519877675, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 5.015290519877676e-06, |
| "loss": 1.3694, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5045871559633027, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 4.984709480122325e-06, |
| "loss": 1.3286, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5076452599388379, |
| "grad_norm": 0.24609375, |
| "learning_rate": 4.954128440366973e-06, |
| "loss": 1.3231, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5107033639143731, |
| "grad_norm": 0.1796875, |
| "learning_rate": 4.923547400611622e-06, |
| "loss": 1.33, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5137614678899083, |
| "grad_norm": 0.185546875, |
| "learning_rate": 4.892966360856269e-06, |
| "loss": 1.4605, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5168195718654435, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 4.862385321100918e-06, |
| "loss": 1.2918, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5198776758409785, |
| "grad_norm": 0.162109375, |
| "learning_rate": 4.831804281345566e-06, |
| "loss": 1.348, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5229357798165137, |
| "grad_norm": 0.171875, |
| "learning_rate": 4.801223241590214e-06, |
| "loss": 1.2813, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5259938837920489, |
| "grad_norm": 0.1796875, |
| "learning_rate": 4.770642201834863e-06, |
| "loss": 1.4265, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5290519877675841, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 4.740061162079511e-06, |
| "loss": 1.3373, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5321100917431193, |
| "grad_norm": 0.240234375, |
| "learning_rate": 4.70948012232416e-06, |
| "loss": 1.2793, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5351681957186545, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 4.678899082568808e-06, |
| "loss": 1.3507, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5382262996941896, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 4.648318042813456e-06, |
| "loss": 1.368, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5412844036697247, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 4.617737003058104e-06, |
| "loss": 1.3328, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5443425076452599, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 4.587155963302753e-06, |
| "loss": 1.265, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5474006116207951, |
| "grad_norm": 0.16796875, |
| "learning_rate": 4.556574923547401e-06, |
| "loss": 1.2886, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 0.15234375, |
| "learning_rate": 4.525993883792049e-06, |
| "loss": 1.351, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5535168195718655, |
| "grad_norm": 0.216796875, |
| "learning_rate": 4.4954128440366975e-06, |
| "loss": 1.3572, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5565749235474006, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.464831804281346e-06, |
| "loss": 1.3426, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5596330275229358, |
| "grad_norm": 0.16796875, |
| "learning_rate": 4.4342507645259945e-06, |
| "loss": 1.2794, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5626911314984709, |
| "grad_norm": 0.162109375, |
| "learning_rate": 4.403669724770643e-06, |
| "loss": 1.2771, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5657492354740061, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 4.373088685015291e-06, |
| "loss": 1.3546, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5688073394495413, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 4.342507645259939e-06, |
| "loss": 1.3474, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5718654434250765, |
| "grad_norm": 0.1484375, |
| "learning_rate": 4.311926605504588e-06, |
| "loss": 1.3174, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5749235474006116, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.281345565749236e-06, |
| "loss": 1.2487, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5779816513761468, |
| "grad_norm": 0.22265625, |
| "learning_rate": 4.250764525993884e-06, |
| "loss": 1.3071, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.581039755351682, |
| "grad_norm": 0.205078125, |
| "learning_rate": 4.220183486238532e-06, |
| "loss": 1.3915, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5840978593272171, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 4.189602446483181e-06, |
| "loss": 1.452, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 4.1590214067278286e-06, |
| "loss": 1.3903, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5902140672782875, |
| "grad_norm": 0.1796875, |
| "learning_rate": 4.128440366972478e-06, |
| "loss": 1.329, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5932721712538226, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 4.097859327217126e-06, |
| "loss": 1.3377, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5963302752293578, |
| "grad_norm": 0.15234375, |
| "learning_rate": 4.067278287461774e-06, |
| "loss": 1.2853, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.599388379204893, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.036697247706423e-06, |
| "loss": 1.3245, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6024464831804281, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 4.00611620795107e-06, |
| "loss": 1.3843, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6055045871559633, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 3.975535168195719e-06, |
| "loss": 1.2846, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6085626911314985, |
| "grad_norm": 0.171875, |
| "learning_rate": 3.944954128440367e-06, |
| "loss": 1.3853, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6116207951070336, |
| "grad_norm": 0.16796875, |
| "learning_rate": 3.914373088685016e-06, |
| "loss": 1.2639, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6146788990825688, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 3.8837920489296635e-06, |
| "loss": 1.2994, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.617737003058104, |
| "grad_norm": 0.16015625, |
| "learning_rate": 3.853211009174313e-06, |
| "loss": 1.2767, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6207951070336392, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 3.8226299694189605e-06, |
| "loss": 1.3354, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6238532110091743, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.7920489296636086e-06, |
| "loss": 1.348, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6269113149847095, |
| "grad_norm": 0.17578125, |
| "learning_rate": 3.7614678899082575e-06, |
| "loss": 1.3357, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6299694189602446, |
| "grad_norm": 0.201171875, |
| "learning_rate": 3.7308868501529056e-06, |
| "loss": 1.3303, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6330275229357798, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.7003058103975537e-06, |
| "loss": 1.2762, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.636085626911315, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.6697247706422022e-06, |
| "loss": 1.3132, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6391437308868502, |
| "grad_norm": 0.154296875, |
| "learning_rate": 3.6391437308868503e-06, |
| "loss": 1.3251, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.6085626911314984e-06, |
| "loss": 1.2838, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6452599388379205, |
| "grad_norm": 0.271484375, |
| "learning_rate": 3.5779816513761473e-06, |
| "loss": 1.2575, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6483180428134556, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.5474006116207954e-06, |
| "loss": 1.3564, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6513761467889908, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 3.5168195718654435e-06, |
| "loss": 1.2914, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.654434250764526, |
| "grad_norm": 0.162109375, |
| "learning_rate": 3.486238532110092e-06, |
| "loss": 1.358, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6574923547400612, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.4556574923547405e-06, |
| "loss": 1.3918, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6605504587155964, |
| "grad_norm": 0.16015625, |
| "learning_rate": 3.4250764525993886e-06, |
| "loss": 1.2418, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6636085626911316, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 3.394495412844037e-06, |
| "loss": 1.3089, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.177734375, |
| "learning_rate": 3.3639143730886852e-06, |
| "loss": 1.3742, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6697247706422018, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.2713, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.672782874617737, |
| "grad_norm": 0.20703125, |
| "learning_rate": 3.3027522935779823e-06, |
| "loss": 1.2878, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6758409785932722, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 3.2721712538226303e-06, |
| "loss": 1.308, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6788990825688074, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 3.2415902140672784e-06, |
| "loss": 1.3303, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6819571865443425, |
| "grad_norm": 0.15234375, |
| "learning_rate": 3.211009174311927e-06, |
| "loss": 1.313, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6850152905198776, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.180428134556575e-06, |
| "loss": 1.292, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6880733944954128, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 3.149847094801223e-06, |
| "loss": 1.3372, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.691131498470948, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.119266055045872e-06, |
| "loss": 1.4325, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6941896024464832, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.08868501529052e-06, |
| "loss": 1.2689, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6972477064220184, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 3.0581039755351682e-06, |
| "loss": 1.3271, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7003058103975535, |
| "grad_norm": 0.166015625, |
| "learning_rate": 3.0275229357798168e-06, |
| "loss": 1.2369, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7033639143730887, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.9969418960244653e-06, |
| "loss": 1.2675, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7064220183486238, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 2.9663608562691134e-06, |
| "loss": 1.3266, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.709480122324159, |
| "grad_norm": 0.150390625, |
| "learning_rate": 2.935779816513762e-06, |
| "loss": 1.3025, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7125382262996942, |
| "grad_norm": 0.208984375, |
| "learning_rate": 2.90519877675841e-06, |
| "loss": 1.311, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7155963302752294, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.874617737003058e-06, |
| "loss": 1.2618, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7186544342507645, |
| "grad_norm": 0.1953125, |
| "learning_rate": 2.844036697247707e-06, |
| "loss": 1.3004, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7217125382262997, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 2.813455657492355e-06, |
| "loss": 1.3086, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7247706422018348, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 2.782874617737003e-06, |
| "loss": 1.3202, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.72782874617737, |
| "grad_norm": 0.1640625, |
| "learning_rate": 2.7522935779816517e-06, |
| "loss": 1.3231, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7308868501529052, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.7217125382262998e-06, |
| "loss": 1.2527, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.691131498470948e-06, |
| "loss": 1.2934, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7370030581039755, |
| "grad_norm": 0.1640625, |
| "learning_rate": 2.6605504587155968e-06, |
| "loss": 1.3146, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7400611620795107, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 2.629969418960245e-06, |
| "loss": 1.2623, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7431192660550459, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.599388379204893e-06, |
| "loss": 1.305, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.746177370030581, |
| "grad_norm": 0.154296875, |
| "learning_rate": 2.5688073394495415e-06, |
| "loss": 1.2735, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7492354740061162, |
| "grad_norm": 0.19140625, |
| "learning_rate": 2.5382262996941896e-06, |
| "loss": 1.2917, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7522935779816514, |
| "grad_norm": 0.158203125, |
| "learning_rate": 2.507645259938838e-06, |
| "loss": 1.2999, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7553516819571865, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 2.4770642201834866e-06, |
| "loss": 1.2394, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7584097859327217, |
| "grad_norm": 0.173828125, |
| "learning_rate": 2.4464831804281347e-06, |
| "loss": 1.3258, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7614678899082569, |
| "grad_norm": 0.158203125, |
| "learning_rate": 2.415902140672783e-06, |
| "loss": 1.2557, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.764525993883792, |
| "grad_norm": 0.302734375, |
| "learning_rate": 2.3853211009174317e-06, |
| "loss": 1.4177, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7675840978593272, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 2.35474006116208e-06, |
| "loss": 1.3027, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7706422018348624, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 2.324159021406728e-06, |
| "loss": 1.2882, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7737003058103975, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 2.2935779816513764e-06, |
| "loss": 1.3363, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7767584097859327, |
| "grad_norm": 0.251953125, |
| "learning_rate": 2.2629969418960245e-06, |
| "loss": 1.3373, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7798165137614679, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.232415902140673e-06, |
| "loss": 1.273, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7828746177370031, |
| "grad_norm": 0.248046875, |
| "learning_rate": 2.2018348623853215e-06, |
| "loss": 1.2923, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7859327217125383, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 2.1712538226299696e-06, |
| "loss": 1.3607, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7889908256880734, |
| "grad_norm": 0.166015625, |
| "learning_rate": 2.140672782874618e-06, |
| "loss": 1.3533, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7920489296636085, |
| "grad_norm": 0.21484375, |
| "learning_rate": 2.110091743119266e-06, |
| "loss": 1.3925, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7951070336391437, |
| "grad_norm": 0.26171875, |
| "learning_rate": 2.0795107033639143e-06, |
| "loss": 1.2769, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7981651376146789, |
| "grad_norm": 0.294921875, |
| "learning_rate": 2.048929663608563e-06, |
| "loss": 1.3495, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8012232415902141, |
| "grad_norm": 0.205078125, |
| "learning_rate": 2.0183486238532113e-06, |
| "loss": 1.3381, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8042813455657493, |
| "grad_norm": 0.16015625, |
| "learning_rate": 1.9877675840978594e-06, |
| "loss": 1.2995, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8073394495412844, |
| "grad_norm": 0.1640625, |
| "learning_rate": 1.957186544342508e-06, |
| "loss": 1.3762, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8103975535168195, |
| "grad_norm": 0.16015625, |
| "learning_rate": 1.9266055045871564e-06, |
| "loss": 1.2758, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8134556574923547, |
| "grad_norm": 0.310546875, |
| "learning_rate": 1.8960244648318043e-06, |
| "loss": 1.3743, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8165137614678899, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.8654434250764528e-06, |
| "loss": 1.3316, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8195718654434251, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 1.8348623853211011e-06, |
| "loss": 1.3649, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8226299694189603, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 1.8042813455657492e-06, |
| "loss": 1.3521, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 0.166015625, |
| "learning_rate": 1.7737003058103977e-06, |
| "loss": 1.3256, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8287461773700305, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 1.743119266055046e-06, |
| "loss": 1.3524, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8318042813455657, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 1.7125382262996943e-06, |
| "loss": 1.3189, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8348623853211009, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 1.6819571865443426e-06, |
| "loss": 1.2601, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8379204892966361, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.6513761467889911e-06, |
| "loss": 1.321, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8409785932721713, |
| "grad_norm": 0.234375, |
| "learning_rate": 1.6207951070336392e-06, |
| "loss": 1.3292, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8440366972477065, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 1.5902140672782875e-06, |
| "loss": 1.3433, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8470948012232415, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.559633027522936e-06, |
| "loss": 1.2742, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8501529051987767, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 1.5290519877675841e-06, |
| "loss": 1.3389, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8532110091743119, |
| "grad_norm": 0.16796875, |
| "learning_rate": 1.4984709480122326e-06, |
| "loss": 1.353, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8562691131498471, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.467889908256881e-06, |
| "loss": 1.3024, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8593272171253823, |
| "grad_norm": 0.158203125, |
| "learning_rate": 1.437308868501529e-06, |
| "loss": 1.2669, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.8623853211009175, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.4067278287461775e-06, |
| "loss": 1.3096, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8654434250764526, |
| "grad_norm": 0.275390625, |
| "learning_rate": 1.3761467889908258e-06, |
| "loss": 1.3487, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8685015290519877, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.345565749235474e-06, |
| "loss": 1.3377, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8715596330275229, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 1.3149847094801224e-06, |
| "loss": 1.3559, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8746177370030581, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 1.2844036697247707e-06, |
| "loss": 1.4072, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8776758409785933, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 1.253822629969419e-06, |
| "loss": 1.3186, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 0.150390625, |
| "learning_rate": 1.2232415902140673e-06, |
| "loss": 1.2673, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8837920489296636, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.1926605504587159e-06, |
| "loss": 1.361, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8868501529051988, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.162079510703364e-06, |
| "loss": 1.288, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8899082568807339, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 1.1314984709480122e-06, |
| "loss": 1.3306, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8929663608562691, |
| "grad_norm": 0.169921875, |
| "learning_rate": 1.1009174311926608e-06, |
| "loss": 1.3058, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8960244648318043, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 1.070336391437309e-06, |
| "loss": 1.3644, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8990825688073395, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.0397553516819571e-06, |
| "loss": 1.2995, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9021406727828746, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.0091743119266057e-06, |
| "loss": 1.2999, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9051987767584098, |
| "grad_norm": 0.17578125, |
| "learning_rate": 9.78593272171254e-07, |
| "loss": 1.3765, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.908256880733945, |
| "grad_norm": 0.173828125, |
| "learning_rate": 9.480122324159022e-07, |
| "loss": 1.2698, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9113149847094801, |
| "grad_norm": 0.181640625, |
| "learning_rate": 9.174311926605506e-07, |
| "loss": 1.2676, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9143730886850153, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 8.868501529051989e-07, |
| "loss": 1.3122, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 0.162109375, |
| "learning_rate": 8.562691131498472e-07, |
| "loss": 1.3116, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9204892966360856, |
| "grad_norm": 0.1875, |
| "learning_rate": 8.256880733944956e-07, |
| "loss": 1.2399, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9235474006116208, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 7.951070336391438e-07, |
| "loss": 1.3656, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.926605504587156, |
| "grad_norm": 0.287109375, |
| "learning_rate": 7.645259938837921e-07, |
| "loss": 1.3512, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9296636085626911, |
| "grad_norm": 0.169921875, |
| "learning_rate": 7.339449541284405e-07, |
| "loss": 1.3316, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9327217125382263, |
| "grad_norm": 0.28515625, |
| "learning_rate": 7.033639143730888e-07, |
| "loss": 1.2843, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9357798165137615, |
| "grad_norm": 0.1640625, |
| "learning_rate": 6.72782874617737e-07, |
| "loss": 1.3167, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9388379204892966, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 6.422018348623854e-07, |
| "loss": 1.29, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9418960244648318, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 6.116207951070337e-07, |
| "loss": 1.3749, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.944954128440367, |
| "grad_norm": 0.177734375, |
| "learning_rate": 5.81039755351682e-07, |
| "loss": 1.3022, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9480122324159022, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 5.504587155963304e-07, |
| "loss": 1.3843, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9510703363914373, |
| "grad_norm": 0.203125, |
| "learning_rate": 5.198776758409786e-07, |
| "loss": 1.3537, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9541284403669725, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 4.89296636085627e-07, |
| "loss": 1.3483, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9571865443425076, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 4.587155963302753e-07, |
| "loss": 1.3044, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9602446483180428, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 4.281345565749236e-07, |
| "loss": 1.2622, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.963302752293578, |
| "grad_norm": 0.158203125, |
| "learning_rate": 3.975535168195719e-07, |
| "loss": 1.325, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9663608562691132, |
| "grad_norm": 0.296875, |
| "learning_rate": 3.6697247706422023e-07, |
| "loss": 1.3494, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9694189602446484, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.363914373088685e-07, |
| "loss": 1.3004, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9724770642201835, |
| "grad_norm": 0.166015625, |
| "learning_rate": 3.0581039755351683e-07, |
| "loss": 1.32, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9755351681957186, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.752293577981652e-07, |
| "loss": 1.3519, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9785932721712538, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 2.446483180428135e-07, |
| "loss": 1.2681, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.981651376146789, |
| "grad_norm": 0.28125, |
| "learning_rate": 2.140672782874618e-07, |
| "loss": 1.2803, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9847094801223242, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.8348623853211012e-07, |
| "loss": 1.2805, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9877675840978594, |
| "grad_norm": 0.17578125, |
| "learning_rate": 1.5290519877675842e-07, |
| "loss": 1.3546, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9908256880733946, |
| "grad_norm": 0.162109375, |
| "learning_rate": 1.2232415902140674e-07, |
| "loss": 1.2922, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9938837920489296, |
| "grad_norm": 0.16796875, |
| "learning_rate": 9.174311926605506e-08, |
| "loss": 1.3383, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9969418960244648, |
| "grad_norm": 0.16796875, |
| "learning_rate": 6.116207951070337e-08, |
| "loss": 1.2738, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 3.0581039755351686e-08, |
| "loss": 1.3367, |
| "step": 327 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 327, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0452705633726628e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|