| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9923175416133163, |
| "eval_steps": 19, |
| "global_step": 146, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013663535439795047, |
| "grad_norm": 2.015625, |
| "learning_rate": 5.714285714285714e-07, |
| "loss": 2.0297, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.013663535439795047, |
| "eval_loss": 2.0118041038513184, |
| "eval_runtime": 36.5931, |
| "eval_samples_per_second": 13.472, |
| "eval_steps_per_second": 3.389, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.027327070879590094, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.1428571428571428e-06, |
| "loss": 2.0165, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04099060631938514, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.714285714285714e-06, |
| "loss": 2.031, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.05465414175918019, |
| "grad_norm": 1.9921875, |
| "learning_rate": 2.2857142857142856e-06, |
| "loss": 2.0347, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.06831767719897523, |
| "grad_norm": 1.875, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 2.0388, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08198121263877028, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.428571428571428e-06, |
| "loss": 2.0397, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.09564474807856532, |
| "grad_norm": 1.53125, |
| "learning_rate": 4e-06, |
| "loss": 2.0434, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.10930828351836037, |
| "grad_norm": 1.4140625, |
| "learning_rate": 4.571428571428571e-06, |
| "loss": 2.0087, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.12297181895815543, |
| "grad_norm": 1.1640625, |
| "learning_rate": 5.142857142857143e-06, |
| "loss": 2.0519, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.13663535439795046, |
| "grad_norm": 0.9765625, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 1.9951, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1502988898377455, |
| "grad_norm": 0.875, |
| "learning_rate": 6.285714285714285e-06, |
| "loss": 2.0071, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.16396242527754057, |
| "grad_norm": 0.87109375, |
| "learning_rate": 6.857142857142856e-06, |
| "loss": 2.0082, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1776259607173356, |
| "grad_norm": 0.88671875, |
| "learning_rate": 7.428571428571428e-06, |
| "loss": 1.9933, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.19128949615713065, |
| "grad_norm": 0.90625, |
| "learning_rate": 8e-06, |
| "loss": 2.0326, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.2049530315969257, |
| "grad_norm": 0.87890625, |
| "learning_rate": 7.998867178772516e-06, |
| "loss": 2.0448, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.21861656703672075, |
| "grad_norm": 0.78515625, |
| "learning_rate": 7.995469356732032e-06, |
| "loss": 2.0034, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.2322801024765158, |
| "grad_norm": 0.7109375, |
| "learning_rate": 7.989808458441014e-06, |
| "loss": 1.9929, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.24594363791631085, |
| "grad_norm": 0.703125, |
| "learning_rate": 7.981887690292338e-06, |
| "loss": 1.949, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.2596071733561059, |
| "grad_norm": 0.68359375, |
| "learning_rate": 7.971711538693153e-06, |
| "loss": 2.0034, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.2596071733561059, |
| "eval_loss": 1.9669320583343506, |
| "eval_runtime": 36.3557, |
| "eval_samples_per_second": 13.56, |
| "eval_steps_per_second": 3.411, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.27327070879590093, |
| "grad_norm": 0.65234375, |
| "learning_rate": 7.95928576752373e-06, |
| "loss": 2.0095, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.28693424423569597, |
| "grad_norm": 0.61328125, |
| "learning_rate": 7.944617414872746e-06, |
| "loss": 2.0096, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.300597779675491, |
| "grad_norm": 0.65625, |
| "learning_rate": 7.927714789050827e-06, |
| "loss": 2.0187, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3142613151152861, |
| "grad_norm": 0.57421875, |
| "learning_rate": 7.908587463884638e-06, |
| "loss": 1.9755, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.32792485055508114, |
| "grad_norm": 0.54296875, |
| "learning_rate": 7.887246273294166e-06, |
| "loss": 1.9908, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.3415883859948762, |
| "grad_norm": 0.53515625, |
| "learning_rate": 7.863703305156273e-06, |
| "loss": 1.9447, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.3552519214346712, |
| "grad_norm": 0.5625, |
| "learning_rate": 7.837971894457989e-06, |
| "loss": 1.9833, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.36891545687446625, |
| "grad_norm": 0.55859375, |
| "learning_rate": 7.810066615743442e-06, |
| "loss": 2.0089, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.3825789923142613, |
| "grad_norm": 0.50390625, |
| "learning_rate": 7.780003274858673e-06, |
| "loss": 2.0116, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.3962425277540564, |
| "grad_norm": 0.5078125, |
| "learning_rate": 7.747798899999048e-06, |
| "loss": 2.0061, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.4099060631938514, |
| "grad_norm": 0.49609375, |
| "learning_rate": 7.71347173206429e-06, |
| "loss": 1.923, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.42356959863364646, |
| "grad_norm": 0.51171875, |
| "learning_rate": 7.677041214326663e-06, |
| "loss": 1.9959, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.4372331340734415, |
| "grad_norm": 0.478515625, |
| "learning_rate": 7.638527981418074e-06, |
| "loss": 1.9421, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.45089666951323654, |
| "grad_norm": 0.470703125, |
| "learning_rate": 7.5979538476424126e-06, |
| "loss": 1.9684, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.4645602049530316, |
| "grad_norm": 0.45703125, |
| "learning_rate": 7.555341794619694e-06, |
| "loss": 1.9564, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.47822374039282667, |
| "grad_norm": 0.455078125, |
| "learning_rate": 7.510715958269022e-06, |
| "loss": 1.9902, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.4918872758326217, |
| "grad_norm": 0.462890625, |
| "learning_rate": 7.4641016151377545e-06, |
| "loss": 1.983, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5055508112724167, |
| "grad_norm": 0.47265625, |
| "learning_rate": 7.415525168084592e-06, |
| "loss": 1.9775, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5192143467122118, |
| "grad_norm": 0.451171875, |
| "learning_rate": 7.365014131324725e-06, |
| "loss": 1.9595, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5192143467122118, |
| "eval_loss": 1.9398664236068726, |
| "eval_runtime": 36.4379, |
| "eval_samples_per_second": 13.53, |
| "eval_steps_per_second": 3.403, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5328778821520068, |
| "grad_norm": 0.44140625, |
| "learning_rate": 7.3125971148454824e-06, |
| "loss": 1.9414, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.5465414175918019, |
| "grad_norm": 0.498046875, |
| "learning_rate": 7.258303808201343e-06, |
| "loss": 1.9589, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5602049530315969, |
| "grad_norm": 0.4609375, |
| "learning_rate": 7.202164963697441e-06, |
| "loss": 1.9913, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.5738684884713919, |
| "grad_norm": 0.447265625, |
| "learning_rate": 7.1442123789711495e-06, |
| "loss": 1.9872, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.587532023911187, |
| "grad_norm": 0.451171875, |
| "learning_rate": 7.08447887898155e-06, |
| "loss": 1.9749, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.601195559350982, |
| "grad_norm": 0.4375, |
| "learning_rate": 7.022998297417033e-06, |
| "loss": 1.9547, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6148590947907772, |
| "grad_norm": 0.451171875, |
| "learning_rate": 6.959805457531536e-06, |
| "loss": 1.959, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6285226302305722, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.89493615242028e-06, |
| "loss": 1.9369, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.6421861656703672, |
| "grad_norm": 0.44921875, |
| "learning_rate": 6.82842712474619e-06, |
| "loss": 1.9256, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.6558497011101623, |
| "grad_norm": 0.42578125, |
| "learning_rate": 6.760316045928448e-06, |
| "loss": 1.9476, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.6695132365499573, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.690641494805011e-06, |
| "loss": 1.9738, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.6831767719897524, |
| "grad_norm": 0.421875, |
| "learning_rate": 6.619442935781141e-06, |
| "loss": 1.9385, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6968403074295474, |
| "grad_norm": 0.470703125, |
| "learning_rate": 6.546760696476353e-06, |
| "loss": 2.0047, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.7105038428693424, |
| "grad_norm": 0.419921875, |
| "learning_rate": 6.472635944882421e-06, |
| "loss": 1.9871, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7241673783091375, |
| "grad_norm": 0.421875, |
| "learning_rate": 6.397110666045387e-06, |
| "loss": 1.9662, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.7378309137489325, |
| "grad_norm": 0.447265625, |
| "learning_rate": 6.3202276382847925e-06, |
| "loss": 1.9604, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.7514944491887275, |
| "grad_norm": 0.43359375, |
| "learning_rate": 6.242030408963575e-06, |
| "loss": 1.9581, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.7651579846285226, |
| "grad_norm": 0.419921875, |
| "learning_rate": 6.162563269822391e-06, |
| "loss": 1.9354, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.7788215200683177, |
| "grad_norm": 0.431640625, |
| "learning_rate": 6.081871231892289e-06, |
| "loss": 1.9519, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.7788215200683177, |
| "eval_loss": 1.9265698194503784, |
| "eval_runtime": 36.375, |
| "eval_samples_per_second": 13.553, |
| "eval_steps_per_second": 3.409, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.7924850555081128, |
| "grad_norm": 0.40625, |
| "learning_rate": 6e-06, |
| "loss": 1.9568, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.8061485909479078, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.916995946880227e-06, |
| "loss": 1.9888, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.8198121263877028, |
| "grad_norm": 0.41015625, |
| "learning_rate": 5.832906086909641e-06, |
| "loss": 1.9502, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8334756618274979, |
| "grad_norm": 0.404296875, |
| "learning_rate": 5.7477780494774375e-06, |
| "loss": 1.987, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.8471391972672929, |
| "grad_norm": 0.412109375, |
| "learning_rate": 5.661660052007546e-06, |
| "loss": 1.9504, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.860802732707088, |
| "grad_norm": 0.423828125, |
| "learning_rate": 5.574600872647766e-06, |
| "loss": 1.9838, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.874466268146883, |
| "grad_norm": 0.421875, |
| "learning_rate": 5.48664982264131e-06, |
| "loss": 1.9554, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.888129803586678, |
| "grad_norm": 0.419921875, |
| "learning_rate": 5.397856718396394e-06, |
| "loss": 1.994, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.9017933390264731, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.308271853269687e-06, |
| "loss": 1.9496, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.9154568744662681, |
| "grad_norm": 0.396484375, |
| "learning_rate": 5.2179459690796286e-06, |
| "loss": 1.9508, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.9291204099060631, |
| "grad_norm": 0.40234375, |
| "learning_rate": 5.126930227365719e-06, |
| "loss": 1.9268, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.9427839453458582, |
| "grad_norm": 0.4140625, |
| "learning_rate": 5.035276180410083e-06, |
| "loss": 1.9589, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.9564474807856533, |
| "grad_norm": 0.439453125, |
| "learning_rate": 4.943035742037709e-06, |
| "loss": 1.9929, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9701110162254484, |
| "grad_norm": 0.396484375, |
| "learning_rate": 4.850261158211906e-06, |
| "loss": 1.9219, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.9837745516652434, |
| "grad_norm": 0.41015625, |
| "learning_rate": 4.7570049774416405e-06, |
| "loss": 1.9546, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.9974380871050385, |
| "grad_norm": 0.400390625, |
| "learning_rate": 4.663320021017497e-06, |
| "loss": 1.9449, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.0089628681177978, |
| "grad_norm": 0.427734375, |
| "learning_rate": 4.569259353093141e-06, |
| "loss": 1.972, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.0226205719163466, |
| "grad_norm": 0.400390625, |
| "learning_rate": 4.47487625062922e-06, |
| "loss": 1.9305, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.0362782757148954, |
| "grad_norm": 0.4296875, |
| "learning_rate": 4.38022417321673e-06, |
| "loss": 1.9479, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.0362782757148954, |
| "eval_loss": 1.9197667837142944, |
| "eval_runtime": 36.1856, |
| "eval_samples_per_second": 13.624, |
| "eval_steps_per_second": 3.427, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.0499359795134442, |
| "grad_norm": 0.466796875, |
| "learning_rate": 4.285356732796929e-06, |
| "loss": 1.9611, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.0635936833119932, |
| "grad_norm": 0.40234375, |
| "learning_rate": 4.1903276632949695e-06, |
| "loss": 1.9134, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.077251387110542, |
| "grad_norm": 0.3984375, |
| "learning_rate": 4.09519079018443e-06, |
| "loss": 1.9607, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.412109375, |
| "learning_rate": 4e-06, |
| "loss": 1.9168, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1045667947076399, |
| "grad_norm": 0.41796875, |
| "learning_rate": 3.90480920981557e-06, |
| "loss": 1.9901, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.1182244985061887, |
| "grad_norm": 0.40234375, |
| "learning_rate": 3.8096723367050306e-06, |
| "loss": 1.9429, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.1318822023047375, |
| "grad_norm": 0.39453125, |
| "learning_rate": 3.7146432672030706e-06, |
| "loss": 1.9432, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.1455399061032865, |
| "grad_norm": 0.4140625, |
| "learning_rate": 3.61977582678327e-06, |
| "loss": 1.9166, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.1591976099018353, |
| "grad_norm": 0.416015625, |
| "learning_rate": 3.52512374937078e-06, |
| "loss": 1.9604, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.172855313700384, |
| "grad_norm": 0.40234375, |
| "learning_rate": 3.4307406469068596e-06, |
| "loss": 1.9634, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.186513017498933, |
| "grad_norm": 0.3984375, |
| "learning_rate": 3.336679978982504e-06, |
| "loss": 1.9572, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.200170721297482, |
| "grad_norm": 0.396484375, |
| "learning_rate": 3.2429950225583604e-06, |
| "loss": 1.9414, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.2138284250960307, |
| "grad_norm": 0.396484375, |
| "learning_rate": 3.149738841788093e-06, |
| "loss": 1.9492, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.2274861288945795, |
| "grad_norm": 0.408203125, |
| "learning_rate": 3.0569642579622904e-06, |
| "loss": 1.9323, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.2411438326931286, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.9647238195899164e-06, |
| "loss": 1.9547, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.2548015364916774, |
| "grad_norm": 0.4140625, |
| "learning_rate": 2.873069772634281e-06, |
| "loss": 1.9388, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.2684592402902262, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.7820540309203724e-06, |
| "loss": 1.9594, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.282116944088775, |
| "grad_norm": 0.3984375, |
| "learning_rate": 2.6917281467303133e-06, |
| "loss": 1.912, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.295774647887324, |
| "grad_norm": 0.400390625, |
| "learning_rate": 2.602143281603607e-06, |
| "loss": 1.9381, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.295774647887324, |
| "eval_loss": 1.9166765213012695, |
| "eval_runtime": 36.3011, |
| "eval_samples_per_second": 13.581, |
| "eval_steps_per_second": 3.416, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.3094323516858728, |
| "grad_norm": 0.392578125, |
| "learning_rate": 2.5133501773586904e-06, |
| "loss": 1.9297, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.3230900554844216, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.4253991273522344e-06, |
| "loss": 1.94, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.3367477592829706, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.3383399479924544e-06, |
| "loss": 1.9836, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.3504054630815194, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.252221950522562e-06, |
| "loss": 1.936, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.3640631668800682, |
| "grad_norm": 0.41015625, |
| "learning_rate": 2.1670939130903583e-06, |
| "loss": 2.0112, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.3777208706786173, |
| "grad_norm": 0.408203125, |
| "learning_rate": 2.0830040531197743e-06, |
| "loss": 1.9661, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.391378574477166, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.0000000000000008e-06, |
| "loss": 1.9627, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.4050362782757149, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.9181287681077113e-06, |
| "loss": 1.9496, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.418693982074264, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.837436730177611e-06, |
| "loss": 1.9493, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.4323516858728127, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.7579695910364233e-06, |
| "loss": 1.9523, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.4460093896713615, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.6797723617152077e-06, |
| "loss": 1.9495, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.4596670934699103, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.6028893339546122e-06, |
| "loss": 1.9722, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.473324797268459, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.527364055117579e-06, |
| "loss": 1.9264, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.4869825010670081, |
| "grad_norm": 0.392578125, |
| "learning_rate": 1.4532393035236475e-06, |
| "loss": 1.9395, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.500640204865557, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.38055706421886e-06, |
| "loss": 1.927, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.5142979086641057, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.3093585051949898e-06, |
| "loss": 1.9362, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.5279556124626548, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.2396839540715527e-06, |
| "loss": 1.9238, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.5416133162612036, |
| "grad_norm": 0.3984375, |
| "learning_rate": 1.1715728752538101e-06, |
| "loss": 1.9462, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.5552710200597524, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.1050638475797191e-06, |
| "loss": 1.9258, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.5552710200597524, |
| "eval_loss": 1.9157997369766235, |
| "eval_runtime": 36.0542, |
| "eval_samples_per_second": 13.674, |
| "eval_steps_per_second": 3.439, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.5689287238583014, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.0401945424684652e-06, |
| "loss": 1.9553, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.5825864276568502, |
| "grad_norm": 0.3828125, |
| "learning_rate": 9.770017025829674e-07, |
| "loss": 1.9163, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.596244131455399, |
| "grad_norm": 0.412109375, |
| "learning_rate": 9.155211210184495e-07, |
| "loss": 1.9958, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.609901835253948, |
| "grad_norm": 0.40234375, |
| "learning_rate": 8.557876210288508e-07, |
| "loss": 1.9564, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.6235595390524968, |
| "grad_norm": 0.42578125, |
| "learning_rate": 7.978350363025587e-07, |
| "loss": 1.984, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.6372172428510456, |
| "grad_norm": 0.412109375, |
| "learning_rate": 7.416961917986571e-07, |
| "loss": 1.9543, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.6508749466495947, |
| "grad_norm": 0.390625, |
| "learning_rate": 6.874028851545173e-07, |
| "loss": 1.927, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.6645326504481432, |
| "grad_norm": 0.427734375, |
| "learning_rate": 6.349858686752747e-07, |
| "loss": 1.9333, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.6781903542466923, |
| "grad_norm": 0.412109375, |
| "learning_rate": 5.844748319154078e-07, |
| "loss": 1.9837, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.6918480580452413, |
| "grad_norm": 0.416015625, |
| "learning_rate": 5.358983848622451e-07, |
| "loss": 1.9233, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.7055057618437899, |
| "grad_norm": 0.4140625, |
| "learning_rate": 4.892840417309774e-07, |
| "loss": 1.9787, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.719163465642339, |
| "grad_norm": 0.390625, |
| "learning_rate": 4.4465820538030653e-07, |
| "loss": 1.9822, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.7328211694408877, |
| "grad_norm": 0.392578125, |
| "learning_rate": 4.0204615235758734e-07, |
| "loss": 1.9564, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.7464788732394365, |
| "grad_norm": 0.3984375, |
| "learning_rate": 3.6147201858192623e-07, |
| "loss": 1.9345, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.7601365770379855, |
| "grad_norm": 0.396484375, |
| "learning_rate": 3.229587856733378e-07, |
| "loss": 1.9379, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.7737942808365343, |
| "grad_norm": 0.39453125, |
| "learning_rate": 2.865282679357097e-07, |
| "loss": 1.9308, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7874519846350831, |
| "grad_norm": 0.40234375, |
| "learning_rate": 2.522011000009536e-07, |
| "loss": 1.9508, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.8011096884336322, |
| "grad_norm": 0.40625, |
| "learning_rate": 2.1999672514132617e-07, |
| "loss": 1.9122, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.814767392232181, |
| "grad_norm": 0.41015625, |
| "learning_rate": 1.8993338425655802e-07, |
| "loss": 1.9508, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.814767392232181, |
| "eval_loss": 1.915639042854309, |
| "eval_runtime": 36.4567, |
| "eval_samples_per_second": 13.523, |
| "eval_steps_per_second": 3.401, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.8284250960307298, |
| "grad_norm": 0.412109375, |
| "learning_rate": 1.6202810554201097e-07, |
| "loss": 1.9649, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.8420827998292788, |
| "grad_norm": 0.39453125, |
| "learning_rate": 1.3629669484372718e-07, |
| "loss": 1.9101, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.8557405036278276, |
| "grad_norm": 0.396484375, |
| "learning_rate": 1.1275372670583338e-07, |
| "loss": 1.9386, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.8693982074263764, |
| "grad_norm": 0.3984375, |
| "learning_rate": 9.141253611536238e-08, |
| "loss": 1.9315, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.8830559112249254, |
| "grad_norm": 0.408203125, |
| "learning_rate": 7.228521094917317e-08, |
| "loss": 1.929, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.896713615023474, |
| "grad_norm": 0.400390625, |
| "learning_rate": 5.5382585127254025e-08, |
| "loss": 1.9361, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.910371318822023, |
| "grad_norm": 0.3984375, |
| "learning_rate": 4.071423247626926e-08, |
| "loss": 1.961, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.924029022620572, |
| "grad_norm": 0.421875, |
| "learning_rate": 2.828846130684681e-08, |
| "loss": 1.9462, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.9376867264191207, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.8112309707661643e-08, |
| "loss": 1.9729, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.9513444302176697, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.0191541558985939e-08, |
| "loss": 1.9747, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.9650021340162185, |
| "grad_norm": 0.388671875, |
| "learning_rate": 4.5306432679681486e-09, |
| "loss": 1.916, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.9786598378147673, |
| "grad_norm": 0.3828125, |
| "learning_rate": 1.1328212274839266e-09, |
| "loss": 1.9484, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.9923175416133163, |
| "grad_norm": 0.3984375, |
| "learning_rate": 0.0, |
| "loss": 1.9286, |
| "step": 146 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 146, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 37, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2034879291926774e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |