{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.99726094101893,
  "global_step": 512,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.3124999999999999e-05,
      "loss": 3.3565,
      "step": 3
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.6249999999999998e-05,
      "loss": 3.1293,
      "step": 6
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.9374999999999995e-05,
      "loss": 2.987,
      "step": 9
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.2499999999999995e-05,
      "loss": 2.9036,
      "step": 12
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.5625e-05,
      "loss": 2.7414,
      "step": 15
    },
    {
      "epoch": 0.07,
      "learning_rate": 6.99971917961865e-05,
      "loss": 2.7513,
      "step": 18
    },
    {
      "epoch": 0.08,
      "learning_rate": 6.998244995833964e-05,
      "loss": 2.6567,
      "step": 21
    },
    {
      "epoch": 0.09,
      "learning_rate": 6.995507775098683e-05,
      "loss": 2.6567,
      "step": 24
    },
    {
      "epoch": 0.11,
      "learning_rate": 6.991508505682909e-05,
      "loss": 2.651,
      "step": 27
    },
    {
      "epoch": 0.12,
      "learning_rate": 6.986248631517822e-05,
      "loss": 2.5459,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 6.979730051674372e-05,
      "loss": 2.5653,
      "step": 33
    },
    {
      "epoch": 0.14,
      "learning_rate": 6.9719551196776e-05,
      "loss": 2.5159,
      "step": 36
    },
    {
      "epoch": 0.15,
      "learning_rate": 6.962926642656914e-05,
      "loss": 2.4596,
      "step": 39
    },
    {
      "epoch": 0.16,
      "learning_rate": 6.952647880332572e-05,
      "loss": 2.5057,
      "step": 42
    },
    {
      "epoch": 0.18,
      "learning_rate": 6.941122543838767e-05,
      "loss": 2.4062,
      "step": 45
    },
    {
      "epoch": 0.19,
      "learning_rate": 6.92835479438373e-05,
      "loss": 2.44,
      "step": 48
    },
    {
      "epoch": 0.2,
      "learning_rate": 6.914349241747322e-05,
      "loss": 2.3659,
      "step": 51
    },
    {
      "epoch": 0.21,
      "learning_rate": 6.899110942616686e-05,
      "loss": 2.3672,
      "step": 54
    },
    {
      "epoch": 0.22,
      "learning_rate": 6.882645398760536e-05,
      "loss": 2.3759,
      "step": 57
    },
    {
      "epoch": 0.23,
      "learning_rate": 6.864958555042743e-05,
      "loss": 2.3567,
      "step": 60
    },
    {
      "epoch": 0.25,
      "learning_rate": 6.846056797275964e-05,
      "loss": 2.3606,
      "step": 63
    },
    {
      "epoch": 0.26,
      "learning_rate": 6.825946949916035e-05,
      "loss": 2.3166,
      "step": 66
    },
    {
      "epoch": 0.27,
      "learning_rate": 6.804636273598024e-05,
      "loss": 2.2707,
      "step": 69
    },
    {
      "epoch": 0.28,
      "learning_rate": 6.782132462514781e-05,
      "loss": 2.2617,
      "step": 72
    },
    {
      "epoch": 0.29,
      "learning_rate": 6.758443641638958e-05,
      "loss": 2.2521,
      "step": 75
    },
    {
      "epoch": 0.3,
      "learning_rate": 6.733578363789503e-05,
      "loss": 2.2746,
      "step": 78
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.707545606543672e-05,
      "loss": 2.2112,
      "step": 81
    },
    {
      "epoch": 0.33,
      "learning_rate": 6.680354768995687e-05,
      "loss": 2.2237,
      "step": 84
    },
    {
      "epoch": 0.34,
      "learning_rate": 6.652015668363205e-05,
      "loss": 2.2225,
      "step": 87
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.622538536442822e-05,
      "loss": 2.1471,
      "step": 90
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.5919340159159e-05,
      "loss": 2.0738,
      "step": 93
    },
    {
      "epoch": 0.37,
      "learning_rate": 6.560213156506037e-05,
      "loss": 2.1797,
      "step": 96
    },
    {
      "epoch": 0.39,
      "learning_rate": 6.527387410989579e-05,
      "loss": 2.2545,
      "step": 99
    },
    {
      "epoch": 0.4,
      "learning_rate": 6.493468631060607e-05,
      "loss": 2.214,
      "step": 102
    },
    {
      "epoch": 0.41,
      "learning_rate": 6.458469063051903e-05,
      "loss": 2.1769,
      "step": 105
    },
    {
      "epoch": 0.42,
      "learning_rate": 6.422401343513426e-05,
      "loss": 2.1163,
      "step": 108
    },
    {
      "epoch": 0.43,
      "learning_rate": 6.385278494649894e-05,
      "loss": 2.1318,
      "step": 111
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.347113919619143e-05,
      "loss": 2.1464,
      "step": 114
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.307921397692931e-05,
      "loss": 2.1739,
      "step": 117
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.267715079281944e-05,
      "loss": 2.1177,
      "step": 120
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.226509480826817e-05,
      "loss": 2.1126,
      "step": 123
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.184319479556984e-05,
      "loss": 2.1321,
      "step": 126
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.141160308119273e-05,
      "loss": 2.0559,
      "step": 129
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.0970475490781874e-05,
      "loss": 2.1131,
      "step": 132
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.0519971292898285e-05,
      "loss": 2.1012,
      "step": 135
    },
    {
      "epoch": 0.54,
      "learning_rate": 6.0060253141515295e-05,
      "loss": 2.076,
      "step": 138
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.95914870172926e-05,
      "loss": 2.0437,
      "step": 141
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.911384216764903e-05,
      "loss": 2.0269,
      "step": 144
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.862749104565608e-05,
      "loss": 2.0285,
      "step": 147
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.8132609247773926e-05,
      "loss": 2.0499,
      "step": 150
    },
    {
      "epoch": 0.6,
      "learning_rate": 5.762937545045251e-05,
      "loss": 2.0388,
      "step": 153
    },
    {
      "epoch": 0.61,
      "learning_rate": 5.711797134562063e-05,
      "loss": 1.9678,
      "step": 156
    },
    {
      "epoch": 0.62,
      "learning_rate": 5.6598581575086404e-05,
      "loss": 2.0444,
      "step": 159
    },
    {
      "epoch": 0.63,
      "learning_rate": 5.60713936638724e-05,
      "loss": 2.0038,
      "step": 162
    },
    {
      "epoch": 0.64,
      "learning_rate": 5.553659795251013e-05,
      "loss": 1.9812,
      "step": 165
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.499438752831773e-05,
      "loss": 1.9962,
      "step": 168
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.444495815568607e-05,
      "loss": 1.9857,
      "step": 171
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.38885082053983e-05,
      "loss": 1.9288,
      "step": 174
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.332523858300823e-05,
      "loss": 1.9271,
      "step": 177
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.2755352656303755e-05,
      "loss": 1.9128,
      "step": 180
    },
    {
      "epoch": 0.71,
      "learning_rate": 5.217905618188108e-05,
      "loss": 2.0431,
      "step": 183
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.1596557230856576e-05,
      "loss": 1.8937,
      "step": 186
    },
    {
      "epoch": 0.74,
      "learning_rate": 5.1008066113743024e-05,
      "loss": 1.9296,
      "step": 189
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.04137953045172e-05,
      "loss": 1.9901,
      "step": 192
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.981395936390644e-05,
      "loss": 1.9698,
      "step": 195
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.920877486192174e-05,
      "loss": 1.9076,
      "step": 198
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.85984602996655e-05,
      "loss": 1.8708,
      "step": 201
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.798323603044187e-05,
      "loss": 1.9115,
      "step": 204
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.736332418019853e-05,
      "loss": 1.8812,
      "step": 207
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.67389485673284e-05,
      "loss": 1.9152,
      "step": 210
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.6110334621860254e-05,
      "loss": 2.0206,
      "step": 213
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.547770930406753e-05,
      "loss": 1.9291,
      "step": 216
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.4841301022524574e-05,
      "loss": 1.9792,
      "step": 219
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.420133955164006e-05,
      "loss": 1.9464,
      "step": 222
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.3558055948697185e-05,
      "loss": 1.835,
      "step": 225
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.291168247043072e-05,
      "loss": 1.771,
      "step": 228
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.2262452489171054e-05,
      "loss": 1.9584,
      "step": 231
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.1610600408585395e-05,
      "loss": 2.0061,
      "step": 234
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.095636157904658e-05,
      "loss": 1.8876,
      "step": 237
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.029997221266018e-05,
      "loss": 1.8706,
      "step": 240
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.964166929798036e-05,
      "loss": 1.884,
      "step": 243
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.898169051444552e-05,
      "loss": 1.8348,
      "step": 246
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.8320274146564356e-05,
      "loss": 1.8026,
      "step": 249
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.7657658997883615e-05,
      "loss": 1.847,
      "step": 252
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.699408430476834e-05,
      "loss": 1.8465,
      "step": 255
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.8656249046325684,
      "eval_runtime": 309.3485,
      "eval_samples_per_second": 13.629,
      "eval_steps_per_second": 13.629,
      "step": 256
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.632978965002587e-05,
      "loss": 2.1828,
      "step": 258
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.566501487640479e-05,
      "loss": 1.6617,
      "step": 261
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.5e-05,
      "loss": 1.6496,
      "step": 264
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.433498512359521e-05,
      "loss": 1.5581,
      "step": 267
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.367021034997412e-05,
      "loss": 1.6121,
      "step": 270
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.300591569523165e-05,
      "loss": 1.6664,
      "step": 273
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.2342341002116385e-05,
      "loss": 1.5229,
      "step": 276
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.1679725853435645e-05,
      "loss": 1.5276,
      "step": 279
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.101830948555448e-05,
      "loss": 1.6506,
      "step": 282
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.035833070201963e-05,
      "loss": 1.5594,
      "step": 285
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.9700027787339826e-05,
      "loss": 1.4913,
      "step": 288
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.904363842095341e-05,
      "loss": 1.6122,
      "step": 291
    },
    {
      "epoch": 1.15,
      "learning_rate": 2.838939959141461e-05,
      "loss": 1.6086,
      "step": 294
    },
    {
      "epoch": 1.16,
      "learning_rate": 2.7737547510828943e-05,
      "loss": 1.5963,
      "step": 297
    },
    {
      "epoch": 1.17,
      "learning_rate": 2.7088317529569277e-05,
      "loss": 1.6068,
      "step": 300
    },
    {
      "epoch": 1.18,
      "learning_rate": 2.6441944051302816e-05,
      "loss": 1.528,
      "step": 303
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.5798660448359928e-05,
      "loss": 1.629,
      "step": 306
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.5158698977475426e-05,
      "loss": 1.6335,
      "step": 309
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.4522290695932468e-05,
      "loss": 1.5641,
      "step": 312
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.3889665378139753e-05,
      "loss": 1.5514,
      "step": 315
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.32610514326716e-05,
      "loss": 1.6076,
      "step": 318
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.263667581980147e-05,
      "loss": 1.5815,
      "step": 321
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.2016763969558128e-05,
      "loss": 1.5281,
      "step": 324
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.140153970033449e-05,
      "loss": 1.5254,
      "step": 327
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.0791225138078253e-05,
      "loss": 1.5966,
      "step": 330
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.0186040636093567e-05,
      "loss": 1.5427,
      "step": 333
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.9586204695482795e-05,
      "loss": 1.6041,
      "step": 336
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.8991933886256963e-05,
      "loss": 1.4729,
      "step": 339
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.840344276914342e-05,
      "loss": 1.564,
      "step": 342
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.7820943818118924e-05,
      "loss": 1.5334,
      "step": 345
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.7244647343696252e-05,
      "loss": 1.5026,
      "step": 348
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.6674761416991767e-05,
      "loss": 1.586,
      "step": 351
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.611149179460171e-05,
      "loss": 1.5368,
      "step": 354
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.555504184431393e-05,
      "loss": 1.5588,
      "step": 357
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.500561247168226e-05,
      "loss": 1.5276,
      "step": 360
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.446340204748987e-05,
      "loss": 1.4957,
      "step": 363
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.3928606336127589e-05,
      "loss": 1.5479,
      "step": 366
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.34014184249136e-05,
      "loss": 1.4862,
      "step": 369
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.2882028654379362e-05,
      "loss": 1.5849,
      "step": 372
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.2370624549547507e-05,
      "loss": 1.5811,
      "step": 375
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.186739075222608e-05,
      "loss": 1.4914,
      "step": 378
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.1372508954343916e-05,
      "loss": 1.5442,
      "step": 381
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0886157832350968e-05,
      "loss": 1.528,
      "step": 384
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0408512982707408e-05,
      "loss": 1.51,
      "step": 387
    },
    {
      "epoch": 1.52,
      "learning_rate": 9.939746858484699e-06,
      "loss": 1.4226,
      "step": 390
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.480028707101716e-06,
      "loss": 1.5474,
      "step": 393
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.02952450921813e-06,
      "loss": 1.5436,
      "step": 396
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.588396918807265e-06,
      "loss": 1.5102,
      "step": 399
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.156805204430163e-06,
      "loss": 1.4774,
      "step": 402
    },
    {
      "epoch": 1.58,
      "learning_rate": 7.734905191731818e-06,
      "loss": 1.5731,
      "step": 405
    },
    {
      "epoch": 1.59,
      "learning_rate": 7.322849207180554e-06,
      "loss": 1.4854,
      "step": 408
    },
    {
      "epoch": 1.6,
      "learning_rate": 6.92078602307069e-06,
      "loss": 1.5024,
      "step": 411
    },
    {
      "epoch": 1.62,
      "learning_rate": 6.528860803808572e-06,
      "loss": 1.4762,
      "step": 414
    },
    {
      "epoch": 1.63,
      "learning_rate": 6.147215053501067e-06,
      "loss": 1.5015,
      "step": 417
    },
    {
      "epoch": 1.64,
      "learning_rate": 5.775986564865746e-06,
      "loss": 1.4655,
      "step": 420
    },
    {
      "epoch": 1.65,
      "learning_rate": 5.4153093694809615e-06,
      "loss": 1.3807,
      "step": 423
    },
    {
      "epoch": 1.66,
      "learning_rate": 5.065313689393926e-06,
      "loss": 1.5052,
      "step": 426
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.7261258901042164e-06,
      "loss": 1.5502,
      "step": 429
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.397868434939627e-06,
      "loss": 1.4234,
      "step": 432
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.080659840841001e-06,
      "loss": 1.4875,
      "step": 435
    },
    {
      "epoch": 1.71,
      "learning_rate": 3.774614635571785e-06,
      "loss": 1.4858,
      "step": 438
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.4798433163679534e-06,
      "loss": 1.4217,
      "step": 441
    },
    {
      "epoch": 1.73,
      "learning_rate": 3.1964523100431278e-06,
      "loss": 1.4572,
      "step": 444
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.924543934563277e-06,
      "loss": 1.5039,
      "step": 447
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.664216362104964e-06,
      "loss": 1.4897,
      "step": 450
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.4155635836104113e-06,
      "loss": 1.5384,
      "step": 453
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.178675374852189e-06,
      "loss": 1.505,
      "step": 456
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.953637264019748e-06,
      "loss": 1.5019,
      "step": 459
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.7405305008396436e-06,
      "loss": 1.5941,
      "step": 462
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.5394320272403605e-06,
      "loss": 1.5265,
      "step": 465
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.3504144495725661e-06,
      "loss": 1.5035,
      "step": 468
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.1735460123946455e-06,
      "loss": 1.5002,
      "step": 471
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.0088905738331372e-06,
      "loss": 1.4317,
      "step": 474
    },
    {
      "epoch": 1.86,
      "learning_rate": 8.565075825267759e-07,
      "loss": 1.4852,
      "step": 477
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.16452056162693e-07,
      "loss": 1.5445,
      "step": 480
    },
    {
      "epoch": 1.88,
      "learning_rate": 5.887745616123169e-07,
      "loss": 1.4855,
      "step": 483
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.735211966742819e-07,
      "loss": 1.4852,
      "step": 486
    },
    {
      "epoch": 1.91,
      "learning_rate": 3.7073357343086086e-07,
      "loss": 1.4545,
      "step": 489
    },
    {
      "epoch": 1.92,
      "learning_rate": 2.804488032239943e-07,
      "loss": 1.445,
      "step": 492
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.0269948325627227e-07,
      "loss": 1.4484,
      "step": 495
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.3751368482176727e-07,
      "loss": 1.4113,
      "step": 498
    },
    {
      "epoch": 1.95,
      "learning_rate": 8.491494317091229e-08,
      "loss": 1.5394,
      "step": 501
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.492224901315344e-08,
      "loss": 1.4475,
      "step": 504
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.7550041660350232e-08,
      "loss": 1.4934,
      "step": 507
    },
    {
      "epoch": 1.99,
      "learning_rate": 2.808203813499665e-09,
      "loss": 1.4903,
      "step": 510
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.7396166324615479,
      "eval_runtime": 309.3927,
      "eval_samples_per_second": 13.627,
      "eval_steps_per_second": 13.627,
      "step": 512
    },
    {
      "epoch": 2.0,
      "step": 512,
      "total_flos": 2.7416092993388544e+17,
      "train_loss": 1.8553011305630207,
      "train_runtime": 15007.5766,
      "train_samples_per_second": 4.379,
      "train_steps_per_second": 0.034
    }
  ],
  "max_steps": 512,
  "num_train_epochs": 2,
  "total_flos": 2.7416092993388544e+17,
  "trial_name": null,
  "trial_params": null
}