{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09269162210338681, "eval_steps": 62, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030897207367795603, "grad_norm": 4588.7900390625, "learning_rate": 4.0000000000000004e-11, "loss": 11.1378, "step": 1 }, { "epoch": 0.006179441473559121, "grad_norm": 4484.97314453125, "learning_rate": 8.000000000000001e-11, "loss": 11.1465, "step": 2 }, { "epoch": 0.009269162210338681, "grad_norm": 4465.8408203125, "learning_rate": 1.2e-10, "loss": 11.0439, "step": 3 }, { "epoch": 0.012358882947118241, "grad_norm": 4807.01171875, "learning_rate": 1.6000000000000002e-10, "loss": 11.3464, "step": 4 }, { "epoch": 0.015448603683897801, "grad_norm": 4590.3369140625, "learning_rate": 2e-10, "loss": 11.0523, "step": 5 }, { "epoch": 0.018538324420677363, "grad_norm": 4261.99658203125, "learning_rate": 1.9984815164333162e-10, "loss": 10.9142, "step": 6 }, { "epoch": 0.02162804515745692, "grad_norm": 4459.41455078125, "learning_rate": 1.9939306773179497e-10, "loss": 11.0083, "step": 7 }, { "epoch": 0.024717765894236483, "grad_norm": 4372.3876953125, "learning_rate": 1.9863613034027225e-10, "loss": 10.9957, "step": 8 }, { "epoch": 0.027807486631016044, "grad_norm": 4308.26953125, "learning_rate": 1.9757963826274357e-10, "loss": 10.9736, "step": 9 }, { "epoch": 0.030897207367795602, "grad_norm": 3821.595458984375, "learning_rate": 1.9622680003092504e-10, "loss": 10.4572, "step": 10 }, { "epoch": 0.03398692810457516, "grad_norm": 4184.8388671875, "learning_rate": 1.9458172417006345e-10, "loss": 10.7733, "step": 11 }, { "epoch": 0.037076648841354726, "grad_norm": 4909.9609375, "learning_rate": 1.9264940672148017e-10, "loss": 11.6039, "step": 12 }, { "epoch": 0.040166369578134284, "grad_norm": 4097.865234375, "learning_rate": 1.9043571606975777e-10, "loss": 10.7497, "step": 13 }, { "epoch": 0.04325609031491384, "grad_norm": 4388.50244140625, "learning_rate": 1.879473751206489e-10, "loss": 10.8997, "step": 14 }, { "epoch": 0.04634581105169341, "grad_norm": 4527.54150390625, "learning_rate": 1.851919408838327e-10, "loss": 11.0818, "step": 15 }, { "epoch": 0.049435531788472965, "grad_norm": 4139.16552734375, "learning_rate": 1.821777815225245e-10, "loss": 10.6874, "step": 16 }, { "epoch": 0.052525252525252523, "grad_norm": 4184.986328125, "learning_rate": 1.7891405093963936e-10, "loss": 10.6914, "step": 17 }, { "epoch": 0.05561497326203209, "grad_norm": 4698.89599609375, "learning_rate": 1.7541066097768963e-10, "loss": 11.3234, "step": 18 }, { "epoch": 0.05870469399881165, "grad_norm": 4441.7099609375, "learning_rate": 1.7167825131684515e-10, "loss": 11.0614, "step": 19 }, { "epoch": 0.061794414735591205, "grad_norm": 4341.03955078125, "learning_rate": 1.6772815716257412e-10, "loss": 10.9571, "step": 20 }, { "epoch": 0.06488413547237076, "grad_norm": 4568.626953125, "learning_rate": 1.6357237482099684e-10, "loss": 11.1655, "step": 21 }, { "epoch": 0.06797385620915032, "grad_norm": 4392.01953125, "learning_rate": 1.59223525266498e-10, "loss": 11.0342, "step": 22 }, { "epoch": 0.0710635769459299, "grad_norm": 4315.537109375, "learning_rate": 1.5469481581224272e-10, "loss": 11.0743, "step": 23 }, { "epoch": 0.07415329768270945, "grad_norm": 4450.1845703125, "learning_rate": 1.5e-10, "loss": 10.9424, "step": 24 }, { "epoch": 0.07724301841948901, "grad_norm": 4252.31982421875, "learning_rate": 1.4515333583108894e-10, "loss": 10.8202, "step": 25 }, { "epoch": 0.08033273915626857, "grad_norm": 4481.5, "learning_rate": 1.4016954246529695e-10, "loss": 11.0462, "step": 26 }, { "epoch": 0.08342245989304813, "grad_norm": 4287.03515625, "learning_rate": 1.3506375551927545e-10, "loss": 11.0053, "step": 27 }, { "epoch": 0.08651218062982768, "grad_norm": 4513.53662109375, "learning_rate": 1.2985148110016947e-10, "loss": 11.1842, "step": 28 }, { "epoch": 0.08960190136660724, "grad_norm": 4208.63330078125, "learning_rate": 1.2454854871407994e-10, "loss": 10.9974, "step": 29 }, { "epoch": 0.09269162210338681, "grad_norm": 4587.828125, "learning_rate": 1.1917106319237384e-10, "loss": 11.3144, "step": 30 } ], "logging_steps": 1, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.978137921748992e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }