| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.09269162210338681, |
| "eval_steps": 62, |
| "global_step": 30, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0030897207367795603, |
| "grad_norm": 4588.7900390625, |
| "learning_rate": 4.0000000000000004e-11, |
| "loss": 11.1378, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006179441473559121, |
| "grad_norm": 4484.97314453125, |
| "learning_rate": 8.000000000000001e-11, |
| "loss": 11.1465, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.009269162210338681, |
| "grad_norm": 4465.8408203125, |
| "learning_rate": 1.2e-10, |
| "loss": 11.0439, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.012358882947118241, |
| "grad_norm": 4807.01171875, |
| "learning_rate": 1.6000000000000002e-10, |
| "loss": 11.3464, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.015448603683897801, |
| "grad_norm": 4590.3369140625, |
| "learning_rate": 2e-10, |
| "loss": 11.0523, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.018538324420677363, |
| "grad_norm": 4261.99658203125, |
| "learning_rate": 1.9984815164333162e-10, |
| "loss": 10.9142, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02162804515745692, |
| "grad_norm": 4459.41455078125, |
| "learning_rate": 1.9939306773179497e-10, |
| "loss": 11.0083, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.024717765894236483, |
| "grad_norm": 4372.3876953125, |
| "learning_rate": 1.9863613034027225e-10, |
| "loss": 10.9957, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.027807486631016044, |
| "grad_norm": 4308.26953125, |
| "learning_rate": 1.9757963826274357e-10, |
| "loss": 10.9736, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.030897207367795602, |
| "grad_norm": 3821.595458984375, |
| "learning_rate": 1.9622680003092504e-10, |
| "loss": 10.4572, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03398692810457516, |
| "grad_norm": 4184.8388671875, |
| "learning_rate": 1.9458172417006345e-10, |
| "loss": 10.7733, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.037076648841354726, |
| "grad_norm": 4909.9609375, |
| "learning_rate": 1.9264940672148017e-10, |
| "loss": 11.6039, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.040166369578134284, |
| "grad_norm": 4097.865234375, |
| "learning_rate": 1.9043571606975777e-10, |
| "loss": 10.7497, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04325609031491384, |
| "grad_norm": 4388.50244140625, |
| "learning_rate": 1.879473751206489e-10, |
| "loss": 10.8997, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04634581105169341, |
| "grad_norm": 4527.54150390625, |
| "learning_rate": 1.851919408838327e-10, |
| "loss": 11.0818, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.049435531788472965, |
| "grad_norm": 4139.16552734375, |
| "learning_rate": 1.821777815225245e-10, |
| "loss": 10.6874, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.052525252525252523, |
| "grad_norm": 4184.986328125, |
| "learning_rate": 1.7891405093963936e-10, |
| "loss": 10.6914, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05561497326203209, |
| "grad_norm": 4698.89599609375, |
| "learning_rate": 1.7541066097768963e-10, |
| "loss": 11.3234, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05870469399881165, |
| "grad_norm": 4441.7099609375, |
| "learning_rate": 1.7167825131684515e-10, |
| "loss": 11.0614, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.061794414735591205, |
| "grad_norm": 4341.03955078125, |
| "learning_rate": 1.6772815716257412e-10, |
| "loss": 10.9571, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06488413547237076, |
| "grad_norm": 4568.626953125, |
| "learning_rate": 1.6357237482099684e-10, |
| "loss": 11.1655, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06797385620915032, |
| "grad_norm": 4392.01953125, |
| "learning_rate": 1.59223525266498e-10, |
| "loss": 11.0342, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0710635769459299, |
| "grad_norm": 4315.537109375, |
| "learning_rate": 1.5469481581224272e-10, |
| "loss": 11.0743, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07415329768270945, |
| "grad_norm": 4450.1845703125, |
| "learning_rate": 1.5e-10, |
| "loss": 10.9424, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07724301841948901, |
| "grad_norm": 4252.31982421875, |
| "learning_rate": 1.4515333583108894e-10, |
| "loss": 10.8202, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08033273915626857, |
| "grad_norm": 4481.5, |
| "learning_rate": 1.4016954246529695e-10, |
| "loss": 11.0462, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08342245989304813, |
| "grad_norm": 4287.03515625, |
| "learning_rate": 1.3506375551927545e-10, |
| "loss": 11.0053, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08651218062982768, |
| "grad_norm": 4513.53662109375, |
| "learning_rate": 1.2985148110016947e-10, |
| "loss": 11.1842, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08960190136660724, |
| "grad_norm": 4208.63330078125, |
| "learning_rate": 1.2454854871407994e-10, |
| "loss": 10.9974, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09269162210338681, |
| "grad_norm": 4587.828125, |
| "learning_rate": 1.1917106319237384e-10, |
| "loss": 11.3144, |
| "step": 30 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 62, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.978137921748992e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|