{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.12358882947118241,
  "eval_steps": 62,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0030897207367795603,
      "grad_norm": 4588.7900390625,
      "learning_rate": 4.0000000000000004e-11,
      "loss": 11.1378,
      "step": 1
    },
    {
      "epoch": 0.006179441473559121,
      "grad_norm": 4484.97314453125,
      "learning_rate": 8.000000000000001e-11,
      "loss": 11.1465,
      "step": 2
    },
    {
      "epoch": 0.009269162210338681,
      "grad_norm": 4465.8408203125,
      "learning_rate": 1.2e-10,
      "loss": 11.0439,
      "step": 3
    },
    {
      "epoch": 0.012358882947118241,
      "grad_norm": 4807.01171875,
      "learning_rate": 1.6000000000000002e-10,
      "loss": 11.3464,
      "step": 4
    },
    {
      "epoch": 0.015448603683897801,
      "grad_norm": 4590.3369140625,
      "learning_rate": 2e-10,
      "loss": 11.0523,
      "step": 5
    },
    {
      "epoch": 0.018538324420677363,
      "grad_norm": 4261.99658203125,
      "learning_rate": 1.9984815164333162e-10,
      "loss": 10.9142,
      "step": 6
    },
    {
      "epoch": 0.02162804515745692,
      "grad_norm": 4459.41455078125,
      "learning_rate": 1.9939306773179497e-10,
      "loss": 11.0083,
      "step": 7
    },
    {
      "epoch": 0.024717765894236483,
      "grad_norm": 4372.3876953125,
      "learning_rate": 1.9863613034027225e-10,
      "loss": 10.9957,
      "step": 8
    },
    {
      "epoch": 0.027807486631016044,
      "grad_norm": 4308.26953125,
      "learning_rate": 1.9757963826274357e-10,
      "loss": 10.9736,
      "step": 9
    },
    {
      "epoch": 0.030897207367795602,
      "grad_norm": 3821.595458984375,
      "learning_rate": 1.9622680003092504e-10,
      "loss": 10.4572,
      "step": 10
    },
    {
      "epoch": 0.03398692810457516,
      "grad_norm": 4184.8388671875,
      "learning_rate": 1.9458172417006345e-10,
      "loss": 10.7733,
      "step": 11
    },
    {
      "epoch": 0.037076648841354726,
      "grad_norm": 4909.9609375,
      "learning_rate": 1.9264940672148017e-10,
      "loss": 11.6039,
      "step": 12
    },
    {
      "epoch": 0.040166369578134284,
      "grad_norm": 4097.865234375,
      "learning_rate": 1.9043571606975777e-10,
      "loss": 10.7497,
      "step": 13
    },
    {
      "epoch": 0.04325609031491384,
      "grad_norm": 4388.50244140625,
      "learning_rate": 1.879473751206489e-10,
      "loss": 10.8997,
      "step": 14
    },
    {
      "epoch": 0.04634581105169341,
      "grad_norm": 4527.54150390625,
      "learning_rate": 1.851919408838327e-10,
      "loss": 11.0818,
      "step": 15
    },
    {
      "epoch": 0.049435531788472965,
      "grad_norm": 4139.16552734375,
      "learning_rate": 1.821777815225245e-10,
      "loss": 10.6874,
      "step": 16
    },
    {
      "epoch": 0.052525252525252523,
      "grad_norm": 4184.986328125,
      "learning_rate": 1.7891405093963936e-10,
      "loss": 10.6914,
      "step": 17
    },
    {
      "epoch": 0.05561497326203209,
      "grad_norm": 4698.89599609375,
      "learning_rate": 1.7541066097768963e-10,
      "loss": 11.3234,
      "step": 18
    },
    {
      "epoch": 0.05870469399881165,
      "grad_norm": 4441.7099609375,
      "learning_rate": 1.7167825131684515e-10,
      "loss": 11.0614,
      "step": 19
    },
    {
      "epoch": 0.061794414735591205,
      "grad_norm": 4341.03955078125,
      "learning_rate": 1.6772815716257412e-10,
      "loss": 10.9571,
      "step": 20
    },
    {
      "epoch": 0.06488413547237076,
      "grad_norm": 4568.626953125,
      "learning_rate": 1.6357237482099684e-10,
      "loss": 11.1655,
      "step": 21
    },
    {
      "epoch": 0.06797385620915032,
      "grad_norm": 4392.01953125,
      "learning_rate": 1.59223525266498e-10,
      "loss": 11.0342,
      "step": 22
    },
    {
      "epoch": 0.0710635769459299,
      "grad_norm": 4315.537109375,
      "learning_rate": 1.5469481581224272e-10,
      "loss": 11.0743,
      "step": 23
    },
    {
      "epoch": 0.07415329768270945,
      "grad_norm": 4450.1845703125,
      "learning_rate": 1.5e-10,
      "loss": 10.9424,
      "step": 24
    },
    {
      "epoch": 0.07724301841948901,
      "grad_norm": 4252.31982421875,
      "learning_rate": 1.4515333583108894e-10,
      "loss": 10.8202,
      "step": 25
    },
    {
      "epoch": 0.08033273915626857,
      "grad_norm": 4481.5,
      "learning_rate": 1.4016954246529695e-10,
      "loss": 11.0462,
      "step": 26
    },
    {
      "epoch": 0.08342245989304813,
      "grad_norm": 4287.03515625,
      "learning_rate": 1.3506375551927545e-10,
      "loss": 11.0053,
      "step": 27
    },
    {
      "epoch": 0.08651218062982768,
      "grad_norm": 4513.53662109375,
      "learning_rate": 1.2985148110016947e-10,
      "loss": 11.1842,
      "step": 28
    },
    {
      "epoch": 0.08960190136660724,
      "grad_norm": 4208.63330078125,
      "learning_rate": 1.2454854871407994e-10,
      "loss": 10.9974,
      "step": 29
    },
    {
      "epoch": 0.09269162210338681,
      "grad_norm": 4587.828125,
      "learning_rate": 1.1917106319237384e-10,
      "loss": 11.3144,
      "step": 30
    },
    {
      "epoch": 0.09578134284016637,
      "grad_norm": 4346.50341796875,
      "learning_rate": 1.1373535578184083e-10,
      "loss": 11.0087,
      "step": 31
    },
    {
      "epoch": 0.09887106357694593,
      "grad_norm": 4239.73974609375,
      "learning_rate": 1.0825793454723326e-10,
      "loss": 10.8437,
      "step": 32
    },
    {
      "epoch": 0.10196078431372549,
      "grad_norm": 4778.1728515625,
      "learning_rate": 1.0275543423681622e-10,
      "loss": 11.3694,
      "step": 33
    },
    {
      "epoch": 0.10505050505050505,
      "grad_norm": 4481.9033203125,
      "learning_rate": 9.724456576318382e-11,
      "loss": 11.1087,
      "step": 34
    },
    {
      "epoch": 0.1081402257872846,
      "grad_norm": 4602.31005859375,
      "learning_rate": 9.174206545276678e-11,
      "loss": 11.3214,
      "step": 35
    },
    {
      "epoch": 0.11122994652406418,
      "grad_norm": 4783.39501953125,
      "learning_rate": 8.626464421815918e-11,
      "loss": 11.2677,
      "step": 36
    },
    {
      "epoch": 0.11431966726084374,
      "grad_norm": 4314.626953125,
      "learning_rate": 8.082893680762619e-11,
      "loss": 10.9266,
      "step": 37
    },
    {
      "epoch": 0.1174093879976233,
      "grad_norm": 4121.65478515625,
      "learning_rate": 7.54514512859201e-11,
      "loss": 10.7766,
      "step": 38
    },
    {
      "epoch": 0.12049910873440285,
      "grad_norm": 4387.51318359375,
      "learning_rate": 7.014851889983058e-11,
      "loss": 11.1114,
      "step": 39
    },
    {
      "epoch": 0.12358882947118241,
      "grad_norm": 4680.94140625,
      "learning_rate": 6.493624448072457e-11,
      "loss": 11.3017,
      "step": 40
    }
  ],
  "logging_steps": 1,
  "max_steps": 62,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.637517228998656e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|