{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1771, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.028232636928289104, "grad_norm": 1.75, "learning_rate": 4.537037037037038e-06, "loss": 0.7356, "step": 50 }, { "epoch": 0.05646527385657821, "grad_norm": 2.21875, "learning_rate": 4.991530674368419e-06, "loss": 0.6929, "step": 100 }, { "epoch": 0.08469791078486731, "grad_norm": 1.6953125, "learning_rate": 4.962327657422145e-06, "loss": 0.6668, "step": 150 }, { "epoch": 0.11293054771315642, "grad_norm": 1.3515625, "learning_rate": 4.91253057525732e-06, "loss": 0.6428, "step": 200 }, { "epoch": 0.1411631846414455, "grad_norm": 1.546875, "learning_rate": 4.8425559136144585e-06, "loss": 0.662, "step": 250 }, { "epoch": 0.16939582156973462, "grad_norm": 1.8046875, "learning_rate": 4.752988916598333e-06, "loss": 0.644, "step": 300 }, { "epoch": 0.1976284584980237, "grad_norm": 1.6953125, "learning_rate": 4.644578691896712e-06, "loss": 0.6516, "step": 350 }, { "epoch": 0.22586109542631283, "grad_norm": 2.109375, "learning_rate": 4.518231945500217e-06, "loss": 0.6616, "step": 400 }, { "epoch": 0.2540937323546019, "grad_norm": 1.7421875, "learning_rate": 4.375005398323957e-06, "loss": 0.6432, "step": 450 }, { "epoch": 0.282326369282891, "grad_norm": 1.65625, "learning_rate": 4.216096948155693e-06, "loss": 0.652, "step": 500 }, { "epoch": 0.3105590062111801, "grad_norm": 2.125, "learning_rate": 4.042835650849013e-06, "loss": 0.6376, "step": 550 }, { "epoch": 0.33879164313946925, "grad_norm": 2.0625, "learning_rate": 3.856670604555346e-06, "loss": 0.6358, "step": 600 }, { "epoch": 0.36702428006775834, "grad_norm": 1.7421875, "learning_rate": 3.6591588299633184e-06, "loss": 0.6363, "step": 650 }, { "epoch": 0.3952569169960474, "grad_norm": 1.578125, "learning_rate": 3.4519522479109736e-06, "loss": 0.6386, "step": 700 }, { "epoch": 0.4234895539243365, "grad_norm": 3.453125, "learning_rate": 3.236783863285632e-06, "loss": 0.6448, "step": 750 }, { "epoch": 0.45172219085262566, "grad_norm": 2.1875, "learning_rate": 3.015453270764544e-06, "loss": 0.63, "step": 800 }, { "epoch": 0.47995482778091475, "grad_norm": 2.5625, "learning_rate": 2.7898116036213417e-06, "loss": 0.6377, "step": 850 }, { "epoch": 0.5081874647092038, "grad_norm": 2.640625, "learning_rate": 2.5617460514813238e-06, "loss": 0.638, "step": 900 }, { "epoch": 0.536420101637493, "grad_norm": 2.0625, "learning_rate": 2.333164076513759e-06, "loss": 0.6339, "step": 950 }, { "epoch": 0.564652738565782, "grad_norm": 2.125, "learning_rate": 2.105977460071578e-06, "loss": 0.6215, "step": 1000 }, { "epoch": 0.5928853754940712, "grad_norm": 1.6171875, "learning_rate": 1.8820863132068939e-06, "loss": 0.6411, "step": 1050 }, { "epoch": 0.6211180124223602, "grad_norm": 1.140625, "learning_rate": 1.663363184792921e-06, "loss": 0.6317, "step": 1100 }, { "epoch": 0.6493506493506493, "grad_norm": 1.7890625, "learning_rate": 1.4516374001665143e-06, "loss": 0.6287, "step": 1150 }, { "epoch": 0.6775832862789385, "grad_norm": 1.203125, "learning_rate": 1.2486797612775693e-06, "loss": 0.6498, "step": 1200 }, { "epoch": 0.7058159232072275, "grad_norm": 1.59375, "learning_rate": 1.0561877363079853e-06, "loss": 0.6423, "step": 1250 }, { "epoch": 0.7340485601355167, "grad_norm": 2.078125, "learning_rate": 8.75771262629145e-07, "loss": 0.6316, "step": 1300 }, { "epoch": 0.7622811970638057, "grad_norm": 1.1171875, "learning_rate": 7.089392818371207e-07, "loss": 0.6361, "step": 1350 }, { "epoch": 0.7905138339920948, "grad_norm": 1.859375, "learning_rate": 5.570871194819521e-07, "loss": 0.6243, "step": 1400 }, { "epoch": 0.818746470920384, "grad_norm": 3.125, "learning_rate": 4.2148481504262316e-07, "loss": 0.6408, "step": 1450 }, { "epoch": 0.846979107848673, "grad_norm": 2.921875, "learning_rate": 3.0326649975182866e-07, "loss": 0.6262, "step": 1500 }, { "epoch": 0.8752117447769622, "grad_norm": 1.875, "learning_rate": 2.034209111107635e-07, "loss": 0.6414, "step": 1550 }, { "epoch": 0.9034443817052513, "grad_norm": 1.375, "learning_rate": 1.2278312342729415e-07, "loss": 0.6341, "step": 1600 }, { "epoch": 0.9316770186335404, "grad_norm": 2.359375, "learning_rate": 6.202756354044327e-08, "loss": 0.6351, "step": 1650 }, { "epoch": 0.9599096555618295, "grad_norm": 1.5859375, "learning_rate": 2.1662370145288935e-08, "loss": 0.6261, "step": 1700 }, { "epoch": 0.9881422924901185, "grad_norm": 1.3359375, "learning_rate": 2.025143894951176e-09, "loss": 0.6328, "step": 1750 } ], "logging_steps": 50, "max_steps": 1771, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.698468202388193e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }