| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9813542688910697, |
| "eval_steps": 200, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03271180896303565, |
| "grad_norm": 783188.3125, |
| "learning_rate": 1.934e-05, |
| "loss": 3.6293, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0654236179260713, |
| "grad_norm": 612990.25, |
| "learning_rate": 1.8673333333333333e-05, |
| "loss": 2.7233, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0654236179260713, |
| "eval_loss": 2.3727903366088867, |
| "eval_runtime": 114.6359, |
| "eval_samples_per_second": 23.71, |
| "eval_steps_per_second": 2.966, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09813542688910697, |
| "grad_norm": 648431.5, |
| "learning_rate": 1.8006666666666668e-05, |
| "loss": 2.452, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1308472358521426, |
| "grad_norm": 490992.59375, |
| "learning_rate": 1.734e-05, |
| "loss": 2.5093, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1308472358521426, |
| "eval_loss": 2.2560293674468994, |
| "eval_runtime": 114.4327, |
| "eval_samples_per_second": 23.752, |
| "eval_steps_per_second": 2.971, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16355904481517827, |
| "grad_norm": 453563.65625, |
| "learning_rate": 1.6673333333333335e-05, |
| "loss": 2.4046, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19627085377821393, |
| "grad_norm": 696398.625, |
| "learning_rate": 1.6006666666666667e-05, |
| "loss": 2.3773, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19627085377821393, |
| "eval_loss": 2.187253475189209, |
| "eval_runtime": 114.7317, |
| "eval_samples_per_second": 23.69, |
| "eval_steps_per_second": 2.963, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2289826627412496, |
| "grad_norm": 451413.25, |
| "learning_rate": 1.5340000000000002e-05, |
| "loss": 2.3683, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2616944717042852, |
| "grad_norm": 777109.4375, |
| "learning_rate": 1.4673333333333336e-05, |
| "loss": 2.3133, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2616944717042852, |
| "eval_loss": 2.1392085552215576, |
| "eval_runtime": 115.3261, |
| "eval_samples_per_second": 23.568, |
| "eval_steps_per_second": 2.948, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2944062806673209, |
| "grad_norm": 423084.59375, |
| "learning_rate": 1.400666666666667e-05, |
| "loss": 2.3798, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32711808963035655, |
| "grad_norm": 472675.1875, |
| "learning_rate": 1.3340000000000001e-05, |
| "loss": 2.2885, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32711808963035655, |
| "eval_loss": 2.110429286956787, |
| "eval_runtime": 114.6313, |
| "eval_samples_per_second": 23.711, |
| "eval_steps_per_second": 2.966, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3598298985933922, |
| "grad_norm": 351128.03125, |
| "learning_rate": 1.2673333333333335e-05, |
| "loss": 2.2975, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.39254170755642787, |
| "grad_norm": 395426.5, |
| "learning_rate": 1.2006666666666668e-05, |
| "loss": 2.3144, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.39254170755642787, |
| "eval_loss": 2.0805437564849854, |
| "eval_runtime": 114.2209, |
| "eval_samples_per_second": 23.796, |
| "eval_steps_per_second": 2.977, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4252535165194635, |
| "grad_norm": 485672.09375, |
| "learning_rate": 1.134e-05, |
| "loss": 2.2471, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4579653254824992, |
| "grad_norm": 499586.5, |
| "learning_rate": 1.0673333333333333e-05, |
| "loss": 2.2103, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4579653254824992, |
| "eval_loss": 2.059140682220459, |
| "eval_runtime": 114.1443, |
| "eval_samples_per_second": 23.812, |
| "eval_steps_per_second": 2.979, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.49067713444553485, |
| "grad_norm": 511962.75, |
| "learning_rate": 1.0006666666666667e-05, |
| "loss": 2.2685, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5233889434085705, |
| "grad_norm": 462990.46875, |
| "learning_rate": 9.340000000000002e-06, |
| "loss": 2.2141, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5233889434085705, |
| "eval_loss": 2.047919750213623, |
| "eval_runtime": 114.8218, |
| "eval_samples_per_second": 23.671, |
| "eval_steps_per_second": 2.961, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5561007523716062, |
| "grad_norm": 420309.09375, |
| "learning_rate": 8.673333333333334e-06, |
| "loss": 2.2015, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5888125613346418, |
| "grad_norm": 451439.71875, |
| "learning_rate": 8.006666666666667e-06, |
| "loss": 2.185, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5888125613346418, |
| "eval_loss": 2.029125213623047, |
| "eval_runtime": 114.5201, |
| "eval_samples_per_second": 23.734, |
| "eval_steps_per_second": 2.969, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6215243702976775, |
| "grad_norm": 456356.46875, |
| "learning_rate": 7.340000000000001e-06, |
| "loss": 2.2019, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6542361792607131, |
| "grad_norm": 426280.46875, |
| "learning_rate": 6.6733333333333335e-06, |
| "loss": 2.2518, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6542361792607131, |
| "eval_loss": 2.0237390995025635, |
| "eval_runtime": 114.8674, |
| "eval_samples_per_second": 23.662, |
| "eval_steps_per_second": 2.96, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6869479882237488, |
| "grad_norm": 375687.84375, |
| "learning_rate": 6.006666666666667e-06, |
| "loss": 2.1483, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7196597971867844, |
| "grad_norm": 610521.0, |
| "learning_rate": 5.3400000000000005e-06, |
| "loss": 2.1684, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7196597971867844, |
| "eval_loss": 2.0064055919647217, |
| "eval_runtime": 114.7599, |
| "eval_samples_per_second": 23.684, |
| "eval_steps_per_second": 2.963, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7523716061498201, |
| "grad_norm": 520093.5, |
| "learning_rate": 4.673333333333333e-06, |
| "loss": 2.1643, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7850834151128557, |
| "grad_norm": 533737.6875, |
| "learning_rate": 4.006666666666667e-06, |
| "loss": 2.1538, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7850834151128557, |
| "eval_loss": 2.0018444061279297, |
| "eval_runtime": 114.3517, |
| "eval_samples_per_second": 23.769, |
| "eval_steps_per_second": 2.973, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8177952240758914, |
| "grad_norm": 479944.59375, |
| "learning_rate": 3.3400000000000006e-06, |
| "loss": 2.1533, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.850507033038927, |
| "grad_norm": 495037.4375, |
| "learning_rate": 2.6733333333333333e-06, |
| "loss": 2.1478, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.850507033038927, |
| "eval_loss": 1.9968267679214478, |
| "eval_runtime": 114.5945, |
| "eval_samples_per_second": 23.718, |
| "eval_steps_per_second": 2.967, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8832188420019627, |
| "grad_norm": 409938.59375, |
| "learning_rate": 2.006666666666667e-06, |
| "loss": 2.1738, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9159306509649984, |
| "grad_norm": 389876.75, |
| "learning_rate": 1.34e-06, |
| "loss": 2.1086, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9159306509649984, |
| "eval_loss": 1.9919960498809814, |
| "eval_runtime": 115.1421, |
| "eval_samples_per_second": 23.606, |
| "eval_steps_per_second": 2.953, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.948642459928034, |
| "grad_norm": 340806.90625, |
| "learning_rate": 6.733333333333334e-07, |
| "loss": 2.1958, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9813542688910697, |
| "grad_norm": 401251.53125, |
| "learning_rate": 6.666666666666667e-09, |
| "loss": 2.1662, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9813542688910697, |
| "eval_loss": 1.988901972770691, |
| "eval_runtime": 115.013, |
| "eval_samples_per_second": 23.632, |
| "eval_steps_per_second": 2.956, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.461498937344e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|