{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 9760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.23359630724948005, "eval_loss": 4.781733989715576, "eval_runtime": 7.3547, "eval_samples_per_second": 30.321, "eval_steps_per_second": 0.952, "step": 976 }, { "epoch": 1.0245901639344261, "grad_norm": 13917.33203125, "learning_rate": 0.0005993999999999999, "loss": 1.2461, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.2654778724561881, "eval_loss": 4.399641990661621, "eval_runtime": 6.6539, "eval_samples_per_second": 33.514, "eval_steps_per_second": 1.052, "step": 1952 }, { "epoch": 2.0491803278688523, "grad_norm": 10209.078125, "learning_rate": 0.0005315753424657533, "loss": 1.0362, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.28501224188919994, "eval_loss": 4.204015254974365, "eval_runtime": 6.7337, "eval_samples_per_second": 33.117, "eval_steps_per_second": 1.04, "step": 2928 }, { "epoch": 3.0737704918032787, "grad_norm": 9973.97265625, "learning_rate": 0.0004630821917808219, "loss": 0.9715, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.29392819846778934, "eval_loss": 4.097615718841553, "eval_runtime": 6.7811, "eval_samples_per_second": 32.886, "eval_steps_per_second": 1.032, "step": 3904 }, { "epoch": 4.098360655737705, "grad_norm": 10912.2734375, "learning_rate": 0.00039458904109589035, "loss": 0.9346, "step": 4000 }, { "epoch": 5.0, "eval_accuracy": 0.3033619123673795, "eval_loss": 4.011116027832031, "eval_runtime": 6.6583, "eval_samples_per_second": 33.492, "eval_steps_per_second": 1.051, "step": 4880 }, { "epoch": 5.122950819672131, "grad_norm": 9932.109375, "learning_rate": 0.0003260958904109589, "loss": 0.9069, "step": 5000 }, { "epoch": 6.0, "eval_accuracy": 0.3087500987249129, "eval_loss": 3.9493002891540527, "eval_runtime": 6.7082, "eval_samples_per_second": 33.243, "eval_steps_per_second": 1.043, "step": 5856 }, { "epoch": 6.147540983606557, "grad_norm": 9450.361328125, "learning_rate": 0.00025760273972602734, "loss": 0.8835, "step": 6000 }, { "epoch": 7.0, "eval_accuracy": 0.3171044202434337, "eval_loss": 3.8856163024902344, "eval_runtime": 6.6331, "eval_samples_per_second": 33.619, "eval_steps_per_second": 1.055, "step": 6832 }, { "epoch": 7.172131147540983, "grad_norm": 9493.173828125, "learning_rate": 0.00018910958904109587, "loss": 0.8622, "step": 7000 }, { "epoch": 8.0, "eval_accuracy": 0.32337893693013786, "eval_loss": 3.8232619762420654, "eval_runtime": 6.7593, "eval_samples_per_second": 32.991, "eval_steps_per_second": 1.036, "step": 7808 }, { "epoch": 8.19672131147541, "grad_norm": 9608.576171875, "learning_rate": 0.00012061643835616437, "loss": 0.8415, "step": 8000 }, { "epoch": 9.0, "eval_accuracy": 0.329653453616842, "eval_loss": 3.7797963619232178, "eval_runtime": 6.6962, "eval_samples_per_second": 33.303, "eval_steps_per_second": 1.045, "step": 8784 }, { "epoch": 9.221311475409836, "grad_norm": 9442.396484375, "learning_rate": 5.212328767123287e-05, "loss": 0.8265, "step": 9000 }, { "epoch": 10.0, "eval_accuracy": 0.3321018314568287, "eval_loss": 3.758513927459717, "eval_runtime": 6.7113, "eval_samples_per_second": 33.228, "eval_steps_per_second": 1.043, "step": 9760 }, { "epoch": 10.0, "step": 9760, "total_flos": 8.16015015936e+16, "train_loss": 0.9351713461954085, "train_runtime": 10046.3917, "train_samples_per_second": 31.086, "train_steps_per_second": 0.971 } ], "logging_steps": 1000, "max_steps": 9760, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.16015015936e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }