| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 10100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.9900990099009901, | |
| "grad_norm": 10830.5537109375, | |
| "learning_rate": 0.0005993999999999999, | |
| "loss": 1.2778, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.17797619047619048, | |
| "eval_loss": 4.991881370544434, | |
| "eval_runtime": 6.3501, | |
| "eval_samples_per_second": 37.795, | |
| "eval_steps_per_second": 1.26, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.9801980198019802, | |
| "grad_norm": 12182.1923828125, | |
| "learning_rate": 0.0005341318681318681, | |
| "loss": 1.1301, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.20255218525766472, | |
| "eval_loss": 4.643928050994873, | |
| "eval_runtime": 5.6378, | |
| "eval_samples_per_second": 42.57, | |
| "eval_steps_per_second": 1.419, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.9702970297029703, | |
| "grad_norm": 11544.783203125, | |
| "learning_rate": 0.00046819780219780217, | |
| "loss": 1.0468, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.23992987606001304, | |
| "eval_loss": 4.329844951629639, | |
| "eval_runtime": 5.6063, | |
| "eval_samples_per_second": 42.809, | |
| "eval_steps_per_second": 1.427, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 3.9603960396039604, | |
| "grad_norm": 10567.1142578125, | |
| "learning_rate": 0.0004022637362637362, | |
| "loss": 0.9842, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.2685665362035225, | |
| "eval_loss": 4.115429878234863, | |
| "eval_runtime": 5.663, | |
| "eval_samples_per_second": 42.381, | |
| "eval_steps_per_second": 1.413, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 4.9504950495049505, | |
| "grad_norm": 10359.140625, | |
| "learning_rate": 0.0003363296703296703, | |
| "loss": 0.9369, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.2893183300717547, | |
| "eval_loss": 3.944108486175537, | |
| "eval_runtime": 5.7246, | |
| "eval_samples_per_second": 41.924, | |
| "eval_steps_per_second": 1.397, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 5.9405940594059405, | |
| "grad_norm": 10749.0302734375, | |
| "learning_rate": 0.00027039560439560435, | |
| "loss": 0.9021, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.30401174168297457, | |
| "eval_loss": 3.8409361839294434, | |
| "eval_runtime": 5.6946, | |
| "eval_samples_per_second": 42.145, | |
| "eval_steps_per_second": 1.405, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 6.930693069306931, | |
| "grad_norm": 10786.7880859375, | |
| "learning_rate": 0.00020446153846153845, | |
| "loss": 0.8748, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.3148401826484018, | |
| "eval_loss": 3.7550878524780273, | |
| "eval_runtime": 5.6957, | |
| "eval_samples_per_second": 42.137, | |
| "eval_steps_per_second": 1.405, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 7.920792079207921, | |
| "grad_norm": 11502.447265625, | |
| "learning_rate": 0.00013852747252747252, | |
| "loss": 0.851, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.323899217221135, | |
| "eval_loss": 3.684725284576416, | |
| "eval_runtime": 5.6021, | |
| "eval_samples_per_second": 42.841, | |
| "eval_steps_per_second": 1.428, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 8.910891089108912, | |
| "grad_norm": 11624.4814453125, | |
| "learning_rate": 7.25934065934066e-05, | |
| "loss": 0.8329, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.33088714938030006, | |
| "eval_loss": 3.6328799724578857, | |
| "eval_runtime": 5.5532, | |
| "eval_samples_per_second": 43.219, | |
| "eval_steps_per_second": 1.441, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 9.900990099009901, | |
| "grad_norm": 12129.1943359375, | |
| "learning_rate": 6.659340659340659e-06, | |
| "loss": 0.8156, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.3351108936725375, | |
| "eval_loss": 3.6097142696380615, | |
| "eval_runtime": 5.5755, | |
| "eval_samples_per_second": 43.045, | |
| "eval_steps_per_second": 1.435, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 10100, | |
| "total_flos": 8.444174598144e+16, | |
| "train_loss": 0.9637339843145691, | |
| "train_runtime": 10989.9202, | |
| "train_samples_per_second": 29.406, | |
| "train_steps_per_second": 0.919 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 10100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.444174598144e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |