{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 9980, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.22615130876182502, "eval_loss": 4.766125679016113, "eval_runtime": 6.8593, "eval_samples_per_second": 33.385, "eval_steps_per_second": 1.166, "step": 998 }, { "epoch": 1.002004008016032, "grad_norm": 11396.599609375, "learning_rate": 0.0005993999999999999, "loss": 1.2399, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.26749502217588594, "eval_loss": 4.338047504425049, "eval_runtime": 6.3687, "eval_samples_per_second": 35.957, "eval_steps_per_second": 1.256, "step": 1996 }, { "epoch": 2.004008016032064, "grad_norm": 10861.8779296875, "learning_rate": 0.0005332516703786191, "loss": 1.0378, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.2781343200676813, "eval_loss": 4.19285249710083, "eval_runtime": 5.938, "eval_samples_per_second": 38.565, "eval_steps_per_second": 1.347, "step": 2994 }, { "epoch": 3.006012024048096, "grad_norm": 10864.8798828125, "learning_rate": 0.00046643652561247214, "loss": 0.9739, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.286568847793948, "eval_loss": 4.103455066680908, "eval_runtime": 6.2421, "eval_samples_per_second": 36.687, "eval_steps_per_second": 1.282, "step": 3992 }, { "epoch": 4.008016032064128, "grad_norm": 10894.2529296875, "learning_rate": 0.00039962138084632514, "loss": 0.9423, "step": 4000 }, { "epoch": 5.0, "eval_accuracy": 0.29784906724549004, "eval_loss": 4.001025676727295, "eval_runtime": 6.0982, "eval_samples_per_second": 37.552, "eval_steps_per_second": 1.312, "step": 4990 }, { "epoch": 5.01002004008016, "grad_norm": 11722.3232421875, "learning_rate": 0.00033280623608017814, "loss": 0.9155, "step": 5000 }, { "epoch": 6.0, "eval_accuracy": 0.30403609670224496, "eval_loss": 3.937136650085449, "eval_runtime": 6.0876, "eval_samples_per_second": 37.618, "eval_steps_per_second": 1.314, "step": 5988 }, { "epoch": 6.012024048096192, "grad_norm": 10417.4404296875, "learning_rate": 0.0002659910913140312, "loss": 0.8943, "step": 6000 }, { "epoch": 7.0, "eval_accuracy": 0.3146412121108538, "eval_loss": 3.859510898590088, "eval_runtime": 6.1124, "eval_samples_per_second": 37.465, "eval_steps_per_second": 1.309, "step": 6986 }, { "epoch": 7.014028056112225, "grad_norm": 10313.73828125, "learning_rate": 0.00019917594654788416, "loss": 0.8736, "step": 7000 }, { "epoch": 8.0, "eval_accuracy": 0.32122988574504996, "eval_loss": 3.8007497787475586, "eval_runtime": 6.1187, "eval_samples_per_second": 37.426, "eval_steps_per_second": 1.307, "step": 7984 }, { "epoch": 8.016032064128256, "grad_norm": 9752.1123046875, "learning_rate": 0.00013236080178173718, "loss": 0.8539, "step": 8000 }, { "epoch": 9.0, "eval_accuracy": 0.32736564147702507, "eval_loss": 3.7534029483795166, "eval_runtime": 4.519, "eval_samples_per_second": 50.675, "eval_steps_per_second": 1.77, "step": 8982 }, { "epoch": 9.01803607214429, "grad_norm": 9999.22265625, "learning_rate": 6.554565701559019e-05, "loss": 0.8352, "step": 9000 }, { "epoch": 10.0, "eval_accuracy": 0.3312282620770986, "eval_loss": 3.730226755142212, "eval_runtime": 6.1123, "eval_samples_per_second": 37.465, "eval_steps_per_second": 1.309, "step": 9980 }, { "epoch": 10.0, "step": 9980, "total_flos": 8.34436104192e+16, "train_loss": 0.9392847890605429, "train_runtime": 9744.779, "train_samples_per_second": 32.771, "train_steps_per_second": 1.024 } ], "logging_steps": 1000, "max_steps": 9980, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.34436104192e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }