{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8867924528301887, "eval_steps": 20, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018867924528301886, "eval_loss": 1.91287362575531, "eval_runtime": 19.9916, "eval_samples_per_second": 75.082, "eval_steps_per_second": 4.702, "step": 1 }, { "epoch": 0.18867924528301888, "grad_norm": 1.6953125, "learning_rate": 6.666666666666667e-05, "loss": 1.7788, "step": 10 }, { "epoch": 0.37735849056603776, "grad_norm": 1.1875, "learning_rate": 0.00013333333333333334, "loss": 1.6758, "step": 20 }, { "epoch": 0.37735849056603776, "eval_loss": 1.5468029975891113, "eval_runtime": 19.2466, "eval_samples_per_second": 77.988, "eval_steps_per_second": 4.884, "step": 20 }, { "epoch": 0.5660377358490566, "grad_norm": 1.3515625, "learning_rate": 0.0002, "loss": 1.7428, "step": 30 }, { "epoch": 0.7547169811320755, "grad_norm": 0.9609375, "learning_rate": 0.00019984815164333163, "loss": 1.7482, "step": 40 }, { "epoch": 0.7547169811320755, "eval_loss": 1.5930224657058716, "eval_runtime": 20.8935, "eval_samples_per_second": 71.84, "eval_steps_per_second": 4.499, "step": 40 }, { "epoch": 0.9433962264150944, "grad_norm": 1.0078125, "learning_rate": 0.00019939306773179497, "loss": 1.7373, "step": 50 }, { "epoch": 1.1320754716981132, "grad_norm": 1.015625, "learning_rate": 0.00019863613034027224, "loss": 1.3262, "step": 60 }, { "epoch": 1.1320754716981132, "eval_loss": 1.694644570350647, "eval_runtime": 19.8369, "eval_samples_per_second": 75.667, "eval_steps_per_second": 4.739, "step": 60 }, { "epoch": 1.320754716981132, "grad_norm": 0.8671875, "learning_rate": 0.00019757963826274357, "loss": 1.2568, "step": 70 }, { "epoch": 1.509433962264151, "grad_norm": 0.90234375, "learning_rate": 0.00019622680003092503, "loss": 1.072, "step": 80 }, { "epoch": 1.509433962264151, "eval_loss": 1.7398728132247925, "eval_runtime": 28.2186, "eval_samples_per_second": 53.192, "eval_steps_per_second": 3.331, "step": 80 }, { "epoch": 1.6981132075471699, "grad_norm": 0.85546875, "learning_rate": 0.00019458172417006347, "loss": 1.0807, "step": 90 }, { "epoch": 1.8867924528301887, "grad_norm": 0.80859375, "learning_rate": 0.00019264940672148018, "loss": 1.3163, "step": 100 }, { "epoch": 1.8867924528301887, "eval_loss": 1.7708204984664917, "eval_runtime": 19.9313, "eval_samples_per_second": 75.309, "eval_steps_per_second": 4.716, "step": 100 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.313993814455091e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }