{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.997624703087886, "eval_steps": 500, "global_step": 210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "global_step": 10, "learning_rate": 4.761904761904762e-05, "loss": 0.5879, "step": 10 }, { "epoch": 0.1, "global_step": 20, "learning_rate": 9.523809523809524e-05, "loss": 0.506, "step": 20 }, { "epoch": 0.14, "global_step": 30, "learning_rate": 0.0001, "loss": 0.5252, "step": 30 }, { "epoch": 0.19, "global_step": 40, "learning_rate": 0.0001, "loss": 0.5572, "step": 40 }, { "epoch": 0.24, "global_step": 50, "learning_rate": 0.0001, "loss": 0.4937, "step": 50 }, { "epoch": 0.29, "global_step": 60, "learning_rate": 0.0001, "loss": 0.4925, "step": 60 }, { "epoch": 0.33, "global_step": 70, "learning_rate": 0.0001, "loss": 0.4309, "step": 70 }, { "epoch": 0.38, "global_step": 80, "learning_rate": 0.0001, "loss": 0.4831, "step": 80 }, { "epoch": 0.43, "global_step": 90, "learning_rate": 0.0001, "loss": 0.4896, "step": 90 }, { "epoch": 0.48, "global_step": 100, "learning_rate": 0.0001, "loss": 0.4257, "step": 100 }, { "epoch": 0.52, "global_step": 110, "learning_rate": 0.0001, "loss": 0.5, "step": 110 }, { "epoch": 0.57, "global_step": 120, "learning_rate": 0.0001, "loss": 0.4954, "step": 120 }, { "epoch": 0.62, "global_step": 130, "learning_rate": 0.0001, "loss": 0.4691, "step": 130 }, { "epoch": 0.67, "global_step": 140, "learning_rate": 0.0001, "loss": 0.4373, "step": 140 }, { "epoch": 0.71, "global_step": 150, "learning_rate": 0.0001, "loss": 0.526, "step": 150 }, { "epoch": 0.76, "global_step": 160, "learning_rate": 0.0001, "loss": 0.4297, "step": 160 }, { "epoch": 0.81, "global_step": 170, "learning_rate": 0.0001, "loss": 0.4708, "step": 170 }, { "epoch": 0.86, "global_step": 180, "learning_rate": 0.0001, "loss": 0.4872, "step": 180 }, { "epoch": 0.9, "global_step": 190, "learning_rate": 0.0001, "loss": 0.4888, "step": 190 }, { "epoch": 0.95, "global_step": 200, "learning_rate": 0.0001, "loss": 0.4754, "step": 200 }, { "epoch": 1.0, "global_step": 210, "learning_rate": 0.0001, "loss": 0.4733, "step": 210 }, { "epoch": 1.0, "step": 210, "total_flos": 1.1368810483705446e+17, "train_loss": 0.4878490357171921, "train_runtime": 6234.3462, "train_samples_per_second": 0.405, "train_steps_per_second": 0.034 } ], "logging_steps": 10, "max_steps": 210, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.1368810483705446e+17, "trial_name": null, "trial_params": null }