{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.995245641838352, "eval_steps": 500, "global_step": 630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "global_step": 20, "learning_rate": 3.1746031746031745e-05, "loss": 0.5195, "step": 20 }, { "epoch": 0.19, "global_step": 40, "learning_rate": 6.349206349206349e-05, "loss": 0.5055, "step": 40 }, { "epoch": 0.29, "global_step": 60, "learning_rate": 9.523809523809524e-05, "loss": 0.5483, "step": 60 }, { "epoch": 0.38, "global_step": 80, "learning_rate": 0.0001, "loss": 0.4659, "step": 80 }, { "epoch": 0.48, "global_step": 100, "learning_rate": 0.0001, "loss": 0.5625, "step": 100 }, { "epoch": 0.57, "global_step": 120, "learning_rate": 0.0001, "loss": 0.545, "step": 120 }, { "epoch": 0.67, "global_step": 140, "learning_rate": 0.0001, "loss": 0.4962, "step": 140 }, { "epoch": 0.76, "global_step": 160, "learning_rate": 0.0001, "loss": 0.5508, "step": 160 }, { "epoch": 0.86, "global_step": 180, "learning_rate": 0.0001, "loss": 0.5314, "step": 180 }, { "epoch": 0.95, "global_step": 200, "learning_rate": 0.0001, "loss": 0.5251, "step": 200 }, { "epoch": 1.05, "global_step": 220, "learning_rate": 0.0001, "loss": 0.347, "step": 220 }, { "epoch": 1.14, "global_step": 240, "learning_rate": 0.0001, "loss": 0.2006, "step": 240 }, { "epoch": 1.24, "global_step": 260, "learning_rate": 0.0001, "loss": 0.165, "step": 260 }, { "epoch": 1.33, "global_step": 280, "learning_rate": 0.0001, "loss": 0.2124, "step": 280 }, { "epoch": 1.43, "global_step": 300, "learning_rate": 0.0001, "loss": 0.2007, "step": 300 }, { "epoch": 1.52, "global_step": 320, "learning_rate": 0.0001, "loss": 0.1981, "step": 320 }, { "epoch": 1.62, "global_step": 340, "learning_rate": 0.0001, "loss": 0.2031, "step": 340 }, { "epoch": 1.71, "global_step": 360, "learning_rate": 0.0001, "loss": 0.1392, "step": 360 }, { "epoch": 1.81, "global_step": 380, "learning_rate": 0.0001, "loss": 0.204, "step": 380 }, { "epoch": 1.9, "global_step": 400, "learning_rate": 0.0001, "loss": 0.1626, "step": 400 }, { "epoch": 2.0, "global_step": 420, "learning_rate": 0.0001, "loss": 0.146, "step": 420 }, { "epoch": 2.09, "global_step": 440, "learning_rate": 0.0001, "loss": 0.0404, "step": 440 }, { "epoch": 2.19, "global_step": 460, "learning_rate": 0.0001, "loss": 0.0304, "step": 460 }, { "epoch": 2.28, "global_step": 480, "learning_rate": 0.0001, "loss": 0.0166, "step": 480 }, { "epoch": 2.38, "global_step": 500, "learning_rate": 0.0001, "loss": 0.0366, "step": 500 }, { "epoch": 2.47, "global_step": 520, "learning_rate": 0.0001, "loss": 0.018, "step": 520 }, { "epoch": 2.57, "global_step": 540, "learning_rate": 0.0001, "loss": 0.0186, "step": 540 }, { "epoch": 2.66, "global_step": 560, "learning_rate": 0.0001, "loss": 0.0308, "step": 560 }, { "epoch": 2.76, "global_step": 580, "learning_rate": 0.0001, "loss": 0.0536, "step": 580 }, { "epoch": 2.85, "global_step": 600, "learning_rate": 0.0001, "loss": 0.0563, "step": 600 }, { "epoch": 2.95, "global_step": 620, "learning_rate": 0.0001, "loss": 0.0242, "step": 620 }, { "epoch": 3.0, "step": 630, "total_flos": 3.402731466386309e+17, "train_loss": 0.24650317042592973, "train_runtime": 47469.8541, "train_samples_per_second": 0.159, "train_steps_per_second": 0.013 } ], "logging_steps": 20, "max_steps": 630, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 3.402731466386309e+17, "trial_name": null, "trial_params": null }