{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 15969, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 4.843446677938506e-05, "loss": 1.9037, "step": 500 }, { "epoch": 0.19, "learning_rate": 4.686893355877012e-05, "loss": 1.7424, "step": 1000 }, { "epoch": 0.28, "learning_rate": 4.530340033815518e-05, "loss": 1.6677, "step": 1500 }, { "epoch": 0.38, "learning_rate": 4.373786711754024e-05, "loss": 1.6062, "step": 2000 }, { "epoch": 0.47, "learning_rate": 4.2172333896925295e-05, "loss": 1.5738, "step": 2500 }, { "epoch": 0.56, "learning_rate": 4.060680067631035e-05, "loss": 1.5341, "step": 3000 }, { "epoch": 0.66, "learning_rate": 3.9041267455695416e-05, "loss": 1.4979, "step": 3500 }, { "epoch": 0.75, "learning_rate": 3.7475734235080466e-05, "loss": 1.4711, "step": 4000 }, { "epoch": 0.85, "learning_rate": 3.591020101446553e-05, "loss": 1.4475, "step": 4500 }, { "epoch": 0.94, "learning_rate": 3.434466779385059e-05, "loss": 1.4165, "step": 5000 }, { "epoch": 1.03, "learning_rate": 3.277913457323565e-05, "loss": 1.3643, "step": 5500 }, { "epoch": 1.13, "learning_rate": 3.12136013526207e-05, "loss": 1.286, "step": 6000 }, { "epoch": 1.22, "learning_rate": 2.9648068132005762e-05, "loss": 1.2671, "step": 6500 }, { "epoch": 1.32, "learning_rate": 2.8082534911390823e-05, "loss": 1.2576, "step": 7000 }, { "epoch": 1.41, "learning_rate": 2.6517001690775877e-05, "loss": 1.2502, "step": 7500 }, { "epoch": 1.5, "learning_rate": 2.4951468470160937e-05, "loss": 1.2262, "step": 8000 }, { "epoch": 1.6, "learning_rate": 2.3385935249545994e-05, "loss": 1.223, "step": 8500 }, { "epoch": 1.69, "learning_rate": 2.1820402028931055e-05, "loss": 1.214, "step": 9000 }, { "epoch": 1.78, "learning_rate": 2.0254868808316112e-05, "loss": 1.1949, "step": 9500 }, { "epoch": 1.88, "learning_rate": 1.868933558770117e-05, "loss": 1.1977, "step": 10000 }, { "epoch": 1.97, "learning_rate": 1.712380236708623e-05, "loss": 1.1708, "step": 10500 }, { "epoch": 2.07, "learning_rate": 1.5558269146471287e-05, "loss": 1.0994, "step": 11000 }, { "epoch": 2.16, "learning_rate": 1.3992735925856349e-05, "loss": 1.0833, "step": 11500 }, { "epoch": 2.25, "learning_rate": 1.2427202705241406e-05, "loss": 1.0865, "step": 12000 }, { "epoch": 2.35, "learning_rate": 1.0861669484626465e-05, "loss": 1.073, "step": 12500 }, { "epoch": 2.44, "learning_rate": 9.296136264011522e-06, "loss": 1.072, "step": 13000 }, { "epoch": 2.54, "learning_rate": 7.730603043396581e-06, "loss": 1.071, "step": 13500 }, { "epoch": 2.63, "learning_rate": 6.16506982278164e-06, "loss": 1.0711, "step": 14000 }, { "epoch": 2.72, "learning_rate": 4.599536602166698e-06, "loss": 1.0627, "step": 14500 }, { "epoch": 2.82, "learning_rate": 3.034003381551757e-06, "loss": 1.06, "step": 15000 }, { "epoch": 2.91, "learning_rate": 1.4684701609368152e-06, "loss": 1.0543, "step": 15500 }, { "epoch": 3.0, "step": 15969, "total_flos": 2.977670544895181e+16, "train_loss": 1.2911211150314672, "train_runtime": 9784.2831, "train_samples_per_second": 1.632, "train_steps_per_second": 1.632 } ], "max_steps": 15969, "num_train_epochs": 3, "total_flos": 2.977670544895181e+16, "trial_name": null, "trial_params": null }