{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4534883720930232, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9709302325581396e-05, "loss": 8.9999, "step": 10 }, { "epoch": 0.06, "learning_rate": 1.941860465116279e-05, "loss": 0.6596, "step": 20 }, { "epoch": 0.09, "learning_rate": 1.9127906976744188e-05, "loss": 0.4021, "step": 30 }, { "epoch": 0.12, "learning_rate": 1.8837209302325582e-05, "loss": 0.4213, "step": 40 }, { "epoch": 0.15, "learning_rate": 1.8546511627906977e-05, "loss": 0.5277, "step": 50 }, { "epoch": 0.17, "learning_rate": 1.825581395348837e-05, "loss": 0.3852, "step": 60 }, { "epoch": 0.2, "learning_rate": 1.796511627906977e-05, "loss": 0.3803, "step": 70 }, { "epoch": 0.23, "learning_rate": 1.7674418604651163e-05, "loss": 0.3951, "step": 80 }, { "epoch": 0.26, "learning_rate": 1.738372093023256e-05, "loss": 0.464, "step": 90 }, { "epoch": 0.29, "learning_rate": 1.7093023255813955e-05, "loss": 0.4053, "step": 100 }, { "epoch": 0.32, "learning_rate": 1.680232558139535e-05, "loss": 0.3543, "step": 110 }, { "epoch": 0.35, "learning_rate": 1.6511627906976747e-05, "loss": 0.3946, "step": 120 }, { "epoch": 0.38, "learning_rate": 1.6220930232558142e-05, "loss": 0.346, "step": 130 }, { "epoch": 0.41, "learning_rate": 1.5930232558139536e-05, "loss": 0.38, "step": 140 }, { "epoch": 0.44, "learning_rate": 1.563953488372093e-05, "loss": 0.3105, "step": 150 }, { "epoch": 0.47, "learning_rate": 1.5348837209302328e-05, "loss": 0.4101, "step": 160 }, { "epoch": 0.49, "learning_rate": 1.5058139534883723e-05, "loss": 0.3464, "step": 170 }, { "epoch": 0.52, "learning_rate": 1.4767441860465117e-05, "loss": 0.4718, "step": 180 }, { "epoch": 0.55, "learning_rate": 1.4476744186046511e-05, "loss": 0.3197, "step": 190 }, { "epoch": 0.58, "learning_rate": 1.4186046511627909e-05, "loss": 0.342, "step": 200 }, { "epoch": 0.61, "learning_rate": 1.3895348837209303e-05, "loss": 0.3305, "step": 210 }, { "epoch": 0.64, "learning_rate": 1.36046511627907e-05, "loss": 0.3919, "step": 220 }, { "epoch": 0.67, "learning_rate": 1.3313953488372094e-05, "loss": 0.3627, "step": 230 }, { "epoch": 0.7, "learning_rate": 1.302325581395349e-05, "loss": 0.4491, "step": 240 }, { "epoch": 0.73, "learning_rate": 1.2732558139534886e-05, "loss": 0.3236, "step": 250 }, { "epoch": 0.76, "learning_rate": 1.244186046511628e-05, "loss": 0.3039, "step": 260 }, { "epoch": 0.78, "learning_rate": 1.2151162790697674e-05, "loss": 0.3295, "step": 270 }, { "epoch": 0.81, "learning_rate": 1.1860465116279072e-05, "loss": 0.3124, "step": 280 }, { "epoch": 0.84, "learning_rate": 1.1569767441860467e-05, "loss": 0.3983, "step": 290 }, { "epoch": 0.87, "learning_rate": 1.1279069767441861e-05, "loss": 0.3162, "step": 300 }, { "epoch": 0.9, "learning_rate": 1.0988372093023257e-05, "loss": 0.262, "step": 310 }, { "epoch": 0.93, "learning_rate": 1.0697674418604651e-05, "loss": 0.3604, "step": 320 }, { "epoch": 0.96, "learning_rate": 1.0406976744186047e-05, "loss": 0.4004, "step": 330 }, { "epoch": 0.99, "learning_rate": 1.0116279069767443e-05, "loss": 0.3273, "step": 340 }, { "epoch": 1.02, "learning_rate": 9.825581395348838e-06, "loss": 0.2561, "step": 350 }, { "epoch": 1.05, "learning_rate": 9.534883720930234e-06, "loss": 0.2271, "step": 360 }, { "epoch": 1.08, "learning_rate": 9.244186046511628e-06, "loss": 0.2014, "step": 370 }, { "epoch": 1.1, "learning_rate": 8.953488372093024e-06, "loss": 0.1644, "step": 380 }, { "epoch": 1.13, "learning_rate": 8.662790697674419e-06, "loss": 0.2031, "step": 390 }, { "epoch": 1.16, "learning_rate": 8.372093023255815e-06, "loss": 0.1831, "step": 400 }, { "epoch": 1.19, "learning_rate": 8.08139534883721e-06, "loss": 0.1622, "step": 410 }, { "epoch": 1.22, "learning_rate": 7.790697674418605e-06, "loss": 0.2303, "step": 420 }, { "epoch": 1.25, "learning_rate": 7.500000000000001e-06, "loss": 0.1943, "step": 430 }, { "epoch": 1.28, "learning_rate": 7.209302325581395e-06, "loss": 0.1727, "step": 440 }, { "epoch": 1.31, "learning_rate": 6.9186046511627914e-06, "loss": 0.2055, "step": 450 }, { "epoch": 1.34, "learning_rate": 6.627906976744186e-06, "loss": 0.1705, "step": 460 }, { "epoch": 1.37, "learning_rate": 6.337209302325582e-06, "loss": 0.2098, "step": 470 }, { "epoch": 1.4, "learning_rate": 6.046511627906977e-06, "loss": 0.2049, "step": 480 }, { "epoch": 1.42, "learning_rate": 5.755813953488373e-06, "loss": 0.1899, "step": 490 }, { "epoch": 1.45, "learning_rate": 5.465116279069767e-06, "loss": 0.1808, "step": 500 } ], "max_steps": 688, "num_train_epochs": 2, "total_flos": 927778278998016.0, "trial_name": null, "trial_params": null }