{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9883495145631067, "eval_steps": 500, "global_step": 128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 6.666666666666667e-06, "loss": 1.8233, "step": 2 }, { "epoch": 0.06, "learning_rate": 1.3333333333333333e-05, "loss": 1.9878, "step": 4 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 1.9236, "step": 6 }, { "epoch": 0.12, "learning_rate": 1.999429490929718e-05, "loss": 2.0953, "step": 8 }, { "epoch": 0.16, "learning_rate": 1.9977186146800707e-05, "loss": 2.0993, "step": 10 }, { "epoch": 0.19, "learning_rate": 1.994869323391895e-05, "loss": 2.1089, "step": 12 }, { "epoch": 0.22, "learning_rate": 1.990884868158239e-05, "loss": 2.1726, "step": 14 }, { "epoch": 0.25, "learning_rate": 1.985769795314804e-05, "loss": 2.4714, "step": 16 }, { "epoch": 0.28, "learning_rate": 1.9795299412524948e-05, "loss": 1.8168, "step": 18 }, { "epoch": 0.31, "learning_rate": 1.9721724257579907e-05, "loss": 1.8571, "step": 20 }, { "epoch": 0.34, "learning_rate": 1.963705643889941e-05, "loss": 1.7876, "step": 22 }, { "epoch": 0.37, "learning_rate": 1.954139256400049e-05, "loss": 2.0869, "step": 24 }, { "epoch": 0.4, "learning_rate": 1.9434841787099804e-05, "loss": 2.1225, "step": 26 }, { "epoch": 0.43, "learning_rate": 1.9317525684566686e-05, "loss": 2.2122, "step": 28 }, { "epoch": 0.47, "learning_rate": 1.918957811620231e-05, "loss": 2.1554, "step": 30 }, { "epoch": 0.5, "learning_rate": 1.9051145072503216e-05, "loss": 2.5127, "step": 32 }, { "epoch": 0.53, "learning_rate": 1.8902384508083518e-05, "loss": 1.801, "step": 34 }, { "epoch": 0.56, "learning_rate": 1.8743466161445823e-05, "loss": 1.938, "step": 36 }, { "epoch": 0.59, "learning_rate": 1.857457136130651e-05, "loss": 2.0655, "step": 38 }, { "epoch": 0.62, "learning_rate": 1.839589281969639e-05, "loss": 1.9769, "step": 40 }, { "epoch": 0.65, "learning_rate": 1.8207634412072765e-05, "loss": 2.1012, "step": 42 }, { "epoch": 0.68, "learning_rate": 1.8010010944693846e-05, "loss": 2.12, "step": 44 }, { "epoch": 0.71, "learning_rate": 1.780324790952092e-05, "loss": 2.0332, "step": 46 }, { "epoch": 0.75, "learning_rate": 1.758758122692791e-05, "loss": 2.3212, "step": 48 }, { "epoch": 0.78, "learning_rate": 1.7363256976511972e-05, "loss": 1.8738, "step": 50 }, { "epoch": 0.81, "learning_rate": 1.7130531116312202e-05, "loss": 1.9489, "step": 52 }, { "epoch": 0.84, "learning_rate": 1.688966919075687e-05, "loss": 1.9085, "step": 54 }, { "epoch": 0.87, "learning_rate": 1.6640946027672395e-05, "loss": 1.8755, "step": 56 }, { "epoch": 0.9, "learning_rate": 1.6384645424699835e-05, "loss": 2.1313, "step": 58 }, { "epoch": 0.93, "learning_rate": 1.612105982547663e-05, "loss": 1.938, "step": 60 }, { "epoch": 0.96, "learning_rate": 1.5850489985953076e-05, "loss": 2.1761, "step": 62 }, { "epoch": 0.99, "learning_rate": 1.5573244631224364e-05, "loss": 2.1247, "step": 64 }, { "epoch": 1.03, "learning_rate": 1.5289640103269626e-05, "loss": 1.8631, "step": 66 }, { "epoch": 1.06, "learning_rate": 1.5000000000000002e-05, "loss": 1.8933, "step": 68 }, { "epoch": 1.09, "learning_rate": 1.4704654806027558e-05, "loss": 1.7232, "step": 70 }, { "epoch": 1.12, "learning_rate": 1.4403941515576344e-05, "loss": 1.9163, "step": 72 }, { "epoch": 1.15, "learning_rate": 1.4098203247965876e-05, "loss": 2.0748, "step": 74 }, { "epoch": 1.18, "learning_rate": 1.3787788856105762e-05, "loss": 1.9534, "step": 76 }, { "epoch": 1.21, "learning_rate": 1.3473052528448203e-05, "loss": 2.0542, "step": 78 }, { "epoch": 1.24, "learning_rate": 1.3154353384852559e-05, "loss": 1.9958, "step": 80 }, { "epoch": 1.27, "learning_rate": 1.283205506682304e-05, "loss": 1.7535, "step": 82 }, { "epoch": 1.3, "learning_rate": 1.2506525322587207e-05, "loss": 1.8274, "step": 84 }, { "epoch": 1.34, "learning_rate": 1.2178135587488515e-05, "loss": 1.8489, "step": 86 }, { "epoch": 1.37, "learning_rate": 1.1847260560171895e-05, "loss": 1.8733, "step": 88 }, { "epoch": 1.4, "learning_rate": 1.1514277775045768e-05, "loss": 1.9922, "step": 90 }, { "epoch": 1.43, "learning_rate": 1.1179567171508463e-05, "loss": 2.0302, "step": 92 }, { "epoch": 1.46, "learning_rate": 1.0843510660430447e-05, "loss": 1.8805, "step": 94 }, { "epoch": 1.49, "learning_rate": 1.0506491688387128e-05, "loss": 2.1553, "step": 96 }, { "epoch": 1.52, "learning_rate": 1.0168894800139311e-05, "loss": 1.9718, "step": 98 }, { "epoch": 1.55, "learning_rate": 9.83110519986069e-06, "loss": 1.8164, "step": 100 }, { "epoch": 1.58, "learning_rate": 9.493508311612874e-06, "loss": 1.8331, "step": 102 }, { "epoch": 1.62, "learning_rate": 9.156489339569555e-06, "loss": 1.9306, "step": 104 }, { "epoch": 1.65, "learning_rate": 8.820432828491542e-06, "loss": 1.9804, "step": 106 }, { "epoch": 1.68, "learning_rate": 8.485722224954237e-06, "loss": 2.1306, "step": 108 }, { "epoch": 1.71, "learning_rate": 8.15273943982811e-06, "loss": 2.1067, "step": 110 }, { "epoch": 1.74, "learning_rate": 7.821864412511485e-06, "loss": 1.9919, "step": 112 }, { "epoch": 1.77, "learning_rate": 7.493474677412795e-06, "loss": 1.9345, "step": 114 }, { "epoch": 1.8, "learning_rate": 7.16794493317696e-06, "loss": 1.9166, "step": 116 }, { "epoch": 1.83, "learning_rate": 6.845646615147445e-06, "loss": 1.998, "step": 118 }, { "epoch": 1.86, "learning_rate": 6.526947471551799e-06, "loss": 1.8984, "step": 120 }, { "epoch": 1.9, "learning_rate": 6.21221114389424e-06, "loss": 1.9596, "step": 122 }, { "epoch": 1.93, "learning_rate": 5.901796752034128e-06, "loss": 1.8888, "step": 124 }, { "epoch": 1.96, "learning_rate": 5.5960584844236565e-06, "loss": 2.1129, "step": 126 }, { "epoch": 1.99, "learning_rate": 5.295345193972445e-06, "loss": 2.0109, "step": 128 } ], "logging_steps": 2, "max_steps": 192, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.4420751512354816e+16, "trial_name": null, "trial_params": null }