{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 6.666666666666667e-06, "loss": 0.9785, "step": 1 }, { "epoch": 0.12, "learning_rate": 1.3333333333333333e-05, "loss": 1.0381, "step": 2 }, { "epoch": 0.19, "learning_rate": 2e-05, "loss": 0.5466, "step": 3 }, { "epoch": 0.25, "learning_rate": 1.999429490929718e-05, "loss": 2.5332, "step": 4 }, { "epoch": 0.31, "learning_rate": 1.9977186146800707e-05, "loss": 0.4666, "step": 5 }, { "epoch": 0.38, "learning_rate": 1.994869323391895e-05, "loss": 0.2064, "step": 6 }, { "epoch": 0.44, "learning_rate": 1.990884868158239e-05, "loss": 0.2202, "step": 7 }, { "epoch": 0.5, "learning_rate": 1.985769795314804e-05, "loss": 0.1354, "step": 8 }, { "epoch": 0.56, "learning_rate": 1.9795299412524948e-05, "loss": 0.2023, "step": 9 }, { "epoch": 0.62, "learning_rate": 1.9721724257579907e-05, "loss": 0.1093, "step": 10 }, { "epoch": 0.69, "learning_rate": 1.963705643889941e-05, "loss": 0.126, "step": 11 }, { "epoch": 0.75, "learning_rate": 1.954139256400049e-05, "loss": 0.1307, "step": 12 }, { "epoch": 0.81, "learning_rate": 1.9434841787099804e-05, "loss": 0.1108, "step": 13 }, { "epoch": 0.88, "learning_rate": 1.9317525684566686e-05, "loss": 0.1157, "step": 14 }, { "epoch": 0.94, "learning_rate": 1.918957811620231e-05, "loss": 0.1541, "step": 15 }, { "epoch": 1.0, "learning_rate": 1.9051145072503216e-05, "loss": 0.1068, "step": 16 }, { "epoch": 1.06, "learning_rate": 1.8902384508083518e-05, "loss": 0.1263, "step": 17 }, { "epoch": 1.12, "learning_rate": 1.8743466161445823e-05, "loss": 0.1382, "step": 18 }, { "epoch": 1.19, "learning_rate": 1.857457136130651e-05, "loss": 0.1142, "step": 19 }, { "epoch": 1.25, "learning_rate": 1.839589281969639e-05, "loss": 0.1077, "step": 20 }, { "epoch": 1.31, "learning_rate": 1.8207634412072765e-05, "loss": 0.116, "step": 21 }, { "epoch": 1.38, "learning_rate": 1.8010010944693846e-05, "loss": 0.1183, "step": 22 }, { "epoch": 1.44, "learning_rate": 1.780324790952092e-05, "loss": 0.1205, "step": 23 }, { "epoch": 1.5, "learning_rate": 1.758758122692791e-05, "loss": 0.1078, "step": 24 }, { "epoch": 1.56, "learning_rate": 1.7363256976511972e-05, "loss": 0.1143, "step": 25 }, { "epoch": 1.62, "learning_rate": 1.7130531116312202e-05, "loss": 0.1083, "step": 26 }, { "epoch": 1.69, "learning_rate": 1.688966919075687e-05, "loss": 0.1096, "step": 27 }, { "epoch": 1.75, "learning_rate": 1.6640946027672395e-05, "loss": 0.0986, "step": 28 }, { "epoch": 1.81, "learning_rate": 1.6384645424699835e-05, "loss": 0.11, "step": 29 }, { "epoch": 1.88, "learning_rate": 1.612105982547663e-05, "loss": 0.0924, "step": 30 }, { "epoch": 1.94, "learning_rate": 1.5850489985953076e-05, "loss": 0.1262, "step": 31 }, { "epoch": 2.0, "learning_rate": 1.5573244631224364e-05, "loss": 0.0916, "step": 32 }, { "epoch": 2.06, "learning_rate": 1.5289640103269626e-05, "loss": 0.0937, "step": 33 }, { "epoch": 2.12, "learning_rate": 1.5000000000000002e-05, "loss": 0.1017, "step": 34 }, { "epoch": 2.19, "learning_rate": 1.4704654806027558e-05, "loss": 0.0928, "step": 35 }, { "epoch": 2.25, "learning_rate": 1.4403941515576344e-05, "loss": 0.1088, "step": 36 }, { "epoch": 2.31, "learning_rate": 1.4098203247965876e-05, "loss": 0.1034, "step": 37 }, { "epoch": 2.38, "learning_rate": 1.3787788856105762e-05, "loss": 0.1024, "step": 38 }, { "epoch": 2.44, "learning_rate": 1.3473052528448203e-05, "loss": 0.0874, "step": 39 }, { "epoch": 2.5, "learning_rate": 1.3154353384852559e-05, "loss": 0.097, "step": 40 }, { "epoch": 2.56, "learning_rate": 1.283205506682304e-05, "loss": 0.0948, "step": 41 }, { "epoch": 2.62, "learning_rate": 1.2506525322587207e-05, "loss": 0.0788, "step": 42 }, { "epoch": 2.69, "learning_rate": 1.2178135587488515e-05, "loss": 0.087, "step": 43 }, { "epoch": 2.75, "learning_rate": 1.1847260560171895e-05, "loss": 0.0866, "step": 44 }, { "epoch": 2.81, "learning_rate": 1.1514277775045768e-05, "loss": 0.0992, "step": 45 }, { "epoch": 2.88, "learning_rate": 1.1179567171508463e-05, "loss": 0.0916, "step": 46 }, { "epoch": 2.94, "learning_rate": 1.0843510660430447e-05, "loss": 0.0786, "step": 47 }, { "epoch": 3.0, "learning_rate": 1.0506491688387128e-05, "loss": 0.0915, "step": 48 }, { "epoch": 3.06, "learning_rate": 1.0168894800139311e-05, "loss": 0.0815, "step": 49 }, { "epoch": 3.12, "learning_rate": 9.83110519986069e-06, "loss": 0.0762, "step": 50 }, { "epoch": 3.19, "learning_rate": 9.493508311612874e-06, "loss": 0.0908, "step": 51 }, { "epoch": 3.25, "learning_rate": 9.156489339569555e-06, "loss": 0.0753, "step": 52 }, { "epoch": 3.31, "learning_rate": 8.820432828491542e-06, "loss": 0.0576, "step": 53 }, { "epoch": 3.38, "learning_rate": 8.485722224954237e-06, "loss": 0.0742, "step": 54 }, { "epoch": 3.44, "learning_rate": 8.15273943982811e-06, "loss": 0.0976, "step": 55 }, { "epoch": 3.5, "learning_rate": 7.821864412511485e-06, "loss": 0.0964, "step": 56 }, { "epoch": 3.56, "learning_rate": 7.493474677412795e-06, "loss": 0.0894, "step": 57 }, { "epoch": 3.62, "learning_rate": 7.16794493317696e-06, "loss": 0.0877, "step": 58 }, { "epoch": 3.69, "learning_rate": 6.845646615147445e-06, "loss": 0.0871, "step": 59 }, { "epoch": 3.75, "learning_rate": 6.526947471551799e-06, "loss": 0.0714, "step": 60 }, { "epoch": 3.81, "learning_rate": 6.21221114389424e-06, "loss": 0.0806, "step": 61 }, { "epoch": 3.88, "learning_rate": 5.901796752034128e-06, "loss": 0.0879, "step": 62 }, { "epoch": 3.94, "learning_rate": 5.5960584844236565e-06, "loss": 0.0731, "step": 63 }, { "epoch": 4.0, "learning_rate": 5.295345193972445e-06, "loss": 0.0867, "step": 64 }, { "epoch": 4.06, "learning_rate": 5.000000000000003e-06, "loss": 0.0633, "step": 65 }, { "epoch": 4.12, "learning_rate": 4.710359896730379e-06, "loss": 0.0632, "step": 66 }, { "epoch": 4.19, "learning_rate": 4.426755368775637e-06, "loss": 0.0824, "step": 67 }, { "epoch": 4.25, "learning_rate": 4.149510014046922e-06, "loss": 0.0883, "step": 68 }, { "epoch": 4.31, "learning_rate": 3.878940174523371e-06, "loss": 0.0598, "step": 69 }, { "epoch": 4.38, "learning_rate": 3.6153545753001663e-06, "loss": 0.0612, "step": 70 }, { "epoch": 4.44, "learning_rate": 3.3590539723276083e-06, "loss": 0.0597, "step": 71 }, { "epoch": 4.5, "learning_rate": 3.110330809243134e-06, "loss": 0.0798, "step": 72 }, { "epoch": 4.56, "learning_rate": 2.869468883687798e-06, "loss": 0.0694, "step": 73 }, { "epoch": 4.62, "learning_rate": 2.6367430234880286e-06, "loss": 0.0791, "step": 74 }, { "epoch": 4.69, "learning_rate": 2.4124187730720916e-06, "loss": 0.0692, "step": 75 }, { "epoch": 4.75, "learning_rate": 2.196752090479083e-06, "loss": 0.0687, "step": 76 }, { "epoch": 4.81, "learning_rate": 1.9899890553061565e-06, "loss": 0.0582, "step": 77 }, { "epoch": 4.88, "learning_rate": 1.7923655879272395e-06, "loss": 0.056, "step": 78 }, { "epoch": 4.94, "learning_rate": 1.60410718030361e-06, "loss": 0.0554, "step": 79 }, { "epoch": 5.0, "learning_rate": 1.425428638693489e-06, "loss": 0.0484, "step": 80 }, { "epoch": 5.06, "learning_rate": 1.2565338385541792e-06, "loss": 0.0637, "step": 81 }, { "epoch": 5.12, "learning_rate": 1.097615491916485e-06, "loss": 0.0595, "step": 82 }, { "epoch": 5.19, "learning_rate": 9.488549274967873e-07, "loss": 0.0533, "step": 83 }, { "epoch": 5.25, "learning_rate": 8.10421883797694e-07, "loss": 0.0834, "step": 84 }, { "epoch": 5.31, "learning_rate": 6.824743154333157e-07, "loss": 0.0598, "step": 85 }, { "epoch": 5.38, "learning_rate": 5.651582129001987e-07, "loss": 0.0504, "step": 86 }, { "epoch": 5.44, "learning_rate": 4.5860743599951186e-07, "loss": 0.0688, "step": 87 }, { "epoch": 5.5, "learning_rate": 3.629435611005916e-07, "loss": 0.0621, "step": 88 }, { "epoch": 5.56, "learning_rate": 2.7827574242009434e-07, "loss": 0.0482, "step": 89 }, { "epoch": 5.62, "learning_rate": 2.0470058747505516e-07, "loss": 0.0515, "step": 90 }, { "epoch": 5.69, "learning_rate": 1.4230204685196202e-07, "loss": 0.0695, "step": 91 }, { "epoch": 5.75, "learning_rate": 9.11513184176116e-08, "loss": 0.051, "step": 92 }, { "epoch": 5.81, "learning_rate": 5.1306766081048456e-08, "loss": 0.0379, "step": 93 }, { "epoch": 5.88, "learning_rate": 2.2813853199292745e-08, "loss": 0.0565, "step": 94 }, { "epoch": 5.94, "learning_rate": 5.705090702819993e-09, "loss": 0.0524, "step": 95 }, { "epoch": 6.0, "learning_rate": 0.0, "loss": 0.0478, "step": 96 }, { "epoch": 6.0, "step": 96, "total_flos": 2277230247936.0, "train_loss": 0.14340813954671225, "train_runtime": 691.2623, "train_samples_per_second": 8.515, "train_steps_per_second": 0.139 } ], "logging_steps": 1.0, "max_steps": 96, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 800, "total_flos": 2277230247936.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }