{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.13801035077630822, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0023001725129384704, "grad_norm": 1.1044824123382568, "learning_rate": 4e-05, "loss": 1.478, "step": 1 }, { "epoch": 0.004600345025876941, "grad_norm": 1.002670407295227, "learning_rate": 8e-05, "loss": 1.3821, "step": 2 }, { "epoch": 0.006900517538815411, "grad_norm": 1.0559437274932861, "learning_rate": 0.00012, "loss": 1.3293, "step": 3 }, { "epoch": 0.009200690051753882, "grad_norm": 0.7474594116210938, "learning_rate": 0.00016, "loss": 1.3018, "step": 4 }, { "epoch": 0.011500862564692352, "grad_norm": 0.844304621219635, "learning_rate": 0.0002, "loss": 1.3377, "step": 5 }, { "epoch": 0.013801035077630822, "grad_norm": 0.6108690500259399, "learning_rate": 0.00019636363636363636, "loss": 1.3059, "step": 6 }, { "epoch": 0.016101207590569294, "grad_norm": 0.7801899909973145, "learning_rate": 0.00019272727272727274, "loss": 1.1677, "step": 7 }, { "epoch": 0.018401380103507763, "grad_norm": 0.7782777547836304, "learning_rate": 0.0001890909090909091, "loss": 1.271, "step": 8 }, { "epoch": 0.020701552616446232, "grad_norm": 0.653756320476532, "learning_rate": 0.00018545454545454545, "loss": 1.0682, "step": 9 }, { "epoch": 0.023001725129384705, "grad_norm": 0.5281978249549866, "learning_rate": 0.00018181818181818183, "loss": 1.1964, "step": 10 }, { "epoch": 0.025301897642323174, "grad_norm": 0.5355067253112793, "learning_rate": 0.0001781818181818182, "loss": 1.0085, "step": 11 }, { "epoch": 0.027602070155261643, "grad_norm": 0.47657492756843567, "learning_rate": 0.00017454545454545454, "loss": 0.9768, "step": 12 }, { "epoch": 0.029902242668200116, "grad_norm": 0.5171030759811401, "learning_rate": 0.0001709090909090909, "loss": 0.9436, "step": 13 }, { "epoch": 0.03220241518113859, "grad_norm": 0.467006117105484, "learning_rate": 0.00016727272727272728, "loss": 1.1535, "step": 14 }, { "epoch": 0.034502587694077054, "grad_norm": 0.5699396729469299, "learning_rate": 0.00016363636363636366, "loss": 0.9282, "step": 15 }, { "epoch": 0.03680276020701553, "grad_norm": 0.511089026927948, "learning_rate": 0.00016, "loss": 1.0179, "step": 16 }, { "epoch": 0.039102932719954, "grad_norm": 0.5324010252952576, "learning_rate": 0.00015636363636363637, "loss": 0.8374, "step": 17 }, { "epoch": 0.041403105232892465, "grad_norm": 0.5165485739707947, "learning_rate": 0.00015272727272727275, "loss": 1.012, "step": 18 }, { "epoch": 0.04370327774583094, "grad_norm": 0.4569143056869507, "learning_rate": 0.0001490909090909091, "loss": 1.0364, "step": 19 }, { "epoch": 0.04600345025876941, "grad_norm": 0.5322045683860779, "learning_rate": 0.00014545454545454546, "loss": 1.0632, "step": 20 }, { "epoch": 0.048303622771707876, "grad_norm": 0.5586767792701721, "learning_rate": 0.00014181818181818184, "loss": 0.9602, "step": 21 }, { "epoch": 0.05060379528464635, "grad_norm": 0.49304378032684326, "learning_rate": 0.0001381818181818182, "loss": 0.9795, "step": 22 }, { "epoch": 0.05290396779758482, "grad_norm": 0.5537046790122986, "learning_rate": 0.00013454545454545455, "loss": 0.822, "step": 23 }, { "epoch": 0.055204140310523286, "grad_norm": 0.5536014437675476, "learning_rate": 0.00013090909090909093, "loss": 1.0246, "step": 24 }, { "epoch": 0.05750431282346176, "grad_norm": 0.48426035046577454, "learning_rate": 0.00012727272727272728, "loss": 1.0075, "step": 25 }, { "epoch": 0.05980448533640023, "grad_norm": 0.48291561007499695, "learning_rate": 0.00012363636363636364, "loss": 1.0763, "step": 26 }, { "epoch": 0.0621046578493387, "grad_norm": 0.5491870045661926, "learning_rate": 0.00012, "loss": 0.9692, "step": 27 }, { "epoch": 0.06440483036227718, "grad_norm": 0.5002620220184326, "learning_rate": 0.00011636363636363636, "loss": 0.8783, "step": 28 }, { "epoch": 0.06670500287521564, "grad_norm": 0.5101091265678406, "learning_rate": 0.00011272727272727272, "loss": 1.1709, "step": 29 }, { "epoch": 0.06900517538815411, "grad_norm": 0.4794892966747284, "learning_rate": 0.00010909090909090909, "loss": 0.9715, "step": 30 }, { "epoch": 0.07130534790109258, "grad_norm": 0.5005142092704773, "learning_rate": 0.00010545454545454545, "loss": 1.048, "step": 31 }, { "epoch": 0.07360552041403105, "grad_norm": 0.558136522769928, "learning_rate": 0.00010181818181818181, "loss": 0.9694, "step": 32 }, { "epoch": 0.07590569292696953, "grad_norm": 0.5822620391845703, "learning_rate": 9.818181818181818e-05, "loss": 0.8191, "step": 33 }, { "epoch": 0.078205865439908, "grad_norm": 0.5888277888298035, "learning_rate": 9.454545454545455e-05, "loss": 0.9948, "step": 34 }, { "epoch": 0.08050603795284646, "grad_norm": 0.5250777006149292, "learning_rate": 9.090909090909092e-05, "loss": 0.9533, "step": 35 }, { "epoch": 0.08280621046578493, "grad_norm": 0.4621059000492096, "learning_rate": 8.727272727272727e-05, "loss": 1.0415, "step": 36 }, { "epoch": 0.0851063829787234, "grad_norm": 0.565501868724823, "learning_rate": 8.363636363636364e-05, "loss": 0.8335, "step": 37 }, { "epoch": 0.08740655549166187, "grad_norm": 0.5443646907806396, "learning_rate": 8e-05, "loss": 0.8514, "step": 38 }, { "epoch": 0.08970672800460035, "grad_norm": 0.5232164859771729, "learning_rate": 7.636363636363637e-05, "loss": 0.904, "step": 39 }, { "epoch": 0.09200690051753882, "grad_norm": 0.5777676701545715, "learning_rate": 7.272727272727273e-05, "loss": 1.1459, "step": 40 }, { "epoch": 0.09430707303047729, "grad_norm": 0.545629620552063, "learning_rate": 6.90909090909091e-05, "loss": 0.8957, "step": 41 }, { "epoch": 0.09660724554341575, "grad_norm": 0.6201319098472595, "learning_rate": 6.545454545454546e-05, "loss": 0.9554, "step": 42 }, { "epoch": 0.09890741805635422, "grad_norm": 0.5603893995285034, "learning_rate": 6.181818181818182e-05, "loss": 0.8743, "step": 43 }, { "epoch": 0.1012075905692927, "grad_norm": 0.5196118354797363, "learning_rate": 5.818181818181818e-05, "loss": 1.1818, "step": 44 }, { "epoch": 0.10350776308223117, "grad_norm": 0.5396114587783813, "learning_rate": 5.4545454545454546e-05, "loss": 0.8085, "step": 45 }, { "epoch": 0.10580793559516964, "grad_norm": 0.5425748229026794, "learning_rate": 5.090909090909091e-05, "loss": 0.8977, "step": 46 }, { "epoch": 0.10810810810810811, "grad_norm": 0.4833182692527771, "learning_rate": 4.7272727272727275e-05, "loss": 0.9912, "step": 47 }, { "epoch": 0.11040828062104657, "grad_norm": 0.5984821319580078, "learning_rate": 4.3636363636363636e-05, "loss": 0.884, "step": 48 }, { "epoch": 0.11270845313398505, "grad_norm": 0.5533356070518494, "learning_rate": 4e-05, "loss": 0.8668, "step": 49 }, { "epoch": 0.11500862564692352, "grad_norm": 0.5323106050491333, "learning_rate": 3.6363636363636364e-05, "loss": 0.8699, "step": 50 }, { "epoch": 0.11730879815986199, "grad_norm": 0.5784793496131897, "learning_rate": 3.272727272727273e-05, "loss": 0.9276, "step": 51 }, { "epoch": 0.11960897067280046, "grad_norm": 0.4988090395927429, "learning_rate": 2.909090909090909e-05, "loss": 1.0361, "step": 52 }, { "epoch": 0.12190914318573894, "grad_norm": 0.628527820110321, "learning_rate": 2.5454545454545454e-05, "loss": 0.7982, "step": 53 }, { "epoch": 0.1242093156986774, "grad_norm": 0.6348117589950562, "learning_rate": 2.1818181818181818e-05, "loss": 0.7576, "step": 54 }, { "epoch": 0.12650948821161587, "grad_norm": 0.5996023416519165, "learning_rate": 1.8181818181818182e-05, "loss": 0.8433, "step": 55 }, { "epoch": 0.12880966072455435, "grad_norm": 0.7210708856582642, "learning_rate": 1.4545454545454545e-05, "loss": 0.811, "step": 56 }, { "epoch": 0.1311098332374928, "grad_norm": 0.5307955145835876, "learning_rate": 1.0909090909090909e-05, "loss": 0.8914, "step": 57 }, { "epoch": 0.13341000575043127, "grad_norm": 0.5893259048461914, "learning_rate": 7.272727272727272e-06, "loss": 0.8525, "step": 58 }, { "epoch": 0.13571017826336976, "grad_norm": 0.5287377834320068, "learning_rate": 3.636363636363636e-06, "loss": 0.8509, "step": 59 }, { "epoch": 0.13801035077630822, "grad_norm": 0.5250273942947388, "learning_rate": 0.0, "loss": 0.8053, "step": 60 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.997055015960576e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }