{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.007187350263536177, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011978917105893628, "grad_norm": 0.953666090965271, "learning_rate": 4e-05, "loss": 2.065, "step": 1 }, { "epoch": 0.00023957834211787255, "grad_norm": 0.8322485685348511, "learning_rate": 8e-05, "loss": 2.0129, "step": 2 }, { "epoch": 0.0003593675131768088, "grad_norm": 0.9846245646476746, "learning_rate": 0.00012, "loss": 2.0287, "step": 3 }, { "epoch": 0.0004791566842357451, "grad_norm": 0.8462266325950623, "learning_rate": 0.00016, "loss": 2.0177, "step": 4 }, { "epoch": 0.0005989458552946814, "grad_norm": 0.8897871375083923, "learning_rate": 0.0002, "loss": 1.888, "step": 5 }, { "epoch": 0.0007187350263536176, "grad_norm": 0.7725397348403931, "learning_rate": 0.00019636363636363636, "loss": 2.0803, "step": 6 }, { "epoch": 0.0008385241974125539, "grad_norm": 0.674379825592041, "learning_rate": 0.00019272727272727274, "loss": 1.8878, "step": 7 }, { "epoch": 0.0009583133684714902, "grad_norm": 0.7274234294891357, "learning_rate": 0.0001890909090909091, "loss": 2.0603, "step": 8 }, { "epoch": 0.0010781025395304265, "grad_norm": 0.5945920944213867, "learning_rate": 0.00018545454545454545, "loss": 1.8161, "step": 9 }, { "epoch": 0.0011978917105893627, "grad_norm": 0.6586880087852478, "learning_rate": 0.00018181818181818183, "loss": 1.9888, "step": 10 }, { "epoch": 0.001317680881648299, "grad_norm": 0.7260717153549194, "learning_rate": 0.0001781818181818182, "loss": 1.8034, "step": 11 }, { "epoch": 0.0014374700527072352, "grad_norm": 0.7235636711120605, "learning_rate": 0.00017454545454545454, "loss": 1.7587, "step": 12 }, { "epoch": 0.0015572592237661715, "grad_norm": 0.6093252897262573, "learning_rate": 0.0001709090909090909, "loss": 1.8527, "step": 13 }, { "epoch": 0.0016770483948251077, "grad_norm": 0.6886703968048096, "learning_rate": 0.00016727272727272728, "loss": 1.9906, "step": 14 }, { "epoch": 0.0017968375658840442, "grad_norm": 0.613222062587738, "learning_rate": 0.00016363636363636366, "loss": 1.7073, "step": 15 }, { "epoch": 0.0019166267369429804, "grad_norm": 0.5307195782661438, "learning_rate": 0.00016, "loss": 1.7741, "step": 16 }, { "epoch": 0.0020364159080019165, "grad_norm": 0.6123448610305786, "learning_rate": 0.00015636363636363637, "loss": 1.7753, "step": 17 }, { "epoch": 0.002156205079060853, "grad_norm": 0.5280768275260925, "learning_rate": 0.00015272727272727275, "loss": 1.7654, "step": 18 }, { "epoch": 0.002275994250119789, "grad_norm": 0.5204810500144958, "learning_rate": 0.0001490909090909091, "loss": 1.7462, "step": 19 }, { "epoch": 0.0023957834211787254, "grad_norm": 0.5603053569793701, "learning_rate": 0.00014545454545454546, "loss": 1.7481, "step": 20 }, { "epoch": 0.002515572592237662, "grad_norm": 0.46755069494247437, "learning_rate": 0.00014181818181818184, "loss": 1.6687, "step": 21 }, { "epoch": 0.002635361763296598, "grad_norm": 0.6600363254547119, "learning_rate": 0.0001381818181818182, "loss": 1.7004, "step": 22 }, { "epoch": 0.0027551509343555344, "grad_norm": 0.7076529264450073, "learning_rate": 0.00013454545454545455, "loss": 1.8755, "step": 23 }, { "epoch": 0.0028749401054144704, "grad_norm": 0.6433557271957397, "learning_rate": 0.00013090909090909093, "loss": 1.8093, "step": 24 }, { "epoch": 0.002994729276473407, "grad_norm": 0.416513592004776, "learning_rate": 0.00012727272727272728, "loss": 1.7596, "step": 25 }, { "epoch": 0.003114518447532343, "grad_norm": 0.571668803691864, "learning_rate": 0.00012363636363636364, "loss": 1.7043, "step": 26 }, { "epoch": 0.0032343076185912794, "grad_norm": 0.5698043704032898, "learning_rate": 0.00012, "loss": 1.6695, "step": 27 }, { "epoch": 0.0033540967896502154, "grad_norm": 0.5556852221488953, "learning_rate": 0.00011636363636363636, "loss": 1.8473, "step": 28 }, { "epoch": 0.003473885960709152, "grad_norm": 0.544235110282898, "learning_rate": 0.00011272727272727272, "loss": 1.7663, "step": 29 }, { "epoch": 0.0035936751317680884, "grad_norm": 0.486969918012619, "learning_rate": 0.00010909090909090909, "loss": 1.7088, "step": 30 }, { "epoch": 0.0037134643028270244, "grad_norm": 0.5989601016044617, "learning_rate": 0.00010545454545454545, "loss": 1.8058, "step": 31 }, { "epoch": 0.003833253473885961, "grad_norm": 0.6275827884674072, "learning_rate": 0.00010181818181818181, "loss": 1.715, "step": 32 }, { "epoch": 0.003953042644944897, "grad_norm": 0.46334367990493774, "learning_rate": 9.818181818181818e-05, "loss": 1.6563, "step": 33 }, { "epoch": 0.004072831816003833, "grad_norm": 0.6081639528274536, "learning_rate": 9.454545454545455e-05, "loss": 1.9894, "step": 34 }, { "epoch": 0.004192620987062769, "grad_norm": 0.4673190414905548, "learning_rate": 9.090909090909092e-05, "loss": 1.5616, "step": 35 }, { "epoch": 0.004312410158121706, "grad_norm": 0.5921393036842346, "learning_rate": 8.727272727272727e-05, "loss": 1.8312, "step": 36 }, { "epoch": 0.004432199329180642, "grad_norm": 0.5008795261383057, "learning_rate": 8.363636363636364e-05, "loss": 1.793, "step": 37 }, { "epoch": 0.004551988500239578, "grad_norm": 0.49707821011543274, "learning_rate": 8e-05, "loss": 1.7043, "step": 38 }, { "epoch": 0.004671777671298514, "grad_norm": 0.6233934164047241, "learning_rate": 7.636363636363637e-05, "loss": 1.7512, "step": 39 }, { "epoch": 0.004791566842357451, "grad_norm": 0.5607012510299683, "learning_rate": 7.272727272727273e-05, "loss": 1.6754, "step": 40 }, { "epoch": 0.004911356013416387, "grad_norm": 0.6054458618164062, "learning_rate": 6.90909090909091e-05, "loss": 1.7594, "step": 41 }, { "epoch": 0.005031145184475324, "grad_norm": 0.5356596112251282, "learning_rate": 6.545454545454546e-05, "loss": 1.7597, "step": 42 }, { "epoch": 0.005150934355534259, "grad_norm": 0.4934113919734955, "learning_rate": 6.181818181818182e-05, "loss": 1.6513, "step": 43 }, { "epoch": 0.005270723526593196, "grad_norm": 0.5411939024925232, "learning_rate": 5.818181818181818e-05, "loss": 1.617, "step": 44 }, { "epoch": 0.005390512697652132, "grad_norm": 0.46617865562438965, "learning_rate": 5.4545454545454546e-05, "loss": 1.6456, "step": 45 }, { "epoch": 0.005510301868711069, "grad_norm": 0.605570375919342, "learning_rate": 5.090909090909091e-05, "loss": 1.7424, "step": 46 }, { "epoch": 0.005630091039770004, "grad_norm": 0.527157187461853, "learning_rate": 4.7272727272727275e-05, "loss": 1.6622, "step": 47 }, { "epoch": 0.005749880210828941, "grad_norm": 0.5319560170173645, "learning_rate": 4.3636363636363636e-05, "loss": 1.6626, "step": 48 }, { "epoch": 0.005869669381887877, "grad_norm": 0.5411937236785889, "learning_rate": 4e-05, "loss": 1.5206, "step": 49 }, { "epoch": 0.005989458552946814, "grad_norm": 0.519945502281189, "learning_rate": 3.6363636363636364e-05, "loss": 1.6893, "step": 50 }, { "epoch": 0.00610924772400575, "grad_norm": 0.594184398651123, "learning_rate": 3.272727272727273e-05, "loss": 1.9877, "step": 51 }, { "epoch": 0.006229036895064686, "grad_norm": 0.63973468542099, "learning_rate": 2.909090909090909e-05, "loss": 1.9143, "step": 52 }, { "epoch": 0.006348826066123622, "grad_norm": 0.5652793049812317, "learning_rate": 2.5454545454545454e-05, "loss": 1.6949, "step": 53 }, { "epoch": 0.006468615237182559, "grad_norm": 0.5221755504608154, "learning_rate": 2.1818181818181818e-05, "loss": 1.8363, "step": 54 }, { "epoch": 0.006588404408241495, "grad_norm": 0.5718774795532227, "learning_rate": 1.8181818181818182e-05, "loss": 1.7272, "step": 55 }, { "epoch": 0.006708193579300431, "grad_norm": 0.4630696177482605, "learning_rate": 1.4545454545454545e-05, "loss": 1.5824, "step": 56 }, { "epoch": 0.006827982750359367, "grad_norm": 0.5809832811355591, "learning_rate": 1.0909090909090909e-05, "loss": 1.7316, "step": 57 }, { "epoch": 0.006947771921418304, "grad_norm": 0.4795840382575989, "learning_rate": 7.272727272727272e-06, "loss": 1.5127, "step": 58 }, { "epoch": 0.00706756109247724, "grad_norm": 0.4962644875049591, "learning_rate": 3.636363636363636e-06, "loss": 1.633, "step": 59 }, { "epoch": 0.007187350263536177, "grad_norm": 0.47581109404563904, "learning_rate": 0.0, "loss": 1.5483, "step": 60 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3581339433615360.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }