| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.007187350263536177, |
| "eval_steps": 500, |
| "global_step": 60, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00011978917105893628, |
| "grad_norm": 0.953666090965271, |
| "learning_rate": 4e-05, |
| "loss": 2.065, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00023957834211787255, |
| "grad_norm": 0.8322485685348511, |
| "learning_rate": 8e-05, |
| "loss": 2.0129, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0003593675131768088, |
| "grad_norm": 0.9846245646476746, |
| "learning_rate": 0.00012, |
| "loss": 2.0287, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0004791566842357451, |
| "grad_norm": 0.8462266325950623, |
| "learning_rate": 0.00016, |
| "loss": 2.0177, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0005989458552946814, |
| "grad_norm": 0.8897871375083923, |
| "learning_rate": 0.0002, |
| "loss": 1.888, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0007187350263536176, |
| "grad_norm": 0.7725397348403931, |
| "learning_rate": 0.00019636363636363636, |
| "loss": 2.0803, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0008385241974125539, |
| "grad_norm": 0.674379825592041, |
| "learning_rate": 0.00019272727272727274, |
| "loss": 1.8878, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0009583133684714902, |
| "grad_norm": 0.7274234294891357, |
| "learning_rate": 0.0001890909090909091, |
| "loss": 2.0603, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0010781025395304265, |
| "grad_norm": 0.5945920944213867, |
| "learning_rate": 0.00018545454545454545, |
| "loss": 1.8161, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0011978917105893627, |
| "grad_norm": 0.6586880087852478, |
| "learning_rate": 0.00018181818181818183, |
| "loss": 1.9888, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001317680881648299, |
| "grad_norm": 0.7260717153549194, |
| "learning_rate": 0.0001781818181818182, |
| "loss": 1.8034, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0014374700527072352, |
| "grad_norm": 0.7235636711120605, |
| "learning_rate": 0.00017454545454545454, |
| "loss": 1.7587, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0015572592237661715, |
| "grad_norm": 0.6093252897262573, |
| "learning_rate": 0.0001709090909090909, |
| "loss": 1.8527, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0016770483948251077, |
| "grad_norm": 0.6886703968048096, |
| "learning_rate": 0.00016727272727272728, |
| "loss": 1.9906, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0017968375658840442, |
| "grad_norm": 0.613222062587738, |
| "learning_rate": 0.00016363636363636366, |
| "loss": 1.7073, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0019166267369429804, |
| "grad_norm": 0.5307195782661438, |
| "learning_rate": 0.00016, |
| "loss": 1.7741, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0020364159080019165, |
| "grad_norm": 0.6123448610305786, |
| "learning_rate": 0.00015636363636363637, |
| "loss": 1.7753, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.002156205079060853, |
| "grad_norm": 0.5280768275260925, |
| "learning_rate": 0.00015272727272727275, |
| "loss": 1.7654, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.002275994250119789, |
| "grad_norm": 0.5204810500144958, |
| "learning_rate": 0.0001490909090909091, |
| "loss": 1.7462, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0023957834211787254, |
| "grad_norm": 0.5603053569793701, |
| "learning_rate": 0.00014545454545454546, |
| "loss": 1.7481, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.002515572592237662, |
| "grad_norm": 0.46755069494247437, |
| "learning_rate": 0.00014181818181818184, |
| "loss": 1.6687, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.002635361763296598, |
| "grad_norm": 0.6600363254547119, |
| "learning_rate": 0.0001381818181818182, |
| "loss": 1.7004, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0027551509343555344, |
| "grad_norm": 0.7076529264450073, |
| "learning_rate": 0.00013454545454545455, |
| "loss": 1.8755, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0028749401054144704, |
| "grad_norm": 0.6433557271957397, |
| "learning_rate": 0.00013090909090909093, |
| "loss": 1.8093, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.002994729276473407, |
| "grad_norm": 0.416513592004776, |
| "learning_rate": 0.00012727272727272728, |
| "loss": 1.7596, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.003114518447532343, |
| "grad_norm": 0.571668803691864, |
| "learning_rate": 0.00012363636363636364, |
| "loss": 1.7043, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0032343076185912794, |
| "grad_norm": 0.5698043704032898, |
| "learning_rate": 0.00012, |
| "loss": 1.6695, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0033540967896502154, |
| "grad_norm": 0.5556852221488953, |
| "learning_rate": 0.00011636363636363636, |
| "loss": 1.8473, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.003473885960709152, |
| "grad_norm": 0.544235110282898, |
| "learning_rate": 0.00011272727272727272, |
| "loss": 1.7663, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0035936751317680884, |
| "grad_norm": 0.486969918012619, |
| "learning_rate": 0.00010909090909090909, |
| "loss": 1.7088, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0037134643028270244, |
| "grad_norm": 0.5989601016044617, |
| "learning_rate": 0.00010545454545454545, |
| "loss": 1.8058, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.003833253473885961, |
| "grad_norm": 0.6275827884674072, |
| "learning_rate": 0.00010181818181818181, |
| "loss": 1.715, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.003953042644944897, |
| "grad_norm": 0.46334367990493774, |
| "learning_rate": 9.818181818181818e-05, |
| "loss": 1.6563, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.004072831816003833, |
| "grad_norm": 0.6081639528274536, |
| "learning_rate": 9.454545454545455e-05, |
| "loss": 1.9894, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.004192620987062769, |
| "grad_norm": 0.4673190414905548, |
| "learning_rate": 9.090909090909092e-05, |
| "loss": 1.5616, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.004312410158121706, |
| "grad_norm": 0.5921393036842346, |
| "learning_rate": 8.727272727272727e-05, |
| "loss": 1.8312, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.004432199329180642, |
| "grad_norm": 0.5008795261383057, |
| "learning_rate": 8.363636363636364e-05, |
| "loss": 1.793, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.004551988500239578, |
| "grad_norm": 0.49707821011543274, |
| "learning_rate": 8e-05, |
| "loss": 1.7043, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.004671777671298514, |
| "grad_norm": 0.6233934164047241, |
| "learning_rate": 7.636363636363637e-05, |
| "loss": 1.7512, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.004791566842357451, |
| "grad_norm": 0.5607012510299683, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 1.6754, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.004911356013416387, |
| "grad_norm": 0.6054458618164062, |
| "learning_rate": 6.90909090909091e-05, |
| "loss": 1.7594, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.005031145184475324, |
| "grad_norm": 0.5356596112251282, |
| "learning_rate": 6.545454545454546e-05, |
| "loss": 1.7597, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.005150934355534259, |
| "grad_norm": 0.4934113919734955, |
| "learning_rate": 6.181818181818182e-05, |
| "loss": 1.6513, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.005270723526593196, |
| "grad_norm": 0.5411939024925232, |
| "learning_rate": 5.818181818181818e-05, |
| "loss": 1.617, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.005390512697652132, |
| "grad_norm": 0.46617865562438965, |
| "learning_rate": 5.4545454545454546e-05, |
| "loss": 1.6456, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.005510301868711069, |
| "grad_norm": 0.605570375919342, |
| "learning_rate": 5.090909090909091e-05, |
| "loss": 1.7424, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.005630091039770004, |
| "grad_norm": 0.527157187461853, |
| "learning_rate": 4.7272727272727275e-05, |
| "loss": 1.6622, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.005749880210828941, |
| "grad_norm": 0.5319560170173645, |
| "learning_rate": 4.3636363636363636e-05, |
| "loss": 1.6626, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.005869669381887877, |
| "grad_norm": 0.5411937236785889, |
| "learning_rate": 4e-05, |
| "loss": 1.5206, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.005989458552946814, |
| "grad_norm": 0.519945502281189, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 1.6893, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00610924772400575, |
| "grad_norm": 0.594184398651123, |
| "learning_rate": 3.272727272727273e-05, |
| "loss": 1.9877, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.006229036895064686, |
| "grad_norm": 0.63973468542099, |
| "learning_rate": 2.909090909090909e-05, |
| "loss": 1.9143, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.006348826066123622, |
| "grad_norm": 0.5652793049812317, |
| "learning_rate": 2.5454545454545454e-05, |
| "loss": 1.6949, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.006468615237182559, |
| "grad_norm": 0.5221755504608154, |
| "learning_rate": 2.1818181818181818e-05, |
| "loss": 1.8363, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.006588404408241495, |
| "grad_norm": 0.5718774795532227, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 1.7272, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.006708193579300431, |
| "grad_norm": 0.4630696177482605, |
| "learning_rate": 1.4545454545454545e-05, |
| "loss": 1.5824, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.006827982750359367, |
| "grad_norm": 0.5809832811355591, |
| "learning_rate": 1.0909090909090909e-05, |
| "loss": 1.7316, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.006947771921418304, |
| "grad_norm": 0.4795840382575989, |
| "learning_rate": 7.272727272727272e-06, |
| "loss": 1.5127, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.00706756109247724, |
| "grad_norm": 0.4962644875049591, |
| "learning_rate": 3.636363636363636e-06, |
| "loss": 1.633, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.007187350263536177, |
| "grad_norm": 0.47581109404563904, |
| "learning_rate": 0.0, |
| "loss": 1.5483, |
| "step": 60 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 60, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3581339433615360.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|