{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 152,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013157894736842105,
      "grad_norm": 0.011193246343451556,
      "learning_rate": 5e-06,
      "loss": 0.1709,
      "step": 1
    },
    {
      "epoch": 0.02631578947368421,
      "grad_norm": 0.012632831062784382,
      "learning_rate": 1e-05,
      "loss": 0.1722,
      "step": 2
    },
    {
      "epoch": 0.039473684210526314,
      "grad_norm": 0.01217162428713648,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.1719,
      "step": 3
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 0.014278596408591488,
      "learning_rate": 2e-05,
      "loss": 0.1736,
      "step": 4
    },
    {
      "epoch": 0.06578947368421052,
      "grad_norm": 0.011651798020362559,
      "learning_rate": 2.5e-05,
      "loss": 0.1693,
      "step": 5
    },
    {
      "epoch": 0.07894736842105263,
      "grad_norm": 0.013076438611297115,
      "learning_rate": 3.0000000000000004e-05,
      "loss": 0.1746,
      "step": 6
    },
    {
      "epoch": 0.09210526315789473,
      "grad_norm": 0.013682964249895072,
      "learning_rate": 3.5000000000000004e-05,
      "loss": 0.1704,
      "step": 7
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 0.014686466968317395,
      "learning_rate": 4e-05,
      "loss": 0.1699,
      "step": 8
    },
    {
      "epoch": 0.11842105263157894,
      "grad_norm": 0.016281931534044915,
      "learning_rate": 4.5e-05,
      "loss": 0.1689,
      "step": 9
    },
    {
      "epoch": 0.13157894736842105,
      "grad_norm": 0.0171854262921491,
      "learning_rate": 5e-05,
      "loss": 0.1687,
      "step": 10
    },
    {
      "epoch": 0.14473684210526316,
      "grad_norm": 0.018894242233212503,
      "learning_rate": 5.5e-05,
      "loss": 0.1646,
      "step": 11
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 0.01779881219815579,
      "learning_rate": 6.000000000000001e-05,
      "loss": 0.1569,
      "step": 12
    },
    {
      "epoch": 0.17105263157894737,
      "grad_norm": 0.017710450658139844,
      "learning_rate": 6.500000000000001e-05,
      "loss": 0.1545,
      "step": 13
    },
    {
      "epoch": 0.18421052631578946,
      "grad_norm": 0.017592203426232824,
      "learning_rate": 7.000000000000001e-05,
      "loss": 0.1479,
      "step": 14
    },
    {
      "epoch": 0.19736842105263158,
      "grad_norm": 0.017739355110624643,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.1412,
      "step": 15
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 0.01830298878239801,
      "learning_rate": 8e-05,
      "loss": 0.1377,
      "step": 16
    },
    {
      "epoch": 0.2236842105263158,
      "grad_norm": 0.019099329601174855,
      "learning_rate": 7.99893283244655e-05,
      "loss": 0.138,
      "step": 17
    },
    {
      "epoch": 0.23684210526315788,
      "grad_norm": 0.020182334218850437,
      "learning_rate": 7.995731899209491e-05,
      "loss": 0.1335,
      "step": 18
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.021528435171673927,
      "learning_rate": 7.990398908254869e-05,
      "loss": 0.1262,
      "step": 19
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.019272557399678692,
      "learning_rate": 7.982936705180139e-05,
      "loss": 0.1199,
      "step": 20
    },
    {
      "epoch": 0.27631578947368424,
      "grad_norm": 0.01555919871902734,
      "learning_rate": 7.9733492716958e-05,
      "loss": 0.1155,
      "step": 21
    },
    {
      "epoch": 0.2894736842105263,
      "grad_norm": 0.014069638973321082,
      "learning_rate": 7.961641723500821e-05,
      "loss": 0.1124,
      "step": 22
    },
    {
      "epoch": 0.3026315789473684,
      "grad_norm": 0.01364776366814878,
      "learning_rate": 7.947820307552984e-05,
      "loss": 0.1095,
      "step": 23
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 0.013427503151618732,
      "learning_rate": 7.931892398735608e-05,
      "loss": 0.1065,
      "step": 24
    },
    {
      "epoch": 0.32894736842105265,
      "grad_norm": 0.011876878823261603,
      "learning_rate": 7.913866495922436e-05,
      "loss": 0.1003,
      "step": 25
    },
    {
      "epoch": 0.34210526315789475,
      "grad_norm": 0.011314513909875947,
      "learning_rate": 7.893752217442773e-05,
      "loss": 0.096,
      "step": 26
    },
    {
      "epoch": 0.35526315789473684,
      "grad_norm": 0.010900987899870946,
      "learning_rate": 7.871560295949292e-05,
      "loss": 0.0927,
      "step": 27
    },
    {
      "epoch": 0.3684210526315789,
      "grad_norm": 0.011694060724422608,
      "learning_rate": 7.847302572691277e-05,
      "loss": 0.0835,
      "step": 28
    },
    {
      "epoch": 0.3815789473684211,
      "grad_norm": 0.012306370489616343,
      "learning_rate": 7.820991991196321e-05,
      "loss": 0.0818,
      "step": 29
    },
    {
      "epoch": 0.39473684210526316,
      "grad_norm": 0.013091064612739244,
      "learning_rate": 7.792642590363864e-05,
      "loss": 0.0784,
      "step": 30
    },
    {
      "epoch": 0.40789473684210525,
      "grad_norm": 0.012940667007794919,
      "learning_rate": 7.762269496974271e-05,
      "loss": 0.0726,
      "step": 31
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 0.012578766778023602,
      "learning_rate": 7.729888917617424e-05,
      "loss": 0.0723,
      "step": 32
    },
    {
      "epoch": 0.4342105263157895,
      "grad_norm": 0.011800836575446658,
      "learning_rate": 7.695518130045147e-05,
      "loss": 0.0629,
      "step": 33
    },
    {
      "epoch": 0.4473684210526316,
      "grad_norm": 0.01216383986530226,
      "learning_rate": 7.659175473952085e-05,
      "loss": 0.0583,
      "step": 34
    },
    {
      "epoch": 0.4605263157894737,
      "grad_norm": 0.012221190154378425,
      "learning_rate": 7.620880341189928e-05,
      "loss": 0.0547,
      "step": 35
    },
    {
      "epoch": 0.47368421052631576,
      "grad_norm": 0.012221416929572789,
      "learning_rate": 7.58065316542025e-05,
      "loss": 0.0537,
      "step": 36
    },
    {
      "epoch": 0.4868421052631579,
      "grad_norm": 0.01022797450598566,
      "learning_rate": 7.538515411211422e-05,
      "loss": 0.0525,
      "step": 37
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.010196900578949973,
      "learning_rate": 7.494489562585479e-05,
      "loss": 0.0522,
      "step": 38
    },
    {
      "epoch": 0.5131578947368421,
      "grad_norm": 0.01081474068902212,
      "learning_rate": 7.448599111021003e-05,
      "loss": 0.0497,
      "step": 39
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.011127976549735704,
      "learning_rate": 7.400868542918457e-05,
      "loss": 0.0494,
      "step": 40
    },
    {
      "epoch": 0.5394736842105263,
      "grad_norm": 0.009719680216921442,
      "learning_rate": 7.351323326534634e-05,
      "loss": 0.0474,
      "step": 41
    },
    {
      "epoch": 0.5526315789473685,
      "grad_norm": 0.011121396526423858,
      "learning_rate": 7.299989898393209e-05,
      "loss": 0.0491,
      "step": 42
    },
    {
      "epoch": 0.5657894736842105,
      "grad_norm": 0.00932521994941077,
      "learning_rate": 7.246895649178646e-05,
      "loss": 0.0468,
      "step": 43
    },
    {
      "epoch": 0.5789473684210527,
      "grad_norm": 0.010516122343315393,
      "learning_rate": 7.192068909120959e-05,
      "loss": 0.046,
      "step": 44
    },
    {
      "epoch": 0.5921052631578947,
      "grad_norm": 0.008426401948130227,
      "learning_rate": 7.135538932879176e-05,
      "loss": 0.0416,
      "step": 45
    },
    {
      "epoch": 0.6052631578947368,
      "grad_norm": 0.009231664562787666,
      "learning_rate": 7.077335883931516e-05,
      "loss": 0.0463,
      "step": 46
    },
    {
      "epoch": 0.618421052631579,
      "grad_norm": 0.007751899943539208,
      "learning_rate": 7.017490818480657e-05,
      "loss": 0.0439,
      "step": 47
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 0.007380790901519942,
      "learning_rate": 6.956035668882637e-05,
      "loss": 0.0406,
      "step": 48
    },
    {
      "epoch": 0.6447368421052632,
      "grad_norm": 0.007089333975023999,
      "learning_rate": 6.893003226608281e-05,
      "loss": 0.0417,
      "step": 49
    },
    {
      "epoch": 0.6578947368421053,
      "grad_norm": 0.0074054283948870505,
      "learning_rate": 6.828427124746191e-05,
      "loss": 0.0423,
      "step": 50
    },
    {
      "epoch": 0.6710526315789473,
      "grad_norm": 0.006910246968440978,
      "learning_rate": 6.762341820056687e-05,
      "loss": 0.0449,
      "step": 51
    },
    {
      "epoch": 0.6842105263157895,
      "grad_norm": 0.006530381828194509,
      "learning_rate": 6.69478257458623e-05,
      "loss": 0.0428,
      "step": 52
    },
    {
      "epoch": 0.6973684210526315,
      "grad_norm": 0.007097174249867035,
      "learning_rate": 6.625785436852172e-05,
      "loss": 0.0425,
      "step": 53
    },
    {
      "epoch": 0.7105263157894737,
      "grad_norm": 0.006711690699573767,
      "learning_rate": 6.555387222607845e-05,
      "loss": 0.0435,
      "step": 54
    },
    {
      "epoch": 0.7236842105263158,
      "grad_norm": 0.00701588418316799,
      "learning_rate": 6.483625495198287e-05,
      "loss": 0.0439,
      "step": 55
    },
    {
      "epoch": 0.7368421052631579,
      "grad_norm": 0.006833977043231902,
      "learning_rate": 6.410538545517026e-05,
      "loss": 0.0417,
      "step": 56
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.005986983591642089,
      "learning_rate": 6.336165371574707e-05,
      "loss": 0.0434,
      "step": 57
    },
    {
      "epoch": 0.7631578947368421,
      "grad_norm": 0.0062782002366031,
      "learning_rate": 6.260545657690369e-05,
      "loss": 0.0403,
      "step": 58
    },
    {
      "epoch": 0.7763157894736842,
      "grad_norm": 0.006945884720384615,
      "learning_rate": 6.183719753316539e-05,
      "loss": 0.0416,
      "step": 59
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.006319586224258474,
      "learning_rate": 6.105728651509424e-05,
      "loss": 0.0415,
      "step": 60
    },
    {
      "epoch": 0.8026315789473685,
      "grad_norm": 0.058532063238545366,
      "learning_rate": 6.026613967055678e-05,
      "loss": 0.0409,
      "step": 61
    },
    {
      "epoch": 0.8157894736842105,
      "grad_norm": 0.006336527241640012,
      "learning_rate": 5.946417914267425e-05,
      "loss": 0.0382,
      "step": 62
    },
    {
      "epoch": 0.8289473684210527,
      "grad_norm": 0.006470625016947808,
      "learning_rate": 5.865183284457392e-05,
      "loss": 0.0416,
      "step": 63
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 0.00682983462425869,
      "learning_rate": 5.782953423106154e-05,
      "loss": 0.0437,
      "step": 64
    },
    {
      "epoch": 0.8552631578947368,
      "grad_norm": 0.006033568829008143,
      "learning_rate": 5.699772206733689e-05,
      "loss": 0.0423,
      "step": 65
    },
    {
      "epoch": 0.868421052631579,
      "grad_norm": 0.006263481762315256,
      "learning_rate": 5.61568401948758e-05,
      "loss": 0.0401,
      "step": 66
    },
    {
      "epoch": 0.881578947368421,
      "grad_norm": 0.0074388468143108035,
      "learning_rate": 5.5307337294603595e-05,
      "loss": 0.0426,
      "step": 67
    },
    {
      "epoch": 0.8947368421052632,
      "grad_norm": 0.006213659757931369,
      "learning_rate": 5.444966664748613e-05,
      "loss": 0.0412,
      "step": 68
    },
    {
      "epoch": 0.9078947368421053,
      "grad_norm": 0.006380330465230061,
      "learning_rate": 5.3584285892666454e-05,
      "loss": 0.0428,
      "step": 69
    },
    {
      "epoch": 0.9210526315789473,
      "grad_norm": 0.006207536923484298,
      "learning_rate": 5.271165678327607e-05,
      "loss": 0.04,
      "step": 70
    },
    {
      "epoch": 0.9342105263157895,
      "grad_norm": 0.0066810038964758185,
      "learning_rate": 5.183224494005083e-05,
      "loss": 0.0435,
      "step": 71
    },
    {
      "epoch": 0.9473684210526315,
      "grad_norm": 0.006226425147587245,
      "learning_rate": 5.0946519602883326e-05,
      "loss": 0.0401,
      "step": 72
    },
    {
      "epoch": 0.9605263157894737,
      "grad_norm": 0.0059224557005355645,
      "learning_rate": 5.0054953380444116e-05,
      "loss": 0.0435,
      "step": 73
    },
    {
      "epoch": 0.9736842105263158,
      "grad_norm": 0.007488520716144708,
      "learning_rate": 4.9158021998005366e-05,
      "loss": 0.0427,
      "step": 74
    },
    {
      "epoch": 0.9868421052631579,
      "grad_norm": 0.006072226631282511,
      "learning_rate": 4.825620404360159e-05,
      "loss": 0.041,
      "step": 75
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.006569700378799453,
      "learning_rate": 4.734998071266282e-05,
      "loss": 0.0433,
      "step": 76
    },
    {
      "epoch": 1.013157894736842,
      "grad_norm": 0.005838560277534237,
      "learning_rate": 4.643983555125652e-05,
      "loss": 0.0403,
      "step": 77
    },
    {
      "epoch": 1.0263157894736843,
      "grad_norm": 0.005804466890798108,
      "learning_rate": 4.552625419807529e-05,
      "loss": 0.0414,
      "step": 78
    },
    {
      "epoch": 1.0394736842105263,
      "grad_norm": 0.0066119101268658585,
      "learning_rate": 4.460972412530791e-05,
      "loss": 0.0414,
      "step": 79
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.005556966301273331,
      "learning_rate": 4.369073437853208e-05,
      "loss": 0.0378,
      "step": 80
    },
    {
      "epoch": 1.0657894736842106,
      "grad_norm": 0.006512517911844179,
      "learning_rate": 4.276977531576767e-05,
      "loss": 0.0388,
      "step": 81
    },
    {
      "epoch": 1.0789473684210527,
      "grad_norm": 0.005833488831583047,
      "learning_rate": 4.184733834582959e-05,
      "loss": 0.039,
      "step": 82
    },
    {
      "epoch": 1.0921052631578947,
      "grad_norm": 0.006216115200940607,
      "learning_rate": 4.092391566612005e-05,
      "loss": 0.0409,
      "step": 83
    },
    {
      "epoch": 1.1052631578947367,
      "grad_norm": 0.00575317037203953,
      "learning_rate": 4e-05,
      "loss": 0.0409,
      "step": 84
    },
    {
      "epoch": 1.118421052631579,
      "grad_norm": 0.006222832163571204,
      "learning_rate": 3.9076084333879964e-05,
      "loss": 0.0387,
      "step": 85
    },
    {
      "epoch": 1.131578947368421,
      "grad_norm": 0.005647614055554481,
      "learning_rate": 3.815266165417043e-05,
      "loss": 0.0397,
      "step": 86
    },
    {
      "epoch": 1.1447368421052633,
      "grad_norm": 0.006096737358226156,
      "learning_rate": 3.723022468423234e-05,
      "loss": 0.0409,
      "step": 87
    },
    {
      "epoch": 1.1578947368421053,
      "grad_norm": 0.0054974626092716364,
      "learning_rate": 3.630926562146792e-05,
      "loss": 0.0366,
      "step": 88
    },
    {
      "epoch": 1.1710526315789473,
      "grad_norm": 0.005754720995497907,
      "learning_rate": 3.53902758746921e-05,
      "loss": 0.038,
      "step": 89
    },
    {
      "epoch": 1.1842105263157894,
      "grad_norm": 0.006362210531056341,
      "learning_rate": 3.447374580192473e-05,
      "loss": 0.0396,
      "step": 90
    },
    {
      "epoch": 1.1973684210526316,
      "grad_norm": 0.006025762605068581,
      "learning_rate": 3.356016444874348e-05,
      "loss": 0.0388,
      "step": 91
    },
    {
      "epoch": 1.2105263157894737,
      "grad_norm": 0.005994627251788469,
      "learning_rate": 3.2650019287337184e-05,
      "loss": 0.0402,
      "step": 92
    },
    {
      "epoch": 1.2236842105263157,
      "grad_norm": 0.00620754951196749,
      "learning_rate": 3.1743795956398425e-05,
      "loss": 0.0407,
      "step": 93
    },
    {
      "epoch": 1.236842105263158,
      "grad_norm": 0.006255192958964342,
      "learning_rate": 3.084197800199465e-05,
      "loss": 0.0372,
      "step": 94
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.006320719774606469,
      "learning_rate": 2.9945046619555894e-05,
      "loss": 0.0378,
      "step": 95
    },
    {
      "epoch": 1.263157894736842,
      "grad_norm": 0.0060922447961943,
      "learning_rate": 2.905348039711669e-05,
      "loss": 0.0397,
      "step": 96
    },
    {
      "epoch": 1.2763157894736843,
      "grad_norm": 0.006202374774637231,
      "learning_rate": 2.816775505994919e-05,
      "loss": 0.0375,
      "step": 97
    },
    {
      "epoch": 1.2894736842105263,
      "grad_norm": 0.005867178023820865,
      "learning_rate": 2.728834321672394e-05,
      "loss": 0.0359,
      "step": 98
    },
    {
      "epoch": 1.3026315789473684,
      "grad_norm": 0.005998890900515434,
      "learning_rate": 2.6415714107333545e-05,
      "loss": 0.0393,
      "step": 99
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.005839373614340455,
      "learning_rate": 2.5550333352513885e-05,
      "loss": 0.0386,
      "step": 100
    },
    {
      "epoch": 1.3289473684210527,
      "grad_norm": 0.0061143595917766535,
      "learning_rate": 2.4692662705396412e-05,
      "loss": 0.0376,
      "step": 101
    },
    {
      "epoch": 1.3421052631578947,
      "grad_norm": 0.0065210416681655975,
      "learning_rate": 2.3843159805124207e-05,
      "loss": 0.0414,
      "step": 102
    },
    {
      "epoch": 1.3552631578947367,
      "grad_norm": 0.006158647021865143,
      "learning_rate": 2.300227793266314e-05,
      "loss": 0.0394,
      "step": 103
    },
    {
      "epoch": 1.368421052631579,
      "grad_norm": 0.005766499988676987,
      "learning_rate": 2.2170465768938473e-05,
      "loss": 0.0371,
      "step": 104
    },
    {
      "epoch": 1.381578947368421,
      "grad_norm": 0.0059323690491137875,
      "learning_rate": 2.1348167155426073e-05,
      "loss": 0.0399,
      "step": 105
    },
    {
      "epoch": 1.3947368421052633,
      "grad_norm": 0.006516041490698414,
      "learning_rate": 2.0535820857325755e-05,
      "loss": 0.0394,
      "step": 106
    },
    {
      "epoch": 1.4078947368421053,
      "grad_norm": 0.006798971635981513,
      "learning_rate": 1.973386032944323e-05,
      "loss": 0.0389,
      "step": 107
    },
    {
      "epoch": 1.4210526315789473,
      "grad_norm": 0.006304951283408136,
      "learning_rate": 1.8942713484905762e-05,
      "loss": 0.0382,
      "step": 108
    },
    {
      "epoch": 1.4342105263157894,
      "grad_norm": 0.006305286850533848,
      "learning_rate": 1.816280246683463e-05,
      "loss": 0.0401,
      "step": 109
    },
    {
      "epoch": 1.4473684210526316,
      "grad_norm": 0.0061112906748014935,
      "learning_rate": 1.7394543423096327e-05,
      "loss": 0.0402,
      "step": 110
    },
    {
      "epoch": 1.4605263157894737,
      "grad_norm": 0.006103907371143296,
      "learning_rate": 1.6638346284252946e-05,
      "loss": 0.0387,
      "step": 111
    },
    {
      "epoch": 1.4736842105263157,
      "grad_norm": 0.006214314821635323,
      "learning_rate": 1.589461454482975e-05,
      "loss": 0.0403,
      "step": 112
    },
    {
      "epoch": 1.486842105263158,
      "grad_norm": 0.0059749757145963625,
      "learning_rate": 1.5163745048017147e-05,
      "loss": 0.0389,
      "step": 113
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.005237867395501871,
      "learning_rate": 1.4446127773921559e-05,
      "loss": 0.0369,
      "step": 114
    },
    {
      "epoch": 1.513157894736842,
      "grad_norm": 0.00650681235281057,
      "learning_rate": 1.37421456314783e-05,
      "loss": 0.0377,
      "step": 115
    },
    {
      "epoch": 1.526315789473684,
      "grad_norm": 0.006517220869170101,
      "learning_rate": 1.3052174254137713e-05,
      "loss": 0.0401,
      "step": 116
    },
    {
      "epoch": 1.5394736842105263,
      "grad_norm": 0.006712118321082451,
      "learning_rate": 1.2376581799433139e-05,
      "loss": 0.0398,
      "step": 117
    },
    {
      "epoch": 1.5526315789473686,
      "grad_norm": 0.006200500734559256,
      "learning_rate": 1.1715728752538103e-05,
      "loss": 0.0375,
      "step": 118
    },
    {
      "epoch": 1.5657894736842106,
      "grad_norm": 0.005732987430416324,
      "learning_rate": 1.1069967733917215e-05,
      "loss": 0.0391,
      "step": 119
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.006484965464125168,
      "learning_rate": 1.0439643311173642e-05,
      "loss": 0.041,
      "step": 120
    },
    {
      "epoch": 1.5921052631578947,
      "grad_norm": 0.006091177429475543,
      "learning_rate": 9.825091815193443e-06,
      "loss": 0.0396,
      "step": 121
    },
    {
      "epoch": 1.6052631578947367,
      "grad_norm": 0.005897121800005114,
      "learning_rate": 9.226641160684843e-06,
      "loss": 0.038,
      "step": 122
    },
    {
      "epoch": 1.618421052631579,
      "grad_norm": 0.006402643906597493,
      "learning_rate": 8.644610671208263e-06,
      "loss": 0.041,
      "step": 123
    },
    {
      "epoch": 1.631578947368421,
      "grad_norm": 0.006350403783775521,
      "learning_rate": 8.07931090879042e-06,
      "loss": 0.0369,
      "step": 124
    },
    {
      "epoch": 1.6447368421052633,
      "grad_norm": 0.006319766184613516,
      "learning_rate": 7.531043508213552e-06,
      "loss": 0.0407,
      "step": 125
    },
    {
      "epoch": 1.6578947368421053,
      "grad_norm": 0.006377723302699078,
      "learning_rate": 7.000101016067913e-06,
      "loss": 0.0367,
      "step": 126
    },
    {
      "epoch": 1.6710526315789473,
      "grad_norm": 0.006186351651696318,
      "learning_rate": 6.4867667346536715e-06,
      "loss": 0.0379,
      "step": 127
    },
    {
      "epoch": 1.6842105263157894,
      "grad_norm": 0.006108718491588002,
      "learning_rate": 5.991314570815441e-06,
      "loss": 0.0393,
      "step": 128
    },
    {
      "epoch": 1.6973684210526314,
      "grad_norm": 0.0057589729685953136,
      "learning_rate": 5.514008889789977e-06,
      "loss": 0.0368,
      "step": 129
    },
    {
      "epoch": 1.7105263157894737,
      "grad_norm": 0.005754253166546721,
      "learning_rate": 5.055104374145221e-06,
      "loss": 0.0352,
      "step": 130
    },
    {
      "epoch": 1.723684210526316,
      "grad_norm": 0.006859144229533852,
      "learning_rate": 4.614845887885793e-06,
      "loss": 0.0393,
      "step": 131
    },
    {
      "epoch": 1.736842105263158,
      "grad_norm": 0.007047040409450739,
      "learning_rate": 4.193468345797511e-06,
      "loss": 0.0402,
      "step": 132
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.006382411362923948,
      "learning_rate": 3.791196588100716e-06,
      "loss": 0.038,
      "step": 133
    },
    {
      "epoch": 1.763157894736842,
      "grad_norm": 0.006315946771327425,
      "learning_rate": 3.4082452604791594e-06,
      "loss": 0.0365,
      "step": 134
    },
    {
      "epoch": 1.776315789473684,
      "grad_norm": 0.006017388517146248,
      "learning_rate": 3.0448186995485307e-06,
      "loss": 0.0368,
      "step": 135
    },
    {
      "epoch": 1.7894736842105263,
      "grad_norm": 0.006327757401819577,
      "learning_rate": 2.7011108238257723e-06,
      "loss": 0.0363,
      "step": 136
    },
    {
      "epoch": 1.8026315789473686,
      "grad_norm": 0.006203858870102881,
      "learning_rate": 2.3773050302572955e-06,
      "loss": 0.0393,
      "step": 137
    },
    {
      "epoch": 1.8157894736842106,
      "grad_norm": 0.005922591453193032,
      "learning_rate": 2.073574096361366e-06,
      "loss": 0.0373,
      "step": 138
    },
    {
      "epoch": 1.8289473684210527,
      "grad_norm": 0.005850429039456173,
      "learning_rate": 1.7900800880368008e-06,
      "loss": 0.0348,
      "step": 139
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.006355905348540479,
      "learning_rate": 1.5269742730872384e-06,
      "loss": 0.0399,
      "step": 140
    },
    {
      "epoch": 1.8552631578947367,
      "grad_norm": 0.006160454658469522,
      "learning_rate": 1.2843970405070994e-06,
      "loss": 0.0376,
      "step": 141
    },
    {
      "epoch": 1.868421052631579,
      "grad_norm": 0.006410484683214994,
      "learning_rate": 1.0624778255722857e-06,
      "loss": 0.0377,
      "step": 142
    },
    {
      "epoch": 1.881578947368421,
      "grad_norm": 0.009474571629909624,
      "learning_rate": 8.613350407756438e-07,
      "loss": 0.0395,
      "step": 143
    },
    {
      "epoch": 1.8947368421052633,
      "grad_norm": 0.00641904068629697,
      "learning_rate": 6.810760126439287e-07,
      "loss": 0.0385,
      "step": 144
    },
    {
      "epoch": 1.9078947368421053,
      "grad_norm": 0.0061467775277458895,
      "learning_rate": 5.217969244701771e-07,
      "loss": 0.0371,
      "step": 145
    },
    {
      "epoch": 1.9210526315789473,
      "grad_norm": 0.0065777183398005095,
      "learning_rate": 3.835827649917967e-07,
      "loss": 0.0395,
      "step": 146
    },
    {
      "epoch": 1.9342105263157894,
      "grad_norm": 0.00622464792774119,
      "learning_rate": 2.6650728304200125e-07,
      "loss": 0.0388,
      "step": 147
    },
    {
      "epoch": 1.9473684210526314,
      "grad_norm": 0.00658284263741108,
      "learning_rate": 1.706329481986213e-07,
      "loss": 0.0394,
      "step": 148
    },
    {
      "epoch": 1.9605263157894737,
      "grad_norm": 0.006984312886553246,
      "learning_rate": 9.601091745132352e-08,
      "loss": 0.041,
      "step": 149
    },
    {
      "epoch": 1.973684210526316,
      "grad_norm": 0.006342838371429401,
      "learning_rate": 4.268100790510321e-08,
      "loss": 0.0379,
      "step": 150
    },
    {
      "epoch": 1.986842105263158,
      "grad_norm": 0.006421376568210238,
      "learning_rate": 1.0671675534510429e-08,
      "loss": 0.0394,
      "step": 151
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.006358948228182317,
      "learning_rate": 0.0,
      "loss": 0.0371,
      "step": 152
    },
    {
      "epoch": 2.0,
      "step": 152,
      "total_flos": 113543336886272.0,
      "train_loss": 0.060337725389552746,
      "train_runtime": 1259.4637,
      "train_samples_per_second": 0.965,
      "train_steps_per_second": 0.121
    }
  ],
  "logging_steps": 1,
  "max_steps": 152,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 113543336886272.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}