{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 118,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01694915254237288,
      "grad_norm": 13.582984428143623,
      "learning_rate": 8.333333333333333e-07,
      "loss": 4.6947,
      "step": 1
    },
    {
      "epoch": 0.03389830508474576,
      "grad_norm": 14.123407176783754,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 4.7048,
      "step": 2
    },
    {
      "epoch": 0.05084745762711865,
      "grad_norm": 14.538515649925046,
      "learning_rate": 2.5e-06,
      "loss": 4.4774,
      "step": 3
    },
    {
      "epoch": 0.06779661016949153,
      "grad_norm": 12.917201583651316,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 4.5093,
      "step": 4
    },
    {
      "epoch": 0.0847457627118644,
      "grad_norm": 12.878282813705553,
      "learning_rate": 4.166666666666667e-06,
      "loss": 4.5946,
      "step": 5
    },
    {
      "epoch": 0.1016949152542373,
      "grad_norm": 13.424010409794782,
      "learning_rate": 5e-06,
      "loss": 4.6969,
      "step": 6
    },
    {
      "epoch": 0.11864406779661017,
      "grad_norm": 12.247691943280598,
      "learning_rate": 5.833333333333334e-06,
      "loss": 4.4876,
      "step": 7
    },
    {
      "epoch": 0.13559322033898305,
      "grad_norm": 13.219835780776348,
      "learning_rate": 6.666666666666667e-06,
      "loss": 4.551,
      "step": 8
    },
    {
      "epoch": 0.15254237288135594,
      "grad_norm": 13.377987371106677,
      "learning_rate": 7.500000000000001e-06,
      "loss": 4.1263,
      "step": 9
    },
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 12.792383156845725,
      "learning_rate": 8.333333333333334e-06,
      "loss": 4.1093,
      "step": 10
    },
    {
      "epoch": 0.1864406779661017,
      "grad_norm": 11.58927724154859,
      "learning_rate": 9.166666666666666e-06,
      "loss": 4.0903,
      "step": 11
    },
    {
      "epoch": 0.2033898305084746,
      "grad_norm": 12.857260545779804,
      "learning_rate": 1e-05,
      "loss": 3.7907,
      "step": 12
    },
    {
      "epoch": 0.22033898305084745,
      "grad_norm": 11.263428499431882,
      "learning_rate": 9.997804182543973e-06,
      "loss": 3.4157,
      "step": 13
    },
    {
      "epoch": 0.23728813559322035,
      "grad_norm": 9.096821276875527,
      "learning_rate": 9.991218658821609e-06,
      "loss": 3.248,
      "step": 14
    },
    {
      "epoch": 0.2542372881355932,
      "grad_norm": 8.683777340261885,
      "learning_rate": 9.980249213076085e-06,
      "loss": 3.0936,
      "step": 15
    },
    {
      "epoch": 0.2711864406779661,
      "grad_norm": 7.426067753465437,
      "learning_rate": 9.964905480067585e-06,
      "loss": 2.9003,
      "step": 16
    },
    {
      "epoch": 0.288135593220339,
      "grad_norm": 6.801819161049987,
      "learning_rate": 9.945200936610821e-06,
      "loss": 2.7429,
      "step": 17
    },
    {
      "epoch": 0.3050847457627119,
      "grad_norm": 5.517891034162887,
      "learning_rate": 9.921152889737985e-06,
      "loss": 2.4546,
      "step": 18
    },
    {
      "epoch": 0.3220338983050847,
      "grad_norm": 4.607884862007527,
      "learning_rate": 9.892782461497521e-06,
      "loss": 2.2101,
      "step": 19
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 4.198018015086853,
      "learning_rate": 9.860114570402055e-06,
      "loss": 2.002,
      "step": 20
    },
    {
      "epoch": 0.3559322033898305,
      "grad_norm": 3.8937197111619337,
      "learning_rate": 9.823177909541795e-06,
      "loss": 1.9571,
      "step": 21
    },
    {
      "epoch": 0.3728813559322034,
      "grad_norm": 3.116953631354838,
      "learning_rate": 9.782004921382612e-06,
      "loss": 1.7892,
      "step": 22
    },
    {
      "epoch": 0.3898305084745763,
      "grad_norm": 2.88170444003024,
      "learning_rate": 9.736631769270958e-06,
      "loss": 1.8111,
      "step": 23
    },
    {
      "epoch": 0.4067796610169492,
      "grad_norm": 2.360981852701721,
      "learning_rate": 9.687098305670606e-06,
      "loss": 1.6501,
      "step": 24
    },
    {
      "epoch": 0.423728813559322,
      "grad_norm": 1.9535896880852222,
      "learning_rate": 9.633448037159167e-06,
      "loss": 1.4975,
      "step": 25
    },
    {
      "epoch": 0.4406779661016949,
      "grad_norm": 1.742928903020929,
      "learning_rate": 9.575728086215093e-06,
      "loss": 1.6203,
      "step": 26
    },
    {
      "epoch": 0.4576271186440678,
      "grad_norm": 1.6631917739747308,
      "learning_rate": 9.513989149828718e-06,
      "loss": 1.5796,
      "step": 27
    },
    {
      "epoch": 0.4745762711864407,
      "grad_norm": 1.1808991053273623,
      "learning_rate": 9.448285454973739e-06,
      "loss": 1.4377,
      "step": 28
    },
    {
      "epoch": 0.4915254237288136,
      "grad_norm": 1.0227483418783903,
      "learning_rate": 9.378674710978185e-06,
      "loss": 1.4009,
      "step": 29
    },
    {
      "epoch": 0.5084745762711864,
      "grad_norm": 0.9157571127323277,
      "learning_rate": 9.305218058836778e-06,
      "loss": 1.2478,
      "step": 30
    },
    {
      "epoch": 0.5254237288135594,
      "grad_norm": 0.9338477321471075,
      "learning_rate": 9.22798001750913e-06,
      "loss": 1.3688,
      "step": 31
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 0.8796568592495111,
      "learning_rate": 9.14702842725101e-06,
      "loss": 1.2535,
      "step": 32
    },
    {
      "epoch": 0.559322033898305,
      "grad_norm": 0.9219993618943527,
      "learning_rate": 9.062434390028407e-06,
      "loss": 1.2974,
      "step": 33
    },
    {
      "epoch": 0.576271186440678,
      "grad_norm": 0.9825650845819016,
      "learning_rate": 8.974272207066767e-06,
      "loss": 1.3383,
      "step": 34
    },
    {
      "epoch": 0.5932203389830508,
      "grad_norm": 0.9055827980183389,
      "learning_rate": 8.882619313590212e-06,
      "loss": 1.2601,
      "step": 35
    },
    {
      "epoch": 0.6101694915254238,
      "grad_norm": 0.9167811987216382,
      "learning_rate": 8.787556210808101e-06,
      "loss": 1.226,
      "step": 36
    },
    {
      "epoch": 0.6271186440677966,
      "grad_norm": 0.8488058692801289,
      "learning_rate": 8.689166395208638e-06,
      "loss": 1.1668,
      "step": 37
    },
    {
      "epoch": 0.6440677966101694,
      "grad_norm": 0.8462653312925347,
      "learning_rate": 8.587536285221656e-06,
      "loss": 1.2131,
      "step": 38
    },
    {
      "epoch": 0.6610169491525424,
      "grad_norm": 0.741157564370513,
      "learning_rate": 8.482755145314987e-06,
      "loss": 1.1322,
      "step": 39
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 0.7694007284550851,
      "learning_rate": 8.374915007591053e-06,
      "loss": 1.1526,
      "step": 40
    },
    {
      "epoch": 0.6949152542372882,
      "grad_norm": 0.7658488631154642,
      "learning_rate": 8.264110590952609e-06,
      "loss": 1.1856,
      "step": 41
    },
    {
      "epoch": 0.711864406779661,
      "grad_norm": 0.6405702241168747,
      "learning_rate": 8.150439217908557e-06,
      "loss": 1.0037,
      "step": 42
    },
    {
      "epoch": 0.7288135593220338,
      "grad_norm": 0.616014862055841,
      "learning_rate": 8.034000729092967e-06,
      "loss": 1.0329,
      "step": 43
    },
    {
      "epoch": 0.7457627118644068,
      "grad_norm": 0.6578920077905241,
      "learning_rate": 7.914897395572362e-06,
      "loss": 1.0895,
      "step": 44
    },
    {
      "epoch": 0.7627118644067796,
      "grad_norm": 0.5912450960157642,
      "learning_rate": 7.793233829018263e-06,
      "loss": 1.0218,
      "step": 45
    },
    {
      "epoch": 0.7796610169491526,
      "grad_norm": 0.5582880461976706,
      "learning_rate": 7.669116889823955e-06,
      "loss": 1.013,
      "step": 46
    },
    {
      "epoch": 0.7966101694915254,
      "grad_norm": 0.524466989613803,
      "learning_rate": 7.542655593246103e-06,
      "loss": 0.9566,
      "step": 47
    },
    {
      "epoch": 0.8135593220338984,
      "grad_norm": 0.48335542595907816,
      "learning_rate": 7.413961013653725e-06,
      "loss": 0.8977,
      "step": 48
    },
    {
      "epoch": 0.8305084745762712,
      "grad_norm": 0.5365637758122486,
      "learning_rate": 7.283146186968566e-06,
      "loss": 1.0325,
      "step": 49
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 0.5099489526642094,
      "learning_rate": 7.1503260113826035e-06,
      "loss": 1.0182,
      "step": 50
    },
    {
      "epoch": 0.864406779661017,
      "grad_norm": 0.4657708339456314,
      "learning_rate": 7.015617146439863e-06,
      "loss": 0.9675,
      "step": 51
    },
    {
      "epoch": 0.8813559322033898,
      "grad_norm": 0.48914070170734636,
      "learning_rate": 6.879137910571191e-06,
      "loss": 0.9954,
      "step": 52
    },
    {
      "epoch": 0.8983050847457628,
      "grad_norm": 0.46882412163559467,
      "learning_rate": 6.741008177171995e-06,
      "loss": 0.9976,
      "step": 53
    },
    {
      "epoch": 0.9152542372881356,
      "grad_norm": 0.3936331113504765,
      "learning_rate": 6.601349269314188e-06,
      "loss": 0.8422,
      "step": 54
    },
    {
      "epoch": 0.9322033898305084,
      "grad_norm": 0.437572210845862,
      "learning_rate": 6.46028385318488e-06,
      "loss": 0.9785,
      "step": 55
    },
    {
      "epoch": 0.9491525423728814,
      "grad_norm": 0.42239632796928905,
      "learning_rate": 6.3179358303453386e-06,
      "loss": 0.9773,
      "step": 56
    },
    {
      "epoch": 0.9661016949152542,
      "grad_norm": 0.3849602768372098,
      "learning_rate": 6.17443022890492e-06,
      "loss": 0.9132,
      "step": 57
    },
    {
      "epoch": 0.9830508474576272,
      "grad_norm": 0.35887608129604537,
      "learning_rate": 6.029893093705492e-06,
      "loss": 0.8932,
      "step": 58
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.3775382679783028,
      "learning_rate": 5.884451375612865e-06,
      "loss": 0.9605,
      "step": 59
    },
    {
      "epoch": 1.0169491525423728,
      "grad_norm": 0.35775761239990117,
      "learning_rate": 5.738232820012407e-06,
      "loss": 0.9254,
      "step": 60
    },
    {
      "epoch": 1.0338983050847457,
      "grad_norm": 0.34923814072726866,
      "learning_rate": 5.591365854606829e-06,
      "loss": 0.9174,
      "step": 61
    },
    {
      "epoch": 1.0508474576271187,
      "grad_norm": 0.34017872341374605,
      "learning_rate": 5.443979476614674e-06,
      "loss": 0.9484,
      "step": 62
    },
    {
      "epoch": 1.0677966101694916,
      "grad_norm": 0.3089243172711035,
      "learning_rate": 5.296203139468572e-06,
      "loss": 0.8374,
      "step": 63
    },
    {
      "epoch": 1.0847457627118644,
      "grad_norm": 0.2871261805872519,
      "learning_rate": 5.148166639112799e-06,
      "loss": 0.8462,
      "step": 64
    },
    {
      "epoch": 1.1016949152542372,
      "grad_norm": 0.30706922907791745,
      "learning_rate": 5e-06,
      "loss": 0.8865,
      "step": 65
    },
    {
      "epoch": 1.11864406779661,
      "grad_norm": 0.2938750275036386,
      "learning_rate": 4.8518333608872015e-06,
      "loss": 0.8858,
      "step": 66
    },
    {
      "epoch": 1.1355932203389831,
      "grad_norm": 0.2549040934942167,
      "learning_rate": 4.703796860531429e-06,
      "loss": 0.7952,
      "step": 67
    },
    {
      "epoch": 1.152542372881356,
      "grad_norm": 0.28685443691965085,
      "learning_rate": 4.556020523385326e-06,
      "loss": 0.8437,
      "step": 68
    },
    {
      "epoch": 1.1694915254237288,
      "grad_norm": 0.2867782597037466,
      "learning_rate": 4.408634145393172e-06,
      "loss": 0.8934,
      "step": 69
    },
    {
      "epoch": 1.1864406779661016,
      "grad_norm": 0.31047696033713723,
      "learning_rate": 4.261767179987595e-06,
      "loss": 0.9716,
      "step": 70
    },
    {
      "epoch": 1.2033898305084745,
      "grad_norm": 0.25941316846282275,
      "learning_rate": 4.115548624387136e-06,
      "loss": 0.8287,
      "step": 71
    },
    {
      "epoch": 1.2203389830508475,
      "grad_norm": 0.2699492319649961,
      "learning_rate": 3.970106906294509e-06,
      "loss": 0.8472,
      "step": 72
    },
    {
      "epoch": 1.2372881355932204,
      "grad_norm": 0.25234340024796287,
      "learning_rate": 3.825569771095082e-06,
      "loss": 0.8287,
      "step": 73
    },
    {
      "epoch": 1.2542372881355932,
      "grad_norm": 0.2660601540051665,
      "learning_rate": 3.682064169654663e-06,
      "loss": 0.8793,
      "step": 74
    },
    {
      "epoch": 1.271186440677966,
      "grad_norm": 0.242634531727012,
      "learning_rate": 3.539716146815122e-06,
      "loss": 0.7988,
      "step": 75
    },
    {
      "epoch": 1.288135593220339,
      "grad_norm": 0.2465357214488806,
      "learning_rate": 3.398650730685813e-06,
      "loss": 0.8259,
      "step": 76
    },
    {
      "epoch": 1.305084745762712,
      "grad_norm": 0.22575413510974532,
      "learning_rate": 3.258991822828007e-06,
      "loss": 0.7525,
      "step": 77
    },
    {
      "epoch": 1.3220338983050848,
      "grad_norm": 0.23693526616000352,
      "learning_rate": 3.1208620894288105e-06,
      "loss": 0.8184,
      "step": 78
    },
    {
      "epoch": 1.3389830508474576,
      "grad_norm": 0.23000459251399064,
      "learning_rate": 2.98438285356014e-06,
      "loss": 0.7968,
      "step": 79
    },
    {
      "epoch": 1.3559322033898304,
      "grad_norm": 0.1958693615856507,
      "learning_rate": 2.8496739886173994e-06,
      "loss": 0.6473,
      "step": 80
    },
    {
      "epoch": 1.3728813559322033,
      "grad_norm": 0.24742346042002725,
      "learning_rate": 2.716853813031435e-06,
      "loss": 0.8747,
      "step": 81
    },
    {
      "epoch": 1.3898305084745763,
      "grad_norm": 0.2506984767340435,
      "learning_rate": 2.5860389863462765e-06,
      "loss": 0.8807,
      "step": 82
    },
    {
      "epoch": 1.4067796610169492,
      "grad_norm": 0.22963412267254538,
      "learning_rate": 2.457344406753899e-06,
      "loss": 0.8101,
      "step": 83
    },
    {
      "epoch": 1.423728813559322,
      "grad_norm": 0.2317036951827151,
      "learning_rate": 2.330883110176049e-06,
      "loss": 0.8256,
      "step": 84
    },
    {
      "epoch": 1.4406779661016949,
      "grad_norm": 0.19606132470172136,
      "learning_rate": 2.2067661709817384e-06,
      "loss": 0.6938,
      "step": 85
    },
    {
      "epoch": 1.457627118644068,
      "grad_norm": 0.219441511162039,
      "learning_rate": 2.0851026044276405e-06,
      "loss": 0.7687,
      "step": 86
    },
    {
      "epoch": 1.4745762711864407,
      "grad_norm": 0.20310562062320556,
      "learning_rate": 1.9659992709070346e-06,
      "loss": 0.76,
      "step": 87
    },
    {
      "epoch": 1.4915254237288136,
      "grad_norm": 0.21286522814139364,
      "learning_rate": 1.8495607820914451e-06,
      "loss": 0.7625,
      "step": 88
    },
    {
      "epoch": 1.5084745762711864,
      "grad_norm": 0.22107972808860454,
      "learning_rate": 1.7358894090473928e-06,
      "loss": 0.8307,
      "step": 89
    },
    {
      "epoch": 1.5254237288135593,
      "grad_norm": 0.21471387542490836,
      "learning_rate": 1.6250849924089485e-06,
      "loss": 0.7501,
      "step": 90
    },
    {
      "epoch": 1.542372881355932,
      "grad_norm": 0.22047778238568697,
      "learning_rate": 1.5172448546850166e-06,
      "loss": 0.8155,
      "step": 91
    },
    {
      "epoch": 1.559322033898305,
      "grad_norm": 0.21411524032874296,
      "learning_rate": 1.4124637147783431e-06,
      "loss": 0.7937,
      "step": 92
    },
    {
      "epoch": 1.576271186440678,
      "grad_norm": 0.21327217337137594,
      "learning_rate": 1.3108336047913633e-06,
      "loss": 0.7728,
      "step": 93
    },
    {
      "epoch": 1.5932203389830508,
      "grad_norm": 0.1920350890527698,
      "learning_rate": 1.2124437891918995e-06,
      "loss": 0.6718,
      "step": 94
    },
    {
      "epoch": 1.6101694915254239,
      "grad_norm": 0.21902635675798737,
      "learning_rate": 1.1173806864097885e-06,
      "loss": 0.804,
      "step": 95
    },
    {
      "epoch": 1.6271186440677967,
      "grad_norm": 0.22207901313261283,
      "learning_rate": 1.0257277929332332e-06,
      "loss": 0.8154,
      "step": 96
    },
    {
      "epoch": 1.6440677966101696,
      "grad_norm": 0.21571048693428027,
      "learning_rate": 9.375656099715935e-07,
      "loss": 0.7889,
      "step": 97
    },
    {
      "epoch": 1.6610169491525424,
      "grad_norm": 0.21357881995728437,
      "learning_rate": 8.529715727489912e-07,
      "loss": 0.8078,
      "step": 98
    },
    {
      "epoch": 1.6779661016949152,
      "grad_norm": 0.2235192978949996,
      "learning_rate": 7.720199824908692e-07,
      "loss": 0.8096,
      "step": 99
    },
    {
      "epoch": 1.694915254237288,
      "grad_norm": 0.22622640413844738,
      "learning_rate": 6.947819411632223e-07,
      "loss": 0.8223,
      "step": 100
    },
    {
      "epoch": 1.711864406779661,
      "grad_norm": 0.22601175493797776,
      "learning_rate": 6.213252890218163e-07,
      "loss": 0.8135,
      "step": 101
    },
    {
      "epoch": 1.7288135593220337,
      "grad_norm": 0.1911211920002579,
      "learning_rate": 5.517145450262639e-07,
      "loss": 0.7068,
      "step": 102
    },
    {
      "epoch": 1.7457627118644068,
      "grad_norm": 0.21175848010983564,
      "learning_rate": 4.860108501712824e-07,
      "loss": 0.7783,
      "step": 103
    },
    {
      "epoch": 1.7627118644067796,
      "grad_norm": 0.21053330888859195,
      "learning_rate": 4.242719137849077e-07,
      "loss": 0.8187,
      "step": 104
    },
    {
      "epoch": 1.7796610169491527,
      "grad_norm": 0.2130541935136037,
      "learning_rate": 3.665519628408332e-07,
      "loss": 0.7656,
      "step": 105
    },
    {
      "epoch": 1.7966101694915255,
      "grad_norm": 0.22526303577323026,
      "learning_rate": 3.1290169432939556e-07,
      "loss": 0.8245,
      "step": 106
    },
    {
      "epoch": 1.8135593220338984,
      "grad_norm": 0.2072005309322467,
      "learning_rate": 2.6336823072904305e-07,
      "loss": 0.743,
      "step": 107
    },
    {
      "epoch": 1.8305084745762712,
      "grad_norm": 0.22448423254493008,
      "learning_rate": 2.179950786173879e-07,
      "loss": 0.7898,
      "step": 108
    },
    {
      "epoch": 1.847457627118644,
      "grad_norm": 0.2138555012447717,
      "learning_rate": 1.7682209045820687e-07,
      "loss": 0.7782,
      "step": 109
    },
    {
      "epoch": 1.8644067796610169,
      "grad_norm": 0.1993249828378952,
      "learning_rate": 1.3988542959794627e-07,
      "loss": 0.7393,
      "step": 110
    },
    {
      "epoch": 1.8813559322033897,
      "grad_norm": 0.21708860295387616,
      "learning_rate": 1.0721753850247984e-07,
      "loss": 0.7906,
      "step": 111
    },
    {
      "epoch": 1.8983050847457628,
      "grad_norm": 0.18874363435593658,
      "learning_rate": 7.884711026201586e-08,
      "loss": 0.6812,
      "step": 112
    },
    {
      "epoch": 1.9152542372881356,
      "grad_norm": 0.25545899012578377,
      "learning_rate": 5.479906338917984e-08,
      "loss": 0.9486,
      "step": 113
    },
    {
      "epoch": 1.9322033898305084,
      "grad_norm": 0.1936498628431566,
      "learning_rate": 3.5094519932415417e-08,
      "loss": 0.745,
      "step": 114
    },
    {
      "epoch": 1.9491525423728815,
      "grad_norm": 0.20802679119960885,
      "learning_rate": 1.975078692391552e-08,
      "loss": 0.7714,
      "step": 115
    },
    {
      "epoch": 1.9661016949152543,
      "grad_norm": 0.22619971076796636,
      "learning_rate": 8.781341178393244e-09,
      "loss": 0.8194,
      "step": 116
    },
    {
      "epoch": 1.9830508474576272,
      "grad_norm": 0.2211171658614822,
      "learning_rate": 2.19581745602826e-09,
      "loss": 0.7564,
      "step": 117
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.22426334671251008,
      "learning_rate": 0.0,
      "loss": 0.8302,
      "step": 118
    },
    {
      "epoch": 2.0,
      "step": 118,
      "total_flos": 1854342635192320.0,
      "train_loss": 1.4390717199293233,
      "train_runtime": 1287.2643,
      "train_samples_per_second": 5.867,
      "train_steps_per_second": 0.092
    }
  ],
  "logging_steps": 1,
  "max_steps": 118,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1854342635192320.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}