| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.4778972520908005, |
| "eval_steps": 500, |
| "global_step": 150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 10.092560768127441, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.9497, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 8.963287353515625, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.951, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 9.27600383758545, |
| "learning_rate": 3e-06, |
| "loss": 1.9345, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.632405757904053, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.9157, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.668580055236816, |
| "learning_rate": 5e-06, |
| "loss": 1.9026, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 3.8046553134918213, |
| "learning_rate": 6e-06, |
| "loss": 1.8923, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 4.357985973358154, |
| "learning_rate": 7e-06, |
| "loss": 1.8241, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.685062885284424, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.8467, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.768229961395264, |
| "learning_rate": 9e-06, |
| "loss": 1.8199, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.796407699584961, |
| "learning_rate": 1e-05, |
| "loss": 1.8374, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 5.7536139488220215, |
| "learning_rate": 9.999731248679734e-06, |
| "loss": 1.779, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 4.202663898468018, |
| "learning_rate": 9.99892502360984e-06, |
| "loss": 1.7579, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.9114131927490234, |
| "learning_rate": 9.99758141145994e-06, |
| "loss": 1.7433, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.3823723793029785, |
| "learning_rate": 9.995700556669052e-06, |
| "loss": 1.7212, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.3254876136779785, |
| "learning_rate": 9.993282661430058e-06, |
| "loss": 1.7218, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.053166151046753, |
| "learning_rate": 9.990327985667972e-06, |
| "loss": 1.7256, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.3782012462615967, |
| "learning_rate": 9.986836847012001e-06, |
| "loss": 1.713, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.1311683654785156, |
| "learning_rate": 9.98280962076139e-06, |
| "loss": 1.6785, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.325747489929199, |
| "learning_rate": 9.978246739845095e-06, |
| "loss": 1.7167, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.1330366134643555, |
| "learning_rate": 9.973148694775217e-06, |
| "loss": 1.676, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.2632806301116943, |
| "learning_rate": 9.967516033594295e-06, |
| "loss": 1.7033, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.6582744121551514, |
| "learning_rate": 9.961349361816384e-06, |
| "loss": 1.6957, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.9663337469100952, |
| "learning_rate": 9.954649342361952e-06, |
| "loss": 1.6729, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2557435035705566, |
| "learning_rate": 9.947416695486633e-06, |
| "loss": 1.6399, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.090054512023926, |
| "learning_rate": 9.939652198703785e-06, |
| "loss": 1.6792, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.1648776531219482, |
| "learning_rate": 9.93135668670091e-06, |
| "loss": 1.6625, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.1209850311279297, |
| "learning_rate": 9.92253105124993e-06, |
| "loss": 1.7057, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.2777392864227295, |
| "learning_rate": 9.91317624111132e-06, |
| "loss": 1.6052, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.1739561557769775, |
| "learning_rate": 9.903293261932106e-06, |
| "loss": 1.6139, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.4518871307373047, |
| "learning_rate": 9.89288317613777e-06, |
| "loss": 1.6759, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.795519471168518, |
| "learning_rate": 9.881947102818036e-06, |
| "loss": 1.7036, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.150223731994629, |
| "learning_rate": 9.870486217606557e-06, |
| "loss": 1.6469, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.91805899143219, |
| "learning_rate": 9.858501752554548e-06, |
| "loss": 1.6353, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.8129810094833374, |
| "learning_rate": 9.845994995998332e-06, |
| "loss": 1.6551, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.1308679580688477, |
| "learning_rate": 9.83296729242084e-06, |
| "loss": 1.617, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.8321012258529663, |
| "learning_rate": 9.819420042307091e-06, |
| "loss": 1.6213, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.9112164974212646, |
| "learning_rate": 9.805354701993624e-06, |
| "loss": 1.6245, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.1160471439361572, |
| "learning_rate": 9.79077278351195e-06, |
| "loss": 1.6405, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.6318371295928955, |
| "learning_rate": 9.77567585442601e-06, |
| "loss": 1.6234, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.238373279571533, |
| "learning_rate": 9.76006553766365e-06, |
| "loss": 1.6452, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.056736469268799, |
| "learning_rate": 9.743943511342168e-06, |
| "loss": 1.679, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.000826358795166, |
| "learning_rate": 9.727311508587907e-06, |
| "loss": 1.6904, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.089482069015503, |
| "learning_rate": 9.710171317349946e-06, |
| "loss": 1.62, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.5748705863952637, |
| "learning_rate": 9.692524780207897e-06, |
| "loss": 1.6669, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.9039987325668335, |
| "learning_rate": 9.674373794173818e-06, |
| "loss": 1.6489, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8047181367874146, |
| "learning_rate": 9.655720310488298e-06, |
| "loss": 1.6227, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.0552868843078613, |
| "learning_rate": 9.636566334410682e-06, |
| "loss": 1.5898, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.252218008041382, |
| "learning_rate": 9.616913925003514e-06, |
| "loss": 1.6667, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.015887498855591, |
| "learning_rate": 9.596765194911182e-06, |
| "loss": 1.6668, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.582007884979248, |
| "learning_rate": 9.576122310132814e-06, |
| "loss": 1.6542, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.9070011377334595, |
| "learning_rate": 9.554987489789426e-06, |
| "loss": 1.691, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9671483039855957, |
| "learning_rate": 9.533363005885362e-06, |
| "loss": 1.6116, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.4808287620544434, |
| "learning_rate": 9.511251183064068e-06, |
| "loss": 1.6653, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.237518548965454, |
| "learning_rate": 9.48865439835817e-06, |
| "loss": 1.6571, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.142627477645874, |
| "learning_rate": 9.465575080933959e-06, |
| "loss": 1.6247, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.0365779399871826, |
| "learning_rate": 9.442015711830246e-06, |
| "loss": 1.6323, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.427443742752075, |
| "learning_rate": 9.417978823691652e-06, |
| "loss": 1.6646, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.014340877532959, |
| "learning_rate": 9.393467000496345e-06, |
| "loss": 1.5754, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.6110949516296387, |
| "learning_rate": 9.368482877278264e-06, |
| "loss": 1.657, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.048231363296509, |
| "learning_rate": 9.34302913984385e-06, |
| "loss": 1.61, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.672330379486084, |
| "learning_rate": 9.317108524483319e-06, |
| "loss": 1.5894, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.6745121479034424, |
| "learning_rate": 9.29072381767651e-06, |
| "loss": 1.5623, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.1490442752838135, |
| "learning_rate": 9.26387785579334e-06, |
| "loss": 1.6668, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.805147171020508, |
| "learning_rate": 9.236573524788888e-06, |
| "loss": 1.6274, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.243023157119751, |
| "learning_rate": 9.208813759893158e-06, |
| "loss": 1.6496, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.5260114669799805, |
| "learning_rate": 9.180601545295535e-06, |
| "loss": 1.623, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.0329606533050537, |
| "learning_rate": 9.151939913823988e-06, |
| "loss": 1.5873, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 3.3269598484039307, |
| "learning_rate": 9.122831946619038e-06, |
| "loss": 1.6327, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.1953978538513184, |
| "learning_rate": 9.093280772802527e-06, |
| "loss": 1.6321, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 3.2001328468322754, |
| "learning_rate": 9.063289569141251e-06, |
| "loss": 1.6338, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.076502799987793, |
| "learning_rate": 9.032861559705442e-06, |
| "loss": 1.6648, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.0800352096557617, |
| "learning_rate": 9.002000015522182e-06, |
| "loss": 1.5694, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.0901432037353516, |
| "learning_rate": 8.970708254223768e-06, |
| "loss": 1.6571, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 3.1389923095703125, |
| "learning_rate": 8.938989639691068e-06, |
| "loss": 1.6151, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.0514352321624756, |
| "learning_rate": 8.90684758169189e-06, |
| "loss": 1.618, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.9462100267410278, |
| "learning_rate": 8.87428553551445e-06, |
| "loss": 1.6367, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.2343838214874268, |
| "learning_rate": 8.841307001595904e-06, |
| "loss": 1.6321, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.6064207553863525, |
| "learning_rate": 8.807915525146065e-06, |
| "loss": 1.5913, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.589186191558838, |
| "learning_rate": 8.774114695766286e-06, |
| "loss": 1.6, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.0082924365997314, |
| "learning_rate": 8.739908147063576e-06, |
| "loss": 1.6266, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.9739817380905151, |
| "learning_rate": 8.705299556259986e-06, |
| "loss": 1.5926, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.9566433429718018, |
| "learning_rate": 8.670292643797302e-06, |
| "loss": 1.624, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.10595703125, |
| "learning_rate": 8.634891172937102e-06, |
| "loss": 1.6437, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.7080891132354736, |
| "learning_rate": 8.599098949356201e-06, |
| "loss": 1.6131, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.358856678009033, |
| "learning_rate": 8.562919820737537e-06, |
| "loss": 1.5748, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.030383825302124, |
| "learning_rate": 8.526357676356538e-06, |
| "loss": 1.6389, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.3567280769348145, |
| "learning_rate": 8.489416446663037e-06, |
| "loss": 1.6288, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.1183040142059326, |
| "learning_rate": 8.452100102858734e-06, |
| "loss": 1.5929, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.467944860458374, |
| "learning_rate": 8.414412656470297e-06, |
| "loss": 1.5579, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.200274705886841, |
| "learning_rate": 8.376358158918114e-06, |
| "loss": 1.587, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.3279902935028076, |
| "learning_rate": 8.33794070108077e-06, |
| "loss": 1.6496, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.7403910160064697, |
| "learning_rate": 8.299164412855268e-06, |
| "loss": 1.5665, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.5097343921661377, |
| "learning_rate": 8.260033462713073e-06, |
| "loss": 1.6245, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.2657604217529297, |
| "learning_rate": 8.22055205725199e-06, |
| "loss": 1.5785, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.4038476943969727, |
| "learning_rate": 8.180724440743957e-06, |
| "loss": 1.6912, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.811049461364746, |
| "learning_rate": 8.14055489467878e-06, |
| "loss": 1.6059, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.474153518676758, |
| "learning_rate": 8.100047737303877e-06, |
| "loss": 1.6423, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.351736307144165, |
| "learning_rate": 8.059207323160057e-06, |
| "loss": 1.6199, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.3526997566223145, |
| "learning_rate": 8.018038042613407e-06, |
| "loss": 1.5856, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.075406789779663, |
| "learning_rate": 7.97654432138333e-06, |
| "loss": 1.5901, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.6973438262939453, |
| "learning_rate": 7.93473062006677e-06, |
| "loss": 1.5696, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.6449167728424072, |
| "learning_rate": 7.892601433658705e-06, |
| "loss": 1.5939, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.360849380493164, |
| "learning_rate": 7.850161291068915e-06, |
| "loss": 1.5449, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.467226028442383, |
| "learning_rate": 7.807414754635145e-06, |
| "loss": 1.5926, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.449989080429077, |
| "learning_rate": 7.764366419632636e-06, |
| "loss": 1.5591, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 5.518803119659424, |
| "learning_rate": 7.721020913780137e-06, |
| "loss": 1.5406, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.6841485500335693, |
| "learning_rate": 7.677382896742417e-06, |
| "loss": 1.5797, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.475085496902466, |
| "learning_rate": 7.63345705962935e-06, |
| "loss": 1.5653, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.3905832767486572, |
| "learning_rate": 7.589248124491627e-06, |
| "loss": 1.5641, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.7013638019561768, |
| "learning_rate": 7.544760843813122e-06, |
| "loss": 1.5837, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.500148296356201, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.6255, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.106887102127075, |
| "learning_rate": 7.454970404866612e-06, |
| "loss": 1.6069, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.1840200424194336, |
| "learning_rate": 7.409676899118213e-06, |
| "loss": 1.5959, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 5.758022308349609, |
| "learning_rate": 7.3641243518305915e-06, |
| "loss": 1.616, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.24625301361084, |
| "learning_rate": 7.318317659926637e-06, |
| "loss": 1.639, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 3.8651440143585205, |
| "learning_rate": 7.272261747649922e-06, |
| "loss": 1.6287, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.9262839555740356, |
| "learning_rate": 7.225961566035335e-06, |
| "loss": 1.5851, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.4312078952789307, |
| "learning_rate": 7.179422092376856e-06, |
| "loss": 1.5934, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.391693592071533, |
| "learning_rate": 7.132648329692478e-06, |
| "loss": 1.5719, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.10438871383667, |
| "learning_rate": 7.085645306186391e-06, |
| "loss": 1.5876, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.984710931777954, |
| "learning_rate": 7.038418074708444e-06, |
| "loss": 1.6506, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.4563817977905273, |
| "learning_rate": 6.990971712210966e-06, |
| "loss": 1.5578, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.856941819190979, |
| "learning_rate": 6.943311319202976e-06, |
| "loss": 1.5806, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.228983163833618, |
| "learning_rate": 6.895442019201898e-06, |
| "loss": 1.5639, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.9688351154327393, |
| "learning_rate": 6.8473689581827585e-06, |
| "loss": 1.5939, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.028876304626465, |
| "learning_rate": 6.7990973040250055e-06, |
| "loss": 1.6096, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.385794162750244, |
| "learning_rate": 6.750632245956954e-06, |
| "loss": 1.5617, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.0037996768951416, |
| "learning_rate": 6.701978993997942e-06, |
| "loss": 1.6028, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.0149664878845215, |
| "learning_rate": 6.653142778398247e-06, |
| "loss": 1.583, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.466585874557495, |
| "learning_rate": 6.6041288490768385e-06, |
| "loss": 1.6368, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.181319236755371, |
| "learning_rate": 6.554942475057003e-06, |
| "loss": 1.5819, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.9432387351989746, |
| "learning_rate": 6.505588943899923e-06, |
| "loss": 1.5551, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.277068614959717, |
| "learning_rate": 6.456073561136261e-06, |
| "loss": 1.5788, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.9018107652664185, |
| "learning_rate": 6.406401649695814e-06, |
| "loss": 1.5639, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.755958318710327, |
| "learning_rate": 6.356578549335295e-06, |
| "loss": 1.6015, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.2761142253875732, |
| "learning_rate": 6.306609616064304e-06, |
| "loss": 1.6054, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 3.212486505508423, |
| "learning_rate": 6.256500221569556e-06, |
| "loss": 1.5953, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.2175703048706055, |
| "learning_rate": 6.2062557526374226e-06, |
| "loss": 1.5745, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.9958215951919556, |
| "learning_rate": 6.15588161057485e-06, |
| "loss": 1.5981, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.4180896282196045, |
| "learning_rate": 6.10538321062871e-06, |
| "loss": 1.5836, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.0187716484069824, |
| "learning_rate": 6.0547659814036664e-06, |
| "loss": 1.6062, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.3086986541748047, |
| "learning_rate": 6.004035364278593e-06, |
| "loss": 1.5764, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.9949414730072021, |
| "learning_rate": 5.953196812821622e-06, |
| "loss": 1.6037, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.8632932901382446, |
| "learning_rate": 5.902255792203882e-06, |
| "loss": 1.6051, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.034745931625366, |
| "learning_rate": 5.851217778611994e-06, |
| "loss": 1.6386, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.1993603706359863, |
| "learning_rate": 5.800088258659371e-06, |
| "loss": 1.6081, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.0585718154907227, |
| "learning_rate": 5.748872728796409e-06, |
| "loss": 1.5955, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.2753782272338867, |
| "learning_rate": 5.697576694719616e-06, |
| "loss": 1.5655, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 3.237483024597168, |
| "learning_rate": 5.646205670779745e-06, |
| "loss": 1.6004, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 3.019814968109131, |
| "learning_rate": 5.594765179389003e-06, |
| "loss": 1.5721, |
| "step": 150 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 313, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "total_flos": 164924379693056.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|