| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 764, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002617801047120419, |
| "grad_norm": 12.14416528139153, |
| "learning_rate": 5e-08, |
| "loss": 1.2113, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005235602094240838, |
| "grad_norm": 13.17693450180874, |
| "learning_rate": 1e-07, |
| "loss": 1.3287, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.007853403141361256, |
| "grad_norm": 12.985464074157603, |
| "learning_rate": 1.5e-07, |
| "loss": 1.2799, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.010471204188481676, |
| "grad_norm": 12.303158064529542, |
| "learning_rate": 2e-07, |
| "loss": 1.2283, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.013089005235602094, |
| "grad_norm": 11.991631362158266, |
| "learning_rate": 2.5e-07, |
| "loss": 1.2474, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.015706806282722512, |
| "grad_norm": 12.177257617308824, |
| "learning_rate": 3e-07, |
| "loss": 1.2422, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01832460732984293, |
| "grad_norm": 11.848705719604377, |
| "learning_rate": 3.5e-07, |
| "loss": 1.2089, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.020942408376963352, |
| "grad_norm": 11.380785229390055, |
| "learning_rate": 4e-07, |
| "loss": 1.2073, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02356020942408377, |
| "grad_norm": 12.151969368253802, |
| "learning_rate": 4.5e-07, |
| "loss": 1.2446, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02617801047120419, |
| "grad_norm": 10.914812549644966, |
| "learning_rate": 5e-07, |
| "loss": 1.166, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.028795811518324606, |
| "grad_norm": 10.553169040252493, |
| "learning_rate": 5.5e-07, |
| "loss": 1.1974, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.031413612565445025, |
| "grad_norm": 9.928122697621971, |
| "learning_rate": 6e-07, |
| "loss": 1.1835, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.034031413612565446, |
| "grad_norm": 10.308940915743477, |
| "learning_rate": 6.5e-07, |
| "loss": 1.2328, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03664921465968586, |
| "grad_norm": 8.5614124802958, |
| "learning_rate": 7e-07, |
| "loss": 1.1695, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03926701570680628, |
| "grad_norm": 5.740975243348377, |
| "learning_rate": 7.5e-07, |
| "loss": 1.0755, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.041884816753926704, |
| "grad_norm": 5.615178091434232, |
| "learning_rate": 8e-07, |
| "loss": 1.1424, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04450261780104712, |
| "grad_norm": 4.87125675495863, |
| "learning_rate": 8.499999999999999e-07, |
| "loss": 1.1168, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04712041884816754, |
| "grad_norm": 4.189940295803935, |
| "learning_rate": 9e-07, |
| "loss": 1.0721, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.049738219895287955, |
| "grad_norm": 3.1156768984206398, |
| "learning_rate": 9.499999999999999e-07, |
| "loss": 1.1014, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05235602094240838, |
| "grad_norm": 3.978211048515667, |
| "learning_rate": 1e-06, |
| "loss": 1.1225, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0549738219895288, |
| "grad_norm": 4.391126954807224, |
| "learning_rate": 1.05e-06, |
| "loss": 1.1086, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.05759162303664921, |
| "grad_norm": 4.352706918386151, |
| "learning_rate": 1.1e-06, |
| "loss": 1.0908, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.060209424083769635, |
| "grad_norm": 4.325997750838297, |
| "learning_rate": 1.1499999999999998e-06, |
| "loss": 1.1033, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06282722513089005, |
| "grad_norm": 4.231113168471665, |
| "learning_rate": 1.2e-06, |
| "loss": 1.0248, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.06544502617801047, |
| "grad_norm": 3.5599588675120417, |
| "learning_rate": 1.2499999999999999e-06, |
| "loss": 1.0409, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06806282722513089, |
| "grad_norm": 3.2504844113357567, |
| "learning_rate": 1.3e-06, |
| "loss": 1.1003, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07068062827225131, |
| "grad_norm": 2.4901674050656353, |
| "learning_rate": 1.35e-06, |
| "loss": 1.0911, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07329842931937172, |
| "grad_norm": 2.265540549670321, |
| "learning_rate": 1.4e-06, |
| "loss": 0.9979, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.07591623036649214, |
| "grad_norm": 2.272463051491201, |
| "learning_rate": 1.4499999999999999e-06, |
| "loss": 1.0648, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.07853403141361257, |
| "grad_norm": 2.088378247588902, |
| "learning_rate": 1.5e-06, |
| "loss": 1.0645, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08115183246073299, |
| "grad_norm": 1.8960968819771264, |
| "learning_rate": 1.55e-06, |
| "loss": 1.0602, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.08376963350785341, |
| "grad_norm": 1.876678889645711, |
| "learning_rate": 1.6e-06, |
| "loss": 0.9687, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08638743455497382, |
| "grad_norm": 1.7888216612828005, |
| "learning_rate": 1.6499999999999999e-06, |
| "loss": 1.0007, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.08900523560209424, |
| "grad_norm": 1.6895788633860827, |
| "learning_rate": 1.6999999999999998e-06, |
| "loss": 0.9958, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09162303664921466, |
| "grad_norm": 1.931578989654563, |
| "learning_rate": 1.75e-06, |
| "loss": 1.0477, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09424083769633508, |
| "grad_norm": 1.6276158377243375, |
| "learning_rate": 1.8e-06, |
| "loss": 1.0214, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0968586387434555, |
| "grad_norm": 1.7716540010644546, |
| "learning_rate": 1.85e-06, |
| "loss": 1.0214, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.09947643979057591, |
| "grad_norm": 3.0105407265939674, |
| "learning_rate": 1.8999999999999998e-06, |
| "loss": 1.0118, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.10209424083769633, |
| "grad_norm": 2.4861479827761572, |
| "learning_rate": 1.95e-06, |
| "loss": 0.9727, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.10471204188481675, |
| "grad_norm": 1.910338672583786, |
| "learning_rate": 2e-06, |
| "loss": 1.0347, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10732984293193717, |
| "grad_norm": 1.6226405420762602, |
| "learning_rate": 1.9999905856154088e-06, |
| "loss": 1.0291, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1099476439790576, |
| "grad_norm": 1.7503126460390113, |
| "learning_rate": 1.999962342638896e-06, |
| "loss": 0.9635, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.112565445026178, |
| "grad_norm": 2.727417218966873, |
| "learning_rate": 1.9999152716022427e-06, |
| "loss": 1.0507, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.11518324607329843, |
| "grad_norm": 2.708816278120753, |
| "learning_rate": 1.9998493733917385e-06, |
| "loss": 0.9853, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.11780104712041885, |
| "grad_norm": 2.9505093043707435, |
| "learning_rate": 1.999764649248165e-06, |
| "loss": 0.9889, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12041884816753927, |
| "grad_norm": 1.498614559284899, |
| "learning_rate": 1.999661100766774e-06, |
| "loss": 1.017, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.12303664921465969, |
| "grad_norm": 2.1604669089253763, |
| "learning_rate": 1.999538729897256e-06, |
| "loss": 1.0121, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1256544502617801, |
| "grad_norm": 1.52889515935512, |
| "learning_rate": 1.9993975389437036e-06, |
| "loss": 0.9868, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.12827225130890052, |
| "grad_norm": 1.4296095766217882, |
| "learning_rate": 1.999237530564569e-06, |
| "loss": 0.9565, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.13089005235602094, |
| "grad_norm": 1.8054769713376762, |
| "learning_rate": 1.9990587077726125e-06, |
| "loss": 1.0322, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13350785340314136, |
| "grad_norm": 1.5605101749195394, |
| "learning_rate": 1.998861073934848e-06, |
| "loss": 0.9747, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.13612565445026178, |
| "grad_norm": 1.5246491305874612, |
| "learning_rate": 1.998644632772477e-06, |
| "loss": 0.966, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1387434554973822, |
| "grad_norm": 1.4368471680359753, |
| "learning_rate": 1.99840938836082e-06, |
| "loss": 0.9695, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.14136125654450263, |
| "grad_norm": 1.8063786781607425, |
| "learning_rate": 1.9981553451292393e-06, |
| "loss": 0.9998, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.14397905759162305, |
| "grad_norm": 1.6681675381916519, |
| "learning_rate": 1.9978825078610574e-06, |
| "loss": 0.9312, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14659685863874344, |
| "grad_norm": 1.563085947347343, |
| "learning_rate": 1.9975908816934638e-06, |
| "loss": 0.9968, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.14921465968586387, |
| "grad_norm": 1.3952505008026292, |
| "learning_rate": 1.9972804721174198e-06, |
| "loss": 0.9663, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1518324607329843, |
| "grad_norm": 1.3520603930155224, |
| "learning_rate": 1.996951284977556e-06, |
| "loss": 0.8925, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1544502617801047, |
| "grad_norm": 1.337444199143247, |
| "learning_rate": 1.9966033264720613e-06, |
| "loss": 0.9193, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.15706806282722513, |
| "grad_norm": 1.4128992812159349, |
| "learning_rate": 1.9962366031525663e-06, |
| "loss": 0.9911, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15968586387434555, |
| "grad_norm": 2.165095124414085, |
| "learning_rate": 1.9958511219240188e-06, |
| "loss": 0.9031, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.16230366492146597, |
| "grad_norm": 1.4678385494619572, |
| "learning_rate": 1.9954468900445565e-06, |
| "loss": 0.9982, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1649214659685864, |
| "grad_norm": 1.4579212205955776, |
| "learning_rate": 1.995023915125368e-06, |
| "loss": 1.0127, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.16753926701570682, |
| "grad_norm": 1.44368135176114, |
| "learning_rate": 1.9945822051305507e-06, |
| "loss": 0.9337, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.17015706806282724, |
| "grad_norm": 1.4345254855690996, |
| "learning_rate": 1.9941217683769596e-06, |
| "loss": 0.9975, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17277486910994763, |
| "grad_norm": 1.3493816428278362, |
| "learning_rate": 1.9936426135340527e-06, |
| "loss": 0.9735, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.17539267015706805, |
| "grad_norm": 1.3912607998996758, |
| "learning_rate": 1.9931447496237255e-06, |
| "loss": 0.9915, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.17801047120418848, |
| "grad_norm": 1.2714356937152822, |
| "learning_rate": 1.9926281860201426e-06, |
| "loss": 0.97, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1806282722513089, |
| "grad_norm": 5.841882115164898, |
| "learning_rate": 1.992092932449561e-06, |
| "loss": 1.0218, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.18324607329842932, |
| "grad_norm": 1.4744811175034291, |
| "learning_rate": 1.9915389989901473e-06, |
| "loss": 0.9489, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18586387434554974, |
| "grad_norm": 1.5174779872284268, |
| "learning_rate": 1.9909663960717854e-06, |
| "loss": 0.9675, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.18848167539267016, |
| "grad_norm": 1.5861494594737444, |
| "learning_rate": 1.9903751344758845e-06, |
| "loss": 0.9844, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.19109947643979058, |
| "grad_norm": 1.4751001773688759, |
| "learning_rate": 1.9897652253351726e-06, |
| "loss": 0.9614, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.193717277486911, |
| "grad_norm": 1.6468725537877267, |
| "learning_rate": 1.9891366801334875e-06, |
| "loss": 0.9667, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.19633507853403143, |
| "grad_norm": 1.5660663682362246, |
| "learning_rate": 1.9884895107055627e-06, |
| "loss": 0.9236, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.19895287958115182, |
| "grad_norm": 1.3436323814828306, |
| "learning_rate": 1.987823729236801e-06, |
| "loss": 0.9386, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.20157068062827224, |
| "grad_norm": 1.3841264848928616, |
| "learning_rate": 1.9871393482630486e-06, |
| "loss": 0.9601, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.20418848167539266, |
| "grad_norm": 1.320210753509364, |
| "learning_rate": 1.9864363806703567e-06, |
| "loss": 0.9653, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.20680628272251309, |
| "grad_norm": 1.4667262276867, |
| "learning_rate": 1.9857148396947403e-06, |
| "loss": 0.9674, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2094240837696335, |
| "grad_norm": 1.3925922106886959, |
| "learning_rate": 1.984974738921927e-06, |
| "loss": 0.9608, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.21204188481675393, |
| "grad_norm": 1.3988665357319279, |
| "learning_rate": 1.9842160922871043e-06, |
| "loss": 0.946, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.21465968586387435, |
| "grad_norm": 1.7341184731544157, |
| "learning_rate": 1.9834389140746535e-06, |
| "loss": 0.9687, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.21727748691099477, |
| "grad_norm": 1.5654992728704895, |
| "learning_rate": 1.982643218917885e-06, |
| "loss": 0.9226, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2198952879581152, |
| "grad_norm": 1.4005888078938449, |
| "learning_rate": 1.9818290217987584e-06, |
| "loss": 0.9102, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.22251308900523561, |
| "grad_norm": 1.7730556296776727, |
| "learning_rate": 1.980996338047604e-06, |
| "loss": 0.9946, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.225130890052356, |
| "grad_norm": 1.286071508143601, |
| "learning_rate": 1.980145183342831e-06, |
| "loss": 0.927, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.22774869109947643, |
| "grad_norm": 1.3414728661794506, |
| "learning_rate": 1.9792755737106357e-06, |
| "loss": 0.9343, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.23036649214659685, |
| "grad_norm": 1.3347460205847725, |
| "learning_rate": 1.978387525524697e-06, |
| "loss": 0.9362, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.23298429319371727, |
| "grad_norm": 1.650760589379498, |
| "learning_rate": 1.9774810555058694e-06, |
| "loss": 0.9292, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2356020942408377, |
| "grad_norm": 1.6997958511222782, |
| "learning_rate": 1.976556180721867e-06, |
| "loss": 0.9788, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23821989528795812, |
| "grad_norm": 1.2701256411486546, |
| "learning_rate": 1.975612918586944e-06, |
| "loss": 0.9372, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.24083769633507854, |
| "grad_norm": 1.462964107782864, |
| "learning_rate": 1.9746512868615655e-06, |
| "loss": 0.9735, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.24345549738219896, |
| "grad_norm": 1.4946637034205932, |
| "learning_rate": 1.973671303652073e-06, |
| "loss": 0.9422, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.24607329842931938, |
| "grad_norm": 1.5055409318458846, |
| "learning_rate": 1.972672987410345e-06, |
| "loss": 0.9429, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2486910994764398, |
| "grad_norm": 1.3658365972401865, |
| "learning_rate": 1.971656356933446e-06, |
| "loss": 0.9241, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2513089005235602, |
| "grad_norm": 1.457543519448887, |
| "learning_rate": 1.970621431363278e-06, |
| "loss": 0.9263, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.25392670157068065, |
| "grad_norm": 1.5217829037825779, |
| "learning_rate": 1.9695682301862154e-06, |
| "loss": 0.8864, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.25654450261780104, |
| "grad_norm": 1.6462247115282178, |
| "learning_rate": 1.9684967732327396e-06, |
| "loss": 0.9661, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2591623036649215, |
| "grad_norm": 1.4786966750910804, |
| "learning_rate": 1.9674070806770667e-06, |
| "loss": 0.9361, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2617801047120419, |
| "grad_norm": 1.4474819723877972, |
| "learning_rate": 1.9662991730367663e-06, |
| "loss": 0.9309, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2643979057591623, |
| "grad_norm": 1.5606246155653447, |
| "learning_rate": 1.965173071172375e-06, |
| "loss": 1.0381, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2670157068062827, |
| "grad_norm": 1.737033140318964, |
| "learning_rate": 1.9640287962870057e-06, |
| "loss": 0.9154, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2696335078534031, |
| "grad_norm": 1.3953874097403483, |
| "learning_rate": 1.962866369925946e-06, |
| "loss": 0.9353, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.27225130890052357, |
| "grad_norm": 1.498311854630254, |
| "learning_rate": 1.9616858139762532e-06, |
| "loss": 0.9815, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.27486910994764396, |
| "grad_norm": 1.5614482029934333, |
| "learning_rate": 1.960487150666343e-06, |
| "loss": 0.9859, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2774869109947644, |
| "grad_norm": 1.429931223232455, |
| "learning_rate": 1.95927040256557e-06, |
| "loss": 0.9758, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.2801047120418848, |
| "grad_norm": 1.379643597315614, |
| "learning_rate": 1.958035592583803e-06, |
| "loss": 0.9271, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.28272251308900526, |
| "grad_norm": 1.3427537371416565, |
| "learning_rate": 1.956782743970995e-06, |
| "loss": 0.9344, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.28534031413612565, |
| "grad_norm": 1.465051648855202, |
| "learning_rate": 1.955511880316743e-06, |
| "loss": 0.9275, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2879581151832461, |
| "grad_norm": 2.003389869253871, |
| "learning_rate": 1.9542230255498453e-06, |
| "loss": 0.9097, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2905759162303665, |
| "grad_norm": 1.6295520912396342, |
| "learning_rate": 1.9529162039378505e-06, |
| "loss": 0.9782, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2931937172774869, |
| "grad_norm": 1.4907288362098328, |
| "learning_rate": 1.951591440086602e-06, |
| "loss": 0.9409, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.29581151832460734, |
| "grad_norm": 1.2903657225842853, |
| "learning_rate": 1.9502487589397717e-06, |
| "loss": 0.959, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.29842931937172773, |
| "grad_norm": 1.315579510161896, |
| "learning_rate": 1.948888185778393e-06, |
| "loss": 0.9236, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3010471204188482, |
| "grad_norm": 1.4325555890728199, |
| "learning_rate": 1.947509746220385e-06, |
| "loss": 0.9219, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3036649214659686, |
| "grad_norm": 2.1417887202189148, |
| "learning_rate": 1.9461134662200666e-06, |
| "loss": 0.9291, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.306282722513089, |
| "grad_norm": 1.7577458450102519, |
| "learning_rate": 1.9446993720676725e-06, |
| "loss": 0.9287, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3089005235602094, |
| "grad_norm": 1.605550965078164, |
| "learning_rate": 1.9432674903888547e-06, |
| "loss": 0.9201, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.31151832460732987, |
| "grad_norm": 1.6692783458171698, |
| "learning_rate": 1.941817848144183e-06, |
| "loss": 0.9664, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.31413612565445026, |
| "grad_norm": 1.3222419715525664, |
| "learning_rate": 1.9403504726286365e-06, |
| "loss": 0.9384, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.31675392670157065, |
| "grad_norm": 1.3649203244771078, |
| "learning_rate": 1.93886539147109e-06, |
| "loss": 0.9222, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3193717277486911, |
| "grad_norm": 1.4740460309140695, |
| "learning_rate": 1.9373626326337944e-06, |
| "loss": 0.934, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3219895287958115, |
| "grad_norm": 1.6873725448477543, |
| "learning_rate": 1.9358422244118486e-06, |
| "loss": 0.9412, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.32460732984293195, |
| "grad_norm": 1.443964954764275, |
| "learning_rate": 1.9343041954326677e-06, |
| "loss": 0.9392, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.32722513089005234, |
| "grad_norm": 1.4796628325478134, |
| "learning_rate": 1.932748574655445e-06, |
| "loss": 0.9429, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3298429319371728, |
| "grad_norm": 1.5284156236831998, |
| "learning_rate": 1.931175391370605e-06, |
| "loss": 0.9232, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3324607329842932, |
| "grad_norm": 1.5078471698245262, |
| "learning_rate": 1.929584675199252e-06, |
| "loss": 0.9067, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.33507853403141363, |
| "grad_norm": 1.502472082150914, |
| "learning_rate": 1.927976456092614e-06, |
| "loss": 0.8879, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.337696335078534, |
| "grad_norm": 1.4759381537342793, |
| "learning_rate": 1.9263507643314775e-06, |
| "loss": 0.9353, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3403141361256545, |
| "grad_norm": 1.4842466813820954, |
| "learning_rate": 1.9247076305256173e-06, |
| "loss": 0.9893, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.34293193717277487, |
| "grad_norm": 1.6848494648004948, |
| "learning_rate": 1.923047085613221e-06, |
| "loss": 0.9626, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.34554973821989526, |
| "grad_norm": 1.4263062643952467, |
| "learning_rate": 1.9213691608603046e-06, |
| "loss": 0.9663, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.3481675392670157, |
| "grad_norm": 1.4254166058185196, |
| "learning_rate": 1.9196738878601262e-06, |
| "loss": 0.9407, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3507853403141361, |
| "grad_norm": 1.3701341944366068, |
| "learning_rate": 1.9179612985325907e-06, |
| "loss": 0.9171, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.35340314136125656, |
| "grad_norm": 1.4560420480782974, |
| "learning_rate": 1.9162314251236464e-06, |
| "loss": 0.9306, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.35602094240837695, |
| "grad_norm": 1.8728921194195394, |
| "learning_rate": 1.9144843002046803e-06, |
| "loss": 0.9863, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3586387434554974, |
| "grad_norm": 2.193090846077988, |
| "learning_rate": 1.912719956671905e-06, |
| "loss": 0.9179, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3612565445026178, |
| "grad_norm": 1.430834588607403, |
| "learning_rate": 1.9109384277457366e-06, |
| "loss": 0.9276, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.36387434554973824, |
| "grad_norm": 1.4448020174914782, |
| "learning_rate": 1.9091397469701734e-06, |
| "loss": 0.9627, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.36649214659685864, |
| "grad_norm": 1.4436255308032022, |
| "learning_rate": 1.9073239482121597e-06, |
| "loss": 0.9378, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.36910994764397903, |
| "grad_norm": 1.4798088827513647, |
| "learning_rate": 1.905491065660951e-06, |
| "loss": 0.9173, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3717277486910995, |
| "grad_norm": 1.4906417431608974, |
| "learning_rate": 1.9036411338274702e-06, |
| "loss": 0.919, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3743455497382199, |
| "grad_norm": 1.3081791814577268, |
| "learning_rate": 1.9017741875436569e-06, |
| "loss": 0.9344, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3769633507853403, |
| "grad_norm": 1.580109669017806, |
| "learning_rate": 1.8998902619618114e-06, |
| "loss": 0.8929, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3795811518324607, |
| "grad_norm": 1.3635250226966933, |
| "learning_rate": 1.8979893925539336e-06, |
| "loss": 0.9081, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.38219895287958117, |
| "grad_norm": 1.6121323606475548, |
| "learning_rate": 1.8960716151110553e-06, |
| "loss": 0.9498, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.38481675392670156, |
| "grad_norm": 1.5509213848165346, |
| "learning_rate": 1.894136965742565e-06, |
| "loss": 0.9346, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.387434554973822, |
| "grad_norm": 1.2608338411937827, |
| "learning_rate": 1.8921854808755292e-06, |
| "loss": 0.9172, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.3900523560209424, |
| "grad_norm": 1.6360405183310027, |
| "learning_rate": 1.8902171972540058e-06, |
| "loss": 0.9451, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.39267015706806285, |
| "grad_norm": 1.5445742243019402, |
| "learning_rate": 1.8882321519383533e-06, |
| "loss": 0.9129, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.39528795811518325, |
| "grad_norm": 1.4940492827900818, |
| "learning_rate": 1.886230382304531e-06, |
| "loss": 0.9094, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.39790575916230364, |
| "grad_norm": 1.3563938923005274, |
| "learning_rate": 1.884211926043398e-06, |
| "loss": 0.9043, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4005235602094241, |
| "grad_norm": 1.6211122765460089, |
| "learning_rate": 1.882176821160001e-06, |
| "loss": 0.9305, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4031413612565445, |
| "grad_norm": 1.681549030140733, |
| "learning_rate": 1.8801251059728602e-06, |
| "loss": 0.9355, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.40575916230366493, |
| "grad_norm": 1.3532611132842844, |
| "learning_rate": 1.878056819113247e-06, |
| "loss": 0.9037, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4083769633507853, |
| "grad_norm": 1.2366899485640606, |
| "learning_rate": 1.875971999524458e-06, |
| "loss": 0.9226, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.4109947643979058, |
| "grad_norm": 1.509182442933609, |
| "learning_rate": 1.8738706864610791e-06, |
| "loss": 0.9369, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.41361256544502617, |
| "grad_norm": 1.5001269900745426, |
| "learning_rate": 1.8717529194882497e-06, |
| "loss": 0.9205, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4162303664921466, |
| "grad_norm": 1.526382871129804, |
| "learning_rate": 1.8696187384809153e-06, |
| "loss": 0.9528, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.418848167539267, |
| "grad_norm": 1.6451704713068278, |
| "learning_rate": 1.8674681836230768e-06, |
| "loss": 0.902, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4214659685863874, |
| "grad_norm": 1.6801687897446185, |
| "learning_rate": 1.8653012954070356e-06, |
| "loss": 0.9501, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.42408376963350786, |
| "grad_norm": 1.492660556590876, |
| "learning_rate": 1.8631181146326303e-06, |
| "loss": 0.8757, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.42670157068062825, |
| "grad_norm": 1.5445804684094795, |
| "learning_rate": 1.860918682406467e-06, |
| "loss": 0.965, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4293193717277487, |
| "grad_norm": 1.3120909599925894, |
| "learning_rate": 1.8587030401411478e-06, |
| "loss": 0.905, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4319371727748691, |
| "grad_norm": 1.4931155750107132, |
| "learning_rate": 1.8564712295544892e-06, |
| "loss": 0.9572, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.43455497382198954, |
| "grad_norm": 1.4799748467877742, |
| "learning_rate": 1.8542232926687382e-06, |
| "loss": 0.9403, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.43717277486910994, |
| "grad_norm": 1.398242361130049, |
| "learning_rate": 1.851959271809779e-06, |
| "loss": 0.9277, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.4397905759162304, |
| "grad_norm": 1.3162309419047837, |
| "learning_rate": 1.8496792096063379e-06, |
| "loss": 0.9432, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.4424083769633508, |
| "grad_norm": 1.3729148476187043, |
| "learning_rate": 1.8473831489891798e-06, |
| "loss": 0.9163, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.44502617801047123, |
| "grad_norm": 1.3752720452781697, |
| "learning_rate": 1.8450711331903005e-06, |
| "loss": 0.8945, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4476439790575916, |
| "grad_norm": 3.097140298366517, |
| "learning_rate": 1.8427432057421113e-06, |
| "loss": 0.9809, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.450261780104712, |
| "grad_norm": 1.4316389209889804, |
| "learning_rate": 1.8403994104766212e-06, |
| "loss": 0.9334, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.45287958115183247, |
| "grad_norm": 1.3301576806951263, |
| "learning_rate": 1.83803979152461e-06, |
| "loss": 0.8997, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.45549738219895286, |
| "grad_norm": 1.574246099163456, |
| "learning_rate": 1.8356643933147985e-06, |
| "loss": 0.9339, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4581151832460733, |
| "grad_norm": 1.5726445345032816, |
| "learning_rate": 1.8332732605730109e-06, |
| "loss": 0.9189, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.4607329842931937, |
| "grad_norm": 1.3989291143673765, |
| "learning_rate": 1.8308664383213342e-06, |
| "loss": 0.9581, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.46335078534031415, |
| "grad_norm": 1.2735890663091676, |
| "learning_rate": 1.8284439718772687e-06, |
| "loss": 0.8719, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.46596858638743455, |
| "grad_norm": 1.3611146629538726, |
| "learning_rate": 1.8260059068528762e-06, |
| "loss": 0.9096, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.468586387434555, |
| "grad_norm": 1.4059562216053316, |
| "learning_rate": 1.82355228915392e-06, |
| "loss": 0.9278, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4712041884816754, |
| "grad_norm": 1.421480459895735, |
| "learning_rate": 1.8210831649790015e-06, |
| "loss": 0.9062, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4738219895287958, |
| "grad_norm": 1.345537280701926, |
| "learning_rate": 1.8185985808186901e-06, |
| "loss": 0.9398, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.47643979057591623, |
| "grad_norm": 1.5028559265923433, |
| "learning_rate": 1.8160985834546474e-06, |
| "loss": 0.9499, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4790575916230366, |
| "grad_norm": 1.4192629619284285, |
| "learning_rate": 1.813583219958746e-06, |
| "loss": 0.929, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4816753926701571, |
| "grad_norm": 1.4195786299746405, |
| "learning_rate": 1.811052537692186e-06, |
| "loss": 0.9476, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.48429319371727747, |
| "grad_norm": 2.1261085586767563, |
| "learning_rate": 1.8085065843045986e-06, |
| "loss": 0.9337, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4869109947643979, |
| "grad_norm": 1.2369103172872313, |
| "learning_rate": 1.8059454077331526e-06, |
| "loss": 0.9131, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4895287958115183, |
| "grad_norm": 1.370665008442108, |
| "learning_rate": 1.8033690562016507e-06, |
| "loss": 0.9224, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.49214659685863876, |
| "grad_norm": 1.4363195193343707, |
| "learning_rate": 1.8007775782196212e-06, |
| "loss": 0.8641, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.49476439790575916, |
| "grad_norm": 1.379290489309924, |
| "learning_rate": 1.798171022581405e-06, |
| "loss": 0.8866, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4973821989528796, |
| "grad_norm": 1.3327086874705742, |
| "learning_rate": 1.7955494383652364e-06, |
| "loss": 0.9627, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.3524230122373244, |
| "learning_rate": 1.7929128749323193e-06, |
| "loss": 0.9298, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5026178010471204, |
| "grad_norm": 1.752108274154178, |
| "learning_rate": 1.7902613819258983e-06, |
| "loss": 0.9339, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5052356020942408, |
| "grad_norm": 1.3300479424414557, |
| "learning_rate": 1.7875950092703232e-06, |
| "loss": 0.8478, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5078534031413613, |
| "grad_norm": 1.306067293591098, |
| "learning_rate": 1.784913807170109e-06, |
| "loss": 0.9291, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5104712041884817, |
| "grad_norm": 1.354434811620228, |
| "learning_rate": 1.7822178261089917e-06, |
| "loss": 0.9201, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5130890052356021, |
| "grad_norm": 1.2911273639837269, |
| "learning_rate": 1.7795071168489759e-06, |
| "loss": 0.9113, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5157068062827225, |
| "grad_norm": 1.5073633394920012, |
| "learning_rate": 1.776781730429381e-06, |
| "loss": 0.9479, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.518324607329843, |
| "grad_norm": 1.2653498786382138, |
| "learning_rate": 1.7740417181658787e-06, |
| "loss": 0.9511, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5209424083769634, |
| "grad_norm": 1.5973497991327796, |
| "learning_rate": 1.771287131649527e-06, |
| "loss": 0.9362, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5235602094240838, |
| "grad_norm": 1.379719661766157, |
| "learning_rate": 1.7685180227458e-06, |
| "loss": 0.8989, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5261780104712042, |
| "grad_norm": 1.4962583435319539, |
| "learning_rate": 1.7657344435936106e-06, |
| "loss": 0.8672, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5287958115183246, |
| "grad_norm": 1.3271274763682006, |
| "learning_rate": 1.762936446604327e-06, |
| "loss": 0.8942, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5314136125654451, |
| "grad_norm": 1.3534628217323486, |
| "learning_rate": 1.76012408446079e-06, |
| "loss": 0.9257, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5340314136125655, |
| "grad_norm": 1.3609075473720407, |
| "learning_rate": 1.7572974101163163e-06, |
| "loss": 0.9313, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5366492146596858, |
| "grad_norm": 8.364809892666464, |
| "learning_rate": 1.7544564767937046e-06, |
| "loss": 0.9791, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5392670157068062, |
| "grad_norm": 1.6277057784648477, |
| "learning_rate": 1.7516013379842336e-06, |
| "loss": 0.9643, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5418848167539267, |
| "grad_norm": 1.426014151333132, |
| "learning_rate": 1.7487320474466523e-06, |
| "loss": 0.8707, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5445026178010471, |
| "grad_norm": 1.5101226358909996, |
| "learning_rate": 1.74584865920617e-06, |
| "loss": 0.9143, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5471204188481675, |
| "grad_norm": 1.326604618366996, |
| "learning_rate": 1.742951227553438e-06, |
| "loss": 0.89, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5497382198952879, |
| "grad_norm": 1.3388738076109712, |
| "learning_rate": 1.7400398070435292e-06, |
| "loss": 0.8602, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5523560209424084, |
| "grad_norm": 1.4065266233569822, |
| "learning_rate": 1.7371144524949073e-06, |
| "loss": 0.9099, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5549738219895288, |
| "grad_norm": 1.2785583398255533, |
| "learning_rate": 1.734175218988398e-06, |
| "loss": 0.8571, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5575916230366492, |
| "grad_norm": 1.420241281508304, |
| "learning_rate": 1.7312221618661514e-06, |
| "loss": 0.9063, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.5602094240837696, |
| "grad_norm": 1.5127883406404337, |
| "learning_rate": 1.7282553367305975e-06, |
| "loss": 0.9437, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.56282722513089, |
| "grad_norm": 1.4529213853984837, |
| "learning_rate": 1.7252747994434022e-06, |
| "loss": 0.9517, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5654450261780105, |
| "grad_norm": 1.3516735541386737, |
| "learning_rate": 1.7222806061244147e-06, |
| "loss": 0.9019, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5680628272251309, |
| "grad_norm": 1.4258098232064567, |
| "learning_rate": 1.7192728131506092e-06, |
| "loss": 0.901, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5706806282722513, |
| "grad_norm": 1.299725438647712, |
| "learning_rate": 1.7162514771550253e-06, |
| "loss": 0.9115, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5732984293193717, |
| "grad_norm": 1.300283239081131, |
| "learning_rate": 1.7132166550257017e-06, |
| "loss": 0.9022, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5759162303664922, |
| "grad_norm": 1.3020433685723158, |
| "learning_rate": 1.7101684039046037e-06, |
| "loss": 0.9243, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5785340314136126, |
| "grad_norm": 1.3383755861416764, |
| "learning_rate": 1.7071067811865474e-06, |
| "loss": 0.914, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.581151832460733, |
| "grad_norm": 1.3617645878878841, |
| "learning_rate": 1.7040318445181207e-06, |
| "loss": 0.9448, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5837696335078534, |
| "grad_norm": 1.317670208749211, |
| "learning_rate": 1.700943651796597e-06, |
| "loss": 0.9106, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5863874345549738, |
| "grad_norm": 1.478587633969318, |
| "learning_rate": 1.697842261168843e-06, |
| "loss": 0.947, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5890052356020943, |
| "grad_norm": 1.3390543612415757, |
| "learning_rate": 1.6947277310302282e-06, |
| "loss": 0.9077, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5916230366492147, |
| "grad_norm": 1.3598980768634963, |
| "learning_rate": 1.6916001200235207e-06, |
| "loss": 0.8765, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5942408376963351, |
| "grad_norm": 1.591747805206869, |
| "learning_rate": 1.6884594870377869e-06, |
| "loss": 0.9668, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5968586387434555, |
| "grad_norm": 1.4026080824858307, |
| "learning_rate": 1.68530589120728e-06, |
| "loss": 0.9415, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.599476439790576, |
| "grad_norm": 1.3140522254135556, |
| "learning_rate": 1.682139391910328e-06, |
| "loss": 0.9124, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6020942408376964, |
| "grad_norm": 1.3573630561193102, |
| "learning_rate": 1.6789600487682153e-06, |
| "loss": 0.8675, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6047120418848168, |
| "grad_norm": 1.5531452619140906, |
| "learning_rate": 1.6757679216440605e-06, |
| "loss": 0.8829, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6073298429319371, |
| "grad_norm": 1.386101738337072, |
| "learning_rate": 1.672563070641688e-06, |
| "loss": 0.9138, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6099476439790575, |
| "grad_norm": 1.515437757259388, |
| "learning_rate": 1.6693455561044975e-06, |
| "loss": 0.9046, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.612565445026178, |
| "grad_norm": 1.4439550204519966, |
| "learning_rate": 1.666115438614328e-06, |
| "loss": 0.873, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6151832460732984, |
| "grad_norm": 1.7085338183883585, |
| "learning_rate": 1.662872778990316e-06, |
| "loss": 0.9909, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6178010471204188, |
| "grad_norm": 1.6347877993915798, |
| "learning_rate": 1.6596176382877504e-06, |
| "loss": 0.9517, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6204188481675392, |
| "grad_norm": 1.380918811824384, |
| "learning_rate": 1.6563500777969252e-06, |
| "loss": 0.8986, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6230366492146597, |
| "grad_norm": 1.5191318499232158, |
| "learning_rate": 1.6530701590419823e-06, |
| "loss": 0.8985, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6256544502617801, |
| "grad_norm": 1.5356278576800755, |
| "learning_rate": 1.6497779437797546e-06, |
| "loss": 0.9535, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6282722513089005, |
| "grad_norm": 1.7591796956296735, |
| "learning_rate": 1.6464734939986035e-06, |
| "loss": 0.9594, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6308900523560209, |
| "grad_norm": 1.3110448901705802, |
| "learning_rate": 1.6431568719172513e-06, |
| "loss": 0.8951, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.6335078534031413, |
| "grad_norm": 1.4810842676756832, |
| "learning_rate": 1.6398281399836097e-06, |
| "loss": 0.9074, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6361256544502618, |
| "grad_norm": 1.4043554800654259, |
| "learning_rate": 1.6364873608736035e-06, |
| "loss": 0.8844, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6387434554973822, |
| "grad_norm": 1.3361096629243905, |
| "learning_rate": 1.6331345974899922e-06, |
| "loss": 0.9108, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6413612565445026, |
| "grad_norm": 1.4539365545858338, |
| "learning_rate": 1.629769912961183e-06, |
| "loss": 0.9266, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.643979057591623, |
| "grad_norm": 1.3882801062958485, |
| "learning_rate": 1.626393370640045e-06, |
| "loss": 0.8367, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6465968586387435, |
| "grad_norm": 1.357920569980624, |
| "learning_rate": 1.6230050341027133e-06, |
| "loss": 0.9019, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6492146596858639, |
| "grad_norm": 1.376988259591919, |
| "learning_rate": 1.6196049671473952e-06, |
| "loss": 0.9237, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6518324607329843, |
| "grad_norm": 1.4667994230662116, |
| "learning_rate": 1.616193233793166e-06, |
| "loss": 0.9049, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.6544502617801047, |
| "grad_norm": 1.4710588704789733, |
| "learning_rate": 1.612769898278766e-06, |
| "loss": 0.9257, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6570680628272252, |
| "grad_norm": 1.4734634699731224, |
| "learning_rate": 1.6093350250613892e-06, |
| "loss": 0.9427, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.6596858638743456, |
| "grad_norm": 1.6689138343051504, |
| "learning_rate": 1.605888678815471e-06, |
| "loss": 0.9178, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.662303664921466, |
| "grad_norm": 1.60608501987548, |
| "learning_rate": 1.602430924431469e-06, |
| "loss": 0.9243, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.6649214659685864, |
| "grad_norm": 1.4533072028855505, |
| "learning_rate": 1.5989618270146422e-06, |
| "loss": 0.9299, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6675392670157068, |
| "grad_norm": 1.3611068269937114, |
| "learning_rate": 1.5954814518838253e-06, |
| "loss": 0.8827, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6701570680628273, |
| "grad_norm": 2.189032798629252, |
| "learning_rate": 1.5919898645701987e-06, |
| "loss": 0.8654, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.6727748691099477, |
| "grad_norm": 1.6469272333873342, |
| "learning_rate": 1.5884871308160536e-06, |
| "loss": 0.8814, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.675392670157068, |
| "grad_norm": 1.615551535176859, |
| "learning_rate": 1.5849733165735555e-06, |
| "loss": 0.8662, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6780104712041884, |
| "grad_norm": 1.6004613743827494, |
| "learning_rate": 1.5814484880035016e-06, |
| "loss": 0.9108, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.680628272251309, |
| "grad_norm": 1.3629024118660333, |
| "learning_rate": 1.5779127114740755e-06, |
| "loss": 0.8778, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6832460732984293, |
| "grad_norm": 1.3470589436390712, |
| "learning_rate": 1.5743660535595975e-06, |
| "loss": 0.8397, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.6858638743455497, |
| "grad_norm": 1.717889341576825, |
| "learning_rate": 1.5708085810392705e-06, |
| "loss": 0.8965, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6884816753926701, |
| "grad_norm": 1.4542924841797191, |
| "learning_rate": 1.567240360895924e-06, |
| "loss": 0.9216, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6910994764397905, |
| "grad_norm": 1.4614348413742864, |
| "learning_rate": 1.563661460314751e-06, |
| "loss": 0.8916, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.693717277486911, |
| "grad_norm": 1.3658211303340664, |
| "learning_rate": 1.5600719466820447e-06, |
| "loss": 0.8966, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6963350785340314, |
| "grad_norm": 1.4487091797072118, |
| "learning_rate": 1.5564718875839287e-06, |
| "loss": 0.9164, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6989528795811518, |
| "grad_norm": 1.4145507388498686, |
| "learning_rate": 1.5528613508050847e-06, |
| "loss": 0.9457, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7015706806282722, |
| "grad_norm": 1.5517624112660597, |
| "learning_rate": 1.5492404043274767e-06, |
| "loss": 0.9648, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7041884816753927, |
| "grad_norm": 1.301936781288767, |
| "learning_rate": 1.5456091163290697e-06, |
| "loss": 0.9122, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7068062827225131, |
| "grad_norm": 1.4477955601190502, |
| "learning_rate": 1.5419675551825472e-06, |
| "loss": 0.908, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7094240837696335, |
| "grad_norm": 1.4661273565672055, |
| "learning_rate": 1.5383157894540242e-06, |
| "loss": 0.9318, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.7120418848167539, |
| "grad_norm": 1.786377011283619, |
| "learning_rate": 1.5346538879017538e-06, |
| "loss": 0.9028, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.7146596858638743, |
| "grad_norm": 1.3019728233703969, |
| "learning_rate": 1.5309819194748359e-06, |
| "loss": 0.8638, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.7172774869109948, |
| "grad_norm": 1.3738632081309683, |
| "learning_rate": 1.5272999533119162e-06, |
| "loss": 0.9442, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7198952879581152, |
| "grad_norm": 5.173480528185797, |
| "learning_rate": 1.5236080587398853e-06, |
| "loss": 0.8709, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7225130890052356, |
| "grad_norm": 1.44197906148805, |
| "learning_rate": 1.5199063052725745e-06, |
| "loss": 0.9346, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.725130890052356, |
| "grad_norm": 1.468890332436025, |
| "learning_rate": 1.516194762609445e-06, |
| "loss": 0.913, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.7277486910994765, |
| "grad_norm": 1.925597137283822, |
| "learning_rate": 1.512473500634277e-06, |
| "loss": 0.9028, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7303664921465969, |
| "grad_norm": 1.3886760473191113, |
| "learning_rate": 1.5087425894138534e-06, |
| "loss": 0.9048, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7329842931937173, |
| "grad_norm": 1.5267405887192913, |
| "learning_rate": 1.5050020991966403e-06, |
| "loss": 0.9088, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7356020942408377, |
| "grad_norm": 1.365489962801578, |
| "learning_rate": 1.501252100411465e-06, |
| "loss": 0.9127, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.7382198952879581, |
| "grad_norm": 1.4404983251988195, |
| "learning_rate": 1.497492663666189e-06, |
| "loss": 0.8482, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7408376963350786, |
| "grad_norm": 1.3838344605458115, |
| "learning_rate": 1.4937238597463784e-06, |
| "loss": 0.9152, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.743455497382199, |
| "grad_norm": 1.5809314897796825, |
| "learning_rate": 1.4899457596139727e-06, |
| "loss": 0.9415, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7460732984293194, |
| "grad_norm": 1.400834699407417, |
| "learning_rate": 1.4861584344059474e-06, |
| "loss": 0.883, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7486910994764397, |
| "grad_norm": 1.5672834188242901, |
| "learning_rate": 1.4823619554329744e-06, |
| "loss": 0.924, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7513089005235603, |
| "grad_norm": 1.1886413237492293, |
| "learning_rate": 1.4785563941780805e-06, |
| "loss": 0.8636, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.7539267015706806, |
| "grad_norm": 1.3777896211715643, |
| "learning_rate": 1.4747418222952993e-06, |
| "loss": 0.8817, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.756544502617801, |
| "grad_norm": 1.6346869859490836, |
| "learning_rate": 1.4709183116083253e-06, |
| "loss": 0.9214, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7591623036649214, |
| "grad_norm": 1.3621585720862819, |
| "learning_rate": 1.4670859341091577e-06, |
| "loss": 0.9346, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7617801047120419, |
| "grad_norm": 1.6112089429608238, |
| "learning_rate": 1.4632447619567488e-06, |
| "loss": 0.8759, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.7643979057591623, |
| "grad_norm": 1.4653972840225233, |
| "learning_rate": 1.4593948674756415e-06, |
| "loss": 0.9032, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.7670157068062827, |
| "grad_norm": 1.3303221288943385, |
| "learning_rate": 1.4555363231546109e-06, |
| "loss": 0.9613, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.7696335078534031, |
| "grad_norm": 1.3332468755879452, |
| "learning_rate": 1.4516692016452979e-06, |
| "loss": 0.8779, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7722513089005235, |
| "grad_norm": 1.524609493562953, |
| "learning_rate": 1.4477935757608397e-06, |
| "loss": 0.87, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.774869109947644, |
| "grad_norm": 1.4832497839060446, |
| "learning_rate": 1.4439095184745022e-06, |
| "loss": 0.9098, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.7774869109947644, |
| "grad_norm": 2.5552029987523364, |
| "learning_rate": 1.4400171029183035e-06, |
| "loss": 0.8708, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.7801047120418848, |
| "grad_norm": 1.3400617686777159, |
| "learning_rate": 1.4361164023816374e-06, |
| "loss": 0.8961, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.7827225130890052, |
| "grad_norm": 1.3620706429960963, |
| "learning_rate": 1.4322074903098945e-06, |
| "loss": 0.8807, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.7853403141361257, |
| "grad_norm": 1.4384175128345436, |
| "learning_rate": 1.428290440303077e-06, |
| "loss": 0.9545, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7879581151832461, |
| "grad_norm": 1.4875211888133841, |
| "learning_rate": 1.4243653261144167e-06, |
| "loss": 0.9175, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.7905759162303665, |
| "grad_norm": 1.570980876406564, |
| "learning_rate": 1.4204322216489813e-06, |
| "loss": 0.8901, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.7931937172774869, |
| "grad_norm": 1.4889436989307698, |
| "learning_rate": 1.4164912009622878e-06, |
| "loss": 0.9462, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.7958115183246073, |
| "grad_norm": 1.3340844821967115, |
| "learning_rate": 1.4125423382589048e-06, |
| "loss": 0.9142, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.7984293193717278, |
| "grad_norm": 1.857372509679973, |
| "learning_rate": 1.4085857078910567e-06, |
| "loss": 0.9099, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8010471204188482, |
| "grad_norm": 1.2983520088704958, |
| "learning_rate": 1.4046213843572234e-06, |
| "loss": 0.9108, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8036649214659686, |
| "grad_norm": 1.5225781908498903, |
| "learning_rate": 1.400649442300738e-06, |
| "loss": 0.918, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.806282722513089, |
| "grad_norm": 1.311943906666495, |
| "learning_rate": 1.3966699565083803e-06, |
| "loss": 0.897, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8089005235602095, |
| "grad_norm": 1.82156326592592, |
| "learning_rate": 1.3926830019089694e-06, |
| "loss": 0.9814, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.8115183246073299, |
| "grad_norm": 1.4362834143299723, |
| "learning_rate": 1.3886886535719539e-06, |
| "loss": 0.9087, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8141361256544503, |
| "grad_norm": 1.4038955725361697, |
| "learning_rate": 1.3846869867059965e-06, |
| "loss": 0.9048, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.8167539267015707, |
| "grad_norm": 1.490438693551047, |
| "learning_rate": 1.3806780766575587e-06, |
| "loss": 0.9029, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.819371727748691, |
| "grad_norm": 1.3239708026053327, |
| "learning_rate": 1.3766619989094827e-06, |
| "loss": 0.8725, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.8219895287958116, |
| "grad_norm": 1.5652591177936177, |
| "learning_rate": 1.3726388290795696e-06, |
| "loss": 0.8587, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.824607329842932, |
| "grad_norm": 1.438755687676097, |
| "learning_rate": 1.3686086429191552e-06, |
| "loss": 0.9112, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8272251308900523, |
| "grad_norm": 1.5400970488740904, |
| "learning_rate": 1.3645715163116845e-06, |
| "loss": 0.9155, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8298429319371727, |
| "grad_norm": 1.3899738949730092, |
| "learning_rate": 1.3605275252712826e-06, |
| "loss": 0.8485, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.8324607329842932, |
| "grad_norm": 1.3855057183658486, |
| "learning_rate": 1.3564767459413235e-06, |
| "loss": 0.9124, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.8350785340314136, |
| "grad_norm": 1.467160219223646, |
| "learning_rate": 1.3524192545929963e-06, |
| "loss": 0.8978, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.837696335078534, |
| "grad_norm": 1.5013551770066862, |
| "learning_rate": 1.3483551276238688e-06, |
| "loss": 0.8948, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8403141361256544, |
| "grad_norm": 1.529726050974898, |
| "learning_rate": 1.3442844415564496e-06, |
| "loss": 0.892, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.8429319371727748, |
| "grad_norm": 1.287976944413352, |
| "learning_rate": 1.3402072730367474e-06, |
| "loss": 0.8911, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.8455497382198953, |
| "grad_norm": 1.3791603667409817, |
| "learning_rate": 1.336123698832827e-06, |
| "loss": 0.9025, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.8481675392670157, |
| "grad_norm": 1.5552743258992312, |
| "learning_rate": 1.3320337958333637e-06, |
| "loss": 0.8899, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.8507853403141361, |
| "grad_norm": 1.4296493584512364, |
| "learning_rate": 1.3279376410461987e-06, |
| "loss": 0.9075, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8534031413612565, |
| "grad_norm": 1.303929989941917, |
| "learning_rate": 1.3238353115968838e-06, |
| "loss": 0.8963, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.856020942408377, |
| "grad_norm": 1.3368769826989313, |
| "learning_rate": 1.3197268847272338e-06, |
| "loss": 0.9027, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.8586387434554974, |
| "grad_norm": 1.3105891658969575, |
| "learning_rate": 1.3156124377938698e-06, |
| "loss": 0.817, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.8612565445026178, |
| "grad_norm": 1.4615158243671946, |
| "learning_rate": 1.3114920482667633e-06, |
| "loss": 0.9125, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8638743455497382, |
| "grad_norm": 2.5772755615288983, |
| "learning_rate": 1.307365793727778e-06, |
| "loss": 0.9821, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8664921465968587, |
| "grad_norm": 1.4612079441188084, |
| "learning_rate": 1.3032337518692079e-06, |
| "loss": 0.8952, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.8691099476439791, |
| "grad_norm": 1.3571686729913004, |
| "learning_rate": 1.2990960004923153e-06, |
| "loss": 0.9153, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.8717277486910995, |
| "grad_norm": 1.4985498764836895, |
| "learning_rate": 1.2949526175058663e-06, |
| "loss": 0.8995, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.8743455497382199, |
| "grad_norm": 1.3963461828617652, |
| "learning_rate": 1.2908036809246622e-06, |
| "loss": 0.8848, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8769633507853403, |
| "grad_norm": 1.413080507543013, |
| "learning_rate": 1.286649268868073e-06, |
| "loss": 0.8825, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8795811518324608, |
| "grad_norm": 1.3310073644545, |
| "learning_rate": 1.2824894595585636e-06, |
| "loss": 0.8393, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.8821989528795812, |
| "grad_norm": 1.3857600102660343, |
| "learning_rate": 1.278324331320224e-06, |
| "loss": 0.8838, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.8848167539267016, |
| "grad_norm": 1.4666995255214144, |
| "learning_rate": 1.2741539625772916e-06, |
| "loss": 0.9547, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.887434554973822, |
| "grad_norm": 1.4588445768820393, |
| "learning_rate": 1.269978431852678e-06, |
| "loss": 0.8716, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.8900523560209425, |
| "grad_norm": 1.316646197155156, |
| "learning_rate": 1.265797817766486e-06, |
| "loss": 0.8752, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8926701570680629, |
| "grad_norm": 1.4817528591310547, |
| "learning_rate": 1.2616121990345344e-06, |
| "loss": 0.9007, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.8952879581151832, |
| "grad_norm": 1.7109135638436224, |
| "learning_rate": 1.2574216544668719e-06, |
| "loss": 0.9345, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.8979057591623036, |
| "grad_norm": 1.3152053363907765, |
| "learning_rate": 1.2532262629662947e-06, |
| "loss": 0.8771, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.900523560209424, |
| "grad_norm": 1.5326873528631686, |
| "learning_rate": 1.2490261035268612e-06, |
| "loss": 0.8396, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9031413612565445, |
| "grad_norm": 1.4491144795758293, |
| "learning_rate": 1.244821255232404e-06, |
| "loss": 0.8749, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9057591623036649, |
| "grad_norm": 1.4911538522916175, |
| "learning_rate": 1.2406117972550411e-06, |
| "loss": 0.9156, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9083769633507853, |
| "grad_norm": 1.3535836069104445, |
| "learning_rate": 1.2363978088536851e-06, |
| "loss": 0.8599, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.9109947643979057, |
| "grad_norm": 1.3954072421965842, |
| "learning_rate": 1.2321793693725506e-06, |
| "loss": 0.904, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.9136125654450262, |
| "grad_norm": 1.3190244040711543, |
| "learning_rate": 1.2279565582396615e-06, |
| "loss": 0.9158, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.9162303664921466, |
| "grad_norm": 1.4456979027927384, |
| "learning_rate": 1.2237294549653539e-06, |
| "loss": 0.9346, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.918848167539267, |
| "grad_norm": 1.3521329743652177, |
| "learning_rate": 1.219498139140779e-06, |
| "loss": 0.9018, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.9214659685863874, |
| "grad_norm": 1.3385214396100333, |
| "learning_rate": 1.2152626904364064e-06, |
| "loss": 0.9184, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.9240837696335078, |
| "grad_norm": 1.4057120516101806, |
| "learning_rate": 1.2110231886005222e-06, |
| "loss": 0.8784, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.9267015706806283, |
| "grad_norm": 1.2291953428596942, |
| "learning_rate": 1.2067797134577273e-06, |
| "loss": 0.9362, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.9293193717277487, |
| "grad_norm": 1.7519367873678091, |
| "learning_rate": 1.202532344907436e-06, |
| "loss": 0.894, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.9319371727748691, |
| "grad_norm": 1.3497711417923366, |
| "learning_rate": 1.198281162922371e-06, |
| "loss": 0.8791, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.9345549738219895, |
| "grad_norm": 1.3737325092735109, |
| "learning_rate": 1.1940262475470555e-06, |
| "loss": 0.9087, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.93717277486911, |
| "grad_norm": 1.542477906016783, |
| "learning_rate": 1.18976767889631e-06, |
| "loss": 0.9362, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.9397905759162304, |
| "grad_norm": 1.3776731028595803, |
| "learning_rate": 1.1855055371537399e-06, |
| "loss": 0.928, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.9424083769633508, |
| "grad_norm": 1.310231224043665, |
| "learning_rate": 1.1812399025702289e-06, |
| "loss": 0.906, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9450261780104712, |
| "grad_norm": 1.334814152942846, |
| "learning_rate": 1.1769708554624255e-06, |
| "loss": 0.9066, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.9476439790575916, |
| "grad_norm": 1.3128766124551117, |
| "learning_rate": 1.1726984762112326e-06, |
| "loss": 0.8977, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.9502617801047121, |
| "grad_norm": 1.399058882011187, |
| "learning_rate": 1.168422845260293e-06, |
| "loss": 0.9229, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.9528795811518325, |
| "grad_norm": 1.500152687541938, |
| "learning_rate": 1.1641440431144748e-06, |
| "loss": 0.8986, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.9554973821989529, |
| "grad_norm": 1.5084053156949409, |
| "learning_rate": 1.1598621503383564e-06, |
| "loss": 0.9008, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9581151832460733, |
| "grad_norm": 1.3612062622306267, |
| "learning_rate": 1.1555772475547083e-06, |
| "loss": 0.943, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.9607329842931938, |
| "grad_norm": 1.3867826973216686, |
| "learning_rate": 1.1512894154429757e-06, |
| "loss": 0.89, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.9633507853403142, |
| "grad_norm": 1.4234905166268286, |
| "learning_rate": 1.14699873473776e-06, |
| "loss": 0.8814, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.9659685863874345, |
| "grad_norm": 1.3665524262874564, |
| "learning_rate": 1.1427052862272981e-06, |
| "loss": 0.9265, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9685863874345549, |
| "grad_norm": 1.4510481388055758, |
| "learning_rate": 1.1384091507519403e-06, |
| "loss": 0.8633, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9712041884816754, |
| "grad_norm": 1.3161649659452272, |
| "learning_rate": 1.1341104092026302e-06, |
| "loss": 0.8663, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.9738219895287958, |
| "grad_norm": 1.6037169825162456, |
| "learning_rate": 1.1298091425193806e-06, |
| "loss": 0.8744, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9764397905759162, |
| "grad_norm": 1.362122922346099, |
| "learning_rate": 1.1255054316897482e-06, |
| "loss": 0.8806, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.9790575916230366, |
| "grad_norm": 1.5011277831230372, |
| "learning_rate": 1.121199357747312e-06, |
| "loss": 0.8638, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.981675392670157, |
| "grad_norm": 1.52076108540339, |
| "learning_rate": 1.1168910017701434e-06, |
| "loss": 0.8589, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9842931937172775, |
| "grad_norm": 1.483128499575889, |
| "learning_rate": 1.112580444879283e-06, |
| "loss": 0.9694, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.9869109947643979, |
| "grad_norm": 1.4797679041408796, |
| "learning_rate": 1.1082677682372112e-06, |
| "loss": 0.9051, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.9895287958115183, |
| "grad_norm": 1.400824430258494, |
| "learning_rate": 1.1039530530463217e-06, |
| "loss": 0.9331, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.9921465968586387, |
| "grad_norm": 1.4604182325202852, |
| "learning_rate": 1.0996363805473902e-06, |
| "loss": 0.9056, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.9947643979057592, |
| "grad_norm": 1.3382499509102248, |
| "learning_rate": 1.0953178320180473e-06, |
| "loss": 0.8635, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9973821989528796, |
| "grad_norm": 1.3720539659724427, |
| "learning_rate": 1.0909974887712468e-06, |
| "loss": 0.9, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4704091927119143, |
| "learning_rate": 1.0866754321537337e-06, |
| "loss": 0.8022, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0026178010471205, |
| "grad_norm": 1.7305033148565634, |
| "learning_rate": 1.0823517435445149e-06, |
| "loss": 0.7697, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.0052356020942408, |
| "grad_norm": 2.1198600306900985, |
| "learning_rate": 1.078026504353325e-06, |
| "loss": 0.8483, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.0078534031413613, |
| "grad_norm": 1.4135913453590154, |
| "learning_rate": 1.0736997960190945e-06, |
| "loss": 0.7525, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0104712041884816, |
| "grad_norm": 1.462266762149253, |
| "learning_rate": 1.0693717000084158e-06, |
| "loss": 0.8274, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.013089005235602, |
| "grad_norm": 1.4726017966388771, |
| "learning_rate": 1.06504229781401e-06, |
| "loss": 0.8082, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.0157068062827226, |
| "grad_norm": 1.426464680792084, |
| "learning_rate": 1.0607116709531918e-06, |
| "loss": 0.8125, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.0183246073298429, |
| "grad_norm": 1.3058298942696056, |
| "learning_rate": 1.0563799009663343e-06, |
| "loss": 0.8185, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.0209424083769634, |
| "grad_norm": 1.4557168180224875, |
| "learning_rate": 1.0520470694153352e-06, |
| "loss": 0.8916, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0235602094240839, |
| "grad_norm": 1.5575329181134603, |
| "learning_rate": 1.047713257882079e-06, |
| "loss": 0.8327, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.0261780104712042, |
| "grad_norm": 1.3921168320094996, |
| "learning_rate": 1.0433785479669038e-06, |
| "loss": 0.7948, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0287958115183247, |
| "grad_norm": 1.3873261021992926, |
| "learning_rate": 1.039043021287061e-06, |
| "loss": 0.8249, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.031413612565445, |
| "grad_norm": 1.4851013242830111, |
| "learning_rate": 1.034706759475182e-06, |
| "loss": 0.8064, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.0340314136125655, |
| "grad_norm": 1.4289503839983853, |
| "learning_rate": 1.03036984417774e-06, |
| "loss": 0.8068, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.036649214659686, |
| "grad_norm": 1.31022316801579, |
| "learning_rate": 1.026032357053512e-06, |
| "loss": 0.8078, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.0392670157068062, |
| "grad_norm": 1.349766167812012, |
| "learning_rate": 1.0216943797720417e-06, |
| "loss": 0.7756, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.0418848167539267, |
| "grad_norm": 1.3804825917028227, |
| "learning_rate": 1.017355994012102e-06, |
| "loss": 0.7969, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.044502617801047, |
| "grad_norm": 1.4959889891350746, |
| "learning_rate": 1.0130172814601574e-06, |
| "loss": 0.8359, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.0471204188481675, |
| "grad_norm": 1.3143021095217375, |
| "learning_rate": 1.0086783238088244e-06, |
| "loss": 0.7916, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.049738219895288, |
| "grad_norm": 1.4091455264086836, |
| "learning_rate": 1.0043392027553359e-06, |
| "loss": 0.817, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.0523560209424083, |
| "grad_norm": 1.3641021774119493, |
| "learning_rate": 1e-06, |
| "loss": 0.8104, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.0549738219895288, |
| "grad_norm": 1.390072313028801, |
| "learning_rate": 9.956607972446642e-07, |
| "loss": 0.8132, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.057591623036649, |
| "grad_norm": 1.2710386403919876, |
| "learning_rate": 9.913216761911753e-07, |
| "loss": 0.8039, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.0602094240837696, |
| "grad_norm": 1.242862890765766, |
| "learning_rate": 9.869827185398427e-07, |
| "loss": 0.7862, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0628272251308901, |
| "grad_norm": 1.33506569942573, |
| "learning_rate": 9.826440059878981e-07, |
| "loss": 0.8023, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.0654450261780104, |
| "grad_norm": 1.3850524324864126, |
| "learning_rate": 9.783056202279587e-07, |
| "loss": 0.8315, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.068062827225131, |
| "grad_norm": 1.3499847148022843, |
| "learning_rate": 9.73967642946488e-07, |
| "loss": 0.7899, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.0706806282722514, |
| "grad_norm": 1.317431663530825, |
| "learning_rate": 9.6963015582226e-07, |
| "loss": 0.7927, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0732984293193717, |
| "grad_norm": 1.3432306773320357, |
| "learning_rate": 9.65293240524818e-07, |
| "loss": 0.8614, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.0759162303664922, |
| "grad_norm": 1.465961434384519, |
| "learning_rate": 9.609569787129392e-07, |
| "loss": 0.8446, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.0785340314136125, |
| "grad_norm": 1.4857012200387993, |
| "learning_rate": 9.566214520330965e-07, |
| "loss": 0.8206, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.081151832460733, |
| "grad_norm": 1.4334007452515036, |
| "learning_rate": 9.52286742117921e-07, |
| "loss": 0.7807, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.0837696335078535, |
| "grad_norm": 1.2605877178680933, |
| "learning_rate": 9.479529305846652e-07, |
| "loss": 0.7858, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.0863874345549738, |
| "grad_norm": 1.8031336671335776, |
| "learning_rate": 9.436200990336656e-07, |
| "loss": 0.8199, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.0890052356020943, |
| "grad_norm": 1.3688896906397205, |
| "learning_rate": 9.392883290468082e-07, |
| "loss": 0.839, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.0916230366492146, |
| "grad_norm": 1.465000884517833, |
| "learning_rate": 9.349577021859899e-07, |
| "loss": 0.8224, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.094240837696335, |
| "grad_norm": 1.4130295036929852, |
| "learning_rate": 9.306282999915839e-07, |
| "loss": 0.7733, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.0968586387434556, |
| "grad_norm": 1.6978727982085136, |
| "learning_rate": 9.263002039809055e-07, |
| "loss": 0.8392, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.0994764397905759, |
| "grad_norm": 1.3270609399207, |
| "learning_rate": 9.219734956466752e-07, |
| "loss": 0.81, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1020942408376964, |
| "grad_norm": 1.494629573530863, |
| "learning_rate": 9.176482564554853e-07, |
| "loss": 0.7934, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.1047120418848166, |
| "grad_norm": 1.9219890871622851, |
| "learning_rate": 9.133245678462662e-07, |
| "loss": 0.7876, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.1073298429319371, |
| "grad_norm": 1.473802471620667, |
| "learning_rate": 9.090025112287532e-07, |
| "loss": 0.8096, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.1099476439790577, |
| "grad_norm": 1.4015065750690658, |
| "learning_rate": 9.046821679819526e-07, |
| "loss": 0.7518, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.112565445026178, |
| "grad_norm": 1.541286789023952, |
| "learning_rate": 9.003636194526098e-07, |
| "loss": 0.7672, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.1151832460732984, |
| "grad_norm": 1.4157184630591406, |
| "learning_rate": 8.960469469536784e-07, |
| "loss": 0.8175, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.117801047120419, |
| "grad_norm": 1.5568862286695853, |
| "learning_rate": 8.917322317627886e-07, |
| "loss": 0.811, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.1204188481675392, |
| "grad_norm": 1.5726052107932276, |
| "learning_rate": 8.874195551207173e-07, |
| "loss": 0.8063, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.1230366492146597, |
| "grad_norm": 1.6122473205768497, |
| "learning_rate": 8.831089982298568e-07, |
| "loss": 0.7934, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.12565445026178, |
| "grad_norm": 1.205866806133684, |
| "learning_rate": 8.78800642252688e-07, |
| "loss": 0.8106, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1282722513089005, |
| "grad_norm": 1.412392766302022, |
| "learning_rate": 8.744945683102516e-07, |
| "loss": 0.809, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.130890052356021, |
| "grad_norm": 1.3903266086408892, |
| "learning_rate": 8.701908574806198e-07, |
| "loss": 0.7988, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.1335078534031413, |
| "grad_norm": 1.3041510726839904, |
| "learning_rate": 8.658895907973696e-07, |
| "loss": 0.7732, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.1361256544502618, |
| "grad_norm": 1.5128743091161827, |
| "learning_rate": 8.615908492480598e-07, |
| "loss": 0.8066, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1387434554973823, |
| "grad_norm": 1.3364874437653085, |
| "learning_rate": 8.572947137727022e-07, |
| "loss": 0.7695, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1413612565445026, |
| "grad_norm": 1.7317675708475033, |
| "learning_rate": 8.530012652622397e-07, |
| "loss": 0.8304, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.143979057591623, |
| "grad_norm": 1.3702435709009628, |
| "learning_rate": 8.487105845570242e-07, |
| "loss": 0.7873, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.1465968586387434, |
| "grad_norm": 1.4627721057512983, |
| "learning_rate": 8.444227524452919e-07, |
| "loss": 0.7968, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.149214659685864, |
| "grad_norm": 1.3798178921172715, |
| "learning_rate": 8.401378496616436e-07, |
| "loss": 0.832, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.1518324607329844, |
| "grad_norm": 1.3471144861335647, |
| "learning_rate": 8.358559568855248e-07, |
| "loss": 0.8503, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1544502617801047, |
| "grad_norm": 1.6341366653555764, |
| "learning_rate": 8.315771547397069e-07, |
| "loss": 0.8587, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.1570680628272252, |
| "grad_norm": 1.465695318657597, |
| "learning_rate": 8.273015237887673e-07, |
| "loss": 0.8103, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.1596858638743455, |
| "grad_norm": 1.6934157050663563, |
| "learning_rate": 8.230291445375743e-07, |
| "loss": 0.7956, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.162303664921466, |
| "grad_norm": 1.7004531125988867, |
| "learning_rate": 8.187600974297713e-07, |
| "loss": 0.8019, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.1649214659685865, |
| "grad_norm": 1.3294735161678863, |
| "learning_rate": 8.144944628462602e-07, |
| "loss": 0.7767, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.1675392670157068, |
| "grad_norm": 1.267345283601188, |
| "learning_rate": 8.102323211036903e-07, |
| "loss": 0.7901, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.1701570680628273, |
| "grad_norm": 1.3916992316793781, |
| "learning_rate": 8.059737524529443e-07, |
| "loss": 0.7955, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.1727748691099475, |
| "grad_norm": 1.3788939715072452, |
| "learning_rate": 8.017188370776291e-07, |
| "loss": 0.8435, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.175392670157068, |
| "grad_norm": 1.4853305212865673, |
| "learning_rate": 7.974676550925638e-07, |
| "loss": 0.7622, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.1780104712041886, |
| "grad_norm": 1.490040471958494, |
| "learning_rate": 7.932202865422726e-07, |
| "loss": 0.7835, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1806282722513088, |
| "grad_norm": 1.6151359828705134, |
| "learning_rate": 7.889768113994779e-07, |
| "loss": 0.7932, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.1832460732984293, |
| "grad_norm": 1.3730470495488762, |
| "learning_rate": 7.847373095635936e-07, |
| "loss": 0.794, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.1858638743455496, |
| "grad_norm": 1.3446253635709806, |
| "learning_rate": 7.805018608592211e-07, |
| "loss": 0.8044, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.1884816753926701, |
| "grad_norm": 1.431468131784258, |
| "learning_rate": 7.76270545034646e-07, |
| "loss": 0.7689, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.1910994764397906, |
| "grad_norm": 1.3994617582004658, |
| "learning_rate": 7.720434417603383e-07, |
| "loss": 0.804, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.193717277486911, |
| "grad_norm": 1.325160242198593, |
| "learning_rate": 7.678206306274494e-07, |
| "loss": 0.7841, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.1963350785340314, |
| "grad_norm": 1.4374383575883027, |
| "learning_rate": 7.636021911463151e-07, |
| "loss": 0.8449, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.1989528795811517, |
| "grad_norm": 1.389213362717738, |
| "learning_rate": 7.59388202744959e-07, |
| "loss": 0.8468, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.2015706806282722, |
| "grad_norm": 1.7252740046069512, |
| "learning_rate": 7.551787447675961e-07, |
| "loss": 0.8013, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.2041884816753927, |
| "grad_norm": 1.377920480559359, |
| "learning_rate": 7.509738964731388e-07, |
| "loss": 0.7564, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.206806282722513, |
| "grad_norm": 1.416713350270201, |
| "learning_rate": 7.467737370337053e-07, |
| "loss": 0.7664, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.2094240837696335, |
| "grad_norm": 1.3257995406066596, |
| "learning_rate": 7.42578345533128e-07, |
| "loss": 0.7993, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.212041884816754, |
| "grad_norm": 1.4475869369135934, |
| "learning_rate": 7.383878009654656e-07, |
| "loss": 0.8322, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.2146596858638743, |
| "grad_norm": 1.489457170308261, |
| "learning_rate": 7.342021822335142e-07, |
| "loss": 0.8586, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.2172774869109948, |
| "grad_norm": 1.282686611990987, |
| "learning_rate": 7.300215681473223e-07, |
| "loss": 0.7906, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.2198952879581153, |
| "grad_norm": 1.67325177459973, |
| "learning_rate": 7.258460374227084e-07, |
| "loss": 0.8587, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.2225130890052356, |
| "grad_norm": 1.4567815194341007, |
| "learning_rate": 7.216756686797763e-07, |
| "loss": 0.7958, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.225130890052356, |
| "grad_norm": 1.4072752155576178, |
| "learning_rate": 7.175105404414361e-07, |
| "loss": 0.8664, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.2277486910994764, |
| "grad_norm": 1.3475514454256452, |
| "learning_rate": 7.133507311319269e-07, |
| "loss": 0.8025, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.2303664921465969, |
| "grad_norm": 1.394126307314881, |
| "learning_rate": 7.091963190753377e-07, |
| "loss": 0.836, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2329842931937174, |
| "grad_norm": 1.5669124274048458, |
| "learning_rate": 7.050473824941339e-07, |
| "loss": 0.8144, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.2356020942408377, |
| "grad_norm": 1.3397006738329051, |
| "learning_rate": 7.009039995076844e-07, |
| "loss": 0.8306, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.2382198952879582, |
| "grad_norm": 1.447525436818219, |
| "learning_rate": 6.967662481307922e-07, |
| "loss": 0.8036, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.2408376963350785, |
| "grad_norm": 1.3721851652369728, |
| "learning_rate": 6.926342062722222e-07, |
| "loss": 0.7815, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.243455497382199, |
| "grad_norm": 1.6329361471554176, |
| "learning_rate": 6.885079517332366e-07, |
| "loss": 0.8039, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.2460732984293195, |
| "grad_norm": 1.9366523541075422, |
| "learning_rate": 6.843875622061304e-07, |
| "loss": 0.7986, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.2486910994764397, |
| "grad_norm": 1.5703879278501358, |
| "learning_rate": 6.802731152727663e-07, |
| "loss": 0.8126, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2513089005235603, |
| "grad_norm": 1.4579887202591848, |
| "learning_rate": 6.761646884031163e-07, |
| "loss": 0.7649, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.2539267015706805, |
| "grad_norm": 1.3523631867863206, |
| "learning_rate": 6.720623589538013e-07, |
| "loss": 0.8084, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.256544502617801, |
| "grad_norm": 1.492000299071307, |
| "learning_rate": 6.679662041666361e-07, |
| "loss": 0.8046, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2591623036649215, |
| "grad_norm": 1.2653465436843139, |
| "learning_rate": 6.638763011671735e-07, |
| "loss": 0.7816, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.2617801047120418, |
| "grad_norm": 1.61419432835011, |
| "learning_rate": 6.597927269632526e-07, |
| "loss": 0.7761, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.2643979057591623, |
| "grad_norm": 1.2823619981408256, |
| "learning_rate": 6.557155584435503e-07, |
| "loss": 0.8001, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.2670157068062826, |
| "grad_norm": 1.4646645401032907, |
| "learning_rate": 6.516448723761314e-07, |
| "loss": 0.7766, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2696335078534031, |
| "grad_norm": 1.5472806089472109, |
| "learning_rate": 6.475807454070039e-07, |
| "loss": 0.7899, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.2722513089005236, |
| "grad_norm": 1.5818984764906228, |
| "learning_rate": 6.435232540586762e-07, |
| "loss": 0.8269, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.274869109947644, |
| "grad_norm": 1.393531877314765, |
| "learning_rate": 6.394724747287172e-07, |
| "loss": 0.7384, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.2774869109947644, |
| "grad_norm": 1.4361166309259563, |
| "learning_rate": 6.354284836883156e-07, |
| "loss": 0.7879, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.2801047120418847, |
| "grad_norm": 1.3699469134438556, |
| "learning_rate": 6.313913570808446e-07, |
| "loss": 0.7789, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.2827225130890052, |
| "grad_norm": 1.3904416790605236, |
| "learning_rate": 6.273611709204303e-07, |
| "loss": 0.8176, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2853403141361257, |
| "grad_norm": 1.3824996317269527, |
| "learning_rate": 6.233380010905174e-07, |
| "loss": 0.8095, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.2879581151832462, |
| "grad_norm": 1.3960121820574591, |
| "learning_rate": 6.193219233424414e-07, |
| "loss": 0.802, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.2905759162303665, |
| "grad_norm": 1.3873907282523597, |
| "learning_rate": 6.153130132940036e-07, |
| "loss": 0.8303, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.2931937172774868, |
| "grad_norm": 1.51245114633746, |
| "learning_rate": 6.11311346428046e-07, |
| "loss": 0.8678, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.2958115183246073, |
| "grad_norm": 1.3443150762946792, |
| "learning_rate": 6.073169980910307e-07, |
| "loss": 0.7906, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.2984293193717278, |
| "grad_norm": 1.3343485494964347, |
| "learning_rate": 6.033300434916202e-07, |
| "loss": 0.7591, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.3010471204188483, |
| "grad_norm": 1.336624806874358, |
| "learning_rate": 5.993505576992622e-07, |
| "loss": 0.7739, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.3036649214659686, |
| "grad_norm": 1.2527994527755557, |
| "learning_rate": 5.953786156427764e-07, |
| "loss": 0.8258, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.306282722513089, |
| "grad_norm": 1.643889557255892, |
| "learning_rate": 5.914142921089433e-07, |
| "loss": 0.8096, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.3089005235602094, |
| "grad_norm": 1.480290158962771, |
| "learning_rate": 5.874576617410949e-07, |
| "loss": 0.8366, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3115183246073299, |
| "grad_norm": 3.759429150177347, |
| "learning_rate": 5.835087990377123e-07, |
| "loss": 0.8133, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.3141361256544504, |
| "grad_norm": 1.4298867470452874, |
| "learning_rate": 5.795677783510186e-07, |
| "loss": 0.7678, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.3167539267015707, |
| "grad_norm": 1.7005622783366718, |
| "learning_rate": 5.756346738855835e-07, |
| "loss": 0.7634, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.3193717277486912, |
| "grad_norm": 1.4109970817022124, |
| "learning_rate": 5.717095596969226e-07, |
| "loss": 0.8031, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.3219895287958114, |
| "grad_norm": 1.3252968796234816, |
| "learning_rate": 5.677925096901055e-07, |
| "loss": 0.7736, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.324607329842932, |
| "grad_norm": 1.327548476193648, |
| "learning_rate": 5.638835976183627e-07, |
| "loss": 0.7795, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.3272251308900525, |
| "grad_norm": 1.406946886923445, |
| "learning_rate": 5.599828970816963e-07, |
| "loss": 0.7758, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.3298429319371727, |
| "grad_norm": 1.355344309482442, |
| "learning_rate": 5.560904815254979e-07, |
| "loss": 0.8103, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.3324607329842932, |
| "grad_norm": 1.279043869838049, |
| "learning_rate": 5.522064242391603e-07, |
| "loss": 0.7715, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.3350785340314135, |
| "grad_norm": 1.5570080121103858, |
| "learning_rate": 5.483307983547025e-07, |
| "loss": 0.8313, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.337696335078534, |
| "grad_norm": 1.3122582397024072, |
| "learning_rate": 5.444636768453887e-07, |
| "loss": 0.7797, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.3403141361256545, |
| "grad_norm": 1.4830312584856509, |
| "learning_rate": 5.406051325243585e-07, |
| "loss": 0.8317, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.3429319371727748, |
| "grad_norm": 1.6642265865784418, |
| "learning_rate": 5.367552380432515e-07, |
| "loss": 0.8388, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3455497382198953, |
| "grad_norm": 1.431660501719772, |
| "learning_rate": 5.329140658908422e-07, |
| "loss": 0.8197, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.3481675392670156, |
| "grad_norm": 1.91812201812069, |
| "learning_rate": 5.290816883916748e-07, |
| "loss": 0.7768, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.350785340314136, |
| "grad_norm": 1.3276086248250083, |
| "learning_rate": 5.252581777047008e-07, |
| "loss": 0.7916, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.3534031413612566, |
| "grad_norm": 1.387870226221796, |
| "learning_rate": 5.214436058219198e-07, |
| "loss": 0.806, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.356020942408377, |
| "grad_norm": 1.3406769634790143, |
| "learning_rate": 5.176380445670254e-07, |
| "loss": 0.7886, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.3586387434554974, |
| "grad_norm": 1.4742827186805514, |
| "learning_rate": 5.138415655940525e-07, |
| "loss": 0.844, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.3612565445026177, |
| "grad_norm": 1.3335834059424618, |
| "learning_rate": 5.100542403860271e-07, |
| "loss": 0.7656, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3638743455497382, |
| "grad_norm": 1.4312802818181136, |
| "learning_rate": 5.062761402536215e-07, |
| "loss": 0.8414, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.3664921465968587, |
| "grad_norm": 2.239249795631097, |
| "learning_rate": 5.02507336333811e-07, |
| "loss": 0.8036, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.369109947643979, |
| "grad_norm": 1.5871362478088449, |
| "learning_rate": 4.98747899588535e-07, |
| "loss": 0.7791, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.3717277486910995, |
| "grad_norm": 1.9177336066710684, |
| "learning_rate": 4.949979008033595e-07, |
| "loss": 0.7802, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.3743455497382198, |
| "grad_norm": 1.4374991682844207, |
| "learning_rate": 4.912574105861465e-07, |
| "loss": 0.8263, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.3769633507853403, |
| "grad_norm": 1.3243132358220007, |
| "learning_rate": 4.87526499365723e-07, |
| "loss": 0.7641, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.3795811518324608, |
| "grad_norm": 1.6118613530615584, |
| "learning_rate": 4.838052373905553e-07, |
| "loss": 0.794, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.3821989528795813, |
| "grad_norm": 1.3755325567825656, |
| "learning_rate": 4.800936947274254e-07, |
| "loss": 0.7573, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.3848167539267016, |
| "grad_norm": 1.378056950406792, |
| "learning_rate": 4.7639194126011486e-07, |
| "loss": 0.7694, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.387434554973822, |
| "grad_norm": 1.2961092278826385, |
| "learning_rate": 4.7270004668808393e-07, |
| "loss": 0.7955, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3900523560209423, |
| "grad_norm": 1.6104713131826254, |
| "learning_rate": 4.690180805251643e-07, |
| "loss": 0.7984, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.3926701570680629, |
| "grad_norm": 1.3161540162139154, |
| "learning_rate": 4.653461120982459e-07, |
| "loss": 0.7663, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.3952879581151834, |
| "grad_norm": 1.296818030549471, |
| "learning_rate": 4.6168421054597606e-07, |
| "loss": 0.7751, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.3979057591623036, |
| "grad_norm": 1.480408449991259, |
| "learning_rate": 4.5803244481745276e-07, |
| "loss": 0.7986, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.4005235602094241, |
| "grad_norm": 1.4105752329780448, |
| "learning_rate": 4.5439088367093036e-07, |
| "loss": 0.8615, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.4031413612565444, |
| "grad_norm": 1.6263679074117436, |
| "learning_rate": 4.507595956725233e-07, |
| "loss": 0.8059, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.405759162303665, |
| "grad_norm": 1.3683293249640538, |
| "learning_rate": 4.471386491949151e-07, |
| "loss": 0.7493, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.4083769633507854, |
| "grad_norm": 1.3195129801915084, |
| "learning_rate": 4.4352811241607146e-07, |
| "loss": 0.7748, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.4109947643979057, |
| "grad_norm": 1.394757071376127, |
| "learning_rate": 4.39928053317955e-07, |
| "loss": 0.7928, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.4136125654450262, |
| "grad_norm": 1.3894982769377109, |
| "learning_rate": 4.36338539685249e-07, |
| "loss": 0.8075, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4162303664921465, |
| "grad_norm": 1.578998531801876, |
| "learning_rate": 4.32759639104076e-07, |
| "loss": 0.828, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.418848167539267, |
| "grad_norm": 1.3184450771650498, |
| "learning_rate": 4.2919141896072965e-07, |
| "loss": 0.8241, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.4214659685863875, |
| "grad_norm": 1.4260325120381079, |
| "learning_rate": 4.256339464404024e-07, |
| "loss": 0.761, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.4240837696335078, |
| "grad_norm": 1.4035239220675064, |
| "learning_rate": 4.2208728852592466e-07, |
| "loss": 0.8549, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.4267015706806283, |
| "grad_norm": 2.1223113557893156, |
| "learning_rate": 4.185515119964985e-07, |
| "loss": 0.8123, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.4293193717277486, |
| "grad_norm": 1.4030573167836808, |
| "learning_rate": 4.150266834264445e-07, |
| "loss": 0.7804, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.431937172774869, |
| "grad_norm": 1.320065259779228, |
| "learning_rate": 4.115128691839463e-07, |
| "loss": 0.7915, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.4345549738219896, |
| "grad_norm": 1.9281985681641691, |
| "learning_rate": 4.0801013542980154e-07, |
| "loss": 0.8026, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.4371727748691099, |
| "grad_norm": 1.673696845466286, |
| "learning_rate": 4.045185481161747e-07, |
| "loss": 0.8696, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.4397905759162304, |
| "grad_norm": 1.4020194532394015, |
| "learning_rate": 4.010381729853579e-07, |
| "loss": 0.7994, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4424083769633507, |
| "grad_norm": 1.3112132073545109, |
| "learning_rate": 3.975690755685311e-07, |
| "loss": 0.8068, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.4450261780104712, |
| "grad_norm": 1.3212523710057094, |
| "learning_rate": 3.9411132118452893e-07, |
| "loss": 0.8256, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.4476439790575917, |
| "grad_norm": 1.3107699057156226, |
| "learning_rate": 3.906649749386105e-07, |
| "loss": 0.811, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.450261780104712, |
| "grad_norm": 1.373001010932746, |
| "learning_rate": 3.8723010172123373e-07, |
| "loss": 0.8433, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.4528795811518325, |
| "grad_norm": 1.2842242623159779, |
| "learning_rate": 3.838067662068341e-07, |
| "loss": 0.7654, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.4554973821989527, |
| "grad_norm": 1.3586950621309588, |
| "learning_rate": 3.80395032852605e-07, |
| "loss": 0.7493, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.4581151832460733, |
| "grad_norm": 1.401398906686267, |
| "learning_rate": 3.769949658972866e-07, |
| "loss": 0.8288, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.4607329842931938, |
| "grad_norm": 1.606071822525906, |
| "learning_rate": 3.7360662935995504e-07, |
| "loss": 0.8062, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4633507853403143, |
| "grad_norm": 1.2292695843382375, |
| "learning_rate": 3.70230087038817e-07, |
| "loss": 0.826, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.4659685863874345, |
| "grad_norm": 1.9324250096403122, |
| "learning_rate": 3.6686540251000754e-07, |
| "loss": 0.82, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.468586387434555, |
| "grad_norm": 1.4359728617528764, |
| "learning_rate": 3.635126391263964e-07, |
| "loss": 0.7946, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.4712041884816753, |
| "grad_norm": 1.9775951696627427, |
| "learning_rate": 3.6017186001639035e-07, |
| "loss": 0.8011, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.4738219895287958, |
| "grad_norm": 1.3120085815136922, |
| "learning_rate": 3.5684312808274895e-07, |
| "loss": 0.7543, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.4764397905759163, |
| "grad_norm": 2.4183298885740108, |
| "learning_rate": 3.5352650600139643e-07, |
| "loss": 0.7804, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.4790575916230366, |
| "grad_norm": 1.5770372818222822, |
| "learning_rate": 3.502220562202457e-07, |
| "loss": 0.8137, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.4816753926701571, |
| "grad_norm": 1.393956770661474, |
| "learning_rate": 3.469298409580179e-07, |
| "loss": 0.8003, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.4842931937172774, |
| "grad_norm": 1.4441462191596133, |
| "learning_rate": 3.4364992220307474e-07, |
| "loss": 0.7968, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.486910994764398, |
| "grad_norm": 1.4824859520976754, |
| "learning_rate": 3.4038236171224943e-07, |
| "loss": 0.8448, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.4895287958115184, |
| "grad_norm": 1.4691015298439172, |
| "learning_rate": 3.3712722100968416e-07, |
| "loss": 0.8082, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.4921465968586387, |
| "grad_norm": 1.2672893618896115, |
| "learning_rate": 3.338845613856722e-07, |
| "loss": 0.7622, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4947643979057592, |
| "grad_norm": 1.2960670490916741, |
| "learning_rate": 3.306544438955021e-07, |
| "loss": 0.7697, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.4973821989528795, |
| "grad_norm": 1.604512579460286, |
| "learning_rate": 3.2743692935831204e-07, |
| "loss": 0.8207, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.454683075612178, |
| "learning_rate": 3.2423207835593945e-07, |
| "loss": 0.8307, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.5026178010471205, |
| "grad_norm": 1.580516137010957, |
| "learning_rate": 3.2103995123178485e-07, |
| "loss": 0.8373, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.5052356020942408, |
| "grad_norm": 1.3603455979874735, |
| "learning_rate": 3.17860608089672e-07, |
| "loss": 0.8058, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.5078534031413613, |
| "grad_norm": 1.3809335070542352, |
| "learning_rate": 3.146941087927203e-07, |
| "loss": 0.839, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.5104712041884816, |
| "grad_norm": 1.3146909539755163, |
| "learning_rate": 3.115405129622133e-07, |
| "loss": 0.8226, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.513089005235602, |
| "grad_norm": 1.4185549143237521, |
| "learning_rate": 3.083998799764793e-07, |
| "loss": 0.7839, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.5157068062827226, |
| "grad_norm": 1.8247576748850203, |
| "learning_rate": 3.052722689697719e-07, |
| "loss": 0.777, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.518324607329843, |
| "grad_norm": 1.6076904934924388, |
| "learning_rate": 3.02157738831157e-07, |
| "loss": 0.8374, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5209424083769634, |
| "grad_norm": 1.644829185090369, |
| "learning_rate": 2.990563482034032e-07, |
| "loss": 0.8144, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.5235602094240837, |
| "grad_norm": 1.714674089099395, |
| "learning_rate": 2.9596815548187906e-07, |
| "loss": 0.817, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.5261780104712042, |
| "grad_norm": 1.2842088779085739, |
| "learning_rate": 2.9289321881345254e-07, |
| "loss": 0.7997, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.5287958115183247, |
| "grad_norm": 1.5975647240336275, |
| "learning_rate": 2.898315960953963e-07, |
| "loss": 0.8064, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.5314136125654452, |
| "grad_norm": 1.4060353372675793, |
| "learning_rate": 2.86783344974298e-07, |
| "loss": 0.7887, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.5340314136125655, |
| "grad_norm": 1.4809194894975817, |
| "learning_rate": 2.837485228449744e-07, |
| "loss": 0.815, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.5366492146596857, |
| "grad_norm": 1.4812590186042058, |
| "learning_rate": 2.80727186849391e-07, |
| "loss": 0.8086, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.5392670157068062, |
| "grad_norm": 1.39751019598529, |
| "learning_rate": 2.777193938755855e-07, |
| "loss": 0.7594, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.5418848167539267, |
| "grad_norm": 1.514738123745371, |
| "learning_rate": 2.7472520055659766e-07, |
| "loss": 0.7437, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.5445026178010473, |
| "grad_norm": 1.4421085580754955, |
| "learning_rate": 2.717446632694025e-07, |
| "loss": 0.8575, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5471204188481675, |
| "grad_norm": 1.3318374283444232, |
| "learning_rate": 2.6877783813384893e-07, |
| "loss": 0.8003, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.5497382198952878, |
| "grad_norm": 1.3765258900874102, |
| "learning_rate": 2.6582478101160166e-07, |
| "loss": 0.8178, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.5523560209424083, |
| "grad_norm": 1.418984768460615, |
| "learning_rate": 2.6288554750509283e-07, |
| "loss": 0.7876, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.5549738219895288, |
| "grad_norm": 1.2792792535169923, |
| "learning_rate": 2.599601929564709e-07, |
| "loss": 0.7865, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5575916230366493, |
| "grad_norm": 1.3722485916863456, |
| "learning_rate": 2.57048772446562e-07, |
| "loss": 0.7959, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.5602094240837696, |
| "grad_norm": 1.2960069968983479, |
| "learning_rate": 2.5415134079383004e-07, |
| "loss": 0.7861, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.56282722513089, |
| "grad_norm": 1.354874151909094, |
| "learning_rate": 2.5126795255334787e-07, |
| "loss": 0.8494, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.5654450261780104, |
| "grad_norm": 1.417221429544223, |
| "learning_rate": 2.4839866201576645e-07, |
| "loss": 0.8032, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.568062827225131, |
| "grad_norm": 1.3896810834541278, |
| "learning_rate": 2.4554352320629523e-07, |
| "loss": 0.8263, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.5706806282722514, |
| "grad_norm": 1.4719159440552299, |
| "learning_rate": 2.4270258988368374e-07, |
| "loss": 0.8116, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5732984293193717, |
| "grad_norm": 1.4447858420521056, |
| "learning_rate": 2.3987591553920996e-07, |
| "loss": 0.7985, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.5759162303664922, |
| "grad_norm": 1.4399309666262106, |
| "learning_rate": 2.3706355339567286e-07, |
| "loss": 0.7826, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.5785340314136125, |
| "grad_norm": 1.3149259880043063, |
| "learning_rate": 2.3426555640638922e-07, |
| "loss": 0.8106, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.581151832460733, |
| "grad_norm": 1.5531838481047473, |
| "learning_rate": 2.3148197725419983e-07, |
| "loss": 0.8211, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.5837696335078535, |
| "grad_norm": 1.4496759050090098, |
| "learning_rate": 2.2871286835047287e-07, |
| "loss": 0.8086, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.5863874345549738, |
| "grad_norm": 1.2839988127548618, |
| "learning_rate": 2.2595828183412168e-07, |
| "loss": 0.7415, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.5890052356020943, |
| "grad_norm": 1.3613405926912308, |
| "learning_rate": 2.2321826957061884e-07, |
| "loss": 0.8165, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.5916230366492146, |
| "grad_norm": 1.4894168574003237, |
| "learning_rate": 2.204928831510241e-07, |
| "loss": 0.8101, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.594240837696335, |
| "grad_norm": 1.2285752175457534, |
| "learning_rate": 2.1778217389100828e-07, |
| "loss": 0.8266, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.5968586387434556, |
| "grad_norm": 1.3240197043664255, |
| "learning_rate": 2.1508619282989083e-07, |
| "loss": 0.7918, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.599476439790576, |
| "grad_norm": 1.5225433588570212, |
| "learning_rate": 2.1240499072967676e-07, |
| "loss": 0.8316, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.6020942408376964, |
| "grad_norm": 1.3658395063596134, |
| "learning_rate": 2.0973861807410187e-07, |
| "loss": 0.7561, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.6047120418848166, |
| "grad_norm": 1.4130544507743816, |
| "learning_rate": 2.0708712506768077e-07, |
| "loss": 0.8052, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.6073298429319371, |
| "grad_norm": 1.4871581640689473, |
| "learning_rate": 2.0445056163476372e-07, |
| "loss": 0.8, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.6099476439790577, |
| "grad_norm": 1.6929581009290537, |
| "learning_rate": 2.0182897741859494e-07, |
| "loss": 0.8095, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.6125654450261782, |
| "grad_norm": 1.4618524446123653, |
| "learning_rate": 1.9922242178037863e-07, |
| "loss": 0.7961, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.6151832460732984, |
| "grad_norm": 1.30079475381395, |
| "learning_rate": 1.966309437983491e-07, |
| "loss": 0.7689, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.6178010471204187, |
| "grad_norm": 1.3483874029949958, |
| "learning_rate": 1.9405459226684717e-07, |
| "loss": 0.7992, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.6204188481675392, |
| "grad_norm": 1.2776415714434242, |
| "learning_rate": 1.9149341569540156e-07, |
| "loss": 0.7972, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.6230366492146597, |
| "grad_norm": 1.2630189358990098, |
| "learning_rate": 1.88947462307814e-07, |
| "loss": 0.7584, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.6256544502617802, |
| "grad_norm": 1.4447313375218829, |
| "learning_rate": 1.8641678004125362e-07, |
| "loss": 0.7961, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.6282722513089005, |
| "grad_norm": 1.2826614110040238, |
| "learning_rate": 1.8390141654535263e-07, |
| "loss": 0.7706, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.6308900523560208, |
| "grad_norm": 1.3463863974486134, |
| "learning_rate": 1.8140141918131003e-07, |
| "loss": 0.8195, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.6335078534031413, |
| "grad_norm": 1.2889865983410822, |
| "learning_rate": 1.7891683502099831e-07, |
| "loss": 0.7782, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.6361256544502618, |
| "grad_norm": 1.514275415675467, |
| "learning_rate": 1.7644771084608011e-07, |
| "loss": 0.8274, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6387434554973823, |
| "grad_norm": 1.5769799038828132, |
| "learning_rate": 1.739940931471239e-07, |
| "loss": 0.7833, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.6413612565445026, |
| "grad_norm": 1.3305753381473646, |
| "learning_rate": 1.715560281227315e-07, |
| "loss": 0.7745, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.6439790575916229, |
| "grad_norm": 1.6523520563000411, |
| "learning_rate": 1.6913356167866578e-07, |
| "loss": 0.7976, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.6465968586387434, |
| "grad_norm": 1.3889991776494723, |
| "learning_rate": 1.6672673942698922e-07, |
| "loss": 0.7963, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.649214659685864, |
| "grad_norm": 1.4564242094713644, |
| "learning_rate": 1.6433560668520174e-07, |
| "loss": 0.8187, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6518324607329844, |
| "grad_norm": 1.5330763133711176, |
| "learning_rate": 1.6196020847539006e-07, |
| "loss": 0.8409, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.6544502617801047, |
| "grad_norm": 1.3341437692998779, |
| "learning_rate": 1.5960058952337884e-07, |
| "loss": 0.7934, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.6570680628272252, |
| "grad_norm": 1.3397089840974183, |
| "learning_rate": 1.572567942578885e-07, |
| "loss": 0.7798, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.6596858638743455, |
| "grad_norm": 1.4339035956342077, |
| "learning_rate": 1.5492886680969964e-07, |
| "loss": 0.8218, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.662303664921466, |
| "grad_norm": 1.3784063434598477, |
| "learning_rate": 1.526168510108199e-07, |
| "loss": 0.8057, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.6649214659685865, |
| "grad_norm": 1.3997086127725333, |
| "learning_rate": 1.5032079039366208e-07, |
| "loss": 0.8004, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.6675392670157068, |
| "grad_norm": 1.4007994321647084, |
| "learning_rate": 1.4804072819022106e-07, |
| "loss": 0.8388, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.6701570680628273, |
| "grad_norm": 1.3326785046628704, |
| "learning_rate": 1.45776707331262e-07, |
| "loss": 0.8284, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.6727748691099475, |
| "grad_norm": 1.3353139413717556, |
| "learning_rate": 1.4352877044551048e-07, |
| "loss": 0.8025, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.675392670157068, |
| "grad_norm": 1.58505707104324, |
| "learning_rate": 1.4129695985885227e-07, |
| "loss": 0.8125, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.6780104712041886, |
| "grad_norm": 1.6745117928613125, |
| "learning_rate": 1.3908131759353304e-07, |
| "loss": 0.7898, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.680628272251309, |
| "grad_norm": 1.3909882037596468, |
| "learning_rate": 1.3688188536736968e-07, |
| "loss": 0.8318, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.6832460732984293, |
| "grad_norm": 2.1951204907567288, |
| "learning_rate": 1.3469870459296406e-07, |
| "loss": 0.8011, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.6858638743455496, |
| "grad_norm": 1.2526629580139643, |
| "learning_rate": 1.3253181637692324e-07, |
| "loss": 0.8, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.6884816753926701, |
| "grad_norm": 1.399785550512741, |
| "learning_rate": 1.303812615190849e-07, |
| "loss": 0.8482, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.6910994764397906, |
| "grad_norm": 1.203118226750969, |
| "learning_rate": 1.2824708051175014e-07, |
| "loss": 0.7639, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.6937172774869111, |
| "grad_norm": 3.3151303463091857, |
| "learning_rate": 1.2612931353892074e-07, |
| "loss": 0.8005, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.6963350785340314, |
| "grad_norm": 1.330299763937507, |
| "learning_rate": 1.2402800047554206e-07, |
| "loss": 0.7909, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.6989528795811517, |
| "grad_norm": 1.3862265238761402, |
| "learning_rate": 1.2194318088675282e-07, |
| "loss": 0.8123, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.7015706806282722, |
| "grad_norm": 1.274919133998628, |
| "learning_rate": 1.198748940271398e-07, |
| "loss": 0.8201, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.7041884816753927, |
| "grad_norm": 1.3018378077447637, |
| "learning_rate": 1.1782317883999915e-07, |
| "loss": 0.8389, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.7068062827225132, |
| "grad_norm": 1.5066370942887777, |
| "learning_rate": 1.1578807395660206e-07, |
| "loss": 0.796, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.7094240837696335, |
| "grad_norm": 1.349261704790453, |
| "learning_rate": 1.1376961769546889e-07, |
| "loss": 0.8114, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.7120418848167538, |
| "grad_norm": 1.3627444333730776, |
| "learning_rate": 1.1176784806164674e-07, |
| "loss": 0.7675, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.7146596858638743, |
| "grad_norm": 1.7455255048714526, |
| "learning_rate": 1.0978280274599417e-07, |
| "loss": 0.73, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.7172774869109948, |
| "grad_norm": 1.4158444575821112, |
| "learning_rate": 1.078145191244706e-07, |
| "loss": 0.8012, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.7198952879581153, |
| "grad_norm": 1.4363822649791222, |
| "learning_rate": 1.0586303425743493e-07, |
| "loss": 0.7977, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.7225130890052356, |
| "grad_norm": 1.3744102737667037, |
| "learning_rate": 1.0392838488894462e-07, |
| "loss": 0.7715, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.7251308900523559, |
| "grad_norm": 1.3002750750034122, |
| "learning_rate": 1.0201060744606637e-07, |
| "loss": 0.7904, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.7277486910994764, |
| "grad_norm": 1.345462376576293, |
| "learning_rate": 1.0010973803818856e-07, |
| "loss": 0.8442, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.7303664921465969, |
| "grad_norm": 1.1727958466968662, |
| "learning_rate": 9.822581245634321e-08, |
| "loss": 0.7259, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.7329842931937174, |
| "grad_norm": 1.4244501190989374, |
| "learning_rate": 9.635886617252975e-08, |
| "loss": 0.8052, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.7356020942408377, |
| "grad_norm": 1.5251798972718038, |
| "learning_rate": 9.450893433904895e-08, |
| "loss": 0.7403, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.738219895287958, |
| "grad_norm": 1.391940951778632, |
| "learning_rate": 9.267605178784033e-08, |
| "loss": 0.794, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.7408376963350785, |
| "grad_norm": 1.4484378614919133, |
| "learning_rate": 9.086025302982648e-08, |
| "loss": 0.7947, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.743455497382199, |
| "grad_norm": 3.337271024131123, |
| "learning_rate": 8.906157225426313e-08, |
| "loss": 0.8675, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.7460732984293195, |
| "grad_norm": 1.346520110764039, |
| "learning_rate": 8.728004332809514e-08, |
| "loss": 0.7613, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.7486910994764397, |
| "grad_norm": 1.397438506733914, |
| "learning_rate": 8.55156997953197e-08, |
| "loss": 0.779, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.7513089005235603, |
| "grad_norm": 1.6788387769625028, |
| "learning_rate": 8.37685748763538e-08, |
| "loss": 0.8117, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.7539267015706805, |
| "grad_norm": 1.5831100169398993, |
| "learning_rate": 8.203870146740932e-08, |
| "loss": 0.8567, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.756544502617801, |
| "grad_norm": 1.3571635467136167, |
| "learning_rate": 8.03261121398735e-08, |
| "loss": 0.8109, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.7591623036649215, |
| "grad_norm": 1.3483951740257418, |
| "learning_rate": 7.86308391396956e-08, |
| "loss": 0.7726, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.761780104712042, |
| "grad_norm": 1.2849988749934957, |
| "learning_rate": 7.695291438677931e-08, |
| "loss": 0.7859, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7643979057591623, |
| "grad_norm": 1.62566978678994, |
| "learning_rate": 7.529236947438256e-08, |
| "loss": 0.8316, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.7670157068062826, |
| "grad_norm": 1.397977587937563, |
| "learning_rate": 7.364923566852244e-08, |
| "loss": 0.8038, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.7696335078534031, |
| "grad_norm": 1.6249359544303996, |
| "learning_rate": 7.202354390738608e-08, |
| "loss": 0.7589, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.7722513089005236, |
| "grad_norm": 1.342274085142869, |
| "learning_rate": 7.041532480074819e-08, |
| "loss": 0.817, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.7748691099476441, |
| "grad_norm": 1.3135650350395103, |
| "learning_rate": 6.88246086293952e-08, |
| "loss": 0.8502, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.7774869109947644, |
| "grad_norm": 5.863988193738712, |
| "learning_rate": 6.725142534455486e-08, |
| "loss": 0.7852, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.7801047120418847, |
| "grad_norm": 1.3357189869150106, |
| "learning_rate": 6.569580456733204e-08, |
| "loss": 0.7954, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.7827225130890052, |
| "grad_norm": 1.4367263763718134, |
| "learning_rate": 6.415777558815138e-08, |
| "loss": 0.8034, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.7853403141361257, |
| "grad_norm": 1.503975945315533, |
| "learning_rate": 6.263736736620551e-08, |
| "loss": 0.833, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.7879581151832462, |
| "grad_norm": 1.3586054609187552, |
| "learning_rate": 6.113460852890973e-08, |
| "loss": 0.7554, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.7905759162303665, |
| "grad_norm": 1.4470810245955972, |
| "learning_rate": 5.964952737136353e-08, |
| "loss": 0.8083, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.7931937172774868, |
| "grad_norm": 1.310548213337723, |
| "learning_rate": 5.8182151855816986e-08, |
| "loss": 0.7801, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.7958115183246073, |
| "grad_norm": 1.3463195860089903, |
| "learning_rate": 5.6732509611145284e-08, |
| "loss": 0.8139, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.7984293193717278, |
| "grad_norm": 1.4380427564891194, |
| "learning_rate": 5.5300627932327706e-08, |
| "loss": 0.83, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.8010471204188483, |
| "grad_norm": 1.8105570054747142, |
| "learning_rate": 5.388653377993324e-08, |
| "loss": 0.8225, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.8036649214659686, |
| "grad_norm": 1.3901555055692667, |
| "learning_rate": 5.2490253779615133e-08, |
| "loss": 0.7836, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.8062827225130889, |
| "grad_norm": 1.3272661328531419, |
| "learning_rate": 5.111181422160671e-08, |
| "loss": 0.8377, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.8089005235602094, |
| "grad_norm": 1.5288175932161538, |
| "learning_rate": 4.975124106022843e-08, |
| "loss": 0.8155, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.8115183246073299, |
| "grad_norm": 1.5031624565627322, |
| "learning_rate": 4.840855991339798e-08, |
| "loss": 0.7703, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.8141361256544504, |
| "grad_norm": 1.3318204885777967, |
| "learning_rate": 4.7083796062149297e-08, |
| "loss": 0.7703, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.8167539267015707, |
| "grad_norm": 1.4072121620798181, |
| "learning_rate": 4.577697445015471e-08, |
| "loss": 0.7427, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.819371727748691, |
| "grad_norm": 1.4501030520404574, |
| "learning_rate": 4.448811968325683e-08, |
| "loss": 0.7612, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.8219895287958114, |
| "grad_norm": 1.485574023292867, |
| "learning_rate": 4.321725602900472e-08, |
| "loss": 0.8433, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.824607329842932, |
| "grad_norm": 1.2190437469029929, |
| "learning_rate": 4.196440741619678e-08, |
| "loss": 0.791, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.8272251308900525, |
| "grad_norm": 1.2895823786206642, |
| "learning_rate": 4.0729597434430164e-08, |
| "loss": 0.744, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.8298429319371727, |
| "grad_norm": 1.3874648155131097, |
| "learning_rate": 3.9512849333657064e-08, |
| "loss": 0.7917, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.8324607329842932, |
| "grad_norm": 1.4556070486247028, |
| "learning_rate": 3.8314186023746696e-08, |
| "loss": 0.7676, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.8350785340314135, |
| "grad_norm": 1.4129389758126438, |
| "learning_rate": 3.713363007405379e-08, |
| "loss": 0.7826, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.837696335078534, |
| "grad_norm": 1.406077520198999, |
| "learning_rate": 3.5971203712993894e-08, |
| "loss": 0.8105, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.8403141361256545, |
| "grad_norm": 1.333875265031629, |
| "learning_rate": 3.482692882762461e-08, |
| "loss": 0.795, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.8429319371727748, |
| "grad_norm": 1.3683777643880346, |
| "learning_rate": 3.3700826963233734e-08, |
| "loss": 0.7705, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.8455497382198953, |
| "grad_norm": 1.4709225891547952, |
| "learning_rate": 3.2592919322933174e-08, |
| "loss": 0.8004, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.8481675392670156, |
| "grad_norm": 1.3784174982531394, |
| "learning_rate": 3.150322676726025e-08, |
| "loss": 0.7968, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.850785340314136, |
| "grad_norm": 1.3119763744169533, |
| "learning_rate": 3.0431769813784595e-08, |
| "loss": 0.8328, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.8534031413612566, |
| "grad_norm": 1.3388881082324198, |
| "learning_rate": 2.9378568636721836e-08, |
| "loss": 0.8182, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.8560209424083771, |
| "grad_norm": 1.4478346583849298, |
| "learning_rate": 2.834364306655379e-08, |
| "loss": 0.8434, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.8586387434554974, |
| "grad_norm": 1.3661419005645459, |
| "learning_rate": 2.7327012589655307e-08, |
| "loss": 0.8087, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.8612565445026177, |
| "grad_norm": 1.6255187456262732, |
| "learning_rate": 2.6328696347926783e-08, |
| "loss": 0.8425, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.8638743455497382, |
| "grad_norm": 1.4508005806057562, |
| "learning_rate": 2.5348713138434564e-08, |
| "loss": 0.8272, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.8664921465968587, |
| "grad_norm": 1.3679469259148305, |
| "learning_rate": 2.43870814130559e-08, |
| "loss": 0.811, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.8691099476439792, |
| "grad_norm": 1.3414355872591919, |
| "learning_rate": 2.3443819278132992e-08, |
| "loss": 0.8187, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.8717277486910995, |
| "grad_norm": 1.5156992387312602, |
| "learning_rate": 2.251894449413061e-08, |
| "loss": 0.814, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.8743455497382198, |
| "grad_norm": 1.3765601891891213, |
| "learning_rate": 2.161247447530268e-08, |
| "loss": 0.8186, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.8769633507853403, |
| "grad_norm": 1.5813599209358808, |
| "learning_rate": 2.0724426289363995e-08, |
| "loss": 0.7714, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.8795811518324608, |
| "grad_norm": 1.3335529115792435, |
| "learning_rate": 1.9854816657168817e-08, |
| "loss": 0.8044, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.8821989528795813, |
| "grad_norm": 1.633533088724918, |
| "learning_rate": 1.9003661952396223e-08, |
| "loss": 0.8021, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.8848167539267016, |
| "grad_norm": 1.385553968147877, |
| "learning_rate": 1.817097820124147e-08, |
| "loss": 0.7701, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.8874345549738218, |
| "grad_norm": 1.3122958336734714, |
| "learning_rate": 1.7356781082115024e-08, |
| "loss": 0.7942, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.8900523560209423, |
| "grad_norm": 1.348122686262712, |
| "learning_rate": 1.656108592534633e-08, |
| "loss": 0.7778, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.8926701570680629, |
| "grad_norm": 1.5998455578678354, |
| "learning_rate": 1.578390771289606e-08, |
| "loss": 0.7819, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.8952879581151834, |
| "grad_norm": 1.4161827918618148, |
| "learning_rate": 1.5025261078073003e-08, |
| "loss": 0.8595, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.8979057591623036, |
| "grad_norm": 1.4120483689060008, |
| "learning_rate": 1.4285160305259836e-08, |
| "loss": 0.8133, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.900523560209424, |
| "grad_norm": 1.3452622944217065, |
| "learning_rate": 1.3563619329643117e-08, |
| "loss": 0.7922, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.9031413612565444, |
| "grad_norm": 1.3462821048493736, |
| "learning_rate": 1.2860651736951278e-08, |
| "loss": 0.7897, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.905759162303665, |
| "grad_norm": 1.2882450090073565, |
| "learning_rate": 1.2176270763198825e-08, |
| "loss": 0.7841, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.9083769633507854, |
| "grad_norm": 1.3599512276126853, |
| "learning_rate": 1.1510489294437431e-08, |
| "loss": 0.8084, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.9109947643979057, |
| "grad_norm": 1.3582022660187236, |
| "learning_rate": 1.0863319866512344e-08, |
| "loss": 0.7841, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.9136125654450262, |
| "grad_norm": 1.4661385410640886, |
| "learning_rate": 1.0234774664827473e-08, |
| "loss": 0.8058, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.9162303664921465, |
| "grad_norm": 1.3373090337197915, |
| "learning_rate": 9.624865524115344e-09, |
| "loss": 0.7869, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.918848167539267, |
| "grad_norm": 7.424831871562731, |
| "learning_rate": 9.033603928214396e-09, |
| "loss": 0.8024, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.9214659685863875, |
| "grad_norm": 1.3389085814711572, |
| "learning_rate": 8.461001009852809e-09, |
| "loss": 0.7585, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.9240837696335078, |
| "grad_norm": 1.321321690596193, |
| "learning_rate": 7.907067550438684e-09, |
| "loss": 0.7943, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.9267015706806283, |
| "grad_norm": 1.3041529247796586, |
| "learning_rate": 7.371813979857311e-09, |
| "loss": 0.846, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.9293193717277486, |
| "grad_norm": 1.6480049680582938, |
| "learning_rate": 6.855250376274546e-09, |
| "loss": 0.8317, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.931937172774869, |
| "grad_norm": 1.583954013442226, |
| "learning_rate": 6.357386465947301e-09, |
| "loss": 0.8391, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.9345549738219896, |
| "grad_norm": 1.677765126336499, |
| "learning_rate": 5.878231623040242e-09, |
| "loss": 0.8186, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.93717277486911, |
| "grad_norm": 1.5460126045288283, |
| "learning_rate": 5.417794869449377e-09, |
| "loss": 0.8227, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9397905759162304, |
| "grad_norm": 1.4312649967731466, |
| "learning_rate": 4.9760848746319695e-09, |
| "loss": 0.8021, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.9424083769633507, |
| "grad_norm": 1.4154058122833189, |
| "learning_rate": 4.553109955443557e-09, |
| "loss": 0.8241, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.9450261780104712, |
| "grad_norm": 1.281327931906069, |
| "learning_rate": 4.148878075981299e-09, |
| "loss": 0.7944, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.9476439790575917, |
| "grad_norm": 1.5389129836173667, |
| "learning_rate": 3.763396847433875e-09, |
| "loss": 0.7842, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.9502617801047122, |
| "grad_norm": 1.409969772944509, |
| "learning_rate": 3.3966735279384875e-09, |
| "loss": 0.755, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.9528795811518325, |
| "grad_norm": 1.2563861684214475, |
| "learning_rate": 3.0487150224437487e-09, |
| "loss": 0.765, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.9554973821989527, |
| "grad_norm": 1.3439315228390742, |
| "learning_rate": 2.7195278825801195e-09, |
| "loss": 0.7537, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.9581151832460733, |
| "grad_norm": 1.685283540845881, |
| "learning_rate": 2.4091183065362285e-09, |
| "loss": 0.821, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.9607329842931938, |
| "grad_norm": 1.2757831073007893, |
| "learning_rate": 2.1174921389424114e-09, |
| "loss": 0.7853, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.9633507853403143, |
| "grad_norm": 1.3441788515983824, |
| "learning_rate": 1.8446548707604648e-09, |
| "loss": 0.8169, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9659685863874345, |
| "grad_norm": 2.1793656620745274, |
| "learning_rate": 1.5906116391801726e-09, |
| "loss": 0.8164, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.9685863874345548, |
| "grad_norm": 1.5895341237579368, |
| "learning_rate": 1.355367227523052e-09, |
| "loss": 0.8164, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.9712041884816753, |
| "grad_norm": 1.4520764398320047, |
| "learning_rate": 1.1389260651518684e-09, |
| "loss": 0.743, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.9738219895287958, |
| "grad_norm": 1.2649604889074193, |
| "learning_rate": 9.412922273871471e-10, |
| "loss": 0.8085, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.9764397905759163, |
| "grad_norm": 1.3429274097375765, |
| "learning_rate": 7.624694354309014e-10, |
| "loss": 0.7761, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.9790575916230366, |
| "grad_norm": 1.3574856337300194, |
| "learning_rate": 6.02461056296244e-10, |
| "loss": 0.8159, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.981675392670157, |
| "grad_norm": 1.3845533094673734, |
| "learning_rate": 4.6127010274399356e-10, |
| "loss": 0.8309, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.9842931937172774, |
| "grad_norm": 1.6155184897511028, |
| "learning_rate": 3.3889923322594217e-10, |
| "loss": 0.8176, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.986910994764398, |
| "grad_norm": 1.583699217780584, |
| "learning_rate": 2.353507518350062e-10, |
| "loss": 0.777, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.9895287958115184, |
| "grad_norm": 1.2731241156029796, |
| "learning_rate": 1.506266082615948e-10, |
| "loss": 0.7812, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9921465968586387, |
| "grad_norm": 2.430092717658536, |
| "learning_rate": 8.472839775719442e-11, |
| "loss": 0.8205, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.9947643979057592, |
| "grad_norm": 1.6594947929124721, |
| "learning_rate": 3.765736110383777e-11, |
| "loss": 0.8433, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.9973821989528795, |
| "grad_norm": 2.5782988609959125, |
| "learning_rate": 9.414384591233116e-12, |
| "loss": 0.8128, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.423729911874669, |
| "learning_rate": 0.0, |
| "loss": 0.8282, |
| "step": 764 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 764, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 191, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 996711585546240.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
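
The file above is a complete `trainer_state.json` as written by the Hugging Face `transformers` Trainer: `log_history` holds one record per logged step (here `logging_steps` is 1), and the trailing keys record run-level metadata such as `max_steps` and `num_train_epochs`. A minimal sketch of consuming it, assuming the file is saved locally under the Trainer's default name `trainer_state.json` (the path and output filename here are assumptions, not part of the original run):

```python
# Sketch: load a Trainer state file and plot training loss vs. step.
# Assumes the JSON shown above is saved as "trainer_state.json" (hypothetical path).
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (eval records, if any, may not).
records = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in records]
losses = [e["loss"] for e in records]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"{state['max_steps']} steps, {state['num_train_epochs']} epochs")
plt.savefig("loss_curve.png")  # hypothetical output file
```

Read against this run's log, such a plot would show the loss falling from roughly 1.2-1.3 at the first steps to roughly 0.75-0.84 by step 764, with the learning rate annealed to 0.0 at the final step.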