{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 939,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003194888178913738,
      "grad_norm": 6.409946841419442,
      "learning_rate": 4.2553191489361704e-07,
      "loss": 0.9383,
      "step": 1
    },
    {
      "epoch": 0.006389776357827476,
      "grad_norm": 5.883137519343072,
      "learning_rate": 8.510638297872341e-07,
      "loss": 0.8501,
      "step": 2
    },
    {
      "epoch": 0.009584664536741214,
      "grad_norm": 5.946755544649649,
      "learning_rate": 1.276595744680851e-06,
      "loss": 0.887,
      "step": 3
    },
    {
      "epoch": 0.012779552715654952,
      "grad_norm": 5.642225969670194,
      "learning_rate": 1.7021276595744682e-06,
      "loss": 0.8099,
      "step": 4
    },
    {
      "epoch": 0.01597444089456869,
      "grad_norm": 5.891932559187005,
      "learning_rate": 2.1276595744680853e-06,
      "loss": 0.876,
      "step": 5
    },
    {
      "epoch": 0.019169329073482427,
      "grad_norm": 4.9025637632451495,
      "learning_rate": 2.553191489361702e-06,
      "loss": 0.8158,
      "step": 6
    },
    {
      "epoch": 0.022364217252396165,
      "grad_norm": 4.57848090524475,
      "learning_rate": 2.978723404255319e-06,
      "loss": 0.8368,
      "step": 7
    },
    {
      "epoch": 0.025559105431309903,
      "grad_norm": 2.658016400637032,
      "learning_rate": 3.4042553191489363e-06,
      "loss": 0.7747,
      "step": 8
    },
    {
      "epoch": 0.02875399361022364,
      "grad_norm": 2.4579449141086083,
      "learning_rate": 3.8297872340425535e-06,
      "loss": 0.7551,
      "step": 9
    },
    {
      "epoch": 0.03194888178913738,
      "grad_norm": 2.072756095086418,
      "learning_rate": 4.255319148936171e-06,
      "loss": 0.7211,
      "step": 10
    },
    {
      "epoch": 0.03514376996805112,
      "grad_norm": 3.5764214287428824,
      "learning_rate": 4.680851063829788e-06,
      "loss": 0.7667,
      "step": 11
    },
    {
      "epoch": 0.038338658146964855,
      "grad_norm": 4.214989656079249,
      "learning_rate": 5.106382978723404e-06,
      "loss": 0.8135,
      "step": 12
    },
    {
      "epoch": 0.04153354632587859,
      "grad_norm": 3.695642879844999,
      "learning_rate": 5.531914893617022e-06,
      "loss": 0.738,
      "step": 13
    },
    {
      "epoch": 0.04472843450479233,
      "grad_norm": 4.169046468693503,
      "learning_rate": 5.957446808510638e-06,
      "loss": 0.7383,
      "step": 14
    },
    {
      "epoch": 0.04792332268370607,
      "grad_norm": 3.911669767568645,
      "learning_rate": 6.382978723404256e-06,
      "loss": 0.6796,
      "step": 15
    },
    {
      "epoch": 0.051118210862619806,
      "grad_norm": 3.3181523814007345,
      "learning_rate": 6.808510638297873e-06,
      "loss": 0.6902,
      "step": 16
    },
    {
      "epoch": 0.054313099041533544,
      "grad_norm": 2.287624393972901,
      "learning_rate": 7.234042553191491e-06,
      "loss": 0.6386,
      "step": 17
    },
    {
      "epoch": 0.05750798722044728,
      "grad_norm": 1.9124190484261288,
      "learning_rate": 7.659574468085107e-06,
      "loss": 0.6893,
      "step": 18
    },
    {
      "epoch": 0.06070287539936102,
      "grad_norm": 1.8739939120437843,
      "learning_rate": 8.085106382978723e-06,
      "loss": 0.6776,
      "step": 19
    },
    {
      "epoch": 0.06389776357827476,
      "grad_norm": 2.1937664044946783,
      "learning_rate": 8.510638297872341e-06,
      "loss": 0.6205,
      "step": 20
    },
    {
      "epoch": 0.0670926517571885,
      "grad_norm": 2.2715620880143916,
      "learning_rate": 8.936170212765958e-06,
      "loss": 0.6056,
      "step": 21
    },
    {
      "epoch": 0.07028753993610223,
      "grad_norm": 1.6081424374861002,
      "learning_rate": 9.361702127659576e-06,
      "loss": 0.555,
      "step": 22
    },
    {
      "epoch": 0.07348242811501597,
      "grad_norm": 1.763358212351945,
      "learning_rate": 9.787234042553192e-06,
      "loss": 0.6666,
      "step": 23
    },
    {
      "epoch": 0.07667731629392971,
      "grad_norm": 1.4811530280287506,
      "learning_rate": 1.0212765957446808e-05,
      "loss": 0.6504,
      "step": 24
    },
    {
      "epoch": 0.07987220447284345,
      "grad_norm": 1.3405676738844483,
      "learning_rate": 1.0638297872340426e-05,
      "loss": 0.5772,
      "step": 25
    },
    {
      "epoch": 0.08306709265175719,
      "grad_norm": 1.2991745234008487,
      "learning_rate": 1.1063829787234044e-05,
      "loss": 0.6445,
      "step": 26
    },
    {
      "epoch": 0.08626198083067092,
      "grad_norm": 1.0505598061019923,
      "learning_rate": 1.1489361702127662e-05,
      "loss": 0.5923,
      "step": 27
    },
    {
      "epoch": 0.08945686900958466,
      "grad_norm": 1.103208098271378,
      "learning_rate": 1.1914893617021277e-05,
      "loss": 0.6241,
      "step": 28
    },
    {
      "epoch": 0.0926517571884984,
      "grad_norm": 1.2201972114638524,
      "learning_rate": 1.2340425531914895e-05,
      "loss": 0.6283,
      "step": 29
    },
    {
      "epoch": 0.09584664536741214,
      "grad_norm": 1.1112064502121644,
      "learning_rate": 1.2765957446808513e-05,
      "loss": 0.6223,
      "step": 30
    },
    {
      "epoch": 0.09904153354632587,
      "grad_norm": 0.9923339302816265,
      "learning_rate": 1.3191489361702127e-05,
      "loss": 0.5931,
      "step": 31
    },
    {
      "epoch": 0.10223642172523961,
      "grad_norm": 1.0477203467646528,
      "learning_rate": 1.3617021276595745e-05,
      "loss": 0.6182,
      "step": 32
    },
    {
      "epoch": 0.10543130990415335,
      "grad_norm": 1.1019257759272982,
      "learning_rate": 1.4042553191489363e-05,
      "loss": 0.5675,
      "step": 33
    },
    {
      "epoch": 0.10862619808306709,
      "grad_norm": 1.0627070873952427,
      "learning_rate": 1.4468085106382981e-05,
      "loss": 0.596,
      "step": 34
    },
    {
      "epoch": 0.11182108626198083,
      "grad_norm": 1.1260018768412945,
      "learning_rate": 1.4893617021276596e-05,
      "loss": 0.5888,
      "step": 35
    },
    {
      "epoch": 0.11501597444089456,
      "grad_norm": 1.0131578830510992,
      "learning_rate": 1.5319148936170214e-05,
      "loss": 0.53,
      "step": 36
    },
    {
      "epoch": 0.1182108626198083,
      "grad_norm": 0.9462086601742236,
      "learning_rate": 1.5744680851063832e-05,
      "loss": 0.5871,
      "step": 37
    },
    {
      "epoch": 0.12140575079872204,
      "grad_norm": 1.089392870614615,
      "learning_rate": 1.6170212765957446e-05,
      "loss": 0.5879,
      "step": 38
    },
    {
      "epoch": 0.12460063897763578,
      "grad_norm": 0.9813042474086496,
      "learning_rate": 1.6595744680851064e-05,
      "loss": 0.5539,
      "step": 39
    },
    {
      "epoch": 0.12779552715654952,
      "grad_norm": 0.8478222123412902,
      "learning_rate": 1.7021276595744682e-05,
      "loss": 0.5272,
      "step": 40
    },
    {
      "epoch": 0.13099041533546327,
      "grad_norm": 0.9509316220604798,
      "learning_rate": 1.74468085106383e-05,
      "loss": 0.5524,
      "step": 41
    },
    {
      "epoch": 0.134185303514377,
      "grad_norm": 0.9910817013897578,
      "learning_rate": 1.7872340425531915e-05,
      "loss": 0.5781,
      "step": 42
    },
    {
      "epoch": 0.13738019169329074,
      "grad_norm": 1.1162723782576445,
      "learning_rate": 1.8297872340425533e-05,
      "loss": 0.57,
      "step": 43
    },
    {
      "epoch": 0.14057507987220447,
      "grad_norm": 0.9939969861590426,
      "learning_rate": 1.872340425531915e-05,
      "loss": 0.5459,
      "step": 44
    },
    {
      "epoch": 0.14376996805111822,
      "grad_norm": 1.2439803148930444,
      "learning_rate": 1.914893617021277e-05,
      "loss": 0.5854,
      "step": 45
    },
    {
      "epoch": 0.14696485623003194,
      "grad_norm": 1.2941430779218954,
      "learning_rate": 1.9574468085106384e-05,
      "loss": 0.5667,
      "step": 46
    },
    {
      "epoch": 0.1501597444089457,
      "grad_norm": 1.062058714539539,
      "learning_rate": 2e-05,
      "loss": 0.6011,
      "step": 47
    },
    {
      "epoch": 0.15335463258785942,
      "grad_norm": 1.0851192274830292,
      "learning_rate": 2.0425531914893616e-05,
      "loss": 0.58,
      "step": 48
    },
    {
      "epoch": 0.15654952076677317,
      "grad_norm": 1.0375577575144754,
      "learning_rate": 2.0851063829787238e-05,
      "loss": 0.5729,
      "step": 49
    },
    {
      "epoch": 0.1597444089456869,
      "grad_norm": 1.147815879136087,
      "learning_rate": 2.1276595744680852e-05,
      "loss": 0.5442,
      "step": 50
    },
    {
      "epoch": 0.16293929712460065,
      "grad_norm": 1.0221514663668887,
      "learning_rate": 2.1702127659574467e-05,
      "loss": 0.5857,
      "step": 51
    },
    {
      "epoch": 0.16613418530351437,
      "grad_norm": 1.0076524214922307,
      "learning_rate": 2.2127659574468088e-05,
      "loss": 0.6012,
      "step": 52
    },
    {
      "epoch": 0.16932907348242812,
      "grad_norm": 0.9868930985691252,
      "learning_rate": 2.2553191489361703e-05,
      "loss": 0.5524,
      "step": 53
    },
    {
      "epoch": 0.17252396166134185,
      "grad_norm": 0.8832842960378782,
      "learning_rate": 2.2978723404255324e-05,
      "loss": 0.5442,
      "step": 54
    },
    {
      "epoch": 0.1757188498402556,
      "grad_norm": 0.8366367306264574,
      "learning_rate": 2.340425531914894e-05,
      "loss": 0.5611,
      "step": 55
    },
    {
      "epoch": 0.17891373801916932,
      "grad_norm": 1.0326981018410393,
      "learning_rate": 2.3829787234042553e-05,
      "loss": 0.5304,
      "step": 56
    },
    {
      "epoch": 0.18210862619808307,
      "grad_norm": 0.9142673475609777,
      "learning_rate": 2.4255319148936175e-05,
      "loss": 0.5587,
      "step": 57
    },
    {
      "epoch": 0.1853035143769968,
      "grad_norm": 1.056499453713147,
      "learning_rate": 2.468085106382979e-05,
      "loss": 0.5374,
      "step": 58
    },
    {
      "epoch": 0.18849840255591055,
      "grad_norm": 1.0459906767360574,
      "learning_rate": 2.5106382978723404e-05,
      "loss": 0.5635,
      "step": 59
    },
    {
      "epoch": 0.19169329073482427,
      "grad_norm": 1.0845897272985423,
      "learning_rate": 2.5531914893617025e-05,
      "loss": 0.5536,
      "step": 60
    },
    {
      "epoch": 0.19488817891373802,
      "grad_norm": 1.2724090682106446,
      "learning_rate": 2.595744680851064e-05,
      "loss": 0.5549,
      "step": 61
    },
    {
      "epoch": 0.19808306709265175,
      "grad_norm": 0.9769443793575512,
      "learning_rate": 2.6382978723404255e-05,
      "loss": 0.5142,
      "step": 62
    },
    {
      "epoch": 0.2012779552715655,
      "grad_norm": 1.0004128504205176,
      "learning_rate": 2.6808510638297876e-05,
      "loss": 0.5533,
      "step": 63
    },
    {
      "epoch": 0.20447284345047922,
      "grad_norm": 1.2165255582257546,
      "learning_rate": 2.723404255319149e-05,
      "loss": 0.5484,
      "step": 64
    },
    {
      "epoch": 0.20766773162939298,
      "grad_norm": 1.1589649633600527,
      "learning_rate": 2.7659574468085112e-05,
      "loss": 0.5833,
      "step": 65
    },
    {
      "epoch": 0.2108626198083067,
      "grad_norm": 0.8637922666258719,
      "learning_rate": 2.8085106382978727e-05,
      "loss": 0.5216,
      "step": 66
    },
    {
      "epoch": 0.21405750798722045,
      "grad_norm": 1.3422386117294691,
      "learning_rate": 2.851063829787234e-05,
      "loss": 0.5304,
      "step": 67
    },
    {
      "epoch": 0.21725239616613418,
      "grad_norm": 1.0665495345944047,
      "learning_rate": 2.8936170212765963e-05,
      "loss": 0.5217,
      "step": 68
    },
    {
      "epoch": 0.22044728434504793,
      "grad_norm": 1.1974304023992344,
      "learning_rate": 2.9361702127659577e-05,
      "loss": 0.5758,
      "step": 69
    },
    {
      "epoch": 0.22364217252396165,
      "grad_norm": 1.055001246229523,
      "learning_rate": 2.9787234042553192e-05,
      "loss": 0.5959,
      "step": 70
    },
    {
      "epoch": 0.2268370607028754,
      "grad_norm": 0.9295929863968017,
      "learning_rate": 3.0212765957446813e-05,
      "loss": 0.5113,
      "step": 71
    },
    {
      "epoch": 0.23003194888178913,
      "grad_norm": 1.0895682011148657,
      "learning_rate": 3.063829787234043e-05,
      "loss": 0.5343,
      "step": 72
    },
    {
      "epoch": 0.23322683706070288,
      "grad_norm": 1.0443823721191292,
      "learning_rate": 3.1063829787234046e-05,
      "loss": 0.5368,
      "step": 73
    },
    {
      "epoch": 0.2364217252396166,
      "grad_norm": 1.0778250715462803,
      "learning_rate": 3.1489361702127664e-05,
      "loss": 0.5187,
      "step": 74
    },
    {
      "epoch": 0.23961661341853036,
      "grad_norm": 1.1417565226413213,
      "learning_rate": 3.191489361702128e-05,
      "loss": 0.5248,
      "step": 75
    },
    {
      "epoch": 0.24281150159744408,
      "grad_norm": 1.1170007131913982,
      "learning_rate": 3.234042553191489e-05,
      "loss": 0.547,
      "step": 76
    },
    {
      "epoch": 0.24600638977635783,
      "grad_norm": 1.1070104668983525,
      "learning_rate": 3.276595744680851e-05,
      "loss": 0.4987,
      "step": 77
    },
    {
      "epoch": 0.24920127795527156,
      "grad_norm": 1.2780289742174922,
      "learning_rate": 3.319148936170213e-05,
      "loss": 0.5372,
      "step": 78
    },
    {
      "epoch": 0.2523961661341853,
      "grad_norm": 1.1736505513452447,
      "learning_rate": 3.361702127659575e-05,
      "loss": 0.5534,
      "step": 79
    },
    {
      "epoch": 0.25559105431309903,
      "grad_norm": 0.9677573531833106,
      "learning_rate": 3.4042553191489365e-05,
      "loss": 0.5124,
      "step": 80
    },
    {
      "epoch": 0.25878594249201275,
      "grad_norm": 1.257228321448476,
      "learning_rate": 3.446808510638298e-05,
      "loss": 0.5622,
      "step": 81
    },
    {
      "epoch": 0.26198083067092653,
      "grad_norm": 1.0555331388479319,
      "learning_rate": 3.48936170212766e-05,
      "loss": 0.5765,
      "step": 82
    },
    {
      "epoch": 0.26517571884984026,
      "grad_norm": 1.0776914247686873,
      "learning_rate": 3.531914893617022e-05,
      "loss": 0.5258,
      "step": 83
    },
    {
      "epoch": 0.268370607028754,
      "grad_norm": 1.0056240006776036,
      "learning_rate": 3.574468085106383e-05,
      "loss": 0.4866,
      "step": 84
    },
    {
      "epoch": 0.2715654952076677,
      "grad_norm": 1.1298508994667116,
      "learning_rate": 3.617021276595745e-05,
      "loss": 0.5806,
      "step": 85
    },
    {
      "epoch": 0.2747603833865815,
      "grad_norm": 1.0948113866316362,
      "learning_rate": 3.6595744680851066e-05,
      "loss": 0.5617,
      "step": 86
    },
    {
      "epoch": 0.2779552715654952,
      "grad_norm": 1.0650985230812091,
      "learning_rate": 3.7021276595744684e-05,
      "loss": 0.5656,
      "step": 87
    },
    {
      "epoch": 0.28115015974440893,
      "grad_norm": 1.0504970870871342,
      "learning_rate": 3.74468085106383e-05,
      "loss": 0.5978,
      "step": 88
    },
    {
      "epoch": 0.28434504792332266,
      "grad_norm": 0.8855494930537949,
      "learning_rate": 3.787234042553192e-05,
      "loss": 0.4934,
      "step": 89
    },
    {
      "epoch": 0.28753993610223644,
      "grad_norm": 1.030400345834846,
      "learning_rate": 3.829787234042554e-05,
      "loss": 0.5292,
      "step": 90
    },
    {
      "epoch": 0.29073482428115016,
      "grad_norm": 1.2762844738209218,
      "learning_rate": 3.872340425531915e-05,
      "loss": 0.5444,
      "step": 91
    },
    {
      "epoch": 0.2939297124600639,
      "grad_norm": 1.126915075174302,
      "learning_rate": 3.914893617021277e-05,
      "loss": 0.5111,
      "step": 92
    },
    {
      "epoch": 0.2971246006389776,
      "grad_norm": 1.4237336432448202,
      "learning_rate": 3.9574468085106385e-05,
      "loss": 0.5495,
      "step": 93
    },
    {
      "epoch": 0.3003194888178914,
      "grad_norm": 1.162381948355831,
      "learning_rate": 4e-05,
      "loss": 0.5999,
      "step": 94
    },
    {
      "epoch": 0.3035143769968051,
      "grad_norm": 1.3353166705017592,
      "learning_rate": 3.999986177524551e-05,
      "loss": 0.5371,
      "step": 95
    },
    {
      "epoch": 0.30670926517571884,
      "grad_norm": 1.1533614278616147,
      "learning_rate": 3.999944710289265e-05,
      "loss": 0.5636,
      "step": 96
    },
    {
      "epoch": 0.30990415335463256,
      "grad_norm": 1.2821208981923682,
      "learning_rate": 3.9998755988673205e-05,
      "loss": 0.5319,
      "step": 97
    },
    {
      "epoch": 0.31309904153354634,
      "grad_norm": 1.3064500394077125,
      "learning_rate": 3.9997788442140105e-05,
      "loss": 0.5626,
      "step": 98
    },
    {
      "epoch": 0.31629392971246006,
      "grad_norm": 1.1664124835581022,
      "learning_rate": 3.999654447666721e-05,
      "loss": 0.5379,
      "step": 99
    },
    {
      "epoch": 0.3194888178913738,
      "grad_norm": 1.1049186599225438,
      "learning_rate": 3.999502410944923e-05,
      "loss": 0.5968,
      "step": 100
    },
    {
      "epoch": 0.3226837060702875,
      "grad_norm": 0.9776199567560799,
      "learning_rate": 3.99932273615014e-05,
      "loss": 0.5493,
      "step": 101
    },
    {
      "epoch": 0.3258785942492013,
      "grad_norm": 1.082662657590535,
      "learning_rate": 3.99911542576592e-05,
      "loss": 0.582,
      "step": 102
    },
    {
      "epoch": 0.329073482428115,
      "grad_norm": 1.0520705991247479,
      "learning_rate": 3.998880482657809e-05,
      "loss": 0.5239,
      "step": 103
    },
    {
      "epoch": 0.33226837060702874,
      "grad_norm": 1.0198152234253615,
      "learning_rate": 3.9986179100733e-05,
      "loss": 0.5562,
      "step": 104
    },
    {
      "epoch": 0.3354632587859425,
      "grad_norm": 1.1581233439140284,
      "learning_rate": 3.9983277116417974e-05,
      "loss": 0.5199,
      "step": 105
    },
    {
      "epoch": 0.33865814696485624,
      "grad_norm": 1.2617196945442968,
      "learning_rate": 3.998009891374561e-05,
      "loss": 0.5158,
      "step": 106
    },
    {
      "epoch": 0.34185303514376997,
      "grad_norm": 1.233474385279073,
      "learning_rate": 3.997664453664654e-05,
      "loss": 0.5796,
      "step": 107
    },
    {
      "epoch": 0.3450479233226837,
      "grad_norm": 1.383774855291399,
      "learning_rate": 3.9972914032868805e-05,
      "loss": 0.5355,
      "step": 108
    },
    {
      "epoch": 0.34824281150159747,
      "grad_norm": 1.2889281329856341,
      "learning_rate": 3.99689074539772e-05,
      "loss": 0.5459,
      "step": 109
    },
    {
      "epoch": 0.3514376996805112,
      "grad_norm": 0.8238497044674719,
      "learning_rate": 3.996462485535257e-05,
      "loss": 0.5503,
      "step": 110
    },
    {
      "epoch": 0.3546325878594249,
      "grad_norm": 1.4807124856342684,
      "learning_rate": 3.996006629619103e-05,
      "loss": 0.6508,
      "step": 111
    },
    {
      "epoch": 0.35782747603833864,
      "grad_norm": 1.4078520868755624,
      "learning_rate": 3.995523183950314e-05,
      "loss": 0.6093,
      "step": 112
    },
    {
      "epoch": 0.3610223642172524,
      "grad_norm": 0.9015312070363262,
      "learning_rate": 3.9950121552113076e-05,
      "loss": 0.5879,
      "step": 113
    },
    {
      "epoch": 0.36421725239616615,
      "grad_norm": 1.386297055498617,
      "learning_rate": 3.994473550465765e-05,
      "loss": 0.6371,
      "step": 114
    },
    {
      "epoch": 0.36741214057507987,
      "grad_norm": 1.186356563974775,
      "learning_rate": 3.993907377158537e-05,
      "loss": 0.5058,
      "step": 115
    },
    {
      "epoch": 0.3706070287539936,
      "grad_norm": 1.046808813203276,
      "learning_rate": 3.993313643115541e-05,
      "loss": 0.5475,
      "step": 116
    },
    {
      "epoch": 0.3738019169329074,
      "grad_norm": 1.1184791458413588,
      "learning_rate": 3.992692356543649e-05,
      "loss": 0.5131,
      "step": 117
    },
    {
      "epoch": 0.3769968051118211,
      "grad_norm": 0.9927859768100731,
      "learning_rate": 3.992043526030582e-05,
      "loss": 0.5351,
      "step": 118
    },
    {
      "epoch": 0.3801916932907348,
      "grad_norm": 0.7194994252219997,
      "learning_rate": 3.991367160544783e-05,
      "loss": 0.5241,
      "step": 119
    },
    {
      "epoch": 0.38338658146964855,
      "grad_norm": 1.1103180710410239,
      "learning_rate": 3.990663269435298e-05,
      "loss": 0.5341,
      "step": 120
    },
    {
      "epoch": 0.3865814696485623,
      "grad_norm": 0.891726161610439,
      "learning_rate": 3.9899318624316424e-05,
      "loss": 0.56,
      "step": 121
    },
    {
      "epoch": 0.38977635782747605,
      "grad_norm": 0.9833837615596567,
      "learning_rate": 3.9891729496436736e-05,
      "loss": 0.5275,
      "step": 122
    },
    {
      "epoch": 0.3929712460063898,
      "grad_norm": 1.0423973561059756,
      "learning_rate": 3.988386541561444e-05,
      "loss": 0.5726,
      "step": 123
    },
    {
      "epoch": 0.3961661341853035,
      "grad_norm": 1.051551443360772,
      "learning_rate": 3.9875726490550606e-05,
      "loss": 0.5689,
      "step": 124
    },
    {
      "epoch": 0.3993610223642173,
      "grad_norm": 1.0379195132528112,
      "learning_rate": 3.986731283374533e-05,
      "loss": 0.5532,
      "step": 125
    },
    {
      "epoch": 0.402555910543131,
      "grad_norm": 1.066307983698186,
      "learning_rate": 3.985862456149616e-05,
      "loss": 0.6265,
      "step": 126
    },
    {
      "epoch": 0.4057507987220447,
      "grad_norm": 1.0375163774393865,
      "learning_rate": 3.9849661793896537e-05,
      "loss": 0.573,
      "step": 127
    },
    {
      "epoch": 0.40894568690095845,
      "grad_norm": 0.8956353921945285,
      "learning_rate": 3.984042465483409e-05,
      "loss": 0.5177,
      "step": 128
    },
    {
      "epoch": 0.41214057507987223,
      "grad_norm": 1.079166565420564,
      "learning_rate": 3.983091327198896e-05,
      "loss": 0.5476,
      "step": 129
    },
    {
      "epoch": 0.41533546325878595,
      "grad_norm": 0.9871062497017694,
      "learning_rate": 3.982112777683199e-05,
      "loss": 0.5868,
      "step": 130
    },
    {
      "epoch": 0.4185303514376997,
      "grad_norm": 1.1607498270093395,
      "learning_rate": 3.981106830462296e-05,
      "loss": 0.5829,
      "step": 131
    },
    {
      "epoch": 0.4217252396166134,
      "grad_norm": 0.938424834465439,
      "learning_rate": 3.9800734994408657e-05,
      "loss": 0.5233,
      "step": 132
    },
    {
      "epoch": 0.4249201277955272,
      "grad_norm": 0.9723972932143454,
      "learning_rate": 3.9790127989021024e-05,
      "loss": 0.5452,
      "step": 133
    },
    {
      "epoch": 0.4281150159744409,
      "grad_norm": 1.1133970630266223,
      "learning_rate": 3.977924743507513e-05,
      "loss": 0.6125,
      "step": 134
    },
    {
      "epoch": 0.43130990415335463,
      "grad_norm": 0.9887899047167236,
      "learning_rate": 3.976809348296716e-05,
      "loss": 0.5532,
      "step": 135
    },
    {
      "epoch": 0.43450479233226835,
      "grad_norm": 1.0482567177622366,
      "learning_rate": 3.9756666286872345e-05,
      "loss": 0.5469,
      "step": 136
    },
    {
      "epoch": 0.43769968051118213,
      "grad_norm": 0.9896665341430675,
      "learning_rate": 3.974496600474282e-05,
      "loss": 0.5496,
      "step": 137
    },
    {
      "epoch": 0.44089456869009586,
      "grad_norm": 1.1889549086086342,
      "learning_rate": 3.9732992798305465e-05,
      "loss": 0.5767,
      "step": 138
    },
    {
      "epoch": 0.4440894568690096,
      "grad_norm": 0.9876667745869144,
      "learning_rate": 3.972074683305961e-05,
      "loss": 0.5746,
      "step": 139
    },
    {
      "epoch": 0.4472843450479233,
      "grad_norm": 1.0145350383168896,
      "learning_rate": 3.9708228278274816e-05,
      "loss": 0.5332,
      "step": 140
    },
    {
      "epoch": 0.4504792332268371,
      "grad_norm": 0.898832690907024,
      "learning_rate": 3.96954373069885e-05,
      "loss": 0.6213,
      "step": 141
    },
    {
      "epoch": 0.4536741214057508,
      "grad_norm": 0.9695396273633855,
      "learning_rate": 3.968237409600355e-05,
      "loss": 0.5389,
      "step": 142
    },
    {
      "epoch": 0.45686900958466453,
      "grad_norm": 0.8686445117159894,
      "learning_rate": 3.9669038825885875e-05,
      "loss": 0.5514,
      "step": 143
    },
    {
      "epoch": 0.46006389776357826,
      "grad_norm": 0.9730958936696619,
      "learning_rate": 3.9655431680961924e-05,
      "loss": 0.5342,
      "step": 144
    },
    {
      "epoch": 0.46325878594249204,
      "grad_norm": 1.085850372058602,
      "learning_rate": 3.964155284931612e-05,
      "loss": 0.5637,
      "step": 145
    },
    {
      "epoch": 0.46645367412140576,
      "grad_norm": 0.8490885837637923,
      "learning_rate": 3.962740252278827e-05,
      "loss": 0.5432,
      "step": 146
    },
    {
      "epoch": 0.4696485623003195,
      "grad_norm": 1.208209634263402,
      "learning_rate": 3.961298089697093e-05,
      "loss": 0.5406,
      "step": 147
    },
    {
      "epoch": 0.4728434504792332,
      "grad_norm": 1.2369537682088747,
      "learning_rate": 3.959828817120665e-05,
      "loss": 0.5871,
      "step": 148
    },
    {
      "epoch": 0.476038338658147,
      "grad_norm": 1.0963511451666044,
      "learning_rate": 3.9583324548585276e-05,
      "loss": 0.5965,
      "step": 149
    },
    {
      "epoch": 0.4792332268370607,
      "grad_norm": 1.1210854346222001,
      "learning_rate": 3.956809023594112e-05,
      "loss": 0.4888,
      "step": 150
    },
    {
      "epoch": 0.48242811501597443,
      "grad_norm": 1.10943858725056,
      "learning_rate": 3.955258544385009e-05,
      "loss": 0.568,
      "step": 151
    },
    {
      "epoch": 0.48562300319488816,
      "grad_norm": 0.9114400050058079,
      "learning_rate": 3.95368103866268e-05,
      "loss": 0.5329,
      "step": 152
    },
    {
      "epoch": 0.48881789137380194,
      "grad_norm": 1.2588910487562754,
      "learning_rate": 3.9520765282321584e-05,
      "loss": 0.5458,
      "step": 153
    },
    {
      "epoch": 0.49201277955271566,
      "grad_norm": 1.161279475928506,
      "learning_rate": 3.9504450352717514e-05,
      "loss": 0.534,
      "step": 154
    },
    {
      "epoch": 0.4952076677316294,
      "grad_norm": 1.1666419752273152,
      "learning_rate": 3.948786582332728e-05,
      "loss": 0.5449,
      "step": 155
    },
    {
      "epoch": 0.4984025559105431,
      "grad_norm": 1.2141093053662426,
      "learning_rate": 3.947101192339016e-05,
      "loss": 0.5761,
      "step": 156
    },
    {
      "epoch": 0.5015974440894568,
      "grad_norm": 0.9508186492271605,
      "learning_rate": 3.9453888885868756e-05,
      "loss": 0.5019,
      "step": 157
    },
    {
      "epoch": 0.5047923322683706,
      "grad_norm": 1.0363555761012373,
      "learning_rate": 3.943649694744584e-05,
      "loss": 0.5156,
      "step": 158
    },
    {
      "epoch": 0.5079872204472844,
      "grad_norm": 1.0787946362260752,
      "learning_rate": 3.9418836348521045e-05,
      "loss": 0.5405,
      "step": 159
    },
    {
      "epoch": 0.5111821086261981,
      "grad_norm": 0.9707452122472157,
      "learning_rate": 3.940090733320757e-05,
      "loss": 0.5227,
      "step": 160
    },
    {
      "epoch": 0.5143769968051118,
      "grad_norm": 0.8852169354166731,
      "learning_rate": 3.93827101493288e-05,
      "loss": 0.5471,
      "step": 161
    },
    {
      "epoch": 0.5175718849840255,
      "grad_norm": 1.0544907272347088,
      "learning_rate": 3.936424504841485e-05,
      "loss": 0.5495,
      "step": 162
    },
    {
      "epoch": 0.5207667731629393,
      "grad_norm": 0.8460840865557951,
      "learning_rate": 3.934551228569913e-05,
      "loss": 0.5919,
      "step": 163
    },
    {
      "epoch": 0.5239616613418531,
      "grad_norm": 1.3423757776337415,
      "learning_rate": 3.932651212011479e-05,
      "loss": 0.5745,
      "step": 164
    },
    {
      "epoch": 0.5271565495207667,
      "grad_norm": 0.8367417092044956,
      "learning_rate": 3.930724481429114e-05,
      "loss": 0.5582,
      "step": 165
    },
    {
      "epoch": 0.5303514376996805,
      "grad_norm": 0.8162694542335893,
      "learning_rate": 3.928771063455007e-05,
      "loss": 0.5382,
      "step": 166
    },
    {
      "epoch": 0.5335463258785943,
      "grad_norm": 1.055654119658905,
      "learning_rate": 3.926790985090228e-05,
      "loss": 0.5127,
      "step": 167
    },
    {
      "epoch": 0.536741214057508,
      "grad_norm": 0.9342358155994782,
      "learning_rate": 3.924784273704363e-05,
      "loss": 0.5133,
      "step": 168
    },
    {
      "epoch": 0.5399361022364217,
      "grad_norm": 0.8868699043103929,
      "learning_rate": 3.922750957035128e-05,
      "loss": 0.5757,
      "step": 169
    },
    {
      "epoch": 0.5431309904153354,
      "grad_norm": 0.9258969627974063,
      "learning_rate": 3.920691063187995e-05,
      "loss": 0.5588,
      "step": 170
    },
    {
      "epoch": 0.5463258785942492,
      "grad_norm": 0.9405026305900314,
      "learning_rate": 3.918604620635797e-05,
      "loss": 0.5652,
      "step": 171
    },
    {
      "epoch": 0.549520766773163,
      "grad_norm": 0.8161599160408928,
      "learning_rate": 3.916491658218333e-05,
      "loss": 0.5586,
      "step": 172
    },
    {
      "epoch": 0.5527156549520766,
      "grad_norm": 0.9068770118413155,
      "learning_rate": 3.914352205141975e-05,
      "loss": 0.566,
      "step": 173
    },
    {
      "epoch": 0.5559105431309904,
      "grad_norm": 0.9468726642569554,
      "learning_rate": 3.91218629097926e-05,
      "loss": 0.6058,
      "step": 174
    },
    {
      "epoch": 0.5591054313099042,
      "grad_norm": 0.8130418886800949,
      "learning_rate": 3.909993945668484e-05,
      "loss": 0.5453,
      "step": 175
    },
    {
      "epoch": 0.5623003194888179,
      "grad_norm": 0.909689386530599,
      "learning_rate": 3.907775199513286e-05,
      "loss": 0.5348,
      "step": 176
    },
    {
      "epoch": 0.5654952076677316,
      "grad_norm": 0.9448685038012304,
      "learning_rate": 3.905530083182231e-05,
      "loss": 0.5615,
      "step": 177
    },
    {
      "epoch": 0.5686900958466453,
      "grad_norm": 0.8887839168967605,
      "learning_rate": 3.903258627708383e-05,
      "loss": 0.5318,
      "step": 178
    },
    {
      "epoch": 0.5718849840255591,
      "grad_norm": 0.8148539808805777,
      "learning_rate": 3.90096086448888e-05,
      "loss": 0.531,
      "step": 179
    },
    {
      "epoch": 0.5750798722044729,
      "grad_norm": 0.9702327272254745,
      "learning_rate": 3.898636825284499e-05,
      "loss": 0.5679,
      "step": 180
    },
    {
      "epoch": 0.5782747603833865,
      "grad_norm": 0.9398312312400305,
      "learning_rate": 3.896286542219212e-05,
      "loss": 0.566,
      "step": 181
    },
    {
      "epoch": 0.5814696485623003,
      "grad_norm": 0.986760124602772,
      "learning_rate": 3.893910047779752e-05,
      "loss": 0.5536,
      "step": 182
    },
    {
      "epoch": 0.5846645367412141,
      "grad_norm": 0.7553011510468522,
      "learning_rate": 3.891507374815153e-05,
      "loss": 0.5491,
      "step": 183
    },
    {
      "epoch": 0.5878594249201278,
      "grad_norm": 0.7490780167276647,
      "learning_rate": 3.8890785565363046e-05,
      "loss": 0.5278,
      "step": 184
    },
    {
      "epoch": 0.5910543130990416,
      "grad_norm": 0.927901642878805,
      "learning_rate": 3.8866236265154864e-05,
      "loss": 0.5493,
      "step": 185
    },
    {
      "epoch": 0.5942492012779552,
      "grad_norm": 0.9377247340300795,
      "learning_rate": 3.8841426186859095e-05,
      "loss": 0.5118,
      "step": 186
    },
    {
      "epoch": 0.597444089456869,
      "grad_norm": 0.7685781592155283,
      "learning_rate": 3.881635567341243e-05,
      "loss": 0.5213,
      "step": 187
    },
    {
      "epoch": 0.6006389776357828,
      "grad_norm": 0.9161772600103768,
      "learning_rate": 3.879102507135142e-05,
      "loss": 0.528,
      "step": 188
    },
    {
      "epoch": 0.6038338658146964,
      "grad_norm": 0.7614758217612799,
      "learning_rate": 3.876543473080771e-05,
      "loss": 0.5121,
      "step": 189
    },
    {
      "epoch": 0.6070287539936102,
      "grad_norm": 1.0241100063473434,
      "learning_rate": 3.8739585005503136e-05,
      "loss": 0.5557,
      "step": 190
    },
    {
      "epoch": 0.610223642172524,
      "grad_norm": 0.8917603175893786,
      "learning_rate": 3.8713476252744896e-05,
      "loss": 0.5033,
      "step": 191
    },
    {
      "epoch": 0.6134185303514377,
      "grad_norm": 0.8866544366619419,
      "learning_rate": 3.8687108833420585e-05,
      "loss": 0.5459,
      "step": 192
    },
    {
      "epoch": 0.6166134185303515,
      "grad_norm": 0.8490994667110613,
      "learning_rate": 3.866048311199321e-05,
      "loss": 0.5761,
      "step": 193
    },
    {
      "epoch": 0.6198083067092651,
      "grad_norm": 1.0117997679265072,
      "learning_rate": 3.863359945649615e-05,
      "loss": 0.5597,
      "step": 194
    },
    {
      "epoch": 0.6230031948881789,
      "grad_norm": 0.8115827675976007,
      "learning_rate": 3.860645823852808e-05,
      "loss": 0.5415,
      "step": 195
    },
    {
      "epoch": 0.6261980830670927,
      "grad_norm": 1.0259667321397283,
      "learning_rate": 3.85790598332478e-05,
      "loss": 0.5435,
      "step": 196
    },
    {
      "epoch": 0.6293929712460063,
      "grad_norm": 0.7714558957563437,
      "learning_rate": 3.8551404619369115e-05,
      "loss": 0.5485,
      "step": 197
    },
    {
      "epoch": 0.6325878594249201,
      "grad_norm": 0.8744712822558212,
      "learning_rate": 3.8523492979155534e-05,
      "loss": 0.5025,
      "step": 198
    },
    {
      "epoch": 0.6357827476038339,
      "grad_norm": 0.9290898869366451,
      "learning_rate": 3.849532529841502e-05,
      "loss": 0.5205,
      "step": 199
    },
    {
      "epoch": 0.6389776357827476,
      "grad_norm": 0.9659909853502232,
      "learning_rate": 3.846690196649464e-05,
      "loss": 0.5074,
      "step": 200
    },
    {
      "epoch": 0.6421725239616614,
      "grad_norm": 0.9633294322023886,
      "learning_rate": 3.84382233762752e-05,
      "loss": 0.5504,
      "step": 201
    },
    {
      "epoch": 0.645367412140575,
      "grad_norm": 1.0208058978197756,
      "learning_rate": 3.840928992416583e-05,
      "loss": 0.5683,
      "step": 202
    },
    {
      "epoch": 0.6485623003194888,
      "grad_norm": 0.7720130021877678,
      "learning_rate": 3.8380102010098436e-05,
      "loss": 0.5101,
      "step": 203
    },
    {
      "epoch": 0.6517571884984026,
      "grad_norm": 0.9815422862820409,
      "learning_rate": 3.835066003752226e-05,
      "loss": 0.564,
      "step": 204
    },
    {
      "epoch": 0.6549520766773163,
      "grad_norm": 0.8106906464515937,
      "learning_rate": 3.832096441339825e-05,
      "loss": 0.5227,
      "step": 205
    },
    {
      "epoch": 0.65814696485623,
      "grad_norm": 0.9833175984925693,
      "learning_rate": 3.829101554819341e-05,
      "loss": 0.5564,
      "step": 206
    },
    {
      "epoch": 0.6613418530351438,
      "grad_norm": 0.74809638164356,
      "learning_rate": 3.826081385587523e-05,
      "loss": 0.5172,
      "step": 207
    },
    {
      "epoch": 0.6645367412140575,
      "grad_norm": 0.8646454866233487,
      "learning_rate": 3.823035975390585e-05,
      "loss": 0.5888,
      "step": 208
    },
    {
      "epoch": 0.6677316293929713,
      "grad_norm": 0.8049832826819769,
      "learning_rate": 3.8199653663236336e-05,
      "loss": 0.5792,
      "step": 209
    },
    {
      "epoch": 0.670926517571885,
      "grad_norm": 0.7543833230398367,
      "learning_rate": 3.8168696008300884e-05,
      "loss": 0.5196,
      "step": 210
    },
    {
      "epoch": 0.6741214057507987,
      "grad_norm": 0.9148118195164756,
      "learning_rate": 3.813748721701091e-05,
      "loss": 0.5444,
      "step": 211
    },
    {
      "epoch": 0.6773162939297125,
      "grad_norm": 0.7522832027542112,
      "learning_rate": 3.8106027720749176e-05,
      "loss": 0.5673,
      "step": 212
    },
    {
      "epoch": 0.6805111821086262,
      "grad_norm": 0.8180964056504714,
      "learning_rate": 3.807431795436379e-05,
      "loss": 0.5756,
      "step": 213
    },
    {
      "epoch": 0.6837060702875399,
      "grad_norm": 0.9161522568428497,
      "learning_rate": 3.8042358356162215e-05,
      "loss": 0.5901,
      "step": 214
    },
    {
      "epoch": 0.6869009584664537,
      "grad_norm": 0.943077264357207,
      "learning_rate": 3.801014936790522e-05,
      "loss": 0.4931,
      "step": 215
    },
    {
      "epoch": 0.6900958466453674,
      "grad_norm": 0.7950064346283184,
      "learning_rate": 3.797769143480075e-05,
      "loss": 0.5441,
      "step": 216
    },
    {
      "epoch": 0.6932907348242812,
      "grad_norm": 0.9413760937926158,
      "learning_rate": 3.79449850054978e-05,
      "loss": 0.5904,
      "step": 217
    },
    {
      "epoch": 0.6964856230031949,
      "grad_norm": 1.212260052461721,
      "learning_rate": 3.791203053208017e-05,
      "loss": 0.5766,
      "step": 218
    },
    {
      "epoch": 0.6996805111821086,
      "grad_norm": 0.8484051659303491,
      "learning_rate": 3.7878828470060274e-05,
      "loss": 0.5772,
      "step": 219
    },
    {
      "epoch": 0.7028753993610224,
      "grad_norm": 1.1355674304167553,
      "learning_rate": 3.7845379278372775e-05,
      "loss": 0.5679,
      "step": 220
    },
    {
      "epoch": 0.7060702875399361,
      "grad_norm": 0.984582570050398,
      "learning_rate": 3.781168341936834e-05,
      "loss": 0.5432,
      "step": 221
    },
    {
      "epoch": 0.7092651757188498,
      "grad_norm": 0.9476742495794508,
      "learning_rate": 3.777774135880712e-05,
      "loss": 0.5682,
      "step": 222
    },
    {
      "epoch": 0.7124600638977636,
      "grad_norm": 1.139069082180595,
      "learning_rate": 3.774355356585243e-05,
      "loss": 0.5121,
      "step": 223
    },
    {
      "epoch": 0.7156549520766773,
      "grad_norm": 0.7997882642082141,
      "learning_rate": 3.7709120513064196e-05,
      "loss": 0.5196,
      "step": 224
    },
    {
      "epoch": 0.7188498402555911,
      "grad_norm": 0.912339161175054,
      "learning_rate": 3.7674442676392456e-05,
      "loss": 0.5309,
      "step": 225
    },
    {
      "epoch": 0.7220447284345048,
      "grad_norm": 0.8646463276372267,
      "learning_rate": 3.7639520535170736e-05,
      "loss": 0.5764,
      "step": 226
    },
    {
      "epoch": 0.7252396166134185,
      "grad_norm": 0.8485244000483088,
      "learning_rate": 3.760435457210948e-05,
      "loss": 0.5711,
      "step": 227
    },
    {
      "epoch": 0.7284345047923323,
      "grad_norm": 0.8913545645187945,
      "learning_rate": 3.7568945273289355e-05,
      "loss": 0.5355,
      "step": 228
    },
    {
      "epoch": 0.731629392971246,
      "grad_norm": 0.8760365656277963,
      "learning_rate": 3.753329312815453e-05,
      "loss": 0.5402,
      "step": 229
    },
    {
      "epoch": 0.7348242811501597,
      "grad_norm": 0.9291923706353212,
      "learning_rate": 3.749739862950589e-05,
      "loss": 0.5323,
      "step": 230
    },
    {
      "epoch": 0.7380191693290735,
      "grad_norm": 1.1191138286045244,
      "learning_rate": 3.7461262273494277e-05,
      "loss": 0.5401,
      "step": 231
    },
    {
      "epoch": 0.7412140575079872,
      "grad_norm": 0.8298783641280376,
      "learning_rate": 3.742488455961358e-05,
      "loss": 0.5489,
      "step": 232
    },
    {
      "epoch": 0.744408945686901,
      "grad_norm": 0.8194676142692792,
      "learning_rate": 3.738826599069385e-05,
      "loss": 0.5277,
      "step": 233
    },
    {
      "epoch": 0.7476038338658147,
      "grad_norm": 0.7674059358894463,
      "learning_rate": 3.7351407072894356e-05,
      "loss": 0.5169,
      "step": 234
    },
    {
      "epoch": 0.7507987220447284,
      "grad_norm": 0.8486151770014176,
      "learning_rate": 3.7314308315696604e-05,
      "loss": 0.535,
      "step": 235
    },
    {
      "epoch": 0.7539936102236422,
      "grad_norm": 0.7391651407617897,
      "learning_rate": 3.7276970231897225e-05,
      "loss": 0.504,
      "step": 236
    },
    {
      "epoch": 0.7571884984025559,
      "grad_norm": 0.902259232556215,
      "learning_rate": 3.723939333760099e-05,
      "loss": 0.5613,
      "step": 237
    },
    {
      "epoch": 0.7603833865814696,
      "grad_norm": 0.8625873135638807,
      "learning_rate": 3.720157815221358e-05,
      "loss": 0.5244,
      "step": 238
    },
    {
      "epoch": 0.7635782747603834,
      "grad_norm": 0.8936431918114204,
      "learning_rate": 3.716352519843448e-05,
      "loss": 0.5426,
      "step": 239
    },
    {
      "epoch": 0.7667731629392971,
      "grad_norm": 0.995320435639344,
      "learning_rate": 3.71252350022497e-05,
      "loss": 0.5104,
      "step": 240
    },
    {
      "epoch": 0.7699680511182109,
      "grad_norm": 0.8031264080307341,
      "learning_rate": 3.708670809292455e-05,
      "loss": 0.5246,
      "step": 241
    },
    {
      "epoch": 0.7731629392971247,
      "grad_norm": 1.0835434981781038,
      "learning_rate": 3.704794500299627e-05,
      "loss": 0.5003,
      "step": 242
    },
    {
      "epoch": 0.7763578274760383,
      "grad_norm": 0.9478013614793179,
      "learning_rate": 3.700894626826674e-05,
      "loss": 0.5116,
      "step": 243
    },
    {
      "epoch": 0.7795527156549521,
      "grad_norm": 1.0469893211794972,
      "learning_rate": 3.696971242779499e-05,
      "loss": 0.6261,
      "step": 244
    },
    {
      "epoch": 0.7827476038338658,
      "grad_norm": 0.9297231377773715,
      "learning_rate": 3.693024402388984e-05,
      "loss": 0.5502,
      "step": 245
    },
    {
      "epoch": 0.7859424920127795,
      "grad_norm": 0.7610910877457355,
      "learning_rate": 3.689054160210232e-05,
      "loss": 0.542,
      "step": 246
    },
    {
      "epoch": 0.7891373801916933,
      "grad_norm": 1.1342717917686111,
      "learning_rate": 3.6850605711218176e-05,
      "loss": 0.5844,
      "step": 247
    },
    {
      "epoch": 0.792332268370607,
      "grad_norm": 0.6995938546636964,
      "learning_rate": 3.681043690325029e-05,
      "loss": 0.5343,
      "step": 248
    },
    {
      "epoch": 0.7955271565495208,
      "grad_norm": 1.1509298073605394,
      "learning_rate": 3.6770035733431014e-05,
      "loss": 0.5209,
      "step": 249
    },
    {
      "epoch": 0.7987220447284346,
      "grad_norm": 0.8295131498958032,
      "learning_rate": 3.6729402760204535e-05,
      "loss": 0.5369,
      "step": 250
    },
    {
      "epoch": 0.8019169329073482,
      "grad_norm": 0.9218479825070323,
      "learning_rate": 3.668853854521913e-05,
      "loss": 0.4855,
      "step": 251
    },
    {
      "epoch": 0.805111821086262,
      "grad_norm": 0.7441334936621781,
      "learning_rate": 3.66474436533194e-05,
      "loss": 0.5268,
      "step": 252
    },
    {
      "epoch": 0.8083067092651757,
      "grad_norm": 0.7507501619024712,
      "learning_rate": 3.660611865253848e-05,
      "loss": 0.5105,
      "step": 253
    },
    {
      "epoch": 0.8115015974440895,
      "grad_norm": 0.7924592861345243,
      "learning_rate": 3.6564564114090175e-05,
      "loss": 0.4829,
      "step": 254
    },
    {
      "epoch": 0.8146964856230032,
      "grad_norm": 0.7720230321659396,
      "learning_rate": 3.652278061236109e-05,
      "loss": 0.5,
      "step": 255
    },
    {
      "epoch": 0.8178913738019169,
      "grad_norm": 0.7979403807748843,
      "learning_rate": 3.648076872490263e-05,
      "loss": 0.5296,
      "step": 256
    },
    {
      "epoch": 0.8210862619808307,
      "grad_norm": 0.8135723649226206,
      "learning_rate": 3.6438529032423086e-05,
      "loss": 0.507,
      "step": 257
    },
    {
      "epoch": 0.8242811501597445,
      "grad_norm": 0.7476070159945926,
      "learning_rate": 3.639606211877958e-05,
      "loss": 0.5006,
      "step": 258
    },
    {
      "epoch": 0.8274760383386581,
      "grad_norm": 0.8362960189666772,
      "learning_rate": 3.635336857096997e-05,
      "loss": 0.5254,
      "step": 259
    },
    {
      "epoch": 0.8306709265175719,
      "grad_norm": 0.9452074954015198,
      "learning_rate": 3.631044897912478e-05,
      "loss": 0.5499,
      "step": 260
    },
    {
      "epoch": 0.8338658146964856,
      "grad_norm": 0.6680220539129625,
      "learning_rate": 3.6267303936499006e-05,
      "loss": 0.5311,
      "step": 261
    },
    {
      "epoch": 0.8370607028753994,
      "grad_norm": 0.9178609027182498,
      "learning_rate": 3.622393403946395e-05,
      "loss": 0.5675,
      "step": 262
    },
    {
      "epoch": 0.8402555910543131,
      "grad_norm": 0.8455480168827179,
      "learning_rate": 3.6180339887498953e-05,
      "loss": 0.5582,
      "step": 263
    },
    {
      "epoch": 0.8434504792332268,
      "grad_norm": 0.7440034772147296,
      "learning_rate": 3.6136522083183096e-05,
      "loss": 0.5813,
      "step": 264
    },
    {
      "epoch": 0.8466453674121406,
      "grad_norm": 0.718472836263842,
      "learning_rate": 3.6092481232186905e-05,
      "loss": 0.5302,
      "step": 265
    },
    {
      "epoch": 0.8498402555910544,
      "grad_norm": 0.8057027197237966,
      "learning_rate": 3.604821794326398e-05,
      "loss": 0.4891,
      "step": 266
    },
    {
      "epoch": 0.853035143769968,
      "grad_norm": 0.8835357018051417,
      "learning_rate": 3.600373282824252e-05,
      "loss": 0.5171,
      "step": 267
    },
    {
      "epoch": 0.8562300319488818,
      "grad_norm": 0.7771156562797832,
      "learning_rate": 3.595902650201695e-05,
      "loss": 0.5085,
      "step": 268
    },
    {
      "epoch": 0.8594249201277955,
      "grad_norm": 0.8465271291843096,
      "learning_rate": 3.591409958253937e-05,
      "loss": 0.5324,
      "step": 269
    },
    {
      "epoch": 0.8626198083067093,
      "grad_norm": 0.7718211766797882,
      "learning_rate": 3.5868952690811015e-05,
      "loss": 0.5752,
      "step": 270
    },
    {
      "epoch": 0.865814696485623,
      "grad_norm": 0.9775795997595186,
      "learning_rate": 3.582358645087368e-05,
      "loss": 0.5599,
      "step": 271
    },
    {
      "epoch": 0.8690095846645367,
      "grad_norm": 0.8312123906165099,
      "learning_rate": 3.577800148980112e-05,
      "loss": 0.5223,
      "step": 272
    },
    {
      "epoch": 0.8722044728434505,
      "grad_norm": 0.935336068810225,
      "learning_rate": 3.573219843769033e-05,
      "loss": 0.5083,
      "step": 273
    },
    {
      "epoch": 0.8753993610223643,
      "grad_norm": 0.8720606095772891,
      "learning_rate": 3.568617792765287e-05,
      "loss": 0.5636,
      "step": 274
    },
    {
      "epoch": 0.8785942492012779,
      "grad_norm": 0.7824277269020549,
      "learning_rate": 3.563994059580611e-05,
      "loss": 0.5461,
      "step": 275
    },
    {
      "epoch": 0.8817891373801917,
      "grad_norm": 1.1055706045113576,
      "learning_rate": 3.559348708126445e-05,
      "loss": 0.5623,
      "step": 276
    },
    {
      "epoch": 0.8849840255591054,
      "grad_norm": 0.7710258784149644,
      "learning_rate": 3.5546818026130444e-05,
      "loss": 0.5279,
      "step": 277
    },
    {
      "epoch": 0.8881789137380192,
      "grad_norm": 0.8879078744940178,
      "learning_rate": 3.549993407548595e-05,
      "loss": 0.4966,
      "step": 278
    },
    {
      "epoch": 0.8913738019169329,
      "grad_norm": 1.0405338141997835,
      "learning_rate": 3.545283587738324e-05,
      "loss": 0.5365,
      "step": 279
    },
    {
      "epoch": 0.8945686900958466,
      "grad_norm": 0.7971271443117728,
      "learning_rate": 3.5405524082836e-05,
      "loss": 0.5672,
      "step": 280
    },
    {
      "epoch": 0.8977635782747604,
      "grad_norm": 0.931450449668035,
      "learning_rate": 3.5357999345810335e-05,
      "loss": 0.5668,
      "step": 281
    },
    {
      "epoch": 0.9009584664536742,
      "grad_norm": 0.7895639631341859,
      "learning_rate": 3.5310262323215774e-05,
      "loss": 0.4955,
      "step": 282
    },
    {
      "epoch": 0.9041533546325878,
      "grad_norm": 0.9607888759719633,
      "learning_rate": 3.5262313674896125e-05,
      "loss": 0.5147,
      "step": 283
    },
    {
      "epoch": 0.9073482428115016,
      "grad_norm": 0.9689194114193417,
      "learning_rate": 3.521415406362041e-05,
      "loss": 0.5062,
      "step": 284
    },
    {
      "epoch": 0.9105431309904153,
      "grad_norm": 0.9647692491089708,
      "learning_rate": 3.5165784155073665e-05,
      "loss": 0.5625,
      "step": 285
    },
    {
      "epoch": 0.9137380191693291,
      "grad_norm": 0.8493612979149395,
      "learning_rate": 3.511720461784778e-05,
      "loss": 0.5424,
      "step": 286
    },
    {
      "epoch": 0.9169329073482428,
      "grad_norm": 0.8442771355018583,
      "learning_rate": 3.50684161234322e-05,
      "loss": 0.5632,
      "step": 287
    },
    {
      "epoch": 0.9201277955271565,
      "grad_norm": 0.8552088751974364,
      "learning_rate": 3.50194193462047e-05,
      "loss": 0.5372,
      "step": 288
    },
    {
      "epoch": 0.9233226837060703,
      "grad_norm": 0.8152267999689093,
      "learning_rate": 3.497021496342203e-05,
      "loss": 0.4861,
      "step": 289
    },
    {
      "epoch": 0.9265175718849841,
      "grad_norm": 0.9398979894292626,
      "learning_rate": 3.4920803655210553e-05,
      "loss": 0.5801,
      "step": 290
    },
    {
      "epoch": 0.9297124600638977,
      "grad_norm": 0.977293682261694,
      "learning_rate": 3.4871186104556874e-05,
      "loss": 0.5207,
      "step": 291
    },
    {
      "epoch": 0.9329073482428115,
      "grad_norm": 0.8549875703308473,
      "learning_rate": 3.482136299729836e-05,
      "loss": 0.5349,
      "step": 292
    },
    {
      "epoch": 0.9361022364217252,
      "grad_norm": 0.8819868669919189,
      "learning_rate": 3.4771335022113705e-05,
      "loss": 0.5597,
      "step": 293
    },
    {
      "epoch": 0.939297124600639,
      "grad_norm": 0.9666325301004642,
      "learning_rate": 3.4721102870513345e-05,
      "loss": 0.5329,
      "step": 294
    },
    {
      "epoch": 0.9424920127795527,
      "grad_norm": 0.8203876358079187,
      "learning_rate": 3.467066723682998e-05,
      "loss": 0.5246,
      "step": 295
    },
    {
      "epoch": 0.9456869009584664,
      "grad_norm": 1.1228342280176937,
      "learning_rate": 3.462002881820891e-05,
      "loss": 0.6097,
      "step": 296
    },
    {
      "epoch": 0.9488817891373802,
      "grad_norm": 0.7732572275271596,
      "learning_rate": 3.456918831459844e-05,
      "loss": 0.5253,
      "step": 297
    },
    {
      "epoch": 0.952076677316294,
      "grad_norm": 0.9752827068051154,
      "learning_rate": 3.451814642874017e-05,
      "loss": 0.5539,
      "step": 298
    },
    {
      "epoch": 0.9552715654952076,
      "grad_norm": 0.7900396864367236,
      "learning_rate": 3.4466903866159326e-05,
      "loss": 0.5457,
      "step": 299
    },
    {
      "epoch": 0.9584664536741214,
      "grad_norm": 0.8596711318037582,
      "learning_rate": 3.441546133515496e-05,
      "loss": 0.5266,
      "step": 300
    },
    {
      "epoch": 0.9616613418530351,
      "grad_norm": 0.9071723802112778,
      "learning_rate": 3.4363819546790216e-05,
      "loss": 0.5189,
      "step": 301
    },
    {
      "epoch": 0.9648562300319489,
      "grad_norm": 0.6897307625929432,
      "learning_rate": 3.431197921488242e-05,
      "loss": 0.5258,
      "step": 302
    },
    {
      "epoch": 0.9680511182108626,
      "grad_norm": 0.8072267951218067,
      "learning_rate": 3.425994105599331e-05,
      "loss": 0.5025,
      "step": 303
    },
    {
      "epoch": 0.9712460063897763,
      "grad_norm": 0.7299045123280457,
      "learning_rate": 3.4207705789419035e-05,
      "loss": 0.4942,
      "step": 304
    },
    {
      "epoch": 0.9744408945686901,
      "grad_norm": 0.811210391135453,
      "learning_rate": 3.41552741371803e-05,
      "loss": 0.5128,
      "step": 305
    },
    {
      "epoch": 0.9776357827476039,
      "grad_norm": 0.6833163220999185,
      "learning_rate": 3.4102646824012333e-05,
      "loss": 0.5036,
      "step": 306
    },
    {
      "epoch": 0.9808306709265175,
      "grad_norm": 0.7318928742301355,
      "learning_rate": 3.404982457735487e-05,
      "loss": 0.5248,
      "step": 307
    },
    {
      "epoch": 0.9840255591054313,
      "grad_norm": 0.8151408628855044,
      "learning_rate": 3.399680812734213e-05,
      "loss": 0.5244,
      "step": 308
    },
    {
      "epoch": 0.987220447284345,
      "grad_norm": 0.7365970167922717,
      "learning_rate": 3.3943598206792665e-05,
      "loss": 0.5334,
      "step": 309
    },
    {
      "epoch": 0.9904153354632588,
      "grad_norm": 0.6444531685595024,
      "learning_rate": 3.3890195551199334e-05,
      "loss": 0.506,
      "step": 310
    },
    {
      "epoch": 0.9936102236421726,
      "grad_norm": 0.7379917525512831,
      "learning_rate": 3.3836600898719e-05,
      "loss": 0.4884,
      "step": 311
    },
    {
      "epoch": 0.9968051118210862,
      "grad_norm": 0.8006414034782756,
      "learning_rate": 3.3782814990162457e-05,
      "loss": 0.6063,
      "step": 312
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8714839087095215,
      "learning_rate": 3.372883856898408e-05,
      "loss": 0.5957,
      "step": 313
    },
    {
      "epoch": 1.0031948881789137,
      "grad_norm": 0.8608076101674021,
      "learning_rate": 3.367467238127165e-05,
      "loss": 0.4153,
      "step": 314
    },
    {
      "epoch": 1.0063897763578276,
      "grad_norm": 0.7863968465089736,
      "learning_rate": 3.3620317175735945e-05,
      "loss": 0.4178,
      "step": 315
    },
    {
      "epoch": 1.0095846645367412,
      "grad_norm": 0.6819742061303111,
      "learning_rate": 3.3565773703700474e-05,
      "loss": 0.3475,
      "step": 316
    },
    {
      "epoch": 1.012779552715655,
      "grad_norm": 0.7870011705239355,
      "learning_rate": 3.351104271909104e-05,
      "loss": 0.3629,
      "step": 317
    },
    {
      "epoch": 1.0159744408945688,
      "grad_norm": 0.8316632267191167,
      "learning_rate": 3.345612497842532e-05,
      "loss": 0.3761,
      "step": 318
    },
    {
      "epoch": 1.0191693290734825,
      "grad_norm": 0.7862604900294438,
      "learning_rate": 3.3401021240802446e-05,
      "loss": 0.3627,
      "step": 319
    },
    {
      "epoch": 1.0223642172523961,
      "grad_norm": 1.0061847359485523,
      "learning_rate": 3.334573226789249e-05,
      "loss": 0.4051,
      "step": 320
    },
    {
      "epoch": 1.0255591054313098,
      "grad_norm": 0.8807904375499824,
      "learning_rate": 3.32902588239259e-05,
      "loss": 0.3968,
      "step": 321
    },
    {
      "epoch": 1.0287539936102237,
      "grad_norm": 1.1981019094258039,
      "learning_rate": 3.3234601675683005e-05,
      "loss": 0.4202,
      "step": 322
    },
    {
      "epoch": 1.0319488817891374,
      "grad_norm": 0.844697383840708,
      "learning_rate": 3.317876159248337e-05,
      "loss": 0.3743,
      "step": 323
    },
    {
      "epoch": 1.035143769968051,
      "grad_norm": 0.9126433986445898,
      "learning_rate": 3.3122739346175176e-05,
      "loss": 0.3855,
      "step": 324
    },
    {
      "epoch": 1.038338658146965,
      "grad_norm": 0.6598463168728887,
      "learning_rate": 3.306653571112454e-05,
      "loss": 0.3476,
      "step": 325
    },
    {
      "epoch": 1.0415335463258786,
      "grad_norm": 0.8158289993131012,
      "learning_rate": 3.301015146420484e-05,
      "loss": 0.3718,
      "step": 326
    },
    {
      "epoch": 1.0447284345047922,
      "grad_norm": 0.9397053531896098,
      "learning_rate": 3.295358738478593e-05,
      "loss": 0.4497,
      "step": 327
    },
    {
      "epoch": 1.0479233226837061,
      "grad_norm": 0.6428708472951121,
      "learning_rate": 3.2896844254723414e-05,
      "loss": 0.3422,
      "step": 328
    },
    {
      "epoch": 1.0511182108626198,
      "grad_norm": 0.9006480199213088,
      "learning_rate": 3.283992285834782e-05,
      "loss": 0.3803,
      "step": 329
    },
    {
      "epoch": 1.0543130990415335,
      "grad_norm": 0.804430943445223,
      "learning_rate": 3.2782823982453746e-05,
      "loss": 0.3999,
      "step": 330
    },
    {
      "epoch": 1.0575079872204474,
      "grad_norm": 0.9270904209069851,
      "learning_rate": 3.272554841628901e-05,
      "loss": 0.4319,
      "step": 331
    },
    {
      "epoch": 1.060702875399361,
      "grad_norm": 0.845825639588173,
      "learning_rate": 3.266809695154371e-05,
      "loss": 0.3746,
      "step": 332
    },
    {
      "epoch": 1.0638977635782747,
      "grad_norm": 0.8494398683413655,
      "learning_rate": 3.261047038233931e-05,
      "loss": 0.3969,
      "step": 333
    },
    {
      "epoch": 1.0670926517571886,
      "grad_norm": 0.7126024738944513,
      "learning_rate": 3.2552669505217646e-05,
      "loss": 0.3474,
      "step": 334
    },
    {
      "epoch": 1.0702875399361023,
      "grad_norm": 0.9359004672009658,
      "learning_rate": 3.2494695119129924e-05,
      "loss": 0.3707,
      "step": 335
    },
    {
      "epoch": 1.073482428115016,
      "grad_norm": 0.6005459131808137,
      "learning_rate": 3.243654802542568e-05,
      "loss": 0.3063,
      "step": 336
    },
    {
      "epoch": 1.0766773162939298,
      "grad_norm": 0.8842790772014463,
      "learning_rate": 3.2378229027841675e-05,
      "loss": 0.3765,
      "step": 337
    },
    {
      "epoch": 1.0798722044728435,
      "grad_norm": 0.9070675817695492,
      "learning_rate": 3.231973893249083e-05,
      "loss": 0.3779,
      "step": 338
    },
    {
      "epoch": 1.0830670926517572,
      "grad_norm": 0.670447746767721,
      "learning_rate": 3.226107854785106e-05,
      "loss": 0.4082,
      "step": 339
    },
    {
      "epoch": 1.0862619808306708,
      "grad_norm": 0.9213283733651502,
      "learning_rate": 3.220224868475408e-05,
      "loss": 0.4237,
      "step": 340
    },
    {
      "epoch": 1.0894568690095847,
      "grad_norm": 0.6839594328860654,
| "learning_rate": 3.2143250156374226e-05, | |
| "loss": 0.4307, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.0926517571884984, | |
| "grad_norm": 0.7489428445729561, | |
| "learning_rate": 3.208408377821722e-05, | |
| "loss": 0.3652, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.095846645367412, | |
| "grad_norm": 0.7550186524407567, | |
| "learning_rate": 3.202475036810886e-05, | |
| "loss": 0.406, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.099041533546326, | |
| "grad_norm": 0.6395054008269865, | |
| "learning_rate": 3.1965250746183755e-05, | |
| "loss": 0.3711, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.1022364217252396, | |
| "grad_norm": 0.657977446052051, | |
| "learning_rate": 3.190558573487397e-05, | |
| "loss": 0.3542, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.1054313099041533, | |
| "grad_norm": 0.8206988613968245, | |
| "learning_rate": 3.1845756158897654e-05, | |
| "loss": 0.3985, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.1086261980830672, | |
| "grad_norm": 0.7479778299255093, | |
| "learning_rate": 3.178576284524765e-05, | |
| "loss": 0.3371, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.1118210862619808, | |
| "grad_norm": 0.753877520359999, | |
| "learning_rate": 3.1725606623180086e-05, | |
| "loss": 0.3699, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.1150159744408945, | |
| "grad_norm": 0.8504076245127729, | |
| "learning_rate": 3.166528832420283e-05, | |
| "loss": 0.3912, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.1182108626198084, | |
| "grad_norm": 0.6964611574953985, | |
| "learning_rate": 3.160480878206412e-05, | |
| "loss": 0.3386, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.121405750798722, | |
| "grad_norm": 0.6824499798649496, | |
| "learning_rate": 3.154416883274092e-05, | |
| "loss": 0.3709, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.1246006389776357, | |
| "grad_norm": 1.0185089841293544, | |
| "learning_rate": 3.148336931442745e-05, | |
| "loss": 0.3634, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.1277955271565494, | |
| "grad_norm": 0.7083393549747615, | |
| "learning_rate": 3.142241106752356e-05, | |
| "loss": 0.3941, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.1309904153354633, | |
| "grad_norm": 0.8028674737087654, | |
| "learning_rate": 3.136129493462312e-05, | |
| "loss": 0.3424, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.134185303514377, | |
| "grad_norm": 0.9169589595364791, | |
| "learning_rate": 3.130002176050238e-05, | |
| "loss": 0.37, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.1373801916932909, | |
| "grad_norm": 0.6610294457775537, | |
| "learning_rate": 3.123859239210827e-05, | |
| "loss": 0.3673, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.1405750798722045, | |
| "grad_norm": 0.7218101821218883, | |
| "learning_rate": 3.1177007678546746e-05, | |
| "loss": 0.4232, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.1437699680511182, | |
| "grad_norm": 0.9486057955199187, | |
| "learning_rate": 3.111526847107099e-05, | |
| "loss": 0.3852, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.1469648562300319, | |
| "grad_norm": 0.6192218269870875, | |
| "learning_rate": 3.105337562306968e-05, | |
| "loss": 0.3301, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.1501597444089458, | |
| "grad_norm": 0.6912746341979487, | |
| "learning_rate": 3.099132999005519e-05, | |
| "loss": 0.3625, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.1533546325878594, | |
| "grad_norm": 0.8493378937993168, | |
| "learning_rate": 3.092913242965175e-05, | |
| "loss": 0.3951, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.156549520766773, | |
| "grad_norm": 0.833993863731902, | |
| "learning_rate": 3.086678380158364e-05, | |
| "loss": 0.3902, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.159744408945687, | |
| "grad_norm": 0.7398039881016663, | |
| "learning_rate": 3.0804284967663214e-05, | |
| "loss": 0.3924, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.1629392971246006, | |
| "grad_norm": 0.7703232633649562, | |
| "learning_rate": 3.074163679177907e-05, | |
| "loss": 0.3761, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.1661341853035143, | |
| "grad_norm": 0.89436430995006, | |
| "learning_rate": 3.06788401398841e-05, | |
| "loss": 0.3701, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.1693290734824282, | |
| "grad_norm": 0.7039513259201169, | |
| "learning_rate": 3.061589587998346e-05, | |
| "loss": 0.3622, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.1725239616613419, | |
| "grad_norm": 0.7355381764642869, | |
| "learning_rate": 3.055280488212266e-05, | |
| "loss": 0.3969, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.1757188498402555, | |
| "grad_norm": 0.8100722031529548, | |
| "learning_rate": 3.0489568018375447e-05, | |
| "loss": 0.3718, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.1789137380191694, | |
| "grad_norm": 0.7026117502103958, | |
| "learning_rate": 3.042618616283184e-05, | |
| "loss": 0.3405, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.182108626198083, | |
| "grad_norm": 0.7207118281643955, | |
| "learning_rate": 3.036266019158596e-05, | |
| "loss": 0.3889, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.1853035143769968, | |
| "grad_norm": 0.79060661451023, | |
| "learning_rate": 3.0298990982724e-05, | |
| "loss": 0.3994, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.1884984025559104, | |
| "grad_norm": 0.8441043300302222, | |
| "learning_rate": 3.0235179416312025e-05, | |
| "loss": 0.3508, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.1916932907348243, | |
| "grad_norm": 0.7349501648718484, | |
| "learning_rate": 3.017122637438385e-05, | |
| "loss": 0.3847, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.194888178913738, | |
| "grad_norm": 0.8725781608132315, | |
| "learning_rate": 3.0107132740928832e-05, | |
| "loss": 0.3926, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.1980830670926517, | |
| "grad_norm": 0.8327406117293235, | |
| "learning_rate": 3.004289940187964e-05, | |
| "loss": 0.3802, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.2012779552715656, | |
| "grad_norm": 0.7199915365896178, | |
| "learning_rate": 2.9978527245100034e-05, | |
| "loss": 0.354, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.2044728434504792, | |
| "grad_norm": 0.7419030308089841, | |
| "learning_rate": 2.991401716037255e-05, | |
| "loss": 0.3884, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.207667731629393, | |
| "grad_norm": 0.8062260851883908, | |
| "learning_rate": 2.9849370039386284e-05, | |
| "loss": 0.366, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.2108626198083068, | |
| "grad_norm": 0.7591181232522807, | |
| "learning_rate": 2.9784586775724443e-05, | |
| "loss": 0.3579, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.2140575079872205, | |
| "grad_norm": 0.8434345898198293, | |
| "learning_rate": 2.971966826485212e-05, | |
| "loss": 0.4524, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2172523961661341, | |
| "grad_norm": 0.8495573303145608, | |
| "learning_rate": 2.9654615404103837e-05, | |
| "loss": 0.434, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.220447284345048, | |
| "grad_norm": 0.6933721175439074, | |
| "learning_rate": 2.9589429092671155e-05, | |
| "loss": 0.4343, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.2236421725239617, | |
| "grad_norm": 0.7274608975530785, | |
| "learning_rate": 2.952411023159027e-05, | |
| "loss": 0.3298, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.2268370607028753, | |
| "grad_norm": 0.8168975435659065, | |
| "learning_rate": 2.945865972372954e-05, | |
| "loss": 0.4002, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.230031948881789, | |
| "grad_norm": 0.8873260212581557, | |
| "learning_rate": 2.939307847377699e-05, | |
| "loss": 0.4397, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.233226837060703, | |
| "grad_norm": 0.6916592999050895, | |
| "learning_rate": 2.9327367388227847e-05, | |
| "loss": 0.391, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.2364217252396166, | |
| "grad_norm": 0.7553082417173358, | |
| "learning_rate": 2.926152737537198e-05, | |
| "loss": 0.3466, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.2396166134185305, | |
| "grad_norm": 0.8650485170070908, | |
| "learning_rate": 2.9195559345281336e-05, | |
| "loss": 0.4146, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.2428115015974441, | |
| "grad_norm": 0.7330543803583387, | |
| "learning_rate": 2.9129464209797404e-05, | |
| "loss": 0.3898, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.2460063897763578, | |
| "grad_norm": 0.7363439527629848, | |
| "learning_rate": 2.906324288251857e-05, | |
| "loss": 0.4112, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.2492012779552715, | |
| "grad_norm": 0.8327177625635779, | |
| "learning_rate": 2.8996896278787504e-05, | |
| "loss": 0.3905, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.2523961661341854, | |
| "grad_norm": 0.6537335351551286, | |
| "learning_rate": 2.893042531567851e-05, | |
| "loss": 0.4207, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.255591054313099, | |
| "grad_norm": 0.7146002049281741, | |
| "learning_rate": 2.886383091198483e-05, | |
| "loss": 0.3441, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.2587859424920127, | |
| "grad_norm": 0.8078428165110145, | |
| "learning_rate": 2.8797113988205992e-05, | |
| "loss": 0.4221, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.2619808306709266, | |
| "grad_norm": 0.5972404381082699, | |
| "learning_rate": 2.8730275466535027e-05, | |
| "loss": 0.3144, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.2651757188498403, | |
| "grad_norm": 0.7706819426110447, | |
| "learning_rate": 2.866331627084576e-05, | |
| "loss": 0.3822, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.268370607028754, | |
| "grad_norm": 0.6908783947369732, | |
| "learning_rate": 2.8596237326680035e-05, | |
| "loss": 0.3535, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.2715654952076676, | |
| "grad_norm": 0.712295172157205, | |
| "learning_rate": 2.8529039561234904e-05, | |
| "loss": 0.3748, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.2747603833865815, | |
| "grad_norm": 0.842337102034422, | |
| "learning_rate": 2.846172390334983e-05, | |
| "loss": 0.3949, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.2779552715654952, | |
| "grad_norm": 0.7287013629663512, | |
| "learning_rate": 2.8394291283493846e-05, | |
| "loss": 0.419, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.281150159744409, | |
| "grad_norm": 0.7190834632935403, | |
| "learning_rate": 2.8326742633752693e-05, | |
| "loss": 0.3852, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.2843450479233227, | |
| "grad_norm": 0.76075306510952, | |
| "learning_rate": 2.82590788878159e-05, | |
| "loss": 0.4172, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.2875399361022364, | |
| "grad_norm": 0.7039618344228508, | |
| "learning_rate": 2.8191300980963956e-05, | |
| "loss": 0.4121, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.29073482428115, | |
| "grad_norm": 0.6460440389338991, | |
| "learning_rate": 2.8123409850055307e-05, | |
| "loss": 0.3896, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.293929712460064, | |
| "grad_norm": 0.6974323179340803, | |
| "learning_rate": 2.8055406433513437e-05, | |
| "loss": 0.3549, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.2971246006389776, | |
| "grad_norm": 0.8173306197186939, | |
| "learning_rate": 2.798729167131391e-05, | |
| "loss": 0.4078, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.3003194888178915, | |
| "grad_norm": 0.7127151827524344, | |
| "learning_rate": 2.7919066504971355e-05, | |
| "loss": 0.3622, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.3035143769968052, | |
| "grad_norm": 0.732663954477486, | |
| "learning_rate": 2.7850731877526454e-05, | |
| "loss": 0.3845, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.3067092651757188, | |
| "grad_norm": 0.6128280361598326, | |
| "learning_rate": 2.7782288733532915e-05, | |
| "loss": 0.3877, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.3099041533546325, | |
| "grad_norm": 0.6950127994352941, | |
| "learning_rate": 2.7713738019044424e-05, | |
| "loss": 0.3538, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3130990415335464, | |
| "grad_norm": 0.686187825577798, | |
| "learning_rate": 2.764508068160154e-05, | |
| "loss": 0.387, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.31629392971246, | |
| "grad_norm": 0.6478458382514949, | |
| "learning_rate": 2.7576317670218626e-05, | |
| "loss": 0.3751, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.3194888178913737, | |
| "grad_norm": 0.6815550771368388, | |
| "learning_rate": 2.7507449935370726e-05, | |
| "loss": 0.3475, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.3226837060702876, | |
| "grad_norm": 0.6741037656638967, | |
| "learning_rate": 2.7438478428980407e-05, | |
| "loss": 0.396, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.3258785942492013, | |
| "grad_norm": 0.7167174755449621, | |
| "learning_rate": 2.736940410440462e-05, | |
| "loss": 0.3964, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.329073482428115, | |
| "grad_norm": 0.6842795899926827, | |
| "learning_rate": 2.7300227916421528e-05, | |
| "loss": 0.3973, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.3322683706070286, | |
| "grad_norm": 0.702058426076705, | |
| "learning_rate": 2.7230950821217294e-05, | |
| "loss": 0.387, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.3354632587859425, | |
| "grad_norm": 0.6368932760300601, | |
| "learning_rate": 2.7161573776372856e-05, | |
| "loss": 0.3964, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.3386581469648562, | |
| "grad_norm": 0.6782570721877241, | |
| "learning_rate": 2.7092097740850712e-05, | |
| "loss": 0.4042, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.34185303514377, | |
| "grad_norm": 0.5931978671155632, | |
| "learning_rate": 2.7022523674981674e-05, | |
| "loss": 0.395, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.3450479233226837, | |
| "grad_norm": 0.7201174578306515, | |
| "learning_rate": 2.6952852540451536e-05, | |
| "loss": 0.3921, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.3482428115015974, | |
| "grad_norm": 0.6946900695747145, | |
| "learning_rate": 2.688308530028786e-05, | |
| "loss": 0.3652, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.351437699680511, | |
| "grad_norm": 0.698534785763629, | |
| "learning_rate": 2.6813222918846613e-05, | |
| "loss": 0.3741, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.354632587859425, | |
| "grad_norm": 0.7564830310668963, | |
| "learning_rate": 2.6743266361798833e-05, | |
| "loss": 0.4067, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.3578274760383386, | |
| "grad_norm": 0.8625137509946531, | |
| "learning_rate": 2.6673216596117324e-05, | |
| "loss": 0.3512, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.3610223642172525, | |
| "grad_norm": 0.6004241242239579, | |
| "learning_rate": 2.660307459006325e-05, | |
| "loss": 0.3885, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.3642172523961662, | |
| "grad_norm": 0.8493675462357329, | |
| "learning_rate": 2.653284131317276e-05, | |
| "loss": 0.359, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.3674121405750799, | |
| "grad_norm": 0.7054484310915905, | |
| "learning_rate": 2.6462517736243612e-05, | |
| "loss": 0.3776, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.3706070287539935, | |
| "grad_norm": 0.7635212688991575, | |
| "learning_rate": 2.639210483132171e-05, | |
| "loss": 0.3678, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.3738019169329074, | |
| "grad_norm": 0.7551414066116379, | |
| "learning_rate": 2.6321603571687714e-05, | |
| "loss": 0.3538, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.376996805111821, | |
| "grad_norm": 0.5933801420526407, | |
| "learning_rate": 2.625101493184355e-05, | |
| "loss": 0.3608, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.3801916932907348, | |
| "grad_norm": 0.7485716718685856, | |
| "learning_rate": 2.618033988749895e-05, | |
| "loss": 0.3392, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.3833865814696487, | |
| "grad_norm": 0.6856872085204931, | |
| "learning_rate": 2.6109579415557997e-05, | |
| "loss": 0.3696, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.3865814696485623, | |
| "grad_norm": 0.6906273049790326, | |
| "learning_rate": 2.6038734494105562e-05, | |
| "loss": 0.3949, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.389776357827476, | |
| "grad_norm": 0.7317141874141749, | |
| "learning_rate": 2.5967806102393844e-05, | |
| "loss": 0.3961, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.3929712460063897, | |
| "grad_norm": 0.7084024455483174, | |
| "learning_rate": 2.5896795220828786e-05, | |
| "loss": 0.3729, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.3961661341853036, | |
| "grad_norm": 0.6735234384239845, | |
| "learning_rate": 2.582570283095656e-05, | |
| "loss": 0.3755, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.3993610223642172, | |
| "grad_norm": 0.639545495865673, | |
| "learning_rate": 2.575452991544998e-05, | |
| "loss": 0.3461, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.4025559105431311, | |
| "grad_norm": 0.8314980113052899, | |
| "learning_rate": 2.5683277458094926e-05, | |
| "loss": 0.3766, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.4057507987220448, | |
| "grad_norm": 0.5774548863952043, | |
| "learning_rate": 2.5611946443776733e-05, | |
| "loss": 0.3822, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.4089456869009584, | |
| "grad_norm": 0.5808224894717059, | |
| "learning_rate": 2.5540537858466596e-05, | |
| "loss": 0.3936, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.4121405750798721, | |
| "grad_norm": 0.6123085104187193, | |
| "learning_rate": 2.546905268920794e-05, | |
| "loss": 0.344, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.415335463258786, | |
| "grad_norm": 0.5345986142478568, | |
| "learning_rate": 2.5397491924102758e-05, | |
| "loss": 0.3681, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.4185303514376997, | |
| "grad_norm": 0.7318254435855026, | |
| "learning_rate": 2.532585655229797e-05, | |
| "loss": 0.3677, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.4217252396166133, | |
| "grad_norm": 0.6012026977631383, | |
| "learning_rate": 2.525414756397174e-05, | |
| "loss": 0.334, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.4249201277955272, | |
| "grad_norm": 0.7187336200880879, | |
| "learning_rate": 2.51823659503198e-05, | |
| "loss": 0.4127, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.428115015974441, | |
| "grad_norm": 0.657446388301636, | |
| "learning_rate": 2.5110512703541747e-05, | |
| "loss": 0.367, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.4313099041533546, | |
| "grad_norm": 0.6399302817983334, | |
| "learning_rate": 2.503858881682731e-05, | |
| "loss": 0.4096, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.4345047923322682, | |
| "grad_norm": 0.6063474923650863, | |
| "learning_rate": 2.4966595284342664e-05, | |
| "loss": 0.3701, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.4376996805111821, | |
| "grad_norm": 0.6983262454628201, | |
| "learning_rate": 2.489453310121663e-05, | |
| "loss": 0.3796, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4408945686900958, | |
| "grad_norm": 0.7119999272751214, | |
| "learning_rate": 2.4822403263526966e-05, | |
| "loss": 0.3553, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.4440894568690097, | |
| "grad_norm": 0.67185797833669, | |
| "learning_rate": 2.4750206768286593e-05, | |
| "loss": 0.3517, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.4472843450479234, | |
| "grad_norm": 0.6486337996555229, | |
| "learning_rate": 2.4677944613429778e-05, | |
| "loss": 0.3287, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.450479233226837, | |
| "grad_norm": 0.6516969882558993, | |
| "learning_rate": 2.46056177977984e-05, | |
| "loss": 0.3514, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.4536741214057507, | |
| "grad_norm": 0.7388191757920071, | |
| "learning_rate": 2.4533227321128084e-05, | |
| "loss": 0.4362, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.4568690095846646, | |
| "grad_norm": 0.6119479848006957, | |
| "learning_rate": 2.4460774184034408e-05, | |
| "loss": 0.3825, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.4600638977635783, | |
| "grad_norm": 0.7425050306965432, | |
| "learning_rate": 2.4388259387999097e-05, | |
| "loss": 0.3759, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.4632587859424921, | |
| "grad_norm": 0.6502397685694954, | |
| "learning_rate": 2.4315683935356127e-05, | |
| "loss": 0.3829, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.4664536741214058, | |
| "grad_norm": 0.6725716076372529, | |
| "learning_rate": 2.4243048829277916e-05, | |
| "loss": 0.3861, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.4696485623003195, | |
| "grad_norm": 0.6219046409113833, | |
| "learning_rate": 2.4170355073761433e-05, | |
| "loss": 0.3736, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.4728434504792332, | |
| "grad_norm": 0.5835078991741417, | |
| "learning_rate": 2.4097603673614325e-05, | |
| "loss": 0.3531, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.476038338658147, | |
| "grad_norm": 0.6550851854107704, | |
| "learning_rate": 2.4024795634441044e-05, | |
| "loss": 0.3262, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.4792332268370607, | |
| "grad_norm": 0.7675785572629827, | |
| "learning_rate": 2.3951931962628918e-05, | |
| "loss": 0.392, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.4824281150159744, | |
| "grad_norm": 0.6411841347495657, | |
| "learning_rate": 2.3879013665334258e-05, | |
| "loss": 0.4024, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.4856230031948883, | |
| "grad_norm": 0.6007938608835436, | |
| "learning_rate": 2.380604175046844e-05, | |
| "loss": 0.3661, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.488817891373802, | |
| "grad_norm": 0.6525253798215147, | |
| "learning_rate": 2.373301722668398e-05, | |
| "loss": 0.3746, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.4920127795527156, | |
| "grad_norm": 0.6446114375585952, | |
| "learning_rate": 2.365994110336054e-05, | |
| "loss": 0.3889, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.4952076677316293, | |
| "grad_norm": 0.6653896661247665, | |
| "learning_rate": 2.358681439059106e-05, | |
| "loss": 0.3594, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.4984025559105432, | |
| "grad_norm": 0.7011886854730449, | |
| "learning_rate": 2.3513638099167723e-05, | |
| "loss": 0.3889, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.5015974440894568, | |
| "grad_norm": 0.7538883021599542, | |
| "learning_rate": 2.3440413240568022e-05, | |
| "loss": 0.3642, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.5047923322683707, | |
| "grad_norm": 0.6733515586731865, | |
| "learning_rate": 2.3367140826940768e-05, | |
| "loss": 0.3482, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.5079872204472844, | |
| "grad_norm": 0.600416095532099, | |
| "learning_rate": 2.329382187109211e-05, | |
| "loss": 0.3399, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.511182108626198, | |
| "grad_norm": 0.687374773018976, | |
| "learning_rate": 2.3220457386471496e-05, | |
| "loss": 0.3754, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.5143769968051117, | |
| "grad_norm": 0.748016995785705, | |
| "learning_rate": 2.3147048387157725e-05, | |
| "loss": 0.3648, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.5175718849840254, | |
| "grad_norm": 0.7005861674092242, | |
| "learning_rate": 2.3073595887844884e-05, | |
| "loss": 0.305, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.5207667731629393, | |
| "grad_norm": 0.608521420596584, | |
| "learning_rate": 2.3000100903828343e-05, | |
| "loss": 0.3601, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.5239616613418532, | |
| "grad_norm": 0.6464475093135752, | |
| "learning_rate": 2.2926564450990716e-05, | |
| "loss": 0.3746, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.5271565495207668, | |
| "grad_norm": 0.6448444881128504, | |
| "learning_rate": 2.2852987545787815e-05, | |
| "loss": 0.3714, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.5303514376996805, | |
| "grad_norm": 0.6107451967926957, | |
| "learning_rate": 2.2779371205234604e-05, | |
| "loss": 0.3796, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.5335463258785942, | |
| "grad_norm": 0.660686053273227, | |
| "learning_rate": 2.2705716446891143e-05, | |
| "loss": 0.3822, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.5367412140575079, | |
| "grad_norm": 0.7750842489034678, | |
| "learning_rate": 2.263202428884853e-05, | |
| "loss": 0.4105, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.5399361022364217, | |
| "grad_norm": 0.6055959124966067, | |
| "learning_rate": 2.2558295749714794e-05, | |
| "loss": 0.4151, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.5431309904153354, | |
| "grad_norm": 0.6183198946717873, | |
| "learning_rate": 2.2484531848600866e-05, | |
| "loss": 0.3262, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.5463258785942493, | |
| "grad_norm": 0.6370711793501769, | |
| "learning_rate": 2.2410733605106462e-05, | |
| "loss": 0.3857, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.549520766773163, | |
| "grad_norm": 0.8015901036246532, | |
| "learning_rate": 2.233690203930599e-05, | |
| "loss": 0.3496, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.5527156549520766, | |
| "grad_norm": 0.7615585707403518, | |
| "learning_rate": 2.2263038171734447e-05, | |
| "loss": 0.3672, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.5559105431309903, | |
| "grad_norm": 0.5663892496577331, | |
| "learning_rate": 2.2189143023373337e-05, | |
| "loss": 0.3761, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.5591054313099042, | |
| "grad_norm": 0.7878946252561335, | |
| "learning_rate": 2.2115217615636534e-05, | |
| "loss": 0.3588, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.5623003194888179, | |
| "grad_norm": 0.7764697921864392, | |
| "learning_rate": 2.204126297035617e-05, | |
| "loss": 0.4196, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.5654952076677318, | |
| "grad_norm": 0.6542418496260201, | |
| "learning_rate": 2.1967280109768505e-05, | |
| "loss": 0.3408, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.5686900958466454, | |
| "grad_norm": 0.6223599528533879, | |
| "learning_rate": 2.1893270056499832e-05, | |
| "loss": 0.3777, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.571884984025559, | |
| "grad_norm": 0.661700414509121, | |
| "learning_rate": 2.1819233833552275e-05, | |
| "loss": 0.3128, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.5750798722044728, | |
| "grad_norm": 0.6234974404457322, | |
| "learning_rate": 2.1745172464289722e-05, | |
| "loss": 0.3962, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.5782747603833864, | |
| "grad_norm": 0.6676512384075517, | |
| "learning_rate": 2.167108697242363e-05, | |
| "loss": 0.3468, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.5814696485623003, | |
| "grad_norm": 0.6209137519763187, | |
| "learning_rate": 2.1596978381998883e-05, | |
| "loss": 0.344, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.5846645367412142, | |
| "grad_norm": 0.6215059716975698, | |
| "learning_rate": 2.152284771737966e-05, | |
| "loss": 0.3742, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.5878594249201279, | |
| "grad_norm": 0.6023037671559589, | |
| "learning_rate": 2.1448696003235252e-05, | |
| "loss": 0.3752, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.5910543130990416, | |
| "grad_norm": 0.6369188120702737, | |
| "learning_rate": 2.1374524264525905e-05, | |
| "loss": 0.3796, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.5942492012779552, | |
| "grad_norm": 0.5900720442619971, | |
| "learning_rate": 2.130033352648866e-05, | |
| "loss": 0.3535, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.5974440894568689, | |
| "grad_norm": 0.6255668983966362, | |
| "learning_rate": 2.122612481462316e-05, | |
| "loss": 0.4114, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6006389776357828, | |
| "grad_norm": 0.6798385259233033, | |
| "learning_rate": 2.115189915467752e-05, | |
| "loss": 0.389, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.6038338658146964, | |
| "grad_norm": 0.6905443213231501, | |
| "learning_rate": 2.1077657572634092e-05, | |
| "loss": 0.3246, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.6070287539936103, | |
| "grad_norm": 0.571423682418217, | |
| "learning_rate": 2.1003401094695325e-05, | |
| "loss": 0.3344, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.610223642172524, | |
| "grad_norm": 0.6504514480344465, | |
| "learning_rate": 2.0929130747269567e-05, | |
| "loss": 0.3621, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.6134185303514377, | |
| "grad_norm": 0.6411322199210792, | |
| "learning_rate": 2.0854847556956856e-05, | |
| "loss": 0.3734, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.6166134185303513, | |
| "grad_norm": 0.6380721378438481, | |
| "learning_rate": 2.078055255053478e-05, | |
| "loss": 0.4034, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.619808306709265, | |
| "grad_norm": 0.6249192416083079, | |
| "learning_rate": 2.070624675494424e-05, | |
| "loss": 0.3504, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.623003194888179, | |
| "grad_norm": 0.6741471873600642, | |
| "learning_rate": 2.0631931197275267e-05, | |
| "loss": 0.3197, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.6261980830670928, | |
| "grad_norm": 0.6125040165749199, | |
| "learning_rate": 2.0557606904752833e-05, | |
| "loss": 0.3419, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.6293929712460065, | |
| "grad_norm": 0.6665831610245562, | |
| "learning_rate": 2.0483274904722647e-05, | |
| "loss": 0.3399, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.6325878594249201, | |
| "grad_norm": 0.6782509805533894, | |
| "learning_rate": 2.0408936224636958e-05, | |
| "loss": 0.384, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.6357827476038338, | |
| "grad_norm": 0.7436644924935581, | |
| "learning_rate": 2.033459189204034e-05, | |
| "loss": 0.3595, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.6389776357827475, | |
| "grad_norm": 0.7574508093688915, | |
| "learning_rate": 2.026024293455551e-05, | |
| "loss": 0.403, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.6421725239616614, | |
| "grad_norm": 0.6776067254300645, | |
| "learning_rate": 2.0185890379869115e-05, | |
| "loss": 0.3563, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.645367412140575, | |
| "grad_norm": 0.5691981666919255, | |
| "learning_rate": 2.0111535255717496e-05, | |
| "loss": 0.3613, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.648562300319489, | |
| "grad_norm": 0.6501032885266717, | |
| "learning_rate": 2.0037178589872547e-05, | |
| "loss": 0.3553, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.6517571884984026, | |
| "grad_norm": 0.5894185843432012, | |
| "learning_rate": 1.9962821410127456e-05, | |
| "loss": 0.3335, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.6549520766773163, | |
| "grad_norm": 0.639710192390572, | |
| "learning_rate": 1.9888464744282504e-05, | |
| "loss": 0.3627, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.65814696485623, | |
| "grad_norm": 0.6216821494956912, | |
| "learning_rate": 1.981410962013089e-05, | |
| "loss": 0.3344, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.6613418530351438, | |
| "grad_norm": 0.6345005257067161, | |
| "learning_rate": 1.9739757065444492e-05, | |
| "loss": 0.3698, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.6645367412140575, | |
| "grad_norm": 0.8101905044732475, | |
| "learning_rate": 1.9665408107959657e-05, | |
| "loss": 0.3861, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.6677316293929714, | |
| "grad_norm": 0.5961761807889937, | |
| "learning_rate": 1.9591063775363045e-05, | |
| "loss": 0.3535, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.670926517571885, | |
| "grad_norm": 0.7163037162115128, | |
| "learning_rate": 1.951672509527736e-05, | |
| "loss": 0.3573, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.6741214057507987, | |
| "grad_norm": 0.7626145562758659, | |
| "learning_rate": 1.944239309524717e-05, | |
| "loss": 0.3943, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.6773162939297124, | |
| "grad_norm": 0.6369019346205154, | |
| "learning_rate": 1.936806880272474e-05, | |
| "loss": 0.3311, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.680511182108626, | |
| "grad_norm": 0.6564798194303639, | |
| "learning_rate": 1.9293753245055772e-05, | |
| "loss": 0.4014, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.68370607028754, | |
| "grad_norm": 0.6312445430308768, | |
| "learning_rate": 1.9219447449465222e-05, | |
| "loss": 0.3123, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.6869009584664538, | |
| "grad_norm": 0.674163910217299, | |
| "learning_rate": 1.9145152443043147e-05, | |
| "loss": 0.4069, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.6900958466453675, | |
| "grad_norm": 0.5512026479165967, | |
| "learning_rate": 1.9070869252730443e-05, | |
| "loss": 0.3823, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.6932907348242812, | |
| "grad_norm": 0.5459303135589316, | |
| "learning_rate": 1.899659890530468e-05, | |
| "loss": 0.3801, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.6964856230031948, | |
| "grad_norm": 0.7107489428061275, | |
| "learning_rate": 1.8922342427365915e-05, | |
| "loss": 0.3743, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.6996805111821085, | |
| "grad_norm": 0.6821024548908552, | |
| "learning_rate": 1.8848100845322486e-05, | |
| "loss": 0.4001, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.7028753993610224, | |
| "grad_norm": 0.7194530880466413, | |
| "learning_rate": 1.8773875185376845e-05, | |
| "loss": 0.3967, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.706070287539936, | |
| "grad_norm": 0.619789910277277, | |
| "learning_rate": 1.869966647351135e-05, | |
| "loss": 0.3914, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.70926517571885, | |
| "grad_norm": 0.6514123622117488, | |
| "learning_rate": 1.86254757354741e-05, | |
| "loss": 0.355, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.7124600638977636, | |
| "grad_norm": 0.5831800498454969, | |
| "learning_rate": 1.8551303996764755e-05, | |
| "loss": 0.3715, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.7156549520766773, | |
| "grad_norm": 0.7041038447903819, | |
| "learning_rate": 1.8477152282620344e-05, | |
| "loss": 0.3452, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.718849840255591, | |
| "grad_norm": 0.6489046638136863, | |
| "learning_rate": 1.8403021618001124e-05, | |
| "loss": 0.328, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.7220447284345048, | |
| "grad_norm": 0.6220134083521842, | |
| "learning_rate": 1.8328913027576373e-05, | |
| "loss": 0.3644, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.7252396166134185, | |
| "grad_norm": 0.691164548614104, | |
| "learning_rate": 1.825482753571028e-05, | |
| "loss": 0.386, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.7284345047923324, | |
| "grad_norm": 0.6471216980585122, | |
| "learning_rate": 1.818076616644773e-05, | |
| "loss": 0.3863, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.731629392971246, | |
| "grad_norm": 0.5955048340238702, | |
| "learning_rate": 1.8106729943500174e-05, | |
| "loss": 0.3813, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.7348242811501597, | |
| "grad_norm": 0.5557497123983334, | |
| "learning_rate": 1.80327198902315e-05, | |
| "loss": 0.4207, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.7380191693290734, | |
| "grad_norm": 0.6140635892910118, | |
| "learning_rate": 1.7958737029643835e-05, | |
| "loss": 0.352, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.741214057507987, | |
| "grad_norm": 0.654044064069364, | |
| "learning_rate": 1.788478238436347e-05, | |
| "loss": 0.3887, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.744408945686901, | |
| "grad_norm": 0.5807508632491186, | |
| "learning_rate": 1.781085697662667e-05, | |
| "loss": 0.3833, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.7476038338658149, | |
| "grad_norm": 0.7236056668219373, | |
| "learning_rate": 1.7736961828265553e-05, | |
| "loss": 0.387, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.7507987220447285, | |
| "grad_norm": 0.6286981610831269, | |
| "learning_rate": 1.7663097960694017e-05, | |
| "loss": 0.413, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.7539936102236422, | |
| "grad_norm": 0.6471275268399443, | |
| "learning_rate": 1.758926639489354e-05, | |
| "loss": 0.3265, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.7571884984025559, | |
| "grad_norm": 0.6634839205028399, | |
| "learning_rate": 1.7515468151399134e-05, | |
| "loss": 0.3959, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.7603833865814695, | |
| "grad_norm": 0.6755904835423869, | |
| "learning_rate": 1.7441704250285212e-05, | |
| "loss": 0.3606, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.7635782747603834, | |
| "grad_norm": 0.6500797325645201, | |
| "learning_rate": 1.7367975711151483e-05, | |
| "loss": 0.3876, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.766773162939297, | |
| "grad_norm": 0.6477835743216911, | |
| "learning_rate": 1.729428355310886e-05, | |
| "loss": 0.3158, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.769968051118211, | |
| "grad_norm": 0.6950215837197072, | |
| "learning_rate": 1.7220628794765403e-05, | |
| "loss": 0.3578, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.7731629392971247, | |
| "grad_norm": 0.7161970787305121, | |
| "learning_rate": 1.7147012454212195e-05, | |
| "loss": 0.4181, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.7763578274760383, | |
| "grad_norm": 0.56303267169658, | |
| "learning_rate": 1.7073435549009288e-05, | |
| "loss": 0.3609, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.779552715654952, | |
| "grad_norm": 0.6914199086511422, | |
| "learning_rate": 1.699989909617166e-05, | |
| "loss": 0.3109, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.7827476038338657, | |
| "grad_norm": 0.6547054751902353, | |
| "learning_rate": 1.6926404112155123e-05, | |
| "loss": 0.3595, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.7859424920127795, | |
| "grad_norm": 0.7444527094326194, | |
| "learning_rate": 1.6852951612842278e-05, | |
| "loss": 0.3476, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.7891373801916934, | |
| "grad_norm": 0.8274662111243524, | |
| "learning_rate": 1.677954261352851e-05, | |
| "loss": 0.3673, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.792332268370607, | |
| "grad_norm": 0.7793207248626209, | |
| "learning_rate": 1.6706178128907897e-05, | |
| "loss": 0.3756, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.7955271565495208, | |
| "grad_norm": 0.6411675936700109, | |
| "learning_rate": 1.6632859173059232e-05, | |
| "loss": 0.3573, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.7987220447284344, | |
| "grad_norm": 0.7225443818014319, | |
| "learning_rate": 1.655958675943198e-05, | |
| "loss": 0.3443, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.8019169329073481, | |
| "grad_norm": 0.7764335703579314, | |
| "learning_rate": 1.6486361900832284e-05, | |
| "loss": 0.3644, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.805111821086262, | |
| "grad_norm": 0.7643738155044116, | |
| "learning_rate": 1.6413185609408946e-05, | |
| "loss": 0.3814, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.8083067092651757, | |
| "grad_norm": 0.6797205069953752, | |
| "learning_rate": 1.6340058896639464e-05, | |
| "loss": 0.3431, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.8115015974440896, | |
| "grad_norm": 0.6487569782925131, | |
| "learning_rate": 1.6266982773316032e-05, | |
| "loss": 0.3827, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.8146964856230032, | |
| "grad_norm": 0.7645835950918118, | |
| "learning_rate": 1.6193958249531562e-05, | |
| "loss": 0.4112, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.817891373801917, | |
| "grad_norm": 0.7113029044269039, | |
| "learning_rate": 1.612098633466575e-05, | |
| "loss": 0.3779, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.8210862619808306, | |
| "grad_norm": 0.5948671080899526, | |
| "learning_rate": 1.6048068037371092e-05, | |
| "loss": 0.3195, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.8242811501597445, | |
| "grad_norm": 0.70421201984334, | |
| "learning_rate": 1.597520436555896e-05, | |
| "loss": 0.3776, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.8274760383386581, | |
| "grad_norm": 0.6726359632379854, | |
| "learning_rate": 1.590239632638568e-05, | |
| "loss": 0.4225, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.830670926517572, | |
| "grad_norm": 0.681506981858163, | |
| "learning_rate": 1.582964492623857e-05, | |
| "loss": 0.3627, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.8338658146964857, | |
| "grad_norm": 0.6579916459695835, | |
| "learning_rate": 1.575695117072209e-05, | |
| "loss": 0.3566, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.8370607028753994, | |
| "grad_norm": 0.5767726433601224, | |
| "learning_rate": 1.568431606464388e-05, | |
| "loss": 0.3582, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.840255591054313, | |
| "grad_norm": 0.6950140418517404, | |
| "learning_rate": 1.5611740612000906e-05, | |
| "loss": 0.3861, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.8434504792332267, | |
| "grad_norm": 0.5791968225578055, | |
| "learning_rate": 1.5539225815965595e-05, | |
| "loss": 0.3383, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.8466453674121406, | |
| "grad_norm": 0.6049662279870999, | |
| "learning_rate": 1.546677267887193e-05, | |
| "loss": 0.3719, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.8498402555910545, | |
| "grad_norm": 0.6161946790988828, | |
| "learning_rate": 1.5394382202201605e-05, | |
| "loss": 0.382, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.8530351437699681, | |
| "grad_norm": 0.6325260524024612, | |
| "learning_rate": 1.5322055386570225e-05, | |
| "loss": 0.3587, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.8562300319488818, | |
| "grad_norm": 0.6241897380088132, | |
| "learning_rate": 1.5249793231713418e-05, | |
| "loss": 0.3386, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.8594249201277955, | |
| "grad_norm": 0.565220565027428, | |
| "learning_rate": 1.5177596736473034e-05, | |
| "loss": 0.3774, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.8626198083067091, | |
| "grad_norm": 0.5845662723376586, | |
| "learning_rate": 1.5105466898783379e-05, | |
| "loss": 0.3235, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.865814696485623, | |
| "grad_norm": 0.6433701307468581, | |
| "learning_rate": 1.5033404715657344e-05, | |
| "loss": 0.3728, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.8690095846645367, | |
| "grad_norm": 0.6619778705276104, | |
| "learning_rate": 1.4961411183172686e-05, | |
| "loss": 0.346, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.8722044728434506, | |
| "grad_norm": 0.7410247957434669, | |
| "learning_rate": 1.4889487296458258e-05, | |
| "loss": 0.367, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.8753993610223643, | |
| "grad_norm": 0.6248658361645802, | |
| "learning_rate": 1.4817634049680207e-05, | |
| "loss": 0.3377, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.878594249201278, | |
| "grad_norm": 0.5927935921175941, | |
| "learning_rate": 1.4745852436028262e-05, | |
| "loss": 0.3355, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.8817891373801916, | |
| "grad_norm": 0.6031296090039979, | |
| "learning_rate": 1.4674143447702036e-05, | |
| "loss": 0.3432, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.8849840255591053, | |
| "grad_norm": 0.6978415390998038, | |
| "learning_rate": 1.4602508075897249e-05, | |
| "loss": 0.4307, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.8881789137380192, | |
| "grad_norm": 0.6802206273285568, | |
| "learning_rate": 1.453094731079206e-05, | |
| "loss": 0.3337, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.891373801916933, | |
| "grad_norm": 0.6075920734911536, | |
| "learning_rate": 1.4459462141533407e-05, | |
| "loss": 0.3959, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.8945686900958467, | |
| "grad_norm": 0.5500279524690617, | |
| "learning_rate": 1.4388053556223274e-05, | |
| "loss": 0.3456, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.8977635782747604, | |
| "grad_norm": 0.598113110586812, | |
| "learning_rate": 1.4316722541905081e-05, | |
| "loss": 0.3581, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.900958466453674, | |
| "grad_norm": 0.5879717265811937, | |
| "learning_rate": 1.4245470084550026e-05, | |
| "loss": 0.3484, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.9041533546325877, | |
| "grad_norm": 0.5983260797622781, | |
| "learning_rate": 1.4174297169043447e-05, | |
| "loss": 0.3968, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.9073482428115016, | |
| "grad_norm": 0.5893645004999872, | |
| "learning_rate": 1.410320477917122e-05, | |
| "loss": 0.3377, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.9105431309904153, | |
| "grad_norm": 0.5981250674431736, | |
| "learning_rate": 1.4032193897606164e-05, | |
| "loss": 0.3728, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.9137380191693292, | |
| "grad_norm": 0.7033342157204642, | |
| "learning_rate": 1.3961265505894442e-05, | |
| "loss": 0.3986, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.9169329073482428, | |
| "grad_norm": 0.6193784424182153, | |
| "learning_rate": 1.3890420584442007e-05, | |
| "loss": 0.3833, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.9201277955271565, | |
| "grad_norm": 0.6378469529500934, | |
| "learning_rate": 1.3819660112501054e-05, | |
| "loss": 0.4048, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.9233226837060702, | |
| "grad_norm": 0.6260896725715146, | |
| "learning_rate": 1.374898506815646e-05, | |
| "loss": 0.3259, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.926517571884984, | |
| "grad_norm": 0.6830049362505771, | |
| "learning_rate": 1.3678396428312291e-05, | |
| "loss": 0.3824, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.9297124600638977, | |
| "grad_norm": 0.6210066067190025, | |
| "learning_rate": 1.3607895168678296e-05, | |
| "loss": 0.3612, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.9329073482428116, | |
| "grad_norm": 0.6413094034332517, | |
| "learning_rate": 1.3537482263756391e-05, | |
| "loss": 0.3548, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.9361022364217253, | |
| "grad_norm": 0.620634478966929, | |
| "learning_rate": 1.3467158686827242e-05, | |
| "loss": 0.3884, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.939297124600639, | |
| "grad_norm": 0.6407808790372046, | |
| "learning_rate": 1.339692540993676e-05, | |
| "loss": 0.3948, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.9424920127795526, | |
| "grad_norm": 0.7276368275858982, | |
| "learning_rate": 1.332678340388268e-05, | |
| "loss": 0.347, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.9456869009584663, | |
| "grad_norm": 0.6880632837814936, | |
| "learning_rate": 1.3256733638201172e-05, | |
| "loss": 0.3811, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.9488817891373802, | |
| "grad_norm": 0.568414071034355, | |
| "learning_rate": 1.3186777081153398e-05, | |
| "loss": 0.3852, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.952076677316294, | |
| "grad_norm": 0.6125392745773798, | |
| "learning_rate": 1.311691469971214e-05, | |
| "loss": 0.3314, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.9552715654952078, | |
| "grad_norm": 0.6154906870246765, | |
| "learning_rate": 1.3047147459548469e-05, | |
| "loss": 0.3983, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.9584664536741214, | |
| "grad_norm": 0.7090879134482768, | |
| "learning_rate": 1.297747632501834e-05, | |
| "loss": 0.3547, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.961661341853035, | |
| "grad_norm": 0.7919803140094102, | |
| "learning_rate": 1.2907902259149287e-05, | |
| "loss": 0.3884, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.9648562300319488, | |
| "grad_norm": 0.6135804624651449, | |
| "learning_rate": 1.2838426223627152e-05, | |
| "loss": 0.3292, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.9680511182108626, | |
| "grad_norm": 0.627406953051245, | |
| "learning_rate": 1.2769049178782716e-05, | |
| "loss": 0.3379, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.9712460063897763, | |
| "grad_norm": 0.7149906660956054, | |
| "learning_rate": 1.2699772083578472e-05, | |
| "loss": 0.3727, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.9744408945686902, | |
| "grad_norm": 0.7249915937043523, | |
| "learning_rate": 1.2630595895595383e-05, | |
| "loss": 0.3424, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.9776357827476039, | |
| "grad_norm": 0.7589039306193895, | |
| "learning_rate": 1.2561521571019603e-05, | |
| "loss": 0.3637, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.9808306709265175, | |
| "grad_norm": 0.6176823766792565, | |
| "learning_rate": 1.249255006462928e-05, | |
| "loss": 0.3495, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.9840255591054312, | |
| "grad_norm": 0.7150454220639018, | |
| "learning_rate": 1.2423682329781378e-05, | |
| "loss": 0.3628, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.9872204472843449, | |
| "grad_norm": 0.6869892242638056, | |
| "learning_rate": 1.2354919318398473e-05, | |
| "loss": 0.3528, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.9904153354632588, | |
| "grad_norm": 0.7412388678758558, | |
| "learning_rate": 1.2286261980955583e-05, | |
| "loss": 0.3744, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.9936102236421727, | |
| "grad_norm": 0.7562821838566286, | |
| "learning_rate": 1.2217711266467092e-05, | |
| "loss": 0.3947, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.9968051118210863, | |
| "grad_norm": 0.5867017695210721, | |
| "learning_rate": 1.2149268122473554e-05, | |
| "loss": 0.327, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5635062124393114, | |
| "learning_rate": 1.2080933495028648e-05, | |
| "loss": 0.3089, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.0031948881789137, | |
| "grad_norm": 0.6715285391032262, | |
| "learning_rate": 1.2012708328686093e-05, | |
| "loss": 0.2396, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.0063897763578273, | |
| "grad_norm": 0.5999774675489589, | |
| "learning_rate": 1.1944593566486562e-05, | |
| "loss": 0.2402, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.009584664536741, | |
| "grad_norm": 0.6523644403725044, | |
| "learning_rate": 1.18765901499447e-05, | |
| "loss": 0.2595, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.012779552715655, | |
| "grad_norm": 0.6688113012671898, | |
| "learning_rate": 1.1808699019036047e-05, | |
| "loss": 0.2063, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.015974440894569, | |
| "grad_norm": 0.8690136893873772, | |
| "learning_rate": 1.17409211121841e-05, | |
| "loss": 0.2628, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.0191693290734825, | |
| "grad_norm": 0.5849734190846696, | |
| "learning_rate": 1.1673257366247319e-05, | |
| "loss": 0.2178, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.022364217252396, | |
| "grad_norm": 0.6360648521093288, | |
| "learning_rate": 1.1605708716506161e-05, | |
| "loss": 0.2374, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.02555910543131, | |
| "grad_norm": 0.5809260418478515, | |
| "learning_rate": 1.1538276096650175e-05, | |
| "loss": 0.1988, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.0287539936102235, | |
| "grad_norm": 0.6364023975071599, | |
| "learning_rate": 1.1470960438765108e-05, | |
| "loss": 0.253, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.0319488817891376, | |
| "grad_norm": 0.608832086457546, | |
| "learning_rate": 1.1403762673319983e-05, | |
| "loss": 0.2254, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.0351437699680512, | |
| "grad_norm": 0.6196453859771298, | |
| "learning_rate": 1.133668372915425e-05, | |
| "loss": 0.2477, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.038338658146965, | |
| "grad_norm": 0.49320636326890754, | |
| "learning_rate": 1.1269724533464984e-05, | |
| "loss": 0.2344, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.0415335463258786, | |
| "grad_norm": 0.5893706817555677, | |
| "learning_rate": 1.1202886011794023e-05, | |
| "loss": 0.284, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.0447284345047922, | |
| "grad_norm": 0.5927365358551429, | |
| "learning_rate": 1.1136169088015177e-05, | |
| "loss": 0.2446, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.047923322683706, | |
| "grad_norm": 0.5912266488563296, | |
| "learning_rate": 1.1069574684321505e-05, | |
| "loss": 0.249, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.0511182108626196, | |
| "grad_norm": 0.5068898035040869, | |
| "learning_rate": 1.1003103721212503e-05, | |
| "loss": 0.2404, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.0543130990415337, | |
| "grad_norm": 0.49830277604732454, | |
| "learning_rate": 1.0936757117481438e-05, | |
| "loss": 0.2226, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.0575079872204474, | |
| "grad_norm": 0.5137466675546016, | |
| "learning_rate": 1.0870535790202606e-05, | |
| "loss": 0.2246, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.060702875399361, | |
| "grad_norm": 0.5579716315671657, | |
| "learning_rate": 1.080444065471867e-05, | |
| "loss": 0.218, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.0638977635782747, | |
| "grad_norm": 0.5528250590936209, | |
| "learning_rate": 1.0738472624628034e-05, | |
| "loss": 0.2485, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.0670926517571884, | |
| "grad_norm": 0.5493618613823972, | |
| "learning_rate": 1.0672632611772156e-05, | |
| "loss": 0.2425, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.070287539936102, | |
| "grad_norm": 0.5673228300389772, | |
| "learning_rate": 1.0606921526223016e-05, | |
| "loss": 0.2497, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.073482428115016, | |
| "grad_norm": 0.5203097618726312, | |
| "learning_rate": 1.0541340276270468e-05, | |
| "loss": 0.212, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.07667731629393, | |
| "grad_norm": 0.5534033378725121, | |
| "learning_rate": 1.0475889768409729e-05, | |
| "loss": 0.1971, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.0798722044728435, | |
| "grad_norm": 0.5300132980491199, | |
| "learning_rate": 1.0410570907328848e-05, | |
| "loss": 0.2576, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.083067092651757, | |
| "grad_norm": 0.5690239162135191, | |
| "learning_rate": 1.0345384595896161e-05, | |
| "loss": 0.2366, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.086261980830671, | |
| "grad_norm": 0.5256402067518808, | |
| "learning_rate": 1.028033173514788e-05, | |
| "loss": 0.235, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.0894568690095845, | |
| "grad_norm": 0.48537333431940805, | |
| "learning_rate": 1.0215413224275552e-05, | |
| "loss": 0.2154, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.0926517571884986, | |
| "grad_norm": 0.5161821862609771, | |
| "learning_rate": 1.0150629960613721e-05, | |
| "loss": 0.21, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.0958466453674123, | |
| "grad_norm": 0.5281317540190527, | |
| "learning_rate": 1.0085982839627445e-05, | |
| "loss": 0.1899, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.099041533546326, | |
| "grad_norm": 0.4709200890585819, | |
| "learning_rate": 1.0021472754899966e-05, | |
| "loss": 0.2292, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.1022364217252396, | |
| "grad_norm": 0.45322842520766593, | |
| "learning_rate": 9.957100598120357e-06, | |
| "loss": 0.2147, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.1054313099041533, | |
| "grad_norm": 0.5018911122380977, | |
| "learning_rate": 9.89286725907117e-06, | |
| "loss": 0.2665, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.108626198083067, | |
| "grad_norm": 0.5090958885022994, | |
| "learning_rate": 9.828773625616145e-06, | |
| "loss": 0.2071, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.1118210862619806, | |
| "grad_norm": 0.5042008645673148, | |
| "learning_rate": 9.764820583687978e-06, | |
| "loss": 0.22, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.1150159744408947, | |
| "grad_norm": 0.5636525223827146, | |
| "learning_rate": 9.701009017276008e-06, | |
| "loss": 0.2255, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.1182108626198084, | |
| "grad_norm": 0.4968534545435798, | |
| "learning_rate": 9.637339808414042e-06, | |
| "loss": 0.2187, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.121405750798722, | |
| "grad_norm": 0.536209126935731, | |
| "learning_rate": 9.573813837168166e-06, | |
| "loss": 0.2212, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.1246006389776357, | |
| "grad_norm": 0.477893192356411, | |
| "learning_rate": 9.510431981624554e-06, | |
| "loss": 0.2084, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.1277955271565494, | |
| "grad_norm": 0.47741193568130846, | |
| "learning_rate": 9.447195117877343e-06, | |
| "loss": 0.216, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.130990415335463, | |
| "grad_norm": 0.5101367545264079, | |
| "learning_rate": 9.384104120016542e-06, | |
| "loss": 0.2634, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.134185303514377, | |
| "grad_norm": 0.49187172517640576, | |
| "learning_rate": 9.321159860115909e-06, | |
| "loss": 0.224, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.137380191693291, | |
| "grad_norm": 0.5056192356778784, | |
| "learning_rate": 9.258363208220929e-06, | |
| "loss": 0.2228, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.1405750798722045, | |
| "grad_norm": 0.5445137543547477, | |
| "learning_rate": 9.195715032336794e-06, | |
| "loss": 0.2252, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.143769968051118, | |
| "grad_norm": 0.5060926192022697, | |
| "learning_rate": 9.13321619841637e-06, | |
| "loss": 0.2774, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.146964856230032, | |
| "grad_norm": 0.47010736862307156, | |
| "learning_rate": 9.070867570348247e-06, | |
| "loss": 0.22, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.1501597444089455, | |
| "grad_norm": 0.5094958330325633, | |
| "learning_rate": 9.00867000994482e-06, | |
| "loss": 0.2256, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.1533546325878596, | |
| "grad_norm": 0.5259589614128072, | |
| "learning_rate": 8.946624376930333e-06, | |
| "loss": 0.2755, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.1565495207667733, | |
| "grad_norm": 0.5139540383730451, | |
| "learning_rate": 8.884731528929019e-06, | |
| "loss": 0.2659, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.159744408945687, | |
| "grad_norm": 0.4832097809114296, | |
| "learning_rate": 8.822992321453264e-06, | |
| "loss": 0.215, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.1629392971246006, | |
| "grad_norm": 0.44217298247605424, | |
| "learning_rate": 8.76140760789174e-06, | |
| "loss": 0.2212, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.1661341853035143, | |
| "grad_norm": 0.5003409469817744, | |
| "learning_rate": 8.69997823949763e-06, | |
| "loss": 0.1728, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.169329073482428, | |
| "grad_norm": 0.4819137577229696, | |
| "learning_rate": 8.638705065376887e-06, | |
| "loss": 0.2091, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.1725239616613417, | |
| "grad_norm": 0.48347484950099356, | |
| "learning_rate": 8.577588932476448e-06, | |
| "loss": 0.208, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.1757188498402558, | |
| "grad_norm": 0.48841924781203694, | |
| "learning_rate": 8.516630685572553e-06, | |
| "loss": 0.2097, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.1789137380191694, | |
| "grad_norm": 0.5540389648785453, | |
| "learning_rate": 8.455831167259086e-06, | |
| "loss": 0.2343, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.182108626198083, | |
| "grad_norm": 0.45425575230845544, | |
| "learning_rate": 8.395191217935883e-06, | |
| "loss": 0.2278, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.1853035143769968, | |
| "grad_norm": 0.4779637593364659, | |
| "learning_rate": 8.33471167579717e-06, | |
| "loss": 0.2637, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.1884984025559104, | |
| "grad_norm": 0.5285221690710397, | |
| "learning_rate": 8.274393376819924e-06, | |
| "loss": 0.2347, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.191693290734824, | |
| "grad_norm": 0.473579878257591, | |
| "learning_rate": 8.214237154752345e-06, | |
| "loss": 0.2159, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.194888178913738, | |
| "grad_norm": 0.501799232462877, | |
| "learning_rate": 8.154243841102351e-06, | |
| "loss": 0.2476, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.198083067092652, | |
| "grad_norm": 0.4627453575153537, | |
| "learning_rate": 8.09441426512604e-06, | |
| "loss": 0.2466, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.2012779552715656, | |
| "grad_norm": 0.5185845450190892, | |
| "learning_rate": 8.03474925381625e-06, | |
| "loss": 0.2418, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.2044728434504792, | |
| "grad_norm": 0.49421035355847515, | |
| "learning_rate": 7.97524963189115e-06, | |
| "loss": 0.2903, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.207667731629393, | |
| "grad_norm": 0.4884543130989985, | |
| "learning_rate": 7.91591622178279e-06, | |
| "loss": 0.2604, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.2108626198083066, | |
| "grad_norm": 0.421393965123483, | |
| "learning_rate": 7.856749843625777e-06, | |
| "loss": 0.2027, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.2140575079872207, | |
| "grad_norm": 0.49505325003001727, | |
| "learning_rate": 7.797751315245927e-06, | |
| "loss": 0.2265, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.2172523961661343, | |
| "grad_norm": 0.5048775790142346, | |
| "learning_rate": 7.738921452148949e-06, | |
| "loss": 0.2624, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.220447284345048, | |
| "grad_norm": 0.48026767805561044, | |
| "learning_rate": 7.68026106750917e-06, | |
| "loss": 0.267, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.2236421725239617, | |
| "grad_norm": 0.5099721248485902, | |
| "learning_rate": 7.621770972158331e-06, | |
| "loss": 0.2275, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.2268370607028753, | |
| "grad_norm": 0.478130569524219, | |
| "learning_rate": 7.563451974574332e-06, | |
| "loss": 0.2135, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.230031948881789, | |
| "grad_norm": 0.4333719232122216, | |
| "learning_rate": 7.5053048808700814e-06, | |
| "loss": 0.2028, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.2332268370607027, | |
| "grad_norm": 0.4907979978569055, | |
| "learning_rate": 7.447330494782363e-06, | |
| "loss": 0.2448, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.236421725239617, | |
| "grad_norm": 0.6087204868248344, | |
| "learning_rate": 7.389529617660705e-06, | |
| "loss": 0.2514, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.2396166134185305, | |
| "grad_norm": 0.4779517559418497, | |
| "learning_rate": 7.331903048456299e-06, | |
| "loss": 0.1698, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.242811501597444, | |
| "grad_norm": 0.48682990012078686, | |
| "learning_rate": 7.274451583711e-06, | |
| "loss": 0.2456, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.246006389776358, | |
| "grad_norm": 0.5696452459133338, | |
| "learning_rate": 7.217176017546263e-06, | |
| "loss": 0.23, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.2492012779552715, | |
| "grad_norm": 0.47827230908811474, | |
| "learning_rate": 7.160077141652186e-06, | |
| "loss": 0.2121, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.252396166134185, | |
| "grad_norm": 0.49227009280747874, | |
| "learning_rate": 7.1031557452765934e-06, | |
| "loss": 0.2662, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.255591054313099, | |
| "grad_norm": 0.5350393375681801, | |
| "learning_rate": 7.046412615214075e-06, | |
| "loss": 0.2493, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.258785942492013, | |
| "grad_norm": 0.4684828298726711, | |
| "learning_rate": 6.98984853579517e-06, | |
| "loss": 0.1838, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.2619808306709266, | |
| "grad_norm": 0.4784754257674382, | |
| "learning_rate": 6.933464288875467e-06, | |
| "loss": 0.1974, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.2651757188498403, | |
| "grad_norm": 0.4997974355017908, | |
| "learning_rate": 6.8772606538248285e-06, | |
| "loss": 0.2016, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.268370607028754, | |
| "grad_norm": 0.47011903064932126, | |
| "learning_rate": 6.821238407516635e-06, | |
| "loss": 0.2346, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.2715654952076676, | |
| "grad_norm": 0.45084286894131964, | |
| "learning_rate": 6.765398324316996e-06, | |
| "loss": 0.2261, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.2747603833865817, | |
| "grad_norm": 0.4586308317736137, | |
| "learning_rate": 6.7097411760741075e-06, | |
| "loss": 0.2141, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.2779552715654954, | |
| "grad_norm": 0.452772588205902, | |
| "learning_rate": 6.654267732107516e-06, | |
| "loss": 0.1987, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.281150159744409, | |
| "grad_norm": 0.5116763949149057, | |
| "learning_rate": 6.598978759197554e-06, | |
| "loss": 0.2392, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.2843450479233227, | |
| "grad_norm": 0.498901942763996, | |
| "learning_rate": 6.543875021574686e-06, | |
| "loss": 0.1921, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.2875399361022364, | |
| "grad_norm": 0.49554234705109035, | |
| "learning_rate": 6.4889572809089655e-06, | |
| "loss": 0.2205, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.29073482428115, | |
| "grad_norm": 0.4845867709952482, | |
| "learning_rate": 6.43422629629953e-06, | |
| "loss": 0.2497, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.2939297124600637, | |
| "grad_norm": 0.47026482069183134, | |
| "learning_rate": 6.379682824264055e-06, | |
| "loss": 0.2295, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.297124600638978, | |
| "grad_norm": 0.4649585300155169, | |
| "learning_rate": 6.325327618728356e-06, | |
| "loss": 0.1516, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.3003194888178915, | |
| "grad_norm": 0.40206850228054203, | |
| "learning_rate": 6.271161431015922e-06, | |
| "loss": 0.2417, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.303514376996805, | |
| "grad_norm": 0.430228031696727, | |
| "learning_rate": 6.2171850098375475e-06, | |
| "loss": 0.2368, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.306709265175719, | |
| "grad_norm": 0.42538480611909024, | |
| "learning_rate": 6.163399101281e-06, | |
| "loss": 0.1991, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.3099041533546325, | |
| "grad_norm": 0.5044088982415396, | |
| "learning_rate": 6.1098044488006735e-06, | |
| "loss": 0.1973, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.313099041533546, | |
| "grad_norm": 0.4668130698688352, | |
| "learning_rate": 6.056401793207329e-06, | |
| "loss": 0.2262, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.31629392971246, | |
| "grad_norm": 0.5258923718903504, | |
| "learning_rate": 6.003191872657878e-06, | |
| "loss": 0.2634, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.319488817891374, | |
| "grad_norm": 0.47646849150208387, | |
| "learning_rate": 5.950175422645134e-06, | |
| "loss": 0.2301, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.3226837060702876, | |
| "grad_norm": 0.4597740271176972, | |
| "learning_rate": 5.897353175987668e-06, | |
| "loss": 0.2313, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.3258785942492013, | |
| "grad_norm": 0.4428572782581877, | |
| "learning_rate": 5.844725862819703e-06, | |
| "loss": 0.2174, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.329073482428115, | |
| "grad_norm": 0.5173021855990265, | |
| "learning_rate": 5.792294210580971e-06, | |
| "loss": 0.2068, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.3322683706070286, | |
| "grad_norm": 0.4883764779635357, | |
| "learning_rate": 5.740058944006697e-06, | |
| "loss": 0.194, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.3354632587859427, | |
| "grad_norm": 0.5032508220310962, | |
| "learning_rate": 5.688020785117581e-06, | |
| "loss": 0.2653, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.3386581469648564, | |
| "grad_norm": 0.5190580775432023, | |
| "learning_rate": 5.636180453209789e-06, | |
| "loss": 0.2323, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.34185303514377, | |
| "grad_norm": 0.4545099042298369, | |
| "learning_rate": 5.584538664845034e-06, | |
| "loss": 0.2282, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.3450479233226837, | |
| "grad_norm": 0.46066006930429154, | |
| "learning_rate": 5.533096133840677e-06, | |
| "loss": 0.2454, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.3482428115015974, | |
| "grad_norm": 0.49707563335332516, | |
| "learning_rate": 5.48185357125983e-06, | |
| "loss": 0.2457, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.351437699680511, | |
| "grad_norm": 0.4739619495394498, | |
| "learning_rate": 5.4308116854015644e-06, | |
| "loss": 0.2192, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.3546325878594248, | |
| "grad_norm": 0.46153270987931605, | |
| "learning_rate": 5.379971181791093e-06, | |
| "loss": 0.2727, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.357827476038339, | |
| "grad_norm": 0.44872264474740164, | |
| "learning_rate": 5.3293327631700185e-06, | |
| "loss": 0.2112, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.3610223642172525, | |
| "grad_norm": 0.46169938248569553, | |
| "learning_rate": 5.278897129486656e-06, | |
| "loss": 0.2021, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.364217252396166, | |
| "grad_norm": 0.4614793999691915, | |
| "learning_rate": 5.228664977886304e-06, | |
| "loss": 0.201, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.36741214057508, | |
| "grad_norm": 0.48601214815606647, | |
| "learning_rate": 5.178637002701639e-06, | |
| "loss": 0.2016, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.3706070287539935, | |
| "grad_norm": 0.4448925101508038, | |
| "learning_rate": 5.128813895443132e-06, | |
| "loss": 0.2411, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.373801916932907, | |
| "grad_norm": 0.45678562943215706, | |
| "learning_rate": 5.079196344789454e-06, | |
| "loss": 0.2397, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.376996805111821, | |
| "grad_norm": 0.47737064374073584, | |
| "learning_rate": 5.029785036577976e-06, | |
| "loss": 0.2168, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.380191693290735, | |
| "grad_norm": 0.4330785555805196, | |
| "learning_rate": 4.980580653795306e-06, | |
| "loss": 0.2206, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.3833865814696487, | |
| "grad_norm": 0.5282508944251668, | |
| "learning_rate": 4.931583876567807e-06, | |
| "loss": 0.2209, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.3865814696485623, | |
| "grad_norm": 0.48191470832355815, | |
| "learning_rate": 4.882795382152223e-06, | |
| "loss": 0.2604, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.389776357827476, | |
| "grad_norm": 0.45329891295506547, | |
| "learning_rate": 4.834215844926338e-06, | |
| "loss": 0.2662, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.3929712460063897, | |
| "grad_norm": 0.5020462913719165, | |
| "learning_rate": 4.785845936379601e-06, | |
| "loss": 0.1824, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 2.3961661341853033, | |
| "grad_norm": 0.5125956468296781, | |
| "learning_rate": 4.737686325103883e-06, | |
| "loss": 0.2388, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.3993610223642174, | |
| "grad_norm": 0.4610736608378653, | |
| "learning_rate": 4.6897376767842365e-06, | |
| "loss": 0.2184, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.402555910543131, | |
| "grad_norm": 0.4215976064237638, | |
| "learning_rate": 4.642000654189673e-06, | |
| "loss": 0.2239, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.405750798722045, | |
| "grad_norm": 0.4605285704227125, | |
| "learning_rate": 4.59447591716401e-06, | |
| "loss": 0.2298, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 2.4089456869009584, | |
| "grad_norm": 0.4485924651231439, | |
| "learning_rate": 4.547164122616767e-06, | |
| "loss": 0.2197, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.412140575079872, | |
| "grad_norm": 0.4611809765297732, | |
| "learning_rate": 4.500065924514059e-06, | |
| "loss": 0.2405, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 2.415335463258786, | |
| "grad_norm": 0.44198322300277526, | |
| "learning_rate": 4.453181973869565e-06, | |
| "loss": 0.261, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.4185303514377, | |
| "grad_norm": 0.4422002397029462, | |
| "learning_rate": 4.406512918735555e-06, | |
| "loss": 0.2086, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 2.4217252396166136, | |
| "grad_norm": 0.44126751575207934, | |
| "learning_rate": 4.360059404193892e-06, | |
| "loss": 0.238, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.4249201277955272, | |
| "grad_norm": 0.4639687021386926, | |
| "learning_rate": 4.313822072347136e-06, | |
| "loss": 0.215, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 2.428115015974441, | |
| "grad_norm": 0.44442484361823825, | |
| "learning_rate": 4.267801562309679e-06, | |
| "loss": 0.1696, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.4313099041533546, | |
| "grad_norm": 0.45364102082678376, | |
| "learning_rate": 4.221998510198888e-06, | |
| "loss": 0.2313, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 2.4345047923322682, | |
| "grad_norm": 0.5633393868088448, | |
| "learning_rate": 4.176413549126322e-06, | |
| "loss": 0.1996, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.437699680511182, | |
| "grad_norm": 0.4842473998169101, | |
| "learning_rate": 4.131047309188994e-06, | |
| "loss": 0.2374, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.440894568690096, | |
| "grad_norm": 0.49271155780265713, | |
| "learning_rate": 4.085900417460633e-06, | |
| "loss": 0.2428, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.4440894568690097, | |
| "grad_norm": 0.4474969801362191, | |
| "learning_rate": 4.040973497983052e-06, | |
| "loss": 0.2149, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.4472843450479234, | |
| "grad_norm": 0.49449842276919154, | |
| "learning_rate": 3.996267171757486e-06, | |
| "loss": 0.2221, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.450479233226837, | |
| "grad_norm": 0.4045769885160123, | |
| "learning_rate": 3.951782056736027e-06, | |
| "loss": 0.2088, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 2.4536741214057507, | |
| "grad_norm": 0.420197638385653, | |
| "learning_rate": 3.907518767813097e-06, | |
| "loss": 0.2508, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.4568690095846644, | |
| "grad_norm": 0.4712286531119528, | |
| "learning_rate": 3.863477916816914e-06, | |
| "loss": 0.2012, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 2.460063897763578, | |
| "grad_norm": 0.4427542368579534, | |
| "learning_rate": 3.819660112501053e-06, | |
| "loss": 0.2397, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.463258785942492, | |
| "grad_norm": 0.49326258562927866, | |
| "learning_rate": 3.7760659605360506e-06, | |
| "loss": 0.2334, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 2.466453674121406, | |
| "grad_norm": 0.4601236932926047, | |
| "learning_rate": 3.732696063500998e-06, | |
| "loss": 0.2034, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.4696485623003195, | |
| "grad_norm": 0.4472250977974353, | |
| "learning_rate": 3.689551020875226e-06, | |
| "loss": 0.2271, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 2.472843450479233, | |
| "grad_norm": 0.46936589235681647, | |
| "learning_rate": 3.6466314290300366e-06, | |
| "loss": 0.1636, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.476038338658147, | |
| "grad_norm": 0.4357541144936099, | |
| "learning_rate": 3.603937881220425e-06, | |
| "loss": 0.2292, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.479233226837061, | |
| "grad_norm": 0.6172961003964446, | |
| "learning_rate": 3.5614709675769166e-06, | |
| "loss": 0.2371, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.4824281150159746, | |
| "grad_norm": 0.42287295079164994, | |
| "learning_rate": 3.519231275097372e-06, | |
| "loss": 0.192, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 2.4856230031948883, | |
| "grad_norm": 0.44907721421445357, | |
| "learning_rate": 3.477219387638917e-06, | |
| "loss": 0.275, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.488817891373802, | |
| "grad_norm": 0.5148479668458735, | |
| "learning_rate": 3.435435885909828e-06, | |
| "loss": 0.2505, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 2.4920127795527156, | |
| "grad_norm": 0.4708481234430973, | |
| "learning_rate": 3.393881347461525e-06, | |
| "loss": 0.2337, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.4952076677316293, | |
| "grad_norm": 0.4652941463915953, | |
| "learning_rate": 3.3525563466806068e-06, | |
| "loss": 0.2068, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 2.498402555910543, | |
| "grad_norm": 0.45937429044470846, | |
| "learning_rate": 3.311461454780871e-06, | |
| "loss": 0.2616, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.501597444089457, | |
| "grad_norm": 0.4540154081882579, | |
| "learning_rate": 3.2705972397954655e-06, | |
| "loss": 0.2004, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 2.5047923322683707, | |
| "grad_norm": 0.44687303635857867, | |
| "learning_rate": 3.22996426656899e-06, | |
| "loss": 0.2137, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.5079872204472844, | |
| "grad_norm": 0.42549399369528, | |
| "learning_rate": 3.1895630967497147e-06, | |
| "loss": 0.237, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 2.511182108626198, | |
| "grad_norm": 0.4686919863303311, | |
| "learning_rate": 3.1493942887818287e-06, | |
| "loss": 0.1818, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.5143769968051117, | |
| "grad_norm": 0.43180677830115527, | |
| "learning_rate": 3.1094583978976887e-06, | |
| "loss": 0.2135, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.5175718849840254, | |
| "grad_norm": 0.41051305687717876, | |
| "learning_rate": 3.0697559761101623e-06, | |
| "loss": 0.2362, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.520766773162939, | |
| "grad_norm": 0.4339764538651385, | |
| "learning_rate": 3.0302875722050064e-06, | |
| "loss": 0.1871, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.523961661341853, | |
| "grad_norm": 0.44537108832627426, | |
| "learning_rate": 2.99105373173326e-06, | |
| "loss": 0.2317, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.527156549520767, | |
| "grad_norm": 0.48453456082844887, | |
| "learning_rate": 2.9520549970037238e-06, | |
| "loss": 0.1899, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.5303514376996805, | |
| "grad_norm": 0.4280884923970404, | |
| "learning_rate": 2.913291907075451e-06, | |
| "loss": 0.2116, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.533546325878594, | |
| "grad_norm": 0.4503925387506874, | |
| "learning_rate": 2.8747649977502945e-06, | |
| "loss": 0.266, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.536741214057508, | |
| "grad_norm": 0.44028462915181193, | |
| "learning_rate": 2.836474801565521e-06, | |
| "loss": 0.216, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.539936102236422, | |
| "grad_norm": 0.4484069903506647, | |
| "learning_rate": 2.7984218477864213e-06, | |
| "loss": 0.2081, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.543130990415335, | |
| "grad_norm": 0.4412193065052018, | |
| "learning_rate": 2.7606066623990145e-06, | |
| "loss": 0.2943, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.5463258785942493, | |
| "grad_norm": 0.4281448882843457, | |
| "learning_rate": 2.723029768102776e-06, | |
| "loss": 0.1912, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.549520766773163, | |
| "grad_norm": 0.48266966006465656, | |
| "learning_rate": 2.6856916843034062e-06, | |
| "loss": 0.2363, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.5527156549520766, | |
| "grad_norm": 0.4313024255011029, | |
| "learning_rate": 2.648592927105642e-06, | |
| "loss": 0.2356, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.5559105431309903, | |
| "grad_norm": 0.42876561566865096, | |
| "learning_rate": 2.611734009306155e-06, | |
| "loss": 0.2222, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.559105431309904, | |
| "grad_norm": 0.39838879945078226, | |
| "learning_rate": 2.5751154403864264e-06, | |
| "loss": 0.2119, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.562300319488818, | |
| "grad_norm": 0.4755341593418832, | |
| "learning_rate": 2.5387377265057246e-06, | |
| "loss": 0.235, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.5654952076677318, | |
| "grad_norm": 0.45820320153330835, | |
| "learning_rate": 2.502601370494111e-06, | |
| "loss": 0.2016, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.5686900958466454, | |
| "grad_norm": 0.49185493187997026, | |
| "learning_rate": 2.4667068718454766e-06, | |
| "loss": 0.2038, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.571884984025559, | |
| "grad_norm": 0.4346051571719638, | |
| "learning_rate": 2.4310547267106443e-06, | |
| "loss": 0.168, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.5750798722044728, | |
| "grad_norm": 0.47891912799119724, | |
| "learning_rate": 2.395645427890525e-06, | |
| "loss": 0.192, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.5782747603833864, | |
| "grad_norm": 0.4957624567636564, | |
| "learning_rate": 2.360479464829275e-06, | |
| "loss": 0.2015, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.5814696485623, | |
| "grad_norm": 0.43031345754147116, | |
| "learning_rate": 2.3255573236075523e-06, | |
| "loss": 0.2332, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.584664536741214, | |
| "grad_norm": 0.4493876332117976, | |
| "learning_rate": 2.2908794869358044e-06, | |
| "loss": 0.1779, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.587859424920128, | |
| "grad_norm": 0.48046582003978555, | |
| "learning_rate": 2.2564464341475724e-06, | |
| "loss": 0.2085, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.5910543130990416, | |
| "grad_norm": 0.4610477157344099, | |
| "learning_rate": 2.2222586411928826e-06, | |
| "loss": 0.2733, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.594249201277955, | |
| "grad_norm": 0.42491586645922075, | |
| "learning_rate": 2.1883165806316688e-06, | |
| "loss": 0.2045, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.597444089456869, | |
| "grad_norm": 0.4558620955090394, | |
| "learning_rate": 2.154620721627225e-06, | |
| "loss": 0.2348, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.600638977635783, | |
| "grad_norm": 0.4177140807716825, | |
| "learning_rate": 2.121171529939734e-06, | |
| "loss": 0.2154, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.6038338658146962, | |
| "grad_norm": 0.44671878373727514, | |
| "learning_rate": 2.0879694679198346e-06, | |
| "loss": 0.2339, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.6070287539936103, | |
| "grad_norm": 0.5421104481464741, | |
| "learning_rate": 2.055014994502207e-06, | |
| "loss": 0.2628, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.610223642172524, | |
| "grad_norm": 0.4183342876241132, | |
| "learning_rate": 2.022308565199249e-06, | |
| "loss": 0.2308, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.6134185303514377, | |
| "grad_norm": 0.4076706224784102, | |
| "learning_rate": 1.989850632094783e-06, | |
| "loss": 0.2697, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.6166134185303513, | |
| "grad_norm": 0.4327417815919417, | |
| "learning_rate": 1.9576416438377864e-06, | |
| "loss": 0.2204, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.619808306709265, | |
| "grad_norm": 0.4465688214774834, | |
| "learning_rate": 1.925682045636217e-06, | |
| "loss": 0.2326, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.623003194888179, | |
| "grad_norm": 0.4574212377076256, | |
| "learning_rate": 1.8939722792508307e-06, | |
| "loss": 0.2263, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.626198083067093, | |
| "grad_norm": 0.4455916657403554, | |
| "learning_rate": 1.8625127829890922e-06, | |
| "loss": 0.2387, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.6293929712460065, | |
| "grad_norm": 0.436528550029091, | |
| "learning_rate": 1.8313039916991204e-06, | |
| "loss": 0.2384, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.63258785942492, | |
| "grad_norm": 0.44219679143144724, | |
| "learning_rate": 1.8003463367636676e-06, | |
| "loss": 0.2269, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.635782747603834, | |
| "grad_norm": 0.44101913923028035, | |
| "learning_rate": 1.7696402460941554e-06, | |
| "loss": 0.2712, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.6389776357827475, | |
| "grad_norm": 0.43323308580675207, | |
| "learning_rate": 1.7391861441247715e-06, | |
| "loss": 0.2645, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.642172523961661, | |
| "grad_norm": 0.451593663495069, | |
| "learning_rate": 1.7089844518065902e-06, | |
| "loss": 0.2218, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.6453674121405752, | |
| "grad_norm": 0.42165221052628815, | |
| "learning_rate": 1.6790355866017604e-06, | |
| "loss": 0.2272, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.648562300319489, | |
| "grad_norm": 0.45660435100783486, | |
| "learning_rate": 1.6493399624777428e-06, | |
| "loss": 0.2322, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.6517571884984026, | |
| "grad_norm": 0.46289265194890444, | |
| "learning_rate": 1.6198979899015687e-06, | |
| "loss": 0.2469, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.6549520766773163, | |
| "grad_norm": 0.42669559181390876, | |
| "learning_rate": 1.5907100758341787e-06, | |
| "loss": 0.1998, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.65814696485623, | |
| "grad_norm": 0.4190815694114573, | |
| "learning_rate": 1.5617766237248023e-06, | |
| "loss": 0.2103, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.661341853035144, | |
| "grad_norm": 0.41531478233240604, | |
| "learning_rate": 1.5330980335053714e-06, | |
| "loss": 0.2039, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.6645367412140573, | |
| "grad_norm": 0.4357539666161216, | |
| "learning_rate": 1.5046747015849893e-06, | |
| "loss": 0.2375, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.6677316293929714, | |
| "grad_norm": 0.44449107969992363, | |
| "learning_rate": 1.4765070208444732e-06, | |
| "loss": 0.2807, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.670926517571885, | |
| "grad_norm": 0.43149390691820894, | |
| "learning_rate": 1.4485953806308883e-06, | |
| "loss": 0.2307, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.6741214057507987, | |
| "grad_norm": 0.43315728774154344, | |
| "learning_rate": 1.4209401667522028e-06, | |
| "loss": 0.2276, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.6773162939297124, | |
| "grad_norm": 0.4499922141030728, | |
| "learning_rate": 1.3935417614719327e-06, | |
| "loss": 0.2079, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.680511182108626, | |
| "grad_norm": 0.49094900978907974, | |
| "learning_rate": 1.366400543503854e-06, | |
| "loss": 0.1824, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.68370607028754, | |
| "grad_norm": 0.47322551913300975, | |
| "learning_rate": 1.3395168880067978e-06, | |
| "loss": 0.2501, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.686900958466454, | |
| "grad_norm": 0.43241827743072775, | |
| "learning_rate": 1.3128911665794198e-06, | |
| "loss": 0.2489, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.6900958466453675, | |
| "grad_norm": 0.5782151533369344, | |
| "learning_rate": 1.2865237472551106e-06, | |
| "loss": 0.2477, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.693290734824281, | |
| "grad_norm": 0.4494764243991699, | |
| "learning_rate": 1.2604149944968725e-06, | |
| "loss": 0.2111, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.696485623003195, | |
| "grad_norm": 0.45156798802844117, | |
| "learning_rate": 1.234565269192296e-06, | |
| "loss": 0.176, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.6996805111821085, | |
| "grad_norm": 0.4390858768011781, | |
| "learning_rate": 1.2089749286485808e-06, | |
| "loss": 0.2475, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.702875399361022, | |
| "grad_norm": 0.4190824848284048, | |
| "learning_rate": 1.183644326587574e-06, | |
| "loss": 0.2275, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.7060702875399363, | |
| "grad_norm": 0.4919591533349322, | |
| "learning_rate": 1.1585738131409107e-06, | |
| "loss": 0.2096, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.70926517571885, | |
| "grad_norm": 0.4983722601382295, | |
| "learning_rate": 1.1337637348451369e-06, | |
| "loss": 0.2353, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.7124600638977636, | |
| "grad_norm": 0.394892627520411, | |
| "learning_rate": 1.1092144346369581e-06, | |
| "loss": 0.2215, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.7156549520766773, | |
| "grad_norm": 0.4103602053022109, | |
| "learning_rate": 1.0849262518484704e-06, | |
| "loss": 0.195, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.718849840255591, | |
| "grad_norm": 0.4478481118516963, | |
| "learning_rate": 1.060899522202483e-06, | |
| "loss": 0.2243, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.722044728434505, | |
| "grad_norm": 0.44779585136229183, | |
| "learning_rate": 1.037134577807879e-06, | |
| "loss": 0.1981, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.7252396166134183, | |
| "grad_norm": 0.4309440677606521, | |
| "learning_rate": 1.0136317471550195e-06, | |
| "loss": 0.2119, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.7284345047923324, | |
| "grad_norm": 0.44538470312104783, | |
| "learning_rate": 9.903913551112e-07, | |
| "loss": 0.2343, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.731629392971246, | |
| "grad_norm": 0.46088845916460575, | |
| "learning_rate": 9.67413722916175e-07, | |
| "loss": 0.2384, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.7348242811501597, | |
| "grad_norm": 0.4435462990703814, | |
| "learning_rate": 9.446991681776985e-07, | |
| "loss": 0.2338, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.7380191693290734, | |
| "grad_norm": 0.4083232549654808, | |
| "learning_rate": 9.222480048671412e-07, | |
| "loss": 0.2039, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.741214057507987, | |
| "grad_norm": 0.4751109728713644, | |
| "learning_rate": 9.000605433151643e-07, | |
| "loss": 0.2202, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.744408945686901, | |
| "grad_norm": 0.42315898167185495, | |
| "learning_rate": 8.781370902074049e-07, | |
| "loss": 0.2429, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.747603833865815, | |
| "grad_norm": 0.4389817241489918, | |
| "learning_rate": 8.564779485802566e-07, | |
| "loss": 0.2523, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.7507987220447285, | |
| "grad_norm": 0.40824675808125727, | |
| "learning_rate": 8.350834178166755e-07, | |
| "loss": 0.2317, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.753993610223642, | |
| "grad_norm": 0.44644266395927934, | |
| "learning_rate": 8.139537936420372e-07, | |
| "loss": 0.2198, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.757188498402556, | |
| "grad_norm": 0.4364831760939215, | |
| "learning_rate": 7.93089368120048e-07, | |
| "loss": 0.2424, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 2.7603833865814695, | |
| "grad_norm": 0.47326054771389603, | |
| "learning_rate": 7.724904296487246e-07, | |
| "loss": 0.2386, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.763578274760383, | |
| "grad_norm": 0.4571089142274389, | |
| "learning_rate": 7.521572629563834e-07, | |
| "loss": 0.1619, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 2.7667731629392973, | |
| "grad_norm": 0.4221090741397694, | |
| "learning_rate": 7.320901490977217e-07, | |
| "loss": 0.2486, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.769968051118211, | |
| "grad_norm": 0.44759152235134214, | |
| "learning_rate": 7.122893654499318e-07, | |
| "loss": 0.2376, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 2.7731629392971247, | |
| "grad_norm": 0.4559649616233877, | |
| "learning_rate": 6.927551857088576e-07, | |
| "loss": 0.2216, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.7763578274760383, | |
| "grad_norm": 0.3938914848222988, | |
| "learning_rate": 6.734878798852174e-07, | |
| "loss": 0.2331, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 2.779552715654952, | |
| "grad_norm": 0.44967947965212385, | |
| "learning_rate": 6.544877143008777e-07, | |
| "loss": 0.2303, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.7827476038338657, | |
| "grad_norm": 0.4204688969968103, | |
| "learning_rate": 6.357549515851525e-07, | |
| "loss": 0.2497, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 2.7859424920127793, | |
| "grad_norm": 0.42708869477683215, | |
| "learning_rate": 6.172898506712033e-07, | |
| "loss": 0.2502, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.7891373801916934, | |
| "grad_norm": 0.4289703544035532, | |
| "learning_rate": 5.990926667924313e-07, | |
| "loss": 0.2637, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 2.792332268370607, | |
| "grad_norm": 0.43987128340382603, | |
| "learning_rate": 5.811636514789598e-07, | |
| "loss": 0.193, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.7955271565495208, | |
| "grad_norm": 0.5004291788721248, | |
| "learning_rate": 5.635030525541685e-07, | |
| "loss": 0.2105, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.7987220447284344, | |
| "grad_norm": 0.44122554674258213, | |
| "learning_rate": 5.461111141312492e-07, | |
| "loss": 0.1874, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.801916932907348, | |
| "grad_norm": 0.4230374283981073, | |
| "learning_rate": 5.289880766098421e-07, | |
| "loss": 0.2113, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 2.8051118210862622, | |
| "grad_norm": 0.4219143270150449, | |
| "learning_rate": 5.121341766727184e-07, | |
| "loss": 0.1856, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.8083067092651754, | |
| "grad_norm": 0.4280311905897577, | |
| "learning_rate": 4.955496472824939e-07, | |
| "loss": 0.2479, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 2.8115015974440896, | |
| "grad_norm": 0.4275114109142309, | |
| "learning_rate": 4.79234717678414e-07, | |
| "loss": 0.2109, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.8146964856230032, | |
| "grad_norm": 0.41231768670069835, | |
| "learning_rate": 4.631896133732006e-07, | |
| "loss": 0.1914, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 2.817891373801917, | |
| "grad_norm": 0.4046385798085887, | |
| "learning_rate": 4.474145561499099e-07, | |
| "loss": 0.2497, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.8210862619808306, | |
| "grad_norm": 0.4337798293798453, | |
| "learning_rate": 4.319097640588821e-07, | |
| "loss": 0.2105, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 2.8242811501597442, | |
| "grad_norm": 0.4367363776983278, | |
| "learning_rate": 4.166754514147275e-07, | |
| "loss": 0.2541, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.8274760383386583, | |
| "grad_norm": 0.4121592143726346, | |
| "learning_rate": 4.0171182879335856e-07, | |
| "loss": 0.2934, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 2.830670926517572, | |
| "grad_norm": 0.45692427171461536, | |
| "learning_rate": 3.870191030290782e-07, | |
| "loss": 0.2123, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.8338658146964857, | |
| "grad_norm": 0.44515522805300783, | |
| "learning_rate": 3.7259747721173134e-07, | |
| "loss": 0.1926, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 2.8370607028753994, | |
| "grad_norm": 0.41210623411640795, | |
| "learning_rate": 3.584471506838871e-07, | |
| "loss": 0.2355, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.840255591054313, | |
| "grad_norm": 0.7278276221930801, | |
| "learning_rate": 3.445683190380833e-07, | |
| "loss": 0.2734, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 2.8434504792332267, | |
| "grad_norm": 0.4293466579902048, | |
| "learning_rate": 3.3096117411413056e-07, | |
| "loss": 0.2084, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.8466453674121404, | |
| "grad_norm": 0.3945276989591356, | |
| "learning_rate": 3.1762590399645907e-07, | |
| "loss": 0.2355, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.8498402555910545, | |
| "grad_norm": 0.4169674719101088, | |
| "learning_rate": 3.045626930115053e-07, | |
| "loss": 0.2556, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.853035143769968, | |
| "grad_norm": 0.4677238531453402, | |
| "learning_rate": 2.917717217251914e-07, | |
| "loss": 0.2067, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.856230031948882, | |
| "grad_norm": 0.4508849526360919, | |
| "learning_rate": 2.7925316694039637e-07, | |
| "loss": 0.2264, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.8594249201277955, | |
| "grad_norm": 1.3406369405711993, | |
| "learning_rate": 2.670072016945402e-07, | |
| "loss": 0.3042, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.862619808306709, | |
| "grad_norm": 0.3953393489058587, | |
| "learning_rate": 2.5503399525717674e-07, | |
| "loss": 0.2038, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.8658146964856233, | |
| "grad_norm": 0.3991610340225267, | |
| "learning_rate": 2.433337131276581e-07, | |
| "loss": 0.2806, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.8690095846645365, | |
| "grad_norm": 0.42671305379955443, | |
| "learning_rate": 2.3190651703284273e-07, | |
| "loss": 0.2369, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.8722044728434506, | |
| "grad_norm": 0.41224602176747227, | |
| "learning_rate": 2.207525649248754e-07, | |
| "loss": 0.2171, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.8753993610223643, | |
| "grad_norm": 0.509647560563868, | |
| "learning_rate": 2.0987201097897757e-07, | |
| "loss": 0.2097, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.878594249201278, | |
| "grad_norm": 0.4724203634577862, | |
| "learning_rate": 1.9926500559134477e-07, | |
| "loss": 0.24, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.8817891373801916, | |
| "grad_norm": 0.44222221500216696, | |
| "learning_rate": 1.8893169537704813e-07, | |
| "loss": 0.2815, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.8849840255591053, | |
| "grad_norm": 0.43359238337650113, | |
| "learning_rate": 1.7887222316800957e-07, | |
| "loss": 0.2058, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.8881789137380194, | |
| "grad_norm": 0.4309496609438018, | |
| "learning_rate": 1.690867280110431e-07, | |
| "loss": 0.2481, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.891373801916933, | |
| "grad_norm": 0.4330580118936636, | |
| "learning_rate": 1.5957534516590988e-07, | |
| "loss": 0.2267, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.8945686900958467, | |
| "grad_norm": 0.41196459499404176, | |
| "learning_rate": 1.503382061034686e-07, | |
| "loss": 0.2471, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.8977635782747604, | |
| "grad_norm": 0.46467452130408804, | |
| "learning_rate": 1.4137543850384572e-07, | |
| "loss": 0.2321, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.900958466453674, | |
| "grad_norm": 0.430788659047838, | |
| "learning_rate": 1.3268716625467914e-07, | |
| "loss": 0.2805, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.9041533546325877, | |
| "grad_norm": 0.4431748531892815, | |
| "learning_rate": 1.242735094493952e-07, | |
| "loss": 0.2397, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.9073482428115014, | |
| "grad_norm": 0.4435883937921819, | |
| "learning_rate": 1.1613458438556102e-07, | |
| "loss": 0.2752, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.9105431309904155, | |
| "grad_norm": 0.4304268716716866, | |
| "learning_rate": 1.0827050356326585e-07, | |
| "loss": 0.26, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.913738019169329, | |
| "grad_norm": 0.4399673032424353, | |
| "learning_rate": 1.0068137568357783e-07, | |
| "loss": 0.2205, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.916932907348243, | |
| "grad_norm": 0.4445872824299983, | |
| "learning_rate": 9.336730564702745e-08, | |
| "loss": 0.1941, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.9201277955271565, | |
| "grad_norm": 0.4433121701499639, | |
| "learning_rate": 8.632839455216869e-08, | |
| "loss": 0.2012, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.92332268370607, | |
| "grad_norm": 0.445710760902897, | |
| "learning_rate": 7.956473969417789e-08, | |
| "loss": 0.1946, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.9265175718849843, | |
| "grad_norm": 0.44031338022101135, | |
| "learning_rate": 7.307643456351044e-08, | |
| "loss": 0.236, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.9297124600638975, | |
| "grad_norm": 0.42316443693466144, | |
| "learning_rate": 6.686356884460177e-08, | |
| "loss": 0.2314, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.9329073482428116, | |
| "grad_norm": 0.4449659893027522, | |
| "learning_rate": 6.092622841463502e-08, | |
| "loss": 0.1657, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.9361022364217253, | |
| "grad_norm": 0.4225339222713818, | |
| "learning_rate": 5.526449534235534e-08, | |
| "loss": 0.2542, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.939297124600639, | |
| "grad_norm": 0.44649236358142536, | |
| "learning_rate": 4.9878447886926305e-08, | |
| "loss": 0.2343, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.9424920127795526, | |
| "grad_norm": 0.4096672005948774, | |
| "learning_rate": 4.4768160496859725e-08, | |
| "loss": 0.2727, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.9456869009584663, | |
| "grad_norm": 0.4393531135600368, | |
| "learning_rate": 3.993370380897421e-08, | |
| "loss": 0.225, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.9488817891373804, | |
| "grad_norm": 0.47244217125437676, | |
| "learning_rate": 3.537514464743152e-08, | |
| "loss": 0.2135, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.952076677316294, | |
| "grad_norm": 0.4328106188446803, | |
| "learning_rate": 3.109254602280398e-08, | |
| "loss": 0.218, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.9552715654952078, | |
| "grad_norm": 0.4358995352547161, | |
| "learning_rate": 2.7085967131201818e-08, | |
| "loss": 0.2325, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.9584664536741214, | |
| "grad_norm": 0.4376093029807377, | |
| "learning_rate": 2.3355463353467168e-08, | |
| "loss": 0.2082, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.961661341853035, | |
| "grad_norm": 0.42933069783001954, | |
| "learning_rate": 1.9901086254396908e-08, | |
| "loss": 0.2006, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.9648562300319488, | |
| "grad_norm": 0.43253728612411796, | |
| "learning_rate": 1.672288358203211e-08, | |
| "loss": 0.2086, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.9680511182108624, | |
| "grad_norm": 0.49147199258587176, | |
| "learning_rate": 1.382089926700303e-08, | |
| "loss": 0.2226, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.9712460063897765, | |
| "grad_norm": 0.4073568057815397, | |
| "learning_rate": 1.1195173421914007e-08, | |
| "loss": 0.2687, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.97444089456869, | |
| "grad_norm": 0.392034693925328, | |
| "learning_rate": 8.84574234079727e-09, | |
| "loss": 0.2252, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.977635782747604, | |
| "grad_norm": 0.44649691262839863, | |
| "learning_rate": 6.772638498606654e-09, | |
| "loss": 0.241, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.9808306709265175, | |
| "grad_norm": 0.4212064663206084, | |
| "learning_rate": 4.97589055076908e-09, | |
| "loss": 0.2497, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.984025559105431, | |
| "grad_norm": 0.4438386239006071, | |
| "learning_rate": 3.4555233327893124e-09, | |
| "loss": 0.2348, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.987220447284345, | |
| "grad_norm": 0.43882394327805996, | |
| "learning_rate": 2.2115578599035683e-09, | |
| "loss": 0.2373, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.9904153354632586, | |
| "grad_norm": 0.4544247984602708, | |
| "learning_rate": 1.244011326797523e-09, | |
| "loss": 0.2172, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.9936102236421727, | |
| "grad_norm": 0.4700550963913251, | |
| "learning_rate": 5.52897107355399e-10, | |
| "loss": 0.2571, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.9968051118210863, | |
| "grad_norm": 0.4038371285545047, | |
| "learning_rate": 1.3822475449121186e-10, | |
| "loss": 0.2118, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.3831820442811552, | |
| "learning_rate": 0.0, | |
| "loss": 0.2001, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 939, | |
| "total_flos": 406391461183488.0, | |
| "train_loss": 0.3877937021696022, | |
| "train_runtime": 10098.5367, | |
| "train_samples_per_second": 2.97, | |
| "train_steps_per_second": 0.093 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 939, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 406391461183488.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
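
For reference, a state file like the one above can be inspected programmatically. Below is a minimal Python sketch, assuming the log is saved under the Trainer's default name, trainer_state.json; the filename and the printed summary are illustrative, not part of the original log.

    import json

    # Load the Trainer state written alongside a checkpoint.
    with open("trainer_state.json") as f:
        state = json.load(f)

    # Per-step entries carry "loss"/"learning_rate"; the final aggregate
    # entry carries "train_loss"/"train_runtime" instead, so guard for keys.
    logs   = state["log_history"]
    steps  = [e["step"] for e in logs if "loss" in e]
    losses = [e["loss"] for e in logs if "loss" in e]
    lrs    = [e["learning_rate"] for e in logs if "learning_rate" in e]

    print(f"steps logged: {len(steps)} / max_steps={state['max_steps']}")
    print(f"final loss:   {losses[-1]:.4f}")
    print(f"mean loss:    {sum(losses) / len(losses):.4f}")
    print(f"lr range:     {min(lrs):.3e} .. {max(lrs):.3e}")

The key-presence guard matters because the last element of log_history is the end-of-training summary (train_loss, train_runtime, total_flos) rather than a per-step record.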