| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25250227479526843, |
| "eval_steps": 500, |
| "global_step": 555, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045495905368516835, |
| "grad_norm": 9.460982644953551, |
| "learning_rate": 1e-06, |
| "loss": 0.1263, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009099181073703367, |
| "grad_norm": 8.496088007652286, |
| "learning_rate": 9.999999795711289e-07, |
| "loss": 0.1976, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001364877161055505, |
| "grad_norm": 8.795727896186301, |
| "learning_rate": 9.999999182845176e-07, |
| "loss": 0.1936, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018198362147406734, |
| "grad_norm": 2.884208258076819, |
| "learning_rate": 9.999998161401707e-07, |
| "loss": 0.1341, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0022747952684258415, |
| "grad_norm": 6.093272608844917, |
| "learning_rate": 9.999996731380972e-07, |
| "loss": 0.1653, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00272975432211101, |
| "grad_norm": 6.206942040526342, |
| "learning_rate": 9.99999489278308e-07, |
| "loss": 0.1233, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0031847133757961785, |
| "grad_norm": 5.623248620409181, |
| "learning_rate": 9.999992645608189e-07, |
| "loss": 0.11, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003639672429481347, |
| "grad_norm": 5.6212334255704395, |
| "learning_rate": 9.999989989856477e-07, |
| "loss": 0.1256, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004094631483166515, |
| "grad_norm": 5.115332368305895, |
| "learning_rate": 9.999986925528161e-07, |
| "loss": 0.0802, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004549590536851683, |
| "grad_norm": 7.6754517372106115, |
| "learning_rate": 9.999983452623496e-07, |
| "loss": 0.1339, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005004549590536852, |
| "grad_norm": 7.4519771920318885, |
| "learning_rate": 9.999979571142762e-07, |
| "loss": 0.1241, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00545950864422202, |
| "grad_norm": 5.281625514057187, |
| "learning_rate": 9.999975281086276e-07, |
| "loss": 0.1225, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005914467697907188, |
| "grad_norm": 2.826051783008299, |
| "learning_rate": 9.99997058245439e-07, |
| "loss": 0.0938, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006369426751592357, |
| "grad_norm": 16.98790451752032, |
| "learning_rate": 9.99996547524749e-07, |
| "loss": 0.1451, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.006824385805277525, |
| "grad_norm": 3.879403880177224, |
| "learning_rate": 9.999959959465988e-07, |
| "loss": 0.1075, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007279344858962694, |
| "grad_norm": 6.135164623225957, |
| "learning_rate": 9.999954035110341e-07, |
| "loss": 0.0868, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0077343039126478615, |
| "grad_norm": 7.353193747413332, |
| "learning_rate": 9.999947702181026e-07, |
| "loss": 0.1358, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00818926296633303, |
| "grad_norm": 4.665297196059896, |
| "learning_rate": 9.999940960678568e-07, |
| "loss": 0.1028, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008644222020018199, |
| "grad_norm": 6.343910178022308, |
| "learning_rate": 9.99993381060351e-07, |
| "loss": 0.1079, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009099181073703366, |
| "grad_norm": 3.1318769019186106, |
| "learning_rate": 9.999926251956445e-07, |
| "loss": 0.0762, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009554140127388535, |
| "grad_norm": 5.639674558770591, |
| "learning_rate": 9.999918284737984e-07, |
| "loss": 0.1107, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010009099181073703, |
| "grad_norm": 3.944694198517734, |
| "learning_rate": 9.999909908948781e-07, |
| "loss": 0.0792, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010464058234758872, |
| "grad_norm": 5.277637634775166, |
| "learning_rate": 9.999901124589518e-07, |
| "loss": 0.0955, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01091901728844404, |
| "grad_norm": 3.159102517555636, |
| "learning_rate": 9.999891931660915e-07, |
| "loss": 0.1177, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.011373976342129208, |
| "grad_norm": 3.876069018264601, |
| "learning_rate": 9.999882330163725e-07, |
| "loss": 0.0942, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011828935395814377, |
| "grad_norm": 5.310251289002937, |
| "learning_rate": 9.999872320098726e-07, |
| "loss": 0.0772, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.012283894449499545, |
| "grad_norm": 2.4013872287511204, |
| "learning_rate": 9.999861901466744e-07, |
| "loss": 0.113, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.012738853503184714, |
| "grad_norm": 3.226872006603208, |
| "learning_rate": 9.999851074268623e-07, |
| "loss": 0.0948, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013193812556869881, |
| "grad_norm": 4.948814654016484, |
| "learning_rate": 9.999839838505255e-07, |
| "loss": 0.1225, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01364877161055505, |
| "grad_norm": 2.1915501350462883, |
| "learning_rate": 9.999828194177554e-07, |
| "loss": 0.1092, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014103730664240218, |
| "grad_norm": 2.457727680328418, |
| "learning_rate": 9.999816141286471e-07, |
| "loss": 0.091, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.014558689717925387, |
| "grad_norm": 2.096610231076784, |
| "learning_rate": 9.99980367983299e-07, |
| "loss": 0.079, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015013648771610554, |
| "grad_norm": 3.4789632001628985, |
| "learning_rate": 9.999790809818133e-07, |
| "loss": 0.069, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.015468607825295723, |
| "grad_norm": 2.432234105105967, |
| "learning_rate": 9.99977753124295e-07, |
| "loss": 0.0916, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01592356687898089, |
| "grad_norm": 2.8811408833854797, |
| "learning_rate": 9.999763844108525e-07, |
| "loss": 0.0558, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01637852593266606, |
| "grad_norm": 3.0517587459597832, |
| "learning_rate": 9.99974974841598e-07, |
| "loss": 0.0574, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01683348498635123, |
| "grad_norm": 5.911523880873187, |
| "learning_rate": 9.999735244166462e-07, |
| "loss": 0.1149, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017288444040036398, |
| "grad_norm": 2.2249115738169034, |
| "learning_rate": 9.99972033136116e-07, |
| "loss": 0.1024, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.017743403093721567, |
| "grad_norm": 18.233741266972995, |
| "learning_rate": 9.99970501000129e-07, |
| "loss": 0.0919, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018198362147406732, |
| "grad_norm": 5.1798303460368755, |
| "learning_rate": 9.999689280088103e-07, |
| "loss": 0.0775, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0186533212010919, |
| "grad_norm": 2.1459089246867316, |
| "learning_rate": 9.99967314162289e-07, |
| "loss": 0.0771, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01910828025477707, |
| "grad_norm": 2.310140859334937, |
| "learning_rate": 9.999656594606964e-07, |
| "loss": 0.096, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.019563239308462238, |
| "grad_norm": 1.7877695653527323, |
| "learning_rate": 9.99963963904168e-07, |
| "loss": 0.0889, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020018198362147407, |
| "grad_norm": 2.3036066458968403, |
| "learning_rate": 9.999622274928424e-07, |
| "loss": 0.0904, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.020473157415832575, |
| "grad_norm": 8.068779945527933, |
| "learning_rate": 9.999604502268613e-07, |
| "loss": 0.1085, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.020928116469517744, |
| "grad_norm": 4.428001428676284, |
| "learning_rate": 9.999586321063698e-07, |
| "loss": 0.0724, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.021383075523202913, |
| "grad_norm": 3.472814836739773, |
| "learning_rate": 9.999567731315169e-07, |
| "loss": 0.1807, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02183803457688808, |
| "grad_norm": 4.53270932228853, |
| "learning_rate": 9.999548733024543e-07, |
| "loss": 0.183, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.022292993630573247, |
| "grad_norm": 4.635214304750299, |
| "learning_rate": 9.99952932619337e-07, |
| "loss": 0.0963, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.022747952684258416, |
| "grad_norm": 2.524081592582436, |
| "learning_rate": 9.99950951082324e-07, |
| "loss": 0.0768, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023202911737943584, |
| "grad_norm": 2.4497670829961495, |
| "learning_rate": 9.999489286915772e-07, |
| "loss": 0.0754, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.023657870791628753, |
| "grad_norm": 2.3453588015577154, |
| "learning_rate": 9.999468654472614e-07, |
| "loss": 0.0742, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.024112829845313922, |
| "grad_norm": 1.0559086878519577, |
| "learning_rate": 9.999447613495457e-07, |
| "loss": 0.0561, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02456778889899909, |
| "grad_norm": 4.180479602032368, |
| "learning_rate": 9.999426163986018e-07, |
| "loss": 0.0685, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02502274795268426, |
| "grad_norm": 1.485403464030856, |
| "learning_rate": 9.99940430594605e-07, |
| "loss": 0.0595, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.025477707006369428, |
| "grad_norm": 3.818350504676734, |
| "learning_rate": 9.999382039377338e-07, |
| "loss": 0.0703, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.025932666060054597, |
| "grad_norm": 3.712467276552201, |
| "learning_rate": 9.999359364281704e-07, |
| "loss": 0.0465, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.026387625113739762, |
| "grad_norm": 2.5314824071119997, |
| "learning_rate": 9.999336280660999e-07, |
| "loss": 0.1118, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02684258416742493, |
| "grad_norm": 2.01187791111904, |
| "learning_rate": 9.99931278851711e-07, |
| "loss": 0.0779, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0272975432211101, |
| "grad_norm": 1.8034515649341665, |
| "learning_rate": 9.999288887851956e-07, |
| "loss": 0.0714, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.027752502274795268, |
| "grad_norm": 5.939307329468221, |
| "learning_rate": 9.999264578667492e-07, |
| "loss": 0.0624, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.028207461328480437, |
| "grad_norm": 3.5462049672421694, |
| "learning_rate": 9.999239860965701e-07, |
| "loss": 0.084, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.028662420382165606, |
| "grad_norm": 1.9190519391810084, |
| "learning_rate": 9.999214734748607e-07, |
| "loss": 0.0868, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.029117379435850774, |
| "grad_norm": 2.990182475432006, |
| "learning_rate": 9.999189200018262e-07, |
| "loss": 0.0687, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.029572338489535943, |
| "grad_norm": 3.6283565145413035, |
| "learning_rate": 9.999163256776748e-07, |
| "loss": 0.1066, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03002729754322111, |
| "grad_norm": 2.46041046107172, |
| "learning_rate": 9.999136905026192e-07, |
| "loss": 0.1429, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.030482256596906277, |
| "grad_norm": 2.540310192062455, |
| "learning_rate": 9.999110144768743e-07, |
| "loss": 0.0871, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.030937215650591446, |
| "grad_norm": 2.3481485598576555, |
| "learning_rate": 9.999082976006589e-07, |
| "loss": 0.0708, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03139217470427662, |
| "grad_norm": 1.5421437731161647, |
| "learning_rate": 9.99905539874195e-07, |
| "loss": 0.0646, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03184713375796178, |
| "grad_norm": 1.862783458602375, |
| "learning_rate": 9.99902741297708e-07, |
| "loss": 0.073, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03230209281164695, |
| "grad_norm": 2.7514307829983196, |
| "learning_rate": 9.998999018714263e-07, |
| "loss": 0.0636, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03275705186533212, |
| "grad_norm": 2.088273099435258, |
| "learning_rate": 9.998970215955822e-07, |
| "loss": 0.0759, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.033212010919017286, |
| "grad_norm": 4.880835542241873, |
| "learning_rate": 9.998941004704111e-07, |
| "loss": 0.0607, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03366696997270246, |
| "grad_norm": 2.294548042436939, |
| "learning_rate": 9.998911384961517e-07, |
| "loss": 0.0851, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.034121929026387623, |
| "grad_norm": 3.135395035411956, |
| "learning_rate": 9.998881356730458e-07, |
| "loss": 0.0615, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.034576888080072796, |
| "grad_norm": 2.575133750401631, |
| "learning_rate": 9.998850920013388e-07, |
| "loss": 0.0799, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03503184713375796, |
| "grad_norm": 2.565050657771875, |
| "learning_rate": 9.998820074812797e-07, |
| "loss": 0.0696, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03548680618744313, |
| "grad_norm": 2.2396897888125915, |
| "learning_rate": 9.998788821131206e-07, |
| "loss": 0.1037, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0359417652411283, |
| "grad_norm": 1.617159978432207, |
| "learning_rate": 9.998757158971164e-07, |
| "loss": 0.0709, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.036396724294813464, |
| "grad_norm": 2.8489915433996913, |
| "learning_rate": 9.998725088335263e-07, |
| "loss": 0.0888, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.036851683348498636, |
| "grad_norm": 0.9923870511013093, |
| "learning_rate": 9.99869260922612e-07, |
| "loss": 0.0475, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0373066424021838, |
| "grad_norm": 9.863429979788943, |
| "learning_rate": 9.998659721646392e-07, |
| "loss": 0.082, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03776160145586897, |
| "grad_norm": 1.468013198811096, |
| "learning_rate": 9.998626425598765e-07, |
| "loss": 0.0783, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03821656050955414, |
| "grad_norm": 2.6390574435332756, |
| "learning_rate": 9.99859272108596e-07, |
| "loss": 0.0803, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03867151956323931, |
| "grad_norm": 1.4779232115601733, |
| "learning_rate": 9.998558608110731e-07, |
| "loss": 0.0871, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.039126478616924476, |
| "grad_norm": 1.5249547529775251, |
| "learning_rate": 9.998524086675866e-07, |
| "loss": 0.0723, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03958143767060965, |
| "grad_norm": 2.119420999088343, |
| "learning_rate": 9.998489156784186e-07, |
| "loss": 0.0816, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.040036396724294813, |
| "grad_norm": 1.6071500805434977, |
| "learning_rate": 9.998453818438546e-07, |
| "loss": 0.0882, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04049135577797998, |
| "grad_norm": 1.3520647569618234, |
| "learning_rate": 9.998418071641832e-07, |
| "loss": 0.0717, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04094631483166515, |
| "grad_norm": 1.3581714785879615, |
| "learning_rate": 9.998381916396965e-07, |
| "loss": 0.0619, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.041401273885350316, |
| "grad_norm": 1.2211168419055232, |
| "learning_rate": 9.9983453527069e-07, |
| "loss": 0.049, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04185623293903549, |
| "grad_norm": 1.4130875618701322, |
| "learning_rate": 9.998308380574627e-07, |
| "loss": 0.0603, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.042311191992720654, |
| "grad_norm": 2.5116395183528146, |
| "learning_rate": 9.998271000003164e-07, |
| "loss": 0.117, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.042766151046405826, |
| "grad_norm": 1.3221909432162122, |
| "learning_rate": 9.998233210995568e-07, |
| "loss": 0.0752, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04322111010009099, |
| "grad_norm": 8.422591179527869, |
| "learning_rate": 9.998195013554926e-07, |
| "loss": 0.1043, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04367606915377616, |
| "grad_norm": 1.3172883326606952, |
| "learning_rate": 9.998156407684356e-07, |
| "loss": 0.0932, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04413102820746133, |
| "grad_norm": 1.6768409465958336, |
| "learning_rate": 9.99811739338702e-07, |
| "loss": 0.0542, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.044585987261146494, |
| "grad_norm": 2.7309524458462535, |
| "learning_rate": 9.9980779706661e-07, |
| "loss": 0.0695, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.045040946314831666, |
| "grad_norm": 1.7815589902074003, |
| "learning_rate": 9.998038139524819e-07, |
| "loss": 0.0677, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04549590536851683, |
| "grad_norm": 2.0846413148949043, |
| "learning_rate": 9.997997899966432e-07, |
| "loss": 0.0721, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.045950864422202004, |
| "grad_norm": 1.6681584962433291, |
| "learning_rate": 9.997957251994229e-07, |
| "loss": 0.092, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04640582347588717, |
| "grad_norm": 2.0217998424011934, |
| "learning_rate": 9.99791619561153e-07, |
| "loss": 0.0785, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04686078252957234, |
| "grad_norm": 1.31297777091851, |
| "learning_rate": 9.997874730821687e-07, |
| "loss": 0.062, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.047315741583257506, |
| "grad_norm": 28.84255784761873, |
| "learning_rate": 9.997832857628093e-07, |
| "loss": 0.2905, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04777070063694268, |
| "grad_norm": 2.1953371485715953, |
| "learning_rate": 9.997790576034168e-07, |
| "loss": 0.1315, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.048225659690627844, |
| "grad_norm": 1.752579441726796, |
| "learning_rate": 9.997747886043366e-07, |
| "loss": 0.0592, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04868061874431301, |
| "grad_norm": 1.214479974144816, |
| "learning_rate": 9.99770478765918e-07, |
| "loss": 0.0658, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04913557779799818, |
| "grad_norm": 2.119879894581063, |
| "learning_rate": 9.997661280885123e-07, |
| "loss": 0.1249, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.049590536851683346, |
| "grad_norm": 1.6123956274563962, |
| "learning_rate": 9.997617365724757e-07, |
| "loss": 0.0721, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05004549590536852, |
| "grad_norm": 3.8956481181984954, |
| "learning_rate": 9.997573042181672e-07, |
| "loss": 0.0668, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.050500454959053684, |
| "grad_norm": 1.302369323422075, |
| "learning_rate": 9.997528310259483e-07, |
| "loss": 0.0978, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.050955414012738856, |
| "grad_norm": 1.9393382827627617, |
| "learning_rate": 9.997483169961851e-07, |
| "loss": 0.0919, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05141037306642402, |
| "grad_norm": 1.8924498716761984, |
| "learning_rate": 9.99743762129246e-07, |
| "loss": 0.0737, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.051865332120109194, |
| "grad_norm": 1.5015649842043453, |
| "learning_rate": 9.99739166425504e-07, |
| "loss": 0.0614, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05232029117379436, |
| "grad_norm": 2.1366922787715863, |
| "learning_rate": 9.997345298853336e-07, |
| "loss": 0.1374, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.052775250227479524, |
| "grad_norm": 1.9167133713490603, |
| "learning_rate": 9.997298525091147e-07, |
| "loss": 0.0726, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.053230209281164696, |
| "grad_norm": 1.340457987244246, |
| "learning_rate": 9.997251342972286e-07, |
| "loss": 0.0685, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05368516833484986, |
| "grad_norm": 1.6965958368742107, |
| "learning_rate": 9.997203752500615e-07, |
| "loss": 0.074, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.054140127388535034, |
| "grad_norm": 2.096296179848149, |
| "learning_rate": 9.99715575368002e-07, |
| "loss": 0.0716, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0545950864422202, |
| "grad_norm": 1.2376773021772507, |
| "learning_rate": 9.997107346514424e-07, |
| "loss": 0.066, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05505004549590537, |
| "grad_norm": 0.9612721709748118, |
| "learning_rate": 9.99705853100778e-07, |
| "loss": 0.0512, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.055505004549590536, |
| "grad_norm": 1.4181375311562314, |
| "learning_rate": 9.99700930716408e-07, |
| "loss": 0.0823, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05595996360327571, |
| "grad_norm": 3.5180526458549797, |
| "learning_rate": 9.99695967498735e-07, |
| "loss": 0.0983, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.056414922656960874, |
| "grad_norm": 4.112860504161507, |
| "learning_rate": 9.996909634481637e-07, |
| "loss": 0.0561, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.05686988171064604, |
| "grad_norm": 28.236450106120056, |
| "learning_rate": 9.996859185651036e-07, |
| "loss": 0.2988, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05732484076433121, |
| "grad_norm": 1.5902388588480536, |
| "learning_rate": 9.99680832849967e-07, |
| "loss": 0.0975, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05777979981801638, |
| "grad_norm": 1.2347324615542328, |
| "learning_rate": 9.996757063031689e-07, |
| "loss": 0.0494, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05823475887170155, |
| "grad_norm": 4.028509250415612, |
| "learning_rate": 9.996705389251287e-07, |
| "loss": 0.0937, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.058689717925386714, |
| "grad_norm": 1.771230906552479, |
| "learning_rate": 9.996653307162686e-07, |
| "loss": 0.0639, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.059144676979071886, |
| "grad_norm": 2.795754260594168, |
| "learning_rate": 9.996600816770142e-07, |
| "loss": 0.0768, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05959963603275705, |
| "grad_norm": 1.876401549309021, |
| "learning_rate": 9.996547918077943e-07, |
| "loss": 0.0952, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06005459508644222, |
| "grad_norm": 1.6308837105022365, |
| "learning_rate": 9.996494611090412e-07, |
| "loss": 0.0774, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06050955414012739, |
| "grad_norm": 1.0100069577180617, |
| "learning_rate": 9.996440895811907e-07, |
| "loss": 0.0523, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.060964513193812554, |
| "grad_norm": 1.9655730360996575, |
| "learning_rate": 9.996386772246814e-07, |
| "loss": 0.0858, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.061419472247497726, |
| "grad_norm": 2.098545470677641, |
| "learning_rate": 9.996332240399558e-07, |
| "loss": 0.0791, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06187443130118289, |
| "grad_norm": 10.955612559167871, |
| "learning_rate": 9.996277300274595e-07, |
| "loss": 0.1788, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.062329390354868064, |
| "grad_norm": 1.389590670783533, |
| "learning_rate": 9.996221951876414e-07, |
| "loss": 0.1015, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06278434940855324, |
| "grad_norm": 1.75423879868311, |
| "learning_rate": 9.996166195209536e-07, |
| "loss": 0.1009, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0632393084622384, |
| "grad_norm": 1.1611431350130335, |
| "learning_rate": 9.996110030278523e-07, |
| "loss": 0.0542, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 1.177330204385083, |
| "learning_rate": 9.996053457087956e-07, |
| "loss": 0.0678, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06414922656960874, |
| "grad_norm": 1.1343819646499882, |
| "learning_rate": 9.995996475642466e-07, |
| "loss": 0.0735, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0646041856232939, |
| "grad_norm": 1.6851312977878308, |
| "learning_rate": 9.995939085946703e-07, |
| "loss": 0.0627, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06505914467697907, |
| "grad_norm": 1.5381425131907351, |
| "learning_rate": 9.995881288005362e-07, |
| "loss": 0.0713, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06551410373066424, |
| "grad_norm": 2.1112756199827003, |
| "learning_rate": 9.995823081823161e-07, |
| "loss": 0.0619, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06596906278434941, |
| "grad_norm": 1.4393207733449387, |
| "learning_rate": 9.99576446740486e-07, |
| "loss": 0.0779, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06642402183803457, |
| "grad_norm": 4.030195148512648, |
| "learning_rate": 9.995705444755247e-07, |
| "loss": 0.1547, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06687898089171974, |
| "grad_norm": 1.0198030200938744, |
| "learning_rate": 9.995646013879146e-07, |
| "loss": 0.0591, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06733393994540492, |
| "grad_norm": 2.5309104773358615, |
| "learning_rate": 9.995586174781413e-07, |
| "loss": 0.0702, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.06778889899909009, |
| "grad_norm": 1.2810295777297256, |
| "learning_rate": 9.995525927466935e-07, |
| "loss": 0.0764, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.06824385805277525, |
| "grad_norm": 1.4301504427281813, |
| "learning_rate": 9.995465271940642e-07, |
| "loss": 0.0643, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06869881710646042, |
| "grad_norm": 1.8711805426782187, |
| "learning_rate": 9.995404208207482e-07, |
| "loss": 0.0826, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.06915377616014559, |
| "grad_norm": 1.875735096442477, |
| "learning_rate": 9.99534273627245e-07, |
| "loss": 0.0949, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06960873521383075, |
| "grad_norm": 3.96430833554185, |
| "learning_rate": 9.99528085614057e-07, |
| "loss": 0.1094, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07006369426751592, |
| "grad_norm": 1.4251935695546751, |
| "learning_rate": 9.995218567816899e-07, |
| "loss": 0.0564, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0705186533212011, |
| "grad_norm": 1.3176900708215529, |
| "learning_rate": 9.99515587130652e-07, |
| "loss": 0.0631, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07097361237488627, |
| "grad_norm": 1.7624159071995484, |
| "learning_rate": 9.995092766614566e-07, |
| "loss": 0.0572, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.3925157957876113, |
| "learning_rate": 9.995029253746186e-07, |
| "loss": 0.0805, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0718835304822566, |
| "grad_norm": 1.4376441428641062, |
| "learning_rate": 9.994965332706572e-07, |
| "loss": 0.0548, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07233848953594177, |
| "grad_norm": 1.7813293579714622, |
| "learning_rate": 9.99490100350095e-07, |
| "loss": 0.0842, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07279344858962693, |
| "grad_norm": 1.2919649617338766, |
| "learning_rate": 9.994836266134575e-07, |
| "loss": 0.0707, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0732484076433121, |
| "grad_norm": 1.464068687152245, |
| "learning_rate": 9.994771120612734e-07, |
| "loss": 0.0802, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07370336669699727, |
| "grad_norm": 1.4409455272274696, |
| "learning_rate": 9.994705566940756e-07, |
| "loss": 0.0735, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07415832575068244, |
| "grad_norm": 1.6004712311017868, |
| "learning_rate": 9.994639605123993e-07, |
| "loss": 0.0751, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0746132848043676, |
| "grad_norm": 1.4178445048602115, |
| "learning_rate": 9.994573235167837e-07, |
| "loss": 0.0677, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07506824385805277, |
| "grad_norm": 1.9143467099814084, |
| "learning_rate": 9.994506457077713e-07, |
| "loss": 0.0803, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07552320291173795, |
| "grad_norm": 2.308950856245503, |
| "learning_rate": 9.994439270859077e-07, |
| "loss": 0.1114, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.07597816196542312, |
| "grad_norm": 1.4976553220218047, |
| "learning_rate": 9.994371676517416e-07, |
| "loss": 0.0648, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07643312101910828, |
| "grad_norm": 1.3873054469654502, |
| "learning_rate": 9.994303674058258e-07, |
| "loss": 0.0656, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07688808007279345, |
| "grad_norm": 1.4242003105924865, |
| "learning_rate": 9.994235263487158e-07, |
| "loss": 0.0696, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.07734303912647862, |
| "grad_norm": 1.5170054860626223, |
| "learning_rate": 9.994166444809704e-07, |
| "loss": 0.06, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07779799818016378, |
| "grad_norm": 1.3198324267816783, |
| "learning_rate": 9.994097218031523e-07, |
| "loss": 0.0572, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.07825295723384895, |
| "grad_norm": 1.2355032396516417, |
| "learning_rate": 9.99402758315827e-07, |
| "loss": 0.0714, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.07870791628753412, |
| "grad_norm": 2.2363524490578994, |
| "learning_rate": 9.993957540195637e-07, |
| "loss": 0.069, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0791628753412193, |
| "grad_norm": 1.1537045057849564, |
| "learning_rate": 9.993887089149345e-07, |
| "loss": 0.0781, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.07961783439490445, |
| "grad_norm": 1.1483158852396702, |
| "learning_rate": 9.993816230025151e-07, |
| "loss": 0.0586, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08007279344858963, |
| "grad_norm": 2.060757875095848, |
| "learning_rate": 9.99374496282885e-07, |
| "loss": 0.084, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0805277525022748, |
| "grad_norm": 2.745784074986501, |
| "learning_rate": 9.99367328756626e-07, |
| "loss": 0.1264, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08098271155595996, |
| "grad_norm": 0.8765067657246923, |
| "learning_rate": 9.99360120424324e-07, |
| "loss": 0.0442, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08143767060964513, |
| "grad_norm": 0.8649614951279925, |
| "learning_rate": 9.99352871286568e-07, |
| "loss": 0.0614, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0818926296633303, |
| "grad_norm": 1.891800612550326, |
| "learning_rate": 9.993455813439506e-07, |
| "loss": 0.0612, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08234758871701547, |
| "grad_norm": 1.2201801550256919, |
| "learning_rate": 9.993382505970671e-07, |
| "loss": 0.0491, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08280254777070063, |
| "grad_norm": 1.3933852940059142, |
| "learning_rate": 9.99330879046517e-07, |
| "loss": 0.0621, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0832575068243858, |
| "grad_norm": 1.0007322979585427, |
| "learning_rate": 9.993234666929023e-07, |
| "loss": 0.0512, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08371246587807098, |
| "grad_norm": 1.5648385532765003, |
| "learning_rate": 9.993160135368288e-07, |
| "loss": 0.0641, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08416742493175614, |
| "grad_norm": 1.1000106659426367, |
| "learning_rate": 9.993085195789055e-07, |
| "loss": 0.088, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08462238398544131, |
| "grad_norm": 1.3353211888071153, |
| "learning_rate": 9.99300984819745e-07, |
| "loss": 0.0618, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.08507734303912648, |
| "grad_norm": 1.166219455861875, |
| "learning_rate": 9.992934092599627e-07, |
| "loss": 0.0615, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08553230209281165, |
| "grad_norm": 1.2821420783611344, |
| "learning_rate": 9.99285792900178e-07, |
| "loss": 0.0902, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08598726114649681, |
| "grad_norm": 1.4699402496364335, |
| "learning_rate": 9.99278135741013e-07, |
| "loss": 0.0965, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08644222020018198, |
| "grad_norm": 1.2092187178175942, |
| "learning_rate": 9.992704377830933e-07, |
| "loss": 0.0495, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08689717925386715, |
| "grad_norm": 2.4242211713295094, |
| "learning_rate": 9.99262699027048e-07, |
| "loss": 0.1044, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.08735213830755233, |
| "grad_norm": 1.4912711552359401, |
| "learning_rate": 9.9925491947351e-07, |
| "loss": 0.0803, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.08780709736123748, |
| "grad_norm": 0.8264184113688682, |
| "learning_rate": 9.992470991231143e-07, |
| "loss": 0.044, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.08826205641492266, |
| "grad_norm": 1.5856229366100132, |
| "learning_rate": 9.992392379765003e-07, |
| "loss": 0.0846, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.08871701546860783, |
| "grad_norm": 1.711195707943432, |
| "learning_rate": 9.992313360343104e-07, |
| "loss": 0.0953, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08917197452229299, |
| "grad_norm": 1.4375057634941018, |
| "learning_rate": 9.992233932971901e-07, |
| "loss": 0.0598, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.08962693357597816, |
| "grad_norm": 1.9214247088463914, |
| "learning_rate": 9.992154097657887e-07, |
| "loss": 0.0844, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09008189262966333, |
| "grad_norm": 1.5260802768094281, |
| "learning_rate": 9.992073854407584e-07, |
| "loss": 0.0821, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0905368516833485, |
| "grad_norm": 4.544205646306275, |
| "learning_rate": 9.991993203227549e-07, |
| "loss": 0.0774, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09099181073703366, |
| "grad_norm": 0.9784468610737326, |
| "learning_rate": 9.991912144124373e-07, |
| "loss": 0.0665, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09144676979071883, |
| "grad_norm": 1.533362131047682, |
| "learning_rate": 9.991830677104681e-07, |
| "loss": 0.094, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09190172884440401, |
| "grad_norm": 1.4442948304988266, |
| "learning_rate": 9.991748802175128e-07, |
| "loss": 0.0679, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09235668789808917, |
| "grad_norm": 1.778353868301846, |
| "learning_rate": 9.991666519342406e-07, |
| "loss": 0.0802, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09281164695177434, |
| "grad_norm": 1.4080886395933547, |
| "learning_rate": 9.991583828613238e-07, |
| "loss": 0.0714, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09326660600545951, |
| "grad_norm": 1.7407833171061422, |
| "learning_rate": 9.991500729994381e-07, |
| "loss": 0.0708, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09372156505914468, |
| "grad_norm": 1.309202587334328, |
| "learning_rate": 9.991417223492626e-07, |
| "loss": 0.061, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09417652411282984, |
| "grad_norm": 1.700325090710129, |
| "learning_rate": 9.991333309114798e-07, |
| "loss": 0.0765, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.09463148316651501, |
| "grad_norm": 1.279994579409623, |
| "learning_rate": 9.991248986867752e-07, |
| "loss": 0.0764, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09508644222020018, |
| "grad_norm": 2.1446370224541944, |
| "learning_rate": 9.991164256758377e-07, |
| "loss": 0.0849, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09554140127388536, |
| "grad_norm": 1.9861669945110423, |
| "learning_rate": 9.991079118793599e-07, |
| "loss": 0.1085, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09599636032757052, |
| "grad_norm": 1.2727945113244215, |
| "learning_rate": 9.990993572980376e-07, |
| "loss": 0.0604, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.09645131938125569, |
| "grad_norm": 1.5167661565264423, |
| "learning_rate": 9.990907619325699e-07, |
| "loss": 0.0764, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.09690627843494086, |
| "grad_norm": 1.8297145638902679, |
| "learning_rate": 9.990821257836587e-07, |
| "loss": 0.0645, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.09736123748862602, |
| "grad_norm": 1.5890455937330983, |
| "learning_rate": 9.990734488520102e-07, |
| "loss": 0.0921, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.09781619654231119, |
| "grad_norm": 1.1160881756443122, |
| "learning_rate": 9.990647311383334e-07, |
| "loss": 0.0438, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09827115559599636, |
| "grad_norm": 2.4418827554895692, |
| "learning_rate": 9.990559726433402e-07, |
| "loss": 0.0815, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.09872611464968153, |
| "grad_norm": 1.1378394657905113, |
| "learning_rate": 9.990471733677468e-07, |
| "loss": 0.0533, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.09918107370336669, |
| "grad_norm": 1.0552640743236452, |
| "learning_rate": 9.99038333312272e-07, |
| "loss": 0.059, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.09963603275705187, |
| "grad_norm": 0.9353344080834061, |
| "learning_rate": 9.990294524776383e-07, |
| "loss": 0.0453, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10009099181073704, |
| "grad_norm": 1.157580669698293, |
| "learning_rate": 9.990205308645714e-07, |
| "loss": 0.0656, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1005459508644222, |
| "grad_norm": 0.97129693549583, |
| "learning_rate": 9.990115684738003e-07, |
| "loss": 0.0409, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10100090991810737, |
| "grad_norm": 1.114729616865196, |
| "learning_rate": 9.990025653060571e-07, |
| "loss": 0.0678, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10145586897179254, |
| "grad_norm": 1.6357787684705223, |
| "learning_rate": 9.98993521362078e-07, |
| "loss": 0.0982, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10191082802547771, |
| "grad_norm": 1.7357276362378158, |
| "learning_rate": 9.989844366426017e-07, |
| "loss": 0.1056, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10236578707916287, |
| "grad_norm": 0.8966549628660352, |
| "learning_rate": 9.989753111483705e-07, |
| "loss": 0.0379, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10282074613284804, |
| "grad_norm": 1.475329265238904, |
| "learning_rate": 9.989661448801302e-07, |
| "loss": 0.0826, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10327570518653321, |
| "grad_norm": 1.078938369819215, |
| "learning_rate": 9.989569378386302e-07, |
| "loss": 0.0632, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10373066424021839, |
| "grad_norm": 0.8607431700980628, |
| "learning_rate": 9.989476900246223e-07, |
| "loss": 0.0481, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.10418562329390355, |
| "grad_norm": 1.5366919549219145, |
| "learning_rate": 9.989384014388623e-07, |
| "loss": 0.0824, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10464058234758872, |
| "grad_norm": 0.9856687411956576, |
| "learning_rate": 9.989290720821093e-07, |
| "loss": 0.0602, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10509554140127389, |
| "grad_norm": 1.15791640911629, |
| "learning_rate": 9.989197019551259e-07, |
| "loss": 0.0636, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10555050045495905, |
| "grad_norm": 1.109842184462552, |
| "learning_rate": 9.989102910586774e-07, |
| "loss": 0.0538, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.10600545950864422, |
| "grad_norm": 1.2643069571576309, |
| "learning_rate": 9.98900839393533e-07, |
| "loss": 0.0667, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.10646041856232939, |
| "grad_norm": 1.884418925771264, |
| "learning_rate": 9.988913469604649e-07, |
| "loss": 0.0643, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.10691537761601456, |
| "grad_norm": 0.8160882482716078, |
| "learning_rate": 9.988818137602492e-07, |
| "loss": 0.056, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.10737033666969972, |
| "grad_norm": 0.9647512675410035, |
| "learning_rate": 9.988722397936644e-07, |
| "loss": 0.0644, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1078252957233849, |
| "grad_norm": 1.4831200436529093, |
| "learning_rate": 9.98862625061493e-07, |
| "loss": 0.0704, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.10828025477707007, |
| "grad_norm": 1.1363581545946497, |
| "learning_rate": 9.988529695645208e-07, |
| "loss": 0.0594, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.10873521383075523, |
| "grad_norm": 1.0825887558294587, |
| "learning_rate": 9.988432733035368e-07, |
| "loss": 0.0425, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1091901728844404, |
| "grad_norm": 1.0742908192322842, |
| "learning_rate": 9.988335362793332e-07, |
| "loss": 0.0612, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10964513193812557, |
| "grad_norm": 1.4490316756613832, |
| "learning_rate": 9.988237584927058e-07, |
| "loss": 0.0852, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11010009099181074, |
| "grad_norm": 1.7627374272466951, |
| "learning_rate": 9.988139399444533e-07, |
| "loss": 0.1052, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1105550500454959, |
| "grad_norm": 1.2720322330503708, |
| "learning_rate": 9.988040806353785e-07, |
| "loss": 0.0749, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11101000909918107, |
| "grad_norm": 1.3864180916313802, |
| "learning_rate": 9.987941805662868e-07, |
| "loss": 0.0875, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11146496815286625, |
| "grad_norm": 1.7655726368367308, |
| "learning_rate": 9.98784239737987e-07, |
| "loss": 0.0517, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11191992720655142, |
| "grad_norm": 1.1316272357044634, |
| "learning_rate": 9.987742581512916e-07, |
| "loss": 0.0507, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11237488626023658, |
| "grad_norm": 1.002032807340167, |
| "learning_rate": 9.987642358070166e-07, |
| "loss": 0.0474, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11282984531392175, |
| "grad_norm": 1.2909143562499068, |
| "learning_rate": 9.987541727059802e-07, |
| "loss": 0.0797, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11328480436760692, |
| "grad_norm": 1.261509337485699, |
| "learning_rate": 9.987440688490057e-07, |
| "loss": 0.0581, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.11373976342129208, |
| "grad_norm": 1.4433807092243254, |
| "learning_rate": 9.987339242369178e-07, |
| "loss": 0.0648, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11419472247497725, |
| "grad_norm": 1.1641001365141075, |
| "learning_rate": 9.98723738870546e-07, |
| "loss": 0.0536, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11464968152866242, |
| "grad_norm": 1.463192110680986, |
| "learning_rate": 9.987135127507225e-07, |
| "loss": 0.0687, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1151046405823476, |
| "grad_norm": 1.5589009929123119, |
| "learning_rate": 9.987032458782826e-07, |
| "loss": 0.0809, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.11555959963603275, |
| "grad_norm": 0.8981801314196295, |
| "learning_rate": 9.986929382540661e-07, |
| "loss": 0.0377, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.11601455868971793, |
| "grad_norm": 1.3046134929926776, |
| "learning_rate": 9.986825898789143e-07, |
| "loss": 0.0584, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1164695177434031, |
| "grad_norm": 1.1968271902775738, |
| "learning_rate": 9.986722007536736e-07, |
| "loss": 0.0604, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.11692447679708826, |
| "grad_norm": 0.9256739589207105, |
| "learning_rate": 9.986617708791926e-07, |
| "loss": 0.0563, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.11737943585077343, |
| "grad_norm": 1.0562236765098105, |
| "learning_rate": 9.986513002563234e-07, |
| "loss": 0.0441, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1178343949044586, |
| "grad_norm": 1.1396797838942825, |
| "learning_rate": 9.986407888859221e-07, |
| "loss": 0.0779, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.11828935395814377, |
| "grad_norm": 0.9511338510504711, |
| "learning_rate": 9.986302367688472e-07, |
| "loss": 0.0479, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11874431301182893, |
| "grad_norm": 1.2626792340501498, |
| "learning_rate": 9.986196439059613e-07, |
| "loss": 0.054, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1191992720655141, |
| "grad_norm": 1.2414582106094108, |
| "learning_rate": 9.986090102981297e-07, |
| "loss": 0.0637, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.11965423111919928, |
| "grad_norm": 1.3386637457925286, |
| "learning_rate": 9.985983359462214e-07, |
| "loss": 0.0559, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12010919017288443, |
| "grad_norm": 1.5528857942460832, |
| "learning_rate": 9.98587620851109e-07, |
| "loss": 0.1111, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1205641492265696, |
| "grad_norm": 1.0084557380928558, |
| "learning_rate": 9.985768650136676e-07, |
| "loss": 0.0528, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12101910828025478, |
| "grad_norm": 0.9836579322808242, |
| "learning_rate": 9.985660684347765e-07, |
| "loss": 0.0723, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12147406733393995, |
| "grad_norm": 1.651136541287689, |
| "learning_rate": 9.985552311153176e-07, |
| "loss": 0.0901, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12192902638762511, |
| "grad_norm": 0.99646358827593, |
| "learning_rate": 9.985443530561768e-07, |
| "loss": 0.077, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12238398544131028, |
| "grad_norm": 1.6243418259246538, |
| "learning_rate": 9.98533434258243e-07, |
| "loss": 0.0919, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.12283894449499545, |
| "grad_norm": 1.0467582706654044, |
| "learning_rate": 9.985224747224082e-07, |
| "loss": 0.0484, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12329390354868063, |
| "grad_norm": 1.736810556376589, |
| "learning_rate": 9.98511474449568e-07, |
| "loss": 0.0753, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12374886260236578, |
| "grad_norm": 1.0004729483670864, |
| "learning_rate": 9.985004334406213e-07, |
| "loss": 0.0596, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12420382165605096, |
| "grad_norm": 1.2151561323315139, |
| "learning_rate": 9.984893516964706e-07, |
| "loss": 0.0582, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12465878070973613, |
| "grad_norm": 1.828699579113966, |
| "learning_rate": 9.98478229218021e-07, |
| "loss": 0.1102, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1251137397634213, |
| "grad_norm": 0.9077132204761329, |
| "learning_rate": 9.984670660061819e-07, |
| "loss": 0.0481, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12556869881710647, |
| "grad_norm": 0.9423342802512885, |
| "learning_rate": 9.98455862061865e-07, |
| "loss": 0.0767, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.12602365787079162, |
| "grad_norm": 1.6287766199301192, |
| "learning_rate": 9.984446173859861e-07, |
| "loss": 0.1155, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1264786169244768, |
| "grad_norm": 1.0561750039623734, |
| "learning_rate": 9.98433331979464e-07, |
| "loss": 0.0595, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.12693357597816196, |
| "grad_norm": 0.972781180486093, |
| "learning_rate": 9.98422005843221e-07, |
| "loss": 0.0484, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 1.1854391526869976, |
| "learning_rate": 9.984106389781825e-07, |
| "loss": 0.0482, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1278434940855323, |
| "grad_norm": 1.3197396977134914, |
| "learning_rate": 9.983992313852773e-07, |
| "loss": 0.0623, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.12829845313921748, |
| "grad_norm": 1.6362190624757336, |
| "learning_rate": 9.983877830654378e-07, |
| "loss": 0.0715, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.12875341219290265, |
| "grad_norm": 0.9077240765741056, |
| "learning_rate": 9.983762940195995e-07, |
| "loss": 0.0631, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1292083712465878, |
| "grad_norm": 1.1421219589647587, |
| "learning_rate": 9.983647642487009e-07, |
| "loss": 0.0583, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.12966333030027297, |
| "grad_norm": 1.8376550060739605, |
| "learning_rate": 9.983531937536844e-07, |
| "loss": 0.0801, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13011828935395814, |
| "grad_norm": 1.2064252363743322, |
| "learning_rate": 9.983415825354954e-07, |
| "loss": 0.0479, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1305732484076433, |
| "grad_norm": 1.3522899893414668, |
| "learning_rate": 9.983299305950828e-07, |
| "loss": 0.0533, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13102820746132848, |
| "grad_norm": 0.8486243193478907, |
| "learning_rate": 9.983182379333988e-07, |
| "loss": 0.0466, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13148316651501366, |
| "grad_norm": 1.1699982302380614, |
| "learning_rate": 9.983065045513985e-07, |
| "loss": 0.0623, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13193812556869883, |
| "grad_norm": 0.8620933594779188, |
| "learning_rate": 9.982947304500413e-07, |
| "loss": 0.0448, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13239308462238397, |
| "grad_norm": 0.9334314458594886, |
| "learning_rate": 9.982829156302889e-07, |
| "loss": 0.0525, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.13284804367606914, |
| "grad_norm": 2.1870474292658284, |
| "learning_rate": 9.982710600931068e-07, |
| "loss": 0.0999, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13330300272975432, |
| "grad_norm": 1.206782826393217, |
| "learning_rate": 9.982591638394639e-07, |
| "loss": 0.0581, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1337579617834395, |
| "grad_norm": 0.9252727575790715, |
| "learning_rate": 9.98247226870332e-07, |
| "loss": 0.0421, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13421292083712466, |
| "grad_norm": 1.1920898160478353, |
| "learning_rate": 9.982352491866872e-07, |
| "loss": 0.0568, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13466787989080983, |
| "grad_norm": 0.9875857130572099, |
| "learning_rate": 9.982232307895076e-07, |
| "loss": 0.0472, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.135122838944495, |
| "grad_norm": 2.2221045162643143, |
| "learning_rate": 9.982111716797757e-07, |
| "loss": 0.0822, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.13557779799818018, |
| "grad_norm": 1.4938459925674554, |
| "learning_rate": 9.981990718584767e-07, |
| "loss": 0.0852, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.13603275705186532, |
| "grad_norm": 1.1066809951614336, |
| "learning_rate": 9.981869313265993e-07, |
| "loss": 0.0703, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1364877161055505, |
| "grad_norm": 1.6923355030264176, |
| "learning_rate": 9.981747500851356e-07, |
| "loss": 0.0553, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13694267515923567, |
| "grad_norm": 1.171335220808776, |
| "learning_rate": 9.981625281350813e-07, |
| "loss": 0.0586, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.13739763421292084, |
| "grad_norm": 1.070896005930655, |
| "learning_rate": 9.981502654774347e-07, |
| "loss": 0.0444, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.137852593266606, |
| "grad_norm": 1.1728237717715497, |
| "learning_rate": 9.98137962113198e-07, |
| "loss": 0.0618, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.13830755232029118, |
| "grad_norm": 1.1905967346525261, |
| "learning_rate": 9.981256180433768e-07, |
| "loss": 0.0675, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.13876251137397635, |
| "grad_norm": 0.8684976727617932, |
| "learning_rate": 9.981132332689794e-07, |
| "loss": 0.0466, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1392174704276615, |
| "grad_norm": 1.036935632301667, |
| "learning_rate": 9.981008077910183e-07, |
| "loss": 0.0579, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.13967242948134667, |
| "grad_norm": 1.5316413035476537, |
| "learning_rate": 9.980883416105083e-07, |
| "loss": 0.0588, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14012738853503184, |
| "grad_norm": 0.831351740285057, |
| "learning_rate": 9.980758347284685e-07, |
| "loss": 0.0477, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14058234758871702, |
| "grad_norm": 1.1114213029102946, |
| "learning_rate": 9.980632871459208e-07, |
| "loss": 0.056, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1410373066424022, |
| "grad_norm": 0.9174179487762331, |
| "learning_rate": 9.980506988638905e-07, |
| "loss": 0.0689, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14149226569608736, |
| "grad_norm": 0.8932952501778931, |
| "learning_rate": 9.980380698834064e-07, |
| "loss": 0.0594, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.14194722474977253, |
| "grad_norm": 1.2719258167198786, |
| "learning_rate": 9.980254002055e-07, |
| "loss": 0.0588, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14240218380345768, |
| "grad_norm": 0.9786921183743694, |
| "learning_rate": 9.980126898312072e-07, |
| "loss": 0.0457, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.2371622871220518, |
| "learning_rate": 9.979999387615663e-07, |
| "loss": 0.0671, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14331210191082802, |
| "grad_norm": 1.470791435721143, |
| "learning_rate": 9.979871469976195e-07, |
| "loss": 0.0698, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1437670609645132, |
| "grad_norm": 1.7240976919181443, |
| "learning_rate": 9.97974314540412e-07, |
| "loss": 0.0601, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14422202001819837, |
| "grad_norm": 1.977436972779476, |
| "learning_rate": 9.979614413909921e-07, |
| "loss": 0.0926, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.14467697907188354, |
| "grad_norm": 0.6968374388220965, |
| "learning_rate": 9.979485275504121e-07, |
| "loss": 0.0407, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1451319381255687, |
| "grad_norm": 1.973909641193579, |
| "learning_rate": 9.97935573019727e-07, |
| "loss": 0.0893, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.14558689717925385, |
| "grad_norm": 0.9393128304458204, |
| "learning_rate": 9.979225777999956e-07, |
| "loss": 0.037, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14604185623293903, |
| "grad_norm": 1.3609794979063135, |
| "learning_rate": 9.979095418922797e-07, |
| "loss": 0.0582, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1464968152866242, |
| "grad_norm": 0.8197293804224054, |
| "learning_rate": 9.978964652976446e-07, |
| "loss": 0.0426, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.14695177434030937, |
| "grad_norm": 1.158997352707507, |
| "learning_rate": 9.978833480171591e-07, |
| "loss": 0.0645, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.14740673339399454, |
| "grad_norm": 1.3440812732849405, |
| "learning_rate": 9.978701900518946e-07, |
| "loss": 0.0516, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.14786169244767972, |
| "grad_norm": 1.237216388178485, |
| "learning_rate": 9.978569914029265e-07, |
| "loss": 0.0744, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1483166515013649, |
| "grad_norm": 1.2020225514578025, |
| "learning_rate": 9.978437520713334e-07, |
| "loss": 0.0648, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.14877161055505003, |
| "grad_norm": 1.707463401339971, |
| "learning_rate": 9.97830472058197e-07, |
| "loss": 0.0659, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1492265696087352, |
| "grad_norm": 1.5089520418583864, |
| "learning_rate": 9.97817151364603e-07, |
| "loss": 0.0873, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.14968152866242038, |
| "grad_norm": 1.1521285274825894, |
| "learning_rate": 9.978037899916391e-07, |
| "loss": 0.0675, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15013648771610555, |
| "grad_norm": 1.2580161193848187, |
| "learning_rate": 9.977903879403978e-07, |
| "loss": 0.041, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15059144676979072, |
| "grad_norm": 1.6179915778926504, |
| "learning_rate": 9.97776945211974e-07, |
| "loss": 0.0642, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1510464058234759, |
| "grad_norm": 2.0103740445789726, |
| "learning_rate": 9.97763461807466e-07, |
| "loss": 0.0653, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.15150136487716107, |
| "grad_norm": 1.7810437636409457, |
| "learning_rate": 9.977499377279759e-07, |
| "loss": 0.0782, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.15195632393084624, |
| "grad_norm": 1.2715761230366713, |
| "learning_rate": 9.977363729746086e-07, |
| "loss": 0.0638, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15241128298453138, |
| "grad_norm": 1.0301107381047787, |
| "learning_rate": 9.977227675484728e-07, |
| "loss": 0.0489, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15286624203821655, |
| "grad_norm": 1.559853805666478, |
| "learning_rate": 9.977091214506801e-07, |
| "loss": 0.0685, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15332120109190173, |
| "grad_norm": 1.9357378638541771, |
| "learning_rate": 9.976954346823455e-07, |
| "loss": 0.0706, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1537761601455869, |
| "grad_norm": 0.9386653307796838, |
| "learning_rate": 9.976817072445876e-07, |
| "loss": 0.0384, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.15423111919927207, |
| "grad_norm": 1.1849304842409263, |
| "learning_rate": 9.976679391385283e-07, |
| "loss": 0.0544, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.15468607825295724, |
| "grad_norm": 0.9794514859133302, |
| "learning_rate": 9.976541303652921e-07, |
| "loss": 0.0395, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15514103730664242, |
| "grad_norm": 1.522980464711304, |
| "learning_rate": 9.97640280926008e-07, |
| "loss": 0.0553, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.15559599636032756, |
| "grad_norm": 1.114897917471998, |
| "learning_rate": 9.976263908218075e-07, |
| "loss": 0.0562, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.15605095541401273, |
| "grad_norm": 1.4106813797280966, |
| "learning_rate": 9.976124600538254e-07, |
| "loss": 0.0842, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1565059144676979, |
| "grad_norm": 0.9765199742846444, |
| "learning_rate": 9.975984886232005e-07, |
| "loss": 0.0604, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.15696087352138308, |
| "grad_norm": 1.7134665109938934, |
| "learning_rate": 9.975844765310742e-07, |
| "loss": 0.065, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.15741583257506825, |
| "grad_norm": 1.6486501390150328, |
| "learning_rate": 9.975704237785914e-07, |
| "loss": 0.0981, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.15787079162875342, |
| "grad_norm": 1.3598544586513228, |
| "learning_rate": 9.975563303669005e-07, |
| "loss": 0.074, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1583257506824386, |
| "grad_norm": 1.3859738189047066, |
| "learning_rate": 9.975421962971535e-07, |
| "loss": 0.0771, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.15878070973612374, |
| "grad_norm": 1.3273068228762428, |
| "learning_rate": 9.97528021570505e-07, |
| "loss": 0.0891, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1592356687898089, |
| "grad_norm": 1.008603335566121, |
| "learning_rate": 9.975138061881134e-07, |
| "loss": 0.0501, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15969062784349408, |
| "grad_norm": 1.2683219501430032, |
| "learning_rate": 9.974995501511404e-07, |
| "loss": 0.0436, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16014558689717925, |
| "grad_norm": 1.7861764054006601, |
| "learning_rate": 9.974852534607505e-07, |
| "loss": 0.0792, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16060054595086443, |
| "grad_norm": 1.1324807753830382, |
| "learning_rate": 9.974709161181125e-07, |
| "loss": 0.067, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1610555050045496, |
| "grad_norm": 1.4672040252585383, |
| "learning_rate": 9.97456538124398e-07, |
| "loss": 0.0708, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16151046405823477, |
| "grad_norm": 0.9934827912617749, |
| "learning_rate": 9.974421194807814e-07, |
| "loss": 0.0564, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16196542311191992, |
| "grad_norm": 1.414051763184749, |
| "learning_rate": 9.974276601884415e-07, |
| "loss": 0.0583, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1624203821656051, |
| "grad_norm": 1.0382802063632235, |
| "learning_rate": 9.974131602485593e-07, |
| "loss": 0.0487, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16287534121929026, |
| "grad_norm": 1.2586454862238812, |
| "learning_rate": 9.9739861966232e-07, |
| "loss": 0.0673, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16333030027297543, |
| "grad_norm": 1.6838254742760412, |
| "learning_rate": 9.97384038430912e-07, |
| "loss": 0.0581, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1637852593266606, |
| "grad_norm": 0.8399174068109083, |
| "learning_rate": 9.973694165555263e-07, |
| "loss": 0.0466, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424021838034578, |
| "grad_norm": 1.69300928648134, |
| "learning_rate": 9.973547540373581e-07, |
| "loss": 0.0665, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.16469517743403095, |
| "grad_norm": 1.3262644115821105, |
| "learning_rate": 9.973400508776053e-07, |
| "loss": 0.0782, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1651501364877161, |
| "grad_norm": 0.8615488987713292, |
| "learning_rate": 9.973253070774696e-07, |
| "loss": 0.0457, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.16560509554140126, |
| "grad_norm": 1.4429396631470555, |
| "learning_rate": 9.973105226381557e-07, |
| "loss": 0.0573, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.16606005459508644, |
| "grad_norm": 1.2208505509003302, |
| "learning_rate": 9.972956975608717e-07, |
| "loss": 0.0775, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1665150136487716, |
| "grad_norm": 1.5660572502770571, |
| "learning_rate": 9.97280831846829e-07, |
| "loss": 0.0716, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.16696997270245678, |
| "grad_norm": 1.1700776292958537, |
| "learning_rate": 9.972659254972426e-07, |
| "loss": 0.0452, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.16742493175614195, |
| "grad_norm": 1.5529551923189542, |
| "learning_rate": 9.972509785133304e-07, |
| "loss": 0.0757, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 1.0967911703334838, |
| "learning_rate": 9.972359908963134e-07, |
| "loss": 0.0479, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 1.121195555833821, |
| "learning_rate": 9.972209626474171e-07, |
| "loss": 0.059, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 1.0602851512538802, |
| "learning_rate": 9.97205893767869e-07, |
| "loss": 0.0472, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 1.6258552400713955, |
| "learning_rate": 9.97190784258901e-07, |
| "loss": 0.0836, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 1.1061834571332927, |
| "learning_rate": 9.97175634121747e-07, |
| "loss": 0.0409, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 1.2773941061251708, |
| "learning_rate": 9.971604433576454e-07, |
| "loss": 0.0481, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 1.2094234552253622, |
| "learning_rate": 9.971452119678378e-07, |
| "loss": 0.0617, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 1.3606176451795315, |
| "learning_rate": 9.971299399535683e-07, |
| "loss": 0.04, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 0.961589378037891, |
| "learning_rate": 9.971146273160853e-07, |
| "loss": 0.0518, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 0.9551141213830807, |
| "learning_rate": 9.970992740566397e-07, |
| "loss": 0.0435, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 1.1061327916265118, |
| "learning_rate": 9.970838801764864e-07, |
| "loss": 0.0684, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 1.7551193923627544, |
| "learning_rate": 9.970684456768834e-07, |
| "loss": 0.0923, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 61.55739208115707, |
| "learning_rate": 9.970529705590917e-07, |
| "loss": 0.7199, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 1.062292372101319, |
| "learning_rate": 9.97037454824376e-07, |
| "loss": 0.0599, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 2.182495657030285, |
| "learning_rate": 9.970218984740038e-07, |
| "loss": 0.0867, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 1.2018828615521753, |
| "learning_rate": 9.970063015092466e-07, |
| "loss": 0.0575, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 2.090124221916724, |
| "learning_rate": 9.96990663931379e-07, |
| "loss": 0.0891, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 1.5328987798385059, |
| "learning_rate": 9.969749857416787e-07, |
| "loss": 0.0568, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 1.6062714136358844, |
| "learning_rate": 9.969592669414271e-07, |
| "loss": 0.0754, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 61.447015835286706, |
| "learning_rate": 9.969435075319082e-07, |
| "loss": 0.3728, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 1.196371461247331, |
| "learning_rate": 9.969277075144103e-07, |
| "loss": 0.0339, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 1.0547191705603283, |
| "learning_rate": 9.96911866890224e-07, |
| "loss": 0.052, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 1.636068604244968, |
| "learning_rate": 9.96895985660644e-07, |
| "loss": 0.0476, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 1.1179771422167613, |
| "learning_rate": 9.968800638269681e-07, |
| "loss": 0.0485, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 1.8062730281108257, |
| "learning_rate": 9.968641013904973e-07, |
| "loss": 0.101, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 1.3090872450284912, |
| "learning_rate": 9.968480983525359e-07, |
| "loss": 0.0726, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 1.0383883885799372, |
| "learning_rate": 9.968320547143916e-07, |
| "loss": 0.0506, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 1.2807828034028284, |
| "learning_rate": 9.968159704773755e-07, |
| "loss": 0.0743, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 0.9611825955304264, |
| "learning_rate": 9.96799845642802e-07, |
| "loss": 0.0482, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 1.0411952429933702, |
| "learning_rate": 9.967836802119884e-07, |
| "loss": 0.0498, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 1.222101450416741, |
| "learning_rate": 9.96767474186256e-07, |
| "loss": 0.0789, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 1.4322600827316334, |
| "learning_rate": 9.967512275669292e-07, |
| "loss": 0.106, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 1.2829081018513966, |
| "learning_rate": 9.967349403553352e-07, |
| "loss": 0.063, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 1.200933243638394, |
| "learning_rate": 9.967186125528051e-07, |
| "loss": 0.0743, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 1.8295483448302041, |
| "learning_rate": 9.967022441606733e-07, |
| "loss": 0.0874, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 1.0306589085176303, |
| "learning_rate": 9.96685835180277e-07, |
| "loss": 0.0577, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 0.9874463007020853, |
| "learning_rate": 9.966693856129574e-07, |
| "loss": 0.047, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 1.1366418912620115, |
| "learning_rate": 9.966528954600586e-07, |
| "loss": 0.0622, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 0.8680772630500574, |
| "learning_rate": 9.96636364722928e-07, |
| "loss": 0.0548, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 1.3804602960080863, |
| "learning_rate": 9.966197934029165e-07, |
| "loss": 0.0683, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 1.59662534548154, |
| "learning_rate": 9.96603181501378e-07, |
| "loss": 0.0675, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 1.3299660068409016, |
| "learning_rate": 9.965865290196703e-07, |
| "loss": 0.0663, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 2.671540218875107, |
| "learning_rate": 9.96569835959154e-07, |
| "loss": 0.0553, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 1.0571773866876972, |
| "learning_rate": 9.96553102321193e-07, |
| "loss": 0.0578, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 1.0398857538940627, |
| "learning_rate": 9.965363281071551e-07, |
| "loss": 0.0668, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 1.4312019555605475, |
| "learning_rate": 9.965195133184108e-07, |
| "loss": 0.0369, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 1.5897636214571498, |
| "learning_rate": 9.96502657956334e-07, |
| "loss": 0.0818, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 1.1532024813728672, |
| "learning_rate": 9.964857620223023e-07, |
| "loss": 0.0596, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 0.9518278091196013, |
| "learning_rate": 9.964688255176962e-07, |
| "loss": 0.05, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 1.5471894337999064, |
| "learning_rate": 9.964518484438998e-07, |
| "loss": 0.0563, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 1.3042951137857424, |
| "learning_rate": 9.964348308023001e-07, |
| "loss": 0.0458, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 1.089779462541599, |
| "learning_rate": 9.96417772594288e-07, |
| "loss": 0.0614, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 1.4054118320593119, |
| "learning_rate": 9.964006738212574e-07, |
| "loss": 0.0679, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 1.2982954130784632, |
| "learning_rate": 9.963835344846054e-07, |
| "loss": 0.0381, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 2.01173969090651, |
| "learning_rate": 9.963663545857326e-07, |
| "loss": 0.0528, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 1.9618141972515633, |
| "learning_rate": 9.96349134126043e-07, |
| "loss": 0.0838, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 1.0042217144229835, |
| "learning_rate": 9.963318731069436e-07, |
| "loss": 0.0733, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 2.4100555778342794, |
| "learning_rate": 9.963145715298449e-07, |
| "loss": 0.0739, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 1.500503281681586, |
| "learning_rate": 9.962972293961606e-07, |
| "loss": 0.0446, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 3.5903262452802367, |
| "learning_rate": 9.962798467073081e-07, |
| "loss": 0.0646, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 1.1886352602369927, |
| "learning_rate": 9.96262423464708e-07, |
| "loss": 0.0678, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 1.0944066722324761, |
| "learning_rate": 9.962449596697833e-07, |
| "loss": 0.0438, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 1.6641191926729932, |
| "learning_rate": 9.962274553239618e-07, |
| "loss": 0.091, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 0.8676639680944157, |
| "learning_rate": 9.962099104286735e-07, |
| "loss": 0.0514, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 1.517836992908138, |
| "learning_rate": 9.961923249853521e-07, |
| "loss": 0.085, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 1.7718658971906733, |
| "learning_rate": 9.961746989954348e-07, |
| "loss": 0.0776, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 1.0729427219458574, |
| "learning_rate": 9.961570324603619e-07, |
| "loss": 0.0421, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 1.2745743932943103, |
| "learning_rate": 9.961393253815766e-07, |
| "loss": 0.0655, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 1.7408841931848638, |
| "learning_rate": 9.961215777605264e-07, |
| "loss": 0.068, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 1.0885959204768643, |
| "learning_rate": 9.961037895986615e-07, |
| "loss": 0.0638, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 1.3587225368396716, |
| "learning_rate": 9.96085960897435e-07, |
| "loss": 0.0586, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 1.6064555114395305, |
| "learning_rate": 9.960680916583041e-07, |
| "loss": 0.0453, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 1.0240837682499038, |
| "learning_rate": 9.960501818827291e-07, |
| "loss": 0.0631, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 1.344540696401774, |
| "learning_rate": 9.960322315721735e-07, |
| "loss": 0.0493, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 1.7156213755343936, |
| "learning_rate": 9.960142407281039e-07, |
| "loss": 0.0669, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 0.6644642115399774, |
| "learning_rate": 9.959962093519902e-07, |
| "loss": 0.0361, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 1.049223043207897, |
| "learning_rate": 9.959781374453065e-07, |
| "loss": 0.0568, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 1.3083536330510794, |
| "learning_rate": 9.959600250095293e-07, |
| "loss": 0.0511, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 1.6091097914590038, |
| "learning_rate": 9.959418720461382e-07, |
| "loss": 0.0648, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 1.1847392762582725, |
| "learning_rate": 9.959236785566173e-07, |
| "loss": 0.0405, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 0.8757076055677958, |
| "learning_rate": 9.959054445424532e-07, |
| "loss": 0.0524, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 0.9429373676892292, |
| "learning_rate": 9.95887170005135e-07, |
| "loss": 0.0441, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 1.0894783560908516, |
| "learning_rate": 9.958688549461571e-07, |
| "loss": 0.0555, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 1.3373909572267781, |
| "learning_rate": 9.958504993670157e-07, |
| "loss": 0.0732, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 1.4816850476226722, |
| "learning_rate": 9.958321032692107e-07, |
| "loss": 0.1016, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 53.81813108365593, |
| "learning_rate": 9.958136666542454e-07, |
| "loss": 0.898, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 1.3654333042202123, |
| "learning_rate": 9.957951895236261e-07, |
| "loss": 0.0608, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 1.0155389279636473, |
| "learning_rate": 9.957766718788632e-07, |
| "loss": 0.0787, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 1.2290510013718745, |
| "learning_rate": 9.957581137214693e-07, |
| "loss": 0.0553, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 1.2111687813458196, |
| "learning_rate": 9.957395150529613e-07, |
| "loss": 0.0796, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 0.8268484664908767, |
| "learning_rate": 9.95720875874859e-07, |
| "loss": 0.0467, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 1.0724121457636888, |
| "learning_rate": 9.957021961886852e-07, |
| "loss": 0.0472, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 1.36288005491486, |
| "learning_rate": 9.956834759959667e-07, |
| "loss": 0.069, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 1.0671698426170464, |
| "learning_rate": 9.956647152982327e-07, |
| "loss": 0.0483, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 1.0082396911079197, |
| "learning_rate": 9.95645914097017e-07, |
| "loss": 0.0693, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 0.9047703234477522, |
| "learning_rate": 9.95627072393855e-07, |
| "loss": 0.0527, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 1.0304355765633446, |
| "learning_rate": 9.956081901902873e-07, |
| "loss": 0.0561, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 2.1359343716316777, |
| "learning_rate": 9.955892674878562e-07, |
| "loss": 0.1373, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 1.095854007962646, |
| "learning_rate": 9.955703042881084e-07, |
| "loss": 0.0708, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 1.1290628737529602, |
| "learning_rate": 9.955513005925934e-07, |
| "loss": 0.065, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 0.8400043936670604, |
| "learning_rate": 9.955322564028639e-07, |
| "loss": 0.0363, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 0.7393598237168953, |
| "learning_rate": 9.955131717204761e-07, |
| "loss": 0.0485, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 1.0910463695302328, |
| "learning_rate": 9.954940465469896e-07, |
| "loss": 0.0545, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 1.0009365863779738, |
| "learning_rate": 9.954748808839674e-07, |
| "loss": 0.0543, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 1.126683118313642, |
| "learning_rate": 9.954556747329753e-07, |
| "loss": 0.054, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 2.642066847598599, |
| "learning_rate": 9.95436428095583e-07, |
| "loss": 0.1234, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 0.9976397272874167, |
| "learning_rate": 9.954171409733632e-07, |
| "loss": 0.0385, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 0.823622963218879, |
| "learning_rate": 9.95397813367892e-07, |
| "loss": 0.0504, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 1.3652338974635019, |
| "learning_rate": 9.953784452807485e-07, |
| "loss": 0.0736, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 0.7847256244107973, |
| "learning_rate": 9.953590367135157e-07, |
| "loss": 0.041, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 1.129093580464179, |
| "learning_rate": 9.953395876677795e-07, |
| "loss": 0.041, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 0.7402537923820297, |
| "learning_rate": 9.95320098145129e-07, |
| "loss": 0.0369, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 1.268108716381297, |
| "learning_rate": 9.95300568147157e-07, |
| "loss": 0.0988, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 0.9397260025959921, |
| "learning_rate": 9.952809976754592e-07, |
| "loss": 0.0368, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 1.3811632120582926, |
| "learning_rate": 9.95261386731635e-07, |
| "loss": 0.0748, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 1.1016438359971403, |
| "learning_rate": 9.95241735317287e-07, |
| "loss": 0.0802, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 1.3205496202774076, |
| "learning_rate": 9.952220434340208e-07, |
| "loss": 0.0517, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 1.0603265513262499, |
| "learning_rate": 9.952023110834455e-07, |
| "loss": 0.0508, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 0.8451250385457886, |
| "learning_rate": 9.951825382671737e-07, |
| "loss": 0.0527, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 1.0171819004806768, |
| "learning_rate": 9.951627249868212e-07, |
| "loss": 0.0399, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 1.0453017379544192, |
| "learning_rate": 9.951428712440069e-07, |
| "loss": 0.0468, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 0.8995715474035276, |
| "learning_rate": 9.95122977040353e-07, |
| "loss": 0.0283, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 0.9039851559662124, |
| "learning_rate": 9.951030423774858e-07, |
| "loss": 0.0556, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 1.6175216247717694, |
| "learning_rate": 9.950830672570335e-07, |
| "loss": 0.0591, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 1.7511997475900873, |
| "learning_rate": 9.950630516806288e-07, |
| "loss": 0.1114, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 1.1373427610842786, |
| "learning_rate": 9.950429956499072e-07, |
| "loss": 0.0518, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 0.8358050895339546, |
| "learning_rate": 9.950228991665078e-07, |
| "loss": 0.0531, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 1.3531918761874655, |
| "learning_rate": 9.950027622320723e-07, |
| "loss": 0.0573, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 1.2768329156974692, |
| "learning_rate": 9.949825848482465e-07, |
| "loss": 0.0597, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 1.0159602004460029, |
| "learning_rate": 9.949623670166793e-07, |
| "loss": 0.0584, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 4.869001899537454, |
| "learning_rate": 9.949421087390225e-07, |
| "loss": 0.1167, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 1.5083888104631782, |
| "learning_rate": 9.94921810016932e-07, |
| "loss": 0.0954, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 1.3342128194101248, |
| "learning_rate": 9.949014708520663e-07, |
| "loss": 0.0754, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 1.2498802034306913, |
| "learning_rate": 9.94881091246087e-07, |
| "loss": 0.0675, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 1.523623590025437, |
| "learning_rate": 9.9486067120066e-07, |
| "loss": 0.0547, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 1.0293101719236053, |
| "learning_rate": 9.948402107174536e-07, |
| "loss": 0.055, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 0.9895457631928973, |
| "learning_rate": 9.9481970979814e-07, |
| "loss": 0.0429, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 1.8146151677736246, |
| "learning_rate": 9.94799168444394e-07, |
| "loss": 0.063, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 1.5501254904610202, |
| "learning_rate": 9.94778586657895e-07, |
| "loss": 0.0722, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 0.9718145426138437, |
| "learning_rate": 9.94757964440324e-07, |
| "loss": 0.0537, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 1.6338186673908792, |
| "learning_rate": 9.947373017933663e-07, |
| "loss": 0.0378, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 0.9556135092500326, |
| "learning_rate": 9.947165987187107e-07, |
| "loss": 0.0447, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 0.8078513633293719, |
| "learning_rate": 9.946958552180486e-07, |
| "loss": 0.0328, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 1.022761761604989, |
| "learning_rate": 9.946750712930754e-07, |
| "loss": 0.0359, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 1.1366788360995828, |
| "learning_rate": 9.946542469454893e-07, |
| "loss": 0.0718, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 0.9810382870543687, |
| "learning_rate": 9.94633382176992e-07, |
| "loss": 0.0534, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 1.581853556545306, |
| "learning_rate": 9.946124769892882e-07, |
| "loss": 0.0773, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 0.7644818434101259, |
| "learning_rate": 9.945915313840867e-07, |
| "loss": 0.0423, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 1.1026489788865934, |
| "learning_rate": 9.945705453630988e-07, |
| "loss": 0.0701, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 1.30242239343761, |
| "learning_rate": 9.945495189280394e-07, |
| "loss": 0.0775, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 1.3643812174363148, |
| "learning_rate": 9.945284520806266e-07, |
| "loss": 0.0592, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 0.7728712449894798, |
| "learning_rate": 9.94507344822582e-07, |
| "loss": 0.045, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 0.9753489954613876, |
| "learning_rate": 9.944861971556302e-07, |
| "loss": 0.0401, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 1.116447882604098, |
| "learning_rate": 9.944650090814996e-07, |
| "loss": 0.0506, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 1.7959469233171275, |
| "learning_rate": 9.944437806019215e-07, |
| "loss": 0.0531, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 0.9179182439207999, |
| "learning_rate": 9.944225117186304e-07, |
| "loss": 0.0561, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 0.753951520122613, |
| "learning_rate": 9.944012024333646e-07, |
| "loss": 0.0376, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 0.8101275452789138, |
| "learning_rate": 9.94379852747865e-07, |
| "loss": 0.0431, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 1.3152440421129992, |
| "learning_rate": 9.943584626638767e-07, |
| "loss": 0.0705, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 0.9322535268543007, |
| "learning_rate": 9.943370321831472e-07, |
| "loss": 0.0493, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 2.3492333838000303, |
| "learning_rate": 9.943155613074278e-07, |
| "loss": 0.0422, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 0.974498951251244, |
| "learning_rate": 9.94294050038473e-07, |
| "loss": 0.0641, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 37.95804074486314, |
| "learning_rate": 9.942724983780409e-07, |
| "loss": 0.3584, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 1.637859871500906, |
| "learning_rate": 9.942509063278921e-07, |
| "loss": 0.0868, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 1.1722515185911306, |
| "learning_rate": 9.942292738897913e-07, |
| "loss": 0.0593, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 1.018141151759373, |
| "learning_rate": 9.942076010655062e-07, |
| "loss": 0.0663, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 1.5807982542236025, |
| "learning_rate": 9.941858878568076e-07, |
| "loss": 0.0846, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 1.4544459365608695, |
| "learning_rate": 9.941641342654701e-07, |
| "loss": 0.0795, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 0.7991375127435578, |
| "learning_rate": 9.941423402932712e-07, |
| "loss": 0.041, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 1.3935629119093738, |
| "learning_rate": 9.941205059419918e-07, |
| "loss": 0.0807, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 1.0563722753433893, |
| "learning_rate": 9.94098631213416e-07, |
| "loss": 0.0525, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 1.1009271567670207, |
| "learning_rate": 9.940767161093316e-07, |
| "loss": 0.041, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 1.7826575028692246, |
| "learning_rate": 9.940547606315289e-07, |
| "loss": 0.0689, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 1.0336207217621083, |
| "learning_rate": 9.940327647818025e-07, |
| "loss": 0.0535, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 2.45401048234137, |
| "learning_rate": 9.940107285619495e-07, |
| "loss": 0.0512, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 1.19308205945342, |
| "learning_rate": 9.939886519737706e-07, |
| "loss": 0.0556, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 1.5225580058372714, |
| "learning_rate": 9.9396653501907e-07, |
| "loss": 0.0688, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 1.3121205826406792, |
| "learning_rate": 9.939443776996548e-07, |
| "loss": 0.0761, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 1.186650953021527, |
| "learning_rate": 9.93922180017336e-07, |
| "loss": 0.0487, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 1.2365716348507871, |
| "learning_rate": 9.93899941973927e-07, |
| "loss": 0.0926, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 1.159458828975085, |
| "learning_rate": 9.938776635712448e-07, |
| "loss": 0.0633, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 1.3562046587474255, |
| "learning_rate": 9.938553448111106e-07, |
| "loss": 0.0832, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 1.2395861301175688, |
| "learning_rate": 9.93832985695348e-07, |
| "loss": 0.0643, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 1.0153499872507543, |
| "learning_rate": 9.938105862257838e-07, |
| "loss": 0.0545, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 0.9451041008867292, |
| "learning_rate": 9.937881464042485e-07, |
| "loss": 0.0536, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 1.1282651296840591, |
| "learning_rate": 9.937656662325756e-07, |
| "loss": 0.0799, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 2.314570695596751, |
| "learning_rate": 9.937431457126027e-07, |
| "loss": 0.0623, |
| "step": 555 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3666645319680.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|