| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.989993328885924, |
| "eval_steps": 500, |
| "global_step": 935, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00533689126084056, |
| "grad_norm": 6.148000037568765, |
| "learning_rate": 4.2553191489361704e-07, |
| "loss": 0.9543, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01067378252168112, |
| "grad_norm": 5.76207120136237, |
| "learning_rate": 8.510638297872341e-07, |
| "loss": 0.9275, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.016010673782521682, |
| "grad_norm": 5.846133487180721, |
| "learning_rate": 1.276595744680851e-06, |
| "loss": 0.9352, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.02134756504336224, |
| "grad_norm": 5.948943250861422, |
| "learning_rate": 1.7021276595744682e-06, |
| "loss": 0.9911, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0266844563042028, |
| "grad_norm": 5.471501096137681, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.9284, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.032021347565043365, |
| "grad_norm": 4.598154721919055, |
| "learning_rate": 2.553191489361702e-06, |
| "loss": 0.9151, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.037358238825883926, |
| "grad_norm": 4.191582543667081, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 0.9174, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04269513008672448, |
| "grad_norm": 2.2852207165276175, |
| "learning_rate": 3.4042553191489363e-06, |
| "loss": 0.8733, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04803202134756504, |
| "grad_norm": 1.9926907110993184, |
| "learning_rate": 3.8297872340425535e-06, |
| "loss": 0.8863, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0533689126084056, |
| "grad_norm": 1.6897648811838724, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.8197, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05870580386924616, |
| "grad_norm": 4.197834380211571, |
| "learning_rate": 4.680851063829788e-06, |
| "loss": 0.8614, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06404269513008673, |
| "grad_norm": 4.349335585259934, |
| "learning_rate": 5.106382978723404e-06, |
| "loss": 0.8723, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06937958639092728, |
| "grad_norm": 3.969299235666275, |
| "learning_rate": 5.531914893617022e-06, |
| "loss": 0.8121, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07471647765176785, |
| "grad_norm": 3.1468331398249854, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 0.8111, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0800533689126084, |
| "grad_norm": 3.060340690213249, |
| "learning_rate": 6.382978723404256e-06, |
| "loss": 0.7968, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08539026017344896, |
| "grad_norm": 2.5011639930656364, |
| "learning_rate": 6.808510638297873e-06, |
| "loss": 0.7474, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.09072715143428953, |
| "grad_norm": 2.0423245778644086, |
| "learning_rate": 7.234042553191491e-06, |
| "loss": 0.7869, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.09606404269513008, |
| "grad_norm": 1.7376707160976537, |
| "learning_rate": 7.659574468085107e-06, |
| "loss": 0.7671, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.10140093395597065, |
| "grad_norm": 1.615732209627739, |
| "learning_rate": 8.085106382978723e-06, |
| "loss": 0.7463, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1067378252168112, |
| "grad_norm": 1.728049657874694, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.7339, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11207471647765177, |
| "grad_norm": 1.7850972209703333, |
| "learning_rate": 8.936170212765958e-06, |
| "loss": 0.7487, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.11741160773849232, |
| "grad_norm": 1.427350518416662, |
| "learning_rate": 9.361702127659576e-06, |
| "loss": 0.7327, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12274849899933289, |
| "grad_norm": 1.259778470170131, |
| "learning_rate": 9.787234042553192e-06, |
| "loss": 0.7032, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.12808539026017346, |
| "grad_norm": 1.2541341499268899, |
| "learning_rate": 1.0212765957446808e-05, |
| "loss": 0.7157, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.133422281521014, |
| "grad_norm": 1.209399238284116, |
| "learning_rate": 1.0638297872340426e-05, |
| "loss": 0.696, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.13875917278185457, |
| "grad_norm": 1.1148869413377325, |
| "learning_rate": 1.1063829787234044e-05, |
| "loss": 0.7342, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.14409606404269512, |
| "grad_norm": 1.0267231678585527, |
| "learning_rate": 1.1489361702127662e-05, |
| "loss": 0.7014, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.1494329553035357, |
| "grad_norm": 1.053000012577453, |
| "learning_rate": 1.1914893617021277e-05, |
| "loss": 0.7193, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.15476984656437626, |
| "grad_norm": 0.9236120588909748, |
| "learning_rate": 1.2340425531914895e-05, |
| "loss": 0.6848, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.1601067378252168, |
| "grad_norm": 0.707299824861324, |
| "learning_rate": 1.2765957446808513e-05, |
| "loss": 0.6967, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16544362908605736, |
| "grad_norm": 0.7954547209115258, |
| "learning_rate": 1.3191489361702127e-05, |
| "loss": 0.7101, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.17078052034689792, |
| "grad_norm": 0.7740855587703414, |
| "learning_rate": 1.3617021276595745e-05, |
| "loss": 0.6959, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1761174116077385, |
| "grad_norm": 0.6911657011135235, |
| "learning_rate": 1.4042553191489363e-05, |
| "loss": 0.6649, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.18145430286857905, |
| "grad_norm": 0.6352745397687202, |
| "learning_rate": 1.4468085106382981e-05, |
| "loss": 0.7012, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.1867911941294196, |
| "grad_norm": 0.5651606409699009, |
| "learning_rate": 1.4893617021276596e-05, |
| "loss": 0.6294, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.19212808539026016, |
| "grad_norm": 0.7512985605813014, |
| "learning_rate": 1.5319148936170214e-05, |
| "loss": 0.6641, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.19746497665110074, |
| "grad_norm": 0.6367644839194788, |
| "learning_rate": 1.5744680851063832e-05, |
| "loss": 0.6414, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2028018679119413, |
| "grad_norm": 0.6727260374168498, |
| "learning_rate": 1.6170212765957446e-05, |
| "loss": 0.6606, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.20813875917278185, |
| "grad_norm": 0.5863518339156979, |
| "learning_rate": 1.6595744680851064e-05, |
| "loss": 0.629, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.2134756504336224, |
| "grad_norm": 0.5957413274178028, |
| "learning_rate": 1.7021276595744682e-05, |
| "loss": 0.6433, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.218812541694463, |
| "grad_norm": 0.6599440739948166, |
| "learning_rate": 1.74468085106383e-05, |
| "loss": 0.6458, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.22414943295530354, |
| "grad_norm": 0.6721754940213285, |
| "learning_rate": 1.7872340425531915e-05, |
| "loss": 0.6492, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2294863242161441, |
| "grad_norm": 0.7632199119447193, |
| "learning_rate": 1.8297872340425533e-05, |
| "loss": 0.6746, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.23482321547698465, |
| "grad_norm": 0.5166294877933729, |
| "learning_rate": 1.872340425531915e-05, |
| "loss": 0.6295, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.24016010673782523, |
| "grad_norm": 0.829534817664272, |
| "learning_rate": 1.914893617021277e-05, |
| "loss": 0.6375, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.24549699799866578, |
| "grad_norm": 0.5857977032260275, |
| "learning_rate": 1.9574468085106384e-05, |
| "loss": 0.6572, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.25083388925950634, |
| "grad_norm": 0.7823636354279289, |
| "learning_rate": 2e-05, |
| "loss": 0.6504, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2561707805203469, |
| "grad_norm": 0.7047081341160781, |
| "learning_rate": 2.0425531914893616e-05, |
| "loss": 0.6228, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.26150767178118745, |
| "grad_norm": 0.6863987126187889, |
| "learning_rate": 2.0851063829787238e-05, |
| "loss": 0.6277, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.266844563042028, |
| "grad_norm": 0.6190735802489663, |
| "learning_rate": 2.1276595744680852e-05, |
| "loss": 0.6425, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.27218145430286855, |
| "grad_norm": 0.6101823544223539, |
| "learning_rate": 2.1702127659574467e-05, |
| "loss": 0.6437, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.27751834556370913, |
| "grad_norm": 0.6644263069643335, |
| "learning_rate": 2.2127659574468088e-05, |
| "loss": 0.6387, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2828552368245497, |
| "grad_norm": 0.5986962358886303, |
| "learning_rate": 2.2553191489361703e-05, |
| "loss": 0.6385, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.28819212808539024, |
| "grad_norm": 0.6094843736073496, |
| "learning_rate": 2.2978723404255324e-05, |
| "loss": 0.6117, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.2935290193462308, |
| "grad_norm": 0.6020778721421863, |
| "learning_rate": 2.340425531914894e-05, |
| "loss": 0.6254, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2988659106070714, |
| "grad_norm": 0.811050821660795, |
| "learning_rate": 2.3829787234042553e-05, |
| "loss": 0.6128, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.30420280186791193, |
| "grad_norm": 0.5754298827220308, |
| "learning_rate": 2.4255319148936175e-05, |
| "loss": 0.6311, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3095396931287525, |
| "grad_norm": 0.7520600773223384, |
| "learning_rate": 2.468085106382979e-05, |
| "loss": 0.6179, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.31487658438959304, |
| "grad_norm": 0.6029568660086547, |
| "learning_rate": 2.5106382978723404e-05, |
| "loss": 0.6297, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.3202134756504336, |
| "grad_norm": 0.5648234220070932, |
| "learning_rate": 2.5531914893617025e-05, |
| "loss": 0.6281, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3255503669112742, |
| "grad_norm": 0.6476246719033275, |
| "learning_rate": 2.595744680851064e-05, |
| "loss": 0.627, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.33088725817211473, |
| "grad_norm": 0.7385881909771188, |
| "learning_rate": 2.6382978723404255e-05, |
| "loss": 0.6173, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3362241494329553, |
| "grad_norm": 0.7821703232556236, |
| "learning_rate": 2.6808510638297876e-05, |
| "loss": 0.6044, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.34156104069379584, |
| "grad_norm": 0.6836861306090984, |
| "learning_rate": 2.723404255319149e-05, |
| "loss": 0.588, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3468979319546364, |
| "grad_norm": 0.6580791298040481, |
| "learning_rate": 2.7659574468085112e-05, |
| "loss": 0.6286, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.352234823215477, |
| "grad_norm": 0.864506493478045, |
| "learning_rate": 2.8085106382978727e-05, |
| "loss": 0.5923, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3575717144763175, |
| "grad_norm": 0.7017167406064279, |
| "learning_rate": 2.851063829787234e-05, |
| "loss": 0.5999, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3629086057371581, |
| "grad_norm": 0.9052162942115397, |
| "learning_rate": 2.8936170212765963e-05, |
| "loss": 0.6348, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3682454969979987, |
| "grad_norm": 0.7977337352191972, |
| "learning_rate": 2.9361702127659577e-05, |
| "loss": 0.5823, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3735823882588392, |
| "grad_norm": 0.741861801885776, |
| "learning_rate": 2.9787234042553192e-05, |
| "loss": 0.6078, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3789192795196798, |
| "grad_norm": 0.8436055916627975, |
| "learning_rate": 3.0212765957446813e-05, |
| "loss": 0.65, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3842561707805203, |
| "grad_norm": 0.9156977886774698, |
| "learning_rate": 3.063829787234043e-05, |
| "loss": 0.6131, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3895930620413609, |
| "grad_norm": 0.7176731849912837, |
| "learning_rate": 3.1063829787234046e-05, |
| "loss": 0.6065, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3949299533022015, |
| "grad_norm": 1.0096379849584094, |
| "learning_rate": 3.1489361702127664e-05, |
| "loss": 0.6437, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.400266844563042, |
| "grad_norm": 0.9015077136133018, |
| "learning_rate": 3.191489361702128e-05, |
| "loss": 0.5947, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4056037358238826, |
| "grad_norm": 0.9057483735249481, |
| "learning_rate": 3.234042553191489e-05, |
| "loss": 0.6039, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4109406270847232, |
| "grad_norm": 1.036920332839965, |
| "learning_rate": 3.276595744680851e-05, |
| "loss": 0.6058, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4162775183455637, |
| "grad_norm": 0.6532671217107379, |
| "learning_rate": 3.319148936170213e-05, |
| "loss": 0.6039, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4216144096064043, |
| "grad_norm": 0.861543032559001, |
| "learning_rate": 3.361702127659575e-05, |
| "loss": 0.6169, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4269513008672448, |
| "grad_norm": 0.9427328391918289, |
| "learning_rate": 3.4042553191489365e-05, |
| "loss": 0.5972, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4322881921280854, |
| "grad_norm": 0.8268349751981375, |
| "learning_rate": 3.446808510638298e-05, |
| "loss": 0.6028, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.437625083388926, |
| "grad_norm": 0.8010280657011346, |
| "learning_rate": 3.48936170212766e-05, |
| "loss": 0.5937, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.4429619746497665, |
| "grad_norm": 1.4163426245382202, |
| "learning_rate": 3.531914893617022e-05, |
| "loss": 0.5801, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4482988659106071, |
| "grad_norm": 0.8866001712478151, |
| "learning_rate": 3.574468085106383e-05, |
| "loss": 0.6121, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4536357571714476, |
| "grad_norm": 1.2990003965290253, |
| "learning_rate": 3.617021276595745e-05, |
| "loss": 0.6509, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4589726484322882, |
| "grad_norm": 0.9828125705811529, |
| "learning_rate": 3.6595744680851066e-05, |
| "loss": 0.6122, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.46430953969312877, |
| "grad_norm": 1.1075341839055335, |
| "learning_rate": 3.7021276595744684e-05, |
| "loss": 0.6258, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4696464309539693, |
| "grad_norm": 1.0144524046084673, |
| "learning_rate": 3.74468085106383e-05, |
| "loss": 0.6086, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4749833222148099, |
| "grad_norm": 1.0772540630578744, |
| "learning_rate": 3.787234042553192e-05, |
| "loss": 0.5999, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.48032021347565046, |
| "grad_norm": 0.881806469299722, |
| "learning_rate": 3.829787234042554e-05, |
| "loss": 0.6498, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.485657104736491, |
| "grad_norm": 0.9769728043077871, |
| "learning_rate": 3.872340425531915e-05, |
| "loss": 0.632, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.49099399599733157, |
| "grad_norm": 0.9506924647295948, |
| "learning_rate": 3.914893617021277e-05, |
| "loss": 0.5646, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4963308872581721, |
| "grad_norm": 0.697157023956671, |
| "learning_rate": 3.9574468085106385e-05, |
| "loss": 0.5814, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5016677785190127, |
| "grad_norm": 0.8702722934298305, |
| "learning_rate": 4e-05, |
| "loss": 0.6086, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5070046697798533, |
| "grad_norm": 0.8781146273756057, |
| "learning_rate": 3.9999860457259224e-05, |
| "loss": 0.579, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5123415610406938, |
| "grad_norm": 0.6961405489963192, |
| "learning_rate": 3.99994418309841e-05, |
| "loss": 0.6133, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5176784523015343, |
| "grad_norm": 0.7087239184079108, |
| "learning_rate": 3.9998744127016264e-05, |
| "loss": 0.5902, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5230153435623749, |
| "grad_norm": 0.5997862763213605, |
| "learning_rate": 3.999776735509166e-05, |
| "loss": 0.5875, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5283522348232155, |
| "grad_norm": 0.660612211013302, |
| "learning_rate": 3.999651152884044e-05, |
| "loss": 0.6003, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.533689126084056, |
| "grad_norm": 0.6214742049455745, |
| "learning_rate": 3.999497666578674e-05, |
| "loss": 0.6351, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5390260173448966, |
| "grad_norm": 0.764941637291837, |
| "learning_rate": 3.999316278734846e-05, |
| "loss": 0.5883, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5443629086057371, |
| "grad_norm": 0.5428320010347989, |
| "learning_rate": 3.9991069918836966e-05, |
| "loss": 0.607, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5496997998665777, |
| "grad_norm": 0.6973175643455113, |
| "learning_rate": 3.998869808945671e-05, |
| "loss": 0.5755, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5550366911274183, |
| "grad_norm": 0.5367648197100708, |
| "learning_rate": 3.998604733230485e-05, |
| "loss": 0.577, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5603735823882589, |
| "grad_norm": 0.6880065518848788, |
| "learning_rate": 3.998311768437078e-05, |
| "loss": 0.606, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5657104736490994, |
| "grad_norm": 0.6838905198209162, |
| "learning_rate": 3.9979909186535606e-05, |
| "loss": 0.5624, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.57104736490994, |
| "grad_norm": 0.6426687525240344, |
| "learning_rate": 3.9976421883571594e-05, |
| "loss": 0.6195, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5763842561707805, |
| "grad_norm": 0.7965064222410392, |
| "learning_rate": 3.9972655824141524e-05, |
| "loss": 0.6357, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5817211474316211, |
| "grad_norm": 0.5885934942607214, |
| "learning_rate": 3.996861106079801e-05, |
| "loss": 0.558, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5870580386924616, |
| "grad_norm": 0.7774147945831619, |
| "learning_rate": 3.9964287649982805e-05, |
| "loss": 0.5971, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5923949299533022, |
| "grad_norm": 0.5353107229236527, |
| "learning_rate": 3.9959685652025954e-05, |
| "loss": 0.5731, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5977318212141428, |
| "grad_norm": 0.8291069965223162, |
| "learning_rate": 3.995480513114501e-05, |
| "loss": 0.6127, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6030687124749833, |
| "grad_norm": 0.7325810415707809, |
| "learning_rate": 3.994964615544409e-05, |
| "loss": 0.6041, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6084056037358239, |
| "grad_norm": 0.6428699302157482, |
| "learning_rate": 3.994420879691296e-05, |
| "loss": 0.5808, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6137424949966644, |
| "grad_norm": 0.5760017273903351, |
| "learning_rate": 3.993849313142601e-05, |
| "loss": 0.5625, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.619079386257505, |
| "grad_norm": 0.6798380160844573, |
| "learning_rate": 3.9932499238741205e-05, |
| "loss": 0.5739, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6244162775183456, |
| "grad_norm": 0.6045010645880844, |
| "learning_rate": 3.992622720249896e-05, |
| "loss": 0.5535, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6297531687791861, |
| "grad_norm": 0.6499452053823308, |
| "learning_rate": 3.991967711022099e-05, |
| "loss": 0.5774, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6350900600400267, |
| "grad_norm": 0.852341222777626, |
| "learning_rate": 3.991284905330908e-05, |
| "loss": 0.6156, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.6404269513008672, |
| "grad_norm": 0.7942631863588528, |
| "learning_rate": 3.99057431270438e-05, |
| "loss": 0.6146, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6457638425617078, |
| "grad_norm": 0.7346243276112419, |
| "learning_rate": 3.989835943058321e-05, |
| "loss": 0.6245, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6511007338225484, |
| "grad_norm": 0.5552728410457584, |
| "learning_rate": 3.989069806696141e-05, |
| "loss": 0.5767, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6564376250833889, |
| "grad_norm": 0.7057507722370929, |
| "learning_rate": 3.9882759143087194e-05, |
| "loss": 0.588, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.6617745163442295, |
| "grad_norm": 0.6039953078185621, |
| "learning_rate": 3.9874542769742465e-05, |
| "loss": 0.5562, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.66711140760507, |
| "grad_norm": 0.6614177548639164, |
| "learning_rate": 3.9866049061580754e-05, |
| "loss": 0.6246, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.6724482988659106, |
| "grad_norm": 0.4994146494205324, |
| "learning_rate": 3.985727813712559e-05, |
| "loss": 0.5781, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.6777851901267512, |
| "grad_norm": 0.6316687122403261, |
| "learning_rate": 3.984823011876885e-05, |
| "loss": 0.5849, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.6831220813875917, |
| "grad_norm": 0.6451958765097028, |
| "learning_rate": 3.983890513276908e-05, |
| "loss": 0.5729, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.6884589726484323, |
| "grad_norm": 0.6080146197253071, |
| "learning_rate": 3.982930330924968e-05, |
| "loss": 0.581, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6937958639092728, |
| "grad_norm": 0.6403798321958414, |
| "learning_rate": 3.981942478219712e-05, |
| "loss": 0.58, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6991327551701134, |
| "grad_norm": 0.5940580400091781, |
| "learning_rate": 3.980926968945909e-05, |
| "loss": 0.5851, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.704469646430954, |
| "grad_norm": 0.6114900367863954, |
| "learning_rate": 3.9798838172742523e-05, |
| "loss": 0.5861, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7098065376917946, |
| "grad_norm": 0.578905757005195, |
| "learning_rate": 3.978813037761167e-05, |
| "loss": 0.5686, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.715143428952635, |
| "grad_norm": 0.4992188099121227, |
| "learning_rate": 3.977714645348603e-05, |
| "loss": 0.5839, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7204803202134756, |
| "grad_norm": 0.6287390942847498, |
| "learning_rate": 3.9765886553638305e-05, |
| "loss": 0.5935, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7258172114743162, |
| "grad_norm": 0.5377587251455253, |
| "learning_rate": 3.975435083519221e-05, |
| "loss": 0.5908, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7311541027351568, |
| "grad_norm": 0.5666232050777832, |
| "learning_rate": 3.974253945912033e-05, |
| "loss": 0.5761, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7364909939959974, |
| "grad_norm": 0.5157376399011453, |
| "learning_rate": 3.9730452590241855e-05, |
| "loss": 0.5535, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7418278852568378, |
| "grad_norm": 0.5906288982124854, |
| "learning_rate": 3.9718090397220235e-05, |
| "loss": 0.5669, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7471647765176784, |
| "grad_norm": 0.4830392788438412, |
| "learning_rate": 3.9705453052560935e-05, |
| "loss": 0.5834, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.752501667778519, |
| "grad_norm": 0.49576189569537626, |
| "learning_rate": 3.9692540732608895e-05, |
| "loss": 0.5695, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7578385590393596, |
| "grad_norm": 0.5736086879059951, |
| "learning_rate": 3.9679353617546185e-05, |
| "loss": 0.5677, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7631754503002002, |
| "grad_norm": 0.6081982434820203, |
| "learning_rate": 3.966589189138941e-05, |
| "loss": 0.5737, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.7685123415610406, |
| "grad_norm": 0.4513895583255699, |
| "learning_rate": 3.9652155741987204e-05, |
| "loss": 0.5747, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.7738492328218812, |
| "grad_norm": 0.5662887213755944, |
| "learning_rate": 3.963814536101756e-05, |
| "loss": 0.5642, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7791861240827218, |
| "grad_norm": 0.5224566159232965, |
| "learning_rate": 3.962386094398515e-05, |
| "loss": 0.557, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.7845230153435624, |
| "grad_norm": 0.630041060013869, |
| "learning_rate": 3.960930269021866e-05, |
| "loss": 0.5868, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.789859906604403, |
| "grad_norm": 0.4804049278016382, |
| "learning_rate": 3.959447080286795e-05, |
| "loss": 0.562, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.7951967978652434, |
| "grad_norm": 0.6899935340653105, |
| "learning_rate": 3.957936548890126e-05, |
| "loss": 0.5842, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.800533689126084, |
| "grad_norm": 0.5121084821707802, |
| "learning_rate": 3.956398695910225e-05, |
| "loss": 0.5835, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8058705803869246, |
| "grad_norm": 0.668616893098578, |
| "learning_rate": 3.954833542806716e-05, |
| "loss": 0.5861, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8112074716477652, |
| "grad_norm": 0.45695972645097455, |
| "learning_rate": 3.953241111420174e-05, |
| "loss": 0.5684, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8165443629086058, |
| "grad_norm": 0.5756971939096189, |
| "learning_rate": 3.951621423971822e-05, |
| "loss": 0.5609, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8218812541694464, |
| "grad_norm": 0.6608106731864758, |
| "learning_rate": 3.949974503063224e-05, |
| "loss": 0.5832, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8272181454302868, |
| "grad_norm": 0.45567367046023793, |
| "learning_rate": 3.9483003716759656e-05, |
| "loss": 0.5733, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8325550366911274, |
| "grad_norm": 0.5216817660800224, |
| "learning_rate": 3.946599053171334e-05, |
| "loss": 0.5863, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.837891927951968, |
| "grad_norm": 0.5621476528316465, |
| "learning_rate": 3.944870571289995e-05, |
| "loss": 0.6054, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8432288192128086, |
| "grad_norm": 0.5106888550302184, |
| "learning_rate": 3.943114950151658e-05, |
| "loss": 0.5567, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8485657104736491, |
| "grad_norm": 0.48507154178393364, |
| "learning_rate": 3.94133221425474e-05, |
| "loss": 0.5759, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8539026017344896, |
| "grad_norm": 0.5726162474627128, |
| "learning_rate": 3.93952238847603e-05, |
| "loss": 0.5689, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8592394929953302, |
| "grad_norm": 0.5476504710325754, |
| "learning_rate": 3.9376854980703305e-05, |
| "loss": 0.5509, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.8645763842561708, |
| "grad_norm": 0.5841063194600445, |
| "learning_rate": 3.935821568670113e-05, |
| "loss": 0.5787, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.8699132755170114, |
| "grad_norm": 0.5131288550994832, |
| "learning_rate": 3.9339306262851604e-05, |
| "loss": 0.5543, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.875250166777852, |
| "grad_norm": 0.46011085571103316, |
| "learning_rate": 3.932012697302202e-05, |
| "loss": 0.5432, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.8805870580386924, |
| "grad_norm": 1.3675570339416427, |
| "learning_rate": 3.9300678084845414e-05, |
| "loss": 0.5746, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.885923949299533, |
| "grad_norm": 0.5279362760899026, |
| "learning_rate": 3.928095986971693e-05, |
| "loss": 0.5498, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.8912608405603736, |
| "grad_norm": 0.5332378885535611, |
| "learning_rate": 3.926097260278994e-05, |
| "loss": 0.5896, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.8965977318212142, |
| "grad_norm": 0.45490314214957983, |
| "learning_rate": 3.924071656297224e-05, |
| "loss": 0.5788, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9019346230820547, |
| "grad_norm": 0.57394261542064, |
| "learning_rate": 3.922019203292217e-05, |
| "loss": 0.572, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.9072715143428952, |
| "grad_norm": 0.5549810760866458, |
| "learning_rate": 3.9199399299044636e-05, |
| "loss": 0.604, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9126084056037358, |
| "grad_norm": 0.47674471227279036, |
| "learning_rate": 3.9178338651487146e-05, |
| "loss": 0.5958, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.9179452968645764, |
| "grad_norm": 0.5092134083211615, |
| "learning_rate": 3.915701038413575e-05, |
| "loss": 0.5463, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.923282188125417, |
| "grad_norm": 0.5631301439234547, |
| "learning_rate": 3.913541479461095e-05, |
| "loss": 0.5829, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9286190793862575, |
| "grad_norm": 0.46094438063306387, |
| "learning_rate": 3.9113552184263506e-05, |
| "loss": 0.5647, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.933955970647098, |
| "grad_norm": 0.5137591466008057, |
| "learning_rate": 3.9091422858170275e-05, |
| "loss": 0.571, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9392928619079386, |
| "grad_norm": 0.5226838432471194, |
| "learning_rate": 3.906902712512994e-05, |
| "loss": 0.5626, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.9446297531687792, |
| "grad_norm": 0.5466147422388645, |
| "learning_rate": 3.904636529765872e-05, |
| "loss": 0.5726, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9499666444296198, |
| "grad_norm": 0.5487628898316014, |
| "learning_rate": 3.902343769198592e-05, |
| "loss": 0.5629, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9553035356904603, |
| "grad_norm": 0.5645880019209112, |
| "learning_rate": 3.900024462804968e-05, |
| "loss": 0.5379, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.9606404269513009, |
| "grad_norm": 0.5714192698614569, |
| "learning_rate": 3.897678642949234e-05, |
| "loss": 0.559, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9659773182121414, |
| "grad_norm": 0.8028140925883746, |
| "learning_rate": 3.8953063423656055e-05, |
| "loss": 0.5528, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.971314209472982, |
| "grad_norm": 0.5192388665650057, |
| "learning_rate": 3.892907594157813e-05, |
| "loss": 0.6081, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.9766511007338226, |
| "grad_norm": 0.6005085196506503, |
| "learning_rate": 3.8904824317986475e-05, |
| "loss": 0.597, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.9819879919946631, |
| "grad_norm": 0.5300178874667031, |
| "learning_rate": 3.8880308891294894e-05, |
| "loss": 0.5569, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.9873248832555037, |
| "grad_norm": 0.507750454533361, |
| "learning_rate": 3.885553000359836e-05, |
| "loss": 0.5902, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.9926617745163442, |
| "grad_norm": 0.5220015768920386, |
| "learning_rate": 3.8830488000668276e-05, |
| "loss": 0.5907, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.9979986657771848, |
| "grad_norm": 0.4864525986978283, |
| "learning_rate": 3.8805183231947605e-05, |
| "loss": 0.5545, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.0033355570380253, |
| "grad_norm": 0.7889929121631584, |
| "learning_rate": 3.8779616050546035e-05, |
| "loss": 0.8624, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.0086724482988658, |
| "grad_norm": 0.6971636670787393, |
| "learning_rate": 3.875378681323501e-05, |
| "loss": 0.4685, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.0140093395597065, |
| "grad_norm": 0.8621305134857299, |
| "learning_rate": 3.872769588044279e-05, |
| "loss": 0.5079, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.019346230820547, |
| "grad_norm": 0.868037119002654, |
| "learning_rate": 3.8701343616249415e-05, |
| "loss": 0.4286, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0246831220813877, |
| "grad_norm": 0.8244134271269803, |
| "learning_rate": 3.867473038838158e-05, |
| "loss": 0.5353, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.0300200133422281, |
| "grad_norm": 0.750497350373635, |
| "learning_rate": 3.864785656820758e-05, |
| "loss": 0.4478, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.0353569046030686, |
| "grad_norm": 0.7802579665937702, |
| "learning_rate": 3.862072253073207e-05, |
| "loss": 0.4809, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.0406937958639093, |
| "grad_norm": 0.6332504376170986, |
| "learning_rate": 3.859332865459082e-05, |
| "loss": 0.4659, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.0460306871247498, |
| "grad_norm": 0.5212841037738816, |
| "learning_rate": 3.856567532204551e-05, |
| "loss": 0.4419, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0513675783855905, |
| "grad_norm": 0.5429591096952704, |
| "learning_rate": 3.853776291897831e-05, |
| "loss": 0.471, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.056704469646431, |
| "grad_norm": 0.6105400333523633, |
| "learning_rate": 3.850959183488655e-05, |
| "loss": 0.4869, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.0620413609072714, |
| "grad_norm": 1.389605521972471, |
| "learning_rate": 3.848116246287725e-05, |
| "loss": 0.4194, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.067378252168112, |
| "grad_norm": 0.6500172584353179, |
| "learning_rate": 3.845247519966167e-05, |
| "loss": 0.4742, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0727151434289526, |
| "grad_norm": 0.6231565335876623, |
| "learning_rate": 3.842353044554973e-05, |
| "loss": 0.4883, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.0780520346897933, |
| "grad_norm": 0.5326635797858091, |
| "learning_rate": 3.839432860444447e-05, |
| "loss": 0.4684, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.0833889259506337, |
| "grad_norm": 0.5630660263478705, |
| "learning_rate": 3.836487008383638e-05, |
| "loss": 0.4974, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.0887258172114742, |
| "grad_norm": 0.5426950088640516, |
| "learning_rate": 3.8335155294797744e-05, |
| "loss": 0.4966, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.094062708472315, |
| "grad_norm": 0.46720899935844223, |
| "learning_rate": 3.8305184651976855e-05, |
| "loss": 0.4518, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.0993995997331554, |
| "grad_norm": 0.6590650012988468, |
| "learning_rate": 3.827495857359228e-05, |
| "loss": 0.5123, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.104736490993996, |
| "grad_norm": 0.4688566921090766, |
| "learning_rate": 3.824447748142701e-05, |
| "loss": 0.4665, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.1100733822548365, |
| "grad_norm": 0.6539956626751293, |
| "learning_rate": 3.821374180082256e-05, |
| "loss": 0.4836, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.115410273515677, |
| "grad_norm": 0.4735771906265032, |
| "learning_rate": 3.8182751960673024e-05, |
| "loss": 0.4854, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.1207471647765177, |
| "grad_norm": 0.5752246665082068, |
| "learning_rate": 3.815150839341915e-05, |
| "loss": 0.4598, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1260840560373582, |
| "grad_norm": 0.5222212002375315, |
| "learning_rate": 3.812001153504221e-05, |
| "loss": 0.4436, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1314209472981989, |
| "grad_norm": 0.4870834555429964, |
| "learning_rate": 3.8088261825058025e-05, |
| "loss": 0.468, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.1367578385590393, |
| "grad_norm": 0.5849873353511134, |
| "learning_rate": 3.8056259706510735e-05, |
| "loss": 0.4751, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1420947298198798, |
| "grad_norm": 0.6135788225861464, |
| "learning_rate": 3.802400562596668e-05, |
| "loss": 0.4372, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.1474316210807205, |
| "grad_norm": 0.4949403107444529, |
| "learning_rate": 3.799150003350813e-05, |
| "loss": 0.4886, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.152768512341561, |
| "grad_norm": 0.4908480402002135, |
| "learning_rate": 3.795874338272705e-05, |
| "loss": 0.4244, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.1581054036024017, |
| "grad_norm": 0.5302662495518609, |
| "learning_rate": 3.79257361307187e-05, |
| "loss": 0.5117, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.1634422948632421, |
| "grad_norm": 0.47953450158842736, |
| "learning_rate": 3.7892478738075335e-05, |
| "loss": 0.4814, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.1687791861240826, |
| "grad_norm": 0.4762686857338102, |
| "learning_rate": 3.785897166887973e-05, |
| "loss": 0.4668, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.1741160773849233, |
| "grad_norm": 0.4687082531990241, |
| "learning_rate": 3.7825215390698696e-05, |
| "loss": 0.4596, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1794529686457638, |
| "grad_norm": 0.4882003652888224, |
| "learning_rate": 3.779121037457661e-05, |
| "loss": 0.4827, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.1847898599066045, |
| "grad_norm": 0.4673722770115682, |
| "learning_rate": 3.7756957095028776e-05, |
| "loss": 0.4739, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.190126751167445, |
| "grad_norm": 0.46193789537235297, |
| "learning_rate": 3.772245603003485e-05, |
| "loss": 0.4785, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.1954636424282854, |
| "grad_norm": 0.41695599713095927, |
| "learning_rate": 3.768770766103214e-05, |
| "loss": 0.4574, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.200800533689126, |
| "grad_norm": 0.48103374543287214, |
| "learning_rate": 3.765271247290892e-05, |
| "loss": 0.4968, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.2061374249499666, |
| "grad_norm": 0.4107744420495834, |
| "learning_rate": 3.761747095399764e-05, |
| "loss": 0.4691, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.2114743162108073, |
| "grad_norm": 0.5216753656288083, |
| "learning_rate": 3.75819835960681e-05, |
| "loss": 0.4655, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.2168112074716477, |
| "grad_norm": 0.47480701486406535, |
| "learning_rate": 3.754625089432062e-05, |
| "loss": 0.4659, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.2221480987324884, |
| "grad_norm": 0.5167936671852781, |
| "learning_rate": 3.751027334737913e-05, |
| "loss": 0.4789, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.227484989993329, |
| "grad_norm": 0.5894608057326902, |
| "learning_rate": 3.747405145728416e-05, |
| "loss": 0.4857, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2328218812541694, |
| "grad_norm": 0.4441843183602042, |
| "learning_rate": 3.743758572948591e-05, |
| "loss": 0.4711, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.23815877251501, |
| "grad_norm": 0.44680579976734436, |
| "learning_rate": 3.740087667283712e-05, |
| "loss": 0.4913, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.2434956637758505, |
| "grad_norm": 0.4767148142121902, |
| "learning_rate": 3.736392479958606e-05, |
| "loss": 0.4583, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.2488325550366912, |
| "grad_norm": 0.4360066844228599, |
| "learning_rate": 3.732673062536926e-05, |
| "loss": 0.461, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.2541694462975317, |
| "grad_norm": 0.49211626438882655, |
| "learning_rate": 3.728929466920445e-05, |
| "loss": 0.4771, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2595063375583724, |
| "grad_norm": 0.4462865895003735, |
| "learning_rate": 3.72516174534832e-05, |
| "loss": 0.4735, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.2648432288192129, |
| "grad_norm": 0.3962897023044732, |
| "learning_rate": 3.721369950396373e-05, |
| "loss": 0.44, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.2701801200800533, |
| "grad_norm": 0.47458867881560224, |
| "learning_rate": 3.7175541349763474e-05, |
| "loss": 0.4798, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.2755170113408938, |
| "grad_norm": 0.47364034729607424, |
| "learning_rate": 3.7137143523351787e-05, |
| "loss": 0.4918, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.2808539026017345, |
| "grad_norm": 0.5140571591821321, |
| "learning_rate": 3.7098506560542464e-05, |
| "loss": 0.4755, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2861907938625752, |
| "grad_norm": 0.4575077757654535, |
| "learning_rate": 3.705963100048627e-05, |
| "loss": 0.4618, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.2915276851234156, |
| "grad_norm": 0.4752155163431786, |
| "learning_rate": 3.702051738566343e-05, |
| "loss": 0.4805, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.2968645763842561, |
| "grad_norm": 0.44739905544161696, |
| "learning_rate": 3.698116626187603e-05, |
| "loss": 0.4553, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.3022014676450968, |
| "grad_norm": 0.4313224074744495, |
| "learning_rate": 3.694157817824046e-05, |
| "loss": 0.4586, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.3075383589059373, |
| "grad_norm": 0.4540941645048732, |
| "learning_rate": 3.6901753687179674e-05, |
| "loss": 0.4484, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.312875250166778, |
| "grad_norm": 0.4544044695491594, |
| "learning_rate": 3.686169334441554e-05, |
| "loss": 0.4662, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.3182121414276184, |
| "grad_norm": 0.4324267581370308, |
| "learning_rate": 3.6821397708961045e-05, |
| "loss": 0.4973, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.323549032688459, |
| "grad_norm": 0.5525969912230031, |
| "learning_rate": 3.678086734311256e-05, |
| "loss": 0.4824, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.3288859239492996, |
| "grad_norm": 0.48134801141058897, |
| "learning_rate": 3.67401028124419e-05, |
| "loss": 0.4953, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.33422281521014, |
| "grad_norm": 0.5606903378778597, |
| "learning_rate": 3.66991046857885e-05, |
| "loss": 0.4763, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3395597064709808, |
| "grad_norm": 0.4778001351815925, |
| "learning_rate": 3.6657873535251456e-05, |
| "loss": 0.4427, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.3448965977318212, |
| "grad_norm": 0.5198221637041993, |
| "learning_rate": 3.661640993618155e-05, |
| "loss": 0.4962, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.3502334889926617, |
| "grad_norm": 0.614188855305235, |
| "learning_rate": 3.6574714467173194e-05, |
| "loss": 0.4621, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.3555703802535024, |
| "grad_norm": 0.47715507181630035, |
| "learning_rate": 3.6532787710056405e-05, |
| "loss": 0.4506, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.3609072715143429, |
| "grad_norm": 0.4823822750756398, |
| "learning_rate": 3.649063024988864e-05, |
| "loss": 0.4813, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3662441627751836, |
| "grad_norm": 0.5035590732756817, |
| "learning_rate": 3.644824267494664e-05, |
| "loss": 0.4732, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.371581054036024, |
| "grad_norm": 0.4775276009416485, |
| "learning_rate": 3.6405625576718256e-05, |
| "loss": 0.502, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.3769179452968645, |
| "grad_norm": 0.4616087736602264, |
| "learning_rate": 3.6362779549894155e-05, |
| "loss": 0.4687, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.3822548365577052, |
| "grad_norm": 0.5051090242818488, |
| "learning_rate": 3.631970519235954e-05, |
| "loss": 0.4527, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.3875917278185457, |
| "grad_norm": 0.46170933564301747, |
| "learning_rate": 3.62764031051858e-05, |
| "loss": 0.4531, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3929286190793864, |
| "grad_norm": 0.4597839229598545, |
| "learning_rate": 3.623287389262211e-05, |
| "loss": 0.4428, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.3982655103402268, |
| "grad_norm": 0.47396042868358945, |
| "learning_rate": 3.618911816208707e-05, |
| "loss": 0.4748, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.4036024016010673, |
| "grad_norm": 0.46865196681556354, |
| "learning_rate": 3.614513652416011e-05, |
| "loss": 0.4555, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.408939292861908, |
| "grad_norm": 0.4941893702835204, |
| "learning_rate": 3.610092959257306e-05, |
| "loss": 0.4475, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.4142761841227485, |
| "grad_norm": 0.45053813237543944, |
| "learning_rate": 3.6056497984201566e-05, |
| "loss": 0.5037, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.4196130753835892, |
| "grad_norm": 0.4602967972562359, |
| "learning_rate": 3.601184231905647e-05, |
| "loss": 0.4625, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4249499666444296, |
| "grad_norm": 0.40312077116267125, |
| "learning_rate": 3.5966963220275155e-05, |
| "loss": 0.4322, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.43028685790527, |
| "grad_norm": 0.43605945616048775, |
| "learning_rate": 3.592186131411288e-05, |
| "loss": 0.4758, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4356237491661108, |
| "grad_norm": 0.4490964338856413, |
| "learning_rate": 3.5876537229933994e-05, |
| "loss": 0.4606, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.4409606404269513, |
| "grad_norm": 0.4488693733839842, |
| "learning_rate": 3.583099160020319e-05, |
| "loss": 0.5358, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.446297531687792, |
| "grad_norm": 0.4399709509429179, |
| "learning_rate": 3.578522506047667e-05, |
| "loss": 0.4585, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.4516344229486324, |
| "grad_norm": 0.48797858146525214, |
| "learning_rate": 3.573923824939327e-05, |
| "loss": 0.4934, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.456971314209473, |
| "grad_norm": 0.43113845680671775, |
| "learning_rate": 3.5693031808665563e-05, |
| "loss": 0.4624, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.4623082054703136, |
| "grad_norm": 0.4622009600777627, |
| "learning_rate": 3.564660638307088e-05, |
| "loss": 0.4418, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.467645096731154, |
| "grad_norm": 0.5575580830867934, |
| "learning_rate": 3.5599962620442344e-05, |
| "loss": 0.507, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.4729819879919948, |
| "grad_norm": 0.4878453026449952, |
| "learning_rate": 3.555310117165979e-05, |
| "loss": 0.4176, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.4783188792528352, |
| "grad_norm": 0.5728589696523062, |
| "learning_rate": 3.550602269064073e-05, |
| "loss": 0.5278, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.4836557705136757, |
| "grad_norm": 0.48279056904217577, |
| "learning_rate": 3.545872783433118e-05, |
| "loss": 0.4131, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.4889926617745164, |
| "grad_norm": 0.5888361010983522, |
| "learning_rate": 3.541121726269654e-05, |
| "loss": 0.4494, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.4943295530353569, |
| "grad_norm": 0.5243711854925368, |
| "learning_rate": 3.5363491638712326e-05, |
| "loss": 0.4546, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4996664442961976, |
| "grad_norm": 0.565934472490769, |
| "learning_rate": 3.531555162835501e-05, |
| "loss": 0.4774, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.505003335557038, |
| "grad_norm": 0.46628567085954914, |
| "learning_rate": 3.52673979005926e-05, |
| "loss": 0.4572, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.5103402268178785, |
| "grad_norm": 0.5254588931292046, |
| "learning_rate": 3.521903112737544e-05, |
| "loss": 0.5014, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.5156771180787192, |
| "grad_norm": 0.4149936994490417, |
| "learning_rate": 3.517045198362672e-05, |
| "loss": 0.4611, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.5210140093395597, |
| "grad_norm": 0.4142527610245841, |
| "learning_rate": 3.512166114723314e-05, |
| "loss": 0.4378, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.5263509006004004, |
| "grad_norm": 0.4952367594365313, |
| "learning_rate": 3.507265929903539e-05, |
| "loss": 0.5056, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.5316877918612408, |
| "grad_norm": 0.49022347663893007, |
| "learning_rate": 3.5023447122818696e-05, |
| "loss": 0.4144, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.5370246831220813, |
| "grad_norm": 0.5284452745454441, |
| "learning_rate": 3.497402530530326e-05, |
| "loss": 0.4864, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.542361574382922, |
| "grad_norm": 0.6563737965515248, |
| "learning_rate": 3.492439453613466e-05, |
| "loss": 0.4772, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.5476984656437625, |
| "grad_norm": 0.4605849913602888, |
| "learning_rate": 3.487455550787426e-05, |
| "loss": 0.4519, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5530353569046031, |
| "grad_norm": 0.5170611719530955, |
| "learning_rate": 3.482450891598951e-05, |
| "loss": 0.4967, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.5583722481654436, |
| "grad_norm": 0.46268930563862615, |
| "learning_rate": 3.4774255458844273e-05, |
| "loss": 0.4515, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.563709139426284, |
| "grad_norm": 0.4177933995256324, |
| "learning_rate": 3.472379583768906e-05, |
| "loss": 0.4557, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.5690460306871248, |
| "grad_norm": 0.4790527598758891, |
| "learning_rate": 3.4673130756651266e-05, |
| "loss": 0.4557, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.5743829219479655, |
| "grad_norm": 0.38274726550797006, |
| "learning_rate": 3.4622260922725315e-05, |
| "loss": 0.4655, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.579719813208806, |
| "grad_norm": 0.5046386994690183, |
| "learning_rate": 3.457118704576281e-05, |
| "loss": 0.5072, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.5850567044696464, |
| "grad_norm": 0.3438922118300985, |
| "learning_rate": 3.451990983846262e-05, |
| "loss": 0.4092, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.5903935957304869, |
| "grad_norm": 0.47843066059231854, |
| "learning_rate": 3.4468430016360955e-05, |
| "loss": 0.4719, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.5957304869913276, |
| "grad_norm": 0.41900316347012834, |
| "learning_rate": 3.4416748297821375e-05, |
| "loss": 0.4697, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.6010673782521683, |
| "grad_norm": 0.4504191244471495, |
| "learning_rate": 3.4364865404024725e-05, |
| "loss": 0.4716, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6064042695130087, |
| "grad_norm": 0.40206430223864725, |
| "learning_rate": 3.4312782058959136e-05, |
| "loss": 0.4693, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.6117411607738492, |
| "grad_norm": 0.4131717908261127, |
| "learning_rate": 3.426049898940988e-05, |
| "loss": 0.4326, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.6170780520346897, |
| "grad_norm": 0.4192801860140271, |
| "learning_rate": 3.420801692494923e-05, |
| "loss": 0.4816, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.6224149432955304, |
| "grad_norm": 0.45720379651134796, |
| "learning_rate": 3.415533659792631e-05, |
| "loss": 0.4762, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.627751834556371, |
| "grad_norm": 0.4322890724213698, |
| "learning_rate": 3.4102458743456836e-05, |
| "loss": 0.4956, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.6330887258172115, |
| "grad_norm": 0.3701032350873174, |
| "learning_rate": 3.404938409941288e-05, |
| "loss": 0.4258, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.638425617078052, |
| "grad_norm": 0.4418583797952319, |
| "learning_rate": 3.3996113406412575e-05, |
| "loss": 0.4635, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.6437625083388925, |
| "grad_norm": 0.5119651096884537, |
| "learning_rate": 3.394264740780977e-05, |
| "loss": 0.4565, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.6490993995997332, |
| "grad_norm": 0.439255187163439, |
| "learning_rate": 3.388898684968367e-05, |
| "loss": 0.4244, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.6544362908605739, |
| "grad_norm": 0.6314072128471104, |
| "learning_rate": 3.3835132480828395e-05, |
| "loss": 0.4979, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6597731821214143, |
| "grad_norm": 0.45356149569718207, |
| "learning_rate": 3.3781085052742587e-05, |
| "loss": 0.4659, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.6651100733822548, |
| "grad_norm": 0.6595936619453585, |
| "learning_rate": 3.372684531961885e-05, |
| "loss": 0.4715, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.6704469646430953, |
| "grad_norm": 0.40146590046819736, |
| "learning_rate": 3.3672414038333294e-05, |
| "loss": 0.442, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.675783855903936, |
| "grad_norm": 0.5439115291324319, |
| "learning_rate": 3.361779196843495e-05, |
| "loss": 0.4642, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.6811207471647767, |
| "grad_norm": 0.4925416574124429, |
| "learning_rate": 3.356297987213514e-05, |
| "loss": 0.4799, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.6864576384256171, |
| "grad_norm": 0.41997671654731134, |
| "learning_rate": 3.350797851429688e-05, |
| "loss": 0.4485, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.6917945296864576, |
| "grad_norm": 0.5655372175649206, |
| "learning_rate": 3.345278866242419e-05, |
| "loss": 0.4933, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.697131420947298, |
| "grad_norm": 0.45516429030940203, |
| "learning_rate": 3.339741108665139e-05, |
| "loss": 0.4693, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.7024683122081388, |
| "grad_norm": 0.5182373828396348, |
| "learning_rate": 3.334184655973236e-05, |
| "loss": 0.4318, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.7078052034689795, |
| "grad_norm": 0.5043642496605346, |
| "learning_rate": 3.3286095857029724e-05, |
| "loss": 0.5043, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.71314209472982, |
| "grad_norm": 0.5143484476959619, |
| "learning_rate": 3.3230159756504065e-05, |
| "loss": 0.4523, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.7184789859906604, |
| "grad_norm": 0.4004155764578505, |
| "learning_rate": 3.317403903870308e-05, |
| "loss": 0.4542, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.7238158772515009, |
| "grad_norm": 0.43510446375283907, |
| "learning_rate": 3.311773448675063e-05, |
| "loss": 0.4591, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.7291527685123416, |
| "grad_norm": 0.40735475539646576, |
| "learning_rate": 3.3061246886335866e-05, |
| "loss": 0.4767, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.7344896597731823, |
| "grad_norm": 0.5027103748999582, |
| "learning_rate": 3.300457702570225e-05, |
| "loss": 0.42, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7398265510340227, |
| "grad_norm": 0.39562071018506284, |
| "learning_rate": 3.294772569563656e-05, |
| "loss": 0.5089, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.7451634422948632, |
| "grad_norm": 0.45829391562540117, |
| "learning_rate": 3.2890693689457817e-05, |
| "loss": 0.4785, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.7505003335557037, |
| "grad_norm": 0.3733919681858633, |
| "learning_rate": 3.283348180300627e-05, |
| "loss": 0.4503, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.7558372248165444, |
| "grad_norm": 0.4599706292196285, |
| "learning_rate": 3.277609083463228e-05, |
| "loss": 0.4637, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.761174116077385, |
| "grad_norm": 0.4095854985895356, |
| "learning_rate": 3.271852158518514e-05, |
| "loss": 0.4707, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7665110073382255, |
| "grad_norm": 0.4230424744489382, |
| "learning_rate": 3.266077485800192e-05, |
| "loss": 0.4611, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.771847898599066, |
| "grad_norm": 0.40727774451181376, |
| "learning_rate": 3.26028514588963e-05, |
| "loss": 0.4899, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.7771847898599065, |
| "grad_norm": 0.4257028861000557, |
| "learning_rate": 3.2544752196147266e-05, |
| "loss": 0.4759, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.7825216811207472, |
| "grad_norm": 0.4044619820594187, |
| "learning_rate": 3.248647788048784e-05, |
| "loss": 0.4589, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.7878585723815879, |
| "grad_norm": 0.41896755044660233, |
| "learning_rate": 3.2428029325093794e-05, |
| "loss": 0.457, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.7931954636424283, |
| "grad_norm": 0.4226741156494024, |
| "learning_rate": 3.23694073455723e-05, |
| "loss": 0.468, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.7985323549032688, |
| "grad_norm": 0.3803247543345477, |
| "learning_rate": 3.2310612759950535e-05, |
| "loss": 0.4548, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.8038692461641093, |
| "grad_norm": 0.4456607835139093, |
| "learning_rate": 3.225164638866424e-05, |
| "loss": 0.4808, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.80920613742495, |
| "grad_norm": 0.43894216697827915, |
| "learning_rate": 3.219250905454633e-05, |
| "loss": 0.4538, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.8145430286857906, |
| "grad_norm": 0.3622459739187082, |
| "learning_rate": 3.213320158281538e-05, |
| "loss": 0.429, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8198799199466311, |
| "grad_norm": 0.45236032311104785, |
| "learning_rate": 3.207372480106409e-05, |
| "loss": 0.4955, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.8252168112074716, |
| "grad_norm": 0.40834732197197826, |
| "learning_rate": 3.201407953924779e-05, |
| "loss": 0.4419, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.830553702468312, |
| "grad_norm": 0.4934429140575739, |
| "learning_rate": 3.195426662967281e-05, |
| "loss": 0.5097, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.8358905937291528, |
| "grad_norm": 0.40870574603025267, |
| "learning_rate": 3.189428690698487e-05, |
| "loss": 0.4398, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.8412274849899934, |
| "grad_norm": 0.459285280694887, |
| "learning_rate": 3.183414120815747e-05, |
| "loss": 0.4808, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.846564376250834, |
| "grad_norm": 0.3783731629311537, |
| "learning_rate": 3.177383037248018e-05, |
| "loss": 0.4393, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.8519012675116744, |
| "grad_norm": 0.43203892549748285, |
| "learning_rate": 3.171335524154691e-05, |
| "loss": 0.4496, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.8572381587725149, |
| "grad_norm": 0.5083672035817517, |
| "learning_rate": 3.165271665924424e-05, |
| "loss": 0.4537, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.8625750500333556, |
| "grad_norm": 0.470971515949764, |
| "learning_rate": 3.159191547173955e-05, |
| "loss": 0.4534, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.8679119412941962, |
| "grad_norm": 0.5043411580642327, |
| "learning_rate": 3.153095252746928e-05, |
| "loss": 0.455, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8732488325550367, |
| "grad_norm": 0.5422278049069252, |
| "learning_rate": 3.146982867712706e-05, |
| "loss": 0.4976, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.8785857238158772, |
| "grad_norm": 0.4604934576130623, |
| "learning_rate": 3.140854477365185e-05, |
| "loss": 0.4338, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.8839226150767177, |
| "grad_norm": 0.5227311072550253, |
| "learning_rate": 3.134710167221604e-05, |
| "loss": 0.4867, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.8892595063375583, |
| "grad_norm": 0.5016387520943354, |
| "learning_rate": 3.12855002302135e-05, |
| "loss": 0.4463, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.894596397598399, |
| "grad_norm": 0.48681527381444745, |
| "learning_rate": 3.122374130724765e-05, |
| "loss": 0.4878, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.8999332888592395, |
| "grad_norm": 0.4753924890233577, |
| "learning_rate": 3.116182576511941e-05, |
| "loss": 0.4614, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.90527018012008, |
| "grad_norm": 0.40839824511015216, |
| "learning_rate": 3.1099754467815244e-05, |
| "loss": 0.4551, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.9106070713809205, |
| "grad_norm": 0.5233984189857435, |
| "learning_rate": 3.103752828149502e-05, |
| "loss": 0.4852, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.9159439626417611, |
| "grad_norm": 0.44864050338135375, |
| "learning_rate": 3.0975148074480026e-05, |
| "loss": 0.4786, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.9212808539026018, |
| "grad_norm": 0.43437456170806327, |
| "learning_rate": 3.0912614717240745e-05, |
| "loss": 0.464, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.9266177451634423, |
| "grad_norm": 0.399850990382915, |
| "learning_rate": 3.08499290823848e-05, |
| "loss": 0.4438, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.9319546364242828, |
| "grad_norm": 0.43768868935382793, |
| "learning_rate": 3.07870920446447e-05, |
| "loss": 0.4603, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.9372915276851232, |
| "grad_norm": 0.5552601097943353, |
| "learning_rate": 3.072410448086572e-05, |
| "loss": 0.4762, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.942628418945964, |
| "grad_norm": 0.40557866679728677, |
| "learning_rate": 3.066096726999357e-05, |
| "loss": 0.4607, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.9479653102068046, |
| "grad_norm": 0.5268701709396402, |
| "learning_rate": 3.0597681293062187e-05, |
| "loss": 0.4389, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.953302201467645, |
| "grad_norm": 0.4027710666772095, |
| "learning_rate": 3.053424743318146e-05, |
| "loss": 0.4517, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.9586390927284856, |
| "grad_norm": 0.44216756582628347, |
| "learning_rate": 3.047066657552484e-05, |
| "loss": 0.4616, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.9639759839893263, |
| "grad_norm": 0.4117419874422281, |
| "learning_rate": 3.040693960731704e-05, |
| "loss": 0.4517, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.9693128752501667, |
| "grad_norm": 0.4044754379711903, |
| "learning_rate": 3.034306741782166e-05, |
| "loss": 0.4719, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.9746497665110074, |
| "grad_norm": 0.3728722340841331, |
| "learning_rate": 3.0279050898328716e-05, |
| "loss": 0.4168, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.979986657771848, |
| "grad_norm": 0.4162488452438543, |
| "learning_rate": 3.021489094214228e-05, |
| "loss": 0.4688, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.9853235490326884, |
| "grad_norm": 0.43351004658705333, |
| "learning_rate": 3.0150588444567962e-05, |
| "loss": 0.4519, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.990660440293529, |
| "grad_norm": 0.3852293267610788, |
| "learning_rate": 3.0086144302900425e-05, |
| "loss": 0.4819, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.9959973315543695, |
| "grad_norm": 0.36569663935905455, |
| "learning_rate": 3.002155941641091e-05, |
| "loss": 0.4535, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.0013342228152102, |
| "grad_norm": 0.608016827751094, |
| "learning_rate": 2.99568346863346e-05, |
| "loss": 0.7085, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.0066711140760507, |
| "grad_norm": 0.6591571174677568, |
| "learning_rate": 2.989197101585813e-05, |
| "loss": 0.3487, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.012008005336891, |
| "grad_norm": 0.6684266661006703, |
| "learning_rate": 2.9826969310106927e-05, |
| "loss": 0.3407, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.0173448965977316, |
| "grad_norm": 0.6399522965663798, |
| "learning_rate": 2.976183047613262e-05, |
| "loss": 0.3667, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.0226817878585726, |
| "grad_norm": 0.5520199671519408, |
| "learning_rate": 2.9696555422900352e-05, |
| "loss": 0.3039, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.028018679119413, |
| "grad_norm": 0.5699997898314517, |
| "learning_rate": 2.9631145061276093e-05, |
| "loss": 0.3339, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.0333555703802535, |
| "grad_norm": 0.6020462454607703, |
| "learning_rate": 2.956560030401397e-05, |
| "loss": 0.3557, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.038692461641094, |
| "grad_norm": 0.5721714900378834, |
| "learning_rate": 2.949992206574348e-05, |
| "loss": 0.3475, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.0440293529019344, |
| "grad_norm": 0.49943187780464754, |
| "learning_rate": 2.9434111262956767e-05, |
| "loss": 0.3062, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.0493662441627754, |
| "grad_norm": 0.6557048215847663, |
| "learning_rate": 2.9368168813995806e-05, |
| "loss": 0.3715, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.054703135423616, |
| "grad_norm": 0.5041985226572085, |
| "learning_rate": 2.9302095639039607e-05, |
| "loss": 0.3342, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.0600400266844563, |
| "grad_norm": 0.5594849349537587, |
| "learning_rate": 2.923589266009136e-05, |
| "loss": 0.3116, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.0653769179452968, |
| "grad_norm": 0.4290186723298561, |
| "learning_rate": 2.9169560800965583e-05, |
| "loss": 0.3593, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.0707138092061372, |
| "grad_norm": 0.526856097744535, |
| "learning_rate": 2.910310098727521e-05, |
| "loss": 0.3534, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.076050700466978, |
| "grad_norm": 0.4372849060681286, |
| "learning_rate": 2.9036514146418705e-05, |
| "loss": 0.3224, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.0813875917278186, |
| "grad_norm": 0.4365535359116469, |
| "learning_rate": 2.896980120756709e-05, |
| "loss": 0.3381, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.086724482988659, |
| "grad_norm": 0.4371195260504785, |
| "learning_rate": 2.8902963101651004e-05, |
| "loss": 0.3337, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.0920613742494996, |
| "grad_norm": 0.4119069627388638, |
| "learning_rate": 2.883600076134768e-05, |
| "loss": 0.3396, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.09739826551034, |
| "grad_norm": 0.4389727835717101, |
| "learning_rate": 2.8768915121067987e-05, |
| "loss": 0.3544, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.102735156771181, |
| "grad_norm": 0.39839899451373395, |
| "learning_rate": 2.870170711694333e-05, |
| "loss": 0.3258, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.1080720480320214, |
| "grad_norm": 0.42893629704061353, |
| "learning_rate": 2.8634377686812608e-05, |
| "loss": 0.3257, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.113408939292862, |
| "grad_norm": 0.41063083754083984, |
| "learning_rate": 2.8566927770209153e-05, |
| "loss": 0.335, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.1187458305537024, |
| "grad_norm": 0.48368270464573715, |
| "learning_rate": 2.8499358308347595e-05, |
| "loss": 0.3707, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.124082721814543, |
| "grad_norm": 0.3961965622304182, |
| "learning_rate": 2.843167024411071e-05, |
| "loss": 0.3395, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.1294196130753837, |
| "grad_norm": 0.4082398648072262, |
| "learning_rate": 2.8363864522036298e-05, |
| "loss": 0.3116, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.134756504336224, |
| "grad_norm": 0.39241622775035234, |
| "learning_rate": 2.8295942088304004e-05, |
| "loss": 0.3331, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.1400933955970647, |
| "grad_norm": 0.43575155152898126, |
| "learning_rate": 2.822790389072207e-05, |
| "loss": 0.3516, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.145430286857905, |
| "grad_norm": 0.34607848217775505, |
| "learning_rate": 2.815975087871416e-05, |
| "loss": 0.2954, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.1507671781187456, |
| "grad_norm": 0.40910730417092234, |
| "learning_rate": 2.8091484003306074e-05, |
| "loss": 0.3335, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.1561040693795865, |
| "grad_norm": 0.3834996888062665, |
| "learning_rate": 2.802310421711252e-05, |
| "loss": 0.3293, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.161440960640427, |
| "grad_norm": 0.39804942929539155, |
| "learning_rate": 2.7954612474323754e-05, |
| "loss": 0.35, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.1667778519012675, |
| "grad_norm": 0.40500969458813774, |
| "learning_rate": 2.788600973069234e-05, |
| "loss": 0.3375, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.172114743162108, |
| "grad_norm": 0.38166833304508063, |
| "learning_rate": 2.781729694351976e-05, |
| "loss": 0.3614, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.1774516344229484, |
| "grad_norm": 0.39321975173629375, |
| "learning_rate": 2.7748475071643085e-05, |
| "loss": 0.3361, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.1827885256837893, |
| "grad_norm": 0.4264354527637513, |
| "learning_rate": 2.7679545075421573e-05, |
| "loss": 0.3403, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.18812541694463, |
| "grad_norm": 0.4009398961332454, |
| "learning_rate": 2.7610507916723283e-05, |
| "loss": 0.3494, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.1934623082054703, |
| "grad_norm": 0.44735512944431943, |
| "learning_rate": 2.754136455891165e-05, |
| "loss": 0.3381, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.1987991994663107, |
| "grad_norm": 0.40873569246310604, |
| "learning_rate": 2.7472115966832044e-05, |
| "loss": 0.32, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.204136090727151, |
| "grad_norm": 0.451038278060695, |
| "learning_rate": 2.7402763106798295e-05, |
| "loss": 0.3537, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.209472981987992, |
| "grad_norm": 0.43597576256513326, |
| "learning_rate": 2.733330694657921e-05, |
| "loss": 0.3341, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.2148098732488326, |
| "grad_norm": 0.4216053478782035, |
| "learning_rate": 2.7263748455385098e-05, |
| "loss": 0.3496, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.220146764509673, |
| "grad_norm": 0.4741703909383378, |
| "learning_rate": 2.719408860385421e-05, |
| "loss": 0.3387, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.2254836557705135, |
| "grad_norm": 0.396307019176774, |
| "learning_rate": 2.7124328364039203e-05, |
| "loss": 0.3447, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.230820547031354, |
| "grad_norm": 0.44800881081287386, |
| "learning_rate": 2.7054468709393575e-05, |
| "loss": 0.3513, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.236157438292195, |
| "grad_norm": 0.3929341967270722, |
| "learning_rate": 2.6984510614758112e-05, |
| "loss": 0.3298, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.2414943295530354, |
| "grad_norm": 0.4067007662384614, |
| "learning_rate": 2.6914455056347225e-05, |
| "loss": 0.3325, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.246831220813876, |
| "grad_norm": 0.43892260225513113, |
| "learning_rate": 2.6844303011735385e-05, |
| "loss": 0.3449, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.2521681120747163, |
| "grad_norm": 0.41729138409748806, |
| "learning_rate": 2.677405545984344e-05, |
| "loss": 0.3414, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.257505003335557, |
| "grad_norm": 0.38213817747852563, |
| "learning_rate": 2.6703713380924993e-05, |
| "loss": 0.343, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.2628418945963977, |
| "grad_norm": 0.38560789077218444, |
| "learning_rate": 2.6633277756552683e-05, |
| "loss": 0.3193, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.268178785857238, |
| "grad_norm": 0.40286245157177797, |
| "learning_rate": 2.6562749569604527e-05, |
| "loss": 0.3133, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.2735156771180787, |
| "grad_norm": 0.41596719722020303, |
| "learning_rate": 2.6492129804250173e-05, |
| "loss": 0.3523, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.278852568378919, |
| "grad_norm": 0.4153442559638167, |
| "learning_rate": 2.642141944593718e-05, |
| "loss": 0.3541, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.2841894596397596, |
| "grad_norm": 0.40946075262767867, |
| "learning_rate": 2.635061948137727e-05, |
| "loss": 0.3321, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.2895263509006005, |
| "grad_norm": 0.3585516295174825, |
| "learning_rate": 2.6279730898532548e-05, |
| "loss": 0.3568, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.294863242161441, |
| "grad_norm": 0.4126272108188889, |
| "learning_rate": 2.6208754686601735e-05, |
| "loss": 0.3522, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.3002001334222815, |
| "grad_norm": 0.339353916561967, |
| "learning_rate": 2.613769183600634e-05, |
| "loss": 0.2944, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.305537024683122, |
| "grad_norm": 0.4282861919791616, |
| "learning_rate": 2.6066543338376865e-05, |
| "loss": 0.3318, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.3108739159439624, |
| "grad_norm": 0.35897497473677303, |
| "learning_rate": 2.599531018653893e-05, |
| "loss": 0.3378, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.3162108072048033, |
| "grad_norm": 0.47411097531132296, |
| "learning_rate": 2.5923993374499475e-05, |
| "loss": 0.3662, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.321547698465644, |
| "grad_norm": 0.3568620059406491, |
| "learning_rate": 2.585259389743284e-05, |
| "loss": 0.3143, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.3268845897264843, |
| "grad_norm": 0.4473037089763921, |
| "learning_rate": 2.5781112751666886e-05, |
| "loss": 0.3753, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.3322214809873247, |
| "grad_norm": 0.3799705626691493, |
| "learning_rate": 2.5709550934669123e-05, |
| "loss": 0.3361, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.337558372248165, |
| "grad_norm": 0.39700940455368583, |
| "learning_rate": 2.5637909445032752e-05, |
| "loss": 0.3174, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.342895263509006, |
| "grad_norm": 0.4519495883374496, |
| "learning_rate": 2.5566189282462766e-05, |
| "loss": 0.356, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.3482321547698466, |
| "grad_norm": 0.3870700641532921, |
| "learning_rate": 2.549439144776195e-05, |
| "loss": 0.3332, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.353569046030687, |
| "grad_norm": 0.4068180001517662, |
| "learning_rate": 2.542251694281699e-05, |
| "loss": 0.3342, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.3589059372915275, |
| "grad_norm": 0.3931784325165116, |
| "learning_rate": 2.5350566770584423e-05, |
| "loss": 0.3316, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.364242828552368, |
| "grad_norm": 0.44083065881755534, |
| "learning_rate": 2.5278541935076656e-05, |
| "loss": 0.3493, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.369579719813209, |
| "grad_norm": 0.41781820434477857, |
| "learning_rate": 2.5206443441347995e-05, |
| "loss": 0.3334, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.3749166110740494, |
| "grad_norm": 0.4211075137727358, |
| "learning_rate": 2.5134272295480587e-05, |
| "loss": 0.353, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.38025350233489, |
| "grad_norm": 0.4276719699292506, |
| "learning_rate": 2.506202950457038e-05, |
| "loss": 0.3194, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.3855903935957303, |
| "grad_norm": 0.3891436905777496, |
| "learning_rate": 2.4989716076713063e-05, |
| "loss": 0.3298, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.390927284856571, |
| "grad_norm": 0.4136388035611674, |
| "learning_rate": 2.4917333020990045e-05, |
| "loss": 0.3316, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.3962641761174117, |
| "grad_norm": 0.46318938516966707, |
| "learning_rate": 2.4844881347454326e-05, |
| "loss": 0.3561, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.401601067378252, |
| "grad_norm": 0.4361718813430768, |
| "learning_rate": 2.477236206711641e-05, |
| "loss": 0.3353, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.4069379586390927, |
| "grad_norm": 0.4231633561528134, |
| "learning_rate": 2.46997761919302e-05, |
| "loss": 0.3528, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.412274849899933, |
| "grad_norm": 0.44841625308699595, |
| "learning_rate": 2.4627124734778905e-05, |
| "loss": 0.3407, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.417611741160774, |
| "grad_norm": 0.368806895133852, |
| "learning_rate": 2.4554408709460873e-05, |
| "loss": 0.3171, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.4229486324216145, |
| "grad_norm": 0.46212577157467083, |
| "learning_rate": 2.4481629130675444e-05, |
| "loss": 0.3593, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.428285523682455, |
| "grad_norm": 0.4396967709166417, |
| "learning_rate": 2.4408787014008807e-05, |
| "loss": 0.3441, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.4336224149432955, |
| "grad_norm": 0.47046726193724747, |
| "learning_rate": 2.4335883375919828e-05, |
| "loss": 0.354, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.438959306204136, |
| "grad_norm": 0.4453837386684812, |
| "learning_rate": 2.4262919233725853e-05, |
| "loss": 0.3545, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.444296197464977, |
| "grad_norm": 0.4077861792439572, |
| "learning_rate": 2.418989560558852e-05, |
| "loss": 0.3307, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.4496330887258173, |
| "grad_norm": 0.4120092551712366, |
| "learning_rate": 2.411681351049954e-05, |
| "loss": 0.3594, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.454969979986658, |
| "grad_norm": 0.3903774406478128, |
| "learning_rate": 2.404367396826651e-05, |
| "loss": 0.3511, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.4603068712474983, |
| "grad_norm": 0.3537187224774452, |
| "learning_rate": 2.3970477999498648e-05, |
| "loss": 0.3413, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.4656437625083387, |
| "grad_norm": 0.37223078801129084, |
| "learning_rate": 2.3897226625592555e-05, |
| "loss": 0.3289, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.4709806537691796, |
| "grad_norm": 0.3724310146397289, |
| "learning_rate": 2.3823920868717982e-05, |
| "loss": 0.333, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.47631754503002, |
| "grad_norm": 0.37821163484897535, |
| "learning_rate": 2.3750561751803563e-05, |
| "loss": 0.3397, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.4816544362908606, |
| "grad_norm": 0.3963343238925445, |
| "learning_rate": 2.3677150298522513e-05, |
| "loss": 0.3728, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.486991327551701, |
| "grad_norm": 0.36709987148577683, |
| "learning_rate": 2.3603687533278364e-05, |
| "loss": 0.3222, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.4923282188125415, |
| "grad_norm": 0.37561130929516573, |
| "learning_rate": 2.3530174481190692e-05, |
| "loss": 0.3266, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.4976651100733824, |
| "grad_norm": 0.3478504823013864, |
| "learning_rate": 2.3456612168080764e-05, |
| "loss": 0.3445, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.503002001334223, |
| "grad_norm": 0.43376543680900886, |
| "learning_rate": 2.338300162045726e-05, |
| "loss": 0.3578, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.5083388925950634, |
| "grad_norm": 0.3467248206638372, |
| "learning_rate": 2.330934386550194e-05, |
| "loss": 0.3527, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.513675783855904, |
| "grad_norm": 0.3703030631269327, |
| "learning_rate": 2.32356399310553e-05, |
| "loss": 0.3372, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.5190126751167448, |
| "grad_norm": 0.36617067977815226, |
| "learning_rate": 2.316189084560224e-05, |
| "loss": 0.3212, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.524349566377585, |
| "grad_norm": 0.4109293782714893, |
| "learning_rate": 2.3088097638257722e-05, |
| "loss": 0.3669, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.5296864576384257, |
| "grad_norm": 0.40880336771156084, |
| "learning_rate": 2.3014261338752376e-05, |
| "loss": 0.3373, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.535023348899266, |
| "grad_norm": 0.41531532752412315, |
| "learning_rate": 2.294038297741817e-05, |
| "loss": 0.3412, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.5403602401601066, |
| "grad_norm": 0.4088747291326209, |
| "learning_rate": 2.2866463585174007e-05, |
| "loss": 0.3429, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.5456971314209476, |
| "grad_norm": 0.3575193305915961, |
| "learning_rate": 2.2792504193511338e-05, |
| "loss": 0.3681, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.5510340226817876, |
| "grad_norm": 0.4035090318465378, |
| "learning_rate": 2.2718505834479787e-05, |
| "loss": 0.335, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.5563709139426285, |
| "grad_norm": 0.34216276945535873, |
| "learning_rate": 2.2644469540672736e-05, |
| "loss": 0.3387, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.561707805203469, |
| "grad_norm": 0.37240018289403937, |
| "learning_rate": 2.2570396345212932e-05, |
| "loss": 0.3565, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.5670446964643094, |
| "grad_norm": 0.33808417077793806, |
| "learning_rate": 2.2496287281738033e-05, |
| "loss": 0.3349, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.5723815877251504, |
| "grad_norm": 0.3746743257239068, |
| "learning_rate": 2.2422143384386222e-05, |
| "loss": 0.3495, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.577718478985991, |
| "grad_norm": 0.3494328211369956, |
| "learning_rate": 2.234796568778178e-05, |
| "loss": 0.3434, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.5830553702468313, |
| "grad_norm": 0.38271781562914736, |
| "learning_rate": 2.22737552270206e-05, |
| "loss": 0.3177, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.5883922615076718, |
| "grad_norm": 0.37885165545443145, |
| "learning_rate": 2.219951303765579e-05, |
| "loss": 0.345, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.5937291527685122, |
| "grad_norm": 0.417558398659155, |
| "learning_rate": 2.212524015568322e-05, |
| "loss": 0.3384, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.599066044029353, |
| "grad_norm": 0.33797987587981193, |
| "learning_rate": 2.205093761752704e-05, |
| "loss": 0.3424, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.6044029352901936, |
| "grad_norm": 0.3436020891272047, |
| "learning_rate": 2.197660646002523e-05, |
| "loss": 0.3098, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.609739826551034, |
| "grad_norm": 0.36972858197550945, |
| "learning_rate": 2.190224772041512e-05, |
| "loss": 0.3573, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.6150767178118746, |
| "grad_norm": 0.3473912972172743, |
| "learning_rate": 2.1827862436318964e-05, |
| "loss": 0.3435, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.620413609072715, |
| "grad_norm": 0.355276880649726, |
| "learning_rate": 2.175345164572939e-05, |
| "loss": 0.3507, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.625750500333556, |
| "grad_norm": 0.36822265084446165, |
| "learning_rate": 2.1679016386994972e-05, |
| "loss": 0.3484, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.6310873915943964, |
| "grad_norm": 0.34120947707028404, |
| "learning_rate": 2.1604557698805707e-05, |
| "loss": 0.3581, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.636424282855237, |
| "grad_norm": 0.3625974151488874, |
| "learning_rate": 2.153007662017854e-05, |
| "loss": 0.3486, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.6417611741160774, |
| "grad_norm": 0.3810454285560133, |
| "learning_rate": 2.145557419044286e-05, |
| "loss": 0.3262, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.647098065376918, |
| "grad_norm": 0.3567767494033327, |
| "learning_rate": 2.1381051449225977e-05, |
| "loss": 0.3523, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.6524349566377587, |
| "grad_norm": 0.33557909084194115, |
| "learning_rate": 2.130650943643866e-05, |
| "loss": 0.3318, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.657771847898599, |
| "grad_norm": 0.3835013976581898, |
| "learning_rate": 2.123194919226058e-05, |
| "loss": 0.3419, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.6631087391594397, |
| "grad_norm": 0.3425374794771174, |
| "learning_rate": 2.1157371757125827e-05, |
| "loss": 0.3314, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.66844563042028, |
| "grad_norm": 0.3613222705329777, |
| "learning_rate": 2.1082778171708355e-05, |
| "loss": 0.3367, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.6737825216811206, |
| "grad_norm": 0.39030975504768284, |
| "learning_rate": 2.100816947690751e-05, |
| "loss": 0.3633, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.6791194129419615, |
| "grad_norm": 0.34775441424791625, |
| "learning_rate": 2.0933546713833474e-05, |
| "loss": 0.3261, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.684456304202802, |
| "grad_norm": 0.35619640082486737, |
| "learning_rate": 2.0858910923792725e-05, |
| "loss": 0.3468, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.6897931954636425, |
| "grad_norm": 0.36749874498504337, |
| "learning_rate": 2.0784263148273537e-05, |
| "loss": 0.3222, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.695130086724483, |
| "grad_norm": 0.32371066166225, |
| "learning_rate": 2.070960442893143e-05, |
| "loss": 0.3295, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.7004669779853234, |
| "grad_norm": 0.35511531727159107, |
| "learning_rate": 2.0634935807574633e-05, |
| "loss": 0.3412, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.7058038692461643, |
| "grad_norm": 0.36830566559856426, |
| "learning_rate": 2.0560258326149557e-05, |
| "loss": 0.3419, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.711140760507005, |
| "grad_norm": 0.34429904324010957, |
| "learning_rate": 2.0485573026726243e-05, |
| "loss": 0.3339, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.7164776517678453, |
| "grad_norm": 0.3237344051103606, |
| "learning_rate": 2.041088095148383e-05, |
| "loss": 0.3129, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.7218145430286858, |
| "grad_norm": 0.34535776524886275, |
| "learning_rate": 2.0336183142696006e-05, |
| "loss": 0.3493, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.727151434289526, |
| "grad_norm": 0.37790756994617064, |
| "learning_rate": 2.0261480642716462e-05, |
| "loss": 0.3668, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.732488325550367, |
| "grad_norm": 0.31766118054327325, |
| "learning_rate": 2.018677449396437e-05, |
| "loss": 0.3307, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.7378252168112076, |
| "grad_norm": 0.36231855507183786, |
| "learning_rate": 2.01120657389098e-05, |
| "loss": 0.3419, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.743162108072048, |
| "grad_norm": 0.3366225474904379, |
| "learning_rate": 2.0037355420059193e-05, |
| "loss": 0.3281, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.7484989993328885, |
| "grad_norm": 0.3288307627791964, |
| "learning_rate": 1.9962644579940814e-05, |
| "loss": 0.3446, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.753835890593729, |
| "grad_norm": 0.4002909383646017, |
| "learning_rate": 1.988793426109021e-05, |
| "loss": 0.3407, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.75917278185457, |
| "grad_norm": 0.361890263436206, |
| "learning_rate": 1.9813225506035637e-05, |
| "loss": 0.3573, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.7645096731154104, |
| "grad_norm": 0.36972409412445767, |
| "learning_rate": 1.973851935728354e-05, |
| "loss": 0.3386, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.769846564376251, |
| "grad_norm": 0.3890677812342347, |
| "learning_rate": 1.9663816857304005e-05, |
| "loss": 0.3526, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.7751834556370913, |
| "grad_norm": 0.3515870789484941, |
| "learning_rate": 1.9589119048516177e-05, |
| "loss": 0.327, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.780520346897932, |
| "grad_norm": 0.33110583319058595, |
| "learning_rate": 1.951442697327376e-05, |
| "loss": 0.3114, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.7858572381587727, |
| "grad_norm": 0.3644018882302513, |
| "learning_rate": 1.943974167385045e-05, |
| "loss": 0.3222, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.791194129419613, |
| "grad_norm": 0.3435318930149615, |
| "learning_rate": 1.936506419242537e-05, |
| "loss": 0.3289, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.7965310206804537, |
| "grad_norm": 0.3927187524254616, |
| "learning_rate": 1.9290395571068573e-05, |
| "loss": 0.3519, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.801867911941294, |
| "grad_norm": 0.3388044424262462, |
| "learning_rate": 1.921573685172647e-05, |
| "loss": 0.3167, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.8072048032021346, |
| "grad_norm": 0.3587054307128672, |
| "learning_rate": 1.914108907620728e-05, |
| "loss": 0.3389, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.8125416944629755, |
| "grad_norm": 0.3485342273967904, |
| "learning_rate": 1.9066453286166536e-05, |
| "loss": 0.3412, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.817878585723816, |
| "grad_norm": 0.3341586078617526, |
| "learning_rate": 1.8991830523092497e-05, |
| "loss": 0.3216, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.8232154769846565, |
| "grad_norm": 0.37102695451617046, |
| "learning_rate": 1.8917221828291652e-05, |
| "loss": 0.356, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.828552368245497, |
| "grad_norm": 0.3137005027651722, |
| "learning_rate": 1.8842628242874187e-05, |
| "loss": 0.3256, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.8338892595063374, |
| "grad_norm": 0.39850109158745595, |
| "learning_rate": 1.8768050807739425e-05, |
| "loss": 0.3387, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.8392261507671783, |
| "grad_norm": 0.36284113759820136, |
| "learning_rate": 1.8693490563561343e-05, |
| "loss": 0.3234, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.844563042028019, |
| "grad_norm": 0.3420275126150477, |
| "learning_rate": 1.8618948550774033e-05, |
| "loss": 0.3297, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.8498999332888593, |
| "grad_norm": 0.39257170087476245, |
| "learning_rate": 1.854442580955715e-05, |
| "loss": 0.3321, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.8552368245496997, |
| "grad_norm": 0.3523635968791173, |
| "learning_rate": 1.846992337982147e-05, |
| "loss": 0.3585, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.86057371581054, |
| "grad_norm": 0.337680075516661, |
| "learning_rate": 1.83954423011943e-05, |
| "loss": 0.3269, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.865910607071381, |
| "grad_norm": 0.3968837701031737, |
| "learning_rate": 1.832098361300503e-05, |
| "loss": 0.3644, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.8712474983322216, |
| "grad_norm": 0.35722398722368337, |
| "learning_rate": 1.8246548354270616e-05, |
| "loss": 0.335, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.876584389593062, |
| "grad_norm": 0.3572447721264452, |
| "learning_rate": 1.8172137563681042e-05, |
| "loss": 0.3513, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.8819212808539025, |
| "grad_norm": 0.3689075039010707, |
| "learning_rate": 1.809775227958488e-05, |
| "loss": 0.3479, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.887258172114743, |
| "grad_norm": 0.4157671175592236, |
| "learning_rate": 1.802339353997478e-05, |
| "loss": 0.3629, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.892595063375584, |
| "grad_norm": 0.3447307730764763, |
| "learning_rate": 1.7949062382472967e-05, |
| "loss": 0.3172, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.8979319546364244, |
| "grad_norm": 0.3990421667679026, |
| "learning_rate": 1.787475984431678e-05, |
| "loss": 0.3568, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.903268845897265, |
| "grad_norm": 0.3284492948137503, |
| "learning_rate": 1.7800486962344213e-05, |
| "loss": 0.3232, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.9086057371581053, |
| "grad_norm": 0.3564409700384773, |
| "learning_rate": 1.7726244772979408e-05, |
| "loss": 0.3262, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.913942628418946, |
| "grad_norm": 0.38056209814773034, |
| "learning_rate": 1.7652034312218234e-05, |
| "loss": 0.3504, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.9192795196797867, |
| "grad_norm": 0.33330941463187824, |
| "learning_rate": 1.757785661561378e-05, |
| "loss": 0.3503, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.924616410940627, |
| "grad_norm": 0.32533655359519337, |
| "learning_rate": 1.7503712718261977e-05, |
| "loss": 0.3306, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.9299533022014677, |
| "grad_norm": 0.34762836109457435, |
| "learning_rate": 1.7429603654787078e-05, |
| "loss": 0.3464, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.935290193462308, |
| "grad_norm": 0.355505370708806, |
| "learning_rate": 1.7355530459327267e-05, |
| "loss": 0.3355, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.9406270847231486, |
| "grad_norm": 0.34476430444950434, |
| "learning_rate": 1.7281494165520217e-05, |
| "loss": 0.3215, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.9459639759839895, |
| "grad_norm": 0.34463395158247007, |
| "learning_rate": 1.7207495806488672e-05, |
| "loss": 0.3286, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.95130086724483, |
| "grad_norm": 0.39694696141098895, |
| "learning_rate": 1.7133536414826e-05, |
| "loss": 0.348, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.9566377585056705, |
| "grad_norm": 0.3414356307623428, |
| "learning_rate": 1.705961702258183e-05, |
| "loss": 0.3266, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.961974649766511, |
| "grad_norm": 0.34062712306187115, |
| "learning_rate": 1.6985738661247627e-05, |
| "loss": 0.3245, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.9673115410273514, |
| "grad_norm": 0.35420373579360254, |
| "learning_rate": 1.691190236174228e-05, |
| "loss": 0.3094, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.9726484322881923, |
| "grad_norm": 0.35393965464453037, |
| "learning_rate": 1.6838109154397764e-05, |
| "loss": 0.3636, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.977985323549033, |
| "grad_norm": 0.3398430528475795, |
| "learning_rate": 1.6764360068944706e-05, |
| "loss": 0.3489, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.9833222148098733, |
| "grad_norm": 0.36959208816642103, |
| "learning_rate": 1.6690656134498063e-05, |
| "loss": 0.3545, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.9886591060707137, |
| "grad_norm": 0.3244411298894109, |
| "learning_rate": 1.661699837954275e-05, |
| "loss": 0.3024, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.993995997331554, |
| "grad_norm": 0.31714532814318064, |
| "learning_rate": 1.6543387831919243e-05, |
| "loss": 0.3196, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.999332888592395, |
| "grad_norm": 0.49216959263692955, |
| "learning_rate": 1.646982551880931e-05, |
| "loss": 0.4825, |
| "step": 562 |
| }, |
| { |
| "epoch": 3.0046697798532356, |
| "grad_norm": 0.6375873638749311, |
| "learning_rate": 1.639631246672164e-05, |
| "loss": 0.3264, |
| "step": 563 |
| }, |
| { |
| "epoch": 3.010006671114076, |
| "grad_norm": 0.4465753271154956, |
| "learning_rate": 1.632284970147749e-05, |
| "loss": 0.2328, |
| "step": 564 |
| }, |
| { |
| "epoch": 3.0153435623749165, |
| "grad_norm": 0.4868378113812574, |
| "learning_rate": 1.6249438248196437e-05, |
| "loss": 0.2209, |
| "step": 565 |
| }, |
| { |
| "epoch": 3.020680453635757, |
| "grad_norm": 0.7954769693611728, |
| "learning_rate": 1.617607913128202e-05, |
| "loss": 0.2305, |
| "step": 566 |
| }, |
| { |
| "epoch": 3.026017344896598, |
| "grad_norm": 0.4733054296578894, |
| "learning_rate": 1.610277337440745e-05, |
| "loss": 0.231, |
| "step": 567 |
| }, |
| { |
| "epoch": 3.0313542361574384, |
| "grad_norm": 0.44905546773789157, |
| "learning_rate": 1.6029522000501362e-05, |
| "loss": 0.2129, |
| "step": 568 |
| }, |
| { |
| "epoch": 3.036691127418279, |
| "grad_norm": 0.46572310673573625, |
| "learning_rate": 1.5956326031733496e-05, |
| "loss": 0.2092, |
| "step": 569 |
| }, |
| { |
| "epoch": 3.0420280186791193, |
| "grad_norm": 0.46263530375828704, |
| "learning_rate": 1.5883186489500465e-05, |
| "loss": 0.2214, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.04736490993996, |
| "grad_norm": 0.4153475909168177, |
| "learning_rate": 1.5810104394411494e-05, |
| "loss": 0.2374, |
| "step": 571 |
| }, |
| { |
| "epoch": 3.0527018012008007, |
| "grad_norm": 0.3926765936070903, |
| "learning_rate": 1.5737080766274154e-05, |
| "loss": 0.2181, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.058038692461641, |
| "grad_norm": 0.4423780999916496, |
| "learning_rate": 1.5664116624080176e-05, |
| "loss": 0.2263, |
| "step": 573 |
| }, |
| { |
| "epoch": 3.0633755837224816, |
| "grad_norm": 0.48104075830395865, |
| "learning_rate": 1.55912129859912e-05, |
| "loss": 0.2299, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.068712474983322, |
| "grad_norm": 0.34802553512115314, |
| "learning_rate": 1.5518370869324562e-05, |
| "loss": 0.2038, |
| "step": 575 |
| }, |
| { |
| "epoch": 3.0740493662441626, |
| "grad_norm": 0.4024921353804397, |
| "learning_rate": 1.5445591290539133e-05, |
| "loss": 0.2306, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.0793862575050035, |
| "grad_norm": 0.39223703505556246, |
| "learning_rate": 1.5372875265221098e-05, |
| "loss": 0.2146, |
| "step": 577 |
| }, |
| { |
| "epoch": 3.084723148765844, |
| "grad_norm": 0.37616914852450856, |
| "learning_rate": 1.53002238080698e-05, |
| "loss": 0.2215, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.0900600400266844, |
| "grad_norm": 0.38195507124837264, |
| "learning_rate": 1.5227637932883603e-05, |
| "loss": 0.2008, |
| "step": 579 |
| }, |
| { |
| "epoch": 3.095396931287525, |
| "grad_norm": 0.37714457548841984, |
| "learning_rate": 1.515511865254568e-05, |
| "loss": 0.2257, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.1007338225483654, |
| "grad_norm": 0.36734917441929993, |
| "learning_rate": 1.5082666979009953e-05, |
| "loss": 0.2081, |
| "step": 581 |
| }, |
| { |
| "epoch": 3.1060707138092063, |
| "grad_norm": 0.38769476025178157, |
| "learning_rate": 1.5010283923286944e-05, |
| "loss": 0.2271, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.1114076050700468, |
| "grad_norm": 0.3586607196800494, |
| "learning_rate": 1.493797049542963e-05, |
| "loss": 0.2021, |
| "step": 583 |
| }, |
| { |
| "epoch": 3.1167444963308872, |
| "grad_norm": 0.37496022310615634, |
| "learning_rate": 1.4865727704519416e-05, |
| "loss": 0.2443, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.1220813875917277, |
| "grad_norm": 0.34669037639542094, |
| "learning_rate": 1.4793556558652012e-05, |
| "loss": 0.2221, |
| "step": 585 |
| }, |
| { |
| "epoch": 3.127418278852568, |
| "grad_norm": 0.33087385453332585, |
| "learning_rate": 1.472145806492335e-05, |
| "loss": 0.2068, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.132755170113409, |
| "grad_norm": 0.3375224847458593, |
| "learning_rate": 1.4649433229415588e-05, |
| "loss": 0.2167, |
| "step": 587 |
| }, |
| { |
| "epoch": 3.1380920613742496, |
| "grad_norm": 0.34607209711469494, |
| "learning_rate": 1.457748305718301e-05, |
| "loss": 0.2175, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.14342895263509, |
| "grad_norm": 0.330025155691137, |
| "learning_rate": 1.4505608552238047e-05, |
| "loss": 0.2341, |
| "step": 589 |
| }, |
| { |
| "epoch": 3.1487658438959305, |
| "grad_norm": 0.34147253971194746, |
| "learning_rate": 1.4433810717537244e-05, |
| "loss": 0.2146, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.154102735156771, |
| "grad_norm": 0.3453098452736006, |
| "learning_rate": 1.436209055496725e-05, |
| "loss": 0.2024, |
| "step": 591 |
| }, |
| { |
| "epoch": 3.159439626417612, |
| "grad_norm": 0.3466365634852416, |
| "learning_rate": 1.429044906533088e-05, |
| "loss": 0.242, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.1647765176784524, |
| "grad_norm": 0.3290618234863436, |
| "learning_rate": 1.4218887248333123e-05, |
| "loss": 0.2111, |
| "step": 593 |
| }, |
| { |
| "epoch": 3.170113408939293, |
| "grad_norm": 0.351656000947437, |
| "learning_rate": 1.414740610256717e-05, |
| "loss": 0.225, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.1754503002001333, |
| "grad_norm": 0.3176619140403144, |
| "learning_rate": 1.4076006625500526e-05, |
| "loss": 0.2234, |
| "step": 595 |
| }, |
| { |
| "epoch": 3.1807871914609738, |
| "grad_norm": 0.3355617376830507, |
| "learning_rate": 1.4004689813461072e-05, |
| "loss": 0.2105, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.1861240827218147, |
| "grad_norm": 0.3361984337758312, |
| "learning_rate": 1.3933456661623142e-05, |
| "loss": 0.2243, |
| "step": 597 |
| }, |
| { |
| "epoch": 3.191460973982655, |
| "grad_norm": 0.3271843509477646, |
| "learning_rate": 1.3862308163993667e-05, |
| "loss": 0.2094, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.1967978652434956, |
| "grad_norm": 0.3571117855172697, |
| "learning_rate": 1.379124531339827e-05, |
| "loss": 0.2162, |
| "step": 599 |
| }, |
| { |
| "epoch": 3.202134756504336, |
| "grad_norm": 0.3273165917601703, |
| "learning_rate": 1.3720269101467454e-05, |
| "loss": 0.2061, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.2074716477651766, |
| "grad_norm": 0.3284542388193666, |
| "learning_rate": 1.364938051862274e-05, |
| "loss": 0.2066, |
| "step": 601 |
| }, |
| { |
| "epoch": 3.2128085390260175, |
| "grad_norm": 0.36447675615077985, |
| "learning_rate": 1.3578580554062826e-05, |
| "loss": 0.2281, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.218145430286858, |
| "grad_norm": 0.3262380092074158, |
| "learning_rate": 1.3507870195749829e-05, |
| "loss": 0.203, |
| "step": 603 |
| }, |
| { |
| "epoch": 3.2234823215476984, |
| "grad_norm": 0.3457313320643053, |
| "learning_rate": 1.3437250430395478e-05, |
| "loss": 0.2379, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.228819212808539, |
| "grad_norm": 0.3441380900113131, |
| "learning_rate": 1.336672224344732e-05, |
| "loss": 0.2245, |
| "step": 605 |
| }, |
| { |
| "epoch": 3.2341561040693794, |
| "grad_norm": 0.32996002379601147, |
| "learning_rate": 1.3296286619075016e-05, |
| "loss": 0.2227, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.2394929953302203, |
| "grad_norm": 0.3282894261484981, |
| "learning_rate": 1.3225944540156565e-05, |
| "loss": 0.2046, |
| "step": 607 |
| }, |
| { |
| "epoch": 3.2448298865910608, |
| "grad_norm": 0.32710033563774793, |
| "learning_rate": 1.3155696988264621e-05, |
| "loss": 0.1997, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.2501667778519012, |
| "grad_norm": 0.3762040570145167, |
| "learning_rate": 1.3085544943652783e-05, |
| "loss": 0.235, |
| "step": 609 |
| }, |
| { |
| "epoch": 3.2555036691127417, |
| "grad_norm": 0.3440879224425546, |
| "learning_rate": 1.3015489385241895e-05, |
| "loss": 0.2028, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.260840560373582, |
| "grad_norm": 0.38071227441207245, |
| "learning_rate": 1.2945531290606423e-05, |
| "loss": 0.2346, |
| "step": 611 |
| }, |
| { |
| "epoch": 3.266177451634423, |
| "grad_norm": 0.31980915182543196, |
| "learning_rate": 1.2875671635960807e-05, |
| "loss": 0.214, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.2715143428952635, |
| "grad_norm": 0.35716629327114985, |
| "learning_rate": 1.2805911396145794e-05, |
| "loss": 0.2117, |
| "step": 613 |
| }, |
| { |
| "epoch": 3.276851234156104, |
| "grad_norm": 0.34571819244635243, |
| "learning_rate": 1.2736251544614903e-05, |
| "loss": 0.2304, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.2821881254169445, |
| "grad_norm": 0.3208986289612815, |
| "learning_rate": 1.2666693053420795e-05, |
| "loss": 0.218, |
| "step": 615 |
| }, |
| { |
| "epoch": 3.287525016677785, |
| "grad_norm": 0.3567717531275436, |
| "learning_rate": 1.2597236893201712e-05, |
| "loss": 0.2036, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.292861907938626, |
| "grad_norm": 0.32882485699097, |
| "learning_rate": 1.2527884033167966e-05, |
| "loss": 0.2248, |
| "step": 617 |
| }, |
| { |
| "epoch": 3.2981987991994663, |
| "grad_norm": 0.33037504825082553, |
| "learning_rate": 1.2458635441088354e-05, |
| "loss": 0.2136, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.303535690460307, |
| "grad_norm": 0.3715971284418149, |
| "learning_rate": 1.2389492083276719e-05, |
| "loss": 0.2321, |
| "step": 619 |
| }, |
| { |
| "epoch": 3.3088725817211473, |
| "grad_norm": 0.33125816705438366, |
| "learning_rate": 1.2320454924578435e-05, |
| "loss": 0.2364, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.3142094729819878, |
| "grad_norm": 0.33161675743461216, |
| "learning_rate": 1.225152492835692e-05, |
| "loss": 0.2092, |
| "step": 621 |
| }, |
| { |
| "epoch": 3.3195463642428287, |
| "grad_norm": 0.34978398552096673, |
| "learning_rate": 1.2182703056480243e-05, |
| "loss": 0.2293, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.324883255503669, |
| "grad_norm": 0.32364794551815557, |
| "learning_rate": 1.211399026930767e-05, |
| "loss": 0.2204, |
| "step": 623 |
| }, |
| { |
| "epoch": 3.3302201467645096, |
| "grad_norm": 0.3499702901876252, |
| "learning_rate": 1.2045387525676253e-05, |
| "loss": 0.2159, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.33555703802535, |
| "grad_norm": 0.29554206232950536, |
| "learning_rate": 1.1976895782887488e-05, |
| "loss": 0.199, |
| "step": 625 |
| }, |
| { |
| "epoch": 3.3408939292861906, |
| "grad_norm": 0.3556138967561671, |
| "learning_rate": 1.1908515996693927e-05, |
| "loss": 0.2231, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.3462308205470315, |
| "grad_norm": 0.3363266594913331, |
| "learning_rate": 1.1840249121285843e-05, |
| "loss": 0.2252, |
| "step": 627 |
| }, |
| { |
| "epoch": 3.351567711807872, |
| "grad_norm": 0.29662341791312835, |
| "learning_rate": 1.1772096109277937e-05, |
| "loss": 0.2, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.3569046030687124, |
| "grad_norm": 0.32468090780131365, |
| "learning_rate": 1.1704057911696003e-05, |
| "loss": 0.2124, |
| "step": 629 |
| }, |
| { |
| "epoch": 3.362241494329553, |
| "grad_norm": 0.33364365893577863, |
| "learning_rate": 1.1636135477963702e-05, |
| "loss": 0.2418, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.3675783855903934, |
| "grad_norm": 0.32925700089942483, |
| "learning_rate": 1.15683297558893e-05, |
| "loss": 0.2139, |
| "step": 631 |
| }, |
| { |
| "epoch": 3.3729152768512343, |
| "grad_norm": 0.3328549235968737, |
| "learning_rate": 1.1500641691652412e-05, |
| "loss": 0.2165, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.3782521681120747, |
| "grad_norm": 0.30882570277397, |
| "learning_rate": 1.1433072229790847e-05, |
| "loss": 0.2128, |
| "step": 633 |
| }, |
| { |
| "epoch": 3.383589059372915, |
| "grad_norm": 0.3370088932960149, |
| "learning_rate": 1.1365622313187402e-05, |
| "loss": 0.2289, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.3889259506337557, |
| "grad_norm": 0.3419578297439491, |
| "learning_rate": 1.1298292883056682e-05, |
| "loss": 0.2295, |
| "step": 635 |
| }, |
| { |
| "epoch": 3.394262841894596, |
| "grad_norm": 0.310821271131648, |
| "learning_rate": 1.1231084878932018e-05, |
| "loss": 0.1937, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.399599733155437, |
| "grad_norm": 0.32752165763324353, |
| "learning_rate": 1.1163999238652328e-05, |
| "loss": 0.2342, |
| "step": 637 |
| }, |
| { |
| "epoch": 3.4049366244162775, |
| "grad_norm": 0.33007003787949457, |
| "learning_rate": 1.109703689834901e-05, |
| "loss": 0.2068, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.410273515677118, |
| "grad_norm": 0.34735756204554885, |
| "learning_rate": 1.1030198792432915e-05, |
| "loss": 0.2414, |
| "step": 639 |
| }, |
| { |
| "epoch": 3.4156104069379585, |
| "grad_norm": 0.305560108048619, |
| "learning_rate": 1.09634858535813e-05, |
| "loss": 0.2097, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.4209472981987994, |
| "grad_norm": 0.3389861898800808, |
| "learning_rate": 1.089689901272479e-05, |
| "loss": 0.2127, |
| "step": 641 |
| }, |
| { |
| "epoch": 3.42628418945964, |
| "grad_norm": 0.3207807998603898, |
| "learning_rate": 1.0830439199034424e-05, |
| "loss": 0.2226, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.4316210807204803, |
| "grad_norm": 0.3264054440024789, |
| "learning_rate": 1.0764107339908643e-05, |
| "loss": 0.222, |
| "step": 643 |
| }, |
| { |
| "epoch": 3.436957971981321, |
| "grad_norm": 0.32600813384811467, |
| "learning_rate": 1.0697904360960392e-05, |
| "loss": 0.209, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.4422948632421613, |
| "grad_norm": 0.3177047337904919, |
| "learning_rate": 1.06318311860042e-05, |
| "loss": 0.2089, |
| "step": 645 |
| }, |
| { |
| "epoch": 3.447631754503002, |
| "grad_norm": 0.3210423679681483, |
| "learning_rate": 1.0565888737043238e-05, |
| "loss": 0.2274, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.4529686457638427, |
| "grad_norm": 0.3142756745691945, |
| "learning_rate": 1.050007793425653e-05, |
| "loss": 0.208, |
| "step": 647 |
| }, |
| { |
| "epoch": 3.458305537024683, |
| "grad_norm": 0.3189823143454847, |
| "learning_rate": 1.0434399695986038e-05, |
| "loss": 0.237, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.4636424282855236, |
| "grad_norm": 0.35446119159135153, |
| "learning_rate": 1.0368854938723909e-05, |
| "loss": 0.2257, |
| "step": 649 |
| }, |
| { |
| "epoch": 3.468979319546364, |
| "grad_norm": 0.30379218539151415, |
| "learning_rate": 1.0303444577099657e-05, |
| "loss": 0.2055, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.474316210807205, |
| "grad_norm": 0.3334793611412552, |
| "learning_rate": 1.023816952386738e-05, |
| "loss": 0.2259, |
| "step": 651 |
| }, |
| { |
| "epoch": 3.4796531020680455, |
| "grad_norm": 0.31245098736700866, |
| "learning_rate": 1.0173030689893073e-05, |
| "loss": 0.217, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.484989993328886, |
| "grad_norm": 0.298270066885541, |
| "learning_rate": 1.010802898414188e-05, |
| "loss": 0.1971, |
| "step": 653 |
| }, |
| { |
| "epoch": 3.4903268845897264, |
| "grad_norm": 0.3270591833255489, |
| "learning_rate": 1.0043165313665408e-05, |
| "loss": 0.2278, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.495663775850567, |
| "grad_norm": 0.305730303527883, |
| "learning_rate": 9.978440583589097e-06, |
| "loss": 0.2177, |
| "step": 655 |
| }, |
| { |
| "epoch": 3.5010006671114073, |
| "grad_norm": 0.3144303533888383, |
| "learning_rate": 9.913855697099581e-06, |
| "loss": 0.2244, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.5063375583722483, |
| "grad_norm": 0.3186316247887544, |
| "learning_rate": 9.84941155543205e-06, |
| "loss": 0.2117, |
| "step": 657 |
| }, |
| { |
| "epoch": 3.5116744496330887, |
| "grad_norm": 0.3338362484473223, |
| "learning_rate": 9.785109057857724e-06, |
| "loss": 0.2175, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.517011340893929, |
| "grad_norm": 0.33877190549699376, |
| "learning_rate": 9.720949101671283e-06, |
| "loss": 0.2072, |
| "step": 659 |
| }, |
| { |
| "epoch": 3.52234823215477, |
| "grad_norm": 0.2878815210471794, |
| "learning_rate": 9.65693258217834e-06, |
| "loss": 0.2024, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.52768512341561, |
| "grad_norm": 0.3229015014259361, |
| "learning_rate": 9.59306039268296e-06, |
| "loss": 0.2309, |
| "step": 661 |
| }, |
| { |
| "epoch": 3.533022014676451, |
| "grad_norm": 0.329774980311716, |
| "learning_rate": 9.529333424475165e-06, |
| "loss": 0.21, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.5383589059372915, |
| "grad_norm": 0.3254821489847073, |
| "learning_rate": 9.465752566818545e-06, |
| "loss": 0.2255, |
| "step": 663 |
| }, |
| { |
| "epoch": 3.543695797198132, |
| "grad_norm": 0.3318586960366466, |
| "learning_rate": 9.402318706937818e-06, |
| "loss": 0.2298, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.549032688458973, |
| "grad_norm": 0.3116201867034196, |
| "learning_rate": 9.33903273000644e-06, |
| "loss": 0.2054, |
| "step": 665 |
| }, |
| { |
| "epoch": 3.554369579719813, |
| "grad_norm": 0.31694579049650856, |
| "learning_rate": 9.275895519134284e-06, |
| "loss": 0.222, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.559706470980654, |
| "grad_norm": 0.32312031003526426, |
| "learning_rate": 9.212907955355302e-06, |
| "loss": 0.2173, |
| "step": 667 |
| }, |
| { |
| "epoch": 3.5650433622414943, |
| "grad_norm": 0.32535627286627056, |
| "learning_rate": 9.150070917615209e-06, |
| "loss": 0.224, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.570380253502335, |
| "grad_norm": 0.32754134685676567, |
| "learning_rate": 9.087385282759262e-06, |
| "loss": 0.2056, |
| "step": 669 |
| }, |
| { |
| "epoch": 3.5757171447631757, |
| "grad_norm": 0.32097631798875437, |
| "learning_rate": 9.024851925519984e-06, |
| "loss": 0.2167, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.581054036024016, |
| "grad_norm": 0.3224914181755297, |
| "learning_rate": 8.962471718504981e-06, |
| "loss": 0.2318, |
| "step": 671 |
| }, |
| { |
| "epoch": 3.5863909272848566, |
| "grad_norm": 0.3069463307574391, |
| "learning_rate": 8.90024553218477e-06, |
| "loss": 0.2147, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.591727818545697, |
| "grad_norm": 0.34503320829284617, |
| "learning_rate": 8.838174234880595e-06, |
| "loss": 0.2115, |
| "step": 673 |
| }, |
| { |
| "epoch": 3.5970647098065376, |
| "grad_norm": 0.33721115686062114, |
| "learning_rate": 8.776258692752355e-06, |
| "loss": 0.2125, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.6024016010673785, |
| "grad_norm": 0.2988266857938936, |
| "learning_rate": 8.714499769786504e-06, |
| "loss": 0.2068, |
| "step": 675 |
| }, |
| { |
| "epoch": 3.607738492328219, |
| "grad_norm": 0.33052375619924823, |
| "learning_rate": 8.652898327783966e-06, |
| "loss": 0.2081, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.6130753835890594, |
| "grad_norm": 0.2972205217005894, |
| "learning_rate": 8.591455226348153e-06, |
| "loss": 0.2165, |
| "step": 677 |
| }, |
| { |
| "epoch": 3.6184122748499, |
| "grad_norm": 0.29969511000575444, |
| "learning_rate": 8.530171322872943e-06, |
| "loss": 0.2013, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.6237491661107404, |
| "grad_norm": 0.3304499281322153, |
| "learning_rate": 8.469047472530721e-06, |
| "loss": 0.2355, |
| "step": 679 |
| }, |
| { |
| "epoch": 3.6290860573715813, |
| "grad_norm": 0.290277414481082, |
| "learning_rate": 8.408084528260454e-06, |
| "loss": 0.2072, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.6344229486324218, |
| "grad_norm": 0.33406571248792316, |
| "learning_rate": 8.347283340755762e-06, |
| "loss": 0.2103, |
| "step": 681 |
| }, |
| { |
| "epoch": 3.6397598398932622, |
| "grad_norm": 0.33772777521240266, |
| "learning_rate": 8.286644758453084e-06, |
| "loss": 0.2277, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.6450967311541027, |
| "grad_norm": 0.32580165533637756, |
| "learning_rate": 8.226169627519829e-06, |
| "loss": 0.1972, |
| "step": 683 |
| }, |
| { |
| "epoch": 3.650433622414943, |
| "grad_norm": 0.327604705269643, |
| "learning_rate": 8.165858791842531e-06, |
| "loss": 0.2301, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.655770513675784, |
| "grad_norm": 0.30673340194726156, |
| "learning_rate": 8.10571309301513e-06, |
| "loss": 0.2214, |
| "step": 685 |
| }, |
| { |
| "epoch": 3.6611074049366246, |
| "grad_norm": 0.3082993998431756, |
| "learning_rate": 8.045733370327197e-06, |
| "loss": 0.2142, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.666444296197465, |
| "grad_norm": 0.3264898640096142, |
| "learning_rate": 7.98592046075221e-06, |
| "loss": 0.2239, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.6717811874583055, |
| "grad_norm": 0.3267139343805825, |
| "learning_rate": 7.926275198935915e-06, |
| "loss": 0.2174, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.677118078719146, |
| "grad_norm": 0.3038869171361146, |
| "learning_rate": 7.866798417184631e-06, |
| "loss": 0.2167, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.682454969979987, |
| "grad_norm": 0.3153766070982165, |
| "learning_rate": 7.807490945453675e-06, |
| "loss": 0.2214, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.6877918612408274, |
| "grad_norm": 0.3359093682690147, |
| "learning_rate": 7.748353611335772e-06, |
| "loss": 0.2282, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.693128752501668, |
| "grad_norm": 0.3235870658057771, |
| "learning_rate": 7.689387240049475e-06, |
| "loss": 0.215, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.6984656437625083, |
| "grad_norm": 0.32596212481215736, |
| "learning_rate": 7.6305926544277e-06, |
| "loss": 0.2204, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.7038025350233488, |
| "grad_norm": 0.3089795878726115, |
| "learning_rate": 7.571970674906212e-06, |
| "loss": 0.2013, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.7091394262841897, |
| "grad_norm": 0.3036096075117797, |
| "learning_rate": 7.513522119512171e-06, |
| "loss": 0.2128, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.71447631754503, |
| "grad_norm": 0.31370142673701046, |
| "learning_rate": 7.455247803852741e-06, |
| "loss": 0.2153, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.7198132088058706, |
| "grad_norm": 0.3164732161874329, |
| "learning_rate": 7.397148541103698e-06, |
| "loss": 0.215, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.725150100066711, |
| "grad_norm": 0.3143756963158667, |
| "learning_rate": 7.339225141998076e-06, |
| "loss": 0.2274, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.7304869913275516, |
| "grad_norm": 0.3259467908831874, |
| "learning_rate": 7.281478414814869e-06, |
| "loss": 0.2248, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.7358238825883925, |
| "grad_norm": 0.3243759701523797, |
| "learning_rate": 7.223909165367722e-06, |
| "loss": 0.2267, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.741160773849233, |
| "grad_norm": 0.29306669061610235, |
| "learning_rate": 7.166518196993726e-06, |
| "loss": 0.2039, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.7464976651100734, |
| "grad_norm": 0.3122606127392118, |
| "learning_rate": 7.109306310542193e-06, |
| "loss": 0.2247, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.751834556370914, |
| "grad_norm": 0.3084720774983147, |
| "learning_rate": 7.052274304363449e-06, |
| "loss": 0.2263, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.7571714476317544, |
| "grad_norm": 0.32807197455446596, |
| "learning_rate": 6.995422974297748e-06, |
| "loss": 0.2182, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.7625083388925953, |
| "grad_norm": 0.29627151938725177, |
| "learning_rate": 6.938753113664138e-06, |
| "loss": 0.223, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.7678452301534358, |
| "grad_norm": 0.30481388666511067, |
| "learning_rate": 6.882265513249376e-06, |
| "loss": 0.2153, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.7731821214142762, |
| "grad_norm": 0.30367279516217394, |
| "learning_rate": 6.8259609612969245e-06, |
| "loss": 0.1961, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.7785190126751167, |
| "grad_norm": 0.3311032509813633, |
| "learning_rate": 6.769840243495937e-06, |
| "loss": 0.2451, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.783855903935957, |
| "grad_norm": 0.3124678571976269, |
| "learning_rate": 6.713904142970282e-06, |
| "loss": 0.215, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.789192795196798, |
| "grad_norm": 0.31743166594564953, |
| "learning_rate": 6.658153440267649e-06, |
| "loss": 0.2176, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.7945296864576386, |
| "grad_norm": 0.28782289424591967, |
| "learning_rate": 6.602588913348611e-06, |
| "loss": 0.2094, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.799866577718479, |
| "grad_norm": 0.3124172224100049, |
| "learning_rate": 6.547211337575812e-06, |
| "loss": 0.2083, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.8052034689793195, |
| "grad_norm": 0.3173760507861447, |
| "learning_rate": 6.4920214857031286e-06, |
| "loss": 0.2181, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.81054036024016, |
| "grad_norm": 0.3191329518696444, |
| "learning_rate": 6.437020127864863e-06, |
| "loss": 0.2134, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.815877251501001, |
| "grad_norm": 0.3051391344384358, |
| "learning_rate": 6.382208031565051e-06, |
| "loss": 0.2204, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.8212141427618413, |
| "grad_norm": 0.31850920002112115, |
| "learning_rate": 6.327585961666703e-06, |
| "loss": 0.2253, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.826551034022682, |
| "grad_norm": 0.29793389352549826, |
| "learning_rate": 6.273154680381152e-06, |
| "loss": 0.2078, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.8318879252835223, |
| "grad_norm": 0.3003506055760915, |
| "learning_rate": 6.218914947257424e-06, |
| "loss": 0.231, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.8372248165443628, |
| "grad_norm": 0.320501388188613, |
| "learning_rate": 6.164867519171609e-06, |
| "loss": 0.2244, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.8425617078052037, |
| "grad_norm": 0.3141510603372196, |
| "learning_rate": 6.111013150316336e-06, |
| "loss": 0.2145, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.847898599066044, |
| "grad_norm": 0.28759406096829065, |
| "learning_rate": 6.057352592190233e-06, |
| "loss": 0.2059, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.8532354903268846, |
| "grad_norm": 0.2965011224320686, |
| "learning_rate": 6.003886593587429e-06, |
| "loss": 0.1994, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.858572381587725, |
| "grad_norm": 0.32309929809226157, |
| "learning_rate": 5.9506159005871225e-06, |
| "loss": 0.2223, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.8639092728485656, |
| "grad_norm": 0.3064344176543871, |
| "learning_rate": 5.897541256543171e-06, |
| "loss": 0.2165, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.8692461641094065, |
| "grad_norm": 0.30830702220241546, |
| "learning_rate": 5.844663402073696e-06, |
| "loss": 0.222, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.874583055370247, |
| "grad_norm": 0.29076332143502975, |
| "learning_rate": 5.791983075050773e-06, |
| "loss": 0.2048, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.8799199466310874, |
| "grad_norm": 0.300114275064821, |
| "learning_rate": 5.739501010590132e-06, |
| "loss": 0.2084, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.885256837891928, |
| "grad_norm": 0.3045488309986525, |
| "learning_rate": 5.68721794104087e-06, |
| "loss": 0.2197, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.8905937291527684, |
| "grad_norm": 0.327552950910515, |
| "learning_rate": 5.635134595975285e-06, |
| "loss": 0.2175, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.8959306204136093, |
| "grad_norm": 0.30484919275231337, |
| "learning_rate": 5.583251702178634e-06, |
| "loss": 0.2093, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.9012675116744497, |
| "grad_norm": 0.2921364622612547, |
| "learning_rate": 5.531569983639045e-06, |
| "loss": 0.2058, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.90660440293529, |
| "grad_norm": 0.30049818877171114, |
| "learning_rate": 5.480090161537388e-06, |
| "loss": 0.2259, |
| "step": 732 |
| }, |
| { |
| "epoch": 3.9119412941961307, |
| "grad_norm": 0.3139671791644443, |
| "learning_rate": 5.4288129542371995e-06, |
| "loss": 0.2197, |
| "step": 733 |
| }, |
| { |
| "epoch": 3.917278185456971, |
| "grad_norm": 0.31085024950173556, |
| "learning_rate": 5.377739077274688e-06, |
| "loss": 0.2223, |
| "step": 734 |
| }, |
| { |
| "epoch": 3.922615076717812, |
| "grad_norm": 0.27795634676595493, |
| "learning_rate": 5.326869243348734e-06, |
| "loss": 0.2087, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.9279519679786525, |
| "grad_norm": 0.292718813818914, |
| "learning_rate": 5.276204162310938e-06, |
| "loss": 0.2138, |
| "step": 736 |
| }, |
| { |
| "epoch": 3.933288859239493, |
| "grad_norm": 0.3124029055450099, |
| "learning_rate": 5.225744541155731e-06, |
| "loss": 0.2202, |
| "step": 737 |
| }, |
| { |
| "epoch": 3.9386257505003335, |
| "grad_norm": 0.3056001516045311, |
| "learning_rate": 5.1754910840105e-06, |
| "loss": 0.2203, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.943962641761174, |
| "grad_norm": 0.28861734955419716, |
| "learning_rate": 5.125444492125748e-06, |
| "loss": 0.2118, |
| "step": 739 |
| }, |
| { |
| "epoch": 3.949299533022015, |
| "grad_norm": 0.2944862914827709, |
| "learning_rate": 5.075605463865348e-06, |
| "loss": 0.215, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.9546364242828553, |
| "grad_norm": 0.3018187959209579, |
| "learning_rate": 5.025974694696747e-06, |
| "loss": 0.2121, |
| "step": 741 |
| }, |
| { |
| "epoch": 3.959973315543696, |
| "grad_norm": 0.32968530294516596, |
| "learning_rate": 4.9765528771813065e-06, |
| "loss": 0.2194, |
| "step": 742 |
| }, |
| { |
| "epoch": 3.9653102068045363, |
| "grad_norm": 0.2895487447372504, |
| "learning_rate": 4.92734070096462e-06, |
| "loss": 0.1945, |
| "step": 743 |
| }, |
| { |
| "epoch": 3.9706470980653767, |
| "grad_norm": 0.30516795101430666, |
| "learning_rate": 4.878338852766871e-06, |
| "loss": 0.2218, |
| "step": 744 |
| }, |
| { |
| "epoch": 3.9759839893262177, |
| "grad_norm": 0.30604355308841524, |
| "learning_rate": 4.829548016373285e-06, |
| "loss": 0.2166, |
| "step": 745 |
| }, |
| { |
| "epoch": 3.981320880587058, |
| "grad_norm": 0.2907648161711581, |
| "learning_rate": 4.780968872624569e-06, |
| "loss": 0.2155, |
| "step": 746 |
| }, |
| { |
| "epoch": 3.9866577718478986, |
| "grad_norm": 0.2981568120720774, |
| "learning_rate": 4.732602099407402e-06, |
| "loss": 0.2271, |
| "step": 747 |
| }, |
| { |
| "epoch": 3.991994663108739, |
| "grad_norm": 0.30885152124963694, |
| "learning_rate": 4.684448371645003e-06, |
| "loss": 0.2183, |
| "step": 748 |
| }, |
| { |
| "epoch": 3.9973315543695795, |
| "grad_norm": 0.29884575186957585, |
| "learning_rate": 4.636508361287675e-06, |
| "loss": 0.2158, |
| "step": 749 |
| }, |
| { |
| "epoch": 4.0026684456304205, |
| "grad_norm": 0.601890871189883, |
| "learning_rate": 4.58878273730347e-06, |
| "loss": 0.3212, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.0080053368912605, |
| "grad_norm": 0.5298412615002074, |
| "learning_rate": 4.541272165668829e-06, |
| "loss": 0.1575, |
| "step": 751 |
| }, |
| { |
| "epoch": 4.013342228152101, |
| "grad_norm": 0.42841188304774436, |
| "learning_rate": 4.493977309359279e-06, |
| "loss": 0.1504, |
| "step": 752 |
| }, |
| { |
| "epoch": 4.018679119412942, |
| "grad_norm": 0.3120373077214368, |
| "learning_rate": 4.4468988283402135e-06, |
| "loss": 0.1287, |
| "step": 753 |
| }, |
| { |
| "epoch": 4.024016010673782, |
| "grad_norm": 0.33027912692579064, |
| "learning_rate": 4.40003737955766e-06, |
| "loss": 0.161, |
| "step": 754 |
| }, |
| { |
| "epoch": 4.029352901934623, |
| "grad_norm": 0.39662465830617283, |
| "learning_rate": 4.353393616929118e-06, |
| "loss": 0.15, |
| "step": 755 |
| }, |
| { |
| "epoch": 4.034689793195463, |
| "grad_norm": 0.4875405375103197, |
| "learning_rate": 4.306968191334437e-06, |
| "loss": 0.1542, |
| "step": 756 |
| }, |
| { |
| "epoch": 4.040026684456304, |
| "grad_norm": 0.4647132436994288, |
| "learning_rate": 4.260761750606734e-06, |
| "loss": 0.1372, |
| "step": 757 |
| }, |
| { |
| "epoch": 4.045363575717145, |
| "grad_norm": 0.4316859008197165, |
| "learning_rate": 4.2147749395233365e-06, |
| "loss": 0.1507, |
| "step": 758 |
| }, |
| { |
| "epoch": 4.050700466977985, |
| "grad_norm": 0.36991769149322723, |
| "learning_rate": 4.1690083997968216e-06, |
| "loss": 0.1399, |
| "step": 759 |
| }, |
| { |
| "epoch": 4.056037358238826, |
| "grad_norm": 0.3081151075975586, |
| "learning_rate": 4.123462770066013e-06, |
| "loss": 0.1358, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.061374249499666, |
| "grad_norm": 0.33035087289352366, |
| "learning_rate": 4.078138685887125e-06, |
| "loss": 0.137, |
| "step": 761 |
| }, |
| { |
| "epoch": 4.066711140760507, |
| "grad_norm": 0.35282903465510224, |
| "learning_rate": 4.033036779724848e-06, |
| "loss": 0.1423, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.072048032021348, |
| "grad_norm": 0.35728767536135847, |
| "learning_rate": 3.988157680943536e-06, |
| "loss": 0.1471, |
| "step": 763 |
| }, |
| { |
| "epoch": 4.077384923282188, |
| "grad_norm": 0.35082302549758365, |
| "learning_rate": 3.943502015798437e-06, |
| "loss": 0.1385, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.082721814543029, |
| "grad_norm": 0.30763129510502485, |
| "learning_rate": 3.899070407426948e-06, |
| "loss": 0.1308, |
| "step": 765 |
| }, |
| { |
| "epoch": 4.088058705803869, |
| "grad_norm": 0.31052345237501106, |
| "learning_rate": 3.854863475839898e-06, |
| "loss": 0.1508, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.09339559706471, |
| "grad_norm": 0.29117416131590373, |
| "learning_rate": 3.810881837912934e-06, |
| "loss": 0.1359, |
| "step": 767 |
| }, |
| { |
| "epoch": 4.098732488325551, |
| "grad_norm": 0.32738573833211637, |
| "learning_rate": 3.7671261073778875e-06, |
| "loss": 0.1551, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.104069379586391, |
| "grad_norm": 0.2879978040339806, |
| "learning_rate": 3.7235968948142098e-06, |
| "loss": 0.1285, |
| "step": 769 |
| }, |
| { |
| "epoch": 4.109406270847232, |
| "grad_norm": 0.3297634034251823, |
| "learning_rate": 3.6802948076404675e-06, |
| "loss": 0.1476, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.114743162108072, |
| "grad_norm": 0.30016892407880663, |
| "learning_rate": 3.6372204501058494e-06, |
| "loss": 0.1295, |
| "step": 771 |
| }, |
| { |
| "epoch": 4.120080053368913, |
| "grad_norm": 0.3145248211219936, |
| "learning_rate": 3.5943744232817455e-06, |
| "loss": 0.1296, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.1254169446297535, |
| "grad_norm": 0.31318694134932973, |
| "learning_rate": 3.551757325053362e-06, |
| "loss": 0.1482, |
| "step": 773 |
| }, |
| { |
| "epoch": 4.1307538358905935, |
| "grad_norm": 0.2948784037632883, |
| "learning_rate": 3.5093697501113645e-06, |
| "loss": 0.1422, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.136090727151434, |
| "grad_norm": 0.2801444303260538, |
| "learning_rate": 3.4672122899435935e-06, |
| "loss": 0.1352, |
| "step": 775 |
| }, |
| { |
| "epoch": 4.1414276184122745, |
| "grad_norm": 0.2824012703736762, |
| "learning_rate": 3.4252855328268055e-06, |
| "loss": 0.1505, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.146764509673115, |
| "grad_norm": 0.2877569800912583, |
| "learning_rate": 3.3835900638184538e-06, |
| "loss": 0.1336, |
| "step": 777 |
| }, |
| { |
| "epoch": 4.152101400933956, |
| "grad_norm": 0.2922681146418443, |
| "learning_rate": 3.3421264647485476e-06, |
| "loss": 0.1403, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.157438292194796, |
| "grad_norm": 0.29571418661558607, |
| "learning_rate": 3.300895314211503e-06, |
| "loss": 0.1417, |
| "step": 779 |
| }, |
| { |
| "epoch": 4.162775183455637, |
| "grad_norm": 0.2807197693531769, |
| "learning_rate": 3.259897187558101e-06, |
| "loss": 0.1321, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.168112074716477, |
| "grad_norm": 0.26947880551875214, |
| "learning_rate": 3.219132656887445e-06, |
| "loss": 0.1393, |
| "step": 781 |
| }, |
| { |
| "epoch": 4.173448965977318, |
| "grad_norm": 0.30694317413028555, |
| "learning_rate": 3.1786022910389524e-06, |
| "loss": 0.1541, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.178785857238159, |
| "grad_norm": 0.2702053773322514, |
| "learning_rate": 3.1383066555844686e-06, |
| "loss": 0.1245, |
| "step": 783 |
| }, |
| { |
| "epoch": 4.184122748498999, |
| "grad_norm": 0.3036431279445982, |
| "learning_rate": 3.0982463128203346e-06, |
| "loss": 0.1378, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.18945963975984, |
| "grad_norm": 0.2903132944155927, |
| "learning_rate": 3.058421821759545e-06, |
| "loss": 0.1446, |
| "step": 785 |
| }, |
| { |
| "epoch": 4.19479653102068, |
| "grad_norm": 0.2693686823614546, |
| "learning_rate": 3.0188337381239696e-06, |
| "loss": 0.1388, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.200133422281521, |
| "grad_norm": 0.2547143693355037, |
| "learning_rate": 2.9794826143365794e-06, |
| "loss": 0.1284, |
| "step": 787 |
| }, |
| { |
| "epoch": 4.205470313542362, |
| "grad_norm": 0.2751535551881633, |
| "learning_rate": 2.940368999513734e-06, |
| "loss": 0.1375, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.210807204803202, |
| "grad_norm": 0.2778666347500376, |
| "learning_rate": 2.901493439457543e-06, |
| "loss": 0.1303, |
| "step": 789 |
| }, |
| { |
| "epoch": 4.216144096064043, |
| "grad_norm": 0.2846619856811341, |
| "learning_rate": 2.8628564766482193e-06, |
| "loss": 0.1492, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.221480987324883, |
| "grad_norm": 0.281092186868078, |
| "learning_rate": 2.824458650236532e-06, |
| "loss": 0.1414, |
| "step": 791 |
| }, |
| { |
| "epoch": 4.226817878585724, |
| "grad_norm": 0.2832353643098496, |
| "learning_rate": 2.7863004960362784e-06, |
| "loss": 0.141, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.232154769846565, |
| "grad_norm": 0.26863830308187436, |
| "learning_rate": 2.748382546516799e-06, |
| "loss": 0.1276, |
| "step": 793 |
| }, |
| { |
| "epoch": 4.237491661107405, |
| "grad_norm": 0.2837290905016847, |
| "learning_rate": 2.7107053307955535e-06, |
| "loss": 0.1464, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.242828552368246, |
| "grad_norm": 0.30003856403259976, |
| "learning_rate": 2.6732693746307405e-06, |
| "loss": 0.1467, |
| "step": 795 |
| }, |
| { |
| "epoch": 4.248165443629086, |
| "grad_norm": 0.3031551246837749, |
| "learning_rate": 2.6360752004139457e-06, |
| "loss": 0.141, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.253502334889927, |
| "grad_norm": 0.2839203101647867, |
| "learning_rate": 2.599123327162876e-06, |
| "loss": 0.128, |
| "step": 797 |
| }, |
| { |
| "epoch": 4.2588392261507675, |
| "grad_norm": 0.28309288050123305, |
| "learning_rate": 2.5624142705140974e-06, |
| "loss": 0.1424, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.2641761174116075, |
| "grad_norm": 0.3055488535217384, |
| "learning_rate": 2.5259485427158436e-06, |
| "loss": 0.1489, |
| "step": 799 |
| }, |
| { |
| "epoch": 4.269513008672448, |
| "grad_norm": 0.300540177885969, |
| "learning_rate": 2.489726652620883e-06, |
| "loss": 0.1362, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.2748498999332885, |
| "grad_norm": 0.29347736303425087, |
| "learning_rate": 2.453749105679386e-06, |
| "loss": 0.1465, |
| "step": 801 |
| }, |
| { |
| "epoch": 4.280186791194129, |
| "grad_norm": 0.27707210287753226, |
| "learning_rate": 2.418016403931909e-06, |
| "loss": 0.1329, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.28552368245497, |
| "grad_norm": 0.2920650228372569, |
| "learning_rate": 2.382529046002371e-06, |
| "loss": 0.1527, |
| "step": 803 |
| }, |
| { |
| "epoch": 4.29086057371581, |
| "grad_norm": 0.287631363925361, |
| "learning_rate": 2.347287527091082e-06, |
| "loss": 0.1422, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.296197464976651, |
| "grad_norm": 0.28709688346306034, |
| "learning_rate": 2.3122923389678607e-06, |
| "loss": 0.1285, |
| "step": 805 |
| }, |
| { |
| "epoch": 4.301534356237491, |
| "grad_norm": 0.27342773854893104, |
| "learning_rate": 2.2775439699651567e-06, |
| "loss": 0.1388, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.306871247498332, |
| "grad_norm": 0.2666822866123337, |
| "learning_rate": 2.2430429049712268e-06, |
| "loss": 0.1336, |
| "step": 807 |
| }, |
| { |
| "epoch": 4.312208138759173, |
| "grad_norm": 0.2983890987441143, |
| "learning_rate": 2.208789625423391e-06, |
| "loss": 0.1508, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.317545030020013, |
| "grad_norm": 0.2800353859335473, |
| "learning_rate": 2.174784609301306e-06, |
| "loss": 0.1319, |
| "step": 809 |
| }, |
| { |
| "epoch": 4.322881921280854, |
| "grad_norm": 0.29623766050646816, |
| "learning_rate": 2.141028331120276e-06, |
| "loss": 0.1513, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.328218812541694, |
| "grad_norm": 0.27571067910906955, |
| "learning_rate": 2.107521261924668e-06, |
| "loss": 0.1358, |
| "step": 811 |
| }, |
| { |
| "epoch": 4.333555703802535, |
| "grad_norm": 0.29103395140128885, |
| "learning_rate": 2.0742638692813033e-06, |
| "loss": 0.1309, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.338892595063376, |
| "grad_norm": 0.2933371021781572, |
| "learning_rate": 2.0412566172729554e-06, |
| "loss": 0.1351, |
| "step": 813 |
| }, |
| { |
| "epoch": 4.344229486324216, |
| "grad_norm": 0.27215608874364955, |
| "learning_rate": 2.0084999664918725e-06, |
| "loss": 0.1475, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.349566377585057, |
| "grad_norm": 0.2883150380634738, |
| "learning_rate": 1.9759943740333256e-06, |
| "loss": 0.1475, |
| "step": 815 |
| }, |
| { |
| "epoch": 4.354903268845897, |
| "grad_norm": 0.2777208010579726, |
| "learning_rate": 1.943740293489267e-06, |
| "loss": 0.1479, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.360240160106738, |
| "grad_norm": 0.2824439004885457, |
| "learning_rate": 1.9117381749419794e-06, |
| "loss": 0.1471, |
| "step": 817 |
| }, |
| { |
| "epoch": 4.365577051367579, |
| "grad_norm": 0.28150269206830153, |
| "learning_rate": 1.8799884649577915e-06, |
| "loss": 0.132, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.370913942628419, |
| "grad_norm": 0.29174288272730886, |
| "learning_rate": 1.8484916065808622e-06, |
| "loss": 0.1482, |
| "step": 819 |
| }, |
| { |
| "epoch": 4.37625083388926, |
| "grad_norm": 0.27656705304849727, |
| "learning_rate": 1.8172480393269797e-06, |
| "loss": 0.137, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.3815877251501, |
| "grad_norm": 0.2757754977908488, |
| "learning_rate": 1.7862581991774486e-06, |
| "loss": 0.1377, |
| "step": 821 |
| }, |
| { |
| "epoch": 4.386924616410941, |
| "grad_norm": 0.2750527107134215, |
| "learning_rate": 1.755522518572994e-06, |
| "loss": 0.1207, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.3922615076717815, |
| "grad_norm": 0.29276192591791234, |
| "learning_rate": 1.725041426407723e-06, |
| "loss": 0.1401, |
| "step": 823 |
| }, |
| { |
| "epoch": 4.3975983989326215, |
| "grad_norm": 0.29627925099149893, |
| "learning_rate": 1.6948153480231511e-06, |
| "loss": 0.1403, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.402935290193462, |
| "grad_norm": 0.2781883617488326, |
| "learning_rate": 1.6648447052022643e-06, |
| "loss": 0.1266, |
| "step": 825 |
| }, |
| { |
| "epoch": 4.408272181454302, |
| "grad_norm": 0.2996634586484779, |
| "learning_rate": 1.6351299161636202e-06, |
| "loss": 0.1599, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.413609072715143, |
| "grad_norm": 0.26162944731844284, |
| "learning_rate": 1.6056713955555349e-06, |
| "loss": 0.1365, |
| "step": 827 |
| }, |
| { |
| "epoch": 4.418945963975984, |
| "grad_norm": 0.30091733604376597, |
| "learning_rate": 1.5764695544502774e-06, |
| "loss": 0.1536, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.424282855236824, |
| "grad_norm": 0.28660862503859624, |
| "learning_rate": 1.5475248003383382e-06, |
| "loss": 0.1343, |
| "step": 829 |
| }, |
| { |
| "epoch": 4.429619746497665, |
| "grad_norm": 0.2913443376429063, |
| "learning_rate": 1.5188375371227525e-06, |
| "loss": 0.1519, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.434956637758505, |
| "grad_norm": 0.2701467713064408, |
| "learning_rate": 1.4904081651134527e-06, |
| "loss": 0.1262, |
| "step": 831 |
| }, |
| { |
| "epoch": 4.440293529019346, |
| "grad_norm": 0.2852211039949671, |
| "learning_rate": 1.462237081021689e-06, |
| "loss": 0.1501, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.445630420280187, |
| "grad_norm": 0.28582152725735244, |
| "learning_rate": 1.4343246779544929e-06, |
| "loss": 0.1482, |
| "step": 833 |
| }, |
| { |
| "epoch": 4.450967311541027, |
| "grad_norm": 0.2763605840804222, |
| "learning_rate": 1.4066713454091808e-06, |
| "loss": 0.1375, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.456304202801868, |
| "grad_norm": 0.2918688271426615, |
| "learning_rate": 1.3792774692679366e-06, |
| "loss": 0.1432, |
| "step": 835 |
| }, |
| { |
| "epoch": 4.461641094062708, |
| "grad_norm": 0.3054006010342176, |
| "learning_rate": 1.3521434317924186e-06, |
| "loss": 0.1491, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.466977985323549, |
| "grad_norm": 0.2824524390265842, |
| "learning_rate": 1.3252696116184184e-06, |
| "loss": 0.13, |
| "step": 837 |
| }, |
| { |
| "epoch": 4.47231487658439, |
| "grad_norm": 0.2892331408612295, |
| "learning_rate": 1.2986563837505894e-06, |
| "loss": 0.1433, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.47765176784523, |
| "grad_norm": 0.27746574091084786, |
| "learning_rate": 1.2723041195572106e-06, |
| "loss": 0.1375, |
| "step": 839 |
| }, |
| { |
| "epoch": 4.482988659106071, |
| "grad_norm": 0.2907412475565416, |
| "learning_rate": 1.246213186764995e-06, |
| "loss": 0.1578, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.488325550366911, |
| "grad_norm": 0.27648095040345927, |
| "learning_rate": 1.2203839494539738e-06, |
| "loss": 0.1415, |
| "step": 841 |
| }, |
| { |
| "epoch": 4.493662441627752, |
| "grad_norm": 0.2821554850140892, |
| "learning_rate": 1.1948167680523981e-06, |
| "loss": 0.1416, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.498999332888593, |
| "grad_norm": 0.2659555704630584, |
| "learning_rate": 1.1695119993317271e-06, |
| "loss": 0.1262, |
| "step": 843 |
| }, |
| { |
| "epoch": 4.504336224149433, |
| "grad_norm": 0.27101074307755924, |
| "learning_rate": 1.1444699964016448e-06, |
| "loss": 0.1339, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.509673115410274, |
| "grad_norm": 0.27920563868852216, |
| "learning_rate": 1.1196911087051143e-06, |
| "loss": 0.144, |
| "step": 845 |
| }, |
| { |
| "epoch": 4.515010006671114, |
| "grad_norm": 0.2917517252311828, |
| "learning_rate": 1.0951756820135294e-06, |
| "loss": 0.1562, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.5203468979319545, |
| "grad_norm": 0.2719082360193771, |
| "learning_rate": 1.070924058421876e-06, |
| "loss": 0.1393, |
| "step": 847 |
| }, |
| { |
| "epoch": 4.5256837891927955, |
| "grad_norm": 0.255901572414045, |
| "learning_rate": 1.0469365763439532e-06, |
| "loss": 0.1277, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.5310206804536355, |
| "grad_norm": 0.2763364181685816, |
| "learning_rate": 1.0232135705076596e-06, |
| "loss": 0.1551, |
| "step": 849 |
| }, |
| { |
| "epoch": 4.536357571714476, |
| "grad_norm": 0.2962740172540047, |
| "learning_rate": 9.997553719503239e-07, |
| "loss": 0.1561, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.541694462975316, |
| "grad_norm": 0.268054629742994, |
| "learning_rate": 9.765623080140774e-07, |
| "loss": 0.1234, |
| "step": 851 |
| }, |
| { |
| "epoch": 4.547031354236157, |
| "grad_norm": 0.3035670976166526, |
| "learning_rate": 9.536347023412928e-07, |
| "loss": 0.1636, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.552368245496998, |
| "grad_norm": 0.2793525064851917, |
| "learning_rate": 9.309728748700574e-07, |
| "loss": 0.1398, |
| "step": 853 |
| }, |
| { |
| "epoch": 4.557705136757838, |
| "grad_norm": 0.28016641299104156, |
| "learning_rate": 9.085771418297274e-07, |
| "loss": 0.1292, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.563042028018679, |
| "grad_norm": 0.27699022892058145, |
| "learning_rate": 8.864478157364997e-07, |
| "loss": 0.1435, |
| "step": 855 |
| }, |
| { |
| "epoch": 4.568378919279519, |
| "grad_norm": 0.2720294929447123, |
| "learning_rate": 8.645852053890547e-07, |
| "loss": 0.1324, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.57371581054036, |
| "grad_norm": 0.28532423639550997, |
| "learning_rate": 8.429896158642492e-07, |
| "loss": 0.1472, |
| "step": 857 |
| }, |
| { |
| "epoch": 4.579052701801201, |
| "grad_norm": 0.26985015917853045, |
| "learning_rate": 8.216613485128611e-07, |
| "loss": 0.141, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.584389593062041, |
| "grad_norm": 0.2704957723845787, |
| "learning_rate": 8.00600700955374e-07, |
| "loss": 0.1361, |
| "step": 859 |
| }, |
| { |
| "epoch": 4.589726484322882, |
| "grad_norm": 0.28012140457224527, |
| "learning_rate": 7.798079670778391e-07, |
| "loss": 0.1282, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.595063375583722, |
| "grad_norm": 0.2782972818576599, |
| "learning_rate": 7.592834370277624e-07, |
| "loss": 0.1243, |
| "step": 861 |
| }, |
| { |
| "epoch": 4.600400266844563, |
| "grad_norm": 0.29307525545180607, |
| "learning_rate": 7.390273972100614e-07, |
| "loss": 0.1442, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.605737158105404, |
| "grad_norm": 0.2702797863957165, |
| "learning_rate": 7.190401302830729e-07, |
| "loss": 0.1313, |
| "step": 863 |
| }, |
| { |
| "epoch": 4.611074049366244, |
| "grad_norm": 0.2884103769485427, |
| "learning_rate": 6.993219151545871e-07, |
| "loss": 0.1507, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.616410940627085, |
| "grad_norm": 0.3042376630394671, |
| "learning_rate": 6.798730269779907e-07, |
| "loss": 0.1413, |
| "step": 865 |
| }, |
| { |
| "epoch": 4.621747831887925, |
| "grad_norm": 0.30142009513304885, |
| "learning_rate": 6.60693737148399e-07, |
| "loss": 0.1411, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.627084723148766, |
| "grad_norm": 0.28563889877580295, |
| "learning_rate": 6.417843132988744e-07, |
| "loss": 0.138, |
| "step": 867 |
| }, |
| { |
| "epoch": 4.632421614409607, |
| "grad_norm": 0.2656629603197653, |
| "learning_rate": 6.231450192967048e-07, |
| "loss": 0.1421, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.637758505670447, |
| "grad_norm": 0.26399752051623476, |
| "learning_rate": 6.047761152397025e-07, |
| "loss": 0.1325, |
| "step": 869 |
| }, |
| { |
| "epoch": 4.643095396931288, |
| "grad_norm": 0.27939357858366015, |
| "learning_rate": 5.866778574525933e-07, |
| "loss": 0.1346, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.648432288192128, |
| "grad_norm": 0.268759877392077, |
| "learning_rate": 5.688504984834287e-07, |
| "loss": 0.1367, |
| "step": 871 |
| }, |
| { |
| "epoch": 4.6537691794529685, |
| "grad_norm": 0.2898319032741956, |
| "learning_rate": 5.512942871000549e-07, |
| "loss": 0.1516, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.6591060707138094, |
| "grad_norm": 0.28063437477691194, |
| "learning_rate": 5.340094682866603e-07, |
| "loss": 0.1278, |
| "step": 873 |
| }, |
| { |
| "epoch": 4.6644429619746495, |
| "grad_norm": 0.2872370132188477, |
| "learning_rate": 5.169962832403475e-07, |
| "loss": 0.1502, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.66977985323549, |
| "grad_norm": 0.2712192364166597, |
| "learning_rate": 5.002549693677594e-07, |
| "loss": 0.1337, |
| "step": 875 |
| }, |
| { |
| "epoch": 4.67511674449633, |
| "grad_norm": 0.29491089886365257, |
| "learning_rate": 4.837857602817808e-07, |
| "loss": 0.1503, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.680453635757171, |
| "grad_norm": 0.27587942631382745, |
| "learning_rate": 4.675888857982669e-07, |
| "loss": 0.1327, |
| "step": 877 |
| }, |
| { |
| "epoch": 4.685790527018012, |
| "grad_norm": 0.29332732966215386, |
| "learning_rate": 4.5166457193284386e-07, |
| "loss": 0.1458, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.691127418278852, |
| "grad_norm": 0.28714021402000245, |
| "learning_rate": 4.3601304089775366e-07, |
| "loss": 0.1466, |
| "step": 879 |
| }, |
| { |
| "epoch": 4.696464309539693, |
| "grad_norm": 0.26784688798952216, |
| "learning_rate": 4.2063451109874756e-07, |
| "loss": 0.1274, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.701801200800533, |
| "grad_norm": 0.2797049205351308, |
| "learning_rate": 4.055291971320485e-07, |
| "loss": 0.142, |
| "step": 881 |
| }, |
| { |
| "epoch": 4.707138092061374, |
| "grad_norm": 0.26674171880102343, |
| "learning_rate": 3.906973097813449e-07, |
| "loss": 0.1293, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.712474983322215, |
| "grad_norm": 0.25308546573318846, |
| "learning_rate": 3.76139056014857e-07, |
| "loss": 0.1315, |
| "step": 883 |
| }, |
| { |
| "epoch": 4.717811874583055, |
| "grad_norm": 0.2710104168106361, |
| "learning_rate": 3.6185463898245066e-07, |
| "loss": 0.1489, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.723148765843896, |
| "grad_norm": 0.28048882254908447, |
| "learning_rate": 3.478442580127972e-07, |
| "loss": 0.144, |
| "step": 885 |
| }, |
| { |
| "epoch": 4.728485657104736, |
| "grad_norm": 0.26485229244166536, |
| "learning_rate": 3.341081086105891e-07, |
| "loss": 0.1255, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.733822548365577, |
| "grad_norm": 0.2759331791232634, |
| "learning_rate": 3.2064638245382194e-07, |
| "loss": 0.1568, |
| "step": 887 |
| }, |
| { |
| "epoch": 4.739159439626418, |
| "grad_norm": 0.26413539339616676, |
| "learning_rate": 3.0745926739111033e-07, |
| "loss": 0.1342, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.744496330887258, |
| "grad_norm": 0.28475596898962846, |
| "learning_rate": 2.9454694743907386e-07, |
| "loss": 0.1398, |
| "step": 889 |
| }, |
| { |
| "epoch": 4.749833222148099, |
| "grad_norm": 0.277687235094201, |
| "learning_rate": 2.819096027797641e-07, |
| "loss": 0.1342, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.755170113408939, |
| "grad_norm": 0.2957077369519711, |
| "learning_rate": 2.6954740975815076e-07, |
| "loss": 0.1464, |
| "step": 891 |
| }, |
| { |
| "epoch": 4.76050700466978, |
| "grad_norm": 0.2907912947380036, |
| "learning_rate": 2.57460540879666e-07, |
| "loss": 0.154, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.765843895930621, |
| "grad_norm": 0.2801914453007158, |
| "learning_rate": 2.4564916480778855e-07, |
| "loss": 0.1468, |
| "step": 893 |
| }, |
| { |
| "epoch": 4.771180787191461, |
| "grad_norm": 0.2711625198266653, |
| "learning_rate": 2.3411344636169898e-07, |
| "loss": 0.1301, |
| "step": 894 |
| }, |
| { |
| "epoch": 4.776517678452302, |
| "grad_norm": 0.2839350340429628, |
| "learning_rate": 2.228535465139703e-07, |
| "loss": 0.1495, |
| "step": 895 |
| }, |
| { |
| "epoch": 4.781854569713142, |
| "grad_norm": 0.27595704547443245, |
| "learning_rate": 2.1186962238833653e-07, |
| "loss": 0.1238, |
| "step": 896 |
| }, |
| { |
| "epoch": 4.7871914609739825, |
| "grad_norm": 0.28199603087443903, |
| "learning_rate": 2.0116182725748334e-07, |
| "loss": 0.1334, |
| "step": 897 |
| }, |
| { |
| "epoch": 4.792528352234823, |
| "grad_norm": 0.2743312329373981, |
| "learning_rate": 1.907303105409164e-07, |
| "loss": 0.146, |
| "step": 898 |
| }, |
| { |
| "epoch": 4.7978652434956635, |
| "grad_norm": 0.2780483555703688, |
| "learning_rate": 1.80575217802883e-07, |
| "loss": 0.1459, |
| "step": 899 |
| }, |
| { |
| "epoch": 4.803202134756504, |
| "grad_norm": 0.2805564993027487, |
| "learning_rate": 1.7069669075032492e-07, |
| "loss": 0.1393, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.808539026017344, |
| "grad_norm": 0.2703447762812398, |
| "learning_rate": 1.6109486723092426e-07, |
| "loss": 0.1315, |
| "step": 901 |
| }, |
| { |
| "epoch": 4.813875917278185, |
| "grad_norm": 0.2847007738527403, |
| "learning_rate": 1.5176988123114966e-07, |
| "loss": 0.1495, |
| "step": 902 |
| }, |
| { |
| "epoch": 4.819212808539026, |
| "grad_norm": 0.2653003442730712, |
| "learning_rate": 1.4272186287441535e-07, |
| "loss": 0.1355, |
| "step": 903 |
| }, |
| { |
| "epoch": 4.824549699799866, |
| "grad_norm": 0.2822152085841712, |
| "learning_rate": 1.3395093841925166e-07, |
| "loss": 0.1576, |
| "step": 904 |
| }, |
| { |
| "epoch": 4.829886591060707, |
| "grad_norm": 0.27304693619702214, |
| "learning_rate": 1.2545723025753743e-07, |
| "loss": 0.1316, |
| "step": 905 |
| }, |
| { |
| "epoch": 4.835223482321548, |
| "grad_norm": 0.27468533143866714, |
| "learning_rate": 1.1724085691280806e-07, |
| "loss": 0.132, |
| "step": 906 |
| }, |
| { |
| "epoch": 4.840560373582388, |
| "grad_norm": 0.27963914069565776, |
| "learning_rate": 1.0930193303858805e-07, |
| "loss": 0.1461, |
| "step": 907 |
| }, |
| { |
| "epoch": 4.845897264843229, |
| "grad_norm": 0.2565432498525225, |
| "learning_rate": 1.0164056941679657e-07, |
| "loss": 0.1246, |
| "step": 908 |
| }, |
| { |
| "epoch": 4.851234156104069, |
| "grad_norm": 0.2811061477351414, |
| "learning_rate": 9.42568729561999e-08, |
| "loss": 0.1443, |
| "step": 909 |
| }, |
| { |
| "epoch": 4.85657104736491, |
| "grad_norm": 0.2857439707607634, |
| "learning_rate": 8.715094669092816e-08, |
| "loss": 0.1338, |
| "step": 910 |
| }, |
| { |
| "epoch": 4.861907938625751, |
| "grad_norm": 0.2767918923747537, |
| "learning_rate": 8.032288977901647e-08, |
| "loss": 0.1317, |
| "step": 911 |
| }, |
| { |
| "epoch": 4.867244829886591, |
| "grad_norm": 0.27736070767280885, |
| "learning_rate": 7.377279750104605e-08, |
| "loss": 0.1332, |
| "step": 912 |
| }, |
| { |
| "epoch": 4.872581721147432, |
| "grad_norm": 0.27702046457250123, |
| "learning_rate": 6.750076125880079e-08, |
| "loss": 0.1403, |
| "step": 913 |
| }, |
| { |
| "epoch": 4.877918612408272, |
| "grad_norm": 0.2726767818543548, |
| "learning_rate": 6.150686857399057e-08, |
| "loss": 0.1375, |
| "step": 914 |
| }, |
| { |
| "epoch": 4.883255503669113, |
| "grad_norm": 0.2770778486020138, |
| "learning_rate": 5.5791203087041114e-08, |
| "loss": 0.1448, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.888592394929954, |
| "grad_norm": 0.2673740108848184, |
| "learning_rate": 5.0353844555910415e-08, |
| "loss": 0.1401, |
| "step": 916 |
| }, |
| { |
| "epoch": 4.893929286190794, |
| "grad_norm": 0.2773633602325955, |
| "learning_rate": 4.5194868854991913e-08, |
| "loss": 0.1408, |
| "step": 917 |
| }, |
| { |
| "epoch": 4.899266177451635, |
| "grad_norm": 0.29556441642236714, |
| "learning_rate": 4.031434797404421e-08, |
| "loss": 0.1381, |
| "step": 918 |
| }, |
| { |
| "epoch": 4.904603068712475, |
| "grad_norm": 0.2597939381066577, |
| "learning_rate": 3.571235001719853e-08, |
| "loss": 0.1283, |
| "step": 919 |
| }, |
| { |
| "epoch": 4.909939959973316, |
| "grad_norm": 0.26747252132965876, |
| "learning_rate": 3.13889392019906e-08, |
| "loss": 0.1424, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.9152768512341565, |
| "grad_norm": 0.28253875872873746, |
| "learning_rate": 2.734417585848137e-08, |
| "loss": 0.1428, |
| "step": 921 |
| }, |
| { |
| "epoch": 4.9206137424949965, |
| "grad_norm": 0.28810571816428815, |
| "learning_rate": 2.3578116428408792e-08, |
| "loss": 0.1478, |
| "step": 922 |
| }, |
| { |
| "epoch": 4.925950633755837, |
| "grad_norm": 0.2585384085401356, |
| "learning_rate": 2.0090813464395122e-08, |
| "loss": 0.1388, |
| "step": 923 |
| }, |
| { |
| "epoch": 4.931287525016677, |
| "grad_norm": 0.2820464252629273, |
| "learning_rate": 1.6882315629225267e-08, |
| "loss": 0.1495, |
| "step": 924 |
| }, |
| { |
| "epoch": 4.936624416277518, |
| "grad_norm": 0.283069152205124, |
| "learning_rate": 1.3952667695156241e-08, |
| "loss": 0.1303, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.941961307538359, |
| "grad_norm": 0.27731020890016556, |
| "learning_rate": 1.1301910543295436e-08, |
| "loss": 0.1329, |
| "step": 926 |
| }, |
| { |
| "epoch": 4.947298198799199, |
| "grad_norm": 0.2906776181838218, |
| "learning_rate": 8.93008116303884e-09, |
| "loss": 0.1623, |
| "step": 927 |
| }, |
| { |
| "epoch": 4.95263509006004, |
| "grad_norm": 0.27016994542838946, |
| "learning_rate": 6.8372126515403594e-09, |
| "loss": 0.1419, |
| "step": 928 |
| }, |
| { |
| "epoch": 4.95797198132088, |
| "grad_norm": 0.268976173834872, |
| "learning_rate": 5.0233342132632865e-09, |
| "loss": 0.1335, |
| "step": 929 |
| }, |
| { |
| "epoch": 4.963308872581721, |
| "grad_norm": 0.2831312656649367, |
| "learning_rate": 3.4884711595650765e-09, |
| "loss": 0.1481, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.968645763842562, |
| "grad_norm": 0.2723703468394432, |
| "learning_rate": 2.2326449083420745e-09, |
| "loss": 0.1325, |
| "step": 931 |
| }, |
| { |
| "epoch": 4.973982655103402, |
| "grad_norm": 0.2875352034119992, |
| "learning_rate": 1.255872983740858e-09, |
| "loss": 0.1477, |
| "step": 932 |
| }, |
| { |
| "epoch": 4.979319546364243, |
| "grad_norm": 0.2737232836278831, |
| "learning_rate": 5.581690159006669e-10, |
| "loss": 0.1447, |
| "step": 933 |
| }, |
| { |
| "epoch": 4.984656437625083, |
| "grad_norm": 0.2956259263292112, |
| "learning_rate": 1.3954274078020748e-10, |
| "loss": 0.1562, |
| "step": 934 |
| }, |
| { |
| "epoch": 4.989993328885924, |
| "grad_norm": 0.285955394980644, |
| "learning_rate": 0.0, |
| "loss": 0.1344, |
| "step": 935 |
| }, |
| { |
| "epoch": 4.989993328885924, |
| "step": 935, |
| "total_flos": 1.946622601061204e+18, |
| "train_loss": 0.3608587793966028, |
| "train_runtime": 57464.5588, |
| "train_samples_per_second": 2.087, |
| "train_steps_per_second": 0.016 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 935, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.946622601061204e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |