| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9975440032746623, |
| "eval_steps": 500, |
| "global_step": 1220, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.3300960063934326, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 0.9966, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.4113194942474365, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 1.1253, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 0.2486647665500641, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 1.0721, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.2249160259962082, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.9033, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.3706735074520111, |
| "learning_rate": 0.00015, |
| "loss": 1.0498, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.28104931116104126, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.9108, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.27497801184654236, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.9038, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.30283215641975403, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.8605, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.33457252383232117, |
| "learning_rate": 0.00027, |
| "loss": 0.9049, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.37725692987442017, |
| "learning_rate": 0.0003, |
| "loss": 0.772, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.27986466884613037, |
| "learning_rate": 0.00029975206611570246, |
| "loss": 0.7666, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.30687034130096436, |
| "learning_rate": 0.00029950413223140494, |
| "loss": 0.8312, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.3321741819381714, |
| "learning_rate": 0.0002992561983471074, |
| "loss": 0.8308, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.29080134630203247, |
| "learning_rate": 0.0002990082644628099, |
| "loss": 0.7597, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.33823856711387634, |
| "learning_rate": 0.0002987603305785124, |
| "loss": 0.8693, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.3461182117462158, |
| "learning_rate": 0.0002985123966942149, |
| "loss": 1.0571, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.22306275367736816, |
| "learning_rate": 0.0002982644628099173, |
| "loss": 0.7706, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 154.4940643310547, |
| "learning_rate": 0.0002980165289256198, |
| "loss": 2.6519, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.22956405580043793, |
| "learning_rate": 0.00029776859504132227, |
| "loss": 0.6897, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.25711989402770996, |
| "learning_rate": 0.00029752066115702476, |
| "loss": 0.7338, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.2565441131591797, |
| "learning_rate": 0.00029727272727272724, |
| "loss": 0.8211, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.2437434047460556, |
| "learning_rate": 0.0002970247933884297, |
| "loss": 0.8027, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.21284469962120056, |
| "learning_rate": 0.0002967768595041322, |
| "loss": 0.7944, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.23338356614112854, |
| "learning_rate": 0.0002965289256198347, |
| "loss": 0.7696, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.25512659549713135, |
| "learning_rate": 0.0002962809917355372, |
| "loss": 0.7693, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.19500921666622162, |
| "learning_rate": 0.0002960330578512396, |
| "loss": 0.7599, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.2554054260253906, |
| "learning_rate": 0.00029578512396694214, |
| "loss": 0.966, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.17682747542858124, |
| "learning_rate": 0.0002955371900826446, |
| "loss": 0.676, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.20516635477542877, |
| "learning_rate": 0.0002952892561983471, |
| "loss": 0.8144, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.3275119662284851, |
| "learning_rate": 0.0002950413223140496, |
| "loss": 0.7704, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.22231778502464294, |
| "learning_rate": 0.000294793388429752, |
| "loss": 0.7614, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.17065812647342682, |
| "learning_rate": 0.0002945454545454545, |
| "loss": 0.5634, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.1771956831216812, |
| "learning_rate": 0.000294297520661157, |
| "loss": 0.7607, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.26693442463874817, |
| "learning_rate": 0.00029404958677685947, |
| "loss": 0.8171, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.409070611000061, |
| "learning_rate": 0.00029380165289256196, |
| "loss": 0.7791, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.20727217197418213, |
| "learning_rate": 0.00029355371900826444, |
| "loss": 0.7357, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.2145707905292511, |
| "learning_rate": 0.0002933057851239669, |
| "loss": 0.8458, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.2068527340888977, |
| "learning_rate": 0.0002930578512396694, |
| "loss": 0.78, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.22432388365268707, |
| "learning_rate": 0.00029280991735537184, |
| "loss": 0.8523, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.19982610642910004, |
| "learning_rate": 0.0002925619834710743, |
| "loss": 0.7372, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 6.248472213745117, |
| "learning_rate": 0.00029231404958677686, |
| "loss": 0.7399, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.2269737422466278, |
| "learning_rate": 0.00029206611570247934, |
| "loss": 0.7842, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.23117898404598236, |
| "learning_rate": 0.0002918181818181818, |
| "loss": 0.7111, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.22466522455215454, |
| "learning_rate": 0.00029157024793388425, |
| "loss": 0.8979, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.20770332217216492, |
| "learning_rate": 0.00029132231404958674, |
| "loss": 0.774, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.2376495748758316, |
| "learning_rate": 0.0002910743801652892, |
| "loss": 0.7216, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.2470778226852417, |
| "learning_rate": 0.0002908264462809917, |
| "loss": 0.7369, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.23465900123119354, |
| "learning_rate": 0.0002905785123966942, |
| "loss": 0.7528, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5718627572059631, |
| "learning_rate": 0.00029033057851239667, |
| "loss": 0.7535, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.21493370831012726, |
| "learning_rate": 0.00029008264462809916, |
| "loss": 0.8593, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.21197210252285004, |
| "learning_rate": 0.00028983471074380164, |
| "loss": 0.8013, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.20836398005485535, |
| "learning_rate": 0.0002895867768595041, |
| "loss": 0.7905, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.2096678912639618, |
| "learning_rate": 0.00028933884297520655, |
| "loss": 0.6754, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.25898435711860657, |
| "learning_rate": 0.00028909090909090904, |
| "loss": 0.7725, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.23370735347270966, |
| "learning_rate": 0.0002888429752066116, |
| "loss": 0.7007, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.23006942868232727, |
| "learning_rate": 0.00028859504132231406, |
| "loss": 0.7534, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.20855402946472168, |
| "learning_rate": 0.0002883471074380165, |
| "loss": 0.9491, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.24340493977069855, |
| "learning_rate": 0.00028809917355371897, |
| "loss": 0.8089, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.20169466733932495, |
| "learning_rate": 0.00028785123966942145, |
| "loss": 0.64, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.23272906243801117, |
| "learning_rate": 0.00028760330578512394, |
| "loss": 0.8456, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.1767100691795349, |
| "learning_rate": 0.0002873553719008264, |
| "loss": 0.6686, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.24511106312274933, |
| "learning_rate": 0.0002871074380165289, |
| "loss": 0.6998, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.22284479439258575, |
| "learning_rate": 0.0002868595041322314, |
| "loss": 0.6699, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.21842750906944275, |
| "learning_rate": 0.00028661157024793387, |
| "loss": 0.7413, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.2669163644313812, |
| "learning_rate": 0.00028636363636363636, |
| "loss": 0.931, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.1864808052778244, |
| "learning_rate": 0.0002861157024793388, |
| "loss": 0.5652, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.18369853496551514, |
| "learning_rate": 0.00028586776859504127, |
| "loss": 0.6847, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.22353056073188782, |
| "learning_rate": 0.00028561983471074375, |
| "loss": 0.598, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.20269523561000824, |
| "learning_rate": 0.0002853719008264463, |
| "loss": 0.8688, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.2291198968887329, |
| "learning_rate": 0.0002851239669421488, |
| "loss": 0.7535, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.22033120691776276, |
| "learning_rate": 0.0002848760330578512, |
| "loss": 0.8377, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.2687983214855194, |
| "learning_rate": 0.0002846280991735537, |
| "loss": 0.6926, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.1933681070804596, |
| "learning_rate": 0.00028438016528925617, |
| "loss": 0.6276, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.2820705473423004, |
| "learning_rate": 0.00028413223140495865, |
| "loss": 0.848, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.19532324373722076, |
| "learning_rate": 0.00028388429752066114, |
| "loss": 0.6198, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.25057846307754517, |
| "learning_rate": 0.0002836363636363636, |
| "loss": 0.6838, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.2168462574481964, |
| "learning_rate": 0.0002833884297520661, |
| "loss": 0.7885, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.2106674313545227, |
| "learning_rate": 0.0002831404958677686, |
| "loss": 0.6757, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.24460363388061523, |
| "learning_rate": 0.000282892561983471, |
| "loss": 0.7414, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.3706071078777313, |
| "learning_rate": 0.0002826446280991735, |
| "loss": 0.621, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.2251998782157898, |
| "learning_rate": 0.000282396694214876, |
| "loss": 0.7453, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.24521738290786743, |
| "learning_rate": 0.00028214876033057847, |
| "loss": 0.6985, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.2262742966413498, |
| "learning_rate": 0.000281900826446281, |
| "loss": 0.6316, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.19723354279994965, |
| "learning_rate": 0.00028165289256198344, |
| "loss": 0.4798, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.20684833824634552, |
| "learning_rate": 0.0002814049586776859, |
| "loss": 0.7993, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.19534814357757568, |
| "learning_rate": 0.0002811570247933884, |
| "loss": 0.7735, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.2585545480251312, |
| "learning_rate": 0.0002809090909090909, |
| "loss": 0.8126, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.2510583996772766, |
| "learning_rate": 0.00028066115702479337, |
| "loss": 0.6973, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.1884051263332367, |
| "learning_rate": 0.00028041322314049585, |
| "loss": 0.701, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.2526257038116455, |
| "learning_rate": 0.00028016528925619834, |
| "loss": 0.7132, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.200734481215477, |
| "learning_rate": 0.0002799173553719008, |
| "loss": 0.7024, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.2404022514820099, |
| "learning_rate": 0.0002796694214876033, |
| "loss": 0.704, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.23063871264457703, |
| "learning_rate": 0.00027942148760330573, |
| "loss": 0.6312, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.1759747564792633, |
| "learning_rate": 0.0002791735537190082, |
| "loss": 0.6577, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.2009582370519638, |
| "learning_rate": 0.0002789256198347107, |
| "loss": 0.8036, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.2200164943933487, |
| "learning_rate": 0.0002786776859504132, |
| "loss": 0.7101, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.19693537056446075, |
| "learning_rate": 0.00027842975206611567, |
| "loss": 0.6221, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.23269779980182648, |
| "learning_rate": 0.00027818181818181815, |
| "loss": 0.8264, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.2440226823091507, |
| "learning_rate": 0.00027793388429752064, |
| "loss": 0.8051, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.2307034134864807, |
| "learning_rate": 0.0002776859504132231, |
| "loss": 0.631, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.2530567944049835, |
| "learning_rate": 0.0002774380165289256, |
| "loss": 0.8616, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.2808806300163269, |
| "learning_rate": 0.0002771900826446281, |
| "loss": 0.8333, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.20667941868305206, |
| "learning_rate": 0.00027694214876033057, |
| "loss": 0.7212, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.17540781199932098, |
| "learning_rate": 0.00027669421487603305, |
| "loss": 0.5964, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.2526637613773346, |
| "learning_rate": 0.00027644628099173554, |
| "loss": 0.6868, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.2137339860200882, |
| "learning_rate": 0.00027619834710743797, |
| "loss": 0.6155, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.21061092615127563, |
| "learning_rate": 0.00027595041322314045, |
| "loss": 0.813, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.21619191765785217, |
| "learning_rate": 0.00027570247933884293, |
| "loss": 0.8046, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.2212170660495758, |
| "learning_rate": 0.0002754545454545454, |
| "loss": 0.6706, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.23427413403987885, |
| "learning_rate": 0.0002752066115702479, |
| "loss": 0.7152, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.20566123723983765, |
| "learning_rate": 0.0002749586776859504, |
| "loss": 0.6568, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.22977930307388306, |
| "learning_rate": 0.00027471074380165287, |
| "loss": 0.7832, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.28307485580444336, |
| "learning_rate": 0.00027446280991735535, |
| "loss": 0.7446, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.19567596912384033, |
| "learning_rate": 0.00027421487603305784, |
| "loss": 0.6394, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.24577689170837402, |
| "learning_rate": 0.0002739669421487603, |
| "loss": 0.6389, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.2180463820695877, |
| "learning_rate": 0.0002737190082644628, |
| "loss": 0.7814, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.19546380639076233, |
| "learning_rate": 0.0002734710743801653, |
| "loss": 0.8312, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.22698360681533813, |
| "learning_rate": 0.00027322314049586777, |
| "loss": 0.7443, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.22987066209316254, |
| "learning_rate": 0.0002729752066115702, |
| "loss": 0.7839, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.20548178255558014, |
| "learning_rate": 0.0002727272727272727, |
| "loss": 0.7805, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.2477702796459198, |
| "learning_rate": 0.00027247933884297517, |
| "loss": 0.5694, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.20593340694904327, |
| "learning_rate": 0.00027223140495867765, |
| "loss": 0.6479, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.23635917901992798, |
| "learning_rate": 0.00027198347107438013, |
| "loss": 0.8107, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.25808119773864746, |
| "learning_rate": 0.0002717355371900826, |
| "loss": 0.7876, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.22156469523906708, |
| "learning_rate": 0.0002714876033057851, |
| "loss": 0.7261, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.19892215728759766, |
| "learning_rate": 0.0002712396694214876, |
| "loss": 0.6874, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.24936752021312714, |
| "learning_rate": 0.00027099173553719007, |
| "loss": 0.6155, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.23287539184093475, |
| "learning_rate": 0.0002707438016528925, |
| "loss": 0.602, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.2086639404296875, |
| "learning_rate": 0.00027049586776859504, |
| "loss": 0.7198, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.24974922835826874, |
| "learning_rate": 0.0002702479338842975, |
| "loss": 0.6873, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.2066827118396759, |
| "learning_rate": 0.00027, |
| "loss": 0.5821, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.28004395961761475, |
| "learning_rate": 0.0002697520661157025, |
| "loss": 0.7864, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.22391608357429504, |
| "learning_rate": 0.0002695041322314049, |
| "loss": 0.6773, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.2821199297904968, |
| "learning_rate": 0.0002692561983471074, |
| "loss": 0.6806, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.21736428141593933, |
| "learning_rate": 0.0002690082644628099, |
| "loss": 0.6662, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.23889939486980438, |
| "learning_rate": 0.00026876033057851237, |
| "loss": 0.6356, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.21096719801425934, |
| "learning_rate": 0.00026851239669421485, |
| "loss": 0.6762, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.22622421383857727, |
| "learning_rate": 0.00026826446280991733, |
| "loss": 0.8085, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.19824957847595215, |
| "learning_rate": 0.0002680165289256198, |
| "loss": 0.6031, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.24482691287994385, |
| "learning_rate": 0.0002677685950413223, |
| "loss": 0.6649, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.21291929483413696, |
| "learning_rate": 0.0002675206611570248, |
| "loss": 0.6671, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.2202674299478531, |
| "learning_rate": 0.0002672727272727272, |
| "loss": 0.6469, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.23572632670402527, |
| "learning_rate": 0.0002670247933884297, |
| "loss": 0.7377, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.2051907777786255, |
| "learning_rate": 0.00026677685950413224, |
| "loss": 0.6217, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.23270072042942047, |
| "learning_rate": 0.0002665289256198347, |
| "loss": 0.7933, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.20652809739112854, |
| "learning_rate": 0.00026628099173553715, |
| "loss": 0.6007, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.23084674775600433, |
| "learning_rate": 0.00026603305785123963, |
| "loss": 0.701, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.25663891434669495, |
| "learning_rate": 0.0002657851239669421, |
| "loss": 0.7271, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.25880497694015503, |
| "learning_rate": 0.0002655371900826446, |
| "loss": 0.6562, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.19349205493927002, |
| "learning_rate": 0.0002652892561983471, |
| "loss": 0.5016, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.2401740401983261, |
| "learning_rate": 0.00026504132231404957, |
| "loss": 0.6978, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.19495394825935364, |
| "learning_rate": 0.00026479338842975205, |
| "loss": 0.5562, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.21485286951065063, |
| "learning_rate": 0.00026454545454545453, |
| "loss": 0.7847, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.241348534822464, |
| "learning_rate": 0.000264297520661157, |
| "loss": 0.7513, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.3316986858844757, |
| "learning_rate": 0.00026404958677685945, |
| "loss": 0.664, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.2419958859682083, |
| "learning_rate": 0.00026380165289256193, |
| "loss": 0.7322, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.2868640124797821, |
| "learning_rate": 0.0002635537190082644, |
| "loss": 0.7004, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.24806949496269226, |
| "learning_rate": 0.00026330578512396695, |
| "loss": 0.6497, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.23873400688171387, |
| "learning_rate": 0.00026305785123966944, |
| "loss": 0.7543, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.2480355203151703, |
| "learning_rate": 0.00026280991735537187, |
| "loss": 0.6048, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.2619112730026245, |
| "learning_rate": 0.00026256198347107435, |
| "loss": 0.762, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.22763262689113617, |
| "learning_rate": 0.00026231404958677683, |
| "loss": 0.6557, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.3291528522968292, |
| "learning_rate": 0.0002620661157024793, |
| "loss": 0.7059, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.2959338426589966, |
| "learning_rate": 0.0002618181818181818, |
| "loss": 0.6622, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.23001112043857574, |
| "learning_rate": 0.0002615702479338843, |
| "loss": 0.6465, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.1998877376317978, |
| "learning_rate": 0.00026132231404958677, |
| "loss": 0.666, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.23009613156318665, |
| "learning_rate": 0.00026107438016528925, |
| "loss": 0.8793, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.24525685608386993, |
| "learning_rate": 0.0002608264462809917, |
| "loss": 0.8009, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.21605077385902405, |
| "learning_rate": 0.00026057851239669416, |
| "loss": 0.5459, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.2576725482940674, |
| "learning_rate": 0.00026033057851239665, |
| "loss": 0.6818, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.23385170102119446, |
| "learning_rate": 0.00026008264462809913, |
| "loss": 0.7559, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.1973017454147339, |
| "learning_rate": 0.00025983471074380167, |
| "loss": 0.6798, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.22262559831142426, |
| "learning_rate": 0.0002595867768595041, |
| "loss": 0.5566, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.23010462522506714, |
| "learning_rate": 0.0002593388429752066, |
| "loss": 0.7101, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.21676452457904816, |
| "learning_rate": 0.00025909090909090907, |
| "loss": 0.7038, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.22475261986255646, |
| "learning_rate": 0.00025884297520661155, |
| "loss": 0.7812, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.28893202543258667, |
| "learning_rate": 0.00025859504132231403, |
| "loss": 0.5925, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.22777552902698517, |
| "learning_rate": 0.0002583471074380165, |
| "loss": 0.7319, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.2287953644990921, |
| "learning_rate": 0.000258099173553719, |
| "loss": 0.7775, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.2049843668937683, |
| "learning_rate": 0.0002578512396694215, |
| "loss": 0.7448, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.22585280239582062, |
| "learning_rate": 0.00025760330578512397, |
| "loss": 0.59, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.23159150779247284, |
| "learning_rate": 0.0002573553719008264, |
| "loss": 0.737, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.3393082320690155, |
| "learning_rate": 0.0002571074380165289, |
| "loss": 0.6948, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.2345617413520813, |
| "learning_rate": 0.00025685950413223136, |
| "loss": 0.6351, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.23474591970443726, |
| "learning_rate": 0.00025661157024793385, |
| "loss": 0.6643, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.2473030984401703, |
| "learning_rate": 0.00025636363636363633, |
| "loss": 0.7663, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.2971685230731964, |
| "learning_rate": 0.0002561157024793388, |
| "loss": 0.7449, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.2745087742805481, |
| "learning_rate": 0.0002558677685950413, |
| "loss": 0.6125, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.23520545661449432, |
| "learning_rate": 0.0002556198347107438, |
| "loss": 0.573, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.2955464720726013, |
| "learning_rate": 0.00025537190082644627, |
| "loss": 0.5315, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.23987281322479248, |
| "learning_rate": 0.00025512396694214875, |
| "loss": 0.5636, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.24263744056224823, |
| "learning_rate": 0.00025487603305785123, |
| "loss": 0.6047, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.26061922311782837, |
| "learning_rate": 0.0002546280991735537, |
| "loss": 0.7812, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.2458687126636505, |
| "learning_rate": 0.0002543801652892562, |
| "loss": 0.58, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.24598994851112366, |
| "learning_rate": 0.00025413223140495863, |
| "loss": 0.7432, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.248992919921875, |
| "learning_rate": 0.0002538842975206611, |
| "loss": 0.6953, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.2518531382083893, |
| "learning_rate": 0.0002536363636363636, |
| "loss": 0.6707, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.23844210803508759, |
| "learning_rate": 0.0002533884297520661, |
| "loss": 0.6285, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.21948237717151642, |
| "learning_rate": 0.00025314049586776856, |
| "loss": 0.6859, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.2003835141658783, |
| "learning_rate": 0.00025289256198347105, |
| "loss": 0.6305, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.23421582579612732, |
| "learning_rate": 0.00025264462809917353, |
| "loss": 0.7164, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.22344104945659637, |
| "learning_rate": 0.000252396694214876, |
| "loss": 0.6498, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.17792212963104248, |
| "learning_rate": 0.0002521487603305785, |
| "loss": 0.614, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.217886820435524, |
| "learning_rate": 0.000251900826446281, |
| "loss": 0.7033, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.209726020693779, |
| "learning_rate": 0.00025165289256198347, |
| "loss": 0.5913, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.2401910424232483, |
| "learning_rate": 0.00025140495867768595, |
| "loss": 0.6405, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.21315626800060272, |
| "learning_rate": 0.00025115702479338843, |
| "loss": 0.7369, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.20102320611476898, |
| "learning_rate": 0.00025090909090909086, |
| "loss": 0.6245, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.20447981357574463, |
| "learning_rate": 0.00025066115702479335, |
| "loss": 0.5423, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.24979281425476074, |
| "learning_rate": 0.00025041322314049583, |
| "loss": 0.8078, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.20141547918319702, |
| "learning_rate": 0.0002501652892561983, |
| "loss": 0.7386, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.2538990378379822, |
| "learning_rate": 0.0002499173553719008, |
| "loss": 0.7219, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.2613961100578308, |
| "learning_rate": 0.0002496694214876033, |
| "loss": 0.7903, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.24777857959270477, |
| "learning_rate": 0.00024942148760330576, |
| "loss": 0.664, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.21958425641059875, |
| "learning_rate": 0.00024917355371900825, |
| "loss": 0.6755, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.2218528538942337, |
| "learning_rate": 0.00024892561983471073, |
| "loss": 0.5568, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.23632755875587463, |
| "learning_rate": 0.00024867768595041316, |
| "loss": 0.6858, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.2641279697418213, |
| "learning_rate": 0.0002484297520661157, |
| "loss": 0.7783, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.3147680163383484, |
| "learning_rate": 0.0002481818181818182, |
| "loss": 0.662, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.27947697043418884, |
| "learning_rate": 0.00024793388429752067, |
| "loss": 0.6477, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.2297278195619583, |
| "learning_rate": 0.00024768595041322315, |
| "loss": 0.5895, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.23085851967334747, |
| "learning_rate": 0.0002474380165289256, |
| "loss": 0.5806, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.19654251635074615, |
| "learning_rate": 0.00024719008264462806, |
| "loss": 0.5942, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.2467166632413864, |
| "learning_rate": 0.00024694214876033055, |
| "loss": 0.5059, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.22614917159080505, |
| "learning_rate": 0.00024669421487603303, |
| "loss": 0.643, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.2622920274734497, |
| "learning_rate": 0.0002464462809917355, |
| "loss": 0.6257, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.21843163669109344, |
| "learning_rate": 0.000246198347107438, |
| "loss": 0.6057, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.2294640988111496, |
| "learning_rate": 0.0002459504132231405, |
| "loss": 0.6876, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.1791463941335678, |
| "learning_rate": 0.00024570247933884296, |
| "loss": 0.5348, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.17243699729442596, |
| "learning_rate": 0.00024545454545454545, |
| "loss": 0.5966, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.22769273817539215, |
| "learning_rate": 0.0002452066115702479, |
| "loss": 0.7912, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.2325255423784256, |
| "learning_rate": 0.0002449586776859504, |
| "loss": 0.7441, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.24277740716934204, |
| "learning_rate": 0.0002447107438016529, |
| "loss": 0.6653, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.21596141159534454, |
| "learning_rate": 0.0002444628099173554, |
| "loss": 0.6668, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.20814135670661926, |
| "learning_rate": 0.0002442148760330578, |
| "loss": 0.6306, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.25570017099380493, |
| "learning_rate": 0.0002439669421487603, |
| "loss": 0.6524, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.2502390146255493, |
| "learning_rate": 0.00024371900826446278, |
| "loss": 0.6048, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.23688243329524994, |
| "learning_rate": 0.0002434710743801653, |
| "loss": 0.568, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.21041709184646606, |
| "learning_rate": 0.00024322314049586777, |
| "loss": 0.6908, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.21656759083271027, |
| "learning_rate": 0.00024297520661157023, |
| "loss": 0.4993, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.25133028626441956, |
| "learning_rate": 0.0002427272727272727, |
| "loss": 0.718, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.22228790819644928, |
| "learning_rate": 0.0002424793388429752, |
| "loss": 0.6146, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.26273205876350403, |
| "learning_rate": 0.00024223140495867768, |
| "loss": 0.7459, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.2156606763601303, |
| "learning_rate": 0.00024198347107438014, |
| "loss": 0.6692, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.2075020670890808, |
| "learning_rate": 0.00024173553719008262, |
| "loss": 0.6427, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.25821176171302795, |
| "learning_rate": 0.0002414876033057851, |
| "loss": 0.7964, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.23016126453876495, |
| "learning_rate": 0.0002412396694214876, |
| "loss": 0.536, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.23115016520023346, |
| "learning_rate": 0.00024099173553719004, |
| "loss": 0.6053, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.18249157071113586, |
| "learning_rate": 0.00024074380165289253, |
| "loss": 0.6574, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.28391778469085693, |
| "learning_rate": 0.000240495867768595, |
| "loss": 0.7152, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.2581539452075958, |
| "learning_rate": 0.0002402479338842975, |
| "loss": 0.8476, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.2304867058992386, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.5781, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.239717036485672, |
| "learning_rate": 0.00023975206611570244, |
| "loss": 0.6543, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.22493794560432434, |
| "learning_rate": 0.00023950413223140495, |
| "loss": 0.7048, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.22085991501808167, |
| "learning_rate": 0.00023925619834710743, |
| "loss": 0.5572, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.35917988419532776, |
| "learning_rate": 0.0002390082644628099, |
| "loss": 0.8485, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.28269943594932556, |
| "learning_rate": 0.00023876033057851237, |
| "loss": 0.5732, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.26313093304634094, |
| "learning_rate": 0.00023851239669421485, |
| "loss": 0.8212, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.30286532640457153, |
| "learning_rate": 0.00023826446280991734, |
| "loss": 0.5878, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.22270837426185608, |
| "learning_rate": 0.00023801652892561982, |
| "loss": 0.6933, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.29011014103889465, |
| "learning_rate": 0.0002377685950413223, |
| "loss": 0.6188, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.2390982061624527, |
| "learning_rate": 0.00023752066115702476, |
| "loss": 0.6426, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.3416346609592438, |
| "learning_rate": 0.00023727272727272724, |
| "loss": 0.8845, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.25051388144493103, |
| "learning_rate": 0.00023702479338842973, |
| "loss": 0.7286, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.2497546523809433, |
| "learning_rate": 0.0002367768595041322, |
| "loss": 0.6027, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.23835037648677826, |
| "learning_rate": 0.00023652892561983467, |
| "loss": 0.7052, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.22467398643493652, |
| "learning_rate": 0.00023628099173553715, |
| "loss": 0.5806, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.2663390338420868, |
| "learning_rate": 0.00023603305785123964, |
| "loss": 0.6943, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.22997191548347473, |
| "learning_rate": 0.00023578512396694215, |
| "loss": 0.6411, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.23266558349132538, |
| "learning_rate": 0.00023553719008264463, |
| "loss": 0.6068, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.2304474264383316, |
| "learning_rate": 0.00023528925619834709, |
| "loss": 0.6427, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.28231826424598694, |
| "learning_rate": 0.00023504132231404957, |
| "loss": 0.8011, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.28013259172439575, |
| "learning_rate": 0.00023479338842975205, |
| "loss": 0.5988, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.22702372074127197, |
| "learning_rate": 0.00023454545454545454, |
| "loss": 0.6737, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.27958643436431885, |
| "learning_rate": 0.000234297520661157, |
| "loss": 0.6621, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.23902451992034912, |
| "learning_rate": 0.00023404958677685948, |
| "loss": 0.6525, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.2778523564338684, |
| "learning_rate": 0.00023380165289256196, |
| "loss": 0.6697, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.2382276952266693, |
| "learning_rate": 0.00023355371900826444, |
| "loss": 0.6281, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.24487091600894928, |
| "learning_rate": 0.00023330578512396693, |
| "loss": 0.6842, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.2063397765159607, |
| "learning_rate": 0.00023305785123966938, |
| "loss": 0.6554, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.21523278951644897, |
| "learning_rate": 0.00023280991735537187, |
| "loss": 0.632, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.2420080006122589, |
| "learning_rate": 0.00023256198347107435, |
| "loss": 0.6001, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.2390110194683075, |
| "learning_rate": 0.00023231404958677686, |
| "loss": 0.5648, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.24080687761306763, |
| "learning_rate": 0.0002320661157024793, |
| "loss": 0.86, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.29456445574760437, |
| "learning_rate": 0.0002318181818181818, |
| "loss": 0.7418, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.23326683044433594, |
| "learning_rate": 0.00023157024793388429, |
| "loss": 0.6967, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.20866093039512634, |
| "learning_rate": 0.00023132231404958677, |
| "loss": 0.5205, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.3158474266529083, |
| "learning_rate": 0.00023107438016528925, |
| "loss": 0.7879, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.2730140686035156, |
| "learning_rate": 0.0002308264462809917, |
| "loss": 0.7292, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.25384965538978577, |
| "learning_rate": 0.0002305785123966942, |
| "loss": 0.7258, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.20765069127082825, |
| "learning_rate": 0.00023033057851239668, |
| "loss": 0.7108, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.25662195682525635, |
| "learning_rate": 0.00023008264462809916, |
| "loss": 0.7473, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.300243616104126, |
| "learning_rate": 0.00022983471074380162, |
| "loss": 0.6902, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.23513919115066528, |
| "learning_rate": 0.0002295867768595041, |
| "loss": 0.5888, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.2077571451663971, |
| "learning_rate": 0.00022933884297520658, |
| "loss": 0.6256, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.266201376914978, |
| "learning_rate": 0.00022909090909090907, |
| "loss": 0.6913, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.2239614725112915, |
| "learning_rate": 0.00022884297520661152, |
| "loss": 0.7369, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.21509824693202972, |
| "learning_rate": 0.000228595041322314, |
| "loss": 0.4445, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.21956239640712738, |
| "learning_rate": 0.00022834710743801652, |
| "loss": 0.6732, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.18832357227802277, |
| "learning_rate": 0.000228099173553719, |
| "loss": 0.6808, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.21115505695343018, |
| "learning_rate": 0.0002278512396694215, |
| "loss": 0.5323, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.23715418577194214, |
| "learning_rate": 0.00022760330578512394, |
| "loss": 0.8333, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.29385048151016235, |
| "learning_rate": 0.00022735537190082643, |
| "loss": 0.6, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.26947689056396484, |
| "learning_rate": 0.0002271074380165289, |
| "loss": 0.8788, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.2778269946575165, |
| "learning_rate": 0.0002268595041322314, |
| "loss": 0.7073, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.20938479900360107, |
| "learning_rate": 0.00022661157024793385, |
| "loss": 0.6422, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.2777106761932373, |
| "learning_rate": 0.00022636363636363633, |
| "loss": 0.7495, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.20872819423675537, |
| "learning_rate": 0.00022611570247933882, |
| "loss": 0.6492, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.2752722501754761, |
| "learning_rate": 0.0002258677685950413, |
| "loss": 0.6014, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.24615786969661713, |
| "learning_rate": 0.00022561983471074378, |
| "loss": 0.6287, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.24146385490894318, |
| "learning_rate": 0.00022537190082644624, |
| "loss": 0.6151, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.24762235581874847, |
| "learning_rate": 0.00022512396694214872, |
| "loss": 0.6377, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.24630331993103027, |
| "learning_rate": 0.00022487603305785124, |
| "loss": 0.7255, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.2922554612159729, |
| "learning_rate": 0.00022462809917355372, |
| "loss": 0.6645, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.21686063706874847, |
| "learning_rate": 0.00022438016528925618, |
| "loss": 0.5606, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.2216208428144455, |
| "learning_rate": 0.00022413223140495866, |
| "loss": 0.5126, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.25635436177253723, |
| "learning_rate": 0.00022388429752066114, |
| "loss": 0.7387, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.2786000669002533, |
| "learning_rate": 0.00022363636363636363, |
| "loss": 0.5941, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.26092806458473206, |
| "learning_rate": 0.0002233884297520661, |
| "loss": 0.7851, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.23881889879703522, |
| "learning_rate": 0.00022314049586776857, |
| "loss": 0.598, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.23304526507854462, |
| "learning_rate": 0.00022289256198347105, |
| "loss": 0.7165, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.2340225875377655, |
| "learning_rate": 0.00022264462809917353, |
| "loss": 0.6608, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.31176140904426575, |
| "learning_rate": 0.00022239669421487602, |
| "loss": 0.6711, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.23832640051841736, |
| "learning_rate": 0.00022214876033057847, |
| "loss": 0.732, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.28845977783203125, |
| "learning_rate": 0.00022190082644628096, |
| "loss": 0.7968, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.1978536993265152, |
| "learning_rate": 0.00022165289256198344, |
| "loss": 0.6592, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.26940053701400757, |
| "learning_rate": 0.00022140495867768595, |
| "loss": 0.7953, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.20393389463424683, |
| "learning_rate": 0.00022115702479338844, |
| "loss": 0.4871, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.27152347564697266, |
| "learning_rate": 0.0002209090909090909, |
| "loss": 0.5583, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.2883144021034241, |
| "learning_rate": 0.00022066115702479338, |
| "loss": 0.6156, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.1987351030111313, |
| "learning_rate": 0.00022041322314049586, |
| "loss": 0.5196, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.2651583254337311, |
| "learning_rate": 0.00022016528925619834, |
| "loss": 0.6099, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.2574511468410492, |
| "learning_rate": 0.0002199173553719008, |
| "loss": 0.6925, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.27730292081832886, |
| "learning_rate": 0.00021966942148760328, |
| "loss": 0.6752, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.2001207172870636, |
| "learning_rate": 0.00021942148760330577, |
| "loss": 0.75, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.24222363531589508, |
| "learning_rate": 0.00021917355371900825, |
| "loss": 0.6364, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.26326724886894226, |
| "learning_rate": 0.0002189256198347107, |
| "loss": 0.673, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.2272881418466568, |
| "learning_rate": 0.0002186776859504132, |
| "loss": 0.561, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.24880024790763855, |
| "learning_rate": 0.00021842975206611567, |
| "loss": 0.5552, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.2593706548213959, |
| "learning_rate": 0.00021818181818181816, |
| "loss": 0.5417, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.19063642621040344, |
| "learning_rate": 0.00021793388429752067, |
| "loss": 0.5694, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.2146475464105606, |
| "learning_rate": 0.0002176859504132231, |
| "loss": 0.4314, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.25150927901268005, |
| "learning_rate": 0.0002174380165289256, |
| "loss": 0.631, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.2753889858722687, |
| "learning_rate": 0.0002171900826446281, |
| "loss": 0.6859, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.20773079991340637, |
| "learning_rate": 0.00021694214876033058, |
| "loss": 0.7515, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.2547062635421753, |
| "learning_rate": 0.00021669421487603303, |
| "loss": 0.7582, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.24687208235263824, |
| "learning_rate": 0.00021644628099173552, |
| "loss": 0.5865, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.24116279184818268, |
| "learning_rate": 0.000216198347107438, |
| "loss": 0.4841, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.2270282804965973, |
| "learning_rate": 0.00021595041322314048, |
| "loss": 0.5933, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.21436922252178192, |
| "learning_rate": 0.00021570247933884297, |
| "loss": 0.6959, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.25802701711654663, |
| "learning_rate": 0.00021545454545454542, |
| "loss": 0.729, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.23808260262012482, |
| "learning_rate": 0.0002152066115702479, |
| "loss": 0.6346, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.23161651194095612, |
| "learning_rate": 0.0002149586776859504, |
| "loss": 0.6459, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.2442287802696228, |
| "learning_rate": 0.00021471074380165287, |
| "loss": 0.6803, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.19150683283805847, |
| "learning_rate": 0.00021446280991735533, |
| "loss": 0.4375, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.23142127692699432, |
| "learning_rate": 0.00021421487603305781, |
| "loss": 0.5505, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.22447548806667328, |
| "learning_rate": 0.00021396694214876033, |
| "loss": 0.6368, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.25168758630752563, |
| "learning_rate": 0.0002137190082644628, |
| "loss": 0.6322, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.25538235902786255, |
| "learning_rate": 0.0002134710743801653, |
| "loss": 0.5317, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.2565425634384155, |
| "learning_rate": 0.00021322314049586775, |
| "loss": 0.6261, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.25399863719940186, |
| "learning_rate": 0.00021297520661157023, |
| "loss": 0.596, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.27143988013267517, |
| "learning_rate": 0.00021272727272727272, |
| "loss": 0.6691, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.2387736439704895, |
| "learning_rate": 0.0002124793388429752, |
| "loss": 0.5288, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.2549780607223511, |
| "learning_rate": 0.00021223140495867766, |
| "loss": 0.7455, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.2740858793258667, |
| "learning_rate": 0.00021198347107438014, |
| "loss": 0.4921, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.25273847579956055, |
| "learning_rate": 0.00021173553719008262, |
| "loss": 0.7965, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.25858959555625916, |
| "learning_rate": 0.0002114876033057851, |
| "loss": 0.7303, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.2599296271800995, |
| "learning_rate": 0.0002112396694214876, |
| "loss": 0.6342, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.21084599196910858, |
| "learning_rate": 0.00021099173553719005, |
| "loss": 0.633, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.24272632598876953, |
| "learning_rate": 0.00021074380165289253, |
| "loss": 0.6213, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.26323699951171875, |
| "learning_rate": 0.00021049586776859501, |
| "loss": 0.563, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.20646587014198303, |
| "learning_rate": 0.00021024793388429753, |
| "loss": 0.6248, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.21778297424316406, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.7186, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.21315112709999084, |
| "learning_rate": 0.00020975206611570247, |
| "loss": 0.5961, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.20787106454372406, |
| "learning_rate": 0.00020950413223140495, |
| "loss": 0.5917, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.23541009426116943, |
| "learning_rate": 0.00020925619834710743, |
| "loss": 0.7803, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.22649626433849335, |
| "learning_rate": 0.00020900826446280992, |
| "loss": 0.5895, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.23644742369651794, |
| "learning_rate": 0.00020876033057851237, |
| "loss": 0.6656, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.22934262454509735, |
| "learning_rate": 0.00020851239669421486, |
| "loss": 0.5933, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.289989709854126, |
| "learning_rate": 0.00020826446280991734, |
| "loss": 0.6852, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.24489325284957886, |
| "learning_rate": 0.00020801652892561982, |
| "loss": 0.5546, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.27165278792381287, |
| "learning_rate": 0.00020776859504132228, |
| "loss": 0.6845, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.19467370212078094, |
| "learning_rate": 0.00020752066115702476, |
| "loss": 0.5587, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.27320200204849243, |
| "learning_rate": 0.00020727272727272725, |
| "loss": 0.7144, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.28100526332855225, |
| "learning_rate": 0.00020702479338842973, |
| "loss": 0.6914, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.3059975504875183, |
| "learning_rate": 0.0002067768595041322, |
| "loss": 0.6075, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.24904222786426544, |
| "learning_rate": 0.00020652892561983467, |
| "loss": 0.5543, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.24768255650997162, |
| "learning_rate": 0.00020628099173553718, |
| "loss": 0.607, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.25083738565444946, |
| "learning_rate": 0.00020603305785123967, |
| "loss": 0.7961, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.26338303089141846, |
| "learning_rate": 0.00020578512396694215, |
| "loss": 0.6467, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.25761598348617554, |
| "learning_rate": 0.0002055371900826446, |
| "loss": 0.5891, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.2616937756538391, |
| "learning_rate": 0.0002052892561983471, |
| "loss": 0.5706, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.18980839848518372, |
| "learning_rate": 0.00020504132231404957, |
| "loss": 0.4479, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.250431627035141, |
| "learning_rate": 0.00020479338842975206, |
| "loss": 0.6006, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.2146655172109604, |
| "learning_rate": 0.0002045454545454545, |
| "loss": 0.7113, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.2195209115743637, |
| "learning_rate": 0.000204297520661157, |
| "loss": 0.5354, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.24879257380962372, |
| "learning_rate": 0.00020404958677685948, |
| "loss": 0.5478, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.27159082889556885, |
| "learning_rate": 0.00020380165289256196, |
| "loss": 0.7681, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.20614947378635406, |
| "learning_rate": 0.00020355371900826445, |
| "loss": 0.6357, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.25690051913261414, |
| "learning_rate": 0.0002033057851239669, |
| "loss": 0.5731, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.24473583698272705, |
| "learning_rate": 0.0002030578512396694, |
| "loss": 0.6784, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.32395297288894653, |
| "learning_rate": 0.0002028099173553719, |
| "loss": 0.7118, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.2975274324417114, |
| "learning_rate": 0.00020256198347107438, |
| "loss": 0.6504, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.2652553915977478, |
| "learning_rate": 0.00020231404958677684, |
| "loss": 0.6986, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.29475778341293335, |
| "learning_rate": 0.00020206611570247932, |
| "loss": 0.6525, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.24549973011016846, |
| "learning_rate": 0.0002018181818181818, |
| "loss": 0.5408, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.2181435376405716, |
| "learning_rate": 0.0002015702479338843, |
| "loss": 0.6146, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.2682584226131439, |
| "learning_rate": 0.00020132231404958677, |
| "loss": 0.6368, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.2641114592552185, |
| "learning_rate": 0.00020107438016528923, |
| "loss": 0.51, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.27871838212013245, |
| "learning_rate": 0.0002008264462809917, |
| "loss": 0.7269, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.23890569806098938, |
| "learning_rate": 0.0002005785123966942, |
| "loss": 0.6444, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.2451583445072174, |
| "learning_rate": 0.00020033057851239668, |
| "loss": 0.5806, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.2743864953517914, |
| "learning_rate": 0.00020008264462809914, |
| "loss": 0.6305, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.2626914978027344, |
| "learning_rate": 0.00019983471074380162, |
| "loss": 0.5765, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.2874875068664551, |
| "learning_rate": 0.0001995867768595041, |
| "loss": 0.5928, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.30499163269996643, |
| "learning_rate": 0.00019933884297520661, |
| "loss": 0.6271, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.30474454164505005, |
| "learning_rate": 0.0001990909090909091, |
| "loss": 0.6755, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.1819755882024765, |
| "learning_rate": 0.00019884297520661155, |
| "loss": 0.394, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.25470343232154846, |
| "learning_rate": 0.00019859504132231404, |
| "loss": 0.7121, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.26749151945114136, |
| "learning_rate": 0.00019834710743801652, |
| "loss": 0.6487, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.20643912255764008, |
| "learning_rate": 0.000198099173553719, |
| "loss": 0.4585, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.2576930522918701, |
| "learning_rate": 0.00019785123966942146, |
| "loss": 0.5235, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.2899012863636017, |
| "learning_rate": 0.00019760330578512395, |
| "loss": 0.6292, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.2541065216064453, |
| "learning_rate": 0.00019735537190082643, |
| "loss": 0.648, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.24382047355175018, |
| "learning_rate": 0.0001971074380165289, |
| "loss": 0.5939, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.22931940853595734, |
| "learning_rate": 0.00019685950413223137, |
| "loss": 0.6812, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.2592567205429077, |
| "learning_rate": 0.00019661157024793385, |
| "loss": 0.69, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.2516980767250061, |
| "learning_rate": 0.00019636363636363634, |
| "loss": 0.5707, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.23515059053897858, |
| "learning_rate": 0.00019611570247933882, |
| "loss": 0.6739, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.24742184579372406, |
| "learning_rate": 0.00019586776859504133, |
| "loss": 0.6761, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.26232922077178955, |
| "learning_rate": 0.00019561983471074376, |
| "loss": 0.7071, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.2853042781352997, |
| "learning_rate": 0.00019537190082644627, |
| "loss": 0.7667, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.251169353723526, |
| "learning_rate": 0.00019512396694214875, |
| "loss": 0.6518, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.2321665734052658, |
| "learning_rate": 0.00019487603305785124, |
| "loss": 0.4377, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.25216928124427795, |
| "learning_rate": 0.0001946280991735537, |
| "loss": 0.7173, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.19498330354690552, |
| "learning_rate": 0.00019438016528925618, |
| "loss": 0.5584, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.32786309719085693, |
| "learning_rate": 0.00019413223140495866, |
| "loss": 0.6583, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.25834760069847107, |
| "learning_rate": 0.00019388429752066115, |
| "loss": 0.4957, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.3462083041667938, |
| "learning_rate": 0.00019363636363636363, |
| "loss": 0.5205, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.27106693387031555, |
| "learning_rate": 0.00019338842975206609, |
| "loss": 0.6803, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.28165388107299805, |
| "learning_rate": 0.00019314049586776857, |
| "loss": 0.7049, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.20732273161411285, |
| "learning_rate": 0.00019289256198347105, |
| "loss": 0.6407, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.2609116733074188, |
| "learning_rate": 0.00019264462809917354, |
| "loss": 0.5377, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.2561998963356018, |
| "learning_rate": 0.000192396694214876, |
| "loss": 0.6212, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.27699044346809387, |
| "learning_rate": 0.00019214876033057848, |
| "loss": 0.5482, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.2426328808069229, |
| "learning_rate": 0.000191900826446281, |
| "loss": 0.6444, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.26187026500701904, |
| "learning_rate": 0.00019165289256198347, |
| "loss": 0.5443, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.2719630002975464, |
| "learning_rate": 0.00019140495867768595, |
| "loss": 0.6886, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.18477971851825714, |
| "learning_rate": 0.0001911570247933884, |
| "loss": 0.5292, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.2144313007593155, |
| "learning_rate": 0.0001909090909090909, |
| "loss": 0.4613, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.2580784857273102, |
| "learning_rate": 0.00019066115702479338, |
| "loss": 0.5606, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.3073588013648987, |
| "learning_rate": 0.00019041322314049586, |
| "loss": 0.6123, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.21787844598293304, |
| "learning_rate": 0.00019016528925619832, |
| "loss": 0.5939, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.255750447511673, |
| "learning_rate": 0.0001899173553719008, |
| "loss": 0.5739, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.24147820472717285, |
| "learning_rate": 0.00018966942148760329, |
| "loss": 0.6026, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.26172590255737305, |
| "learning_rate": 0.00018942148760330577, |
| "loss": 0.5166, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.2710455358028412, |
| "learning_rate": 0.00018917355371900825, |
| "loss": 0.6429, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.1971074640750885, |
| "learning_rate": 0.0001889256198347107, |
| "loss": 0.4799, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.23394368588924408, |
| "learning_rate": 0.0001886776859504132, |
| "loss": 0.5491, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.22820048034191132, |
| "learning_rate": 0.0001884297520661157, |
| "loss": 0.5343, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.23169974982738495, |
| "learning_rate": 0.0001881818181818182, |
| "loss": 0.5852, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.24015003442764282, |
| "learning_rate": 0.00018793388429752064, |
| "loss": 0.6209, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.2230776697397232, |
| "learning_rate": 0.00018768595041322313, |
| "loss": 0.6296, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.2518354654312134, |
| "learning_rate": 0.0001874380165289256, |
| "loss": 0.6167, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.338256299495697, |
| "learning_rate": 0.0001871900826446281, |
| "loss": 0.6512, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.23796728253364563, |
| "learning_rate": 0.00018694214876033055, |
| "loss": 0.8155, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.31516361236572266, |
| "learning_rate": 0.00018669421487603303, |
| "loss": 0.8023, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.2371574491262436, |
| "learning_rate": 0.00018644628099173552, |
| "loss": 0.5613, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.2822033762931824, |
| "learning_rate": 0.000186198347107438, |
| "loss": 0.5549, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.25953295826911926, |
| "learning_rate": 0.00018595041322314049, |
| "loss": 0.6199, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.2478639930486679, |
| "learning_rate": 0.00018570247933884294, |
| "loss": 0.5806, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.2439350187778473, |
| "learning_rate": 0.00018545454545454543, |
| "loss": 0.6222, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.24993474781513214, |
| "learning_rate": 0.0001852066115702479, |
| "loss": 0.6048, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.24781496822834015, |
| "learning_rate": 0.00018495867768595042, |
| "loss": 0.5941, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.1847202032804489, |
| "learning_rate": 0.00018471074380165285, |
| "loss": 0.609, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.21596528589725494, |
| "learning_rate": 0.00018446280991735536, |
| "loss": 0.4457, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.240879625082016, |
| "learning_rate": 0.00018421487603305784, |
| "loss": 0.6118, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.2898111641407013, |
| "learning_rate": 0.00018396694214876033, |
| "loss": 0.7725, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.27428382635116577, |
| "learning_rate": 0.0001837190082644628, |
| "loss": 0.5366, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.23467296361923218, |
| "learning_rate": 0.00018347107438016527, |
| "loss": 0.6018, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.2190561592578888, |
| "learning_rate": 0.00018322314049586775, |
| "loss": 0.5249, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.2240625023841858, |
| "learning_rate": 0.00018297520661157024, |
| "loss": 0.6891, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.24726848304271698, |
| "learning_rate": 0.00018272727272727272, |
| "loss": 0.5545, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.3318251371383667, |
| "learning_rate": 0.00018247933884297518, |
| "loss": 0.4809, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.2396695613861084, |
| "learning_rate": 0.00018223140495867766, |
| "loss": 0.4942, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.25009942054748535, |
| "learning_rate": 0.00018198347107438014, |
| "loss": 0.7381, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.22655311226844788, |
| "learning_rate": 0.00018173553719008263, |
| "loss": 0.4729, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.23187695443630219, |
| "learning_rate": 0.0001814876033057851, |
| "loss": 0.5719, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.2703653573989868, |
| "learning_rate": 0.00018123966942148757, |
| "loss": 0.6031, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.2207796424627304, |
| "learning_rate": 0.00018099173553719008, |
| "loss": 0.5361, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.24914169311523438, |
| "learning_rate": 0.00018074380165289256, |
| "loss": 0.6547, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.2714746594429016, |
| "learning_rate": 0.00018049586776859504, |
| "loss": 0.5702, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.3201580047607422, |
| "learning_rate": 0.0001802479338842975, |
| "loss": 0.6119, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.2548397183418274, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.5251, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.28669115900993347, |
| "learning_rate": 0.00017975206611570247, |
| "loss": 0.5773, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.26253971457481384, |
| "learning_rate": 0.00017950413223140495, |
| "loss": 0.6504, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.22113384306430817, |
| "learning_rate": 0.00017925619834710744, |
| "loss": 0.4741, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.261636346578598, |
| "learning_rate": 0.0001790082644628099, |
| "loss": 0.6241, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.1780402809381485, |
| "learning_rate": 0.00017876033057851238, |
| "loss": 0.5207, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.26149195432662964, |
| "learning_rate": 0.00017851239669421486, |
| "loss": 0.5872, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.26113009452819824, |
| "learning_rate": 0.00017826446280991734, |
| "loss": 0.6163, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.21397502720355988, |
| "learning_rate": 0.0001780165289256198, |
| "loss": 0.479, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.21250088512897491, |
| "learning_rate": 0.00017776859504132228, |
| "loss": 0.6978, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.2556426525115967, |
| "learning_rate": 0.00017752066115702477, |
| "loss": 0.6128, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.24139715731143951, |
| "learning_rate": 0.00017727272727272728, |
| "loss": 0.5066, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.23671215772628784, |
| "learning_rate": 0.00017702479338842976, |
| "loss": 0.5183, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.23494285345077515, |
| "learning_rate": 0.00017677685950413222, |
| "loss": 0.5181, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.2547609806060791, |
| "learning_rate": 0.0001765289256198347, |
| "loss": 0.5406, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.3042651414871216, |
| "learning_rate": 0.00017628099173553718, |
| "loss": 0.5551, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.22910748422145844, |
| "learning_rate": 0.00017603305785123967, |
| "loss": 0.6373, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.19777967035770416, |
| "learning_rate": 0.00017578512396694212, |
| "loss": 0.5471, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.31034502387046814, |
| "learning_rate": 0.0001755371900826446, |
| "loss": 0.7017, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.3504410684108734, |
| "learning_rate": 0.0001752892561983471, |
| "loss": 0.7208, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.24271292984485626, |
| "learning_rate": 0.00017504132231404958, |
| "loss": 0.5563, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.27147865295410156, |
| "learning_rate": 0.00017479338842975203, |
| "loss": 0.5869, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.2976628839969635, |
| "learning_rate": 0.00017454545454545452, |
| "loss": 0.5471, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.28489646315574646, |
| "learning_rate": 0.000174297520661157, |
| "loss": 0.6053, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.30020108819007874, |
| "learning_rate": 0.00017404958677685948, |
| "loss": 0.6178, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.23986253142356873, |
| "learning_rate": 0.000173801652892562, |
| "loss": 0.5896, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.2667832374572754, |
| "learning_rate": 0.00017355371900826442, |
| "loss": 0.5375, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.22176356613636017, |
| "learning_rate": 0.00017330578512396693, |
| "loss": 0.5723, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.263257771730423, |
| "learning_rate": 0.00017305785123966942, |
| "loss": 0.7317, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.24838753044605255, |
| "learning_rate": 0.0001728099173553719, |
| "loss": 0.5849, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.24839664995670319, |
| "learning_rate": 0.00017256198347107436, |
| "loss": 0.6678, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.2849573493003845, |
| "learning_rate": 0.00017231404958677684, |
| "loss": 0.7144, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.26900768280029297, |
| "learning_rate": 0.00017206611570247932, |
| "loss": 0.5156, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.2212425172328949, |
| "learning_rate": 0.0001718181818181818, |
| "loss": 0.4551, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.2066129595041275, |
| "learning_rate": 0.0001715702479338843, |
| "loss": 0.4193, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.2838365137577057, |
| "learning_rate": 0.00017132231404958675, |
| "loss": 0.6078, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.239679753780365, |
| "learning_rate": 0.00017107438016528923, |
| "loss": 0.616, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.23269398510456085, |
| "learning_rate": 0.00017082644628099172, |
| "loss": 0.542, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.23838558793067932, |
| "learning_rate": 0.0001705785123966942, |
| "loss": 0.5147, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.2819415330886841, |
| "learning_rate": 0.00017033057851239666, |
| "loss": 0.6437, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.243398055434227, |
| "learning_rate": 0.00017008264462809914, |
| "loss": 0.6611, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.22569122910499573, |
| "learning_rate": 0.00016983471074380165, |
| "loss": 0.3979, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.33265820145606995, |
| "learning_rate": 0.00016958677685950413, |
| "loss": 0.6005, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.26828673481941223, |
| "learning_rate": 0.00016933884297520662, |
| "loss": 0.608, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.24439513683319092, |
| "learning_rate": 0.00016909090909090907, |
| "loss": 0.5572, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.22491876780986786, |
| "learning_rate": 0.00016884297520661156, |
| "loss": 0.7226, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.24468480050563812, |
| "learning_rate": 0.00016859504132231404, |
| "loss": 0.4582, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.23392945528030396, |
| "learning_rate": 0.00016834710743801652, |
| "loss": 0.6477, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.27548858523368835, |
| "learning_rate": 0.00016809917355371898, |
| "loss": 0.5846, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.2861180603504181, |
| "learning_rate": 0.00016785123966942146, |
| "loss": 0.6412, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.24700766801834106, |
| "learning_rate": 0.00016760330578512395, |
| "loss": 0.6947, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.2600953280925751, |
| "learning_rate": 0.00016735537190082643, |
| "loss": 0.6165, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.26876646280288696, |
| "learning_rate": 0.00016710743801652892, |
| "loss": 0.6855, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.26161080598831177, |
| "learning_rate": 0.00016685950413223137, |
| "loss": 0.5066, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.25190046429634094, |
| "learning_rate": 0.00016661157024793386, |
| "loss": 0.5902, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.25269225239753723, |
| "learning_rate": 0.00016636363636363637, |
| "loss": 0.7017, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.28042706847190857, |
| "learning_rate": 0.00016611570247933885, |
| "loss": 0.6264, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.2767360508441925, |
| "learning_rate": 0.0001658677685950413, |
| "loss": 0.7562, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.2771216034889221, |
| "learning_rate": 0.0001656198347107438, |
| "loss": 0.5333, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.189210906624794, |
| "learning_rate": 0.00016537190082644627, |
| "loss": 0.5378, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.22517065703868866, |
| "learning_rate": 0.00016512396694214876, |
| "loss": 0.5292, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.2390165776014328, |
| "learning_rate": 0.00016487603305785121, |
| "loss": 0.4407, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.21548262238502502, |
| "learning_rate": 0.0001646280991735537, |
| "loss": 0.4504, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.20831167697906494, |
| "learning_rate": 0.00016438016528925618, |
| "loss": 0.6848, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.271257609128952, |
| "learning_rate": 0.00016413223140495866, |
| "loss": 0.535, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.32008254528045654, |
| "learning_rate": 0.00016388429752066115, |
| "loss": 0.5107, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.34058302640914917, |
| "learning_rate": 0.0001636363636363636, |
| "loss": 0.5708, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.28070059418678284, |
| "learning_rate": 0.0001633884297520661, |
| "loss": 0.5086, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.25487688183784485, |
| "learning_rate": 0.00016314049586776857, |
| "loss": 0.5184, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.3240332007408142, |
| "learning_rate": 0.00016289256198347108, |
| "loss": 0.6774, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.30744409561157227, |
| "learning_rate": 0.0001626446280991735, |
| "loss": 0.5314, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.25220754742622375, |
| "learning_rate": 0.00016239669421487602, |
| "loss": 0.6308, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.29116958379745483, |
| "learning_rate": 0.0001621487603305785, |
| "loss": 0.5685, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.23250073194503784, |
| "learning_rate": 0.000161900826446281, |
| "loss": 0.4318, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.2808091640472412, |
| "learning_rate": 0.00016165289256198347, |
| "loss": 0.6313, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.2711193561553955, |
| "learning_rate": 0.00016140495867768593, |
| "loss": 0.4651, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.29540935158729553, |
| "learning_rate": 0.00016115702479338841, |
| "loss": 0.6663, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.23418714106082916, |
| "learning_rate": 0.0001609090909090909, |
| "loss": 0.448, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.21675793826580048, |
| "learning_rate": 0.00016066115702479338, |
| "loss": 0.5034, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.22451865673065186, |
| "learning_rate": 0.00016041322314049584, |
| "loss": 0.4476, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.26300856471061707, |
| "learning_rate": 0.00016016528925619832, |
| "loss": 0.6646, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.3377116918563843, |
| "learning_rate": 0.0001599173553719008, |
| "loss": 0.6029, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.23391880095005035, |
| "learning_rate": 0.0001596694214876033, |
| "loss": 0.6277, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.19620922207832336, |
| "learning_rate": 0.0001594214876033058, |
| "loss": 0.4638, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.22981096804141998, |
| "learning_rate": 0.00015917355371900823, |
| "loss": 0.5826, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.34321555495262146, |
| "learning_rate": 0.00015892561983471074, |
| "loss": 0.5618, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.28461968898773193, |
| "learning_rate": 0.00015867768595041322, |
| "loss": 0.5129, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.24368269741535187, |
| "learning_rate": 0.0001584297520661157, |
| "loss": 0.5866, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.282255083322525, |
| "learning_rate": 0.00015818181818181816, |
| "loss": 0.6274, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.26298072934150696, |
| "learning_rate": 0.00015793388429752065, |
| "loss": 0.5187, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.2671455144882202, |
| "learning_rate": 0.00015768595041322313, |
| "loss": 0.6878, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.2681390643119812, |
| "learning_rate": 0.00015743801652892561, |
| "loss": 0.5469, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.38484248518943787, |
| "learning_rate": 0.0001571900826446281, |
| "loss": 0.6364, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.23353587090969086, |
| "learning_rate": 0.00015694214876033055, |
| "loss": 0.4844, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.29452502727508545, |
| "learning_rate": 0.00015669421487603304, |
| "loss": 0.5059, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.2460879236459732, |
| "learning_rate": 0.00015644628099173552, |
| "loss": 0.6495, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.30693721771240234, |
| "learning_rate": 0.000156198347107438, |
| "loss": 0.5165, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.2171495109796524, |
| "learning_rate": 0.00015595041322314046, |
| "loss": 0.6172, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.24301984906196594, |
| "learning_rate": 0.00015570247933884294, |
| "loss": 0.6786, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.2288222461938858, |
| "learning_rate": 0.00015545454545454546, |
| "loss": 0.5669, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.2407921552658081, |
| "learning_rate": 0.00015520661157024794, |
| "loss": 0.5968, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.2591527998447418, |
| "learning_rate": 0.0001549586776859504, |
| "loss": 0.544, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.25770679116249084, |
| "learning_rate": 0.00015471074380165288, |
| "loss": 0.7177, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.2528848648071289, |
| "learning_rate": 0.00015446280991735536, |
| "loss": 0.4703, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.24993537366390228, |
| "learning_rate": 0.00015421487603305785, |
| "loss": 0.6003, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.25807908177375793, |
| "learning_rate": 0.00015396694214876033, |
| "loss": 0.465, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.3142452836036682, |
| "learning_rate": 0.0001537190082644628, |
| "loss": 0.6122, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.27111849188804626, |
| "learning_rate": 0.00015347107438016527, |
| "loss": 0.5962, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.28503674268722534, |
| "learning_rate": 0.00015322314049586775, |
| "loss": 0.6667, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.27074381709098816, |
| "learning_rate": 0.00015297520661157024, |
| "loss": 0.6115, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.25918465852737427, |
| "learning_rate": 0.0001527272727272727, |
| "loss": 0.4483, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.24476633965969086, |
| "learning_rate": 0.00015247933884297518, |
| "loss": 0.6501, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.21205200254917145, |
| "learning_rate": 0.00015223140495867766, |
| "loss": 0.3914, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.25496751070022583, |
| "learning_rate": 0.00015198347107438017, |
| "loss": 0.5335, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.27991780638694763, |
| "learning_rate": 0.00015173553719008266, |
| "loss": 0.6083, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.23995639383792877, |
| "learning_rate": 0.0001514876033057851, |
| "loss": 0.55, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2349666953086853, |
| "learning_rate": 0.0001512396694214876, |
| "loss": 0.7054, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.27498871088027954, |
| "learning_rate": 0.00015099173553719008, |
| "loss": 0.55, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.21346105635166168, |
| "learning_rate": 0.00015074380165289256, |
| "loss": 0.3467, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2638354003429413, |
| "learning_rate": 0.00015049586776859502, |
| "loss": 0.5624, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.2751975953578949, |
| "learning_rate": 0.0001502479338842975, |
| "loss": 0.3814, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.225106880068779, |
| "learning_rate": 0.00015, |
| "loss": 0.479, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.22013232111930847, |
| "learning_rate": 0.00014975206611570247, |
| "loss": 0.5672, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.21252033114433289, |
| "learning_rate": 0.00014950413223140495, |
| "loss": 0.546, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.2847185432910919, |
| "learning_rate": 0.00014925619834710744, |
| "loss": 0.4434, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 0.25599631667137146, |
| "learning_rate": 0.0001490082644628099, |
| "loss": 0.4713, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.2719402611255646, |
| "learning_rate": 0.00014876033057851238, |
| "loss": 0.4475, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.26454958319664, |
| "learning_rate": 0.00014851239669421486, |
| "loss": 0.4515, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.39801672101020813, |
| "learning_rate": 0.00014826446280991735, |
| "loss": 0.4647, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.3378361463546753, |
| "learning_rate": 0.0001480165289256198, |
| "loss": 0.4414, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.3039036989212036, |
| "learning_rate": 0.0001477685950413223, |
| "loss": 0.5634, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.3506157398223877, |
| "learning_rate": 0.0001475206611570248, |
| "loss": 0.5001, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 0.2508845925331116, |
| "learning_rate": 0.00014727272727272725, |
| "loss": 0.3379, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.26913216710090637, |
| "learning_rate": 0.00014702479338842974, |
| "loss": 0.4575, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.329659640789032, |
| "learning_rate": 0.00014677685950413222, |
| "loss": 0.437, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.2972075343132019, |
| "learning_rate": 0.0001465289256198347, |
| "loss": 0.5048, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.3184354603290558, |
| "learning_rate": 0.00014628099173553716, |
| "loss": 0.4374, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.3377355635166168, |
| "learning_rate": 0.00014603305785123967, |
| "loss": 0.4946, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 0.29106801748275757, |
| "learning_rate": 0.00014578512396694213, |
| "loss": 0.5414, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.22808948159217834, |
| "learning_rate": 0.0001455371900826446, |
| "loss": 0.3739, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.27818021178245544, |
| "learning_rate": 0.0001452892561983471, |
| "loss": 0.4172, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.25634923577308655, |
| "learning_rate": 0.00014504132231404958, |
| "loss": 0.4293, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.30696937441825867, |
| "learning_rate": 0.00014479338842975206, |
| "loss": 0.4454, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.26105087995529175, |
| "learning_rate": 0.00014454545454545452, |
| "loss": 0.2978, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.3100634515285492, |
| "learning_rate": 0.00014429752066115703, |
| "loss": 0.4499, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.27640992403030396, |
| "learning_rate": 0.00014404958677685949, |
| "loss": 0.3837, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.24559038877487183, |
| "learning_rate": 0.00014380165289256197, |
| "loss": 0.3347, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.2920415699481964, |
| "learning_rate": 0.00014355371900826445, |
| "loss": 0.4333, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.3147384226322174, |
| "learning_rate": 0.00014330578512396694, |
| "loss": 0.4385, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.35469138622283936, |
| "learning_rate": 0.0001430578512396694, |
| "loss": 0.5442, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.2619563043117523, |
| "learning_rate": 0.00014280991735537188, |
| "loss": 0.3837, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.32273221015930176, |
| "learning_rate": 0.0001425619834710744, |
| "loss": 0.4946, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.2692110538482666, |
| "learning_rate": 0.00014231404958677684, |
| "loss": 0.4683, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.35255464911460876, |
| "learning_rate": 0.00014206611570247933, |
| "loss": 0.5456, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.29768630862236023, |
| "learning_rate": 0.0001418181818181818, |
| "loss": 0.3394, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.30738797783851624, |
| "learning_rate": 0.0001415702479338843, |
| "loss": 0.3583, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 0.33226314187049866, |
| "learning_rate": 0.00014132231404958675, |
| "loss": 0.4477, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.2842199504375458, |
| "learning_rate": 0.00014107438016528923, |
| "loss": 0.4454, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.28207266330718994, |
| "learning_rate": 0.00014082644628099172, |
| "loss": 0.3665, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.2228500097990036, |
| "learning_rate": 0.0001405785123966942, |
| "loss": 0.3446, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.2969403564929962, |
| "learning_rate": 0.00014033057851239669, |
| "loss": 0.377, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.28087565302848816, |
| "learning_rate": 0.00014008264462809917, |
| "loss": 0.3683, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 0.27268192172050476, |
| "learning_rate": 0.00013983471074380165, |
| "loss": 0.427, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.339070588350296, |
| "learning_rate": 0.0001395867768595041, |
| "loss": 0.4887, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.3170423209667206, |
| "learning_rate": 0.0001393388429752066, |
| "loss": 0.5097, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.3114936947822571, |
| "learning_rate": 0.00013909090909090908, |
| "loss": 0.4587, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.28112486004829407, |
| "learning_rate": 0.00013884297520661156, |
| "loss": 0.4781, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.28116974234580994, |
| "learning_rate": 0.00013859504132231404, |
| "loss": 0.3546, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.25061559677124023, |
| "learning_rate": 0.00013834710743801653, |
| "loss": 0.4512, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.29854199290275574, |
| "learning_rate": 0.00013809917355371898, |
| "loss": 0.6068, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.2901363670825958, |
| "learning_rate": 0.00013785123966942147, |
| "loss": 0.3667, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.29766595363616943, |
| "learning_rate": 0.00013760330578512395, |
| "loss": 0.5194, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.2765616476535797, |
| "learning_rate": 0.00013735537190082643, |
| "loss": 0.5079, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.27531540393829346, |
| "learning_rate": 0.00013710743801652892, |
| "loss": 0.4423, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 0.3063349425792694, |
| "learning_rate": 0.0001368595041322314, |
| "loss": 0.4666, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.24519848823547363, |
| "learning_rate": 0.00013661157024793389, |
| "loss": 0.2995, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.4366275370121002, |
| "learning_rate": 0.00013636363636363634, |
| "loss": 0.4961, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.28639987111091614, |
| "learning_rate": 0.00013611570247933883, |
| "loss": 0.5015, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.2763878107070923, |
| "learning_rate": 0.0001358677685950413, |
| "loss": 0.4883, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.5589582324028015, |
| "learning_rate": 0.0001356198347107438, |
| "loss": 0.5072, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.238887220621109, |
| "learning_rate": 0.00013537190082644625, |
| "loss": 0.411, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.2899521589279175, |
| "learning_rate": 0.00013512396694214876, |
| "loss": 0.3478, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.30960512161254883, |
| "learning_rate": 0.00013487603305785124, |
| "loss": 0.5058, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.33305928111076355, |
| "learning_rate": 0.0001346280991735537, |
| "loss": 0.4528, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.33324292302131653, |
| "learning_rate": 0.00013438016528925618, |
| "loss": 0.3523, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.25855520367622375, |
| "learning_rate": 0.00013413223140495867, |
| "loss": 0.4257, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 0.36000239849090576, |
| "learning_rate": 0.00013388429752066115, |
| "loss": 0.4963, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.30540961027145386, |
| "learning_rate": 0.0001336363636363636, |
| "loss": 0.4706, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.2791118025779724, |
| "learning_rate": 0.00013338842975206612, |
| "loss": 0.4543, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.37401753664016724, |
| "learning_rate": 0.00013314049586776857, |
| "loss": 0.5614, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.2772528827190399, |
| "learning_rate": 0.00013289256198347106, |
| "loss": 0.3881, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.29219475388526917, |
| "learning_rate": 0.00013264462809917354, |
| "loss": 0.5418, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.3255159258842468, |
| "learning_rate": 0.00013239669421487603, |
| "loss": 0.4669, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.2640572488307953, |
| "learning_rate": 0.0001321487603305785, |
| "loss": 0.4156, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.2618845999240875, |
| "learning_rate": 0.00013190082644628097, |
| "loss": 0.3537, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.27396076917648315, |
| "learning_rate": 0.00013165289256198348, |
| "loss": 0.4391, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.5098498463630676, |
| "learning_rate": 0.00013140495867768593, |
| "loss": 0.3863, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.31764644384384155, |
| "learning_rate": 0.00013115702479338842, |
| "loss": 0.3874, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.28738152980804443, |
| "learning_rate": 0.0001309090909090909, |
| "loss": 0.3209, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.32756757736206055, |
| "learning_rate": 0.00013066115702479338, |
| "loss": 0.4614, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.27650028467178345, |
| "learning_rate": 0.00013041322314049584, |
| "loss": 0.4717, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.33100056648254395, |
| "learning_rate": 0.00013016528925619832, |
| "loss": 0.4317, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.3200342357158661, |
| "learning_rate": 0.00012991735537190083, |
| "loss": 0.4494, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.29615214467048645, |
| "learning_rate": 0.0001296694214876033, |
| "loss": 0.3786, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.278094619512558, |
| "learning_rate": 0.00012942148760330577, |
| "loss": 0.4484, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 0.32800769805908203, |
| "learning_rate": 0.00012917355371900826, |
| "loss": 0.4635, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.3319619596004486, |
| "learning_rate": 0.00012892561983471074, |
| "loss": 0.5001, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.2818608283996582, |
| "learning_rate": 0.0001286776859504132, |
| "loss": 0.3536, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.28644126653671265, |
| "learning_rate": 0.00012842975206611568, |
| "loss": 0.4168, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.2802482545375824, |
| "learning_rate": 0.00012818181818181817, |
| "loss": 0.3918, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.21232947707176208, |
| "learning_rate": 0.00012793388429752065, |
| "loss": 0.3218, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.36512815952301025, |
| "learning_rate": 0.00012768595041322313, |
| "loss": 0.4566, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.26876160502433777, |
| "learning_rate": 0.00012743801652892562, |
| "loss": 0.4394, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.3757662773132324, |
| "learning_rate": 0.0001271900826446281, |
| "loss": 0.574, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.3161550760269165, |
| "learning_rate": 0.00012694214876033056, |
| "loss": 0.4524, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.31256961822509766, |
| "learning_rate": 0.00012669421487603304, |
| "loss": 0.4332, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.3122079074382782, |
| "learning_rate": 0.00012644628099173552, |
| "loss": 0.5669, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.33779048919677734, |
| "learning_rate": 0.000126198347107438, |
| "loss": 0.515, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.38516169786453247, |
| "learning_rate": 0.0001259504132231405, |
| "loss": 0.5502, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.2803480625152588, |
| "learning_rate": 0.00012570247933884297, |
| "loss": 0.404, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.31674399971961975, |
| "learning_rate": 0.00012545454545454543, |
| "loss": 0.4403, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.3029496669769287, |
| "learning_rate": 0.00012520661157024791, |
| "loss": 0.372, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.22542959451675415, |
| "learning_rate": 0.0001249586776859504, |
| "loss": 0.355, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 0.32029619812965393, |
| "learning_rate": 0.00012471074380165288, |
| "loss": 0.4845, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.34882861375808716, |
| "learning_rate": 0.00012446280991735537, |
| "loss": 0.4184, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.3319970667362213, |
| "learning_rate": 0.00012421487603305785, |
| "loss": 0.5733, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.2770652770996094, |
| "learning_rate": 0.00012396694214876033, |
| "loss": 0.4296, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.3109978437423706, |
| "learning_rate": 0.0001237190082644628, |
| "loss": 0.3757, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.23606395721435547, |
| "learning_rate": 0.00012347107438016527, |
| "loss": 0.2713, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.304574579000473, |
| "learning_rate": 0.00012322314049586776, |
| "loss": 0.4451, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.31314462423324585, |
| "learning_rate": 0.00012297520661157024, |
| "loss": 0.493, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.32014840841293335, |
| "learning_rate": 0.00012272727272727272, |
| "loss": 0.3784, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.29856279492378235, |
| "learning_rate": 0.0001224793388429752, |
| "loss": 0.581, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.30951863527297974, |
| "learning_rate": 0.0001222314049586777, |
| "loss": 0.4851, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.264663428068161, |
| "learning_rate": 0.00012198347107438015, |
| "loss": 0.431, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 0.3092226982116699, |
| "learning_rate": 0.00012173553719008264, |
| "loss": 0.4553, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.33568286895751953, |
| "learning_rate": 0.00012148760330578511, |
| "loss": 0.4894, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2966444492340088, |
| "learning_rate": 0.0001212396694214876, |
| "loss": 0.3855, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2829122841358185, |
| "learning_rate": 0.00012099173553719007, |
| "loss": 0.5328, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.31785663962364197, |
| "learning_rate": 0.00012074380165289255, |
| "loss": 0.4142, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2983114719390869, |
| "learning_rate": 0.00012049586776859502, |
| "loss": 0.4168, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.2514868378639221, |
| "learning_rate": 0.0001202479338842975, |
| "loss": 0.4728, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.2959445118904114, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.458, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.31830325722694397, |
| "learning_rate": 0.00011975206611570247, |
| "loss": 0.5035, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.31181418895721436, |
| "learning_rate": 0.00011950413223140496, |
| "loss": 0.3776, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.3027549684047699, |
| "learning_rate": 0.00011925619834710743, |
| "loss": 0.4483, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.28026890754699707, |
| "learning_rate": 0.00011900826446280991, |
| "loss": 0.4236, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.29137665033340454, |
| "learning_rate": 0.00011876033057851238, |
| "loss": 0.3615, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 0.282008558511734, |
| "learning_rate": 0.00011851239669421486, |
| "loss": 0.4335, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.297736793756485, |
| "learning_rate": 0.00011826446280991733, |
| "loss": 0.4945, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.3276868164539337, |
| "learning_rate": 0.00011801652892561982, |
| "loss": 0.5379, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.3510095179080963, |
| "learning_rate": 0.00011776859504132231, |
| "loss": 0.3589, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.29952242970466614, |
| "learning_rate": 0.00011752066115702478, |
| "loss": 0.3805, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.220473513007164, |
| "learning_rate": 0.00011727272727272727, |
| "loss": 0.3978, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 0.30668944120407104, |
| "learning_rate": 0.00011702479338842974, |
| "loss": 0.3577, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.3152049779891968, |
| "learning_rate": 0.00011677685950413222, |
| "loss": 0.5186, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.17376375198364258, |
| "learning_rate": 0.00011652892561983469, |
| "loss": 0.32, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.32847121357917786, |
| "learning_rate": 0.00011628099173553718, |
| "loss": 0.5403, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.28821662068367004, |
| "learning_rate": 0.00011603305785123965, |
| "loss": 0.3516, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.23324501514434814, |
| "learning_rate": 0.00011578512396694214, |
| "loss": 0.3398, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.2897385060787201, |
| "learning_rate": 0.00011553719008264463, |
| "loss": 0.3775, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.33701419830322266, |
| "learning_rate": 0.0001152892561983471, |
| "loss": 0.5225, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.3228382468223572, |
| "learning_rate": 0.00011504132231404958, |
| "loss": 0.4384, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.24733024835586548, |
| "learning_rate": 0.00011479338842975205, |
| "loss": 0.2883, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.2824367880821228, |
| "learning_rate": 0.00011454545454545453, |
| "loss": 0.3141, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.27844521403312683, |
| "learning_rate": 0.000114297520661157, |
| "loss": 0.3327, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.26114732027053833, |
| "learning_rate": 0.0001140495867768595, |
| "loss": 0.4071, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.34284186363220215, |
| "learning_rate": 0.00011380165289256197, |
| "loss": 0.4619, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.2463303506374359, |
| "learning_rate": 0.00011355371900826446, |
| "loss": 0.3038, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.26452890038490295, |
| "learning_rate": 0.00011330578512396693, |
| "loss": 0.3603, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.27888497710227966, |
| "learning_rate": 0.00011305785123966941, |
| "loss": 0.5109, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.3039766252040863, |
| "learning_rate": 0.00011280991735537189, |
| "loss": 0.5377, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.28995901346206665, |
| "learning_rate": 0.00011256198347107436, |
| "loss": 0.4797, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.3420790135860443, |
| "learning_rate": 0.00011231404958677686, |
| "loss": 0.5209, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.33119046688079834, |
| "learning_rate": 0.00011206611570247933, |
| "loss": 0.3709, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.3408135175704956, |
| "learning_rate": 0.00011181818181818181, |
| "loss": 0.4389, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.29120129346847534, |
| "learning_rate": 0.00011157024793388428, |
| "loss": 0.4327, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.32718029618263245, |
| "learning_rate": 0.00011132231404958677, |
| "loss": 0.4859, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 0.34422147274017334, |
| "learning_rate": 0.00011107438016528924, |
| "loss": 0.5184, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.330323189496994, |
| "learning_rate": 0.00011082644628099172, |
| "loss": 0.4322, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.3218427002429962, |
| "learning_rate": 0.00011057851239669422, |
| "loss": 0.4129, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.2976725995540619, |
| "learning_rate": 0.00011033057851239669, |
| "loss": 0.5039, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.32841789722442627, |
| "learning_rate": 0.00011008264462809917, |
| "loss": 0.4718, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.32977914810180664, |
| "learning_rate": 0.00010983471074380164, |
| "loss": 0.4248, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 0.2632751166820526, |
| "learning_rate": 0.00010958677685950413, |
| "loss": 0.3458, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.33028510212898254, |
| "learning_rate": 0.0001093388429752066, |
| "loss": 0.4884, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.30288752913475037, |
| "learning_rate": 0.00010909090909090908, |
| "loss": 0.3776, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.32292476296424866, |
| "learning_rate": 0.00010884297520661155, |
| "loss": 0.392, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.31956765055656433, |
| "learning_rate": 0.00010859504132231405, |
| "loss": 0.3308, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.280553936958313, |
| "learning_rate": 0.00010834710743801652, |
| "loss": 0.5806, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.35859328508377075, |
| "learning_rate": 0.000108099173553719, |
| "loss": 0.5059, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.2944432497024536, |
| "learning_rate": 0.00010785123966942148, |
| "loss": 0.5132, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.27504968643188477, |
| "learning_rate": 0.00010760330578512395, |
| "loss": 0.3741, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.29401764273643494, |
| "learning_rate": 0.00010735537190082644, |
| "loss": 0.4992, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.30569151043891907, |
| "learning_rate": 0.00010710743801652891, |
| "loss": 0.5029, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.28654801845550537, |
| "learning_rate": 0.0001068595041322314, |
| "loss": 0.4618, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 0.26424363255500793, |
| "learning_rate": 0.00010661157024793387, |
| "loss": 0.3929, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.28117212653160095, |
| "learning_rate": 0.00010636363636363636, |
| "loss": 0.5116, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.28402891755104065, |
| "learning_rate": 0.00010611570247933883, |
| "loss": 0.3758, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.32903602719306946, |
| "learning_rate": 0.00010586776859504131, |
| "loss": 0.3594, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.4285104274749756, |
| "learning_rate": 0.0001056198347107438, |
| "loss": 0.3007, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.27649369835853577, |
| "learning_rate": 0.00010537190082644627, |
| "loss": 0.342, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.3094039261341095, |
| "learning_rate": 0.00010512396694214876, |
| "loss": 0.4452, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.32547199726104736, |
| "learning_rate": 0.00010487603305785123, |
| "loss": 0.4274, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.30244141817092896, |
| "learning_rate": 0.00010462809917355372, |
| "loss": 0.393, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.3018583655357361, |
| "learning_rate": 0.00010438016528925619, |
| "loss": 0.4012, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.36397960782051086, |
| "learning_rate": 0.00010413223140495867, |
| "loss": 0.5231, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.3178517520427704, |
| "learning_rate": 0.00010388429752066114, |
| "loss": 0.4036, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.34640219807624817, |
| "learning_rate": 0.00010363636363636362, |
| "loss": 0.4717, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 0.302775114774704, |
| "learning_rate": 0.0001033884297520661, |
| "loss": 0.4207, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.30845245718955994, |
| "learning_rate": 0.00010314049586776859, |
| "loss": 0.3976, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.2689266502857208, |
| "learning_rate": 0.00010289256198347107, |
| "loss": 0.3777, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.33539149165153503, |
| "learning_rate": 0.00010264462809917354, |
| "loss": 0.3896, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.2548604905605316, |
| "learning_rate": 0.00010239669421487603, |
| "loss": 0.4026, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.5050720572471619, |
| "learning_rate": 0.0001021487603305785, |
| "loss": 0.4008, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.2518717646598816, |
| "learning_rate": 0.00010190082644628098, |
| "loss": 0.348, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.39397895336151123, |
| "learning_rate": 0.00010165289256198345, |
| "loss": 0.5369, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.3471471965312958, |
| "learning_rate": 0.00010140495867768595, |
| "loss": 0.5272, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.4147883355617523, |
| "learning_rate": 0.00010115702479338842, |
| "loss": 0.427, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.2932160794734955, |
| "learning_rate": 0.0001009090909090909, |
| "loss": 0.3274, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.28647059202194214, |
| "learning_rate": 0.00010066115702479339, |
| "loss": 0.3346, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 0.28154057264328003, |
| "learning_rate": 0.00010041322314049586, |
| "loss": 0.3785, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.25706711411476135, |
| "learning_rate": 0.00010016528925619834, |
| "loss": 0.3261, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.3318668603897095, |
| "learning_rate": 9.991735537190081e-05, |
| "loss": 0.4362, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.33185282349586487, |
| "learning_rate": 9.966942148760331e-05, |
| "loss": 0.5219, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.2683846056461334, |
| "learning_rate": 9.942148760330578e-05, |
| "loss": 0.3657, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.2643420100212097, |
| "learning_rate": 9.917355371900826e-05, |
| "loss": 0.4697, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.32440856099128723, |
| "learning_rate": 9.892561983471073e-05, |
| "loss": 0.5572, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.22183597087860107, |
| "learning_rate": 9.867768595041321e-05, |
| "loss": 0.3379, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.26266101002693176, |
| "learning_rate": 9.842975206611568e-05, |
| "loss": 0.439, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.2978360950946808, |
| "learning_rate": 9.818181818181817e-05, |
| "loss": 0.4654, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.2713984251022339, |
| "learning_rate": 9.793388429752067e-05, |
| "loss": 0.2983, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.2561984956264496, |
| "learning_rate": 9.768595041322314e-05, |
| "loss": 0.3381, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.2766323983669281, |
| "learning_rate": 9.743801652892562e-05, |
| "loss": 0.4167, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 0.33810022473335266, |
| "learning_rate": 9.719008264462809e-05, |
| "loss": 0.3793, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 0.3332251310348511, |
| "learning_rate": 9.694214876033057e-05, |
| "loss": 0.5517, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 0.2713959515094757, |
| "learning_rate": 9.669421487603304e-05, |
| "loss": 0.3583, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 0.2778157889842987, |
| "learning_rate": 9.644628099173553e-05, |
| "loss": 0.3089, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 0.33538392186164856, |
| "learning_rate": 9.6198347107438e-05, |
| "loss": 0.3776, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 0.32728123664855957, |
| "learning_rate": 9.59504132231405e-05, |
| "loss": 0.434, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.30630162358283997, |
| "learning_rate": 9.570247933884298e-05, |
| "loss": 0.3913, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.2960034906864166, |
| "learning_rate": 9.545454545454545e-05, |
| "loss": 0.4368, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.35711923241615295, |
| "learning_rate": 9.520661157024793e-05, |
| "loss": 0.399, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.30195897817611694, |
| "learning_rate": 9.49586776859504e-05, |
| "loss": 0.4421, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.3220643401145935, |
| "learning_rate": 9.471074380165288e-05, |
| "loss": 0.3441, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.3709239661693573, |
| "learning_rate": 9.446280991735535e-05, |
| "loss": 0.4095, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.40360063314437866, |
| "learning_rate": 9.421487603305785e-05, |
| "loss": 0.5692, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.32428041100502014, |
| "learning_rate": 9.396694214876032e-05, |
| "loss": 0.4306, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.2750518321990967, |
| "learning_rate": 9.37190082644628e-05, |
| "loss": 0.3905, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.331478476524353, |
| "learning_rate": 9.347107438016528e-05, |
| "loss": 0.6008, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.3165242671966553, |
| "learning_rate": 9.322314049586776e-05, |
| "loss": 0.4624, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.26457470655441284, |
| "learning_rate": 9.297520661157024e-05, |
| "loss": 0.4462, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 0.3557126522064209, |
| "learning_rate": 9.272727272727271e-05, |
| "loss": 0.5737, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 0.3306926488876343, |
| "learning_rate": 9.247933884297521e-05, |
| "loss": 0.4597, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 0.24906127154827118, |
| "learning_rate": 9.223140495867768e-05, |
| "loss": 0.378, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 0.29440054297447205, |
| "learning_rate": 9.198347107438016e-05, |
| "loss": 0.4562, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 0.34878161549568176, |
| "learning_rate": 9.173553719008263e-05, |
| "loss": 0.4546, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 0.3725307583808899, |
| "learning_rate": 9.148760330578512e-05, |
| "loss": 0.4119, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.30648747086524963, |
| "learning_rate": 9.123966942148759e-05, |
| "loss": 0.4428, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.2755535840988159, |
| "learning_rate": 9.099173553719007e-05, |
| "loss": 0.3592, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.2802577614784241, |
| "learning_rate": 9.074380165289255e-05, |
| "loss": 0.472, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.28871360421180725, |
| "learning_rate": 9.049586776859504e-05, |
| "loss": 0.4532, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.37071362137794495, |
| "learning_rate": 9.024793388429752e-05, |
| "loss": 0.3426, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.30081430077552795, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 0.4069, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.3186596930027008, |
| "learning_rate": 8.975206611570248e-05, |
| "loss": 0.4997, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.286479115486145, |
| "learning_rate": 8.950413223140495e-05, |
| "loss": 0.3902, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.3457258939743042, |
| "learning_rate": 8.925619834710743e-05, |
| "loss": 0.4339, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.30513113737106323, |
| "learning_rate": 8.90082644628099e-05, |
| "loss": 0.3414, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.30697953701019287, |
| "learning_rate": 8.876033057851238e-05, |
| "loss": 0.4657, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.3395203649997711, |
| "learning_rate": 8.851239669421488e-05, |
| "loss": 0.3945, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 0.43322789669036865, |
| "learning_rate": 8.826446280991735e-05, |
| "loss": 0.5337, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.3421814739704132, |
| "learning_rate": 8.801652892561983e-05, |
| "loss": 0.4481, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.24497461318969727, |
| "learning_rate": 8.77685950413223e-05, |
| "loss": 0.4199, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.3835270404815674, |
| "learning_rate": 8.752066115702479e-05, |
| "loss": 0.5534, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.3144569396972656, |
| "learning_rate": 8.727272727272726e-05, |
| "loss": 0.4563, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.2757865786552429, |
| "learning_rate": 8.702479338842974e-05, |
| "loss": 0.4241, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 0.28413090109825134, |
| "learning_rate": 8.677685950413221e-05, |
| "loss": 0.3484, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.27918362617492676, |
| "learning_rate": 8.652892561983471e-05, |
| "loss": 0.4133, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.3901917040348053, |
| "learning_rate": 8.628099173553718e-05, |
| "loss": 0.4755, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.34810692071914673, |
| "learning_rate": 8.603305785123966e-05, |
| "loss": 0.4516, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.3317393958568573, |
| "learning_rate": 8.578512396694215e-05, |
| "loss": 0.4995, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.26235052943229675, |
| "learning_rate": 8.553719008264462e-05, |
| "loss": 0.3348, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 0.2735447585582733, |
| "learning_rate": 8.52892561983471e-05, |
| "loss": 0.2932, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.30968329310417175, |
| "learning_rate": 8.504132231404957e-05, |
| "loss": 0.3783, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.30193984508514404, |
| "learning_rate": 8.479338842975207e-05, |
| "loss": 0.4357, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.3407258987426758, |
| "learning_rate": 8.454545454545454e-05, |
| "loss": 0.4821, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.28090009093284607, |
| "learning_rate": 8.429752066115702e-05, |
| "loss": 0.4158, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.2898884415626526, |
| "learning_rate": 8.404958677685949e-05, |
| "loss": 0.3091, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.31658637523651123, |
| "learning_rate": 8.380165289256197e-05, |
| "loss": 0.3773, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.2722189724445343, |
| "learning_rate": 8.355371900826446e-05, |
| "loss": 0.4483, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.23621954023838043, |
| "learning_rate": 8.330578512396693e-05, |
| "loss": 0.3112, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.3659461438655853, |
| "learning_rate": 8.305785123966942e-05, |
| "loss": 0.4507, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.3253099322319031, |
| "learning_rate": 8.28099173553719e-05, |
| "loss": 0.4854, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.3201637864112854, |
| "learning_rate": 8.256198347107438e-05, |
| "loss": 0.5687, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.4112270772457123, |
| "learning_rate": 8.231404958677685e-05, |
| "loss": 0.3742, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.3146194517612457, |
| "learning_rate": 8.206611570247933e-05, |
| "loss": 0.4869, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.34321263432502747, |
| "learning_rate": 8.18181818181818e-05, |
| "loss": 0.5154, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.2986968159675598, |
| "learning_rate": 8.157024793388429e-05, |
| "loss": 0.647, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.3427133858203888, |
| "learning_rate": 8.132231404958676e-05, |
| "loss": 0.3912, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.3434309661388397, |
| "learning_rate": 8.107438016528925e-05, |
| "loss": 0.51, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 0.32024991512298584, |
| "learning_rate": 8.082644628099174e-05, |
| "loss": 0.387, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.2961815595626831, |
| "learning_rate": 8.057851239669421e-05, |
| "loss": 0.3909, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.3219030201435089, |
| "learning_rate": 8.033057851239669e-05, |
| "loss": 0.3911, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.2776000201702118, |
| "learning_rate": 8.008264462809916e-05, |
| "loss": 0.3625, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.31484290957450867, |
| "learning_rate": 7.983471074380164e-05, |
| "loss": 0.6162, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.2789134085178375, |
| "learning_rate": 7.958677685950411e-05, |
| "loss": 0.3199, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 0.27821627259254456, |
| "learning_rate": 7.933884297520661e-05, |
| "loss": 0.4295, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.3022254705429077, |
| "learning_rate": 7.909090909090908e-05, |
| "loss": 0.309, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.24830293655395508, |
| "learning_rate": 7.884297520661157e-05, |
| "loss": 0.3833, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.31184327602386475, |
| "learning_rate": 7.859504132231405e-05, |
| "loss": 0.3715, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.2993053197860718, |
| "learning_rate": 7.834710743801652e-05, |
| "loss": 0.3825, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.3385005295276642, |
| "learning_rate": 7.8099173553719e-05, |
| "loss": 0.4868, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.26812323927879333, |
| "learning_rate": 7.785123966942147e-05, |
| "loss": 0.2925, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.3275848925113678, |
| "learning_rate": 7.760330578512397e-05, |
| "loss": 0.3657, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.2972089350223541, |
| "learning_rate": 7.735537190082644e-05, |
| "loss": 0.4396, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.27619728446006775, |
| "learning_rate": 7.710743801652892e-05, |
| "loss": 0.3946, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.30436667799949646, |
| "learning_rate": 7.68595041322314e-05, |
| "loss": 0.4177, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.2652393877506256, |
| "learning_rate": 7.661157024793388e-05, |
| "loss": 0.3165, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.28303712606430054, |
| "learning_rate": 7.636363636363635e-05, |
| "loss": 0.4829, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 0.33964964747428894, |
| "learning_rate": 7.611570247933883e-05, |
| "loss": 0.5043, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.2591302692890167, |
| "learning_rate": 7.586776859504133e-05, |
| "loss": 0.3814, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.3488747179508209, |
| "learning_rate": 7.56198347107438e-05, |
| "loss": 0.5233, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.29015597701072693, |
| "learning_rate": 7.537190082644628e-05, |
| "loss": 0.4672, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.31618839502334595, |
| "learning_rate": 7.512396694214875e-05, |
| "loss": 0.4538, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.35049545764923096, |
| "learning_rate": 7.487603305785124e-05, |
| "loss": 0.4089, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.34093132615089417, |
| "learning_rate": 7.462809917355372e-05, |
| "loss": 0.4306, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.30601584911346436, |
| "learning_rate": 7.438016528925619e-05, |
| "loss": 0.4396, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.45013612508773804, |
| "learning_rate": 7.413223140495867e-05, |
| "loss": 0.4477, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.30486834049224854, |
| "learning_rate": 7.388429752066116e-05, |
| "loss": 0.3777, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.3926061689853668, |
| "learning_rate": 7.363636363636363e-05, |
| "loss": 0.3532, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.3843371272087097, |
| "learning_rate": 7.338842975206611e-05, |
| "loss": 0.5182, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 0.30922451615333557, |
| "learning_rate": 7.314049586776858e-05, |
| "loss": 0.4361, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.3367323875427246, |
| "learning_rate": 7.289256198347106e-05, |
| "loss": 0.3809, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.39369019865989685, |
| "learning_rate": 7.264462809917355e-05, |
| "loss": 0.3623, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.3159162104129791, |
| "learning_rate": 7.239669421487603e-05, |
| "loss": 0.5059, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.34716740250587463, |
| "learning_rate": 7.214876033057851e-05, |
| "loss": 0.4201, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.20480923354625702, |
| "learning_rate": 7.190082644628098e-05, |
| "loss": 0.2699, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.3518913686275482, |
| "learning_rate": 7.165289256198347e-05, |
| "loss": 0.5337, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.28605952858924866, |
| "learning_rate": 7.140495867768594e-05, |
| "loss": 0.44, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.28229033946990967, |
| "learning_rate": 7.115702479338842e-05, |
| "loss": 0.3534, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.3456754684448242, |
| "learning_rate": 7.09090909090909e-05, |
| "loss": 0.3952, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.27707159519195557, |
| "learning_rate": 7.066115702479338e-05, |
| "loss": 0.3667, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.2811780273914337, |
| "learning_rate": 7.041322314049586e-05, |
| "loss": 0.3954, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 0.3099793493747711, |
| "learning_rate": 7.016528925619834e-05, |
| "loss": 0.441, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.4153590500354767, |
| "learning_rate": 6.991735537190083e-05, |
| "loss": 0.4462, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.2945801615715027, |
| "learning_rate": 6.96694214876033e-05, |
| "loss": 0.4535, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.2930592894554138, |
| "learning_rate": 6.942148760330578e-05, |
| "loss": 0.5566, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.3034913241863251, |
| "learning_rate": 6.917355371900826e-05, |
| "loss": 0.4695, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.3054913878440857, |
| "learning_rate": 6.892561983471073e-05, |
| "loss": 0.3921, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 0.3297981917858124, |
| "learning_rate": 6.867768595041322e-05, |
| "loss": 0.5057, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.23640452325344086, |
| "learning_rate": 6.84297520661157e-05, |
| "loss": 0.329, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.2970188856124878, |
| "learning_rate": 6.818181818181817e-05, |
| "loss": 0.4376, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.3243064880371094, |
| "learning_rate": 6.793388429752065e-05, |
| "loss": 0.4922, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.4473859667778015, |
| "learning_rate": 6.768595041322312e-05, |
| "loss": 0.5245, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.2901310622692108, |
| "learning_rate": 6.743801652892562e-05, |
| "loss": 0.4996, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.3633457124233246, |
| "learning_rate": 6.719008264462809e-05, |
| "loss": 0.4669, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.33570581674575806, |
| "learning_rate": 6.694214876033058e-05, |
| "loss": 0.404, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.26466354727745056, |
| "learning_rate": 6.669421487603306e-05, |
| "loss": 0.2881, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.29028353095054626, |
| "learning_rate": 6.644628099173553e-05, |
| "loss": 0.3607, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.2878669798374176, |
| "learning_rate": 6.619834710743801e-05, |
| "loss": 0.4415, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.33260804414749146, |
| "learning_rate": 6.595041322314048e-05, |
| "loss": 0.4424, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.3135119378566742, |
| "learning_rate": 6.570247933884297e-05, |
| "loss": 0.4276, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 0.2714795470237732, |
| "learning_rate": 6.545454545454545e-05, |
| "loss": 0.2789, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 0.3564438819885254, |
| "learning_rate": 6.520661157024792e-05, |
| "loss": 0.4683, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 0.3303399682044983, |
| "learning_rate": 6.495867768595042e-05, |
| "loss": 0.4657, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 0.30086350440979004, |
| "learning_rate": 6.471074380165289e-05, |
| "loss": 0.3296, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 0.34699100255966187, |
| "learning_rate": 6.446280991735537e-05, |
| "loss": 0.3543, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 0.326579213142395, |
| "learning_rate": 6.421487603305784e-05, |
| "loss": 0.4001, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.3462665379047394, |
| "learning_rate": 6.396694214876032e-05, |
| "loss": 0.3999, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.3408821225166321, |
| "learning_rate": 6.371900826446281e-05, |
| "loss": 0.3614, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.3061428666114807, |
| "learning_rate": 6.347107438016528e-05, |
| "loss": 0.4127, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.30745938420295715, |
| "learning_rate": 6.322314049586776e-05, |
| "loss": 0.3965, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.33782872557640076, |
| "learning_rate": 6.297520661157025e-05, |
| "loss": 0.5026, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.3501698076725006, |
| "learning_rate": 6.272727272727272e-05, |
| "loss": 0.4731, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 0.3578520119190216, |
| "learning_rate": 6.24793388429752e-05, |
| "loss": 0.4302, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 0.30132660269737244, |
| "learning_rate": 6.223140495867768e-05, |
| "loss": 0.3784, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 0.29198774695396423, |
| "learning_rate": 6.198347107438017e-05, |
| "loss": 0.396, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 0.3028549551963806, |
| "learning_rate": 6.173553719008264e-05, |
| "loss": 0.3531, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 0.3193860352039337, |
| "learning_rate": 6.148760330578512e-05, |
| "loss": 0.5261, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 0.330228716135025, |
| "learning_rate": 6.12396694214876e-05, |
| "loss": 0.3853, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 0.2856347858905792, |
| "learning_rate": 6.0991735537190074e-05, |
| "loss": 0.4543, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.3663886487483978, |
| "learning_rate": 6.074380165289256e-05, |
| "loss": 0.3821, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.3297857642173767, |
| "learning_rate": 6.0495867768595034e-05, |
| "loss": 0.4504, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.29853883385658264, |
| "learning_rate": 6.024793388429751e-05, |
| "loss": 0.3528, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.3246425986289978, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 0.3986, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.3537238836288452, |
| "learning_rate": 5.975206611570248e-05, |
| "loss": 0.3776, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.2915757894515991, |
| "learning_rate": 5.9504132231404955e-05, |
| "loss": 0.2895, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.30707284808158875, |
| "learning_rate": 5.925619834710743e-05, |
| "loss": 0.3238, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.301845520734787, |
| "learning_rate": 5.900826446280991e-05, |
| "loss": 0.4031, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.24002347886562347, |
| "learning_rate": 5.876033057851239e-05, |
| "loss": 0.3477, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.3008634150028229, |
| "learning_rate": 5.851239669421487e-05, |
| "loss": 0.4595, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.32416027784347534, |
| "learning_rate": 5.8264462809917346e-05, |
| "loss": 0.403, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 0.3158760368824005, |
| "learning_rate": 5.801652892561982e-05, |
| "loss": 0.305, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.33743736147880554, |
| "learning_rate": 5.7768595041322313e-05, |
| "loss": 0.4867, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.3402981460094452, |
| "learning_rate": 5.752066115702479e-05, |
| "loss": 0.3982, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.3389660716056824, |
| "learning_rate": 5.727272727272727e-05, |
| "loss": 0.4311, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.47749587893486023, |
| "learning_rate": 5.702479338842975e-05, |
| "loss": 0.3775, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.27538084983825684, |
| "learning_rate": 5.677685950413223e-05, |
| "loss": 0.3568, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.33023789525032043, |
| "learning_rate": 5.6528925619834704e-05, |
| "loss": 0.4225, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.28135445713996887, |
| "learning_rate": 5.628099173553718e-05, |
| "loss": 0.3658, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.3511416018009186, |
| "learning_rate": 5.6033057851239665e-05, |
| "loss": 0.3928, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.2987925708293915, |
| "learning_rate": 5.578512396694214e-05, |
| "loss": 0.4015, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.3340010344982147, |
| "learning_rate": 5.553719008264462e-05, |
| "loss": 0.4566, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.23461014032363892, |
| "learning_rate": 5.528925619834711e-05, |
| "loss": 0.3556, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 0.3425525724887848, |
| "learning_rate": 5.5041322314049586e-05, |
| "loss": 0.3736, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 0.33320698142051697, |
| "learning_rate": 5.479338842975206e-05, |
| "loss": 0.3926, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 0.26936790347099304, |
| "learning_rate": 5.454545454545454e-05, |
| "loss": 0.3587, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 0.322934091091156, |
| "learning_rate": 5.429752066115702e-05, |
| "loss": 0.3119, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 0.3295484483242035, |
| "learning_rate": 5.40495867768595e-05, |
| "loss": 0.3257, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 0.2893584370613098, |
| "learning_rate": 5.380165289256198e-05, |
| "loss": 0.3451, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 0.3215138912200928, |
| "learning_rate": 5.3553719008264454e-05, |
| "loss": 0.4104, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.19545914232730865, |
| "learning_rate": 5.330578512396694e-05, |
| "loss": 0.2245, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.2952648103237152, |
| "learning_rate": 5.3057851239669414e-05, |
| "loss": 0.3393, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.34105175733566284, |
| "learning_rate": 5.28099173553719e-05, |
| "loss": 0.519, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.3435216546058655, |
| "learning_rate": 5.256198347107438e-05, |
| "loss": 0.4968, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.29052355885505676, |
| "learning_rate": 5.231404958677686e-05, |
| "loss": 0.4419, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.3326230049133301, |
| "learning_rate": 5.2066115702479335e-05, |
| "loss": 0.4461, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 0.35595494508743286, |
| "learning_rate": 5.181818181818181e-05, |
| "loss": 0.4886, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 0.3467525541782379, |
| "learning_rate": 5.1570247933884295e-05, |
| "loss": 0.4671, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 0.29460448026657104, |
| "learning_rate": 5.132231404958677e-05, |
| "loss": 0.3872, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 0.273575097322464, |
| "learning_rate": 5.107438016528925e-05, |
| "loss": 0.3603, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 0.3603818416595459, |
| "learning_rate": 5.0826446280991726e-05, |
| "loss": 0.3539, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 0.31469517946243286, |
| "learning_rate": 5.057851239669421e-05, |
| "loss": 0.3988, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.3218969702720642, |
| "learning_rate": 5.033057851239669e-05, |
| "loss": 0.4366, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.34077420830726624, |
| "learning_rate": 5.008264462809917e-05, |
| "loss": 0.4248, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.322591096162796, |
| "learning_rate": 4.9834710743801654e-05, |
| "loss": 0.5081, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.35607361793518066, |
| "learning_rate": 4.958677685950413e-05, |
| "loss": 0.3596, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.2865798771381378, |
| "learning_rate": 4.933884297520661e-05, |
| "loss": 0.2703, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.30387502908706665, |
| "learning_rate": 4.9090909090909084e-05, |
| "loss": 0.3051, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 0.3474448323249817, |
| "learning_rate": 4.884297520661157e-05, |
| "loss": 0.2851, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 0.3696686625480652, |
| "learning_rate": 4.8595041322314045e-05, |
| "loss": 0.4403, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 0.33602291345596313, |
| "learning_rate": 4.834710743801652e-05, |
| "loss": 0.4134, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 0.27331918478012085, |
| "learning_rate": 4.8099173553719e-05, |
| "loss": 0.3303, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 0.3705825209617615, |
| "learning_rate": 4.785123966942149e-05, |
| "loss": 0.3411, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 0.4541082978248596, |
| "learning_rate": 4.7603305785123966e-05, |
| "loss": 0.4263, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 0.29885897040367126, |
| "learning_rate": 4.735537190082644e-05, |
| "loss": 0.5602, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.35169675946235657, |
| "learning_rate": 4.7107438016528926e-05, |
| "loss": 0.4409, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.41590291261672974, |
| "learning_rate": 4.68595041322314e-05, |
| "loss": 0.4355, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.33613288402557373, |
| "learning_rate": 4.661157024793388e-05, |
| "loss": 0.4399, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.3519938886165619, |
| "learning_rate": 4.6363636363636356e-05, |
| "loss": 0.4464, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.2981269359588623, |
| "learning_rate": 4.611570247933884e-05, |
| "loss": 0.3667, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 0.32030418515205383, |
| "learning_rate": 4.586776859504132e-05, |
| "loss": 0.3759, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.39815372228622437, |
| "learning_rate": 4.5619834710743794e-05, |
| "loss": 0.3259, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.33106112480163574, |
| "learning_rate": 4.537190082644628e-05, |
| "loss": 0.4985, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.3748137950897217, |
| "learning_rate": 4.512396694214876e-05, |
| "loss": 0.5177, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.31328514218330383, |
| "learning_rate": 4.487603305785124e-05, |
| "loss": 0.3406, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.35391247272491455, |
| "learning_rate": 4.4628099173553715e-05, |
| "loss": 0.4216, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.37352749705314636, |
| "learning_rate": 4.438016528925619e-05, |
| "loss": 0.4936, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.20523978769779205, |
| "learning_rate": 4.4132231404958675e-05, |
| "loss": 0.2241, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.26052072644233704, |
| "learning_rate": 4.388429752066115e-05, |
| "loss": 0.352, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.30189159512519836, |
| "learning_rate": 4.363636363636363e-05, |
| "loss": 0.3956, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.28206998109817505, |
| "learning_rate": 4.3388429752066106e-05, |
| "loss": 0.3073, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.3497346341609955, |
| "learning_rate": 4.314049586776859e-05, |
| "loss": 0.4544, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 0.31490492820739746, |
| "learning_rate": 4.289256198347107e-05, |
| "loss": 0.4809, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.26548659801483154, |
| "learning_rate": 4.264462809917355e-05, |
| "loss": 0.3189, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.40890252590179443, |
| "learning_rate": 4.239669421487603e-05, |
| "loss": 0.4825, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.392419695854187, |
| "learning_rate": 4.214876033057851e-05, |
| "loss": 0.3518, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.3267776370048523, |
| "learning_rate": 4.190082644628099e-05, |
| "loss": 0.5964, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.29872927069664, |
| "learning_rate": 4.1652892561983464e-05, |
| "loss": 0.3496, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.3140263259410858, |
| "learning_rate": 4.140495867768595e-05, |
| "loss": 0.3496, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 0.35923945903778076, |
| "learning_rate": 4.1157024793388424e-05, |
| "loss": 0.4328, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 0.24899311363697052, |
| "learning_rate": 4.09090909090909e-05, |
| "loss": 0.3662, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 0.300325870513916, |
| "learning_rate": 4.066115702479338e-05, |
| "loss": 0.3714, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 0.26927053928375244, |
| "learning_rate": 4.041322314049587e-05, |
| "loss": 0.3518, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 0.28170421719551086, |
| "learning_rate": 4.0165289256198345e-05, |
| "loss": 0.4214, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 0.3097275197505951, |
| "learning_rate": 3.991735537190082e-05, |
| "loss": 0.3387, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.36259180307388306, |
| "learning_rate": 3.9669421487603306e-05, |
| "loss": 0.4968, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.3555668592453003, |
| "learning_rate": 3.942148760330578e-05, |
| "loss": 0.4415, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.2894740104675293, |
| "learning_rate": 3.917355371900826e-05, |
| "loss": 0.3911, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.3361656665802002, |
| "learning_rate": 3.8925619834710736e-05, |
| "loss": 0.4286, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.33269697427749634, |
| "learning_rate": 3.867768595041322e-05, |
| "loss": 0.5162, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 0.3324260711669922, |
| "learning_rate": 3.84297520661157e-05, |
| "loss": 0.4073, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.3037840723991394, |
| "learning_rate": 3.8181818181818174e-05, |
| "loss": 0.4084, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.29843100905418396, |
| "learning_rate": 3.7933884297520664e-05, |
| "loss": 0.4028, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.24433061480522156, |
| "learning_rate": 3.768595041322314e-05, |
| "loss": 0.3769, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.31540754437446594, |
| "learning_rate": 3.743801652892562e-05, |
| "loss": 0.4006, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.3915780186653137, |
| "learning_rate": 3.7190082644628094e-05, |
| "loss": 0.3859, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.7843402028083801, |
| "learning_rate": 3.694214876033058e-05, |
| "loss": 0.4284, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.3000487685203552, |
| "learning_rate": 3.6694214876033055e-05, |
| "loss": 0.6066, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.2342897206544876, |
| "learning_rate": 3.644628099173553e-05, |
| "loss": 0.3012, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.3100823760032654, |
| "learning_rate": 3.6198347107438015e-05, |
| "loss": 0.4236, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.3442421853542328, |
| "learning_rate": 3.595041322314049e-05, |
| "loss": 0.4716, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.2785506546497345, |
| "learning_rate": 3.570247933884297e-05, |
| "loss": 0.307, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.333635151386261, |
| "learning_rate": 3.545454545454545e-05, |
| "loss": 0.4521, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.3365010619163513, |
| "learning_rate": 3.520661157024793e-05, |
| "loss": 0.4522, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.31510964035987854, |
| "learning_rate": 3.495867768595041e-05, |
| "loss": 0.4101, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.2939818501472473, |
| "learning_rate": 3.471074380165289e-05, |
| "loss": 0.378, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.33073171973228455, |
| "learning_rate": 3.446280991735537e-05, |
| "loss": 0.4319, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.306769460439682, |
| "learning_rate": 3.421487603305785e-05, |
| "loss": 0.4584, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.3151317536830902, |
| "learning_rate": 3.396694214876033e-05, |
| "loss": 0.3202, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.313348650932312, |
| "learning_rate": 3.371900826446281e-05, |
| "loss": 0.4051, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.3377431333065033, |
| "learning_rate": 3.347107438016529e-05, |
| "loss": 0.3842, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.31378257274627686, |
| "learning_rate": 3.3223140495867765e-05, |
| "loss": 0.377, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.31627315282821655, |
| "learning_rate": 3.297520661157024e-05, |
| "loss": 0.4278, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.2957272529602051, |
| "learning_rate": 3.2727272727272725e-05, |
| "loss": 0.3384, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.3261624872684479, |
| "learning_rate": 3.247933884297521e-05, |
| "loss": 0.4525, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.28680557012557983, |
| "learning_rate": 3.2231404958677685e-05, |
| "loss": 0.3627, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.29543063044548035, |
| "learning_rate": 3.198347107438016e-05, |
| "loss": 0.2922, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.3554795980453491, |
| "learning_rate": 3.173553719008264e-05, |
| "loss": 0.4692, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.28728747367858887, |
| "learning_rate": 3.148760330578512e-05, |
| "loss": 0.2595, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.3099517524242401, |
| "learning_rate": 3.12396694214876e-05, |
| "loss": 0.3912, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.3173176050186157, |
| "learning_rate": 3.099173553719008e-05, |
| "loss": 0.4186, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 0.3445116877555847, |
| "learning_rate": 3.074380165289256e-05, |
| "loss": 0.4966, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.32030245661735535, |
| "learning_rate": 3.0495867768595037e-05, |
| "loss": 0.4671, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.3321797847747803, |
| "learning_rate": 3.0247933884297517e-05, |
| "loss": 0.4265, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.36085036396980286, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 0.4324, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.2999497950077057, |
| "learning_rate": 2.9752066115702478e-05, |
| "loss": 0.3173, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.31063607335090637, |
| "learning_rate": 2.9504132231404954e-05, |
| "loss": 0.4941, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.2864468991756439, |
| "learning_rate": 2.9256198347107435e-05, |
| "loss": 0.4309, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.2904879152774811, |
| "learning_rate": 2.900826446280991e-05, |
| "loss": 0.4782, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.31169822812080383, |
| "learning_rate": 2.8760330578512395e-05, |
| "loss": 0.4881, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.3462170660495758, |
| "learning_rate": 2.8512396694214875e-05, |
| "loss": 0.3551, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.3066549301147461, |
| "learning_rate": 2.8264462809917352e-05, |
| "loss": 0.4522, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.33785369992256165, |
| "learning_rate": 2.8016528925619832e-05, |
| "loss": 0.3763, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 0.2975507378578186, |
| "learning_rate": 2.776859504132231e-05, |
| "loss": 0.3193, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 0.31934845447540283, |
| "learning_rate": 2.7520661157024793e-05, |
| "loss": 0.2994, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 0.29450473189353943, |
| "learning_rate": 2.727272727272727e-05, |
| "loss": 0.4279, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 0.3054717779159546, |
| "learning_rate": 2.702479338842975e-05, |
| "loss": 0.4687, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 0.32938167452812195, |
| "learning_rate": 2.6776859504132227e-05, |
| "loss": 0.4815, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 0.2678495943546295, |
| "learning_rate": 2.6528925619834707e-05, |
| "loss": 0.3116, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 0.26357004046440125, |
| "learning_rate": 2.628099173553719e-05, |
| "loss": 0.3286, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.3359578251838684, |
| "learning_rate": 2.6033057851239667e-05, |
| "loss": 0.4137, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.3395717442035675, |
| "learning_rate": 2.5785123966942148e-05, |
| "loss": 0.3812, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.29891693592071533, |
| "learning_rate": 2.5537190082644625e-05, |
| "loss": 0.2989, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.402649462223053, |
| "learning_rate": 2.5289256198347105e-05, |
| "loss": 0.4333, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.3397662341594696, |
| "learning_rate": 2.5041322314049585e-05, |
| "loss": 0.4188, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 0.33743607997894287, |
| "learning_rate": 2.4793388429752065e-05, |
| "loss": 0.5309, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 0.3248274624347687, |
| "learning_rate": 2.4545454545454542e-05, |
| "loss": 0.3905, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 0.3567257821559906, |
| "learning_rate": 2.4297520661157022e-05, |
| "loss": 0.4107, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 0.4383893311023712, |
| "learning_rate": 2.40495867768595e-05, |
| "loss": 0.5024, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 0.2777807414531708, |
| "learning_rate": 2.3801652892561983e-05, |
| "loss": 0.3289, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 0.3409118950366974, |
| "learning_rate": 2.3553719008264463e-05, |
| "loss": 0.5199, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 0.3060845732688904, |
| "learning_rate": 2.330578512396694e-05, |
| "loss": 0.412, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.3366425335407257, |
| "learning_rate": 2.305785123966942e-05, |
| "loss": 0.484, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.36060798168182373, |
| "learning_rate": 2.2809917355371897e-05, |
| "loss": 0.5543, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.25729015469551086, |
| "learning_rate": 2.256198347107438e-05, |
| "loss": 0.2763, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.2890430688858032, |
| "learning_rate": 2.2314049586776857e-05, |
| "loss": 0.3762, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.31579041481018066, |
| "learning_rate": 2.2066115702479338e-05, |
| "loss": 0.396, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.3136342763900757, |
| "learning_rate": 2.1818181818181814e-05, |
| "loss": 0.4134, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.37239784002304077, |
| "learning_rate": 2.1570247933884295e-05, |
| "loss": 0.4666, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.2847795784473419, |
| "learning_rate": 2.1322314049586775e-05, |
| "loss": 0.3481, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.27870920300483704, |
| "learning_rate": 2.1074380165289255e-05, |
| "loss": 0.2669, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.2700231969356537, |
| "learning_rate": 2.0826446280991732e-05, |
| "loss": 0.2798, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.3257925510406494, |
| "learning_rate": 2.0578512396694212e-05, |
| "loss": 0.4931, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.2964242994785309, |
| "learning_rate": 2.033057851239669e-05, |
| "loss": 0.429, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 0.32561832666397095, |
| "learning_rate": 2.0082644628099173e-05, |
| "loss": 0.3467, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.27957382798194885, |
| "learning_rate": 1.9834710743801653e-05, |
| "loss": 0.2686, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.3476884663105011, |
| "learning_rate": 1.958677685950413e-05, |
| "loss": 0.4814, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.2950107753276825, |
| "learning_rate": 1.933884297520661e-05, |
| "loss": 0.3578, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.30689096450805664, |
| "learning_rate": 1.9090909090909087e-05, |
| "loss": 0.3725, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.430915504693985, |
| "learning_rate": 1.884297520661157e-05, |
| "loss": 0.4766, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 0.3086168169975281, |
| "learning_rate": 1.8595041322314047e-05, |
| "loss": 0.5506, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.3441203534603119, |
| "learning_rate": 1.8347107438016527e-05, |
| "loss": 0.4251, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.2828252613544464, |
| "learning_rate": 1.8099173553719008e-05, |
| "loss": 0.3, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.33563023805618286, |
| "learning_rate": 1.7851239669421485e-05, |
| "loss": 0.4082, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.33100175857543945, |
| "learning_rate": 1.7603305785123965e-05, |
| "loss": 0.5853, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.3554556369781494, |
| "learning_rate": 1.7355371900826445e-05, |
| "loss": 0.5677, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.32995131611824036, |
| "learning_rate": 1.7107438016528925e-05, |
| "loss": 0.3315, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.3160393238067627, |
| "learning_rate": 1.6859504132231405e-05, |
| "loss": 0.3632, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.3632807433605194, |
| "learning_rate": 1.6611570247933882e-05, |
| "loss": 0.4053, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.2931605279445648, |
| "learning_rate": 1.6363636363636363e-05, |
| "loss": 0.358, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.32687610387802124, |
| "learning_rate": 1.6115702479338843e-05, |
| "loss": 0.4584, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.3283078074455261, |
| "learning_rate": 1.586776859504132e-05, |
| "loss": 0.3818, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 0.31993189454078674, |
| "learning_rate": 1.56198347107438e-05, |
| "loss": 0.3714, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.2674204409122467, |
| "learning_rate": 1.537190082644628e-05, |
| "loss": 0.3943, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.3968242406845093, |
| "learning_rate": 1.5123966942148759e-05, |
| "loss": 0.4465, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.2870213985443115, |
| "learning_rate": 1.4876033057851239e-05, |
| "loss": 0.3616, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.29502633213996887, |
| "learning_rate": 1.4628099173553717e-05, |
| "loss": 0.4112, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.36414459347724915, |
| "learning_rate": 1.4380165289256198e-05, |
| "loss": 0.3928, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.274940550327301, |
| "learning_rate": 1.4132231404958676e-05, |
| "loss": 0.3971, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 0.3382115364074707, |
| "learning_rate": 1.3884297520661155e-05, |
| "loss": 0.339, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 0.32059189677238464, |
| "learning_rate": 1.3636363636363635e-05, |
| "loss": 0.4632, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 0.40788954496383667, |
| "learning_rate": 1.3388429752066113e-05, |
| "loss": 0.4729, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 0.4415609836578369, |
| "learning_rate": 1.3140495867768595e-05, |
| "loss": 0.4311, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 0.29439279437065125, |
| "learning_rate": 1.2892561983471074e-05, |
| "loss": 0.3428, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 0.38421952724456787, |
| "learning_rate": 1.2644628099173552e-05, |
| "loss": 0.5504, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.2757047116756439, |
| "learning_rate": 1.2396694214876033e-05, |
| "loss": 0.3488, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.27029332518577576, |
| "learning_rate": 1.2148760330578511e-05, |
| "loss": 0.3922, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.29828086495399475, |
| "learning_rate": 1.1900826446280991e-05, |
| "loss": 0.3484, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.3248095214366913, |
| "learning_rate": 1.165289256198347e-05, |
| "loss": 0.4166, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.3183375895023346, |
| "learning_rate": 1.1404958677685948e-05, |
| "loss": 0.4207, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.38209760189056396, |
| "learning_rate": 1.1157024793388429e-05, |
| "loss": 0.4136, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.31191781163215637, |
| "learning_rate": 1.0909090909090907e-05, |
| "loss": 0.3821, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.3147072494029999, |
| "learning_rate": 1.0661157024793387e-05, |
| "loss": 0.2973, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.346629798412323, |
| "learning_rate": 1.0413223140495866e-05, |
| "loss": 0.5924, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.30329591035842896, |
| "learning_rate": 1.0165289256198345e-05, |
| "loss": 0.4802, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.3608144521713257, |
| "learning_rate": 9.917355371900826e-06, |
| "loss": 0.4187, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 0.3330174684524536, |
| "learning_rate": 9.669421487603305e-06, |
| "loss": 0.4585, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.2880091071128845, |
| "learning_rate": 9.421487603305785e-06, |
| "loss": 0.3926, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.2711026668548584, |
| "learning_rate": 9.173553719008264e-06, |
| "loss": 0.3128, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.3472573161125183, |
| "learning_rate": 8.925619834710742e-06, |
| "loss": 0.3626, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.29903772473335266, |
| "learning_rate": 8.677685950413222e-06, |
| "loss": 0.3778, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.309654176235199, |
| "learning_rate": 8.429752066115703e-06, |
| "loss": 0.3965, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 0.3163444399833679, |
| "learning_rate": 8.181818181818181e-06, |
| "loss": 0.3207, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.3754628300666809, |
| "learning_rate": 7.93388429752066e-06, |
| "loss": 0.3954, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.2967177629470825, |
| "learning_rate": 7.68595041322314e-06, |
| "loss": 0.4092, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.37930914759635925, |
| "learning_rate": 7.438016528925619e-06, |
| "loss": 0.5038, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.31978312134742737, |
| "learning_rate": 7.190082644628099e-06, |
| "loss": 0.3039, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.34556475281715393, |
| "learning_rate": 6.942148760330577e-06, |
| "loss": 0.3749, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.33958449959754944, |
| "learning_rate": 6.694214876033057e-06, |
| "loss": 0.4974, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.34213709831237793, |
| "learning_rate": 6.446280991735537e-06, |
| "loss": 0.4874, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.3194979131221771, |
| "learning_rate": 6.198347107438016e-06, |
| "loss": 0.4415, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.3170003890991211, |
| "learning_rate": 5.950413223140496e-06, |
| "loss": 0.299, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.35796797275543213, |
| "learning_rate": 5.702479338842974e-06, |
| "loss": 0.4516, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.36410433053970337, |
| "learning_rate": 5.454545454545454e-06, |
| "loss": 0.3137, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.27563753724098206, |
| "learning_rate": 5.206611570247933e-06, |
| "loss": 0.3465, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.3430056869983673, |
| "learning_rate": 4.958677685950413e-06, |
| "loss": 0.5325, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.3032241463661194, |
| "learning_rate": 4.710743801652893e-06, |
| "loss": 0.3802, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.3008878231048584, |
| "learning_rate": 4.462809917355371e-06, |
| "loss": 0.3674, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.34465453028678894, |
| "learning_rate": 4.214876033057851e-06, |
| "loss": 0.3465, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.3217530846595764, |
| "learning_rate": 3.96694214876033e-06, |
| "loss": 0.395, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.3256390690803528, |
| "learning_rate": 3.7190082644628097e-06, |
| "loss": 0.2928, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 0.404376357793808, |
| "learning_rate": 3.4710743801652887e-06, |
| "loss": 0.5579, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.2786218822002411, |
| "learning_rate": 3.2231404958677685e-06, |
| "loss": 0.3842, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.339501291513443, |
| "learning_rate": 2.975206611570248e-06, |
| "loss": 0.4061, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.3386409878730774, |
| "learning_rate": 2.727272727272727e-06, |
| "loss": 0.3452, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.36449265480041504, |
| "learning_rate": 2.4793388429752066e-06, |
| "loss": 0.3769, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.3336932361125946, |
| "learning_rate": 2.2314049586776856e-06, |
| "loss": 0.4361, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 0.28075236082077026, |
| "learning_rate": 1.983471074380165e-06, |
| "loss": 0.3614, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.31337854266166687, |
| "learning_rate": 1.7355371900826443e-06, |
| "loss": 0.37, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.3034374415874481, |
| "learning_rate": 1.487603305785124e-06, |
| "loss": 0.274, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.3485061526298523, |
| "learning_rate": 1.2396694214876033e-06, |
| "loss": 0.425, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.24720066785812378, |
| "learning_rate": 9.917355371900825e-07, |
| "loss": 0.307, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.2727121412754059, |
| "learning_rate": 7.43801652892562e-07, |
| "loss": 0.2991, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 0.33211690187454224, |
| "learning_rate": 4.958677685950412e-07, |
| "loss": 0.5309, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.328895628452301, |
| "learning_rate": 2.479338842975206e-07, |
| "loss": 0.3547, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.2642543315887451, |
| "learning_rate": 0.0, |
| "loss": 0.3047, |
| "step": 1220 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1220, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 610, |
| "total_flos": 2.626577866972938e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |