| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.998922800718133, |
| "eval_steps": 500, |
| "global_step": 2784, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010771992818671453, |
| "grad_norm": 5.9528489112854, |
| "learning_rate": 3.5842293906810036e-08, |
| "loss": 0.8283, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0021543985637342907, |
| "grad_norm": 6.202611446380615, |
| "learning_rate": 7.168458781362007e-08, |
| "loss": 0.8397, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.003231597845601436, |
| "grad_norm": 6.138794898986816, |
| "learning_rate": 1.0752688172043012e-07, |
| "loss": 0.8405, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004308797127468581, |
| "grad_norm": 6.084003448486328, |
| "learning_rate": 1.4336917562724014e-07, |
| "loss": 0.8372, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005385996409335727, |
| "grad_norm": 6.136441707611084, |
| "learning_rate": 1.7921146953405018e-07, |
| "loss": 0.8356, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006463195691202872, |
| "grad_norm": 6.009083271026611, |
| "learning_rate": 2.1505376344086024e-07, |
| "loss": 0.8111, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0075403949730700175, |
| "grad_norm": 5.893197059631348, |
| "learning_rate": 2.508960573476703e-07, |
| "loss": 0.7935, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.008617594254937163, |
| "grad_norm": 6.211028575897217, |
| "learning_rate": 2.867383512544803e-07, |
| "loss": 0.8436, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00969479353680431, |
| "grad_norm": 6.155322551727295, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 0.857, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.010771992818671455, |
| "grad_norm": 6.04060697555542, |
| "learning_rate": 3.5842293906810036e-07, |
| "loss": 0.8241, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0118491921005386, |
| "grad_norm": 5.9382829666137695, |
| "learning_rate": 3.942652329749104e-07, |
| "loss": 0.8224, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.012926391382405745, |
| "grad_norm": 5.773810386657715, |
| "learning_rate": 4.301075268817205e-07, |
| "loss": 0.8267, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01400359066427289, |
| "grad_norm": 5.450674057006836, |
| "learning_rate": 4.6594982078853055e-07, |
| "loss": 0.7886, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.015080789946140035, |
| "grad_norm": 5.492709636688232, |
| "learning_rate": 5.017921146953406e-07, |
| "loss": 0.7966, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01615798922800718, |
| "grad_norm": 5.319929122924805, |
| "learning_rate": 5.376344086021506e-07, |
| "loss": 0.7823, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.017235188509874325, |
| "grad_norm": 5.306995868682861, |
| "learning_rate": 5.734767025089606e-07, |
| "loss": 0.7923, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.018312387791741474, |
| "grad_norm": 4.499080657958984, |
| "learning_rate": 6.093189964157707e-07, |
| "loss": 0.7781, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01938958707360862, |
| "grad_norm": 4.392838001251221, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 0.786, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.020466786355475764, |
| "grad_norm": 4.141458034515381, |
| "learning_rate": 6.810035842293908e-07, |
| "loss": 0.7563, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.02154398563734291, |
| "grad_norm": 4.173211574554443, |
| "learning_rate": 7.168458781362007e-07, |
| "loss": 0.7871, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022621184919210054, |
| "grad_norm": 3.9070355892181396, |
| "learning_rate": 7.526881720430108e-07, |
| "loss": 0.7424, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0236983842010772, |
| "grad_norm": 3.5647006034851074, |
| "learning_rate": 7.885304659498208e-07, |
| "loss": 0.7427, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.024775583482944345, |
| "grad_norm": 2.366295099258423, |
| "learning_rate": 8.243727598566309e-07, |
| "loss": 0.6975, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02585278276481149, |
| "grad_norm": 2.3025567531585693, |
| "learning_rate": 8.60215053763441e-07, |
| "loss": 0.7618, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.026929982046678635, |
| "grad_norm": 2.1890130043029785, |
| "learning_rate": 8.96057347670251e-07, |
| "loss": 0.7025, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02800718132854578, |
| "grad_norm": 2.134795665740967, |
| "learning_rate": 9.318996415770611e-07, |
| "loss": 0.7343, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.029084380610412925, |
| "grad_norm": 1.9905577898025513, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 0.7293, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03016157989228007, |
| "grad_norm": 1.9130618572235107, |
| "learning_rate": 1.0035842293906811e-06, |
| "loss": 0.6949, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03123877917414722, |
| "grad_norm": 1.8031822443008423, |
| "learning_rate": 1.039426523297491e-06, |
| "loss": 0.7427, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03231597845601436, |
| "grad_norm": 1.4684985876083374, |
| "learning_rate": 1.0752688172043011e-06, |
| "loss": 0.7062, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03339317773788151, |
| "grad_norm": 1.7485400438308716, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 0.6973, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03447037701974865, |
| "grad_norm": 2.080551862716675, |
| "learning_rate": 1.1469534050179212e-06, |
| "loss": 0.667, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0355475763016158, |
| "grad_norm": 2.273972749710083, |
| "learning_rate": 1.1827956989247313e-06, |
| "loss": 0.6813, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.03662477558348295, |
| "grad_norm": 2.2604429721832275, |
| "learning_rate": 1.2186379928315414e-06, |
| "loss": 0.6723, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03770197486535009, |
| "grad_norm": 2.1238536834716797, |
| "learning_rate": 1.2544802867383513e-06, |
| "loss": 0.6494, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.03877917414721724, |
| "grad_norm": 2.0893118381500244, |
| "learning_rate": 1.2903225806451614e-06, |
| "loss": 0.6818, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.03985637342908438, |
| "grad_norm": 2.0013720989227295, |
| "learning_rate": 1.3261648745519715e-06, |
| "loss": 0.6687, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04093357271095153, |
| "grad_norm": 1.5645451545715332, |
| "learning_rate": 1.3620071684587816e-06, |
| "loss": 0.6511, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04201077199281867, |
| "grad_norm": 1.2635418176651, |
| "learning_rate": 1.3978494623655913e-06, |
| "loss": 0.6309, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.04308797127468582, |
| "grad_norm": 1.1290279626846313, |
| "learning_rate": 1.4336917562724014e-06, |
| "loss": 0.6537, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04416517055655296, |
| "grad_norm": 0.8919850587844849, |
| "learning_rate": 1.4695340501792116e-06, |
| "loss": 0.6563, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04524236983842011, |
| "grad_norm": 0.9924504160881042, |
| "learning_rate": 1.5053763440860217e-06, |
| "loss": 0.6477, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04631956912028725, |
| "grad_norm": 1.03560471534729, |
| "learning_rate": 1.5412186379928318e-06, |
| "loss": 0.6306, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0473967684021544, |
| "grad_norm": 1.0033239126205444, |
| "learning_rate": 1.5770609318996417e-06, |
| "loss": 0.5894, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.04847396768402154, |
| "grad_norm": 1.0003079175949097, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 0.6201, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.04955116696588869, |
| "grad_norm": 0.8706744909286499, |
| "learning_rate": 1.6487455197132617e-06, |
| "loss": 0.6225, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05062836624775584, |
| "grad_norm": 0.773290753364563, |
| "learning_rate": 1.6845878136200718e-06, |
| "loss": 0.6072, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05170556552962298, |
| "grad_norm": 0.6847248673439026, |
| "learning_rate": 1.720430107526882e-06, |
| "loss": 0.6025, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05278276481149013, |
| "grad_norm": 0.7232184410095215, |
| "learning_rate": 1.7562724014336918e-06, |
| "loss": 0.5965, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05385996409335727, |
| "grad_norm": 0.6927616000175476, |
| "learning_rate": 1.792114695340502e-06, |
| "loss": 0.5922, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05493716337522442, |
| "grad_norm": 0.637067973613739, |
| "learning_rate": 1.827956989247312e-06, |
| "loss": 0.5962, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05601436265709156, |
| "grad_norm": 0.6449267864227295, |
| "learning_rate": 1.8637992831541222e-06, |
| "loss": 0.6068, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.05709156193895871, |
| "grad_norm": 0.6247251629829407, |
| "learning_rate": 1.8996415770609319e-06, |
| "loss": 0.5889, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.05816876122082585, |
| "grad_norm": 0.5862783789634705, |
| "learning_rate": 1.935483870967742e-06, |
| "loss": 0.6081, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.059245960502693, |
| "grad_norm": 0.6185500621795654, |
| "learning_rate": 1.9713261648745523e-06, |
| "loss": 0.6203, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06032315978456014, |
| "grad_norm": 0.5183306336402893, |
| "learning_rate": 2.0071684587813622e-06, |
| "loss": 0.5865, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06140035906642729, |
| "grad_norm": 0.5492181777954102, |
| "learning_rate": 2.043010752688172e-06, |
| "loss": 0.5726, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06247755834829444, |
| "grad_norm": 0.49654823541641235, |
| "learning_rate": 2.078853046594982e-06, |
| "loss": 0.5994, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06355475763016158, |
| "grad_norm": 0.5319607853889465, |
| "learning_rate": 2.1146953405017924e-06, |
| "loss": 0.5971, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06463195691202872, |
| "grad_norm": 0.5596228837966919, |
| "learning_rate": 2.1505376344086023e-06, |
| "loss": 0.5693, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06570915619389588, |
| "grad_norm": 0.478316068649292, |
| "learning_rate": 2.1863799283154126e-06, |
| "loss": 0.575, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.06678635547576302, |
| "grad_norm": 0.4188225269317627, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.5606, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.06786355475763016, |
| "grad_norm": 0.45341843366622925, |
| "learning_rate": 2.2580645161290324e-06, |
| "loss": 0.5432, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0689407540394973, |
| "grad_norm": 0.46085870265960693, |
| "learning_rate": 2.2939068100358423e-06, |
| "loss": 0.5684, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07001795332136446, |
| "grad_norm": 0.4461316466331482, |
| "learning_rate": 2.3297491039426526e-06, |
| "loss": 0.5609, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0710951526032316, |
| "grad_norm": 0.4758760929107666, |
| "learning_rate": 2.3655913978494625e-06, |
| "loss": 0.5572, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07217235188509874, |
| "grad_norm": 0.5010541081428528, |
| "learning_rate": 2.4014336917562724e-06, |
| "loss": 0.5853, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0732495511669659, |
| "grad_norm": 0.4104984402656555, |
| "learning_rate": 2.4372759856630828e-06, |
| "loss": 0.5486, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07432675044883304, |
| "grad_norm": 0.41943588852882385, |
| "learning_rate": 2.4731182795698927e-06, |
| "loss": 0.5896, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07540394973070018, |
| "grad_norm": 0.46157947182655334, |
| "learning_rate": 2.5089605734767026e-06, |
| "loss": 0.5513, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07648114901256732, |
| "grad_norm": 0.39336729049682617, |
| "learning_rate": 2.544802867383513e-06, |
| "loss": 0.5367, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.07755834829443448, |
| "grad_norm": 0.46617960929870605, |
| "learning_rate": 2.580645161290323e-06, |
| "loss": 0.5795, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.07863554757630162, |
| "grad_norm": 0.3717260956764221, |
| "learning_rate": 2.616487455197133e-06, |
| "loss": 0.5363, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.07971274685816876, |
| "grad_norm": 0.4486253261566162, |
| "learning_rate": 2.652329749103943e-06, |
| "loss": 0.5446, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0807899461400359, |
| "grad_norm": 0.3776644468307495, |
| "learning_rate": 2.688172043010753e-06, |
| "loss": 0.5549, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08186714542190306, |
| "grad_norm": 0.4512787163257599, |
| "learning_rate": 2.7240143369175633e-06, |
| "loss": 0.5519, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0829443447037702, |
| "grad_norm": 0.44562798738479614, |
| "learning_rate": 2.7598566308243727e-06, |
| "loss": 0.5542, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.08402154398563734, |
| "grad_norm": 0.41436710953712463, |
| "learning_rate": 2.7956989247311827e-06, |
| "loss": 0.5381, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0850987432675045, |
| "grad_norm": 0.39803346991539, |
| "learning_rate": 2.831541218637993e-06, |
| "loss": 0.5214, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.08617594254937164, |
| "grad_norm": 0.41391584277153015, |
| "learning_rate": 2.867383512544803e-06, |
| "loss": 0.5379, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08725314183123878, |
| "grad_norm": 0.4134213924407959, |
| "learning_rate": 2.903225806451613e-06, |
| "loss": 0.5846, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.08833034111310592, |
| "grad_norm": 0.4272231459617615, |
| "learning_rate": 2.939068100358423e-06, |
| "loss": 0.5338, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.08940754039497308, |
| "grad_norm": 0.46061354875564575, |
| "learning_rate": 2.974910394265233e-06, |
| "loss": 0.5532, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09048473967684022, |
| "grad_norm": 0.4512515664100647, |
| "learning_rate": 3.0107526881720433e-06, |
| "loss": 0.5447, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09156193895870736, |
| "grad_norm": 0.40919914841651917, |
| "learning_rate": 3.0465949820788532e-06, |
| "loss": 0.531, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0926391382405745, |
| "grad_norm": 0.3911254405975342, |
| "learning_rate": 3.0824372759856636e-06, |
| "loss": 0.5511, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.09371633752244166, |
| "grad_norm": 0.40862536430358887, |
| "learning_rate": 3.1182795698924735e-06, |
| "loss": 0.5295, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0947935368043088, |
| "grad_norm": 0.352711021900177, |
| "learning_rate": 3.1541218637992834e-06, |
| "loss": 0.5334, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.09587073608617594, |
| "grad_norm": 0.3540276288986206, |
| "learning_rate": 3.1899641577060937e-06, |
| "loss": 0.5136, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.09694793536804308, |
| "grad_norm": 0.3934048116207123, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 0.5266, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09802513464991024, |
| "grad_norm": 0.4379293620586395, |
| "learning_rate": 3.261648745519714e-06, |
| "loss": 0.5493, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.09910233393177738, |
| "grad_norm": 0.3995460271835327, |
| "learning_rate": 3.2974910394265234e-06, |
| "loss": 0.5288, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10017953321364452, |
| "grad_norm": 0.41289108991622925, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.5408, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.10125673249551168, |
| "grad_norm": 0.34702348709106445, |
| "learning_rate": 3.3691756272401437e-06, |
| "loss": 0.528, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10233393177737882, |
| "grad_norm": 0.3608455955982208, |
| "learning_rate": 3.4050179211469536e-06, |
| "loss": 0.4823, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.10341113105924596, |
| "grad_norm": 0.4204634428024292, |
| "learning_rate": 3.440860215053764e-06, |
| "loss": 0.565, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1044883303411131, |
| "grad_norm": 0.3752126395702362, |
| "learning_rate": 3.4767025089605738e-06, |
| "loss": 0.5236, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.10556552962298026, |
| "grad_norm": 0.4099592864513397, |
| "learning_rate": 3.5125448028673837e-06, |
| "loss": 0.5262, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1066427289048474, |
| "grad_norm": 0.3806130886077881, |
| "learning_rate": 3.548387096774194e-06, |
| "loss": 0.5216, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.10771992818671454, |
| "grad_norm": 0.3793143928050995, |
| "learning_rate": 3.584229390681004e-06, |
| "loss": 0.541, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10879712746858168, |
| "grad_norm": 0.35247287154197693, |
| "learning_rate": 3.620071684587814e-06, |
| "loss": 0.5112, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.10987432675044884, |
| "grad_norm": 0.39345964789390564, |
| "learning_rate": 3.655913978494624e-06, |
| "loss": 0.5175, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.11095152603231598, |
| "grad_norm": 0.3889741003513336, |
| "learning_rate": 3.691756272401434e-06, |
| "loss": 0.5331, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11202872531418312, |
| "grad_norm": 0.38461363315582275, |
| "learning_rate": 3.7275985663082444e-06, |
| "loss": 0.5457, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11310592459605028, |
| "grad_norm": 0.3945530354976654, |
| "learning_rate": 3.763440860215054e-06, |
| "loss": 0.4938, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.11418312387791742, |
| "grad_norm": 0.37691155076026917, |
| "learning_rate": 3.7992831541218638e-06, |
| "loss": 0.5173, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.11526032315978456, |
| "grad_norm": 0.3901672661304474, |
| "learning_rate": 3.8351254480286745e-06, |
| "loss": 0.5205, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.1163375224416517, |
| "grad_norm": 0.4444199800491333, |
| "learning_rate": 3.870967741935484e-06, |
| "loss": 0.5323, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.11741472172351886, |
| "grad_norm": 0.3674546778202057, |
| "learning_rate": 3.906810035842294e-06, |
| "loss": 0.5431, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.118491921005386, |
| "grad_norm": 0.3770754635334015, |
| "learning_rate": 3.942652329749105e-06, |
| "loss": 0.533, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11956912028725314, |
| "grad_norm": 0.3679567277431488, |
| "learning_rate": 3.978494623655914e-06, |
| "loss": 0.4874, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.12064631956912028, |
| "grad_norm": 0.40148189663887024, |
| "learning_rate": 4.0143369175627245e-06, |
| "loss": 0.5143, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.12172351885098744, |
| "grad_norm": 0.4085795283317566, |
| "learning_rate": 4.050179211469534e-06, |
| "loss": 0.5196, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.12280071813285458, |
| "grad_norm": 0.3965912163257599, |
| "learning_rate": 4.086021505376344e-06, |
| "loss": 0.5159, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.12387791741472172, |
| "grad_norm": 0.36728113889694214, |
| "learning_rate": 4.121863799283155e-06, |
| "loss": 0.5193, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.12495511669658887, |
| "grad_norm": 0.4603145122528076, |
| "learning_rate": 4.157706093189964e-06, |
| "loss": 0.5198, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.12603231597845602, |
| "grad_norm": 0.3981267511844635, |
| "learning_rate": 4.193548387096774e-06, |
| "loss": 0.509, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.12710951526032316, |
| "grad_norm": 0.3856450915336609, |
| "learning_rate": 4.229390681003585e-06, |
| "loss": 0.5195, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1281867145421903, |
| "grad_norm": 0.35274478793144226, |
| "learning_rate": 4.265232974910394e-06, |
| "loss": 0.5232, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.12926391382405744, |
| "grad_norm": 0.35597339272499084, |
| "learning_rate": 4.3010752688172045e-06, |
| "loss": 0.5078, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13034111310592458, |
| "grad_norm": 0.395088791847229, |
| "learning_rate": 4.336917562724015e-06, |
| "loss": 0.5084, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.13141831238779175, |
| "grad_norm": 0.4228588044643402, |
| "learning_rate": 4.372759856630825e-06, |
| "loss": 0.503, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1324955116696589, |
| "grad_norm": 0.4004088342189789, |
| "learning_rate": 4.408602150537635e-06, |
| "loss": 0.4904, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.13357271095152604, |
| "grad_norm": 0.4043223559856415, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.5073, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.13464991023339318, |
| "grad_norm": 0.41619932651519775, |
| "learning_rate": 4.480286738351255e-06, |
| "loss": 0.5268, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.13572710951526032, |
| "grad_norm": 0.4488072395324707, |
| "learning_rate": 4.516129032258065e-06, |
| "loss": 0.4877, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.13680430879712746, |
| "grad_norm": 0.4372597932815552, |
| "learning_rate": 4.551971326164875e-06, |
| "loss": 0.5501, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.1378815080789946, |
| "grad_norm": 0.47266843914985657, |
| "learning_rate": 4.587813620071685e-06, |
| "loss": 0.5074, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.13895870736086177, |
| "grad_norm": 0.38537442684173584, |
| "learning_rate": 4.623655913978495e-06, |
| "loss": 0.5266, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.1400359066427289, |
| "grad_norm": 0.37685397267341614, |
| "learning_rate": 4.659498207885305e-06, |
| "loss": 0.515, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14111310592459606, |
| "grad_norm": 0.40007224678993225, |
| "learning_rate": 4.695340501792115e-06, |
| "loss": 0.5, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.1421903052064632, |
| "grad_norm": 0.4487532675266266, |
| "learning_rate": 4.731182795698925e-06, |
| "loss": 0.4944, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.14326750448833034, |
| "grad_norm": 0.37208762764930725, |
| "learning_rate": 4.767025089605735e-06, |
| "loss": 0.4854, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.14434470377019748, |
| "grad_norm": 0.36964190006256104, |
| "learning_rate": 4.802867383512545e-06, |
| "loss": 0.528, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.14542190305206462, |
| "grad_norm": 0.442440927028656, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 0.5021, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.1464991023339318, |
| "grad_norm": 0.4261108338832855, |
| "learning_rate": 4.8745519713261655e-06, |
| "loss": 0.5145, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.14757630161579893, |
| "grad_norm": 0.37978559732437134, |
| "learning_rate": 4.910394265232976e-06, |
| "loss": 0.5031, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.14865350089766607, |
| "grad_norm": 0.3788897693157196, |
| "learning_rate": 4.946236559139785e-06, |
| "loss": 0.5209, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.14973070017953322, |
| "grad_norm": 0.4541829228401184, |
| "learning_rate": 4.982078853046595e-06, |
| "loss": 0.5169, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.15080789946140036, |
| "grad_norm": 0.4718886911869049, |
| "learning_rate": 5.017921146953405e-06, |
| "loss": 0.5229, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1518850987432675, |
| "grad_norm": 0.3844756484031677, |
| "learning_rate": 5.0537634408602155e-06, |
| "loss": 0.4953, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.15296229802513464, |
| "grad_norm": 0.4022759199142456, |
| "learning_rate": 5.089605734767026e-06, |
| "loss": 0.5184, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.15403949730700178, |
| "grad_norm": 0.47823449969291687, |
| "learning_rate": 5.125448028673835e-06, |
| "loss": 0.5207, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.15511669658886895, |
| "grad_norm": 0.38162872195243835, |
| "learning_rate": 5.161290322580646e-06, |
| "loss": 0.5119, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1561938958707361, |
| "grad_norm": 0.4117828607559204, |
| "learning_rate": 5.197132616487456e-06, |
| "loss": 0.4929, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.15727109515260324, |
| "grad_norm": 0.4006747305393219, |
| "learning_rate": 5.232974910394266e-06, |
| "loss": 0.5193, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.15834829443447038, |
| "grad_norm": 0.40755030512809753, |
| "learning_rate": 5.268817204301076e-06, |
| "loss": 0.488, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.15942549371633752, |
| "grad_norm": 0.3836679458618164, |
| "learning_rate": 5.304659498207886e-06, |
| "loss": 0.491, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.16050269299820466, |
| "grad_norm": 0.3859885036945343, |
| "learning_rate": 5.340501792114696e-06, |
| "loss": 0.5061, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.1615798922800718, |
| "grad_norm": 0.41005122661590576, |
| "learning_rate": 5.376344086021506e-06, |
| "loss": 0.4995, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.16265709156193897, |
| "grad_norm": 0.40823987126350403, |
| "learning_rate": 5.412186379928316e-06, |
| "loss": 0.5112, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1637342908438061, |
| "grad_norm": 0.4053882956504822, |
| "learning_rate": 5.4480286738351265e-06, |
| "loss": 0.5142, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.16481149012567325, |
| "grad_norm": 0.4068973958492279, |
| "learning_rate": 5.483870967741935e-06, |
| "loss": 0.5034, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.1658886894075404, |
| "grad_norm": 0.36175912618637085, |
| "learning_rate": 5.5197132616487455e-06, |
| "loss": 0.494, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.16696588868940754, |
| "grad_norm": 0.41930872201919556, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.5213, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.16804308797127468, |
| "grad_norm": 0.38556304574012756, |
| "learning_rate": 5.591397849462365e-06, |
| "loss": 0.4963, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.16912028725314182, |
| "grad_norm": 0.40004435181617737, |
| "learning_rate": 5.627240143369176e-06, |
| "loss": 0.4789, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.170197486535009, |
| "grad_norm": 0.3727085292339325, |
| "learning_rate": 5.663082437275986e-06, |
| "loss": 0.5245, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.17127468581687613, |
| "grad_norm": 0.37608808279037476, |
| "learning_rate": 5.698924731182796e-06, |
| "loss": 0.5005, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.17235188509874327, |
| "grad_norm": 0.4182046353816986, |
| "learning_rate": 5.734767025089606e-06, |
| "loss": 0.4921, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.17342908438061042, |
| "grad_norm": 0.37144991755485535, |
| "learning_rate": 5.770609318996416e-06, |
| "loss": 0.4845, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.17450628366247756, |
| "grad_norm": 0.4116148054599762, |
| "learning_rate": 5.806451612903226e-06, |
| "loss": 0.4995, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1755834829443447, |
| "grad_norm": 0.37438467144966125, |
| "learning_rate": 5.842293906810036e-06, |
| "loss": 0.4694, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.17666068222621184, |
| "grad_norm": 0.4422660768032074, |
| "learning_rate": 5.878136200716846e-06, |
| "loss": 0.4916, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.17773788150807898, |
| "grad_norm": 0.403758704662323, |
| "learning_rate": 5.9139784946236566e-06, |
| "loss": 0.5126, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.17881508078994615, |
| "grad_norm": 0.4406896233558655, |
| "learning_rate": 5.949820788530466e-06, |
| "loss": 0.4705, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1798922800718133, |
| "grad_norm": 0.4464195966720581, |
| "learning_rate": 5.985663082437276e-06, |
| "loss": 0.5287, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.18096947935368043, |
| "grad_norm": 0.511547327041626, |
| "learning_rate": 6.021505376344087e-06, |
| "loss": 0.5037, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.18204667863554758, |
| "grad_norm": 0.4360441267490387, |
| "learning_rate": 6.057347670250897e-06, |
| "loss": 0.5254, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.18312387791741472, |
| "grad_norm": 0.44195273518562317, |
| "learning_rate": 6.0931899641577065e-06, |
| "loss": 0.5034, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.18420107719928186, |
| "grad_norm": 0.4096381664276123, |
| "learning_rate": 6.129032258064517e-06, |
| "loss": 0.498, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.185278276481149, |
| "grad_norm": 0.5594648718833923, |
| "learning_rate": 6.164874551971327e-06, |
| "loss": 0.4857, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.18635547576301617, |
| "grad_norm": 0.4190889298915863, |
| "learning_rate": 6.200716845878137e-06, |
| "loss": 0.5091, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.1874326750448833, |
| "grad_norm": 0.5348290801048279, |
| "learning_rate": 6.236559139784947e-06, |
| "loss": 0.528, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.18850987432675045, |
| "grad_norm": 0.5012261867523193, |
| "learning_rate": 6.272401433691757e-06, |
| "loss": 0.4871, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.1895870736086176, |
| "grad_norm": 0.45305487513542175, |
| "learning_rate": 6.308243727598567e-06, |
| "loss": 0.4961, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.19066427289048474, |
| "grad_norm": 0.47502607107162476, |
| "learning_rate": 6.344086021505377e-06, |
| "loss": 0.5079, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.19174147217235188, |
| "grad_norm": 0.4623589813709259, |
| "learning_rate": 6.379928315412187e-06, |
| "loss": 0.494, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.19281867145421902, |
| "grad_norm": 0.3726208209991455, |
| "learning_rate": 6.415770609318996e-06, |
| "loss": 0.4845, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.19389587073608616, |
| "grad_norm": 0.49059590697288513, |
| "learning_rate": 6.451612903225806e-06, |
| "loss": 0.5111, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.19497307001795333, |
| "grad_norm": 0.4289363920688629, |
| "learning_rate": 6.4874551971326176e-06, |
| "loss": 0.4782, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.19605026929982047, |
| "grad_norm": 0.4204278886318207, |
| "learning_rate": 6.523297491039428e-06, |
| "loss": 0.4831, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.19712746858168761, |
| "grad_norm": 0.450005441904068, |
| "learning_rate": 6.5591397849462365e-06, |
| "loss": 0.4832, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.19820466786355476, |
| "grad_norm": 0.44153162837028503, |
| "learning_rate": 6.594982078853047e-06, |
| "loss": 0.5056, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1992818671454219, |
| "grad_norm": 0.4572013318538666, |
| "learning_rate": 6.630824372759857e-06, |
| "loss": 0.4802, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.20035906642728904, |
| "grad_norm": 0.42412129044532776, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.4946, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.20143626570915618, |
| "grad_norm": 0.4811611473560333, |
| "learning_rate": 6.702508960573477e-06, |
| "loss": 0.4898, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.20251346499102335, |
| "grad_norm": 0.47502169013023376, |
| "learning_rate": 6.738351254480287e-06, |
| "loss": 0.5057, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2035906642728905, |
| "grad_norm": 0.5099421143531799, |
| "learning_rate": 6.774193548387097e-06, |
| "loss": 0.4911, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.20466786355475763, |
| "grad_norm": 0.5116240382194519, |
| "learning_rate": 6.810035842293907e-06, |
| "loss": 0.4898, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.20574506283662478, |
| "grad_norm": 0.44598788022994995, |
| "learning_rate": 6.8458781362007174e-06, |
| "loss": 0.507, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.20682226211849192, |
| "grad_norm": 0.45186325907707214, |
| "learning_rate": 6.881720430107528e-06, |
| "loss": 0.4797, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.20789946140035906, |
| "grad_norm": 0.5284622311592102, |
| "learning_rate": 6.917562724014337e-06, |
| "loss": 0.4945, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.2089766606822262, |
| "grad_norm": 0.39712581038475037, |
| "learning_rate": 6.9534050179211476e-06, |
| "loss": 0.5008, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.21005385996409337, |
| "grad_norm": 0.44355469942092896, |
| "learning_rate": 6.989247311827958e-06, |
| "loss": 0.4826, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2111310592459605, |
| "grad_norm": 0.40434572100639343, |
| "learning_rate": 7.025089605734767e-06, |
| "loss": 0.468, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.21220825852782765, |
| "grad_norm": 0.43578436970710754, |
| "learning_rate": 7.060931899641578e-06, |
| "loss": 0.4916, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.2132854578096948, |
| "grad_norm": 0.4185442626476288, |
| "learning_rate": 7.096774193548388e-06, |
| "loss": 0.498, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.21436265709156194, |
| "grad_norm": 0.41103053092956543, |
| "learning_rate": 7.1326164874551975e-06, |
| "loss": 0.4768, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.21543985637342908, |
| "grad_norm": 0.4058842062950134, |
| "learning_rate": 7.168458781362008e-06, |
| "loss": 0.4918, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21651705565529622, |
| "grad_norm": 0.3815259039402008, |
| "learning_rate": 7.204301075268818e-06, |
| "loss": 0.5033, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.21759425493716336, |
| "grad_norm": 0.4531411826610565, |
| "learning_rate": 7.240143369175628e-06, |
| "loss": 0.4867, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.21867145421903053, |
| "grad_norm": 0.3983916640281677, |
| "learning_rate": 7.275985663082438e-06, |
| "loss": 0.467, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.21974865350089767, |
| "grad_norm": 0.4266183078289032, |
| "learning_rate": 7.311827956989248e-06, |
| "loss": 0.4977, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.22082585278276481, |
| "grad_norm": 0.4078884422779083, |
| "learning_rate": 7.347670250896059e-06, |
| "loss": 0.4827, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.22190305206463196, |
| "grad_norm": 0.403360515832901, |
| "learning_rate": 7.383512544802868e-06, |
| "loss": 0.4696, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.2229802513464991, |
| "grad_norm": 0.46340030431747437, |
| "learning_rate": 7.4193548387096784e-06, |
| "loss": 0.503, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.22405745062836624, |
| "grad_norm": 0.431130975484848, |
| "learning_rate": 7.455197132616489e-06, |
| "loss": 0.524, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.22513464991023338, |
| "grad_norm": 0.42561987042427063, |
| "learning_rate": 7.491039426523297e-06, |
| "loss": 0.5082, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.22621184919210055, |
| "grad_norm": 0.42905059456825256, |
| "learning_rate": 7.526881720430108e-06, |
| "loss": 0.4712, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2272890484739677, |
| "grad_norm": 0.4395630359649658, |
| "learning_rate": 7.562724014336919e-06, |
| "loss": 0.4763, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.22836624775583483, |
| "grad_norm": 0.38788971304893494, |
| "learning_rate": 7.5985663082437275e-06, |
| "loss": 0.4783, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.22944344703770198, |
| "grad_norm": 0.4753347635269165, |
| "learning_rate": 7.634408602150538e-06, |
| "loss": 0.4787, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.23052064631956912, |
| "grad_norm": 0.45695263147354126, |
| "learning_rate": 7.670250896057349e-06, |
| "loss": 0.494, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.23159784560143626, |
| "grad_norm": 0.43443480134010315, |
| "learning_rate": 7.706093189964159e-06, |
| "loss": 0.4722, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.2326750448833034, |
| "grad_norm": 0.4508543610572815, |
| "learning_rate": 7.741935483870968e-06, |
| "loss": 0.4808, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.23375224416517057, |
| "grad_norm": 0.4751185178756714, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.4708, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.2348294434470377, |
| "grad_norm": 0.3885140120983124, |
| "learning_rate": 7.813620071684589e-06, |
| "loss": 0.481, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.23590664272890485, |
| "grad_norm": 0.4592781662940979, |
| "learning_rate": 7.849462365591398e-06, |
| "loss": 0.4857, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.236983842010772, |
| "grad_norm": 0.49362388253211975, |
| "learning_rate": 7.88530465949821e-06, |
| "loss": 0.4959, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.23806104129263914, |
| "grad_norm": 0.41868501901626587, |
| "learning_rate": 7.921146953405019e-06, |
| "loss": 0.499, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.23913824057450628, |
| "grad_norm": 0.4312754273414612, |
| "learning_rate": 7.956989247311828e-06, |
| "loss": 0.4841, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.24021543985637342, |
| "grad_norm": 0.4530230164527893, |
| "learning_rate": 7.992831541218638e-06, |
| "loss": 0.5207, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.24129263913824056, |
| "grad_norm": 0.38147202134132385, |
| "learning_rate": 8.028673835125449e-06, |
| "loss": 0.4681, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.24236983842010773, |
| "grad_norm": 0.4324093759059906, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 0.4878, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.24344703770197487, |
| "grad_norm": 0.46749916672706604, |
| "learning_rate": 8.100358422939068e-06, |
| "loss": 0.4845, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.244524236983842, |
| "grad_norm": 0.4217725694179535, |
| "learning_rate": 8.136200716845879e-06, |
| "loss": 0.4637, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.24560143626570916, |
| "grad_norm": 0.4384757876396179, |
| "learning_rate": 8.172043010752689e-06, |
| "loss": 0.4792, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2466786355475763, |
| "grad_norm": 0.44224825501441956, |
| "learning_rate": 8.207885304659498e-06, |
| "loss": 0.4829, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.24775583482944344, |
| "grad_norm": 0.4307554066181183, |
| "learning_rate": 8.24372759856631e-06, |
| "loss": 0.4907, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.24883303411131058, |
| "grad_norm": 0.42513298988342285, |
| "learning_rate": 8.279569892473119e-06, |
| "loss": 0.4889, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.24991023339317775, |
| "grad_norm": 0.4699139893054962, |
| "learning_rate": 8.315412186379928e-06, |
| "loss": 0.4831, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.25098743267504486, |
| "grad_norm": 0.4631340205669403, |
| "learning_rate": 8.35125448028674e-06, |
| "loss": 0.4589, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.25206463195691203, |
| "grad_norm": 0.4672948122024536, |
| "learning_rate": 8.387096774193549e-06, |
| "loss": 0.4882, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.25314183123877915, |
| "grad_norm": 0.4274214804172516, |
| "learning_rate": 8.422939068100358e-06, |
| "loss": 0.4877, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.2542190305206463, |
| "grad_norm": 0.5479405522346497, |
| "learning_rate": 8.45878136200717e-06, |
| "loss": 0.4791, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.2552962298025135, |
| "grad_norm": 0.41502436995506287, |
| "learning_rate": 8.494623655913979e-06, |
| "loss": 0.4607, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.2563734290843806, |
| "grad_norm": 0.44688135385513306, |
| "learning_rate": 8.530465949820788e-06, |
| "loss": 0.4277, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.25745062836624777, |
| "grad_norm": 0.48682472109794617, |
| "learning_rate": 8.5663082437276e-06, |
| "loss": 0.4692, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.2585278276481149, |
| "grad_norm": 0.5049644708633423, |
| "learning_rate": 8.602150537634409e-06, |
| "loss": 0.4812, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.25960502692998205, |
| "grad_norm": 0.44088059663772583, |
| "learning_rate": 8.63799283154122e-06, |
| "loss": 0.4895, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.26068222621184917, |
| "grad_norm": 0.5026620030403137, |
| "learning_rate": 8.67383512544803e-06, |
| "loss": 0.4683, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.26175942549371634, |
| "grad_norm": 0.41200295090675354, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 0.4672, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.2628366247755835, |
| "grad_norm": 0.4825300872325897, |
| "learning_rate": 8.74551971326165e-06, |
| "loss": 0.4735, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.2639138240574506, |
| "grad_norm": 0.45688295364379883, |
| "learning_rate": 8.78136200716846e-06, |
| "loss": 0.4809, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.2649910233393178, |
| "grad_norm": 0.4559200704097748, |
| "learning_rate": 8.81720430107527e-06, |
| "loss": 0.4656, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2660682226211849, |
| "grad_norm": 0.49576395750045776, |
| "learning_rate": 8.85304659498208e-06, |
| "loss": 0.4585, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.26714542190305207, |
| "grad_norm": 0.5066803693771362, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.4776, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.2682226211849192, |
| "grad_norm": 0.45653069019317627, |
| "learning_rate": 8.9247311827957e-06, |
| "loss": 0.4718, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.26929982046678635, |
| "grad_norm": 0.5092087984085083, |
| "learning_rate": 8.96057347670251e-06, |
| "loss": 0.4792, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2703770197486535, |
| "grad_norm": 0.45919767022132874, |
| "learning_rate": 8.99641577060932e-06, |
| "loss": 0.4899, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.27145421903052064, |
| "grad_norm": 0.4007905125617981, |
| "learning_rate": 9.03225806451613e-06, |
| "loss": 0.4885, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2725314183123878, |
| "grad_norm": 0.43509814143180847, |
| "learning_rate": 9.068100358422939e-06, |
| "loss": 0.488, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.2736086175942549, |
| "grad_norm": 0.4242767095565796, |
| "learning_rate": 9.10394265232975e-06, |
| "loss": 0.4712, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.2746858168761221, |
| "grad_norm": 0.41952240467071533, |
| "learning_rate": 9.13978494623656e-06, |
| "loss": 0.4879, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2757630161579892, |
| "grad_norm": 0.4186972677707672, |
| "learning_rate": 9.17562724014337e-06, |
| "loss": 0.4629, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2768402154398564, |
| "grad_norm": 0.49089375138282776, |
| "learning_rate": 9.21146953405018e-06, |
| "loss": 0.4804, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.27791741472172354, |
| "grad_norm": 0.43804821372032166, |
| "learning_rate": 9.24731182795699e-06, |
| "loss": 0.4691, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.27899461400359066, |
| "grad_norm": 0.4435538053512573, |
| "learning_rate": 9.2831541218638e-06, |
| "loss": 0.459, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.2800718132854578, |
| "grad_norm": 0.47574469447135925, |
| "learning_rate": 9.31899641577061e-06, |
| "loss": 0.4785, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.28114901256732494, |
| "grad_norm": 0.41517260670661926, |
| "learning_rate": 9.35483870967742e-06, |
| "loss": 0.5019, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.2822262118491921, |
| "grad_norm": 0.4552224278450012, |
| "learning_rate": 9.39068100358423e-06, |
| "loss": 0.4993, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2833034111310592, |
| "grad_norm": 0.4515886902809143, |
| "learning_rate": 9.42652329749104e-06, |
| "loss": 0.4738, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2843806104129264, |
| "grad_norm": 0.47657546401023865, |
| "learning_rate": 9.46236559139785e-06, |
| "loss": 0.4634, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.28545780969479356, |
| "grad_norm": 0.45850643515586853, |
| "learning_rate": 9.49820788530466e-06, |
| "loss": 0.4745, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.2865350089766607, |
| "grad_norm": 0.469215989112854, |
| "learning_rate": 9.53405017921147e-06, |
| "loss": 0.4602, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.28761220825852785, |
| "grad_norm": 0.4392701983451843, |
| "learning_rate": 9.56989247311828e-06, |
| "loss": 0.4716, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.28868940754039496, |
| "grad_norm": 0.40122461318969727, |
| "learning_rate": 9.60573476702509e-06, |
| "loss": 0.4639, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.28976660682226213, |
| "grad_norm": 0.4862971603870392, |
| "learning_rate": 9.641577060931901e-06, |
| "loss": 0.4915, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.29084380610412924, |
| "grad_norm": 0.4537064731121063, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 0.4882, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2919210053859964, |
| "grad_norm": 0.478216290473938, |
| "learning_rate": 9.71326164874552e-06, |
| "loss": 0.4644, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.2929982046678636, |
| "grad_norm": 0.5602977871894836, |
| "learning_rate": 9.749103942652331e-06, |
| "loss": 0.4738, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.2940754039497307, |
| "grad_norm": 0.5121861100196838, |
| "learning_rate": 9.78494623655914e-06, |
| "loss": 0.4917, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.29515260323159787, |
| "grad_norm": 0.5532698631286621, |
| "learning_rate": 9.820788530465952e-06, |
| "loss": 0.4485, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.296229802513465, |
| "grad_norm": 0.49017244577407837, |
| "learning_rate": 9.856630824372761e-06, |
| "loss": 0.4491, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.29730700179533215, |
| "grad_norm": 0.5022942423820496, |
| "learning_rate": 9.89247311827957e-06, |
| "loss": 0.4817, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.29838420107719926, |
| "grad_norm": 0.4923330545425415, |
| "learning_rate": 9.928315412186382e-06, |
| "loss": 0.4763, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.29946140035906643, |
| "grad_norm": 0.46441808342933655, |
| "learning_rate": 9.96415770609319e-06, |
| "loss": 0.481, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.30053859964093355, |
| "grad_norm": 0.49813228845596313, |
| "learning_rate": 1e-05, |
| "loss": 0.4779, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.3016157989228007, |
| "grad_norm": 0.533608078956604, |
| "learning_rate": 9.999996067902875e-06, |
| "loss": 0.4802, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3026929982046679, |
| "grad_norm": 0.5138203501701355, |
| "learning_rate": 9.999984271617681e-06, |
| "loss": 0.4656, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.303770197486535, |
| "grad_norm": 0.45372065901756287, |
| "learning_rate": 9.999964611162973e-06, |
| "loss": 0.4577, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.30484739676840217, |
| "grad_norm": 0.5210473537445068, |
| "learning_rate": 9.999937086569674e-06, |
| "loss": 0.4922, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3059245960502693, |
| "grad_norm": 0.49455979466438293, |
| "learning_rate": 9.999901697881075e-06, |
| "loss": 0.4849, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.30700179533213645, |
| "grad_norm": 0.4365752935409546, |
| "learning_rate": 9.999858445152838e-06, |
| "loss": 0.4809, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.30807899461400357, |
| "grad_norm": 0.559883177280426, |
| "learning_rate": 9.999807328452991e-06, |
| "loss": 0.4971, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.30915619389587073, |
| "grad_norm": 0.45351046323776245, |
| "learning_rate": 9.999748347861935e-06, |
| "loss": 0.4844, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.3102333931777379, |
| "grad_norm": 0.5587484240531921, |
| "learning_rate": 9.999681503472433e-06, |
| "loss": 0.4572, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.311310592459605, |
| "grad_norm": 0.46588122844696045, |
| "learning_rate": 9.999606795389623e-06, |
| "loss": 0.4648, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.3123877917414722, |
| "grad_norm": 0.4443756639957428, |
| "learning_rate": 9.999524223731009e-06, |
| "loss": 0.4749, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3134649910233393, |
| "grad_norm": 0.48129507899284363, |
| "learning_rate": 9.999433788626461e-06, |
| "loss": 0.4772, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.31454219030520647, |
| "grad_norm": 0.42726975679397583, |
| "learning_rate": 9.999335490218221e-06, |
| "loss": 0.4763, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3156193895870736, |
| "grad_norm": 0.4765612781047821, |
| "learning_rate": 9.999229328660896e-06, |
| "loss": 0.4881, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.31669658886894075, |
| "grad_norm": 0.40721437335014343, |
| "learning_rate": 9.999115304121459e-06, |
| "loss": 0.4688, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.3177737881508079, |
| "grad_norm": 0.4598512649536133, |
| "learning_rate": 9.998993416779254e-06, |
| "loss": 0.4472, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.31885098743267504, |
| "grad_norm": 0.46925970911979675, |
| "learning_rate": 9.99886366682599e-06, |
| "loss": 0.4569, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.3199281867145422, |
| "grad_norm": 0.4148475229740143, |
| "learning_rate": 9.998726054465746e-06, |
| "loss": 0.4615, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.3210053859964093, |
| "grad_norm": 0.4196060299873352, |
| "learning_rate": 9.998580579914958e-06, |
| "loss": 0.4578, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3220825852782765, |
| "grad_norm": 0.4774996042251587, |
| "learning_rate": 9.998427243402437e-06, |
| "loss": 0.4582, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.3231597845601436, |
| "grad_norm": 0.47536271810531616, |
| "learning_rate": 9.998266045169356e-06, |
| "loss": 0.4774, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3242369838420108, |
| "grad_norm": 0.46730348467826843, |
| "learning_rate": 9.998096985469255e-06, |
| "loss": 0.4848, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.32531418312387794, |
| "grad_norm": 0.46201860904693604, |
| "learning_rate": 9.997920064568037e-06, |
| "loss": 0.471, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.32639138240574506, |
| "grad_norm": 0.46686047315597534, |
| "learning_rate": 9.99773528274397e-06, |
| "loss": 0.4181, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.3274685816876122, |
| "grad_norm": 0.41051632165908813, |
| "learning_rate": 9.997542640287686e-06, |
| "loss": 0.4339, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.32854578096947934, |
| "grad_norm": 0.49075937271118164, |
| "learning_rate": 9.997342137502182e-06, |
| "loss": 0.4619, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.3296229802513465, |
| "grad_norm": 0.43930885195732117, |
| "learning_rate": 9.997133774702813e-06, |
| "loss": 0.4411, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.3307001795332136, |
| "grad_norm": 0.46991202235221863, |
| "learning_rate": 9.996917552217303e-06, |
| "loss": 0.4657, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.3317773788150808, |
| "grad_norm": 0.3972565829753876, |
| "learning_rate": 9.996693470385735e-06, |
| "loss": 0.4495, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.33285457809694796, |
| "grad_norm": 0.4870466887950897, |
| "learning_rate": 9.996461529560553e-06, |
| "loss": 0.482, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3339317773788151, |
| "grad_norm": 0.4203915297985077, |
| "learning_rate": 9.996221730106561e-06, |
| "loss": 0.4539, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.33500897666068225, |
| "grad_norm": 0.3791579604148865, |
| "learning_rate": 9.995974072400928e-06, |
| "loss": 0.4676, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.33608617594254936, |
| "grad_norm": 0.4781574308872223, |
| "learning_rate": 9.995718556833179e-06, |
| "loss": 0.4452, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.33716337522441653, |
| "grad_norm": 0.4126666486263275, |
| "learning_rate": 9.995455183805195e-06, |
| "loss": 0.4653, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.33824057450628364, |
| "grad_norm": 0.4394056797027588, |
| "learning_rate": 9.995183953731225e-06, |
| "loss": 0.4594, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3393177737881508, |
| "grad_norm": 0.4282722473144531, |
| "learning_rate": 9.994904867037867e-06, |
| "loss": 0.4553, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.340394973070018, |
| "grad_norm": 0.41215622425079346, |
| "learning_rate": 9.99461792416408e-06, |
| "loss": 0.4687, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3414721723518851, |
| "grad_norm": 0.44896167516708374, |
| "learning_rate": 9.994323125561179e-06, |
| "loss": 0.4601, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.34254937163375226, |
| "grad_norm": 0.40661004185676575, |
| "learning_rate": 9.994020471692832e-06, |
| "loss": 0.4469, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3436265709156194, |
| "grad_norm": 0.48509952425956726, |
| "learning_rate": 9.99370996303507e-06, |
| "loss": 0.4431, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.34470377019748655, |
| "grad_norm": 0.46913495659828186, |
| "learning_rate": 9.993391600076268e-06, |
| "loss": 0.4676, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.34578096947935366, |
| "grad_norm": 0.4532509446144104, |
| "learning_rate": 9.993065383317164e-06, |
| "loss": 0.4637, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.34685816876122083, |
| "grad_norm": 0.4724155068397522, |
| "learning_rate": 9.992731313270841e-06, |
| "loss": 0.4771, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.34793536804308794, |
| "grad_norm": 0.42328765988349915, |
| "learning_rate": 9.99238939046274e-06, |
| "loss": 0.45, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.3490125673249551, |
| "grad_norm": 0.5050341486930847, |
| "learning_rate": 9.992039615430648e-06, |
| "loss": 0.4599, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3500897666068223, |
| "grad_norm": 0.4233899712562561, |
| "learning_rate": 9.991681988724706e-06, |
| "loss": 0.4778, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3511669658886894, |
| "grad_norm": 0.5298818945884705, |
| "learning_rate": 9.991316510907403e-06, |
| "loss": 0.4604, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.35224416517055657, |
| "grad_norm": 0.43540188670158386, |
| "learning_rate": 9.990943182553578e-06, |
| "loss": 0.4701, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.3533213644524237, |
| "grad_norm": 0.47227743268013, |
| "learning_rate": 9.990562004250415e-06, |
| "loss": 0.4545, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.35439856373429085, |
| "grad_norm": 0.5041068196296692, |
| "learning_rate": 9.990172976597446e-06, |
| "loss": 0.493, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.35547576301615796, |
| "grad_norm": 0.43627163767814636, |
| "learning_rate": 9.989776100206547e-06, |
| "loss": 0.4389, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.35655296229802513, |
| "grad_norm": 0.4468795359134674, |
| "learning_rate": 9.989371375701943e-06, |
| "loss": 0.468, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.3576301615798923, |
| "grad_norm": 0.4504821300506592, |
| "learning_rate": 9.988958803720203e-06, |
| "loss": 0.4567, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.3587073608617594, |
| "grad_norm": 0.4463129937648773, |
| "learning_rate": 9.988538384910231e-06, |
| "loss": 0.4592, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.3597845601436266, |
| "grad_norm": 0.376647025346756, |
| "learning_rate": 9.988110119933281e-06, |
| "loss": 0.444, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3608617594254937, |
| "grad_norm": 0.49062368273735046, |
| "learning_rate": 9.987674009462943e-06, |
| "loss": 0.48, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.36193895870736087, |
| "grad_norm": 0.4553554058074951, |
| "learning_rate": 9.98723005418515e-06, |
| "loss": 0.4651, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.363016157989228, |
| "grad_norm": 0.559506893157959, |
| "learning_rate": 9.986778254798173e-06, |
| "loss": 0.4609, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.36409335727109515, |
| "grad_norm": 0.43453335762023926, |
| "learning_rate": 9.986318612012618e-06, |
| "loss": 0.461, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.3651705565529623, |
| "grad_norm": 0.44681429862976074, |
| "learning_rate": 9.985851126551428e-06, |
| "loss": 0.471, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.36624775583482944, |
| "grad_norm": 0.46860507130622864, |
| "learning_rate": 9.985375799149883e-06, |
| "loss": 0.4694, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3673249551166966, |
| "grad_norm": 0.48733362555503845, |
| "learning_rate": 9.9848926305556e-06, |
| "loss": 0.4881, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.3684021543985637, |
| "grad_norm": 0.4342585802078247, |
| "learning_rate": 9.984401621528521e-06, |
| "loss": 0.4841, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3694793536804309, |
| "grad_norm": 0.46184322237968445, |
| "learning_rate": 9.983902772840925e-06, |
| "loss": 0.4956, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.370556552962298, |
| "grad_norm": 0.41323983669281006, |
| "learning_rate": 9.983396085277421e-06, |
| "loss": 0.4476, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.37163375224416517, |
| "grad_norm": 0.423006147146225, |
| "learning_rate": 9.982881559634946e-06, |
| "loss": 0.4556, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.37271095152603234, |
| "grad_norm": 0.45063871145248413, |
| "learning_rate": 9.982359196722769e-06, |
| "loss": 0.4662, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.37378815080789946, |
| "grad_norm": 0.4173057973384857, |
| "learning_rate": 9.981828997362481e-06, |
| "loss": 0.4592, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3748653500897666, |
| "grad_norm": 0.4817594587802887, |
| "learning_rate": 9.981290962387998e-06, |
| "loss": 0.4571, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.37594254937163374, |
| "grad_norm": 0.4868641197681427, |
| "learning_rate": 9.980745092645564e-06, |
| "loss": 0.4797, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3770197486535009, |
| "grad_norm": 0.4591589868068695, |
| "learning_rate": 9.980191388993745e-06, |
| "loss": 0.466, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.378096947935368, |
| "grad_norm": 0.4101882576942444, |
| "learning_rate": 9.979629852303426e-06, |
| "loss": 0.47, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.3791741472172352, |
| "grad_norm": 0.5075291991233826, |
| "learning_rate": 9.979060483457813e-06, |
| "loss": 0.4507, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.38025134649910236, |
| "grad_norm": 0.4138559401035309, |
| "learning_rate": 9.978483283352438e-06, |
| "loss": 0.4771, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3813285457809695, |
| "grad_norm": 0.5149195790290833, |
| "learning_rate": 9.977898252895133e-06, |
| "loss": 0.4691, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.38240574506283664, |
| "grad_norm": 0.43113377690315247, |
| "learning_rate": 9.977305393006066e-06, |
| "loss": 0.4717, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.38348294434470376, |
| "grad_norm": 0.48590296506881714, |
| "learning_rate": 9.976704704617706e-06, |
| "loss": 0.4621, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.3845601436265709, |
| "grad_norm": 0.4883146286010742, |
| "learning_rate": 9.976096188674837e-06, |
| "loss": 0.4647, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.38563734290843804, |
| "grad_norm": 0.5317089557647705, |
| "learning_rate": 9.975479846134561e-06, |
| "loss": 0.4651, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.3867145421903052, |
| "grad_norm": 0.5008561015129089, |
| "learning_rate": 9.974855677966283e-06, |
| "loss": 0.4538, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.3877917414721723, |
| "grad_norm": 0.48376259207725525, |
| "learning_rate": 9.97422368515172e-06, |
| "loss": 0.4654, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3888689407540395, |
| "grad_norm": 0.4275338053703308, |
| "learning_rate": 9.973583868684892e-06, |
| "loss": 0.4519, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.38994614003590666, |
| "grad_norm": 0.4228547215461731, |
| "learning_rate": 9.972936229572132e-06, |
| "loss": 0.4502, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.3910233393177738, |
| "grad_norm": 0.4408591687679291, |
| "learning_rate": 9.972280768832068e-06, |
| "loss": 0.4262, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.39210053859964095, |
| "grad_norm": 0.4212534427642822, |
| "learning_rate": 9.971617487495635e-06, |
| "loss": 0.4564, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.39317773788150806, |
| "grad_norm": 0.5158016681671143, |
| "learning_rate": 9.97094638660607e-06, |
| "loss": 0.4892, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.39425493716337523, |
| "grad_norm": 0.4569847285747528, |
| "learning_rate": 9.970267467218905e-06, |
| "loss": 0.459, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.39533213644524234, |
| "grad_norm": 0.44112586975097656, |
| "learning_rate": 9.969580730401966e-06, |
| "loss": 0.4739, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.3964093357271095, |
| "grad_norm": 0.5548210740089417, |
| "learning_rate": 9.968886177235388e-06, |
| "loss": 0.4782, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.3974865350089767, |
| "grad_norm": 0.4073880910873413, |
| "learning_rate": 9.968183808811586e-06, |
| "loss": 0.4584, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.3985637342908438, |
| "grad_norm": 0.4366508722305298, |
| "learning_rate": 9.967473626235273e-06, |
| "loss": 0.4633, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.39964093357271097, |
| "grad_norm": 0.4878612160682678, |
| "learning_rate": 9.966755630623452e-06, |
| "loss": 0.4558, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.4007181328545781, |
| "grad_norm": 0.4154806435108185, |
| "learning_rate": 9.966029823105415e-06, |
| "loss": 0.4659, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.40179533213644525, |
| "grad_norm": 0.512233555316925, |
| "learning_rate": 9.965296204822741e-06, |
| "loss": 0.4756, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.40287253141831236, |
| "grad_norm": 0.4381474554538727, |
| "learning_rate": 9.964554776929289e-06, |
| "loss": 0.4753, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.40394973070017953, |
| "grad_norm": 0.5151176452636719, |
| "learning_rate": 9.96380554059121e-06, |
| "loss": 0.4581, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4050269299820467, |
| "grad_norm": 0.5161194205284119, |
| "learning_rate": 9.963048496986933e-06, |
| "loss": 0.4902, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4061041292639138, |
| "grad_norm": 0.44209739565849304, |
| "learning_rate": 9.962283647307162e-06, |
| "loss": 0.4777, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.407181328545781, |
| "grad_norm": 0.4685749411582947, |
| "learning_rate": 9.961510992754883e-06, |
| "loss": 0.4763, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.4082585278276481, |
| "grad_norm": 0.43372246623039246, |
| "learning_rate": 9.960730534545357e-06, |
| "loss": 0.4455, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.40933572710951527, |
| "grad_norm": 0.4805338382720947, |
| "learning_rate": 9.95994227390612e-06, |
| "loss": 0.4361, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4104129263913824, |
| "grad_norm": 0.3987146317958832, |
| "learning_rate": 9.95914621207698e-06, |
| "loss": 0.4497, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.41149012567324955, |
| "grad_norm": 0.43490350246429443, |
| "learning_rate": 9.958342350310014e-06, |
| "loss": 0.4593, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.4125673249551167, |
| "grad_norm": 0.3805745542049408, |
| "learning_rate": 9.957530689869561e-06, |
| "loss": 0.4567, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.41364452423698383, |
| "grad_norm": 0.42765673995018005, |
| "learning_rate": 9.95671123203224e-06, |
| "loss": 0.4404, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.414721723518851, |
| "grad_norm": 0.41294559836387634, |
| "learning_rate": 9.955883978086922e-06, |
| "loss": 0.458, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4157989228007181, |
| "grad_norm": 0.4390866756439209, |
| "learning_rate": 9.955048929334744e-06, |
| "loss": 0.4655, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4168761220825853, |
| "grad_norm": 0.4394264817237854, |
| "learning_rate": 9.954206087089107e-06, |
| "loss": 0.4548, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.4179533213644524, |
| "grad_norm": 0.39660122990608215, |
| "learning_rate": 9.953355452675661e-06, |
| "loss": 0.4537, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.41903052064631957, |
| "grad_norm": 0.4672384262084961, |
| "learning_rate": 9.95249702743232e-06, |
| "loss": 0.4859, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.42010771992818674, |
| "grad_norm": 0.45546823740005493, |
| "learning_rate": 9.951630812709245e-06, |
| "loss": 0.4561, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.42118491921005385, |
| "grad_norm": 0.5013604760169983, |
| "learning_rate": 9.950756809868858e-06, |
| "loss": 0.4402, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.422262118491921, |
| "grad_norm": 0.43657436966896057, |
| "learning_rate": 9.94987502028582e-06, |
| "loss": 0.4837, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.42333931777378814, |
| "grad_norm": 0.5739855766296387, |
| "learning_rate": 9.948985445347045e-06, |
| "loss": 0.4878, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4244165170556553, |
| "grad_norm": 0.4679727554321289, |
| "learning_rate": 9.948088086451692e-06, |
| "loss": 0.4489, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4254937163375224, |
| "grad_norm": 0.5196583271026611, |
| "learning_rate": 9.94718294501116e-06, |
| "loss": 0.4685, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.4265709156193896, |
| "grad_norm": 0.5366904735565186, |
| "learning_rate": 9.946270022449093e-06, |
| "loss": 0.4555, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.42764811490125676, |
| "grad_norm": 0.40296608209609985, |
| "learning_rate": 9.94534932020137e-06, |
| "loss": 0.4488, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.4287253141831239, |
| "grad_norm": 0.46213823556900024, |
| "learning_rate": 9.944420839716106e-06, |
| "loss": 0.4534, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.42980251346499104, |
| "grad_norm": 0.5131990313529968, |
| "learning_rate": 9.943484582453653e-06, |
| "loss": 0.4459, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.43087971274685816, |
| "grad_norm": 0.4590674340724945, |
| "learning_rate": 9.942540549886592e-06, |
| "loss": 0.4607, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4319569120287253, |
| "grad_norm": 0.4340755045413971, |
| "learning_rate": 9.941588743499734e-06, |
| "loss": 0.4537, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.43303411131059244, |
| "grad_norm": 0.4147309958934784, |
| "learning_rate": 9.94062916479012e-06, |
| "loss": 0.4692, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4341113105924596, |
| "grad_norm": 0.42131778597831726, |
| "learning_rate": 9.939661815267008e-06, |
| "loss": 0.4442, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.4351885098743267, |
| "grad_norm": 0.4066350758075714, |
| "learning_rate": 9.938686696451884e-06, |
| "loss": 0.4673, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.4362657091561939, |
| "grad_norm": 0.42381957173347473, |
| "learning_rate": 9.937703809878455e-06, |
| "loss": 0.4596, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.43734290843806106, |
| "grad_norm": 0.4123949408531189, |
| "learning_rate": 9.936713157092641e-06, |
| "loss": 0.4555, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.4384201077199282, |
| "grad_norm": 0.45048898458480835, |
| "learning_rate": 9.93571473965258e-06, |
| "loss": 0.4704, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.43949730700179535, |
| "grad_norm": 0.35981449484825134, |
| "learning_rate": 9.934708559128624e-06, |
| "loss": 0.4316, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.44057450628366246, |
| "grad_norm": 0.4269760847091675, |
| "learning_rate": 9.933694617103328e-06, |
| "loss": 0.4453, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.44165170556552963, |
| "grad_norm": 0.43287765979766846, |
| "learning_rate": 9.932672915171461e-06, |
| "loss": 0.4604, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.44272890484739674, |
| "grad_norm": 0.4244833290576935, |
| "learning_rate": 9.93164345494e-06, |
| "loss": 0.4732, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4438061041292639, |
| "grad_norm": 0.38087978959083557, |
| "learning_rate": 9.930606238028116e-06, |
| "loss": 0.4557, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.4448833034111311, |
| "grad_norm": 0.4723820686340332, |
| "learning_rate": 9.929561266067183e-06, |
| "loss": 0.4623, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.4459605026929982, |
| "grad_norm": 0.4074157476425171, |
| "learning_rate": 9.928508540700775e-06, |
| "loss": 0.4358, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.44703770197486536, |
| "grad_norm": 0.48810282349586487, |
| "learning_rate": 9.92744806358466e-06, |
| "loss": 0.479, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4481149012567325, |
| "grad_norm": 0.382062166929245, |
| "learning_rate": 9.926379836386796e-06, |
| "loss": 0.4684, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.44919210053859965, |
| "grad_norm": 0.40181615948677063, |
| "learning_rate": 9.925303860787335e-06, |
| "loss": 0.4328, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.45026929982046676, |
| "grad_norm": 0.41660112142562866, |
| "learning_rate": 9.924220138478612e-06, |
| "loss": 0.4642, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.45134649910233393, |
| "grad_norm": 0.40994152426719666, |
| "learning_rate": 9.923128671165145e-06, |
| "loss": 0.441, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4524236983842011, |
| "grad_norm": 0.46039193868637085, |
| "learning_rate": 9.92202946056364e-06, |
| "loss": 0.4558, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4535008976660682, |
| "grad_norm": 0.40079423785209656, |
| "learning_rate": 9.920922508402975e-06, |
| "loss": 0.4417, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.4545780969479354, |
| "grad_norm": 0.4265320897102356, |
| "learning_rate": 9.91980781642421e-06, |
| "loss": 0.4279, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4556552962298025, |
| "grad_norm": 0.4361076354980469, |
| "learning_rate": 9.918685386380575e-06, |
| "loss": 0.4563, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.45673249551166967, |
| "grad_norm": 0.39475542306900024, |
| "learning_rate": 9.917555220037469e-06, |
| "loss": 0.4403, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4578096947935368, |
| "grad_norm": 0.403612345457077, |
| "learning_rate": 9.916417319172466e-06, |
| "loss": 0.4393, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.45888689407540395, |
| "grad_norm": 0.40121960639953613, |
| "learning_rate": 9.915271685575297e-06, |
| "loss": 0.4616, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.4599640933572711, |
| "grad_norm": 0.4377219080924988, |
| "learning_rate": 9.91411832104786e-06, |
| "loss": 0.4575, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.46104129263913823, |
| "grad_norm": 0.44596531987190247, |
| "learning_rate": 9.912957227404215e-06, |
| "loss": 0.454, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4621184919210054, |
| "grad_norm": 0.4428754150867462, |
| "learning_rate": 9.91178840647057e-06, |
| "loss": 0.4486, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4631956912028725, |
| "grad_norm": 0.4418964385986328, |
| "learning_rate": 9.910611860085293e-06, |
| "loss": 0.4591, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4642728904847397, |
| "grad_norm": 0.4350813329219818, |
| "learning_rate": 9.909427590098905e-06, |
| "loss": 0.4604, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.4653500897666068, |
| "grad_norm": 0.40184399485588074, |
| "learning_rate": 9.90823559837407e-06, |
| "loss": 0.4336, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.46642728904847397, |
| "grad_norm": 0.5028787851333618, |
| "learning_rate": 9.907035886785597e-06, |
| "loss": 0.472, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.46750448833034114, |
| "grad_norm": 0.46519935131073, |
| "learning_rate": 9.905828457220442e-06, |
| "loss": 0.4588, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.46858168761220825, |
| "grad_norm": 0.3957783579826355, |
| "learning_rate": 9.904613311577696e-06, |
| "loss": 0.4605, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.4696588868940754, |
| "grad_norm": 0.5078506469726562, |
| "learning_rate": 9.903390451768587e-06, |
| "loss": 0.4516, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.47073608617594254, |
| "grad_norm": 0.4710095524787903, |
| "learning_rate": 9.902159879716475e-06, |
| "loss": 0.4706, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4718132854578097, |
| "grad_norm": 0.40339967608451843, |
| "learning_rate": 9.900921597356856e-06, |
| "loss": 0.4658, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.4728904847396768, |
| "grad_norm": 0.4403778314590454, |
| "learning_rate": 9.899675606637344e-06, |
| "loss": 0.4394, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.473967684021544, |
| "grad_norm": 0.4328095316886902, |
| "learning_rate": 9.898421909517684e-06, |
| "loss": 0.4669, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.47504488330341116, |
| "grad_norm": 0.44412991404533386, |
| "learning_rate": 9.897160507969737e-06, |
| "loss": 0.4553, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4761220825852783, |
| "grad_norm": 0.4219338893890381, |
| "learning_rate": 9.895891403977489e-06, |
| "loss": 0.465, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.47719928186714544, |
| "grad_norm": 0.4596662223339081, |
| "learning_rate": 9.894614599537032e-06, |
| "loss": 0.4562, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.47827648114901256, |
| "grad_norm": 0.4500572383403778, |
| "learning_rate": 9.893330096656576e-06, |
| "loss": 0.4402, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.4793536804308797, |
| "grad_norm": 0.405307799577713, |
| "learning_rate": 9.892037897356432e-06, |
| "loss": 0.4668, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.48043087971274684, |
| "grad_norm": 0.4302726686000824, |
| "learning_rate": 9.890738003669029e-06, |
| "loss": 0.4451, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.481508078994614, |
| "grad_norm": 0.4489511549472809, |
| "learning_rate": 9.889430417638886e-06, |
| "loss": 0.4393, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.4825852782764811, |
| "grad_norm": 0.5107079744338989, |
| "learning_rate": 9.888115141322625e-06, |
| "loss": 0.451, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4836624775583483, |
| "grad_norm": 0.38805079460144043, |
| "learning_rate": 9.886792176788964e-06, |
| "loss": 0.4468, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.48473967684021546, |
| "grad_norm": 0.5426356196403503, |
| "learning_rate": 9.885461526118713e-06, |
| "loss": 0.4754, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4858168761220826, |
| "grad_norm": 0.40643173456192017, |
| "learning_rate": 9.884123191404772e-06, |
| "loss": 0.4564, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.48689407540394974, |
| "grad_norm": 0.48633506894111633, |
| "learning_rate": 9.882777174752128e-06, |
| "loss": 0.4621, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.48797127468581686, |
| "grad_norm": 0.470723032951355, |
| "learning_rate": 9.881423478277841e-06, |
| "loss": 0.4296, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.489048473967684, |
| "grad_norm": 0.4104878008365631, |
| "learning_rate": 9.880062104111064e-06, |
| "loss": 0.44, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.49012567324955114, |
| "grad_norm": 0.4439231753349304, |
| "learning_rate": 9.878693054393017e-06, |
| "loss": 0.4728, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.4912028725314183, |
| "grad_norm": 0.44275912642478943, |
| "learning_rate": 9.877316331276995e-06, |
| "loss": 0.4614, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.4922800718132855, |
| "grad_norm": 0.4094824194908142, |
| "learning_rate": 9.87593193692836e-06, |
| "loss": 0.4441, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.4933572710951526, |
| "grad_norm": 0.3908982574939728, |
| "learning_rate": 9.874539873524545e-06, |
| "loss": 0.4362, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.49443447037701976, |
| "grad_norm": 0.47311311960220337, |
| "learning_rate": 9.873140143255035e-06, |
| "loss": 0.4788, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.4955116696588869, |
| "grad_norm": 0.4665136933326721, |
| "learning_rate": 9.871732748321388e-06, |
| "loss": 0.4477, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.49658886894075405, |
| "grad_norm": 0.4822944700717926, |
| "learning_rate": 9.870317690937204e-06, |
| "loss": 0.4447, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.49766606822262116, |
| "grad_norm": 0.4455501437187195, |
| "learning_rate": 9.86889497332814e-06, |
| "loss": 0.4439, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.49874326750448833, |
| "grad_norm": 0.545047402381897, |
| "learning_rate": 9.867464597731906e-06, |
| "loss": 0.4414, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.4998204667863555, |
| "grad_norm": 0.43890202045440674, |
| "learning_rate": 9.866026566398248e-06, |
| "loss": 0.439, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5008976660682226, |
| "grad_norm": 0.4467841684818268, |
| "learning_rate": 9.864580881588958e-06, |
| "loss": 0.4621, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5019748653500897, |
| "grad_norm": 0.5091913342475891, |
| "learning_rate": 9.863127545577868e-06, |
| "loss": 0.4427, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.503052064631957, |
| "grad_norm": 0.4745034873485565, |
| "learning_rate": 9.86166656065084e-06, |
| "loss": 0.4754, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5041292639138241, |
| "grad_norm": 0.4376344680786133, |
| "learning_rate": 9.860197929105769e-06, |
| "loss": 0.4786, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5052064631956912, |
| "grad_norm": 0.4569643437862396, |
| "learning_rate": 9.858721653252571e-06, |
| "loss": 0.4312, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5062836624775583, |
| "grad_norm": 0.482858806848526, |
| "learning_rate": 9.857237735413194e-06, |
| "loss": 0.4506, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5073608617594255, |
| "grad_norm": 0.4289652705192566, |
| "learning_rate": 9.855746177921602e-06, |
| "loss": 0.4607, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5084380610412926, |
| "grad_norm": 0.4449455440044403, |
| "learning_rate": 9.854246983123771e-06, |
| "loss": 0.4436, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5095152603231597, |
| "grad_norm": 0.4095325767993927, |
| "learning_rate": 9.852740153377698e-06, |
| "loss": 0.4474, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.510592459605027, |
| "grad_norm": 0.45156896114349365, |
| "learning_rate": 9.851225691053382e-06, |
| "loss": 0.4492, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5116696588868941, |
| "grad_norm": 0.37197962403297424, |
| "learning_rate": 9.849703598532823e-06, |
| "loss": 0.4586, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5127468581687612, |
| "grad_norm": 0.44566208124160767, |
| "learning_rate": 9.848173878210034e-06, |
| "loss": 0.4366, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5138240574506283, |
| "grad_norm": 0.4585574269294739, |
| "learning_rate": 9.846636532491015e-06, |
| "loss": 0.4386, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5149012567324955, |
| "grad_norm": 0.48970794677734375, |
| "learning_rate": 9.845091563793763e-06, |
| "loss": 0.4569, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5159784560143627, |
| "grad_norm": 0.3991914987564087, |
| "learning_rate": 9.843538974548264e-06, |
| "loss": 0.4386, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5170556552962298, |
| "grad_norm": 0.5011213421821594, |
| "learning_rate": 9.841978767196495e-06, |
| "loss": 0.4517, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.518132854578097, |
| "grad_norm": 0.4288571774959564, |
| "learning_rate": 9.840410944192407e-06, |
| "loss": 0.4492, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5192100538599641, |
| "grad_norm": 0.5407549142837524, |
| "learning_rate": 9.838835508001934e-06, |
| "loss": 0.4804, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5202872531418312, |
| "grad_norm": 0.41034746170043945, |
| "learning_rate": 9.837252461102981e-06, |
| "loss": 0.4582, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5213644524236983, |
| "grad_norm": 0.4609270989894867, |
| "learning_rate": 9.835661805985432e-06, |
| "loss": 0.4506, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5224416517055656, |
| "grad_norm": 0.3973561227321625, |
| "learning_rate": 9.834063545151125e-06, |
| "loss": 0.4453, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5235188509874327, |
| "grad_norm": 0.3957566022872925, |
| "learning_rate": 9.832457681113867e-06, |
| "loss": 0.4625, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5245960502692998, |
| "grad_norm": 0.427836537361145, |
| "learning_rate": 9.830844216399426e-06, |
| "loss": 0.4334, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.525673249551167, |
| "grad_norm": 0.39041754603385925, |
| "learning_rate": 9.829223153545522e-06, |
| "loss": 0.4597, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5267504488330341, |
| "grad_norm": 0.37624824047088623, |
| "learning_rate": 9.827594495101824e-06, |
| "loss": 0.4366, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5278276481149012, |
| "grad_norm": 0.47729989886283875, |
| "learning_rate": 9.825958243629951e-06, |
| "loss": 0.419, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5289048473967684, |
| "grad_norm": 0.37915152311325073, |
| "learning_rate": 9.824314401703461e-06, |
| "loss": 0.4453, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5299820466786356, |
| "grad_norm": 0.36883866786956787, |
| "learning_rate": 9.822662971907853e-06, |
| "loss": 0.4203, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5310592459605027, |
| "grad_norm": 0.4049866497516632, |
| "learning_rate": 9.82100395684056e-06, |
| "loss": 0.4332, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5321364452423698, |
| "grad_norm": 0.399457186460495, |
| "learning_rate": 9.819337359110945e-06, |
| "loss": 0.4825, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.533213644524237, |
| "grad_norm": 0.34586960077285767, |
| "learning_rate": 9.8176631813403e-06, |
| "loss": 0.4381, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5342908438061041, |
| "grad_norm": 0.39063382148742676, |
| "learning_rate": 9.815981426161834e-06, |
| "loss": 0.4538, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5353680430879713, |
| "grad_norm": 0.3408878445625305, |
| "learning_rate": 9.81429209622068e-06, |
| "loss": 0.4722, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5364452423698384, |
| "grad_norm": 0.3885675370693207, |
| "learning_rate": 9.812595194173875e-06, |
| "loss": 0.4577, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5375224416517056, |
| "grad_norm": 0.36339253187179565, |
| "learning_rate": 9.81089072269038e-06, |
| "loss": 0.4465, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5385996409335727, |
| "grad_norm": 0.37926578521728516, |
| "learning_rate": 9.809178684451052e-06, |
| "loss": 0.4482, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5396768402154398, |
| "grad_norm": 0.370346337556839, |
| "learning_rate": 9.807459082148648e-06, |
| "loss": 0.4602, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.540754039497307, |
| "grad_norm": 0.42454493045806885, |
| "learning_rate": 9.805731918487832e-06, |
| "loss": 0.4682, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5418312387791742, |
| "grad_norm": 0.3915000259876251, |
| "learning_rate": 9.803997196185146e-06, |
| "loss": 0.4494, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5429084380610413, |
| "grad_norm": 0.43712353706359863, |
| "learning_rate": 9.802254917969033e-06, |
| "loss": 0.4652, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5439856373429084, |
| "grad_norm": 0.45863014459609985, |
| "learning_rate": 9.800505086579816e-06, |
| "loss": 0.4262, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5450628366247756, |
| "grad_norm": 0.39945051074028015, |
| "learning_rate": 9.798747704769696e-06, |
| "loss": 0.4586, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5461400359066427, |
| "grad_norm": 0.4776397943496704, |
| "learning_rate": 9.796982775302755e-06, |
| "loss": 0.4656, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5472172351885098, |
| "grad_norm": 0.430626779794693, |
| "learning_rate": 9.795210300954938e-06, |
| "loss": 0.457, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5482944344703771, |
| "grad_norm": 0.4296889007091522, |
| "learning_rate": 9.793430284514063e-06, |
| "loss": 0.4268, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5493716337522442, |
| "grad_norm": 0.4044749140739441, |
| "learning_rate": 9.79164272877981e-06, |
| "loss": 0.46, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5504488330341113, |
| "grad_norm": 0.4012002646923065, |
| "learning_rate": 9.789847636563718e-06, |
| "loss": 0.439, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5515260323159784, |
| "grad_norm": 0.396487295627594, |
| "learning_rate": 9.788045010689173e-06, |
| "loss": 0.442, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5526032315978456, |
| "grad_norm": 0.3827875554561615, |
| "learning_rate": 9.786234853991419e-06, |
| "loss": 0.4548, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5536804308797127, |
| "grad_norm": 0.42587071657180786, |
| "learning_rate": 9.78441716931754e-06, |
| "loss": 0.4319, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5547576301615799, |
| "grad_norm": 0.4159460961818695, |
| "learning_rate": 9.782591959526457e-06, |
| "loss": 0.434, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5558348294434471, |
| "grad_norm": 0.4378649890422821, |
| "learning_rate": 9.780759227488937e-06, |
| "loss": 0.4422, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5569120287253142, |
| "grad_norm": 0.3905577063560486, |
| "learning_rate": 9.77891897608757e-06, |
| "loss": 0.4403, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5579892280071813, |
| "grad_norm": 0.44301673769950867, |
| "learning_rate": 9.777071208216772e-06, |
| "loss": 0.4522, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5590664272890484, |
| "grad_norm": 0.40776389837265015, |
| "learning_rate": 9.775215926782788e-06, |
| "loss": 0.4511, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5601436265709157, |
| "grad_norm": 0.44333213567733765, |
| "learning_rate": 9.773353134703675e-06, |
| "loss": 0.4624, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5612208258527828, |
| "grad_norm": 0.44751521944999695, |
| "learning_rate": 9.771482834909306e-06, |
| "loss": 0.4542, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5622980251346499, |
| "grad_norm": 0.4059796631336212, |
| "learning_rate": 9.769605030341356e-06, |
| "loss": 0.4471, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5633752244165171, |
| "grad_norm": 0.44883087277412415, |
| "learning_rate": 9.767719723953315e-06, |
| "loss": 0.4492, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5644524236983842, |
| "grad_norm": 0.40039992332458496, |
| "learning_rate": 9.765826918710466e-06, |
| "loss": 0.4455, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5655296229802513, |
| "grad_norm": 0.44426193833351135, |
| "learning_rate": 9.763926617589883e-06, |
| "loss": 0.4551, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5666068222621184, |
| "grad_norm": 0.4035399854183197, |
| "learning_rate": 9.762018823580436e-06, |
| "loss": 0.4665, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5676840215439857, |
| "grad_norm": 0.4453595280647278, |
| "learning_rate": 9.760103539682777e-06, |
| "loss": 0.4653, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5687612208258528, |
| "grad_norm": 0.37671253085136414, |
| "learning_rate": 9.758180768909338e-06, |
| "loss": 0.4205, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5698384201077199, |
| "grad_norm": 0.42255714535713196, |
| "learning_rate": 9.75625051428433e-06, |
| "loss": 0.4572, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5709156193895871, |
| "grad_norm": 0.4075011909008026, |
| "learning_rate": 9.754312778843727e-06, |
| "loss": 0.4314, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5719928186714542, |
| "grad_norm": 0.5155799984931946, |
| "learning_rate": 9.752367565635281e-06, |
| "loss": 0.4794, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5730700179533214, |
| "grad_norm": 0.41437146067619324, |
| "learning_rate": 9.750414877718495e-06, |
| "loss": 0.4477, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5741472172351885, |
| "grad_norm": 0.5376664996147156, |
| "learning_rate": 9.748454718164635e-06, |
| "loss": 0.4369, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.5752244165170557, |
| "grad_norm": 0.3836204707622528, |
| "learning_rate": 9.746487090056712e-06, |
| "loss": 0.4521, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5763016157989228, |
| "grad_norm": 0.4510630667209625, |
| "learning_rate": 9.744511996489495e-06, |
| "loss": 0.446, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5773788150807899, |
| "grad_norm": 0.40221360325813293, |
| "learning_rate": 9.742529440569481e-06, |
| "loss": 0.4308, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5784560143626571, |
| "grad_norm": 0.46302559971809387, |
| "learning_rate": 9.740539425414913e-06, |
| "loss": 0.458, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5795332136445243, |
| "grad_norm": 0.405277818441391, |
| "learning_rate": 9.738541954155766e-06, |
| "loss": 0.4406, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5806104129263914, |
| "grad_norm": 0.4966040849685669, |
| "learning_rate": 9.736537029933738e-06, |
| "loss": 0.4403, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.5816876122082585, |
| "grad_norm": 0.45864376425743103, |
| "learning_rate": 9.734524655902253e-06, |
| "loss": 0.4404, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5827648114901257, |
| "grad_norm": 0.4572237730026245, |
| "learning_rate": 9.732504835226451e-06, |
| "loss": 0.4396, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5838420107719928, |
| "grad_norm": 0.5045959949493408, |
| "learning_rate": 9.730477571083184e-06, |
| "loss": 0.4383, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5849192100538599, |
| "grad_norm": 0.5284016132354736, |
| "learning_rate": 9.728442866661013e-06, |
| "loss": 0.4508, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5859964093357272, |
| "grad_norm": 0.39870405197143555, |
| "learning_rate": 9.726400725160199e-06, |
| "loss": 0.45, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5870736086175943, |
| "grad_norm": 0.48833194375038147, |
| "learning_rate": 9.724351149792702e-06, |
| "loss": 0.4431, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5881508078994614, |
| "grad_norm": 0.577894926071167, |
| "learning_rate": 9.722294143782171e-06, |
| "loss": 0.4517, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5892280071813285, |
| "grad_norm": 0.3885682225227356, |
| "learning_rate": 9.720229710363949e-06, |
| "loss": 0.4537, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5903052064631957, |
| "grad_norm": 0.42628440260887146, |
| "learning_rate": 9.718157852785057e-06, |
| "loss": 0.4396, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5913824057450628, |
| "grad_norm": 0.5009000897407532, |
| "learning_rate": 9.71607857430419e-06, |
| "loss": 0.4676, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.59245960502693, |
| "grad_norm": 0.39723867177963257, |
| "learning_rate": 9.71399187819172e-06, |
| "loss": 0.4522, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5935368043087971, |
| "grad_norm": 0.359291136264801, |
| "learning_rate": 9.711897767729683e-06, |
| "loss": 0.4578, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5946140035906643, |
| "grad_norm": 0.4119752049446106, |
| "learning_rate": 9.709796246211778e-06, |
| "loss": 0.4743, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5956912028725314, |
| "grad_norm": 0.4480607509613037, |
| "learning_rate": 9.707687316943359e-06, |
| "loss": 0.4669, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.5967684021543985, |
| "grad_norm": 0.3882138133049011, |
| "learning_rate": 9.705570983241433e-06, |
| "loss": 0.4394, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5978456014362658, |
| "grad_norm": 0.4053027927875519, |
| "learning_rate": 9.70344724843465e-06, |
| "loss": 0.4286, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5989228007181329, |
| "grad_norm": 0.40790650248527527, |
| "learning_rate": 9.701316115863304e-06, |
| "loss": 0.416, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.40172263979911804, |
| "learning_rate": 9.699177588879323e-06, |
| "loss": 0.4298, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6010771992818671, |
| "grad_norm": 0.4236910343170166, |
| "learning_rate": 9.697031670846266e-06, |
| "loss": 0.4699, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6021543985637343, |
| "grad_norm": 0.48015671968460083, |
| "learning_rate": 9.694878365139313e-06, |
| "loss": 0.4487, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6032315978456014, |
| "grad_norm": 0.4922192096710205, |
| "learning_rate": 9.69271767514527e-06, |
| "loss": 0.448, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6043087971274685, |
| "grad_norm": 0.36672329902648926, |
| "learning_rate": 9.690549604262556e-06, |
| "loss": 0.4333, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6053859964093358, |
| "grad_norm": 0.5522063970565796, |
| "learning_rate": 9.688374155901192e-06, |
| "loss": 0.4629, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6064631956912029, |
| "grad_norm": 0.4292784333229065, |
| "learning_rate": 9.68619133348281e-06, |
| "loss": 0.4395, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.60754039497307, |
| "grad_norm": 0.5118968486785889, |
| "learning_rate": 9.68400114044064e-06, |
| "loss": 0.4449, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6086175942549371, |
| "grad_norm": 0.47617998719215393, |
| "learning_rate": 9.6818035802195e-06, |
| "loss": 0.4261, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6096947935368043, |
| "grad_norm": 0.4254116415977478, |
| "learning_rate": 9.679598656275797e-06, |
| "loss": 0.4502, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6107719928186714, |
| "grad_norm": 0.41793403029441833, |
| "learning_rate": 9.677386372077525e-06, |
| "loss": 0.465, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6118491921005386, |
| "grad_norm": 0.4164504408836365, |
| "learning_rate": 9.67516673110425e-06, |
| "loss": 0.4436, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6129263913824058, |
| "grad_norm": 0.38877761363983154, |
| "learning_rate": 9.672939736847104e-06, |
| "loss": 0.4471, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6140035906642729, |
| "grad_norm": 0.4273861050605774, |
| "learning_rate": 9.670705392808796e-06, |
| "loss": 0.4663, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.61508078994614, |
| "grad_norm": 0.5441392660140991, |
| "learning_rate": 9.668463702503588e-06, |
| "loss": 0.4682, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6161579892280071, |
| "grad_norm": 0.3736647963523865, |
| "learning_rate": 9.666214669457295e-06, |
| "loss": 0.4447, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6172351885098744, |
| "grad_norm": 0.5223665237426758, |
| "learning_rate": 9.663958297207286e-06, |
| "loss": 0.4553, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6183123877917415, |
| "grad_norm": 0.4206368327140808, |
| "learning_rate": 9.661694589302471e-06, |
| "loss": 0.4547, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6193895870736086, |
| "grad_norm": 0.4165674149990082, |
| "learning_rate": 9.659423549303298e-06, |
| "loss": 0.4339, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6204667863554758, |
| "grad_norm": 0.3881623446941376, |
| "learning_rate": 9.657145180781745e-06, |
| "loss": 0.4483, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6215439856373429, |
| "grad_norm": 0.44433334469795227, |
| "learning_rate": 9.654859487321318e-06, |
| "loss": 0.4381, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.62262118491921, |
| "grad_norm": 0.38431665301322937, |
| "learning_rate": 9.652566472517048e-06, |
| "loss": 0.4436, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6236983842010771, |
| "grad_norm": 0.3904261887073517, |
| "learning_rate": 9.650266139975474e-06, |
| "loss": 0.4563, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6247755834829444, |
| "grad_norm": 0.450923353433609, |
| "learning_rate": 9.647958493314653e-06, |
| "loss": 0.4458, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6258527827648115, |
| "grad_norm": 0.4171711504459381, |
| "learning_rate": 9.645643536164136e-06, |
| "loss": 0.4604, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6269299820466786, |
| "grad_norm": 0.44739070534706116, |
| "learning_rate": 9.643321272164981e-06, |
| "loss": 0.4468, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6280071813285458, |
| "grad_norm": 0.4494309425354004, |
| "learning_rate": 9.640991704969735e-06, |
| "loss": 0.4547, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6290843806104129, |
| "grad_norm": 0.3796863853931427, |
| "learning_rate": 9.63865483824243e-06, |
| "loss": 0.4258, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.63016157989228, |
| "grad_norm": 0.4536336660385132, |
| "learning_rate": 9.63631067565858e-06, |
| "loss": 0.4642, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6312387791741472, |
| "grad_norm": 0.37764403223991394, |
| "learning_rate": 9.633959220905179e-06, |
| "loss": 0.4268, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6323159784560144, |
| "grad_norm": 0.3929099142551422, |
| "learning_rate": 9.631600477680683e-06, |
| "loss": 0.4421, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6333931777378815, |
| "grad_norm": 0.4072980284690857, |
| "learning_rate": 9.629234449695015e-06, |
| "loss": 0.4495, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6344703770197486, |
| "grad_norm": 0.4604843556880951, |
| "learning_rate": 9.62686114066956e-06, |
| "loss": 0.4872, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6355475763016158, |
| "grad_norm": 0.41248437762260437, |
| "learning_rate": 9.624480554337144e-06, |
| "loss": 0.4375, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.636624775583483, |
| "grad_norm": 0.3883218467235565, |
| "learning_rate": 9.62209269444205e-06, |
| "loss": 0.4502, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6377019748653501, |
| "grad_norm": 0.44741594791412354, |
| "learning_rate": 9.619697564739996e-06, |
| "loss": 0.4515, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6387791741472172, |
| "grad_norm": 0.4136967658996582, |
| "learning_rate": 9.617295168998135e-06, |
| "loss": 0.4518, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6398563734290844, |
| "grad_norm": 0.4265954792499542, |
| "learning_rate": 9.614885510995047e-06, |
| "loss": 0.4418, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6409335727109515, |
| "grad_norm": 0.4041872024536133, |
| "learning_rate": 9.612468594520739e-06, |
| "loss": 0.4533, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6420107719928186, |
| "grad_norm": 0.4049215018749237, |
| "learning_rate": 9.610044423376628e-06, |
| "loss": 0.4142, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6430879712746859, |
| "grad_norm": 0.4189813435077667, |
| "learning_rate": 9.607613001375546e-06, |
| "loss": 0.4374, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.644165170556553, |
| "grad_norm": 0.4443398118019104, |
| "learning_rate": 9.605174332341728e-06, |
| "loss": 0.4228, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6452423698384201, |
| "grad_norm": 0.499967485666275, |
| "learning_rate": 9.602728420110807e-06, |
| "loss": 0.4484, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6463195691202872, |
| "grad_norm": 0.4229572117328644, |
| "learning_rate": 9.600275268529809e-06, |
| "loss": 0.4619, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6473967684021544, |
| "grad_norm": 0.4665996730327606, |
| "learning_rate": 9.597814881457144e-06, |
| "loss": 0.4589, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6484739676840215, |
| "grad_norm": 0.39410340785980225, |
| "learning_rate": 9.595347262762608e-06, |
| "loss": 0.4551, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6495511669658887, |
| "grad_norm": 0.37037867307662964, |
| "learning_rate": 9.592872416327366e-06, |
| "loss": 0.4338, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6506283662477559, |
| "grad_norm": 0.38161227107048035, |
| "learning_rate": 9.590390346043952e-06, |
| "loss": 0.4318, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.651705565529623, |
| "grad_norm": 0.3696405291557312, |
| "learning_rate": 9.587901055816262e-06, |
| "loss": 0.4327, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6527827648114901, |
| "grad_norm": 0.398960143327713, |
| "learning_rate": 9.585404549559551e-06, |
| "loss": 0.4447, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6538599640933572, |
| "grad_norm": 0.3864385485649109, |
| "learning_rate": 9.582900831200419e-06, |
| "loss": 0.443, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6549371633752245, |
| "grad_norm": 0.3866807818412781, |
| "learning_rate": 9.580389904676813e-06, |
| "loss": 0.4329, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6560143626570916, |
| "grad_norm": 0.37632474303245544, |
| "learning_rate": 9.577871773938013e-06, |
| "loss": 0.4549, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6570915619389587, |
| "grad_norm": 0.3991330862045288, |
| "learning_rate": 9.575346442944635e-06, |
| "loss": 0.456, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6581687612208259, |
| "grad_norm": 0.44957804679870605, |
| "learning_rate": 9.572813915668618e-06, |
| "loss": 0.4751, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.659245960502693, |
| "grad_norm": 0.40252184867858887, |
| "learning_rate": 9.570274196093217e-06, |
| "loss": 0.4439, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6603231597845601, |
| "grad_norm": 0.45728814601898193, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.4497, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6614003590664272, |
| "grad_norm": 0.40386101603507996, |
| "learning_rate": 9.565173196033855e-06, |
| "loss": 0.4626, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6624775583482945, |
| "grad_norm": 0.3945240378379822, |
| "learning_rate": 9.562611923572944e-06, |
| "loss": 0.4252, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6635547576301616, |
| "grad_norm": 0.38906341791152954, |
| "learning_rate": 9.56004347485874e-06, |
| "loss": 0.4333, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6646319569120287, |
| "grad_norm": 0.3659195303916931, |
| "learning_rate": 9.557467853931e-06, |
| "loss": 0.4405, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6657091561938959, |
| "grad_norm": 0.41885197162628174, |
| "learning_rate": 9.554885064840758e-06, |
| "loss": 0.4554, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.666786355475763, |
| "grad_norm": 0.4376233220100403, |
| "learning_rate": 9.552295111650328e-06, |
| "loss": 0.4395, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6678635547576302, |
| "grad_norm": 0.4020839035511017, |
| "learning_rate": 9.549697998433286e-06, |
| "loss": 0.4556, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6689407540394973, |
| "grad_norm": 0.4188641309738159, |
| "learning_rate": 9.547093729274474e-06, |
| "loss": 0.4584, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6700179533213645, |
| "grad_norm": 0.4054971933364868, |
| "learning_rate": 9.54448230826999e-06, |
| "loss": 0.436, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6710951526032316, |
| "grad_norm": 0.3803367614746094, |
| "learning_rate": 9.541863739527176e-06, |
| "loss": 0.4861, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6721723518850987, |
| "grad_norm": 0.43850457668304443, |
| "learning_rate": 9.539238027164618e-06, |
| "loss": 0.4438, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6732495511669659, |
| "grad_norm": 0.38866519927978516, |
| "learning_rate": 9.536605175312142e-06, |
| "loss": 0.4366, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.6743267504488331, |
| "grad_norm": 0.4269110858440399, |
| "learning_rate": 9.533965188110794e-06, |
| "loss": 0.4523, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6754039497307002, |
| "grad_norm": 0.4612042307853699, |
| "learning_rate": 9.531318069712854e-06, |
| "loss": 0.4544, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.6764811490125673, |
| "grad_norm": 0.340231329202652, |
| "learning_rate": 9.52866382428181e-06, |
| "loss": 0.4434, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6775583482944345, |
| "grad_norm": 0.4424141049385071, |
| "learning_rate": 9.526002455992361e-06, |
| "loss": 0.43, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6786355475763016, |
| "grad_norm": 0.4322042167186737, |
| "learning_rate": 9.523333969030413e-06, |
| "loss": 0.4661, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6797127468581687, |
| "grad_norm": 0.4067363142967224, |
| "learning_rate": 9.520658367593065e-06, |
| "loss": 0.4607, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.680789946140036, |
| "grad_norm": 0.5061907768249512, |
| "learning_rate": 9.517975655888607e-06, |
| "loss": 0.4365, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6818671454219031, |
| "grad_norm": 0.3870816230773926, |
| "learning_rate": 9.51528583813651e-06, |
| "loss": 0.4456, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6829443447037702, |
| "grad_norm": 0.4361846446990967, |
| "learning_rate": 9.512588918567429e-06, |
| "loss": 0.435, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6840215439856373, |
| "grad_norm": 0.4258888065814972, |
| "learning_rate": 9.509884901423179e-06, |
| "loss": 0.4534, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.6850987432675045, |
| "grad_norm": 0.42448312044143677, |
| "learning_rate": 9.507173790956746e-06, |
| "loss": 0.4447, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6861759425493716, |
| "grad_norm": 0.40607163310050964, |
| "learning_rate": 9.504455591432269e-06, |
| "loss": 0.4459, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.6872531418312388, |
| "grad_norm": 0.4292491376399994, |
| "learning_rate": 9.501730307125037e-06, |
| "loss": 0.4508, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.6883303411131059, |
| "grad_norm": 0.417349636554718, |
| "learning_rate": 9.498997942321484e-06, |
| "loss": 0.4363, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.6894075403949731, |
| "grad_norm": 0.4843739867210388, |
| "learning_rate": 9.496258501319178e-06, |
| "loss": 0.4408, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6904847396768402, |
| "grad_norm": 0.49764111638069153, |
| "learning_rate": 9.493511988426822e-06, |
| "loss": 0.4279, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.6915619389587073, |
| "grad_norm": 0.4440729022026062, |
| "learning_rate": 9.490758407964235e-06, |
| "loss": 0.4249, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.6926391382405745, |
| "grad_norm": 0.48478931188583374, |
| "learning_rate": 9.487997764262356e-06, |
| "loss": 0.4678, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6937163375224417, |
| "grad_norm": 0.44857296347618103, |
| "learning_rate": 9.48523006166323e-06, |
| "loss": 0.4336, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6947935368043088, |
| "grad_norm": 0.4317927956581116, |
| "learning_rate": 9.482455304520013e-06, |
| "loss": 0.4601, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.6958707360861759, |
| "grad_norm": 0.46467840671539307, |
| "learning_rate": 9.479673497196947e-06, |
| "loss": 0.452, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6969479353680431, |
| "grad_norm": 0.4180663526058197, |
| "learning_rate": 9.476884644069366e-06, |
| "loss": 0.4407, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6980251346499102, |
| "grad_norm": 0.44766995310783386, |
| "learning_rate": 9.474088749523689e-06, |
| "loss": 0.4613, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.6991023339317773, |
| "grad_norm": 0.4251432418823242, |
| "learning_rate": 9.471285817957407e-06, |
| "loss": 0.4254, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.7001795332136446, |
| "grad_norm": 0.4335974156856537, |
| "learning_rate": 9.468475853779078e-06, |
| "loss": 0.4205, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7012567324955117, |
| "grad_norm": 0.4041954576969147, |
| "learning_rate": 9.465658861408325e-06, |
| "loss": 0.4483, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7023339317773788, |
| "grad_norm": 0.40664151310920715, |
| "learning_rate": 9.462834845275821e-06, |
| "loss": 0.4296, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7034111310592459, |
| "grad_norm": 0.4748784005641937, |
| "learning_rate": 9.460003809823289e-06, |
| "loss": 0.4431, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7044883303411131, |
| "grad_norm": 0.4240473806858063, |
| "learning_rate": 9.457165759503492e-06, |
| "loss": 0.4133, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7055655296229802, |
| "grad_norm": 0.4704611003398895, |
| "learning_rate": 9.454320698780226e-06, |
| "loss": 0.4311, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7066427289048474, |
| "grad_norm": 0.4679224193096161, |
| "learning_rate": 9.451468632128313e-06, |
| "loss": 0.4565, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7077199281867146, |
| "grad_norm": 0.3749702274799347, |
| "learning_rate": 9.448609564033594e-06, |
| "loss": 0.4539, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7087971274685817, |
| "grad_norm": 0.4322480261325836, |
| "learning_rate": 9.445743498992921e-06, |
| "loss": 0.4357, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7098743267504488, |
| "grad_norm": 0.502750039100647, |
| "learning_rate": 9.442870441514155e-06, |
| "loss": 0.4527, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7109515260323159, |
| "grad_norm": 0.39252883195877075, |
| "learning_rate": 9.439990396116149e-06, |
| "loss": 0.4479, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7120287253141832, |
| "grad_norm": 0.505723774433136, |
| "learning_rate": 9.437103367328755e-06, |
| "loss": 0.4363, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7131059245960503, |
| "grad_norm": 0.44664621353149414, |
| "learning_rate": 9.4342093596928e-06, |
| "loss": 0.4306, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7141831238779174, |
| "grad_norm": 0.450601190328598, |
| "learning_rate": 9.431308377760095e-06, |
| "loss": 0.4842, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7152603231597846, |
| "grad_norm": 0.4185259938240051, |
| "learning_rate": 9.428400426093413e-06, |
| "loss": 0.4622, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7163375224416517, |
| "grad_norm": 0.49219274520874023, |
| "learning_rate": 9.425485509266497e-06, |
| "loss": 0.4521, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7174147217235188, |
| "grad_norm": 0.45771172642707825, |
| "learning_rate": 9.42256363186404e-06, |
| "loss": 0.4623, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.718491921005386, |
| "grad_norm": 0.4976024329662323, |
| "learning_rate": 9.419634798481681e-06, |
| "loss": 0.47, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7195691202872532, |
| "grad_norm": 0.43556949496269226, |
| "learning_rate": 9.41669901372601e-06, |
| "loss": 0.4329, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7206463195691203, |
| "grad_norm": 0.4830540120601654, |
| "learning_rate": 9.413756282214538e-06, |
| "loss": 0.4859, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7217235188509874, |
| "grad_norm": 0.3960484266281128, |
| "learning_rate": 9.41080660857571e-06, |
| "loss": 0.4528, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7228007181328546, |
| "grad_norm": 0.4806577265262604, |
| "learning_rate": 9.407849997448884e-06, |
| "loss": 0.445, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7238779174147217, |
| "grad_norm": 0.40394070744514465, |
| "learning_rate": 9.404886453484337e-06, |
| "loss": 0.4552, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7249551166965889, |
| "grad_norm": 0.3905143141746521, |
| "learning_rate": 9.401915981343243e-06, |
| "loss": 0.4343, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.726032315978456, |
| "grad_norm": 0.37571895122528076, |
| "learning_rate": 9.398938585697679e-06, |
| "loss": 0.4168, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7271095152603232, |
| "grad_norm": 0.40112486481666565, |
| "learning_rate": 9.395954271230606e-06, |
| "loss": 0.453, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7281867145421903, |
| "grad_norm": 0.4205509126186371, |
| "learning_rate": 9.39296304263587e-06, |
| "loss": 0.4406, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7292639138240574, |
| "grad_norm": 0.3572981059551239, |
| "learning_rate": 9.38996490461819e-06, |
| "loss": 0.4394, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7303411131059246, |
| "grad_norm": 0.39409223198890686, |
| "learning_rate": 9.386959861893159e-06, |
| "loss": 0.4339, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7314183123877918, |
| "grad_norm": 0.4158811867237091, |
| "learning_rate": 9.383947919187219e-06, |
| "loss": 0.3963, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7324955116696589, |
| "grad_norm": 0.40662702918052673, |
| "learning_rate": 9.380929081237676e-06, |
| "loss": 0.4725, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.733572710951526, |
| "grad_norm": 0.40022268891334534, |
| "learning_rate": 9.377903352792672e-06, |
| "loss": 0.4404, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7346499102333932, |
| "grad_norm": 0.4311719536781311, |
| "learning_rate": 9.374870738611192e-06, |
| "loss": 0.4243, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7357271095152603, |
| "grad_norm": 0.3858395516872406, |
| "learning_rate": 9.371831243463048e-06, |
| "loss": 0.4383, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7368043087971274, |
| "grad_norm": 0.4157547950744629, |
| "learning_rate": 9.368784872128877e-06, |
| "loss": 0.4385, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7378815080789947, |
| "grad_norm": 0.4222763180732727, |
| "learning_rate": 9.36573162940013e-06, |
| "loss": 0.4316, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7389587073608618, |
| "grad_norm": 0.43726587295532227, |
| "learning_rate": 9.362671520079065e-06, |
| "loss": 0.4432, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7400359066427289, |
| "grad_norm": 0.47637829184532166, |
| "learning_rate": 9.359604548978742e-06, |
| "loss": 0.4476, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.741113105924596, |
| "grad_norm": 0.37087079882621765, |
| "learning_rate": 9.356530720923012e-06, |
| "loss": 0.4217, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7421903052064632, |
| "grad_norm": 0.3849940598011017, |
| "learning_rate": 9.35345004074651e-06, |
| "loss": 0.4158, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7432675044883303, |
| "grad_norm": 0.4626414477825165, |
| "learning_rate": 9.350362513294652e-06, |
| "loss": 0.4254, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7443447037701975, |
| "grad_norm": 0.5151078701019287, |
| "learning_rate": 9.347268143423619e-06, |
| "loss": 0.4622, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7454219030520647, |
| "grad_norm": 0.4353354871273041, |
| "learning_rate": 9.344166936000356e-06, |
| "loss": 0.4498, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7464991023339318, |
| "grad_norm": 0.4860036075115204, |
| "learning_rate": 9.341058895902563e-06, |
| "loss": 0.4427, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7475763016157989, |
| "grad_norm": 0.38492318987846375, |
| "learning_rate": 9.337944028018689e-06, |
| "loss": 0.4217, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.748653500897666, |
| "grad_norm": 0.42594560980796814, |
| "learning_rate": 9.334822337247916e-06, |
| "loss": 0.4501, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7497307001795332, |
| "grad_norm": 0.44222599267959595, |
| "learning_rate": 9.33169382850016e-06, |
| "loss": 0.433, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7508078994614004, |
| "grad_norm": 0.38337865471839905, |
| "learning_rate": 9.328558506696062e-06, |
| "loss": 0.4664, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7518850987432675, |
| "grad_norm": 0.3998524844646454, |
| "learning_rate": 9.325416376766978e-06, |
| "loss": 0.4384, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7529622980251347, |
| "grad_norm": 0.45198628306388855, |
| "learning_rate": 9.322267443654974e-06, |
| "loss": 0.4759, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7540394973070018, |
| "grad_norm": 0.3957984745502472, |
| "learning_rate": 9.319111712312811e-06, |
| "loss": 0.4623, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7551166965888689, |
| "grad_norm": 0.4148131012916565, |
| "learning_rate": 9.315949187703947e-06, |
| "loss": 0.4394, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.756193895870736, |
| "grad_norm": 0.4065891206264496, |
| "learning_rate": 9.312779874802527e-06, |
| "loss": 0.4458, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7572710951526033, |
| "grad_norm": 0.4025750756263733, |
| "learning_rate": 9.309603778593364e-06, |
| "loss": 0.4411, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7583482944344704, |
| "grad_norm": 0.3583240211009979, |
| "learning_rate": 9.306420904071949e-06, |
| "loss": 0.4351, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7594254937163375, |
| "grad_norm": 0.38981419801712036, |
| "learning_rate": 9.30323125624443e-06, |
| "loss": 0.4305, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7605026929982047, |
| "grad_norm": 0.3930661976337433, |
| "learning_rate": 9.300034840127608e-06, |
| "loss": 0.4424, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7615798922800718, |
| "grad_norm": 0.3722686767578125, |
| "learning_rate": 9.29683166074893e-06, |
| "loss": 0.4333, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.762657091561939, |
| "grad_norm": 0.383351594209671, |
| "learning_rate": 9.293621723146485e-06, |
| "loss": 0.4382, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7637342908438061, |
| "grad_norm": 0.3991412818431854, |
| "learning_rate": 9.290405032368983e-06, |
| "loss": 0.4391, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7648114901256733, |
| "grad_norm": 0.3822011947631836, |
| "learning_rate": 9.287181593475762e-06, |
| "loss": 0.4373, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7658886894075404, |
| "grad_norm": 0.3904517889022827, |
| "learning_rate": 9.283951411536774e-06, |
| "loss": 0.4476, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.7669658886894075, |
| "grad_norm": 0.4416585862636566, |
| "learning_rate": 9.28071449163257e-06, |
| "loss": 0.4357, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.7680430879712747, |
| "grad_norm": 0.376960426568985, |
| "learning_rate": 9.277470838854307e-06, |
| "loss": 0.4535, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7691202872531419, |
| "grad_norm": 0.4053441882133484, |
| "learning_rate": 9.274220458303727e-06, |
| "loss": 0.4332, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.770197486535009, |
| "grad_norm": 0.37079691886901855, |
| "learning_rate": 9.270963355093154e-06, |
| "loss": 0.4827, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.7712746858168761, |
| "grad_norm": 0.40362897515296936, |
| "learning_rate": 9.267699534345488e-06, |
| "loss": 0.457, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7723518850987433, |
| "grad_norm": 0.3751955032348633, |
| "learning_rate": 9.264429001194193e-06, |
| "loss": 0.4186, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.7734290843806104, |
| "grad_norm": 0.353248655796051, |
| "learning_rate": 9.261151760783289e-06, |
| "loss": 0.4459, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7745062836624775, |
| "grad_norm": 0.41425472497940063, |
| "learning_rate": 9.257867818267347e-06, |
| "loss": 0.4372, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.7755834829443446, |
| "grad_norm": 0.43795788288116455, |
| "learning_rate": 9.254577178811482e-06, |
| "loss": 0.4458, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7766606822262119, |
| "grad_norm": 0.3875672519207001, |
| "learning_rate": 9.251279847591338e-06, |
| "loss": 0.4455, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.777737881508079, |
| "grad_norm": 0.36370179057121277, |
| "learning_rate": 9.247975829793086e-06, |
| "loss": 0.4501, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7788150807899461, |
| "grad_norm": 0.35113972425460815, |
| "learning_rate": 9.244665130613411e-06, |
| "loss": 0.428, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.7798922800718133, |
| "grad_norm": 0.4404146075248718, |
| "learning_rate": 9.241347755259514e-06, |
| "loss": 0.458, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7809694793536804, |
| "grad_norm": 0.34337273240089417, |
| "learning_rate": 9.238023708949087e-06, |
| "loss": 0.4295, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7820466786355476, |
| "grad_norm": 0.34650593996047974, |
| "learning_rate": 9.234692996910324e-06, |
| "loss": 0.4494, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7831238779174147, |
| "grad_norm": 0.3992871940135956, |
| "learning_rate": 9.231355624381893e-06, |
| "loss": 0.4188, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7842010771992819, |
| "grad_norm": 0.39534062147140503, |
| "learning_rate": 9.22801159661295e-06, |
| "loss": 0.4548, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.785278276481149, |
| "grad_norm": 0.37745481729507446, |
| "learning_rate": 9.224660918863104e-06, |
| "loss": 0.4464, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.7863554757630161, |
| "grad_norm": 0.40657269954681396, |
| "learning_rate": 9.221303596402435e-06, |
| "loss": 0.4624, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7874326750448833, |
| "grad_norm": 0.4322795569896698, |
| "learning_rate": 9.217939634511473e-06, |
| "loss": 0.4469, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.7885098743267505, |
| "grad_norm": 0.3632979094982147, |
| "learning_rate": 9.214569038481183e-06, |
| "loss": 0.4548, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.7895870736086176, |
| "grad_norm": 0.3777786195278168, |
| "learning_rate": 9.21119181361297e-06, |
| "loss": 0.4183, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.7906642728904847, |
| "grad_norm": 0.39092251658439636, |
| "learning_rate": 9.207807965218668e-06, |
| "loss": 0.4216, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7917414721723519, |
| "grad_norm": 0.3456818163394928, |
| "learning_rate": 9.204417498620521e-06, |
| "loss": 0.4511, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.792818671454219, |
| "grad_norm": 0.39940401911735535, |
| "learning_rate": 9.201020419151191e-06, |
| "loss": 0.4586, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7938958707360861, |
| "grad_norm": 0.37644898891448975, |
| "learning_rate": 9.197616732153733e-06, |
| "loss": 0.4608, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.7949730700179534, |
| "grad_norm": 0.36318060755729675, |
| "learning_rate": 9.194206442981601e-06, |
| "loss": 0.4473, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7960502692998205, |
| "grad_norm": 0.3769637644290924, |
| "learning_rate": 9.190789556998627e-06, |
| "loss": 0.4338, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.7971274685816876, |
| "grad_norm": 0.37595972418785095, |
| "learning_rate": 9.187366079579025e-06, |
| "loss": 0.4374, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7982046678635547, |
| "grad_norm": 0.35937148332595825, |
| "learning_rate": 9.18393601610737e-06, |
| "loss": 0.4159, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.7992818671454219, |
| "grad_norm": 0.3800438642501831, |
| "learning_rate": 9.180499371978603e-06, |
| "loss": 0.4326, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.800359066427289, |
| "grad_norm": 0.36832118034362793, |
| "learning_rate": 9.17705615259801e-06, |
| "loss": 0.4255, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.8014362657091562, |
| "grad_norm": 0.33298200368881226, |
| "learning_rate": 9.173606363381218e-06, |
| "loss": 0.4216, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8025134649910234, |
| "grad_norm": 0.39970219135284424, |
| "learning_rate": 9.170150009754193e-06, |
| "loss": 0.4473, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8035906642728905, |
| "grad_norm": 0.38992324471473694, |
| "learning_rate": 9.16668709715322e-06, |
| "loss": 0.4444, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8046678635547576, |
| "grad_norm": 0.5129222273826599, |
| "learning_rate": 9.163217631024901e-06, |
| "loss": 0.4381, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.8057450628366247, |
| "grad_norm": 0.4020202159881592, |
| "learning_rate": 9.159741616826152e-06, |
| "loss": 0.4901, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.806822262118492, |
| "grad_norm": 0.3771527111530304, |
| "learning_rate": 9.156259060024177e-06, |
| "loss": 0.452, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8078994614003591, |
| "grad_norm": 0.397592157125473, |
| "learning_rate": 9.152769966096483e-06, |
| "loss": 0.4195, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8089766606822262, |
| "grad_norm": 0.3930964171886444, |
| "learning_rate": 9.149274340530848e-06, |
| "loss": 0.4313, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8100538599640934, |
| "grad_norm": 0.3677625358104706, |
| "learning_rate": 9.145772188825328e-06, |
| "loss": 0.4126, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8111310592459605, |
| "grad_norm": 0.38228854537010193, |
| "learning_rate": 9.142263516488246e-06, |
| "loss": 0.4368, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8122082585278276, |
| "grad_norm": 0.37611261010169983, |
| "learning_rate": 9.138748329038175e-06, |
| "loss": 0.4218, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8132854578096947, |
| "grad_norm": 0.3851577341556549, |
| "learning_rate": 9.135226632003942e-06, |
| "loss": 0.4612, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.814362657091562, |
| "grad_norm": 0.3648301362991333, |
| "learning_rate": 9.131698430924606e-06, |
| "loss": 0.405, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.8154398563734291, |
| "grad_norm": 0.34730029106140137, |
| "learning_rate": 9.12816373134946e-06, |
| "loss": 0.4286, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8165170556552962, |
| "grad_norm": 0.42742881178855896, |
| "learning_rate": 9.124622538838015e-06, |
| "loss": 0.4343, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8175942549371634, |
| "grad_norm": 0.36673569679260254, |
| "learning_rate": 9.121074858959997e-06, |
| "loss": 0.4206, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8186714542190305, |
| "grad_norm": 0.4314405620098114, |
| "learning_rate": 9.117520697295337e-06, |
| "loss": 0.4568, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8197486535008977, |
| "grad_norm": 0.4013466238975525, |
| "learning_rate": 9.113960059434157e-06, |
| "loss": 0.4349, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8208258527827648, |
| "grad_norm": 0.4372273087501526, |
| "learning_rate": 9.110392950976765e-06, |
| "loss": 0.4273, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.821903052064632, |
| "grad_norm": 0.4042438566684723, |
| "learning_rate": 9.10681937753365e-06, |
| "loss": 0.4513, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.8229802513464991, |
| "grad_norm": 0.45694154500961304, |
| "learning_rate": 9.103239344725465e-06, |
| "loss": 0.4583, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8240574506283662, |
| "grad_norm": 0.4015597403049469, |
| "learning_rate": 9.099652858183027e-06, |
| "loss": 0.413, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8251346499102334, |
| "grad_norm": 0.37840738892555237, |
| "learning_rate": 9.0960599235473e-06, |
| "loss": 0.4404, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8262118491921006, |
| "grad_norm": 0.4393068552017212, |
| "learning_rate": 9.092460546469393e-06, |
| "loss": 0.4271, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8272890484739677, |
| "grad_norm": 0.3934628963470459, |
| "learning_rate": 9.088854732610544e-06, |
| "loss": 0.4474, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8283662477558348, |
| "grad_norm": 0.3966236412525177, |
| "learning_rate": 9.085242487642117e-06, |
| "loss": 0.4413, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.829443447037702, |
| "grad_norm": 0.37130579352378845, |
| "learning_rate": 9.081623817245591e-06, |
| "loss": 0.4308, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8305206463195691, |
| "grad_norm": 0.45101043581962585, |
| "learning_rate": 9.077998727112553e-06, |
| "loss": 0.4425, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8315978456014362, |
| "grad_norm": 0.3699485659599304, |
| "learning_rate": 9.074367222944686e-06, |
| "loss": 0.4219, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8326750448833035, |
| "grad_norm": 0.43512436747550964, |
| "learning_rate": 9.070729310453759e-06, |
| "loss": 0.4318, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8337522441651706, |
| "grad_norm": 0.34934020042419434, |
| "learning_rate": 9.067084995361623e-06, |
| "loss": 0.4371, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8348294434470377, |
| "grad_norm": 0.3924720585346222, |
| "learning_rate": 9.063434283400199e-06, |
| "loss": 0.4445, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8359066427289048, |
| "grad_norm": 0.37331345677375793, |
| "learning_rate": 9.059777180311466e-06, |
| "loss": 0.4383, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.836983842010772, |
| "grad_norm": 0.42386674880981445, |
| "learning_rate": 9.056113691847462e-06, |
| "loss": 0.423, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8380610412926391, |
| "grad_norm": 0.3515479564666748, |
| "learning_rate": 9.05244382377026e-06, |
| "loss": 0.4491, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8391382405745063, |
| "grad_norm": 0.3614935874938965, |
| "learning_rate": 9.048767581851973e-06, |
| "loss": 0.463, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8402154398563735, |
| "grad_norm": 0.4254121780395508, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.4083, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8412926391382406, |
| "grad_norm": 0.3752104938030243, |
| "learning_rate": 9.041395999630704e-06, |
| "loss": 0.4449, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8423698384201077, |
| "grad_norm": 0.3893478810787201, |
| "learning_rate": 9.037700670922034e-06, |
| "loss": 0.4313, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8434470377019748, |
| "grad_norm": 0.45131829380989075, |
| "learning_rate": 9.033998991560881e-06, |
| "loss": 0.4506, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.844524236983842, |
| "grad_norm": 0.36699992418289185, |
| "learning_rate": 9.030290967369392e-06, |
| "loss": 0.4368, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8456014362657092, |
| "grad_norm": 0.4408794939517975, |
| "learning_rate": 9.026576604179689e-06, |
| "loss": 0.4366, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8466786355475763, |
| "grad_norm": 0.36175811290740967, |
| "learning_rate": 9.022855907833872e-06, |
| "loss": 0.44, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8477558348294435, |
| "grad_norm": 0.38207605481147766, |
| "learning_rate": 9.019128884183992e-06, |
| "loss": 0.4197, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8488330341113106, |
| "grad_norm": 0.3802523612976074, |
| "learning_rate": 9.015395539092057e-06, |
| "loss": 0.4259, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8499102333931777, |
| "grad_norm": 0.38638371229171753, |
| "learning_rate": 9.011655878430018e-06, |
| "loss": 0.4078, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8509874326750448, |
| "grad_norm": 0.3900395333766937, |
| "learning_rate": 9.00790990807976e-06, |
| "loss": 0.4568, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8520646319569121, |
| "grad_norm": 0.42712318897247314, |
| "learning_rate": 9.00415763393309e-06, |
| "loss": 0.4361, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8531418312387792, |
| "grad_norm": 0.4065674841403961, |
| "learning_rate": 9.000399061891728e-06, |
| "loss": 0.4504, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8542190305206463, |
| "grad_norm": 0.4172511696815491, |
| "learning_rate": 8.996634197867307e-06, |
| "loss": 0.4325, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8552962298025135, |
| "grad_norm": 0.40288570523262024, |
| "learning_rate": 8.992863047781346e-06, |
| "loss": 0.4309, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8563734290843806, |
| "grad_norm": 0.39536306262016296, |
| "learning_rate": 8.989085617565261e-06, |
| "loss": 0.4063, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8574506283662477, |
| "grad_norm": 0.3941093385219574, |
| "learning_rate": 8.985301913160338e-06, |
| "loss": 0.4214, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8585278276481149, |
| "grad_norm": 0.381067156791687, |
| "learning_rate": 8.981511940517734e-06, |
| "loss": 0.4384, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8596050269299821, |
| "grad_norm": 0.3987753689289093, |
| "learning_rate": 8.977715705598469e-06, |
| "loss": 0.4316, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.8606822262118492, |
| "grad_norm": 0.4092214107513428, |
| "learning_rate": 8.973913214373405e-06, |
| "loss": 0.4381, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8617594254937163, |
| "grad_norm": 0.423684298992157, |
| "learning_rate": 8.970104472823249e-06, |
| "loss": 0.4426, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.8628366247755835, |
| "grad_norm": 0.3508121073246002, |
| "learning_rate": 8.966289486938537e-06, |
| "loss": 0.4275, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8639138240574507, |
| "grad_norm": 0.3975047767162323, |
| "learning_rate": 8.96246826271963e-06, |
| "loss": 0.4399, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8649910233393178, |
| "grad_norm": 0.5154191255569458, |
| "learning_rate": 8.958640806176695e-06, |
| "loss": 0.4609, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.8660682226211849, |
| "grad_norm": 0.33385977149009705, |
| "learning_rate": 8.954807123329703e-06, |
| "loss": 0.4431, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.8671454219030521, |
| "grad_norm": 0.460953027009964, |
| "learning_rate": 8.950967220208425e-06, |
| "loss": 0.4309, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.8682226211849192, |
| "grad_norm": 0.3735499680042267, |
| "learning_rate": 8.947121102852402e-06, |
| "loss": 0.445, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.8692998204667863, |
| "grad_norm": 0.37893158197402954, |
| "learning_rate": 8.943268777310965e-06, |
| "loss": 0.4419, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.8703770197486534, |
| "grad_norm": 0.39892786741256714, |
| "learning_rate": 8.939410249643195e-06, |
| "loss": 0.4393, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.8714542190305207, |
| "grad_norm": 0.38968032598495483, |
| "learning_rate": 8.935545525917936e-06, |
| "loss": 0.4303, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.8725314183123878, |
| "grad_norm": 0.35072895884513855, |
| "learning_rate": 8.93167461221378e-06, |
| "loss": 0.4472, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8736086175942549, |
| "grad_norm": 0.3633287847042084, |
| "learning_rate": 8.927797514619043e-06, |
| "loss": 0.4239, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.8746858168761221, |
| "grad_norm": 0.3359943926334381, |
| "learning_rate": 8.923914239231779e-06, |
| "loss": 0.4284, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.8757630161579892, |
| "grad_norm": 0.3926851749420166, |
| "learning_rate": 8.920024792159754e-06, |
| "loss": 0.4167, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.8768402154398564, |
| "grad_norm": 0.37534475326538086, |
| "learning_rate": 8.916129179520443e-06, |
| "loss": 0.4273, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.8779174147217235, |
| "grad_norm": 0.35639283061027527, |
| "learning_rate": 8.912227407441013e-06, |
| "loss": 0.4063, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.8789946140035907, |
| "grad_norm": 0.4475279152393341, |
| "learning_rate": 8.908319482058325e-06, |
| "loss": 0.4196, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.8800718132854578, |
| "grad_norm": 0.392677903175354, |
| "learning_rate": 8.904405409518916e-06, |
| "loss": 0.4389, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.8811490125673249, |
| "grad_norm": 0.3904305100440979, |
| "learning_rate": 8.90048519597899e-06, |
| "loss": 0.4591, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.8822262118491921, |
| "grad_norm": 0.35532963275909424, |
| "learning_rate": 8.896558847604414e-06, |
| "loss": 0.4203, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.8833034111310593, |
| "grad_norm": 0.3612072467803955, |
| "learning_rate": 8.892626370570699e-06, |
| "loss": 0.4271, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8843806104129264, |
| "grad_norm": 0.3656427562236786, |
| "learning_rate": 8.888687771062999e-06, |
| "loss": 0.4275, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.8854578096947935, |
| "grad_norm": 0.3995424211025238, |
| "learning_rate": 8.884743055276092e-06, |
| "loss": 0.4418, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.8865350089766607, |
| "grad_norm": 0.37365880608558655, |
| "learning_rate": 8.880792229414387e-06, |
| "loss": 0.4201, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.8876122082585278, |
| "grad_norm": 0.4127524793148041, |
| "learning_rate": 8.876835299691892e-06, |
| "loss": 0.4103, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.8886894075403949, |
| "grad_norm": 0.3745189309120178, |
| "learning_rate": 8.87287227233222e-06, |
| "loss": 0.4315, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.8897666068222622, |
| "grad_norm": 0.3703850507736206, |
| "learning_rate": 8.868903153568577e-06, |
| "loss": 0.4111, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.8908438061041293, |
| "grad_norm": 0.366468608379364, |
| "learning_rate": 8.864927949643744e-06, |
| "loss": 0.4236, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.8919210053859964, |
| "grad_norm": 0.4204203486442566, |
| "learning_rate": 8.860946666810078e-06, |
| "loss": 0.4395, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8929982046678635, |
| "grad_norm": 0.40308547019958496, |
| "learning_rate": 8.856959311329495e-06, |
| "loss": 0.4254, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.8940754039497307, |
| "grad_norm": 0.38909968733787537, |
| "learning_rate": 8.852965889473464e-06, |
| "loss": 0.4308, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8951526032315978, |
| "grad_norm": 0.4318833649158478, |
| "learning_rate": 8.848966407522992e-06, |
| "loss": 0.4354, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.896229802513465, |
| "grad_norm": 0.3676931858062744, |
| "learning_rate": 8.844960871768618e-06, |
| "loss": 0.45, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.8973070017953322, |
| "grad_norm": 0.39167362451553345, |
| "learning_rate": 8.84094928851041e-06, |
| "loss": 0.4468, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.8983842010771993, |
| "grad_norm": 0.3510020673274994, |
| "learning_rate": 8.836931664057935e-06, |
| "loss": 0.4294, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.8994614003590664, |
| "grad_norm": 0.3821624517440796, |
| "learning_rate": 8.832908004730274e-06, |
| "loss": 0.4422, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.9005385996409335, |
| "grad_norm": 0.38731488585472107, |
| "learning_rate": 8.828878316855994e-06, |
| "loss": 0.4066, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.9016157989228007, |
| "grad_norm": 0.34374868869781494, |
| "learning_rate": 8.824842606773142e-06, |
| "loss": 0.4421, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9026929982046679, |
| "grad_norm": 0.41355276107788086, |
| "learning_rate": 8.82080088082924e-06, |
| "loss": 0.421, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.903770197486535, |
| "grad_norm": 0.3896749019622803, |
| "learning_rate": 8.816753145381276e-06, |
| "loss": 0.4304, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9048473967684022, |
| "grad_norm": 0.39807990193367004, |
| "learning_rate": 8.812699406795683e-06, |
| "loss": 0.4462, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9059245960502693, |
| "grad_norm": 0.34178224205970764, |
| "learning_rate": 8.808639671448334e-06, |
| "loss": 0.4185, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.9070017953321364, |
| "grad_norm": 0.3645724058151245, |
| "learning_rate": 8.804573945724544e-06, |
| "loss": 0.4167, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9080789946140035, |
| "grad_norm": 0.35154834389686584, |
| "learning_rate": 8.800502236019045e-06, |
| "loss": 0.4142, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9091561938958708, |
| "grad_norm": 0.43520885705947876, |
| "learning_rate": 8.796424548735975e-06, |
| "loss": 0.4486, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9102333931777379, |
| "grad_norm": 0.38887906074523926, |
| "learning_rate": 8.792340890288884e-06, |
| "loss": 0.4419, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.911310592459605, |
| "grad_norm": 0.3473518192768097, |
| "learning_rate": 8.788251267100704e-06, |
| "loss": 0.4151, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9123877917414722, |
| "grad_norm": 0.4001302421092987, |
| "learning_rate": 8.78415568560376e-06, |
| "loss": 0.4186, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9134649910233393, |
| "grad_norm": 0.4523400664329529, |
| "learning_rate": 8.780054152239734e-06, |
| "loss": 0.4695, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9145421903052064, |
| "grad_norm": 0.36641725897789, |
| "learning_rate": 8.775946673459682e-06, |
| "loss": 0.4095, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9156193895870736, |
| "grad_norm": 0.42685094475746155, |
| "learning_rate": 8.771833255724004e-06, |
| "loss": 0.4286, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9166965888689408, |
| "grad_norm": 0.46389880776405334, |
| "learning_rate": 8.767713905502444e-06, |
| "loss": 0.4668, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9177737881508079, |
| "grad_norm": 0.3745497465133667, |
| "learning_rate": 8.763588629274077e-06, |
| "loss": 0.4483, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.918850987432675, |
| "grad_norm": 0.4221557378768921, |
| "learning_rate": 8.759457433527296e-06, |
| "loss": 0.4492, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9199281867145422, |
| "grad_norm": 0.4107033908367157, |
| "learning_rate": 8.755320324759808e-06, |
| "loss": 0.4216, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9210053859964094, |
| "grad_norm": 0.37441039085388184, |
| "learning_rate": 8.751177309478618e-06, |
| "loss": 0.4021, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9220825852782765, |
| "grad_norm": 0.4322914481163025, |
| "learning_rate": 8.747028394200019e-06, |
| "loss": 0.4658, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9231597845601436, |
| "grad_norm": 0.4002901315689087, |
| "learning_rate": 8.74287358544959e-06, |
| "loss": 0.4388, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9242369838420108, |
| "grad_norm": 0.4236518442630768, |
| "learning_rate": 8.73871288976217e-06, |
| "loss": 0.4258, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9253141831238779, |
| "grad_norm": 0.41266000270843506, |
| "learning_rate": 8.734546313681869e-06, |
| "loss": 0.4204, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.926391382405745, |
| "grad_norm": 0.4816506505012512, |
| "learning_rate": 8.730373863762036e-06, |
| "loss": 0.405, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9274685816876123, |
| "grad_norm": 0.36234742403030396, |
| "learning_rate": 8.726195546565264e-06, |
| "loss": 0.4455, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9285457809694794, |
| "grad_norm": 0.4364088177680969, |
| "learning_rate": 8.722011368663373e-06, |
| "loss": 0.4361, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9296229802513465, |
| "grad_norm": 0.46902528405189514, |
| "learning_rate": 8.717821336637397e-06, |
| "loss": 0.4379, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9307001795332136, |
| "grad_norm": 0.3549644649028778, |
| "learning_rate": 8.713625457077585e-06, |
| "loss": 0.4384, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9317773788150808, |
| "grad_norm": 0.4156612157821655, |
| "learning_rate": 8.70942373658338e-06, |
| "loss": 0.4329, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9328545780969479, |
| "grad_norm": 0.4271438717842102, |
| "learning_rate": 8.705216181763407e-06, |
| "loss": 0.4279, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.933931777378815, |
| "grad_norm": 0.3731890022754669, |
| "learning_rate": 8.701002799235475e-06, |
| "loss": 0.4318, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9350089766606823, |
| "grad_norm": 0.4255085587501526, |
| "learning_rate": 8.696783595626555e-06, |
| "loss": 0.4375, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9360861759425494, |
| "grad_norm": 0.36597201228141785, |
| "learning_rate": 8.692558577572773e-06, |
| "loss": 0.4118, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9371633752244165, |
| "grad_norm": 0.42821747064590454, |
| "learning_rate": 8.688327751719403e-06, |
| "loss": 0.4368, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9382405745062836, |
| "grad_norm": 0.47871869802474976, |
| "learning_rate": 8.684091124720852e-06, |
| "loss": 0.4417, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9393177737881508, |
| "grad_norm": 0.3639247715473175, |
| "learning_rate": 8.679848703240652e-06, |
| "loss": 0.4333, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.940394973070018, |
| "grad_norm": 0.45801442861557007, |
| "learning_rate": 8.675600493951448e-06, |
| "loss": 0.4501, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.9414721723518851, |
| "grad_norm": 0.39988264441490173, |
| "learning_rate": 8.671346503534987e-06, |
| "loss": 0.4096, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9425493716337523, |
| "grad_norm": 0.4402971565723419, |
| "learning_rate": 8.667086738682114e-06, |
| "loss": 0.4286, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9436265709156194, |
| "grad_norm": 0.42627960443496704, |
| "learning_rate": 8.662821206092749e-06, |
| "loss": 0.4597, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9447037701974865, |
| "grad_norm": 0.4394327402114868, |
| "learning_rate": 8.65854991247589e-06, |
| "loss": 0.4249, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9457809694793536, |
| "grad_norm": 0.40527400374412537, |
| "learning_rate": 8.654272864549592e-06, |
| "loss": 0.4246, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9468581687612209, |
| "grad_norm": 0.35895228385925293, |
| "learning_rate": 8.64999006904096e-06, |
| "loss": 0.4421, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.947935368043088, |
| "grad_norm": 0.4643931984901428, |
| "learning_rate": 8.645701532686146e-06, |
| "loss": 0.4203, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9490125673249551, |
| "grad_norm": 0.40228623151779175, |
| "learning_rate": 8.641407262230325e-06, |
| "loss": 0.4416, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9500897666068223, |
| "grad_norm": 0.3212444484233856, |
| "learning_rate": 8.63710726442769e-06, |
| "loss": 0.4188, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9511669658886894, |
| "grad_norm": 0.41637882590293884, |
| "learning_rate": 8.632801546041447e-06, |
| "loss": 0.438, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9522441651705565, |
| "grad_norm": 0.3686106503009796, |
| "learning_rate": 8.628490113843798e-06, |
| "loss": 0.4165, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.9533213644524237, |
| "grad_norm": 0.3435036540031433, |
| "learning_rate": 8.624172974615926e-06, |
| "loss": 0.4005, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9543985637342909, |
| "grad_norm": 0.40801119804382324, |
| "learning_rate": 8.619850135148002e-06, |
| "loss": 0.44, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.955475763016158, |
| "grad_norm": 0.3914053738117218, |
| "learning_rate": 8.615521602239151e-06, |
| "loss": 0.43, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9565529622980251, |
| "grad_norm": 0.3459240794181824, |
| "learning_rate": 8.611187382697459e-06, |
| "loss": 0.4501, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.9576301615798922, |
| "grad_norm": 0.4208565652370453, |
| "learning_rate": 8.606847483339957e-06, |
| "loss": 0.4423, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.9587073608617595, |
| "grad_norm": 0.402261883020401, |
| "learning_rate": 8.602501910992604e-06, |
| "loss": 0.4305, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9597845601436266, |
| "grad_norm": 0.41275733709335327, |
| "learning_rate": 8.598150672490289e-06, |
| "loss": 0.4377, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.9608617594254937, |
| "grad_norm": 0.4121462106704712, |
| "learning_rate": 8.593793774676804e-06, |
| "loss": 0.4474, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9619389587073609, |
| "grad_norm": 0.3706258237361908, |
| "learning_rate": 8.58943122440485e-06, |
| "loss": 0.4359, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.963016157989228, |
| "grad_norm": 0.3740246891975403, |
| "learning_rate": 8.585063028536015e-06, |
| "loss": 0.4366, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.9640933572710951, |
| "grad_norm": 0.37208443880081177, |
| "learning_rate": 8.58068919394077e-06, |
| "loss": 0.4444, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.9651705565529622, |
| "grad_norm": 0.39575937390327454, |
| "learning_rate": 8.576309727498446e-06, |
| "loss": 0.4347, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.9662477558348295, |
| "grad_norm": 0.4086984395980835, |
| "learning_rate": 8.571924636097245e-06, |
| "loss": 0.4563, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.9673249551166966, |
| "grad_norm": 0.34942734241485596, |
| "learning_rate": 8.567533926634203e-06, |
| "loss": 0.4363, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.9684021543985637, |
| "grad_norm": 0.38321834802627563, |
| "learning_rate": 8.563137606015201e-06, |
| "loss": 0.4313, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.9694793536804309, |
| "grad_norm": 0.4460221529006958, |
| "learning_rate": 8.558735681154944e-06, |
| "loss": 0.4113, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.970556552962298, |
| "grad_norm": 0.42695295810699463, |
| "learning_rate": 8.554328158976948e-06, |
| "loss": 0.423, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.9716337522441651, |
| "grad_norm": 0.3403206467628479, |
| "learning_rate": 8.549915046413537e-06, |
| "loss": 0.4425, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.9727109515260323, |
| "grad_norm": 0.34595513343811035, |
| "learning_rate": 8.545496350405825e-06, |
| "loss": 0.4319, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.9737881508078995, |
| "grad_norm": 0.43634214997291565, |
| "learning_rate": 8.54107207790371e-06, |
| "loss": 0.4289, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.9748653500897666, |
| "grad_norm": 0.3104211091995239, |
| "learning_rate": 8.536642235865857e-06, |
| "loss": 0.4062, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.9759425493716337, |
| "grad_norm": 0.37977078557014465, |
| "learning_rate": 8.532206831259695e-06, |
| "loss": 0.416, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.9770197486535009, |
| "grad_norm": 0.35528695583343506, |
| "learning_rate": 8.527765871061403e-06, |
| "loss": 0.4385, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.978096947935368, |
| "grad_norm": 0.3993757665157318, |
| "learning_rate": 8.523319362255894e-06, |
| "loss": 0.4515, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.9791741472172352, |
| "grad_norm": 0.36851876974105835, |
| "learning_rate": 8.518867311836808e-06, |
| "loss": 0.419, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.9802513464991023, |
| "grad_norm": 0.3733499348163605, |
| "learning_rate": 8.514409726806506e-06, |
| "loss": 0.4422, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9813285457809695, |
| "grad_norm": 0.37746962904930115, |
| "learning_rate": 8.509946614176047e-06, |
| "loss": 0.4402, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.9824057450628366, |
| "grad_norm": 0.3641514480113983, |
| "learning_rate": 8.505477980965191e-06, |
| "loss": 0.4472, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.9834829443447037, |
| "grad_norm": 0.41282108426094055, |
| "learning_rate": 8.501003834202377e-06, |
| "loss": 0.4536, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.984560143626571, |
| "grad_norm": 0.38131409883499146, |
| "learning_rate": 8.49652418092472e-06, |
| "loss": 0.437, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.9856373429084381, |
| "grad_norm": 0.4030049741268158, |
| "learning_rate": 8.492039028177985e-06, |
| "loss": 0.4174, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.9867145421903052, |
| "grad_norm": 0.4333740770816803, |
| "learning_rate": 8.487548383016602e-06, |
| "loss": 0.4381, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.9877917414721723, |
| "grad_norm": 0.3468566834926605, |
| "learning_rate": 8.483052252503629e-06, |
| "loss": 0.4582, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.9888689407540395, |
| "grad_norm": 0.36902710795402527, |
| "learning_rate": 8.478550643710754e-06, |
| "loss": 0.4361, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.9899461400359066, |
| "grad_norm": 0.38729527592658997, |
| "learning_rate": 8.474043563718287e-06, |
| "loss": 0.4302, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.9910233393177738, |
| "grad_norm": 0.38706785440444946, |
| "learning_rate": 8.469531019615132e-06, |
| "loss": 0.4726, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.992100538599641, |
| "grad_norm": 0.3167583644390106, |
| "learning_rate": 8.465013018498796e-06, |
| "loss": 0.4359, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.9931777378815081, |
| "grad_norm": 0.360503226518631, |
| "learning_rate": 8.460489567475367e-06, |
| "loss": 0.4346, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.9942549371633752, |
| "grad_norm": 0.42758846282958984, |
| "learning_rate": 8.455960673659507e-06, |
| "loss": 0.4714, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.9953321364452423, |
| "grad_norm": 0.3499755859375, |
| "learning_rate": 8.451426344174433e-06, |
| "loss": 0.4286, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.9964093357271095, |
| "grad_norm": 0.40906208753585815, |
| "learning_rate": 8.446886586151914e-06, |
| "loss": 0.4267, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.9974865350089767, |
| "grad_norm": 0.36410462856292725, |
| "learning_rate": 8.442341406732261e-06, |
| "loss": 0.4222, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.9985637342908438, |
| "grad_norm": 0.42686885595321655, |
| "learning_rate": 8.437790813064305e-06, |
| "loss": 0.4459, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.999640933572711, |
| "grad_norm": 0.36179864406585693, |
| "learning_rate": 8.433234812305402e-06, |
| "loss": 0.4501, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.000718132854578, |
| "grad_norm": 0.7051083445549011, |
| "learning_rate": 8.4286734116214e-06, |
| "loss": 0.6643, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.0017953321364452, |
| "grad_norm": 0.39998215436935425, |
| "learning_rate": 8.424106618186653e-06, |
| "loss": 0.4096, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0028725314183125, |
| "grad_norm": 0.39742282032966614, |
| "learning_rate": 8.419534439183987e-06, |
| "loss": 0.4254, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.0039497307001795, |
| "grad_norm": 0.37354665994644165, |
| "learning_rate": 8.414956881804706e-06, |
| "loss": 0.3737, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.0050269299820467, |
| "grad_norm": 0.38051116466522217, |
| "learning_rate": 8.41037395324857e-06, |
| "loss": 0.3806, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.006104129263914, |
| "grad_norm": 0.44809573888778687, |
| "learning_rate": 8.405785660723784e-06, |
| "loss": 0.448, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.007181328545781, |
| "grad_norm": 0.42937591671943665, |
| "learning_rate": 8.401192011446995e-06, |
| "loss": 0.3847, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.0082585278276481, |
| "grad_norm": 0.45236900448799133, |
| "learning_rate": 8.396593012643272e-06, |
| "loss": 0.4186, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.0093357271095154, |
| "grad_norm": 0.4174693822860718, |
| "learning_rate": 8.391988671546099e-06, |
| "loss": 0.3773, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.0104129263913824, |
| "grad_norm": 0.446155309677124, |
| "learning_rate": 8.387378995397363e-06, |
| "loss": 0.4318, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.0114901256732496, |
| "grad_norm": 0.3945539593696594, |
| "learning_rate": 8.382763991447344e-06, |
| "loss": 0.4022, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.0125673249551166, |
| "grad_norm": 0.38236725330352783, |
| "learning_rate": 8.378143666954696e-06, |
| "loss": 0.3545, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0136445242369838, |
| "grad_norm": 0.49245333671569824, |
| "learning_rate": 8.373518029186448e-06, |
| "loss": 0.4435, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.014721723518851, |
| "grad_norm": 0.4444209039211273, |
| "learning_rate": 8.368887085417979e-06, |
| "loss": 0.4233, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.015798922800718, |
| "grad_norm": 0.38781559467315674, |
| "learning_rate": 8.364250842933019e-06, |
| "loss": 0.3919, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.0168761220825853, |
| "grad_norm": 0.47153082489967346, |
| "learning_rate": 8.359609309023632e-06, |
| "loss": 0.4201, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0179533213644525, |
| "grad_norm": 0.39679715037345886, |
| "learning_rate": 8.354962490990202e-06, |
| "loss": 0.3837, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.0190305206463195, |
| "grad_norm": 0.440632164478302, |
| "learning_rate": 8.350310396141424e-06, |
| "loss": 0.4389, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.0201077199281867, |
| "grad_norm": 0.3632691204547882, |
| "learning_rate": 8.345653031794292e-06, |
| "loss": 0.3517, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.021184919210054, |
| "grad_norm": 0.49212685227394104, |
| "learning_rate": 8.340990405274092e-06, |
| "loss": 0.4624, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.022262118491921, |
| "grad_norm": 0.3657776713371277, |
| "learning_rate": 8.336322523914385e-06, |
| "loss": 0.4006, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.0233393177737882, |
| "grad_norm": 0.47523999214172363, |
| "learning_rate": 8.331649395056996e-06, |
| "loss": 0.434, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0244165170556554, |
| "grad_norm": 0.3606716990470886, |
| "learning_rate": 8.326971026052e-06, |
| "loss": 0.3835, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.0254937163375224, |
| "grad_norm": 0.4083137810230255, |
| "learning_rate": 8.32228742425772e-06, |
| "loss": 0.4629, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.0265709156193896, |
| "grad_norm": 0.35142168402671814, |
| "learning_rate": 8.317598597040706e-06, |
| "loss": 0.3599, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.0276481149012566, |
| "grad_norm": 0.4037002623081207, |
| "learning_rate": 8.312904551775731e-06, |
| "loss": 0.4073, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.0287253141831239, |
| "grad_norm": 0.39003700017929077, |
| "learning_rate": 8.308205295845769e-06, |
| "loss": 0.4228, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.029802513464991, |
| "grad_norm": 0.41409826278686523, |
| "learning_rate": 8.303500836641992e-06, |
| "loss": 0.4328, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.030879712746858, |
| "grad_norm": 0.3729735314846039, |
| "learning_rate": 8.298791181563755e-06, |
| "loss": 0.4083, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.0319569120287253, |
| "grad_norm": 0.39012017846107483, |
| "learning_rate": 8.29407633801859e-06, |
| "loss": 0.383, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.0330341113105925, |
| "grad_norm": 0.4182110130786896, |
| "learning_rate": 8.289356313422182e-06, |
| "loss": 0.403, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.0341113105924595, |
| "grad_norm": 0.4065341651439667, |
| "learning_rate": 8.284631115198371e-06, |
| "loss": 0.4432, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.0351885098743268, |
| "grad_norm": 0.40570753812789917, |
| "learning_rate": 8.279900750779137e-06, |
| "loss": 0.3694, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.036265709156194, |
| "grad_norm": 0.42988771200180054, |
| "learning_rate": 8.275165227604574e-06, |
| "loss": 0.4224, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.037342908438061, |
| "grad_norm": 0.35634714365005493, |
| "learning_rate": 8.2704245531229e-06, |
| "loss": 0.3693, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.0384201077199282, |
| "grad_norm": 0.4628054201602936, |
| "learning_rate": 8.26567873479043e-06, |
| "loss": 0.4245, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.0394973070017954, |
| "grad_norm": 0.39119595289230347, |
| "learning_rate": 8.260927780071572e-06, |
| "loss": 0.3917, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.0405745062836624, |
| "grad_norm": 0.39787107706069946, |
| "learning_rate": 8.256171696438817e-06, |
| "loss": 0.4077, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.0416517055655297, |
| "grad_norm": 0.38707029819488525, |
| "learning_rate": 8.251410491372711e-06, |
| "loss": 0.39, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.0427289048473967, |
| "grad_norm": 0.42183634638786316, |
| "learning_rate": 8.246644172361866e-06, |
| "loss": 0.4129, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.043806104129264, |
| "grad_norm": 0.39003661274909973, |
| "learning_rate": 8.241872746902934e-06, |
| "loss": 0.3921, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.0448833034111311, |
| "grad_norm": 0.3627468943595886, |
| "learning_rate": 8.237096222500597e-06, |
| "loss": 0.418, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.0459605026929981, |
| "grad_norm": 0.40649735927581787, |
| "learning_rate": 8.232314606667559e-06, |
| "loss": 0.4213, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.0470377019748653, |
| "grad_norm": 0.44418761134147644, |
| "learning_rate": 8.22752790692453e-06, |
| "loss": 0.4184, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.0481149012567326, |
| "grad_norm": 0.3849025070667267, |
| "learning_rate": 8.222736130800219e-06, |
| "loss": 0.42, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.0491921005385996, |
| "grad_norm": 0.36222249269485474, |
| "learning_rate": 8.217939285831315e-06, |
| "loss": 0.3772, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.0502692998204668, |
| "grad_norm": 0.4193369448184967, |
| "learning_rate": 8.213137379562486e-06, |
| "loss": 0.4088, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.051346499102334, |
| "grad_norm": 0.3462792634963989, |
| "learning_rate": 8.208330419546353e-06, |
| "loss": 0.3855, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.052423698384201, |
| "grad_norm": 0.4043339788913727, |
| "learning_rate": 8.203518413343492e-06, |
| "loss": 0.4117, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.0535008976660682, |
| "grad_norm": 0.36096611618995667, |
| "learning_rate": 8.198701368522413e-06, |
| "loss": 0.4169, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.0545780969479355, |
| "grad_norm": 0.34872111678123474, |
| "learning_rate": 8.19387929265955e-06, |
| "loss": 0.4248, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.0556552962298025, |
| "grad_norm": 0.34945443272590637, |
| "learning_rate": 8.189052193339251e-06, |
| "loss": 0.4002, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.0567324955116697, |
| "grad_norm": 0.3416961133480072, |
| "learning_rate": 8.184220078153768e-06, |
| "loss": 0.3687, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.0578096947935367, |
| "grad_norm": 0.3593612611293793, |
| "learning_rate": 8.179382954703236e-06, |
| "loss": 0.4435, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.058886894075404, |
| "grad_norm": 0.40099433064460754, |
| "learning_rate": 8.174540830595674e-06, |
| "loss": 0.4039, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.0599640933572712, |
| "grad_norm": 0.34969210624694824, |
| "learning_rate": 8.16969371344696e-06, |
| "loss": 0.4188, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.0610412926391382, |
| "grad_norm": 0.40064018964767456, |
| "learning_rate": 8.16484161088083e-06, |
| "loss": 0.427, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.0621184919210054, |
| "grad_norm": 0.3965623080730438, |
| "learning_rate": 8.159984530528859e-06, |
| "loss": 0.3961, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.0631956912028726, |
| "grad_norm": 0.37393060326576233, |
| "learning_rate": 8.155122480030454e-06, |
| "loss": 0.4067, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.0642728904847396, |
| "grad_norm": 0.39971593022346497, |
| "learning_rate": 8.150255467032831e-06, |
| "loss": 0.4462, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.0653500897666068, |
| "grad_norm": 0.3918885588645935, |
| "learning_rate": 8.14538349919102e-06, |
| "loss": 0.4158, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.066427289048474, |
| "grad_norm": 0.39894959330558777, |
| "learning_rate": 8.140506584167845e-06, |
| "loss": 0.4069, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.067504488330341, |
| "grad_norm": 0.34239354729652405, |
| "learning_rate": 8.135624729633902e-06, |
| "loss": 0.4134, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.0685816876122083, |
| "grad_norm": 0.4456005394458771, |
| "learning_rate": 8.130737943267563e-06, |
| "loss": 0.4018, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.0696588868940755, |
| "grad_norm": 0.3645940124988556, |
| "learning_rate": 8.12584623275496e-06, |
| "loss": 0.4127, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.0707360861759425, |
| "grad_norm": 0.5911320447921753, |
| "learning_rate": 8.12094960578996e-06, |
| "loss": 0.3952, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.0718132854578097, |
| "grad_norm": 0.3891417384147644, |
| "learning_rate": 8.11604807007417e-06, |
| "loss": 0.4177, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.0728904847396767, |
| "grad_norm": 0.3503113090991974, |
| "learning_rate": 8.111141633316914e-06, |
| "loss": 0.3882, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.073967684021544, |
| "grad_norm": 0.3674125373363495, |
| "learning_rate": 8.10623030323523e-06, |
| "loss": 0.4164, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.0750448833034112, |
| "grad_norm": 0.36334046721458435, |
| "learning_rate": 8.101314087553845e-06, |
| "loss": 0.3952, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.0761220825852782, |
| "grad_norm": 0.35265591740608215, |
| "learning_rate": 8.096392994005177e-06, |
| "loss": 0.4227, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.0771992818671454, |
| "grad_norm": 0.3510865569114685, |
| "learning_rate": 8.091467030329309e-06, |
| "loss": 0.3634, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0782764811490126, |
| "grad_norm": 0.38359448313713074, |
| "learning_rate": 8.086536204273994e-06, |
| "loss": 0.4036, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.0793536804308796, |
| "grad_norm": 0.3547118008136749, |
| "learning_rate": 8.081600523594622e-06, |
| "loss": 0.4408, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.0804308797127469, |
| "grad_norm": 0.3304808437824249, |
| "learning_rate": 8.076659996054226e-06, |
| "loss": 0.3613, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.081508078994614, |
| "grad_norm": 0.34392449259757996, |
| "learning_rate": 8.071714629423459e-06, |
| "loss": 0.4102, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.082585278276481, |
| "grad_norm": 0.3510746657848358, |
| "learning_rate": 8.066764431480584e-06, |
| "loss": 0.4384, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.0836624775583483, |
| "grad_norm": 0.35708552598953247, |
| "learning_rate": 8.061809410011466e-06, |
| "loss": 0.4413, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.0847396768402153, |
| "grad_norm": 0.35590386390686035, |
| "learning_rate": 8.056849572809555e-06, |
| "loss": 0.4162, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.0858168761220826, |
| "grad_norm": 0.3295712172985077, |
| "learning_rate": 8.051884927675879e-06, |
| "loss": 0.4208, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.0868940754039498, |
| "grad_norm": 0.3516232967376709, |
| "learning_rate": 8.046915482419018e-06, |
| "loss": 0.3652, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.0879712746858168, |
| "grad_norm": 0.3285958170890808, |
| "learning_rate": 8.041941244855113e-06, |
| "loss": 0.3883, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.089048473967684, |
| "grad_norm": 0.35579437017440796, |
| "learning_rate": 8.036962222807838e-06, |
| "loss": 0.4285, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.0901256732495512, |
| "grad_norm": 0.4027876853942871, |
| "learning_rate": 8.031978424108392e-06, |
| "loss": 0.4357, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.0912028725314182, |
| "grad_norm": 0.3562896251678467, |
| "learning_rate": 8.026989856595486e-06, |
| "loss": 0.437, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.0922800718132855, |
| "grad_norm": 0.373172402381897, |
| "learning_rate": 8.021996528115335e-06, |
| "loss": 0.3692, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.0933572710951527, |
| "grad_norm": 0.3635936379432678, |
| "learning_rate": 8.016998446521637e-06, |
| "loss": 0.4235, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.0944344703770197, |
| "grad_norm": 0.3417677581310272, |
| "learning_rate": 8.011995619675572e-06, |
| "loss": 0.4168, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.095511669658887, |
| "grad_norm": 0.46394291520118713, |
| "learning_rate": 8.00698805544578e-06, |
| "loss": 0.4154, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.0965888689407541, |
| "grad_norm": 0.3501376807689667, |
| "learning_rate": 8.001975761708348e-06, |
| "loss": 0.4171, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.0976660682226211, |
| "grad_norm": 0.38096755743026733, |
| "learning_rate": 7.996958746346812e-06, |
| "loss": 0.4483, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.0987432675044884, |
| "grad_norm": 0.37238049507141113, |
| "learning_rate": 7.991937017252127e-06, |
| "loss": 0.4116, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0998204667863556, |
| "grad_norm": 0.36288678646087646, |
| "learning_rate": 7.986910582322663e-06, |
| "loss": 0.39, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.1008976660682226, |
| "grad_norm": 0.3652805685997009, |
| "learning_rate": 7.981879449464191e-06, |
| "loss": 0.3878, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.1019748653500898, |
| "grad_norm": 0.40952664613723755, |
| "learning_rate": 7.976843626589876e-06, |
| "loss": 0.4132, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.1030520646319568, |
| "grad_norm": 0.3170050382614136, |
| "learning_rate": 7.971803121620252e-06, |
| "loss": 0.3741, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.104129263913824, |
| "grad_norm": 0.4450155794620514, |
| "learning_rate": 7.966757942483224e-06, |
| "loss": 0.4434, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.1052064631956913, |
| "grad_norm": 0.4054987132549286, |
| "learning_rate": 7.96170809711404e-06, |
| "loss": 0.3978, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.1062836624775583, |
| "grad_norm": 0.4028407633304596, |
| "learning_rate": 7.9566535934553e-06, |
| "loss": 0.3979, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.1073608617594255, |
| "grad_norm": 0.3763718605041504, |
| "learning_rate": 7.951594439456921e-06, |
| "loss": 0.4084, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.1084380610412927, |
| "grad_norm": 0.398004412651062, |
| "learning_rate": 7.946530643076138e-06, |
| "loss": 0.4045, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.1095152603231597, |
| "grad_norm": 0.3450349271297455, |
| "learning_rate": 7.941462212277484e-06, |
| "loss": 0.3927, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.110592459605027, |
| "grad_norm": 0.3936854302883148, |
| "learning_rate": 7.936389155032785e-06, |
| "loss": 0.4304, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.1116696588868942, |
| "grad_norm": 0.3582758605480194, |
| "learning_rate": 7.931311479321144e-06, |
| "loss": 0.3867, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.1127468581687612, |
| "grad_norm": 0.38478732109069824, |
| "learning_rate": 7.926229193128924e-06, |
| "loss": 0.4265, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.1138240574506284, |
| "grad_norm": 0.32597222924232483, |
| "learning_rate": 7.921142304449744e-06, |
| "loss": 0.3484, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.1149012567324954, |
| "grad_norm": 0.3714587092399597, |
| "learning_rate": 7.916050821284462e-06, |
| "loss": 0.43, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.1159784560143626, |
| "grad_norm": 0.31232473254203796, |
| "learning_rate": 7.910954751641157e-06, |
| "loss": 0.4043, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.1170556552962299, |
| "grad_norm": 0.32094812393188477, |
| "learning_rate": 7.905854103535128e-06, |
| "loss": 0.3947, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.1181328545780969, |
| "grad_norm": 0.3706560730934143, |
| "learning_rate": 7.90074888498887e-06, |
| "loss": 0.4713, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.119210053859964, |
| "grad_norm": 0.3783697187900543, |
| "learning_rate": 7.895639104032071e-06, |
| "loss": 0.3714, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.1202872531418313, |
| "grad_norm": 0.3523646295070648, |
| "learning_rate": 7.890524768701592e-06, |
| "loss": 0.428, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1213644524236983, |
| "grad_norm": 0.37586677074432373, |
| "learning_rate": 7.88540588704146e-06, |
| "loss": 0.4014, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.1224416517055655, |
| "grad_norm": 0.36774709820747375, |
| "learning_rate": 7.880282467102847e-06, |
| "loss": 0.4297, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.1235188509874328, |
| "grad_norm": 0.3239872455596924, |
| "learning_rate": 7.87515451694407e-06, |
| "loss": 0.3849, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.1245960502692998, |
| "grad_norm": 0.41246679425239563, |
| "learning_rate": 7.870022044630569e-06, |
| "loss": 0.4338, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.125673249551167, |
| "grad_norm": 0.2990739941596985, |
| "learning_rate": 7.864885058234895e-06, |
| "loss": 0.3491, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.1267504488330342, |
| "grad_norm": 0.3962797224521637, |
| "learning_rate": 7.859743565836697e-06, |
| "loss": 0.4388, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.1278276481149012, |
| "grad_norm": 0.3545224964618683, |
| "learning_rate": 7.854597575522717e-06, |
| "loss": 0.4245, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.1289048473967684, |
| "grad_norm": 0.3659273087978363, |
| "learning_rate": 7.849447095386769e-06, |
| "loss": 0.3882, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.1299820466786357, |
| "grad_norm": 0.32875725626945496, |
| "learning_rate": 7.844292133529727e-06, |
| "loss": 0.4093, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.1310592459605027, |
| "grad_norm": 0.3866625130176544, |
| "learning_rate": 7.839132698059515e-06, |
| "loss": 0.4166, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.13213644524237, |
| "grad_norm": 0.38909971714019775, |
| "learning_rate": 7.833968797091094e-06, |
| "loss": 0.3943, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.133213644524237, |
| "grad_norm": 0.37287622690200806, |
| "learning_rate": 7.828800438746448e-06, |
| "loss": 0.4399, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.1342908438061041, |
| "grad_norm": 0.32692670822143555, |
| "learning_rate": 7.82362763115457e-06, |
| "loss": 0.3592, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.1353680430879713, |
| "grad_norm": 0.42668434977531433, |
| "learning_rate": 7.818450382451457e-06, |
| "loss": 0.4426, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.1364452423698383, |
| "grad_norm": 0.3298830986022949, |
| "learning_rate": 7.813268700780084e-06, |
| "loss": 0.3771, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.1375224416517056, |
| "grad_norm": 0.3884989321231842, |
| "learning_rate": 7.808082594290403e-06, |
| "loss": 0.43, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.1385996409335728, |
| "grad_norm": 0.3532108962535858, |
| "learning_rate": 7.80289207113932e-06, |
| "loss": 0.4357, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.1396768402154398, |
| "grad_norm": 0.3356606662273407, |
| "learning_rate": 7.797697139490694e-06, |
| "loss": 0.3986, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.140754039497307, |
| "grad_norm": 0.29802894592285156, |
| "learning_rate": 7.792497807515317e-06, |
| "loss": 0.3898, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.141831238779174, |
| "grad_norm": 0.3978642225265503, |
| "learning_rate": 7.787294083390898e-06, |
| "loss": 0.4507, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1429084380610413, |
| "grad_norm": 0.34184718132019043, |
| "learning_rate": 7.782085975302055e-06, |
| "loss": 0.3965, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.1439856373429085, |
| "grad_norm": 0.3312958776950836, |
| "learning_rate": 7.776873491440308e-06, |
| "loss": 0.3956, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.1450628366247755, |
| "grad_norm": 0.36556777358055115, |
| "learning_rate": 7.77165664000405e-06, |
| "loss": 0.4046, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.1461400359066427, |
| "grad_norm": 0.39094069600105286, |
| "learning_rate": 7.766435429198547e-06, |
| "loss": 0.4484, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.14721723518851, |
| "grad_norm": 0.4019407629966736, |
| "learning_rate": 7.761209867235924e-06, |
| "loss": 0.4012, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.148294434470377, |
| "grad_norm": 0.32983553409576416, |
| "learning_rate": 7.755979962335149e-06, |
| "loss": 0.3952, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.1493716337522442, |
| "grad_norm": 0.3956191837787628, |
| "learning_rate": 7.750745722722017e-06, |
| "loss": 0.4241, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.1504488330341114, |
| "grad_norm": 0.4105871915817261, |
| "learning_rate": 7.745507156629145e-06, |
| "loss": 0.4112, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.1515260323159784, |
| "grad_norm": 0.3463190197944641, |
| "learning_rate": 7.740264272295954e-06, |
| "loss": 0.3904, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.1526032315978456, |
| "grad_norm": 0.4070665240287781, |
| "learning_rate": 7.735017077968652e-06, |
| "loss": 0.3769, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.1536804308797128, |
| "grad_norm": 0.373894065618515, |
| "learning_rate": 7.729765581900236e-06, |
| "loss": 0.4304, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.1547576301615798, |
| "grad_norm": 0.3551969528198242, |
| "learning_rate": 7.72450979235046e-06, |
| "loss": 0.4078, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.155834829443447, |
| "grad_norm": 0.3633342683315277, |
| "learning_rate": 7.719249717585833e-06, |
| "loss": 0.4228, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.1569120287253143, |
| "grad_norm": 0.3957405388355255, |
| "learning_rate": 7.713985365879607e-06, |
| "loss": 0.3653, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.1579892280071813, |
| "grad_norm": 0.4325840175151825, |
| "learning_rate": 7.708716745511757e-06, |
| "loss": 0.4429, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.1590664272890485, |
| "grad_norm": 0.34411635994911194, |
| "learning_rate": 7.703443864768976e-06, |
| "loss": 0.4174, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.1601436265709157, |
| "grad_norm": 0.36224544048309326, |
| "learning_rate": 7.698166731944653e-06, |
| "loss": 0.4144, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.1612208258527827, |
| "grad_norm": 0.3776390850543976, |
| "learning_rate": 7.69288535533887e-06, |
| "loss": 0.3908, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.16229802513465, |
| "grad_norm": 0.3394273519515991, |
| "learning_rate": 7.68759974325838e-06, |
| "loss": 0.3893, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.163375224416517, |
| "grad_norm": 0.3884154260158539, |
| "learning_rate": 7.6823099040166e-06, |
| "loss": 0.4181, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1644524236983842, |
| "grad_norm": 0.42072558403015137, |
| "learning_rate": 7.677015845933596e-06, |
| "loss": 0.4164, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.1655296229802514, |
| "grad_norm": 0.39499741792678833, |
| "learning_rate": 7.671717577336062e-06, |
| "loss": 0.4016, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.1666068222621184, |
| "grad_norm": 0.3707713782787323, |
| "learning_rate": 7.666415106557329e-06, |
| "loss": 0.3994, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.1676840215439857, |
| "grad_norm": 0.4331463873386383, |
| "learning_rate": 7.661108441937321e-06, |
| "loss": 0.3845, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.1687612208258529, |
| "grad_norm": 0.37316879630088806, |
| "learning_rate": 7.655797591822573e-06, |
| "loss": 0.428, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.1698384201077199, |
| "grad_norm": 0.38761723041534424, |
| "learning_rate": 7.650482564566192e-06, |
| "loss": 0.3879, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.170915619389587, |
| "grad_norm": 0.36918097734451294, |
| "learning_rate": 7.645163368527863e-06, |
| "loss": 0.4015, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.171992818671454, |
| "grad_norm": 0.4075348377227783, |
| "learning_rate": 7.63984001207382e-06, |
| "loss": 0.4451, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.1730700179533213, |
| "grad_norm": 0.35503262281417847, |
| "learning_rate": 7.63451250357685e-06, |
| "loss": 0.4096, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.1741472172351886, |
| "grad_norm": 0.3352997303009033, |
| "learning_rate": 7.62918085141626e-06, |
| "loss": 0.3787, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.1752244165170556, |
| "grad_norm": 0.4400590658187866, |
| "learning_rate": 7.623845063977883e-06, |
| "loss": 0.4086, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.1763016157989228, |
| "grad_norm": 0.40944555401802063, |
| "learning_rate": 7.618505149654052e-06, |
| "loss": 0.4123, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.17737881508079, |
| "grad_norm": 0.3341138958930969, |
| "learning_rate": 7.613161116843592e-06, |
| "loss": 0.3994, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.178456014362657, |
| "grad_norm": 0.40557679533958435, |
| "learning_rate": 7.607812973951802e-06, |
| "loss": 0.4149, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.1795332136445242, |
| "grad_norm": 0.36699768900871277, |
| "learning_rate": 7.602460729390455e-06, |
| "loss": 0.4145, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.1806104129263915, |
| "grad_norm": 0.3463127315044403, |
| "learning_rate": 7.597104391577765e-06, |
| "loss": 0.3929, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.1816876122082585, |
| "grad_norm": 0.3644564151763916, |
| "learning_rate": 7.59174396893839e-06, |
| "loss": 0.3951, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.1827648114901257, |
| "grad_norm": 0.3460002541542053, |
| "learning_rate": 7.586379469903409e-06, |
| "loss": 0.3661, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.183842010771993, |
| "grad_norm": 0.39767634868621826, |
| "learning_rate": 7.581010902910316e-06, |
| "loss": 0.441, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.18491921005386, |
| "grad_norm": 0.3830143213272095, |
| "learning_rate": 7.575638276403003e-06, |
| "loss": 0.4086, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1859964093357271, |
| "grad_norm": 0.3573359549045563, |
| "learning_rate": 7.570261598831743e-06, |
| "loss": 0.3944, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.1870736086175944, |
| "grad_norm": 0.35708191990852356, |
| "learning_rate": 7.564880878653183e-06, |
| "loss": 0.3755, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.1881508078994614, |
| "grad_norm": 0.4171488583087921, |
| "learning_rate": 7.559496124330334e-06, |
| "loss": 0.4145, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.1892280071813286, |
| "grad_norm": 0.3435444235801697, |
| "learning_rate": 7.55410734433254e-06, |
| "loss": 0.4272, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.1903052064631956, |
| "grad_norm": 0.37534239888191223, |
| "learning_rate": 7.548714547135487e-06, |
| "loss": 0.392, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.1913824057450628, |
| "grad_norm": 0.4662499725818634, |
| "learning_rate": 7.5433177412211765e-06, |
| "loss": 0.4038, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.19245960502693, |
| "grad_norm": 0.3813924789428711, |
| "learning_rate": 7.537916935077914e-06, |
| "loss": 0.4284, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.193536804308797, |
| "grad_norm": 0.3959798812866211, |
| "learning_rate": 7.532512137200296e-06, |
| "loss": 0.4041, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.1946140035906643, |
| "grad_norm": 0.3660750389099121, |
| "learning_rate": 7.5271033560892e-06, |
| "loss": 0.3628, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.1956912028725315, |
| "grad_norm": 0.412552148103714, |
| "learning_rate": 7.521690600251765e-06, |
| "loss": 0.4299, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.1967684021543985, |
| "grad_norm": 0.37209901213645935, |
| "learning_rate": 7.516273878201387e-06, |
| "loss": 0.4278, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.1978456014362657, |
| "grad_norm": 0.3807721734046936, |
| "learning_rate": 7.5108531984576945e-06, |
| "loss": 0.4098, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.1989228007181327, |
| "grad_norm": 0.3844991624355316, |
| "learning_rate": 7.505428569546542e-06, |
| "loss": 0.3867, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.3307826519012451, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.3747, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.2010771992818672, |
| "grad_norm": 0.37735849618911743, |
| "learning_rate": 7.494567498356332e-06, |
| "loss": 0.3997, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.2021543985637342, |
| "grad_norm": 0.4011041522026062, |
| "learning_rate": 7.489131073159988e-06, |
| "loss": 0.4328, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.2032315978456014, |
| "grad_norm": 0.39078488945961, |
| "learning_rate": 7.483690732961587e-06, |
| "loss": 0.4079, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.2043087971274686, |
| "grad_norm": 0.35534340143203735, |
| "learning_rate": 7.4782464863179085e-06, |
| "loss": 0.3727, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.2053859964093356, |
| "grad_norm": 0.3854304552078247, |
| "learning_rate": 7.472798341791877e-06, |
| "loss": 0.4314, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.2064631956912029, |
| "grad_norm": 0.4247446358203888, |
| "learning_rate": 7.467346307952544e-06, |
| "loss": 0.3992, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.20754039497307, |
| "grad_norm": 0.4052562415599823, |
| "learning_rate": 7.461890393375079e-06, |
| "loss": 0.3962, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.208617594254937, |
| "grad_norm": 0.38277021050453186, |
| "learning_rate": 7.456430606640757e-06, |
| "loss": 0.4331, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.2096947935368043, |
| "grad_norm": 0.4042188823223114, |
| "learning_rate": 7.450966956336946e-06, |
| "loss": 0.3912, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.2107719928186715, |
| "grad_norm": 0.3846050798892975, |
| "learning_rate": 7.445499451057083e-06, |
| "loss": 0.4148, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.2118491921005385, |
| "grad_norm": 0.3405422866344452, |
| "learning_rate": 7.4400280994006765e-06, |
| "loss": 0.4092, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.2129263913824058, |
| "grad_norm": 0.3947262763977051, |
| "learning_rate": 7.434552909973278e-06, |
| "loss": 0.4077, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.214003590664273, |
| "grad_norm": 0.4062022864818573, |
| "learning_rate": 7.429073891386479e-06, |
| "loss": 0.3901, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.21508078994614, |
| "grad_norm": 0.37780916690826416, |
| "learning_rate": 7.423591052257893e-06, |
| "loss": 0.4333, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.2161579892280072, |
| "grad_norm": 0.3507837951183319, |
| "learning_rate": 7.418104401211144e-06, |
| "loss": 0.4044, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.2172351885098744, |
| "grad_norm": 0.3761948049068451, |
| "learning_rate": 7.412613946875846e-06, |
| "loss": 0.4023, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2183123877917414, |
| "grad_norm": 0.326219379901886, |
| "learning_rate": 7.407119697887603e-06, |
| "loss": 0.3822, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.2193895870736087, |
| "grad_norm": 0.42541274428367615, |
| "learning_rate": 7.4016216628879815e-06, |
| "loss": 0.4199, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.2204667863554757, |
| "grad_norm": 0.35826441645622253, |
| "learning_rate": 7.396119850524503e-06, |
| "loss": 0.4005, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.221543985637343, |
| "grad_norm": 0.2977360188961029, |
| "learning_rate": 7.390614269450633e-06, |
| "loss": 0.3382, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.2226211849192101, |
| "grad_norm": 0.39104917645454407, |
| "learning_rate": 7.385104928325766e-06, |
| "loss": 0.4011, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.2236983842010771, |
| "grad_norm": 0.3678983449935913, |
| "learning_rate": 7.379591835815204e-06, |
| "loss": 0.4436, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.2247755834829444, |
| "grad_norm": 0.3696475625038147, |
| "learning_rate": 7.374075000590155e-06, |
| "loss": 0.4438, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.2258527827648116, |
| "grad_norm": 0.34962400794029236, |
| "learning_rate": 7.36855443132771e-06, |
| "loss": 0.4115, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.2269299820466786, |
| "grad_norm": 0.37243911623954773, |
| "learning_rate": 7.363030136710837e-06, |
| "loss": 0.3826, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.2280071813285458, |
| "grad_norm": 0.34877726435661316, |
| "learning_rate": 7.357502125428359e-06, |
| "loss": 0.3973, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2290843806104128, |
| "grad_norm": 0.37363243103027344, |
| "learning_rate": 7.351970406174951e-06, |
| "loss": 0.4348, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.23016157989228, |
| "grad_norm": 0.3667917251586914, |
| "learning_rate": 7.346434987651111e-06, |
| "loss": 0.3888, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.2312387791741473, |
| "grad_norm": 0.345958948135376, |
| "learning_rate": 7.3408958785631625e-06, |
| "loss": 0.3924, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.2323159784560143, |
| "grad_norm": 0.3950294852256775, |
| "learning_rate": 7.3353530876232315e-06, |
| "loss": 0.4324, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.2333931777378815, |
| "grad_norm": 0.35145139694213867, |
| "learning_rate": 7.329806623549236e-06, |
| "loss": 0.3679, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.2344703770197487, |
| "grad_norm": 0.3384093940258026, |
| "learning_rate": 7.324256495064866e-06, |
| "loss": 0.4611, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.2355475763016157, |
| "grad_norm": 0.3493618071079254, |
| "learning_rate": 7.318702710899584e-06, |
| "loss": 0.4004, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.236624775583483, |
| "grad_norm": 0.37390270829200745, |
| "learning_rate": 7.313145279788596e-06, |
| "loss": 0.4061, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.2377019748653502, |
| "grad_norm": 0.37359941005706787, |
| "learning_rate": 7.3075842104728445e-06, |
| "loss": 0.4111, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.2387791741472172, |
| "grad_norm": 0.3854544460773468, |
| "learning_rate": 7.302019511698998e-06, |
| "loss": 0.3844, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2398563734290844, |
| "grad_norm": 0.4268665015697479, |
| "learning_rate": 7.296451192219427e-06, |
| "loss": 0.4398, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.2409335727109516, |
| "grad_norm": 0.3563729226589203, |
| "learning_rate": 7.290879260792203e-06, |
| "loss": 0.3937, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.2420107719928186, |
| "grad_norm": 0.4146507978439331, |
| "learning_rate": 7.285303726181077e-06, |
| "loss": 0.3851, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.2430879712746858, |
| "grad_norm": 0.3794476091861725, |
| "learning_rate": 7.279724597155463e-06, |
| "loss": 0.4188, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.244165170556553, |
| "grad_norm": 0.33285075426101685, |
| "learning_rate": 7.274141882490435e-06, |
| "loss": 0.4161, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.24524236983842, |
| "grad_norm": 0.3906962275505066, |
| "learning_rate": 7.2685555909667045e-06, |
| "loss": 0.3726, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.2463195691202873, |
| "grad_norm": 0.34079667925834656, |
| "learning_rate": 7.262965731370606e-06, |
| "loss": 0.4003, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.2473967684021545, |
| "grad_norm": 0.3254135251045227, |
| "learning_rate": 7.2573723124940876e-06, |
| "loss": 0.4046, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.2484739676840215, |
| "grad_norm": 0.36429592967033386, |
| "learning_rate": 7.251775343134695e-06, |
| "loss": 0.4357, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.2495511669658887, |
| "grad_norm": 0.36106106638908386, |
| "learning_rate": 7.246174832095562e-06, |
| "loss": 0.3967, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2506283662477558, |
| "grad_norm": 0.3221151530742645, |
| "learning_rate": 7.2405707881853885e-06, |
| "loss": 0.3975, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.251705565529623, |
| "grad_norm": 0.3673859238624573, |
| "learning_rate": 7.23496322021843e-06, |
| "loss": 0.3945, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.2527827648114902, |
| "grad_norm": 0.3616444766521454, |
| "learning_rate": 7.22935213701449e-06, |
| "loss": 0.3969, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.2538599640933572, |
| "grad_norm": 0.3362361192703247, |
| "learning_rate": 7.223737547398898e-06, |
| "loss": 0.4019, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.2549371633752244, |
| "grad_norm": 0.34882697463035583, |
| "learning_rate": 7.218119460202499e-06, |
| "loss": 0.404, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.2560143626570914, |
| "grad_norm": 0.3654717206954956, |
| "learning_rate": 7.212497884261638e-06, |
| "loss": 0.4106, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.2570915619389587, |
| "grad_norm": 0.31606000661849976, |
| "learning_rate": 7.206872828418147e-06, |
| "loss": 0.3872, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.2581687612208259, |
| "grad_norm": 0.39388373494148254, |
| "learning_rate": 7.201244301519333e-06, |
| "loss": 0.4134, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.2592459605026929, |
| "grad_norm": 0.35163354873657227, |
| "learning_rate": 7.195612312417964e-06, |
| "loss": 0.3807, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.26032315978456, |
| "grad_norm": 0.37111741304397583, |
| "learning_rate": 7.189976869972249e-06, |
| "loss": 0.4145, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.2614003590664273, |
| "grad_norm": 0.32174402475357056, |
| "learning_rate": 7.184337983045831e-06, |
| "loss": 0.3836, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.2624775583482943, |
| "grad_norm": 0.36445340514183044, |
| "learning_rate": 7.17869566050777e-06, |
| "loss": 0.3976, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.2635547576301616, |
| "grad_norm": 0.33522090315818787, |
| "learning_rate": 7.1730499112325335e-06, |
| "loss": 0.3959, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.2646319569120288, |
| "grad_norm": 0.4076383709907532, |
| "learning_rate": 7.1674007440999706e-06, |
| "loss": 0.46, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.2657091561938958, |
| "grad_norm": 0.292697936296463, |
| "learning_rate": 7.161748167995312e-06, |
| "loss": 0.3617, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.266786355475763, |
| "grad_norm": 0.34674617648124695, |
| "learning_rate": 7.156092191809152e-06, |
| "loss": 0.4092, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.2678635547576302, |
| "grad_norm": 0.3220016360282898, |
| "learning_rate": 7.150432824437428e-06, |
| "loss": 0.3785, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.2689407540394972, |
| "grad_norm": 0.34475672245025635, |
| "learning_rate": 7.144770074781411e-06, |
| "loss": 0.4354, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.2700179533213645, |
| "grad_norm": 0.32504379749298096, |
| "learning_rate": 7.139103951747694e-06, |
| "loss": 0.4122, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.2710951526032317, |
| "grad_norm": 0.3236904442310333, |
| "learning_rate": 7.133434464248178e-06, |
| "loss": 0.3816, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2721723518850987, |
| "grad_norm": 0.3493375778198242, |
| "learning_rate": 7.1277616212000524e-06, |
| "loss": 0.4048, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.273249551166966, |
| "grad_norm": 0.31458455324172974, |
| "learning_rate": 7.122085431525785e-06, |
| "loss": 0.3877, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.2743267504488331, |
| "grad_norm": 0.34635964035987854, |
| "learning_rate": 7.116405904153105e-06, |
| "loss": 0.4073, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.2754039497307001, |
| "grad_norm": 0.3543740510940552, |
| "learning_rate": 7.110723048014996e-06, |
| "loss": 0.4039, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.2764811490125674, |
| "grad_norm": 0.33583390712738037, |
| "learning_rate": 7.105036872049676e-06, |
| "loss": 0.393, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.2775583482944346, |
| "grad_norm": 0.38352057337760925, |
| "learning_rate": 7.09934738520058e-06, |
| "loss": 0.3927, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.2786355475763016, |
| "grad_norm": 0.3392017185688019, |
| "learning_rate": 7.093654596416357e-06, |
| "loss": 0.3783, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.2797127468581688, |
| "grad_norm": 0.4042876362800598, |
| "learning_rate": 7.0879585146508455e-06, |
| "loss": 0.4005, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.2807899461400358, |
| "grad_norm": 0.37154635787010193, |
| "learning_rate": 7.082259148863064e-06, |
| "loss": 0.4274, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.281867145421903, |
| "grad_norm": 0.34719568490982056, |
| "learning_rate": 7.076556508017196e-06, |
| "loss": 0.3759, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.2829443447037703, |
| "grad_norm": 0.3831841051578522, |
| "learning_rate": 7.0708506010825774e-06, |
| "loss": 0.4559, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.2840215439856373, |
| "grad_norm": 0.3383837938308716, |
| "learning_rate": 7.06514143703368e-06, |
| "loss": 0.3748, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.2850987432675045, |
| "grad_norm": 0.33272892236709595, |
| "learning_rate": 7.0594290248501e-06, |
| "loss": 0.387, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.2861759425493715, |
| "grad_norm": 0.35053086280822754, |
| "learning_rate": 7.053713373516538e-06, |
| "loss": 0.4227, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.2872531418312387, |
| "grad_norm": 0.3342570960521698, |
| "learning_rate": 7.0479944920227945e-06, |
| "loss": 0.4216, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.288330341113106, |
| "grad_norm": 0.33486494421958923, |
| "learning_rate": 7.042272389363749e-06, |
| "loss": 0.4162, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.289407540394973, |
| "grad_norm": 0.3178257644176483, |
| "learning_rate": 7.036547074539347e-06, |
| "loss": 0.3873, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.2904847396768402, |
| "grad_norm": 0.30604350566864014, |
| "learning_rate": 7.030818556554586e-06, |
| "loss": 0.3962, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.2915619389587074, |
| "grad_norm": 0.3792921304702759, |
| "learning_rate": 7.0250868444195e-06, |
| "loss": 0.3978, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.2926391382405744, |
| "grad_norm": 0.31305480003356934, |
| "learning_rate": 7.019351947149149e-06, |
| "loss": 0.4062, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2937163375224416, |
| "grad_norm": 0.31872621178627014, |
| "learning_rate": 7.013613873763603e-06, |
| "loss": 0.3575, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.2947935368043089, |
| "grad_norm": 0.3531589210033417, |
| "learning_rate": 7.007872633287926e-06, |
| "loss": 0.4109, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.2958707360861759, |
| "grad_norm": 0.32369256019592285, |
| "learning_rate": 7.002128234752167e-06, |
| "loss": 0.402, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.296947935368043, |
| "grad_norm": 0.3656080961227417, |
| "learning_rate": 6.996380687191335e-06, |
| "loss": 0.3856, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.2980251346499103, |
| "grad_norm": 0.38284584879875183, |
| "learning_rate": 6.990629999645399e-06, |
| "loss": 0.4304, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.2991023339317773, |
| "grad_norm": 0.29890871047973633, |
| "learning_rate": 6.984876181159261e-06, |
| "loss": 0.4062, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.3001795332136445, |
| "grad_norm": 0.381234735250473, |
| "learning_rate": 6.979119240782753e-06, |
| "loss": 0.3795, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.3012567324955118, |
| "grad_norm": 0.356242835521698, |
| "learning_rate": 6.973359187570614e-06, |
| "loss": 0.3962, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.3023339317773788, |
| "grad_norm": 0.34831711649894714, |
| "learning_rate": 6.9675960305824785e-06, |
| "loss": 0.4346, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.303411131059246, |
| "grad_norm": 0.3573039770126343, |
| "learning_rate": 6.9618297788828635e-06, |
| "loss": 0.4217, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.3044883303411132, |
| "grad_norm": 0.4218464195728302, |
| "learning_rate": 6.956060441541155e-06, |
| "loss": 0.3697, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.3055655296229802, |
| "grad_norm": 0.3260680139064789, |
| "learning_rate": 6.9502880276315885e-06, |
| "loss": 0.4182, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.3066427289048475, |
| "grad_norm": 0.3798454999923706, |
| "learning_rate": 6.9445125462332455e-06, |
| "loss": 0.422, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.3077199281867147, |
| "grad_norm": 0.412260502576828, |
| "learning_rate": 6.9387340064300234e-06, |
| "loss": 0.4255, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.3087971274685817, |
| "grad_norm": 0.4004683792591095, |
| "learning_rate": 6.932952417310634e-06, |
| "loss": 0.4081, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.309874326750449, |
| "grad_norm": 0.34901517629623413, |
| "learning_rate": 6.927167787968589e-06, |
| "loss": 0.4045, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.310951526032316, |
| "grad_norm": 0.41624924540519714, |
| "learning_rate": 6.9213801275021744e-06, |
| "loss": 0.4067, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.3120287253141831, |
| "grad_norm": 0.36287832260131836, |
| "learning_rate": 6.915589445014448e-06, |
| "loss": 0.3913, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.3131059245960501, |
| "grad_norm": 0.375931054353714, |
| "learning_rate": 6.909795749613223e-06, |
| "loss": 0.4075, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.3141831238779174, |
| "grad_norm": 0.3683956563472748, |
| "learning_rate": 6.903999050411046e-06, |
| "loss": 0.3901, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.3152603231597846, |
| "grad_norm": 0.38740620017051697, |
| "learning_rate": 6.89819935652519e-06, |
| "loss": 0.3912, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.3163375224416516, |
| "grad_norm": 0.503844678401947, |
| "learning_rate": 6.892396677077641e-06, |
| "loss": 0.4379, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.3174147217235188, |
| "grad_norm": 0.3334852159023285, |
| "learning_rate": 6.886591021195077e-06, |
| "loss": 0.3847, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.318491921005386, |
| "grad_norm": 0.3811096251010895, |
| "learning_rate": 6.880782398008862e-06, |
| "loss": 0.4156, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.319569120287253, |
| "grad_norm": 0.4092799723148346, |
| "learning_rate": 6.874970816655021e-06, |
| "loss": 0.4026, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.3206463195691203, |
| "grad_norm": 0.4070848822593689, |
| "learning_rate": 6.8691562862742365e-06, |
| "loss": 0.4356, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.3217235188509875, |
| "grad_norm": 0.3308897018432617, |
| "learning_rate": 6.8633388160118265e-06, |
| "loss": 0.3495, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.3228007181328545, |
| "grad_norm": 0.345034122467041, |
| "learning_rate": 6.857518415017736e-06, |
| "loss": 0.4193, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.3238779174147217, |
| "grad_norm": 0.37432533502578735, |
| "learning_rate": 6.851695092446517e-06, |
| "loss": 0.4124, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.324955116696589, |
| "grad_norm": 0.3827821910381317, |
| "learning_rate": 6.8458688574573164e-06, |
| "loss": 0.4333, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.326032315978456, |
| "grad_norm": 0.31386035680770874, |
| "learning_rate": 6.840039719213864e-06, |
| "loss": 0.378, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.3271095152603232, |
| "grad_norm": 0.3906676769256592, |
| "learning_rate": 6.8342076868844556e-06, |
| "loss": 0.4205, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.3281867145421904, |
| "grad_norm": 0.3502320945262909, |
| "learning_rate": 6.828372769641938e-06, |
| "loss": 0.3764, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.3292639138240574, |
| "grad_norm": 0.39450767636299133, |
| "learning_rate": 6.822534976663695e-06, |
| "loss": 0.4607, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.3303411131059246, |
| "grad_norm": 0.34283024072647095, |
| "learning_rate": 6.816694317131634e-06, |
| "loss": 0.3987, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.3314183123877918, |
| "grad_norm": 0.3752940893173218, |
| "learning_rate": 6.8108508002321714e-06, |
| "loss": 0.3941, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.3324955116696588, |
| "grad_norm": 0.3372066915035248, |
| "learning_rate": 6.8050044351562185e-06, |
| "loss": 0.3973, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.333572710951526, |
| "grad_norm": 0.39815032482147217, |
| "learning_rate": 6.799155231099164e-06, |
| "loss": 0.4359, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.3346499102333933, |
| "grad_norm": 0.343666672706604, |
| "learning_rate": 6.7933031972608644e-06, |
| "loss": 0.3829, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.3357271095152603, |
| "grad_norm": 0.3752990663051605, |
| "learning_rate": 6.787448342845626e-06, |
| "loss": 0.3999, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3368043087971275, |
| "grad_norm": 0.37065207958221436, |
| "learning_rate": 6.781590677062191e-06, |
| "loss": 0.405, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.3378815080789948, |
| "grad_norm": 0.3138620853424072, |
| "learning_rate": 6.775730209123722e-06, |
| "loss": 0.3885, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.3389587073608618, |
| "grad_norm": 0.3156231939792633, |
| "learning_rate": 6.769866948247793e-06, |
| "loss": 0.3818, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.340035906642729, |
| "grad_norm": 0.37582793831825256, |
| "learning_rate": 6.764000903656367e-06, |
| "loss": 0.4588, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.341113105924596, |
| "grad_norm": 0.3268308937549591, |
| "learning_rate": 6.758132084575791e-06, |
| "loss": 0.3828, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.3421903052064632, |
| "grad_norm": 0.30771052837371826, |
| "learning_rate": 6.752260500236764e-06, |
| "loss": 0.3833, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.3432675044883302, |
| "grad_norm": 0.3514450788497925, |
| "learning_rate": 6.746386159874348e-06, |
| "loss": 0.4094, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.3443447037701974, |
| "grad_norm": 0.38996556401252747, |
| "learning_rate": 6.740509072727931e-06, |
| "loss": 0.4503, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.3454219030520647, |
| "grad_norm": 0.32034119963645935, |
| "learning_rate": 6.734629248041226e-06, |
| "loss": 0.3973, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.3464991023339317, |
| "grad_norm": 0.34276941418647766, |
| "learning_rate": 6.728746695062249e-06, |
| "loss": 0.406, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3475763016157989, |
| "grad_norm": 0.34551379084587097, |
| "learning_rate": 6.722861423043305e-06, |
| "loss": 0.383, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.3486535008976661, |
| "grad_norm": 0.40802478790283203, |
| "learning_rate": 6.716973441240982e-06, |
| "loss": 0.4247, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.3497307001795331, |
| "grad_norm": 0.343085378408432, |
| "learning_rate": 6.711082758916127e-06, |
| "loss": 0.4242, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.3508078994614003, |
| "grad_norm": 0.32430559396743774, |
| "learning_rate": 6.70518938533383e-06, |
| "loss": 0.3895, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.3518850987432676, |
| "grad_norm": 0.3160029351711273, |
| "learning_rate": 6.699293329763421e-06, |
| "loss": 0.36, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.3529622980251346, |
| "grad_norm": 0.34324607253074646, |
| "learning_rate": 6.693394601478447e-06, |
| "loss": 0.3892, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.3540394973070018, |
| "grad_norm": 0.3541221022605896, |
| "learning_rate": 6.687493209756652e-06, |
| "loss": 0.3938, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.355116696588869, |
| "grad_norm": 0.36511629819869995, |
| "learning_rate": 6.681589163879978e-06, |
| "loss": 0.442, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.356193895870736, |
| "grad_norm": 0.34376540780067444, |
| "learning_rate": 6.675682473134536e-06, |
| "loss": 0.3888, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.3572710951526032, |
| "grad_norm": 0.38606876134872437, |
| "learning_rate": 6.6697731468105985e-06, |
| "loss": 0.3869, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.3583482944344705, |
| "grad_norm": 0.3761198818683624, |
| "learning_rate": 6.663861194202588e-06, |
| "loss": 0.4477, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.3594254937163375, |
| "grad_norm": 0.32027143239974976, |
| "learning_rate": 6.657946624609046e-06, |
| "loss": 0.4213, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.3605026929982047, |
| "grad_norm": 0.36698633432388306, |
| "learning_rate": 6.6520294473326415e-06, |
| "loss": 0.3932, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.361579892280072, |
| "grad_norm": 0.3574357330799103, |
| "learning_rate": 6.64610967168014e-06, |
| "loss": 0.4246, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.362657091561939, |
| "grad_norm": 0.3355792760848999, |
| "learning_rate": 6.640187306962395e-06, |
| "loss": 0.3943, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.3637342908438062, |
| "grad_norm": 0.3679425120353699, |
| "learning_rate": 6.6342623624943325e-06, |
| "loss": 0.4204, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.3648114901256734, |
| "grad_norm": 0.35469403862953186, |
| "learning_rate": 6.6283348475949335e-06, |
| "loss": 0.4361, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.3658886894075404, |
| "grad_norm": 0.3616523742675781, |
| "learning_rate": 6.622404771587225e-06, |
| "loss": 0.3992, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.3669658886894076, |
| "grad_norm": 0.3479689657688141, |
| "learning_rate": 6.61647214379826e-06, |
| "loss": 0.4223, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.3680430879712746, |
| "grad_norm": 0.35383719205856323, |
| "learning_rate": 6.610536973559108e-06, |
| "loss": 0.4039, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.3691202872531418, |
| "grad_norm": 0.3851570188999176, |
| "learning_rate": 6.604599270204832e-06, |
| "loss": 0.3863, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.370197486535009, |
| "grad_norm": 0.3704424798488617, |
| "learning_rate": 6.598659043074487e-06, |
| "loss": 0.4074, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.371274685816876, |
| "grad_norm": 0.3516056537628174, |
| "learning_rate": 6.592716301511089e-06, |
| "loss": 0.4075, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.3723518850987433, |
| "grad_norm": 0.38077306747436523, |
| "learning_rate": 6.586771054861613e-06, |
| "loss": 0.3979, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.3734290843806103, |
| "grad_norm": 0.3695003092288971, |
| "learning_rate": 6.580823312476976e-06, |
| "loss": 0.4206, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.3745062836624775, |
| "grad_norm": 0.34240230917930603, |
| "learning_rate": 6.574873083712018e-06, |
| "loss": 0.4126, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.3755834829443447, |
| "grad_norm": 0.3861750066280365, |
| "learning_rate": 6.568920377925491e-06, |
| "loss": 0.4133, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.3766606822262117, |
| "grad_norm": 0.3501964211463928, |
| "learning_rate": 6.562965204480038e-06, |
| "loss": 0.4055, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.377737881508079, |
| "grad_norm": 0.42631974816322327, |
| "learning_rate": 6.55700757274219e-06, |
| "loss": 0.4126, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.3788150807899462, |
| "grad_norm": 0.37425652146339417, |
| "learning_rate": 6.5510474920823404e-06, |
| "loss": 0.4251, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.3798922800718132, |
| "grad_norm": 0.37494876980781555, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.3962, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.3809694793536804, |
| "grad_norm": 0.4501427114009857, |
| "learning_rate": 6.5391200214974645e-06, |
| "loss": 0.462, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.3820466786355476, |
| "grad_norm": 0.3058687150478363, |
| "learning_rate": 6.5331526503324235e-06, |
| "loss": 0.3501, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.3831238779174146, |
| "grad_norm": 0.3958328366279602, |
| "learning_rate": 6.527182867765333e-06, |
| "loss": 0.4446, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.3842010771992819, |
| "grad_norm": 0.32861238718032837, |
| "learning_rate": 6.5212106831856944e-06, |
| "loss": 0.4054, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.385278276481149, |
| "grad_norm": 0.43862593173980713, |
| "learning_rate": 6.515236105986795e-06, |
| "loss": 0.4333, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.386355475763016, |
| "grad_norm": 0.32525646686553955, |
| "learning_rate": 6.509259145565681e-06, |
| "loss": 0.3857, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.3874326750448833, |
| "grad_norm": 0.35433462262153625, |
| "learning_rate": 6.503279811323145e-06, |
| "loss": 0.4293, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.3885098743267505, |
| "grad_norm": 0.37304699420928955, |
| "learning_rate": 6.497298112663721e-06, |
| "loss": 0.3904, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.3895870736086176, |
| "grad_norm": 0.4006001055240631, |
| "learning_rate": 6.491314058995653e-06, |
| "loss": 0.4138, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.3906642728904848, |
| "grad_norm": 0.3498937487602234, |
| "learning_rate": 6.4853276597308955e-06, |
| "loss": 0.4247, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.391741472172352, |
| "grad_norm": 0.39775341749191284, |
| "learning_rate": 6.479338924285089e-06, |
| "loss": 0.398, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.392818671454219, |
| "grad_norm": 0.34264737367630005, |
| "learning_rate": 6.4733478620775515e-06, |
| "loss": 0.3777, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.3938958707360862, |
| "grad_norm": 0.3468000590801239, |
| "learning_rate": 6.467354482531254e-06, |
| "loss": 0.3946, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.3949730700179535, |
| "grad_norm": 0.3531480133533478, |
| "learning_rate": 6.46135879507282e-06, |
| "loss": 0.4241, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.3960502692998205, |
| "grad_norm": 0.32537776231765747, |
| "learning_rate": 6.455360809132497e-06, |
| "loss": 0.3745, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.3971274685816877, |
| "grad_norm": 0.34955230355262756, |
| "learning_rate": 6.449360534144154e-06, |
| "loss": 0.3936, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.3982046678635547, |
| "grad_norm": 0.3445831537246704, |
| "learning_rate": 6.443357979545254e-06, |
| "loss": 0.4084, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.399281867145422, |
| "grad_norm": 0.332506388425827, |
| "learning_rate": 6.437353154776848e-06, |
| "loss": 0.4327, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.400359066427289, |
| "grad_norm": 0.3073407709598541, |
| "learning_rate": 6.4313460692835586e-06, |
| "loss": 0.3687, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4014362657091561, |
| "grad_norm": 0.36171579360961914, |
| "learning_rate": 6.425336732513564e-06, |
| "loss": 0.4231, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.4025134649910234, |
| "grad_norm": 0.29764875769615173, |
| "learning_rate": 6.419325153918581e-06, |
| "loss": 0.3665, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.4035906642728904, |
| "grad_norm": 0.372881680727005, |
| "learning_rate": 6.413311342953854e-06, |
| "loss": 0.4249, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.4046678635547576, |
| "grad_norm": 0.3532498776912689, |
| "learning_rate": 6.407295309078139e-06, |
| "loss": 0.4069, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.4057450628366248, |
| "grad_norm": 0.30739298462867737, |
| "learning_rate": 6.401277061753689e-06, |
| "loss": 0.3926, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.4068222621184918, |
| "grad_norm": 0.36615023016929626, |
| "learning_rate": 6.395256610446234e-06, |
| "loss": 0.4107, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.407899461400359, |
| "grad_norm": 0.31940293312072754, |
| "learning_rate": 6.389233964624977e-06, |
| "loss": 0.4299, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.4089766606822263, |
| "grad_norm": 0.3129923939704895, |
| "learning_rate": 6.383209133762569e-06, |
| "loss": 0.3991, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.4100538599640933, |
| "grad_norm": 0.35873693227767944, |
| "learning_rate": 6.377182127335096e-06, |
| "loss": 0.3943, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.4111310592459605, |
| "grad_norm": 0.3312750458717346, |
| "learning_rate": 6.3711529548220695e-06, |
| "loss": 0.3629, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.4122082585278277, |
| "grad_norm": 0.395020991563797, |
| "learning_rate": 6.365121625706405e-06, |
| "loss": 0.4153, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.4132854578096947, |
| "grad_norm": 0.303682804107666, |
| "learning_rate": 6.359088149474412e-06, |
| "loss": 0.3788, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.414362657091562, |
| "grad_norm": 0.3261646032333374, |
| "learning_rate": 6.3530525356157765e-06, |
| "loss": 0.4019, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.4154398563734292, |
| "grad_norm": 0.41472819447517395, |
| "learning_rate": 6.3470147936235485e-06, |
| "loss": 0.415, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.4165170556552962, |
| "grad_norm": 0.3044980764389038, |
| "learning_rate": 6.340974932994119e-06, |
| "loss": 0.3774, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.4175942549371634, |
| "grad_norm": 0.3422330319881439, |
| "learning_rate": 6.334932963227216e-06, |
| "loss": 0.4393, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.4186714542190306, |
| "grad_norm": 0.3625028431415558, |
| "learning_rate": 6.328888893825888e-06, |
| "loss": 0.4372, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.4197486535008976, |
| "grad_norm": 0.30885642766952515, |
| "learning_rate": 6.3228427342964785e-06, |
| "loss": 0.3729, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.4208258527827649, |
| "grad_norm": 0.33570152521133423, |
| "learning_rate": 6.316794494148625e-06, |
| "loss": 0.3929, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.421903052064632, |
| "grad_norm": 0.354067325592041, |
| "learning_rate": 6.310744182895231e-06, |
| "loss": 0.4338, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.422980251346499, |
| "grad_norm": 0.30694371461868286, |
| "learning_rate": 6.304691810052466e-06, |
| "loss": 0.3792, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.4240574506283663, |
| "grad_norm": 0.31922197341918945, |
| "learning_rate": 6.2986373851397305e-06, |
| "loss": 0.3964, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.4251346499102335, |
| "grad_norm": 0.3309083580970764, |
| "learning_rate": 6.292580917679665e-06, |
| "loss": 0.417, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.4262118491921005, |
| "grad_norm": 0.3300463557243347, |
| "learning_rate": 6.286522417198115e-06, |
| "loss": 0.4203, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.4272890484739678, |
| "grad_norm": 0.31575945019721985, |
| "learning_rate": 6.280461893224127e-06, |
| "loss": 0.3785, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4283662477558348, |
| "grad_norm": 0.2900710701942444, |
| "learning_rate": 6.274399355289924e-06, |
| "loss": 0.3781, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.429443447037702, |
| "grad_norm": 0.30679771304130554, |
| "learning_rate": 6.2683348129309056e-06, |
| "loss": 0.3743, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.430520646319569, |
| "grad_norm": 0.33957985043525696, |
| "learning_rate": 6.262268275685617e-06, |
| "loss": 0.4142, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.4315978456014362, |
| "grad_norm": 0.31916379928588867, |
| "learning_rate": 6.256199753095745e-06, |
| "loss": 0.4061, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.4326750448833034, |
| "grad_norm": 0.33593031764030457, |
| "learning_rate": 6.250129254706099e-06, |
| "loss": 0.3854, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.4337522441651704, |
| "grad_norm": 0.32574328780174255, |
| "learning_rate": 6.244056790064591e-06, |
| "loss": 0.4029, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.4348294434470377, |
| "grad_norm": 0.33279579877853394, |
| "learning_rate": 6.237982368722232e-06, |
| "loss": 0.4166, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.435906642728905, |
| "grad_norm": 0.333748996257782, |
| "learning_rate": 6.231906000233108e-06, |
| "loss": 0.3701, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.436983842010772, |
| "grad_norm": 0.36953479051589966, |
| "learning_rate": 6.225827694154365e-06, |
| "loss": 0.4345, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.4380610412926391, |
| "grad_norm": 0.3151591718196869, |
| "learning_rate": 6.219747460046203e-06, |
| "loss": 0.3932, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.4391382405745063, |
| "grad_norm": 0.4083271920681, |
| "learning_rate": 6.213665307471846e-06, |
| "loss": 0.4524, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.4402154398563733, |
| "grad_norm": 0.3362996280193329, |
| "learning_rate": 6.207581245997544e-06, |
| "loss": 0.4069, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.4412926391382406, |
| "grad_norm": 0.3313988447189331, |
| "learning_rate": 6.201495285192543e-06, |
| "loss": 0.4126, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.4423698384201078, |
| "grad_norm": 0.3780890107154846, |
| "learning_rate": 6.1954074346290775e-06, |
| "loss": 0.3615, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.4434470377019748, |
| "grad_norm": 0.3669836223125458, |
| "learning_rate": 6.189317703882357e-06, |
| "loss": 0.4363, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.444524236983842, |
| "grad_norm": 0.3658630847930908, |
| "learning_rate": 6.183226102530547e-06, |
| "loss": 0.3859, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.4456014362657092, |
| "grad_norm": 0.34788578748703003, |
| "learning_rate": 6.177132640154754e-06, |
| "loss": 0.3998, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.4466786355475763, |
| "grad_norm": 0.3125394582748413, |
| "learning_rate": 6.171037326339011e-06, |
| "loss": 0.3862, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.4477558348294435, |
| "grad_norm": 0.3761058747768402, |
| "learning_rate": 6.164940170670266e-06, |
| "loss": 0.3958, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.4488330341113107, |
| "grad_norm": 0.42082053422927856, |
| "learning_rate": 6.1588411827383644e-06, |
| "loss": 0.458, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.4499102333931777, |
| "grad_norm": 0.34688693284988403, |
| "learning_rate": 6.152740372136028e-06, |
| "loss": 0.3899, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.450987432675045, |
| "grad_norm": 0.313841849565506, |
| "learning_rate": 6.1466377484588495e-06, |
| "loss": 0.3904, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.4520646319569122, |
| "grad_norm": 0.4089730679988861, |
| "learning_rate": 6.140533321305273e-06, |
| "loss": 0.4011, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.4531418312387792, |
| "grad_norm": 0.4075726568698883, |
| "learning_rate": 6.134427100276579e-06, |
| "loss": 0.4093, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.4542190305206464, |
| "grad_norm": 0.3309878706932068, |
| "learning_rate": 6.128319094976869e-06, |
| "loss": 0.4003, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4552962298025134, |
| "grad_norm": 0.3432037830352783, |
| "learning_rate": 6.122209315013049e-06, |
| "loss": 0.4111, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.4563734290843806, |
| "grad_norm": 0.3966643810272217, |
| "learning_rate": 6.116097769994821e-06, |
| "loss": 0.3895, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.4574506283662478, |
| "grad_norm": 0.3343490660190582, |
| "learning_rate": 6.10998446953466e-06, |
| "loss": 0.4089, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.4585278276481148, |
| "grad_norm": 0.32433220744132996, |
| "learning_rate": 6.1038694232478e-06, |
| "loss": 0.3838, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.459605026929982, |
| "grad_norm": 0.35544490814208984, |
| "learning_rate": 6.097752640752227e-06, |
| "loss": 0.4038, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.460682226211849, |
| "grad_norm": 0.335077702999115, |
| "learning_rate": 6.0916341316686526e-06, |
| "loss": 0.3854, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.4617594254937163, |
| "grad_norm": 0.3600093126296997, |
| "learning_rate": 6.085513905620504e-06, |
| "loss": 0.4118, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.4628366247755835, |
| "grad_norm": 0.36083582043647766, |
| "learning_rate": 6.079391972233913e-06, |
| "loss": 0.3839, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.4639138240574505, |
| "grad_norm": 0.3288957476615906, |
| "learning_rate": 6.073268341137694e-06, |
| "loss": 0.3872, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.4649910233393177, |
| "grad_norm": 0.3327372372150421, |
| "learning_rate": 6.067143021963329e-06, |
| "loss": 0.3964, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.466068222621185, |
| "grad_norm": 0.34401968121528625, |
| "learning_rate": 6.061016024344962e-06, |
| "loss": 0.3825, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.467145421903052, |
| "grad_norm": 0.3438219428062439, |
| "learning_rate": 6.054887357919371e-06, |
| "loss": 0.3866, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.4682226211849192, |
| "grad_norm": 0.290068119764328, |
| "learning_rate": 6.048757032325959e-06, |
| "loss": 0.3764, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.4692998204667864, |
| "grad_norm": 0.3384719789028168, |
| "learning_rate": 6.042625057206742e-06, |
| "loss": 0.4162, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.4703770197486534, |
| "grad_norm": 0.341897577047348, |
| "learning_rate": 6.03649144220633e-06, |
| "loss": 0.4196, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.4714542190305206, |
| "grad_norm": 0.37803781032562256, |
| "learning_rate": 6.030356196971911e-06, |
| "loss": 0.4159, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.4725314183123879, |
| "grad_norm": 0.32955625653266907, |
| "learning_rate": 6.024219331153232e-06, |
| "loss": 0.381, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.4736086175942549, |
| "grad_norm": 0.34146472811698914, |
| "learning_rate": 6.018080854402599e-06, |
| "loss": 0.4089, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.474685816876122, |
| "grad_norm": 0.3598938286304474, |
| "learning_rate": 6.0119407763748465e-06, |
| "loss": 0.3758, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.4757630161579893, |
| "grad_norm": 0.3479284346103668, |
| "learning_rate": 6.005799106727324e-06, |
| "loss": 0.4202, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.4768402154398563, |
| "grad_norm": 0.31264403462409973, |
| "learning_rate": 5.999655855119893e-06, |
| "loss": 0.3895, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.4779174147217236, |
| "grad_norm": 0.33001115918159485, |
| "learning_rate": 5.993511031214895e-06, |
| "loss": 0.4032, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.4789946140035908, |
| "grad_norm": 0.29274216294288635, |
| "learning_rate": 5.987364644677148e-06, |
| "loss": 0.4061, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.4800718132854578, |
| "grad_norm": 0.30409663915634155, |
| "learning_rate": 5.98121670517393e-06, |
| "loss": 0.4165, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.481149012567325, |
| "grad_norm": 0.30471399426460266, |
| "learning_rate": 5.9750672223749574e-06, |
| "loss": 0.3834, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.4822262118491922, |
| "grad_norm": 0.3169211149215698, |
| "learning_rate": 5.968916205952374e-06, |
| "loss": 0.4026, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.4833034111310592, |
| "grad_norm": 0.3480145335197449, |
| "learning_rate": 5.962763665580741e-06, |
| "loss": 0.4518, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.4843806104129265, |
| "grad_norm": 0.28615716099739075, |
| "learning_rate": 5.95660961093701e-06, |
| "loss": 0.3795, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.4854578096947935, |
| "grad_norm": 0.3205227255821228, |
| "learning_rate": 5.950454051700519e-06, |
| "loss": 0.3821, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.4865350089766607, |
| "grad_norm": 0.3589239716529846, |
| "learning_rate": 5.944296997552968e-06, |
| "loss": 0.4231, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.4876122082585277, |
| "grad_norm": 0.33773183822631836, |
| "learning_rate": 5.938138458178414e-06, |
| "loss": 0.4315, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.488689407540395, |
| "grad_norm": 0.32655930519104004, |
| "learning_rate": 5.931978443263247e-06, |
| "loss": 0.3883, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.4897666068222621, |
| "grad_norm": 0.3508589565753937, |
| "learning_rate": 5.9258169624961745e-06, |
| "loss": 0.4055, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.4908438061041291, |
| "grad_norm": 0.3352316915988922, |
| "learning_rate": 5.919654025568216e-06, |
| "loss": 0.3968, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.4919210053859964, |
| "grad_norm": 0.30605775117874146, |
| "learning_rate": 5.9134896421726775e-06, |
| "loss": 0.3667, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.4929982046678636, |
| "grad_norm": 0.33124783635139465, |
| "learning_rate": 5.90732382200514e-06, |
| "loss": 0.4374, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.4940754039497306, |
| "grad_norm": 0.32206031680107117, |
| "learning_rate": 5.901156574763445e-06, |
| "loss": 0.4078, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.4951526032315978, |
| "grad_norm": 0.3807339072227478, |
| "learning_rate": 5.894987910147679e-06, |
| "loss": 0.4221, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.496229802513465, |
| "grad_norm": 0.2875092327594757, |
| "learning_rate": 5.8888178378601565e-06, |
| "loss": 0.3774, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.497307001795332, |
| "grad_norm": 0.3753995895385742, |
| "learning_rate": 5.882646367605409e-06, |
| "loss": 0.3996, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.4983842010771993, |
| "grad_norm": 0.35869738459587097, |
| "learning_rate": 5.876473509090164e-06, |
| "loss": 0.3722, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.4994614003590665, |
| "grad_norm": 0.34174516797065735, |
| "learning_rate": 5.87029927202333e-06, |
| "loss": 0.4506, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.5005385996409335, |
| "grad_norm": 0.3361514210700989, |
| "learning_rate": 5.864123666115991e-06, |
| "loss": 0.3368, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.5016157989228007, |
| "grad_norm": 0.37364038825035095, |
| "learning_rate": 5.85794670108138e-06, |
| "loss": 0.4076, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.502692998204668, |
| "grad_norm": 0.3185519278049469, |
| "learning_rate": 5.851768386634863e-06, |
| "loss": 0.4179, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.503770197486535, |
| "grad_norm": 0.3299560844898224, |
| "learning_rate": 5.845588732493936e-06, |
| "loss": 0.3884, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.5048473967684022, |
| "grad_norm": 0.3319254517555237, |
| "learning_rate": 5.839407748378202e-06, |
| "loss": 0.3817, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.5059245960502694, |
| "grad_norm": 0.30551543831825256, |
| "learning_rate": 5.8332254440093486e-06, |
| "loss": 0.4472, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.5070017953321364, |
| "grad_norm": 0.34325891733169556, |
| "learning_rate": 5.827041829111144e-06, |
| "loss": 0.4122, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.5080789946140036, |
| "grad_norm": 0.3262201249599457, |
| "learning_rate": 5.8208569134094205e-06, |
| "loss": 0.3872, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5091561938958709, |
| "grad_norm": 0.3270936608314514, |
| "learning_rate": 5.814670706632054e-06, |
| "loss": 0.4357, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.5102333931777379, |
| "grad_norm": 0.3235735595226288, |
| "learning_rate": 5.808483218508949e-06, |
| "loss": 0.4109, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.5113105924596049, |
| "grad_norm": 0.354521781206131, |
| "learning_rate": 5.8022944587720285e-06, |
| "loss": 0.4008, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.5123877917414723, |
| "grad_norm": 0.31210362911224365, |
| "learning_rate": 5.796104437155213e-06, |
| "loss": 0.371, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.5134649910233393, |
| "grad_norm": 0.31420016288757324, |
| "learning_rate": 5.78991316339441e-06, |
| "loss": 0.4104, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.5145421903052063, |
| "grad_norm": 0.3347276449203491, |
| "learning_rate": 5.7837206472274955e-06, |
| "loss": 0.4055, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.5156193895870738, |
| "grad_norm": 0.3168209195137024, |
| "learning_rate": 5.777526898394298e-06, |
| "loss": 0.4153, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.5166965888689408, |
| "grad_norm": 0.32604894042015076, |
| "learning_rate": 5.7713319266365886e-06, |
| "loss": 0.4178, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.5177737881508078, |
| "grad_norm": 0.3354335427284241, |
| "learning_rate": 5.765135741698058e-06, |
| "loss": 0.3787, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.518850987432675, |
| "grad_norm": 0.3192979395389557, |
| "learning_rate": 5.758938353324308e-06, |
| "loss": 0.4114, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.5199281867145422, |
| "grad_norm": 0.30226173996925354, |
| "learning_rate": 5.75273977126283e-06, |
| "loss": 0.3713, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.5210053859964092, |
| "grad_norm": 0.33431458473205566, |
| "learning_rate": 5.746540005262994e-06, |
| "loss": 0.4055, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.5220825852782764, |
| "grad_norm": 0.33149486780166626, |
| "learning_rate": 5.740339065076036e-06, |
| "loss": 0.3925, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.5231597845601437, |
| "grad_norm": 0.3282601237297058, |
| "learning_rate": 5.734136960455035e-06, |
| "loss": 0.4061, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.5242369838420107, |
| "grad_norm": 0.3495166599750519, |
| "learning_rate": 5.727933701154899e-06, |
| "loss": 0.3943, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.525314183123878, |
| "grad_norm": 0.325663685798645, |
| "learning_rate": 5.721729296932358e-06, |
| "loss": 0.4149, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.5263913824057451, |
| "grad_norm": 0.34505152702331543, |
| "learning_rate": 5.71552375754594e-06, |
| "loss": 0.4459, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.5274685816876121, |
| "grad_norm": 0.3084740936756134, |
| "learning_rate": 5.709317092755956e-06, |
| "loss": 0.3692, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.5285457809694794, |
| "grad_norm": 0.3517147898674011, |
| "learning_rate": 5.703109312324493e-06, |
| "loss": 0.4125, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.5296229802513466, |
| "grad_norm": 0.3403216600418091, |
| "learning_rate": 5.696900426015386e-06, |
| "loss": 0.3974, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.5307001795332136, |
| "grad_norm": 0.27224382758140564, |
| "learning_rate": 5.6906904435942145e-06, |
| "loss": 0.3477, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.5317773788150808, |
| "grad_norm": 0.36993709206581116, |
| "learning_rate": 5.68447937482828e-06, |
| "loss": 0.4152, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.532854578096948, |
| "grad_norm": 0.3202926814556122, |
| "learning_rate": 5.678267229486592e-06, |
| "loss": 0.4153, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.533931777378815, |
| "grad_norm": 0.2956278324127197, |
| "learning_rate": 5.672054017339855e-06, |
| "loss": 0.401, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.5350089766606823, |
| "grad_norm": 0.29878416657447815, |
| "learning_rate": 5.66583974816045e-06, |
| "loss": 0.3923, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.5360861759425495, |
| "grad_norm": 0.3247506320476532, |
| "learning_rate": 5.659624431722421e-06, |
| "loss": 0.4041, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.5371633752244165, |
| "grad_norm": 0.310533344745636, |
| "learning_rate": 5.653408077801459e-06, |
| "loss": 0.4062, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.5382405745062837, |
| "grad_norm": 0.3016843795776367, |
| "learning_rate": 5.647190696174886e-06, |
| "loss": 0.3877, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.539317773788151, |
| "grad_norm": 0.34463533759117126, |
| "learning_rate": 5.640972296621644e-06, |
| "loss": 0.4253, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.540394973070018, |
| "grad_norm": 0.32542088627815247, |
| "learning_rate": 5.6347528889222715e-06, |
| "loss": 0.3706, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.541472172351885, |
| "grad_norm": 0.32016703486442566, |
| "learning_rate": 5.628532482858894e-06, |
| "loss": 0.4277, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.5425493716337524, |
| "grad_norm": 0.367384135723114, |
| "learning_rate": 5.622311088215209e-06, |
| "loss": 0.4243, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.5436265709156194, |
| "grad_norm": 0.351962685585022, |
| "learning_rate": 5.616088714776469e-06, |
| "loss": 0.4034, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.5447037701974864, |
| "grad_norm": 0.34793248772621155, |
| "learning_rate": 5.609865372329461e-06, |
| "loss": 0.4189, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.5457809694793538, |
| "grad_norm": 0.34114158153533936, |
| "learning_rate": 5.603641070662502e-06, |
| "loss": 0.3852, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.5468581687612208, |
| "grad_norm": 0.3659258186817169, |
| "learning_rate": 5.597415819565416e-06, |
| "loss": 0.4205, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.5479353680430878, |
| "grad_norm": 0.32382774353027344, |
| "learning_rate": 5.591189628829519e-06, |
| "loss": 0.3746, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.549012567324955, |
| "grad_norm": 0.35888931155204773, |
| "learning_rate": 5.584962508247605e-06, |
| "loss": 0.4222, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.5500897666068223, |
| "grad_norm": 0.34174293279647827, |
| "learning_rate": 5.578734467613933e-06, |
| "loss": 0.3859, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.5511669658886893, |
| "grad_norm": 0.32036033272743225, |
| "learning_rate": 5.572505516724207e-06, |
| "loss": 0.3988, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.5522441651705565, |
| "grad_norm": 0.35757505893707275, |
| "learning_rate": 5.56627566537556e-06, |
| "loss": 0.4279, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.5533213644524237, |
| "grad_norm": 0.26979249715805054, |
| "learning_rate": 5.560044923366549e-06, |
| "loss": 0.3469, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.5543985637342908, |
| "grad_norm": 0.33907294273376465, |
| "learning_rate": 5.5538133004971216e-06, |
| "loss": 0.4008, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.555475763016158, |
| "grad_norm": 0.3330419659614563, |
| "learning_rate": 5.547580806568621e-06, |
| "loss": 0.4267, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.5565529622980252, |
| "grad_norm": 0.36036917567253113, |
| "learning_rate": 5.541347451383755e-06, |
| "loss": 0.4155, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.5576301615798922, |
| "grad_norm": 0.3305310904979706, |
| "learning_rate": 5.535113244746585e-06, |
| "loss": 0.4039, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.5587073608617594, |
| "grad_norm": 0.34608691930770874, |
| "learning_rate": 5.528878196462514e-06, |
| "loss": 0.4116, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.5597845601436267, |
| "grad_norm": 0.36932167410850525, |
| "learning_rate": 5.522642316338268e-06, |
| "loss": 0.4179, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.5608617594254937, |
| "grad_norm": 0.33676591515541077, |
| "learning_rate": 5.516405614181883e-06, |
| "loss": 0.4099, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.5619389587073609, |
| "grad_norm": 0.3538258969783783, |
| "learning_rate": 5.5101680998026855e-06, |
| "loss": 0.4211, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.563016157989228, |
| "grad_norm": 0.3520103693008423, |
| "learning_rate": 5.503929783011279e-06, |
| "loss": 0.4095, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.564093357271095, |
| "grad_norm": 0.39654335379600525, |
| "learning_rate": 5.497690673619532e-06, |
| "loss": 0.383, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.5651705565529623, |
| "grad_norm": 0.3446529805660248, |
| "learning_rate": 5.4914507814405596e-06, |
| "loss": 0.4224, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.5662477558348296, |
| "grad_norm": 0.3643217384815216, |
| "learning_rate": 5.485210116288704e-06, |
| "loss": 0.415, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.5673249551166966, |
| "grad_norm": 0.33305594325065613, |
| "learning_rate": 5.478968687979527e-06, |
| "loss": 0.3999, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.5684021543985638, |
| "grad_norm": 0.31847333908081055, |
| "learning_rate": 5.472726506329789e-06, |
| "loss": 0.3692, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.569479353680431, |
| "grad_norm": 0.3522320091724396, |
| "learning_rate": 5.466483581157437e-06, |
| "loss": 0.4035, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.570556552962298, |
| "grad_norm": 0.35457131266593933, |
| "learning_rate": 5.460239922281586e-06, |
| "loss": 0.4142, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.571633752244165, |
| "grad_norm": 0.3094504773616791, |
| "learning_rate": 5.453995539522503e-06, |
| "loss": 0.4002, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.5727109515260325, |
| "grad_norm": 0.32812392711639404, |
| "learning_rate": 5.447750442701598e-06, |
| "loss": 0.3666, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.5737881508078995, |
| "grad_norm": 0.3192622661590576, |
| "learning_rate": 5.441504641641403e-06, |
| "loss": 0.3929, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.5748653500897665, |
| "grad_norm": 0.31926393508911133, |
| "learning_rate": 5.435258146165554e-06, |
| "loss": 0.401, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.575942549371634, |
| "grad_norm": 0.3556506931781769, |
| "learning_rate": 5.429010966098782e-06, |
| "loss": 0.3921, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.577019748653501, |
| "grad_norm": 0.3289359211921692, |
| "learning_rate": 5.4227631112668955e-06, |
| "loss": 0.3963, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.578096947935368, |
| "grad_norm": 0.35782623291015625, |
| "learning_rate": 5.416514591496764e-06, |
| "loss": 0.4222, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.5791741472172351, |
| "grad_norm": 0.3453352153301239, |
| "learning_rate": 5.410265416616301e-06, |
| "loss": 0.3899, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.5802513464991024, |
| "grad_norm": 0.34165066480636597, |
| "learning_rate": 5.404015596454451e-06, |
| "loss": 0.3652, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.5813285457809694, |
| "grad_norm": 0.336681067943573, |
| "learning_rate": 5.397765140841174e-06, |
| "loss": 0.4057, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.5824057450628366, |
| "grad_norm": 0.3774108588695526, |
| "learning_rate": 5.391514059607431e-06, |
| "loss": 0.4302, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.5834829443447038, |
| "grad_norm": 0.32935065031051636, |
| "learning_rate": 5.3852623625851655e-06, |
| "loss": 0.3606, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.5845601436265708, |
| "grad_norm": 0.3437838852405548, |
| "learning_rate": 5.379010059607288e-06, |
| "loss": 0.3976, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.585637342908438, |
| "grad_norm": 0.34462642669677734, |
| "learning_rate": 5.372757160507663e-06, |
| "loss": 0.4161, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.5867145421903053, |
| "grad_norm": 0.4068163335323334, |
| "learning_rate": 5.366503675121095e-06, |
| "loss": 0.4021, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.5877917414721723, |
| "grad_norm": 0.3602428436279297, |
| "learning_rate": 5.360249613283308e-06, |
| "loss": 0.4126, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.5888689407540395, |
| "grad_norm": 0.3262472152709961, |
| "learning_rate": 5.353994984830934e-06, |
| "loss": 0.429, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.5899461400359067, |
| "grad_norm": 0.3102273643016815, |
| "learning_rate": 5.347739799601494e-06, |
| "loss": 0.3751, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.5910233393177737, |
| "grad_norm": 0.3460737466812134, |
| "learning_rate": 5.341484067433388e-06, |
| "loss": 0.3969, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.592100538599641, |
| "grad_norm": 0.345058798789978, |
| "learning_rate": 5.335227798165874e-06, |
| "loss": 0.4097, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.5931777378815082, |
| "grad_norm": 0.3214409351348877, |
| "learning_rate": 5.328971001639054e-06, |
| "loss": 0.4073, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.5942549371633752, |
| "grad_norm": 0.343375563621521, |
| "learning_rate": 5.322713687693862e-06, |
| "loss": 0.4327, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.5953321364452424, |
| "grad_norm": 0.34606754779815674, |
| "learning_rate": 5.3164558661720456e-06, |
| "loss": 0.424, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.5964093357271096, |
| "grad_norm": 0.38280874490737915, |
| "learning_rate": 5.310197546916149e-06, |
| "loss": 0.4247, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.5974865350089766, |
| "grad_norm": 0.339191734790802, |
| "learning_rate": 5.303938739769498e-06, |
| "loss": 0.3879, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.5985637342908436, |
| "grad_norm": 0.3148673474788666, |
| "learning_rate": 5.2976794545761886e-06, |
| "loss": 0.3802, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.599640933572711, |
| "grad_norm": 0.3606517016887665, |
| "learning_rate": 5.291419701181069e-06, |
| "loss": 0.4378, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.600718132854578, |
| "grad_norm": 0.35120633244514465, |
| "learning_rate": 5.285159489429722e-06, |
| "loss": 0.4015, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.601795332136445, |
| "grad_norm": 0.3325619101524353, |
| "learning_rate": 5.2788988291684506e-06, |
| "loss": 0.3774, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.6028725314183125, |
| "grad_norm": 0.3451593518257141, |
| "learning_rate": 5.272637730244265e-06, |
| "loss": 0.4173, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.6039497307001795, |
| "grad_norm": 0.36361247301101685, |
| "learning_rate": 5.266376202504866e-06, |
| "loss": 0.3992, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.6050269299820465, |
| "grad_norm": 0.32423025369644165, |
| "learning_rate": 5.260114255798627e-06, |
| "loss": 0.3862, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6061041292639138, |
| "grad_norm": 0.32100334763526917, |
| "learning_rate": 5.25385189997458e-06, |
| "loss": 0.4275, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.607181328545781, |
| "grad_norm": 0.35108858346939087, |
| "learning_rate": 5.2475891448824025e-06, |
| "loss": 0.3976, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.608258527827648, |
| "grad_norm": 0.3189638555049896, |
| "learning_rate": 5.2413260003724e-06, |
| "loss": 0.397, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.6093357271095152, |
| "grad_norm": 0.3238120675086975, |
| "learning_rate": 5.235062476295488e-06, |
| "loss": 0.4195, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.6104129263913824, |
| "grad_norm": 0.3653884530067444, |
| "learning_rate": 5.228798582503181e-06, |
| "loss": 0.387, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.6114901256732495, |
| "grad_norm": 0.3390481173992157, |
| "learning_rate": 5.222534328847574e-06, |
| "loss": 0.3873, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.6125673249551167, |
| "grad_norm": 0.3277999758720398, |
| "learning_rate": 5.21626972518133e-06, |
| "loss": 0.3865, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.613644524236984, |
| "grad_norm": 0.3376871943473816, |
| "learning_rate": 5.21000478135766e-06, |
| "loss": 0.4334, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.614721723518851, |
| "grad_norm": 0.4035407602787018, |
| "learning_rate": 5.203739507230311e-06, |
| "loss": 0.4109, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.6157989228007181, |
| "grad_norm": 0.33631792664527893, |
| "learning_rate": 5.197473912653549e-06, |
| "loss": 0.4145, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6168761220825854, |
| "grad_norm": 0.33172816038131714, |
| "learning_rate": 5.191208007482145e-06, |
| "loss": 0.3723, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.6179533213644524, |
| "grad_norm": 0.3129732310771942, |
| "learning_rate": 5.184941801571359e-06, |
| "loss": 0.3939, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.6190305206463196, |
| "grad_norm": 0.32043594121932983, |
| "learning_rate": 5.1786753047769235e-06, |
| "loss": 0.3864, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.6201077199281868, |
| "grad_norm": 0.38129621744155884, |
| "learning_rate": 5.172408526955025e-06, |
| "loss": 0.3873, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.6211849192100538, |
| "grad_norm": 0.3243739902973175, |
| "learning_rate": 5.166141477962298e-06, |
| "loss": 0.4044, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.622262118491921, |
| "grad_norm": 0.30711236596107483, |
| "learning_rate": 5.1598741676557995e-06, |
| "loss": 0.3538, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.6233393177737883, |
| "grad_norm": 0.3542765974998474, |
| "learning_rate": 5.153606605892999e-06, |
| "loss": 0.43, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.6244165170556553, |
| "grad_norm": 0.39987432956695557, |
| "learning_rate": 5.147338802531762e-06, |
| "loss": 0.4364, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.6254937163375225, |
| "grad_norm": 0.34654441475868225, |
| "learning_rate": 5.141070767430331e-06, |
| "loss": 0.4161, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.6265709156193897, |
| "grad_norm": 0.3391593098640442, |
| "learning_rate": 5.134802510447318e-06, |
| "loss": 0.4211, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.6276481149012567, |
| "grad_norm": 0.36817681789398193, |
| "learning_rate": 5.128534041441677e-06, |
| "loss": 0.3796, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.6287253141831237, |
| "grad_norm": 0.3822740614414215, |
| "learning_rate": 5.1222653702727035e-06, |
| "loss": 0.3863, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.6298025134649912, |
| "grad_norm": 0.3610847294330597, |
| "learning_rate": 5.115996506800004e-06, |
| "loss": 0.3947, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.6308797127468582, |
| "grad_norm": 0.3673597574234009, |
| "learning_rate": 5.109727460883496e-06, |
| "loss": 0.4001, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.6319569120287252, |
| "grad_norm": 0.36558011174201965, |
| "learning_rate": 5.103458242383371e-06, |
| "loss": 0.3957, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.6330341113105926, |
| "grad_norm": 0.38542303442955017, |
| "learning_rate": 5.097188861160103e-06, |
| "loss": 0.4457, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.6341113105924596, |
| "grad_norm": 0.3248066008090973, |
| "learning_rate": 5.09091932707442e-06, |
| "loss": 0.3801, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.6351885098743266, |
| "grad_norm": 0.3572705388069153, |
| "learning_rate": 5.084649649987285e-06, |
| "loss": 0.3779, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.6362657091561938, |
| "grad_norm": 0.3426295220851898, |
| "learning_rate": 5.078379839759895e-06, |
| "loss": 0.4289, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.637342908438061, |
| "grad_norm": 0.313820481300354, |
| "learning_rate": 5.072109906253646e-06, |
| "loss": 0.4179, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.638420107719928, |
| "grad_norm": 0.31881383061408997, |
| "learning_rate": 5.065839859330134e-06, |
| "loss": 0.3918, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.6394973070017953, |
| "grad_norm": 0.34397974610328674, |
| "learning_rate": 5.059569708851136e-06, |
| "loss": 0.3973, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.6405745062836625, |
| "grad_norm": 0.32578712701797485, |
| "learning_rate": 5.053299464678583e-06, |
| "loss": 0.4084, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.6416517055655295, |
| "grad_norm": 0.3264220058917999, |
| "learning_rate": 5.047029136674563e-06, |
| "loss": 0.4077, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.6427289048473968, |
| "grad_norm": 0.37533849477767944, |
| "learning_rate": 5.040758734701289e-06, |
| "loss": 0.4076, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.643806104129264, |
| "grad_norm": 0.2892257273197174, |
| "learning_rate": 5.034488268621094e-06, |
| "loss": 0.3809, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.644883303411131, |
| "grad_norm": 0.3342662751674652, |
| "learning_rate": 5.028217748296409e-06, |
| "loss": 0.4308, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.6459605026929982, |
| "grad_norm": 0.30961301922798157, |
| "learning_rate": 5.021947183589753e-06, |
| "loss": 0.401, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.6470377019748654, |
| "grad_norm": 0.3177073299884796, |
| "learning_rate": 5.015676584363716e-06, |
| "loss": 0.3879, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.6481149012567324, |
| "grad_norm": 0.32608547806739807, |
| "learning_rate": 5.009405960480937e-06, |
| "loss": 0.4081, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.6491921005385997, |
| "grad_norm": 0.3337622582912445, |
| "learning_rate": 5.003135321804098e-06, |
| "loss": 0.4059, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.6502692998204669, |
| "grad_norm": 0.33355656266212463, |
| "learning_rate": 4.996864678195903e-06, |
| "loss": 0.4241, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.6513464991023339, |
| "grad_norm": 0.3020173907279968, |
| "learning_rate": 4.990594039519064e-06, |
| "loss": 0.4117, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.6524236983842011, |
| "grad_norm": 0.2995854318141937, |
| "learning_rate": 4.984323415636285e-06, |
| "loss": 0.3731, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.6535008976660683, |
| "grad_norm": 0.3445095121860504, |
| "learning_rate": 4.9780528164102475e-06, |
| "loss": 0.4398, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.6545780969479353, |
| "grad_norm": 0.3377833366394043, |
| "learning_rate": 4.971782251703591e-06, |
| "loss": 0.417, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.6556552962298026, |
| "grad_norm": 0.30527958273887634, |
| "learning_rate": 4.965511731378909e-06, |
| "loss": 0.3804, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.6567324955116698, |
| "grad_norm": 0.31118062138557434, |
| "learning_rate": 4.959241265298713e-06, |
| "loss": 0.3935, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.6578096947935368, |
| "grad_norm": 0.34053924679756165, |
| "learning_rate": 4.95297086332544e-06, |
| "loss": 0.3933, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.6588868940754038, |
| "grad_norm": 0.3675541877746582, |
| "learning_rate": 4.946700535321419e-06, |
| "loss": 0.4748, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.6599640933572712, |
| "grad_norm": 0.29740291833877563, |
| "learning_rate": 4.940430291148866e-06, |
| "loss": 0.3722, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.6610412926391382, |
| "grad_norm": 0.33205488324165344, |
| "learning_rate": 4.9341601406698675e-06, |
| "loss": 0.3982, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.6621184919210052, |
| "grad_norm": 0.34859946370124817, |
| "learning_rate": 4.927890093746356e-06, |
| "loss": 0.3955, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.6631956912028727, |
| "grad_norm": 0.3418346047401428, |
| "learning_rate": 4.921620160240107e-06, |
| "loss": 0.4003, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.6642728904847397, |
| "grad_norm": 0.3368319869041443, |
| "learning_rate": 4.915350350012714e-06, |
| "loss": 0.3984, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.6653500897666067, |
| "grad_norm": 0.32968056201934814, |
| "learning_rate": 4.909080672925581e-06, |
| "loss": 0.3682, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.666427289048474, |
| "grad_norm": 0.3352184593677521, |
| "learning_rate": 4.902811138839897e-06, |
| "loss": 0.416, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.6675044883303412, |
| "grad_norm": 0.33194318413734436, |
| "learning_rate": 4.896541757616632e-06, |
| "loss": 0.4123, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.6685816876122082, |
| "grad_norm": 0.33617234230041504, |
| "learning_rate": 4.890272539116508e-06, |
| "loss": 0.4266, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.6696588868940754, |
| "grad_norm": 0.3273670971393585, |
| "learning_rate": 4.884003493199997e-06, |
| "loss": 0.3765, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.6707360861759426, |
| "grad_norm": 0.32140910625457764, |
| "learning_rate": 4.877734629727299e-06, |
| "loss": 0.3841, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.6718132854578096, |
| "grad_norm": 0.3900831937789917, |
| "learning_rate": 4.8714659585583246e-06, |
| "loss": 0.4438, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.6728904847396768, |
| "grad_norm": 0.3028438091278076, |
| "learning_rate": 4.865197489552684e-06, |
| "loss": 0.383, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.673967684021544, |
| "grad_norm": 0.3399808704853058, |
| "learning_rate": 4.858929232569671e-06, |
| "loss": 0.4089, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.675044883303411, |
| "grad_norm": 0.3515014350414276, |
| "learning_rate": 4.85266119746824e-06, |
| "loss": 0.3845, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.6761220825852783, |
| "grad_norm": 0.3478013873100281, |
| "learning_rate": 4.846393394107001e-06, |
| "loss": 0.405, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.6771992818671455, |
| "grad_norm": 0.3799297511577606, |
| "learning_rate": 4.840125832344202e-06, |
| "loss": 0.4192, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.6782764811490125, |
| "grad_norm": 0.340212881565094, |
| "learning_rate": 4.8338585220377045e-06, |
| "loss": 0.3992, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.6793536804308797, |
| "grad_norm": 0.34044545888900757, |
| "learning_rate": 4.827591473044978e-06, |
| "loss": 0.4019, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.680430879712747, |
| "grad_norm": 0.3173620104789734, |
| "learning_rate": 4.82132469522308e-06, |
| "loss": 0.3811, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.681508078994614, |
| "grad_norm": 0.30734777450561523, |
| "learning_rate": 4.815058198428643e-06, |
| "loss": 0.378, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.6825852782764812, |
| "grad_norm": 0.29187920689582825, |
| "learning_rate": 4.808791992517857e-06, |
| "loss": 0.3649, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.6836624775583484, |
| "grad_norm": 0.32691147923469543, |
| "learning_rate": 4.802526087346453e-06, |
| "loss": 0.4017, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.6847396768402154, |
| "grad_norm": 0.3336152136325836, |
| "learning_rate": 4.796260492769691e-06, |
| "loss": 0.437, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.6858168761220824, |
| "grad_norm": 0.32136350870132446, |
| "learning_rate": 4.789995218642341e-06, |
| "loss": 0.4128, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.6868940754039499, |
| "grad_norm": 0.32937243580818176, |
| "learning_rate": 4.783730274818671e-06, |
| "loss": 0.4215, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.6879712746858169, |
| "grad_norm": 0.3161608576774597, |
| "learning_rate": 4.777465671152426e-06, |
| "loss": 0.3936, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.6890484739676839, |
| "grad_norm": 0.3174351155757904, |
| "learning_rate": 4.771201417496819e-06, |
| "loss": 0.4123, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.6901256732495513, |
| "grad_norm": 0.3189868927001953, |
| "learning_rate": 4.7649375237045135e-06, |
| "loss": 0.3904, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.6912028725314183, |
| "grad_norm": 0.28452205657958984, |
| "learning_rate": 4.7586739996276015e-06, |
| "loss": 0.3672, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.6922800718132853, |
| "grad_norm": 0.3064326047897339, |
| "learning_rate": 4.752410855117599e-06, |
| "loss": 0.3998, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.6933572710951525, |
| "grad_norm": 0.3178927004337311, |
| "learning_rate": 4.746148100025422e-06, |
| "loss": 0.4376, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.6944344703770198, |
| "grad_norm": 0.3099832236766815, |
| "learning_rate": 4.7398857442013746e-06, |
| "loss": 0.368, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.6955116696588868, |
| "grad_norm": 0.3031095266342163, |
| "learning_rate": 4.733623797495136e-06, |
| "loss": 0.3652, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.696588868940754, |
| "grad_norm": 0.32943809032440186, |
| "learning_rate": 4.727362269755736e-06, |
| "loss": 0.4372, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.6976660682226212, |
| "grad_norm": 0.32587704062461853, |
| "learning_rate": 4.72110117083155e-06, |
| "loss": 0.4416, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.6987432675044882, |
| "grad_norm": 0.31048640608787537, |
| "learning_rate": 4.714840510570278e-06, |
| "loss": 0.4197, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.6998204667863555, |
| "grad_norm": 0.27757468819618225, |
| "learning_rate": 4.708580298818931e-06, |
| "loss": 0.3919, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.7008976660682227, |
| "grad_norm": 0.3723500370979309, |
| "learning_rate": 4.702320545423814e-06, |
| "loss": 0.3957, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.7019748653500897, |
| "grad_norm": 0.3020727336406708, |
| "learning_rate": 4.696061260230504e-06, |
| "loss": 0.3796, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.703052064631957, |
| "grad_norm": 0.3209006190299988, |
| "learning_rate": 4.689802453083854e-06, |
| "loss": 0.4248, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.7041292639138241, |
| "grad_norm": 0.307170033454895, |
| "learning_rate": 4.683544133827955e-06, |
| "loss": 0.3971, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.7052064631956911, |
| "grad_norm": 0.3253774344921112, |
| "learning_rate": 4.677286312306139e-06, |
| "loss": 0.3774, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.7062836624775584, |
| "grad_norm": 0.345463365316391, |
| "learning_rate": 4.671028998360947e-06, |
| "loss": 0.3968, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.7073608617594256, |
| "grad_norm": 0.3188075125217438, |
| "learning_rate": 4.664772201834128e-06, |
| "loss": 0.4299, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.7084380610412926, |
| "grad_norm": 0.3115480840206146, |
| "learning_rate": 4.658515932566614e-06, |
| "loss": 0.3815, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.7095152603231598, |
| "grad_norm": 0.3210900127887726, |
| "learning_rate": 4.652260200398507e-06, |
| "loss": 0.4019, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.710592459605027, |
| "grad_norm": 0.33627891540527344, |
| "learning_rate": 4.646005015169067e-06, |
| "loss": 0.4139, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.711669658886894, |
| "grad_norm": 0.3372555375099182, |
| "learning_rate": 4.639750386716693e-06, |
| "loss": 0.415, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.7127468581687613, |
| "grad_norm": 0.28700023889541626, |
| "learning_rate": 4.633496324878906e-06, |
| "loss": 0.3562, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.7138240574506285, |
| "grad_norm": 0.3731818199157715, |
| "learning_rate": 4.627242839492339e-06, |
| "loss": 0.4113, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.7149012567324955, |
| "grad_norm": 0.29507315158843994, |
| "learning_rate": 4.620989940392715e-06, |
| "loss": 0.3995, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.7159784560143625, |
| "grad_norm": 0.32059916853904724, |
| "learning_rate": 4.614737637414836e-06, |
| "loss": 0.3934, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.71705565529623, |
| "grad_norm": 0.29595065116882324, |
| "learning_rate": 4.60848594039257e-06, |
| "loss": 0.3558, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.718132854578097, |
| "grad_norm": 0.3473470211029053, |
| "learning_rate": 4.602234859158827e-06, |
| "loss": 0.4578, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.719210053859964, |
| "grad_norm": 0.3321559727191925, |
| "learning_rate": 4.59598440354555e-06, |
| "loss": 0.3863, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.7202872531418314, |
| "grad_norm": 0.3324938714504242, |
| "learning_rate": 4.5897345833837e-06, |
| "loss": 0.4062, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.7213644524236984, |
| "grad_norm": 0.29092180728912354, |
| "learning_rate": 4.583485408503237e-06, |
| "loss": 0.3987, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.7224416517055654, |
| "grad_norm": 0.2973518967628479, |
| "learning_rate": 4.5772368887331044e-06, |
| "loss": 0.383, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.7235188509874326, |
| "grad_norm": 0.3359359800815582, |
| "learning_rate": 4.5709890339012205e-06, |
| "loss": 0.3992, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.7245960502692999, |
| "grad_norm": 0.396452397108078, |
| "learning_rate": 4.564741853834448e-06, |
| "loss": 0.4233, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.7256732495511669, |
| "grad_norm": 0.3256533145904541, |
| "learning_rate": 4.558495358358599e-06, |
| "loss": 0.4415, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.726750448833034, |
| "grad_norm": 0.37291306257247925, |
| "learning_rate": 4.552249557298403e-06, |
| "loss": 0.3855, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.7278276481149013, |
| "grad_norm": 0.3769271969795227, |
| "learning_rate": 4.5460044604774986e-06, |
| "loss": 0.4087, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.7289048473967683, |
| "grad_norm": 0.35868149995803833, |
| "learning_rate": 4.539760077718416e-06, |
| "loss": 0.3917, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.7299820466786355, |
| "grad_norm": 0.3206818699836731, |
| "learning_rate": 4.533516418842565e-06, |
| "loss": 0.4174, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.7310592459605028, |
| "grad_norm": 0.3528873920440674, |
| "learning_rate": 4.5272734936702116e-06, |
| "loss": 0.405, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.7321364452423698, |
| "grad_norm": 0.4061715006828308, |
| "learning_rate": 4.521031312020473e-06, |
| "loss": 0.4191, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.733213644524237, |
| "grad_norm": 0.3145693838596344, |
| "learning_rate": 4.514789883711296e-06, |
| "loss": 0.4227, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.7342908438061042, |
| "grad_norm": 0.28356069326400757, |
| "learning_rate": 4.508549218559441e-06, |
| "loss": 0.372, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.7353680430879712, |
| "grad_norm": 0.3727303743362427, |
| "learning_rate": 4.5023093263804705e-06, |
| "loss": 0.4493, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.7364452423698384, |
| "grad_norm": 0.33184897899627686, |
| "learning_rate": 4.496070216988723e-06, |
| "loss": 0.3889, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.7375224416517057, |
| "grad_norm": 0.32492300868034363, |
| "learning_rate": 4.489831900197317e-06, |
| "loss": 0.3952, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.7385996409335727, |
| "grad_norm": 0.3340592384338379, |
| "learning_rate": 4.483594385818119e-06, |
| "loss": 0.3764, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.73967684021544, |
| "grad_norm": 0.2960026264190674, |
| "learning_rate": 4.477357683661734e-06, |
| "loss": 0.3841, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.7407540394973071, |
| "grad_norm": 0.33512938022613525, |
| "learning_rate": 4.471121803537488e-06, |
| "loss": 0.3839, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.7418312387791741, |
| "grad_norm": 0.3327915370464325, |
| "learning_rate": 4.464886755253417e-06, |
| "loss": 0.4129, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.7429084380610413, |
| "grad_norm": 0.3029802143573761, |
| "learning_rate": 4.4586525486162465e-06, |
| "loss": 0.3737, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.7439856373429086, |
| "grad_norm": 0.2849656641483307, |
| "learning_rate": 4.452419193431379e-06, |
| "loss": 0.4006, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.7450628366247756, |
| "grad_norm": 0.3106905519962311, |
| "learning_rate": 4.4461866995028776e-06, |
| "loss": 0.3991, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.7461400359066426, |
| "grad_norm": 0.33045652508735657, |
| "learning_rate": 4.439955076633454e-06, |
| "loss": 0.3969, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.74721723518851, |
| "grad_norm": 0.4101937711238861, |
| "learning_rate": 4.433724334624441e-06, |
| "loss": 0.4023, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.748294434470377, |
| "grad_norm": 0.31196269392967224, |
| "learning_rate": 4.4274944832757964e-06, |
| "loss": 0.3797, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.749371633752244, |
| "grad_norm": 0.38965967297554016, |
| "learning_rate": 4.4212655323860685e-06, |
| "loss": 0.4432, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.7504488330341115, |
| "grad_norm": 0.32944896817207336, |
| "learning_rate": 4.4150374917523955e-06, |
| "loss": 0.3666, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.7515260323159785, |
| "grad_norm": 0.31117597222328186, |
| "learning_rate": 4.408810371170484e-06, |
| "loss": 0.3885, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.7526032315978455, |
| "grad_norm": 0.3230034112930298, |
| "learning_rate": 4.402584180434586e-06, |
| "loss": 0.3943, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.7536804308797127, |
| "grad_norm": 0.3342301845550537, |
| "learning_rate": 4.396358929337499e-06, |
| "loss": 0.3917, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.75475763016158, |
| "grad_norm": 0.34093886613845825, |
| "learning_rate": 4.39013462767054e-06, |
| "loss": 0.3816, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.755834829443447, |
| "grad_norm": 0.28704625368118286, |
| "learning_rate": 4.3839112852235335e-06, |
| "loss": 0.4086, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.7569120287253142, |
| "grad_norm": 0.3321066200733185, |
| "learning_rate": 4.377688911784792e-06, |
| "loss": 0.4045, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.7579892280071814, |
| "grad_norm": 0.314985454082489, |
| "learning_rate": 4.371467517141108e-06, |
| "loss": 0.4045, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.7590664272890484, |
| "grad_norm": 0.28622984886169434, |
| "learning_rate": 4.365247111077731e-06, |
| "loss": 0.3668, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.7601436265709156, |
| "grad_norm": 0.33727702498435974, |
| "learning_rate": 4.359027703378357e-06, |
| "loss": 0.4285, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.7612208258527828, |
| "grad_norm": 0.29879483580589294, |
| "learning_rate": 4.352809303825115e-06, |
| "loss": 0.3871, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.7622980251346498, |
| "grad_norm": 0.3572829067707062, |
| "learning_rate": 4.346591922198542e-06, |
| "loss": 0.4131, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.763375224416517, |
| "grad_norm": 0.3054465651512146, |
| "learning_rate": 4.34037556827758e-06, |
| "loss": 0.4006, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.7644524236983843, |
| "grad_norm": 0.3047214448451996, |
| "learning_rate": 4.334160251839552e-06, |
| "loss": 0.408, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.7655296229802513, |
| "grad_norm": 0.3058657646179199, |
| "learning_rate": 4.327945982660146e-06, |
| "loss": 0.3935, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.7666068222621185, |
| "grad_norm": 0.3060075342655182, |
| "learning_rate": 4.321732770513408e-06, |
| "loss": 0.4331, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.7676840215439857, |
| "grad_norm": 0.31800925731658936, |
| "learning_rate": 4.31552062517172e-06, |
| "loss": 0.4002, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.7687612208258527, |
| "grad_norm": 0.30724993348121643, |
| "learning_rate": 4.309309556405786e-06, |
| "loss": 0.4331, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.76983842010772, |
| "grad_norm": 0.2819603681564331, |
| "learning_rate": 4.303099573984617e-06, |
| "loss": 0.3929, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.7709156193895872, |
| "grad_norm": 0.3609313368797302, |
| "learning_rate": 4.29689068767551e-06, |
| "loss": 0.4467, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.7719928186714542, |
| "grad_norm": 0.2898065745830536, |
| "learning_rate": 4.290682907244046e-06, |
| "loss": 0.3949, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.7730700179533212, |
| "grad_norm": 0.29993054270744324, |
| "learning_rate": 4.284476242454062e-06, |
| "loss": 0.405, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.7741472172351886, |
| "grad_norm": 0.36077243089675903, |
| "learning_rate": 4.278270703067644e-06, |
| "loss": 0.422, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.7752244165170556, |
| "grad_norm": 0.3274693787097931, |
| "learning_rate": 4.272066298845102e-06, |
| "loss": 0.3884, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.7763016157989227, |
| "grad_norm": 0.29320403933525085, |
| "learning_rate": 4.265863039544967e-06, |
| "loss": 0.3889, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.77737881508079, |
| "grad_norm": 0.31313973665237427, |
| "learning_rate": 4.259660934923965e-06, |
| "loss": 0.4039, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.778456014362657, |
| "grad_norm": 0.30798840522766113, |
| "learning_rate": 4.253459994737006e-06, |
| "loss": 0.392, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.779533213644524, |
| "grad_norm": 0.32937532663345337, |
| "learning_rate": 4.247260228737171e-06, |
| "loss": 0.4135, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.7806104129263913, |
| "grad_norm": 0.31155073642730713, |
| "learning_rate": 4.241061646675695e-06, |
| "loss": 0.3711, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.7816876122082586, |
| "grad_norm": 0.3357556164264679, |
| "learning_rate": 4.234864258301943e-06, |
| "loss": 0.4044, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.7827648114901256, |
| "grad_norm": 0.3224906921386719, |
| "learning_rate": 4.228668073363413e-06, |
| "loss": 0.4244, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.7838420107719928, |
| "grad_norm": 0.3008199632167816, |
| "learning_rate": 4.222473101605703e-06, |
| "loss": 0.3974, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.78491921005386, |
| "grad_norm": 0.32574763894081116, |
| "learning_rate": 4.216279352772506e-06, |
| "loss": 0.4055, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.785996409335727, |
| "grad_norm": 0.2971203327178955, |
| "learning_rate": 4.210086836605592e-06, |
| "loss": 0.396, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.7870736086175942, |
| "grad_norm": 0.31350067257881165, |
| "learning_rate": 4.203895562844789e-06, |
| "loss": 0.3938, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.7881508078994615, |
| "grad_norm": 0.33718141913414, |
| "learning_rate": 4.197705541227973e-06, |
| "loss": 0.409, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.7892280071813285, |
| "grad_norm": 0.3132805824279785, |
| "learning_rate": 4.1915167814910515e-06, |
| "loss": 0.4023, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.7903052064631957, |
| "grad_norm": 0.3028797209262848, |
| "learning_rate": 4.1853292933679476e-06, |
| "loss": 0.4175, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.791382405745063, |
| "grad_norm": 0.31412598490715027, |
| "learning_rate": 4.1791430865905795e-06, |
| "loss": 0.3823, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.79245960502693, |
| "grad_norm": 0.298460453748703, |
| "learning_rate": 4.172958170888858e-06, |
| "loss": 0.3611, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.7935368043087971, |
| "grad_norm": 0.3055499792098999, |
| "learning_rate": 4.166774555990654e-06, |
| "loss": 0.4062, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.7946140035906644, |
| "grad_norm": 0.2997424900531769, |
| "learning_rate": 4.1605922516218e-06, |
| "loss": 0.4274, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.7956912028725314, |
| "grad_norm": 0.30274149775505066, |
| "learning_rate": 4.154411267506065e-06, |
| "loss": 0.397, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.7967684021543986, |
| "grad_norm": 0.36079904437065125, |
| "learning_rate": 4.148231613365138e-06, |
| "loss": 0.3914, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.7978456014362658, |
| "grad_norm": 0.32576999068260193, |
| "learning_rate": 4.142053298918622e-06, |
| "loss": 0.3782, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.7989228007181328, |
| "grad_norm": 0.3388504087924957, |
| "learning_rate": 4.135876333884009e-06, |
| "loss": 0.4293, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.3610943853855133, |
| "learning_rate": 4.129700727976671e-06, |
| "loss": 0.4118, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.8010771992818673, |
| "grad_norm": 0.3224228024482727, |
| "learning_rate": 4.123526490909837e-06, |
| "loss": 0.4185, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.8021543985637343, |
| "grad_norm": 0.2802104353904724, |
| "learning_rate": 4.11735363239459e-06, |
| "loss": 0.36, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.8032315978456013, |
| "grad_norm": 0.32841676473617554, |
| "learning_rate": 4.111182162139844e-06, |
| "loss": 0.3933, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.8043087971274687, |
| "grad_norm": 0.33936789631843567, |
| "learning_rate": 4.105012089852324e-06, |
| "loss": 0.3789, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.8053859964093357, |
| "grad_norm": 0.3322623670101166, |
| "learning_rate": 4.098843425236558e-06, |
| "loss": 0.4065, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.8064631956912027, |
| "grad_norm": 0.32670193910598755, |
| "learning_rate": 4.092676177994862e-06, |
| "loss": 0.4154, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.8075403949730702, |
| "grad_norm": 0.30702006816864014, |
| "learning_rate": 4.086510357827324e-06, |
| "loss": 0.4061, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.8086175942549372, |
| "grad_norm": 0.33039966225624084, |
| "learning_rate": 4.080345974431786e-06, |
| "loss": 0.415, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.8096947935368042, |
| "grad_norm": 0.30007797479629517, |
| "learning_rate": 4.074183037503827e-06, |
| "loss": 0.3897, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.8107719928186714, |
| "grad_norm": 0.3224261403083801, |
| "learning_rate": 4.068021556736755e-06, |
| "loss": 0.418, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.8118491921005386, |
| "grad_norm": 0.32132020592689514, |
| "learning_rate": 4.061861541821587e-06, |
| "loss": 0.4053, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.8129263913824056, |
| "grad_norm": 0.2934954762458801, |
| "learning_rate": 4.055703002447033e-06, |
| "loss": 0.3839, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.8140035906642729, |
| "grad_norm": 0.32513371109962463, |
| "learning_rate": 4.049545948299482e-06, |
| "loss": 0.4202, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.81508078994614, |
| "grad_norm": 0.3289554715156555, |
| "learning_rate": 4.043390389062993e-06, |
| "loss": 0.4068, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.816157989228007, |
| "grad_norm": 0.31976351141929626, |
| "learning_rate": 4.037236334419261e-06, |
| "loss": 0.437, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.8172351885098743, |
| "grad_norm": 0.2936280369758606, |
| "learning_rate": 4.0310837940476275e-06, |
| "loss": 0.3808, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.8183123877917415, |
| "grad_norm": 0.2943546175956726, |
| "learning_rate": 4.024932777625044e-06, |
| "loss": 0.403, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.8193895870736085, |
| "grad_norm": 0.3043786585330963, |
| "learning_rate": 4.018783294826071e-06, |
| "loss": 0.4002, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.8204667863554758, |
| "grad_norm": 0.32939180731773376, |
| "learning_rate": 4.0126353553228525e-06, |
| "loss": 0.3868, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.821543985637343, |
| "grad_norm": 0.30302363634109497, |
| "learning_rate": 4.006488968785106e-06, |
| "loss": 0.3876, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.82262118491921, |
| "grad_norm": 0.3275144398212433, |
| "learning_rate": 4.000344144880108e-06, |
| "loss": 0.4053, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.8236983842010772, |
| "grad_norm": 0.2963061034679413, |
| "learning_rate": 3.994200893272676e-06, |
| "loss": 0.3678, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.8247755834829444, |
| "grad_norm": 0.33487895131111145, |
| "learning_rate": 3.988059223625155e-06, |
| "loss": 0.402, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.8258527827648114, |
| "grad_norm": 0.31342634558677673, |
| "learning_rate": 3.981919145597404e-06, |
| "loss": 0.4244, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.8269299820466787, |
| "grad_norm": 0.30831387639045715, |
| "learning_rate": 3.97578066884677e-06, |
| "loss": 0.4158, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.828007181328546, |
| "grad_norm": 0.2899012863636017, |
| "learning_rate": 3.9696438030280925e-06, |
| "loss": 0.3551, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.829084380610413, |
| "grad_norm": 0.3651769459247589, |
| "learning_rate": 3.9635085577936706e-06, |
| "loss": 0.4014, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.8301615798922801, |
| "grad_norm": 0.3244316875934601, |
| "learning_rate": 3.957374942793259e-06, |
| "loss": 0.388, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.8312387791741473, |
| "grad_norm": 0.3279629349708557, |
| "learning_rate": 3.951242967674042e-06, |
| "loss": 0.4615, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8323159784560143, |
| "grad_norm": 0.2855011224746704, |
| "learning_rate": 3.94511264208063e-06, |
| "loss": 0.35, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.8333931777378814, |
| "grad_norm": 0.3636470437049866, |
| "learning_rate": 3.938983975655039e-06, |
| "loss": 0.4525, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.8344703770197488, |
| "grad_norm": 0.35178375244140625, |
| "learning_rate": 3.932856978036671e-06, |
| "loss": 0.404, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.8355475763016158, |
| "grad_norm": 0.28978657722473145, |
| "learning_rate": 3.926731658862307e-06, |
| "loss": 0.379, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.8366247755834828, |
| "grad_norm": 0.28477418422698975, |
| "learning_rate": 3.920608027766086e-06, |
| "loss": 0.3963, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.8377019748653503, |
| "grad_norm": 0.39070865511894226, |
| "learning_rate": 3.914486094379497e-06, |
| "loss": 0.4023, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.8387791741472173, |
| "grad_norm": 0.34812501072883606, |
| "learning_rate": 3.90836586833135e-06, |
| "loss": 0.3978, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.8398563734290843, |
| "grad_norm": 0.2860909104347229, |
| "learning_rate": 3.902247359247775e-06, |
| "loss": 0.3905, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.8409335727109515, |
| "grad_norm": 0.3125012218952179, |
| "learning_rate": 3.8961305767522015e-06, |
| "loss": 0.4186, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.8420107719928187, |
| "grad_norm": 0.37062105536460876, |
| "learning_rate": 3.890015530465342e-06, |
| "loss": 0.437, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.8430879712746857, |
| "grad_norm": 0.3444374203681946, |
| "learning_rate": 3.883902230005181e-06, |
| "loss": 0.3863, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.844165170556553, |
| "grad_norm": 0.3282957077026367, |
| "learning_rate": 3.877790684986953e-06, |
| "loss": 0.4154, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.8452423698384202, |
| "grad_norm": 0.31650158762931824, |
| "learning_rate": 3.871680905023133e-06, |
| "loss": 0.3834, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.8463195691202872, |
| "grad_norm": 0.3248016834259033, |
| "learning_rate": 3.865572899723423e-06, |
| "loss": 0.4203, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.8473967684021544, |
| "grad_norm": 0.36757394671440125, |
| "learning_rate": 3.859466678694728e-06, |
| "loss": 0.4197, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.8484739676840216, |
| "grad_norm": 0.314170241355896, |
| "learning_rate": 3.853362251541153e-06, |
| "loss": 0.371, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.8495511669658886, |
| "grad_norm": 0.3025366961956024, |
| "learning_rate": 3.847259627863974e-06, |
| "loss": 0.3935, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.8506283662477558, |
| "grad_norm": 0.3148907423019409, |
| "learning_rate": 3.841158817261637e-06, |
| "loss": 0.3956, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.851705565529623, |
| "grad_norm": 0.3267936110496521, |
| "learning_rate": 3.8350598293297345e-06, |
| "loss": 0.4408, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.85278276481149, |
| "grad_norm": 0.3058549761772156, |
| "learning_rate": 3.82896267366099e-06, |
| "loss": 0.3488, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.8538599640933573, |
| "grad_norm": 0.3332759439945221, |
| "learning_rate": 3.822867359845248e-06, |
| "loss": 0.4289, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.8549371633752245, |
| "grad_norm": 0.31015682220458984, |
| "learning_rate": 3.816773897469454e-06, |
| "loss": 0.411, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.8560143626570915, |
| "grad_norm": 0.3165980279445648, |
| "learning_rate": 3.8106822961176443e-06, |
| "loss": 0.4186, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.8570915619389587, |
| "grad_norm": 0.3124646544456482, |
| "learning_rate": 3.8045925653709238e-06, |
| "loss": 0.3901, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.858168761220826, |
| "grad_norm": 0.30677077174186707, |
| "learning_rate": 3.7985047148074584e-06, |
| "loss": 0.4139, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.859245960502693, |
| "grad_norm": 0.2697823643684387, |
| "learning_rate": 3.792418754002457e-06, |
| "loss": 0.3514, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.8603231597845602, |
| "grad_norm": 0.32413095235824585, |
| "learning_rate": 3.7863346925281565e-06, |
| "loss": 0.4122, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.8614003590664274, |
| "grad_norm": 0.2940352261066437, |
| "learning_rate": 3.7802525399537997e-06, |
| "loss": 0.4128, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.8624775583482944, |
| "grad_norm": 0.32525020837783813, |
| "learning_rate": 3.774172305845636e-06, |
| "loss": 0.4161, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.8635547576301614, |
| "grad_norm": 0.30089908838272095, |
| "learning_rate": 3.7680939997668942e-06, |
| "loss": 0.402, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.8646319569120289, |
| "grad_norm": 0.28563210368156433, |
| "learning_rate": 3.76201763127777e-06, |
| "loss": 0.3704, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.8657091561938959, |
| "grad_norm": 0.3135564923286438, |
| "learning_rate": 3.755943209935411e-06, |
| "loss": 0.4636, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.8667863554757629, |
| "grad_norm": 0.28799933195114136, |
| "learning_rate": 3.749870745293903e-06, |
| "loss": 0.3825, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.86786355475763, |
| "grad_norm": 0.28616318106651306, |
| "learning_rate": 3.7438002469042567e-06, |
| "loss": 0.3783, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.8689407540394973, |
| "grad_norm": 0.31118789315223694, |
| "learning_rate": 3.737731724314384e-06, |
| "loss": 0.4277, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.8700179533213643, |
| "grad_norm": 0.30302414298057556, |
| "learning_rate": 3.7316651870690957e-06, |
| "loss": 0.3949, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.8710951526032316, |
| "grad_norm": 0.30837923288345337, |
| "learning_rate": 3.725600644710078e-06, |
| "loss": 0.3731, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.8721723518850988, |
| "grad_norm": 0.3579249083995819, |
| "learning_rate": 3.7195381067758755e-06, |
| "loss": 0.412, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.8732495511669658, |
| "grad_norm": 0.29725703597068787, |
| "learning_rate": 3.7134775828018864e-06, |
| "loss": 0.3809, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.874326750448833, |
| "grad_norm": 0.3607236444950104, |
| "learning_rate": 3.707419082320336e-06, |
| "loss": 0.4317, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.8754039497307002, |
| "grad_norm": 0.3588671386241913, |
| "learning_rate": 3.70136261486027e-06, |
| "loss": 0.3785, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.8764811490125672, |
| "grad_norm": 0.26367682218551636, |
| "learning_rate": 3.6953081899475363e-06, |
| "loss": 0.3452, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.8775583482944345, |
| "grad_norm": 0.3392777740955353, |
| "learning_rate": 3.68925581710477e-06, |
| "loss": 0.3989, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.8786355475763017, |
| "grad_norm": 0.3400457203388214, |
| "learning_rate": 3.683205505851377e-06, |
| "loss": 0.4203, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.8797127468581687, |
| "grad_norm": 0.3056473433971405, |
| "learning_rate": 3.6771572657035214e-06, |
| "loss": 0.3779, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.880789946140036, |
| "grad_norm": 0.31150609254837036, |
| "learning_rate": 3.6711111061741133e-06, |
| "loss": 0.4088, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.8818671454219031, |
| "grad_norm": 0.31468626856803894, |
| "learning_rate": 3.6650670367727843e-06, |
| "loss": 0.4061, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.8829443447037701, |
| "grad_norm": 0.3258722126483917, |
| "learning_rate": 3.6590250670058848e-06, |
| "loss": 0.3803, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.8840215439856374, |
| "grad_norm": 0.3498111367225647, |
| "learning_rate": 3.652985206376455e-06, |
| "loss": 0.414, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.8850987432675046, |
| "grad_norm": 0.3353995084762573, |
| "learning_rate": 3.646947464384224e-06, |
| "loss": 0.4106, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.8861759425493716, |
| "grad_norm": 0.34021976590156555, |
| "learning_rate": 3.6409118505255896e-06, |
| "loss": 0.4082, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.8872531418312388, |
| "grad_norm": 0.30451297760009766, |
| "learning_rate": 3.6348783742935967e-06, |
| "loss": 0.3847, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.888330341113106, |
| "grad_norm": 0.2944905757904053, |
| "learning_rate": 3.628847045177932e-06, |
| "loss": 0.3673, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.889407540394973, |
| "grad_norm": 0.2865719199180603, |
| "learning_rate": 3.622817872664905e-06, |
| "loss": 0.394, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.89048473967684, |
| "grad_norm": 0.33090847730636597, |
| "learning_rate": 3.616790866237433e-06, |
| "loss": 0.3883, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.8915619389587075, |
| "grad_norm": 0.32876595854759216, |
| "learning_rate": 3.610766035375023e-06, |
| "loss": 0.3951, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.8926391382405745, |
| "grad_norm": 0.3173961639404297, |
| "learning_rate": 3.6047433895537657e-06, |
| "loss": 0.4043, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.8937163375224415, |
| "grad_norm": 0.3128277659416199, |
| "learning_rate": 3.598722938246314e-06, |
| "loss": 0.4195, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.894793536804309, |
| "grad_norm": 0.3198891282081604, |
| "learning_rate": 3.5927046909218634e-06, |
| "loss": 0.396, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.895870736086176, |
| "grad_norm": 0.3302939832210541, |
| "learning_rate": 3.5866886570461486e-06, |
| "loss": 0.4144, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.896947935368043, |
| "grad_norm": 0.326718270778656, |
| "learning_rate": 3.580674846081421e-06, |
| "loss": 0.3844, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.8980251346499102, |
| "grad_norm": 0.30662721395492554, |
| "learning_rate": 3.574663267486438e-06, |
| "loss": 0.3882, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.8991023339317774, |
| "grad_norm": 0.3321872651576996, |
| "learning_rate": 3.5686539307164427e-06, |
| "loss": 0.4391, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.9001795332136444, |
| "grad_norm": 0.32872331142425537, |
| "learning_rate": 3.5626468452231534e-06, |
| "loss": 0.3723, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.9012567324955116, |
| "grad_norm": 0.3216870427131653, |
| "learning_rate": 3.556642020454747e-06, |
| "loss": 0.4034, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.9023339317773789, |
| "grad_norm": 0.3641759753227234, |
| "learning_rate": 3.5506394658558474e-06, |
| "loss": 0.4219, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.9034111310592459, |
| "grad_norm": 0.3454039990901947, |
| "learning_rate": 3.5446391908675036e-06, |
| "loss": 0.3849, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.904488330341113, |
| "grad_norm": 0.31155654788017273, |
| "learning_rate": 3.538641204927181e-06, |
| "loss": 0.369, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.9055655296229803, |
| "grad_norm": 0.3384184241294861, |
| "learning_rate": 3.532645517468748e-06, |
| "loss": 0.4074, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.9066427289048473, |
| "grad_norm": 0.3226218521595001, |
| "learning_rate": 3.5266521379224506e-06, |
| "loss": 0.3892, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.9077199281867145, |
| "grad_norm": 0.3048873245716095, |
| "learning_rate": 3.5206610757149123e-06, |
| "loss": 0.3762, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.9087971274685818, |
| "grad_norm": 0.3158170282840729, |
| "learning_rate": 3.5146723402691054e-06, |
| "loss": 0.4129, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.9098743267504488, |
| "grad_norm": 0.2803991436958313, |
| "learning_rate": 3.508685941004348e-06, |
| "loss": 0.3642, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.910951526032316, |
| "grad_norm": 0.28167566657066345, |
| "learning_rate": 3.50270188733628e-06, |
| "loss": 0.3804, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.9120287253141832, |
| "grad_norm": 0.32653680443763733, |
| "learning_rate": 3.496720188676856e-06, |
| "loss": 0.4318, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.9131059245960502, |
| "grad_norm": 0.3046547472476959, |
| "learning_rate": 3.490740854434321e-06, |
| "loss": 0.3754, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.9141831238779174, |
| "grad_norm": 0.305685430765152, |
| "learning_rate": 3.4847638940132054e-06, |
| "loss": 0.4053, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.9152603231597847, |
| "grad_norm": 0.3087405562400818, |
| "learning_rate": 3.478789316814306e-06, |
| "loss": 0.3818, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.9163375224416517, |
| "grad_norm": 0.31966203451156616, |
| "learning_rate": 3.472817132234669e-06, |
| "loss": 0.4288, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.917414721723519, |
| "grad_norm": 0.27336421608924866, |
| "learning_rate": 3.466847349667578e-06, |
| "loss": 0.3642, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.9184919210053861, |
| "grad_norm": 0.32802098989486694, |
| "learning_rate": 3.460879978502538e-06, |
| "loss": 0.427, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.9195691202872531, |
| "grad_norm": 0.314151406288147, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.3953, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.9206463195691201, |
| "grad_norm": 0.275924950838089, |
| "learning_rate": 3.4489525079176612e-06, |
| "loss": 0.3801, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.9217235188509876, |
| "grad_norm": 0.290600061416626, |
| "learning_rate": 3.442992427257812e-06, |
| "loss": 0.4036, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.9228007181328546, |
| "grad_norm": 0.27883657813072205, |
| "learning_rate": 3.4370347955199634e-06, |
| "loss": 0.3671, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.9238779174147216, |
| "grad_norm": 0.29276221990585327, |
| "learning_rate": 3.43107962207451e-06, |
| "loss": 0.396, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.924955116696589, |
| "grad_norm": 0.3173641264438629, |
| "learning_rate": 3.4251269162879826e-06, |
| "loss": 0.4481, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.926032315978456, |
| "grad_norm": 0.2623595893383026, |
| "learning_rate": 3.419176687523024e-06, |
| "loss": 0.3533, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.927109515260323, |
| "grad_norm": 0.310444712638855, |
| "learning_rate": 3.4132289451383866e-06, |
| "loss": 0.4047, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.9281867145421903, |
| "grad_norm": 0.29494354128837585, |
| "learning_rate": 3.4072836984889137e-06, |
| "loss": 0.3804, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.9292639138240575, |
| "grad_norm": 0.3028341233730316, |
| "learning_rate": 3.401340956925515e-06, |
| "loss": 0.414, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.9303411131059245, |
| "grad_norm": 0.26845231652259827, |
| "learning_rate": 3.3954007297951693e-06, |
| "loss": 0.3837, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.9314183123877917, |
| "grad_norm": 0.26886746287345886, |
| "learning_rate": 3.3894630264408936e-06, |
| "loss": 0.3843, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.932495511669659, |
| "grad_norm": 0.3210623860359192, |
| "learning_rate": 3.3835278562017405e-06, |
| "loss": 0.4072, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.933572710951526, |
| "grad_norm": 0.2861291468143463, |
| "learning_rate": 3.3775952284127766e-06, |
| "loss": 0.3779, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.9346499102333932, |
| "grad_norm": 0.2961629331111908, |
| "learning_rate": 3.3716651524050677e-06, |
| "loss": 0.439, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.9357271095152604, |
| "grad_norm": 0.3172108829021454, |
| "learning_rate": 3.3657376375056684e-06, |
| "loss": 0.4195, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.9368043087971274, |
| "grad_norm": 0.3115575313568115, |
| "learning_rate": 3.3598126930376055e-06, |
| "loss": 0.3932, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.9378815080789946, |
| "grad_norm": 0.2706233561038971, |
| "learning_rate": 3.353890328319861e-06, |
| "loss": 0.3559, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.9389587073608618, |
| "grad_norm": 0.3000124394893646, |
| "learning_rate": 3.347970552667361e-06, |
| "loss": 0.4006, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9400359066427288, |
| "grad_norm": 0.338463693857193, |
| "learning_rate": 3.3420533753909566e-06, |
| "loss": 0.3986, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.941113105924596, |
| "grad_norm": 0.3336849808692932, |
| "learning_rate": 3.3361388057974148e-06, |
| "loss": 0.4297, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.9421903052064633, |
| "grad_norm": 0.2952408492565155, |
| "learning_rate": 3.3302268531894023e-06, |
| "loss": 0.3764, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.9432675044883303, |
| "grad_norm": 0.3435702919960022, |
| "learning_rate": 3.3243175268654656e-06, |
| "loss": 0.4352, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.9443447037701975, |
| "grad_norm": 0.3342050611972809, |
| "learning_rate": 3.3184108361200235e-06, |
| "loss": 0.3898, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.9454219030520647, |
| "grad_norm": 0.3066212832927704, |
| "learning_rate": 3.3125067902433482e-06, |
| "loss": 0.3899, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.9464991023339318, |
| "grad_norm": 0.3134807050228119, |
| "learning_rate": 3.306605398521555e-06, |
| "loss": 0.4091, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.947576301615799, |
| "grad_norm": 0.3158106803894043, |
| "learning_rate": 3.300706670236579e-06, |
| "loss": 0.3984, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.9486535008976662, |
| "grad_norm": 0.32776862382888794, |
| "learning_rate": 3.29481061466617e-06, |
| "loss": 0.3836, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.9497307001795332, |
| "grad_norm": 0.2883926331996918, |
| "learning_rate": 3.2889172410838755e-06, |
| "loss": 0.4111, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.9508078994614002, |
| "grad_norm": 0.32595670223236084, |
| "learning_rate": 3.283026558759019e-06, |
| "loss": 0.3967, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.9518850987432677, |
| "grad_norm": 0.3464212417602539, |
| "learning_rate": 3.2771385769566976e-06, |
| "loss": 0.4291, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.9529622980251347, |
| "grad_norm": 0.3280077874660492, |
| "learning_rate": 3.2712533049377543e-06, |
| "loss": 0.369, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.9540394973070017, |
| "grad_norm": 0.28756144642829895, |
| "learning_rate": 3.2653707519587756e-06, |
| "loss": 0.3762, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.9551166965888689, |
| "grad_norm": 0.29988667368888855, |
| "learning_rate": 3.259490927272071e-06, |
| "loss": 0.3832, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.956193895870736, |
| "grad_norm": 0.29910799860954285, |
| "learning_rate": 3.253613840125654e-06, |
| "loss": 0.407, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.9572710951526031, |
| "grad_norm": 0.3187161982059479, |
| "learning_rate": 3.2477394997632373e-06, |
| "loss": 0.4497, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.9583482944344703, |
| "grad_norm": 0.2946453094482422, |
| "learning_rate": 3.241867915424211e-06, |
| "loss": 0.3617, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.9594254937163376, |
| "grad_norm": 0.3105182349681854, |
| "learning_rate": 3.235999096343633e-06, |
| "loss": 0.3864, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.9605026929982046, |
| "grad_norm": 0.3276148736476898, |
| "learning_rate": 3.230133051752207e-06, |
| "loss": 0.4242, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.9615798922800718, |
| "grad_norm": 0.30322229862213135, |
| "learning_rate": 3.22426979087628e-06, |
| "loss": 0.3693, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.962657091561939, |
| "grad_norm": 0.3252699673175812, |
| "learning_rate": 3.2184093229378117e-06, |
| "loss": 0.4265, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.963734290843806, |
| "grad_norm": 0.3165656626224518, |
| "learning_rate": 3.212551657154376e-06, |
| "loss": 0.427, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.9648114901256732, |
| "grad_norm": 0.29955458641052246, |
| "learning_rate": 3.2066968027391377e-06, |
| "loss": 0.3604, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.9658886894075405, |
| "grad_norm": 0.3262845277786255, |
| "learning_rate": 3.200844768900837e-06, |
| "loss": 0.4178, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.9669658886894075, |
| "grad_norm": 0.3196795582771301, |
| "learning_rate": 3.1949955648437824e-06, |
| "loss": 0.3918, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.9680430879712747, |
| "grad_norm": 0.34281331300735474, |
| "learning_rate": 3.1891491997678302e-06, |
| "loss": 0.3778, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.969120287253142, |
| "grad_norm": 0.3226211369037628, |
| "learning_rate": 3.1833056828683673e-06, |
| "loss": 0.3971, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.970197486535009, |
| "grad_norm": 0.29890337586402893, |
| "learning_rate": 3.177465023336306e-06, |
| "loss": 0.3857, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.9712746858168761, |
| "grad_norm": 0.3117425739765167, |
| "learning_rate": 3.171627230358063e-06, |
| "loss": 0.4003, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.9723518850987434, |
| "grad_norm": 0.31731322407722473, |
| "learning_rate": 3.1657923131155444e-06, |
| "loss": 0.3518, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.9734290843806104, |
| "grad_norm": 0.3363853693008423, |
| "learning_rate": 3.1599602807861374e-06, |
| "loss": 0.395, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.9745062836624776, |
| "grad_norm": 0.3119727075099945, |
| "learning_rate": 3.1541311425426856e-06, |
| "loss": 0.4121, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.9755834829443448, |
| "grad_norm": 0.3245994746685028, |
| "learning_rate": 3.1483049075534853e-06, |
| "loss": 0.4198, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.9766606822262118, |
| "grad_norm": 0.30005812644958496, |
| "learning_rate": 3.1424815849822665e-06, |
| "loss": 0.3959, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.9777378815080788, |
| "grad_norm": 0.3131362199783325, |
| "learning_rate": 3.136661183988175e-06, |
| "loss": 0.409, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.9788150807899463, |
| "grad_norm": 0.30022886395454407, |
| "learning_rate": 3.130843713725765e-06, |
| "loss": 0.3991, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.9798922800718133, |
| "grad_norm": 0.3111904263496399, |
| "learning_rate": 3.12502918334498e-06, |
| "loss": 0.3878, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.9809694793536803, |
| "grad_norm": 0.326963871717453, |
| "learning_rate": 3.119217601991139e-06, |
| "loss": 0.4197, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.9820466786355477, |
| "grad_norm": 0.310465544462204, |
| "learning_rate": 3.1134089788049226e-06, |
| "loss": 0.4112, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.9831238779174147, |
| "grad_norm": 0.2793821692466736, |
| "learning_rate": 3.107603322922359e-06, |
| "loss": 0.3555, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.9842010771992817, |
| "grad_norm": 0.32500728964805603, |
| "learning_rate": 3.1018006434748115e-06, |
| "loss": 0.4168, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.985278276481149, |
| "grad_norm": 0.31032678484916687, |
| "learning_rate": 3.0960009495889564e-06, |
| "loss": 0.3892, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.9863554757630162, |
| "grad_norm": 0.3267922103404999, |
| "learning_rate": 3.090204250386779e-06, |
| "loss": 0.4229, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.9874326750448832, |
| "grad_norm": 0.3038681447505951, |
| "learning_rate": 3.084410554985553e-06, |
| "loss": 0.3881, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.9885098743267504, |
| "grad_norm": 0.3073458969593048, |
| "learning_rate": 3.078619872497827e-06, |
| "loss": 0.3925, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.9895870736086176, |
| "grad_norm": 0.2960895597934723, |
| "learning_rate": 3.072832212031413e-06, |
| "loss": 0.3823, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.9906642728904846, |
| "grad_norm": 0.32073989510536194, |
| "learning_rate": 3.0670475826893663e-06, |
| "loss": 0.414, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.9917414721723519, |
| "grad_norm": 0.2960592806339264, |
| "learning_rate": 3.0612659935699774e-06, |
| "loss": 0.3875, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.992818671454219, |
| "grad_norm": 0.2826525866985321, |
| "learning_rate": 3.055487453766755e-06, |
| "loss": 0.3314, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.993895870736086, |
| "grad_norm": 0.3213357627391815, |
| "learning_rate": 3.049711972368411e-06, |
| "loss": 0.4022, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.9949730700179533, |
| "grad_norm": 0.3375919759273529, |
| "learning_rate": 3.043939558458846e-06, |
| "loss": 0.4042, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.9960502692998205, |
| "grad_norm": 0.3088492453098297, |
| "learning_rate": 3.038170221117138e-06, |
| "loss": 0.4278, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.9971274685816875, |
| "grad_norm": 0.31650954484939575, |
| "learning_rate": 3.032403969417523e-06, |
| "loss": 0.3807, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.9982046678635548, |
| "grad_norm": 0.3157062232494354, |
| "learning_rate": 3.026640812429388e-06, |
| "loss": 0.4152, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.999281867145422, |
| "grad_norm": 0.292804479598999, |
| "learning_rate": 3.0208807592172486e-06, |
| "loss": 0.4024, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.000359066427289, |
| "grad_norm": 0.5354664325714111, |
| "learning_rate": 3.01512381884074e-06, |
| "loss": 0.6262, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.001436265709156, |
| "grad_norm": 0.32860124111175537, |
| "learning_rate": 3.0093700003546023e-06, |
| "loss": 0.3948, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.0025134649910235, |
| "grad_norm": 0.29116785526275635, |
| "learning_rate": 3.0036193128086667e-06, |
| "loss": 0.3398, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.0035906642728905, |
| "grad_norm": 0.3160048723220825, |
| "learning_rate": 2.9978717652478343e-06, |
| "loss": 0.3915, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.0046678635547575, |
| "grad_norm": 0.27923333644866943, |
| "learning_rate": 2.9921273667120736e-06, |
| "loss": 0.359, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.005745062836625, |
| "grad_norm": 0.3296511769294739, |
| "learning_rate": 2.986386126236398e-06, |
| "loss": 0.3732, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.006822262118492, |
| "grad_norm": 0.3194176256656647, |
| "learning_rate": 2.980648052850852e-06, |
| "loss": 0.3832, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.007899461400359, |
| "grad_norm": 0.33017516136169434, |
| "learning_rate": 2.9749131555805035e-06, |
| "loss": 0.3889, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.0089766606822264, |
| "grad_norm": 0.2907126843929291, |
| "learning_rate": 2.969181443445417e-06, |
| "loss": 0.3683, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.0100538599640934, |
| "grad_norm": 0.2984246015548706, |
| "learning_rate": 2.963452925460654e-06, |
| "loss": 0.3749, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.0111310592459604, |
| "grad_norm": 0.30581575632095337, |
| "learning_rate": 2.9577276106362523e-06, |
| "loss": 0.3946, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.012208258527828, |
| "grad_norm": 0.2967156171798706, |
| "learning_rate": 2.952005507977207e-06, |
| "loss": 0.3681, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.013285457809695, |
| "grad_norm": 0.3053143322467804, |
| "learning_rate": 2.946286626483463e-06, |
| "loss": 0.3785, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.014362657091562, |
| "grad_norm": 0.2774108350276947, |
| "learning_rate": 2.9405709751499017e-06, |
| "loss": 0.3286, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.0154398563734293, |
| "grad_norm": 0.3262989819049835, |
| "learning_rate": 2.9348585629663213e-06, |
| "loss": 0.3738, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.0165170556552963, |
| "grad_norm": 0.3032521903514862, |
| "learning_rate": 2.9291493989174234e-06, |
| "loss": 0.384, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.0175942549371633, |
| "grad_norm": 0.30339065194129944, |
| "learning_rate": 2.923443491982804e-06, |
| "loss": 0.3763, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.0186714542190307, |
| "grad_norm": 0.30798229575157166, |
| "learning_rate": 2.9177408511369395e-06, |
| "loss": 0.3555, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.0197486535008977, |
| "grad_norm": 0.29715853929519653, |
| "learning_rate": 2.9120414853491574e-06, |
| "loss": 0.3691, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.0208258527827647, |
| "grad_norm": 0.30873653292655945, |
| "learning_rate": 2.9063454035836447e-06, |
| "loss": 0.3791, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.021903052064632, |
| "grad_norm": 0.3008333742618561, |
| "learning_rate": 2.900652614799422e-06, |
| "loss": 0.3797, |
| "step": 1877 |
| }, |
| { |
| "epoch": 2.022980251346499, |
| "grad_norm": 0.3410451114177704, |
| "learning_rate": 2.8949631279503265e-06, |
| "loss": 0.3612, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.024057450628366, |
| "grad_norm": 0.3563460409641266, |
| "learning_rate": 2.889276951985005e-06, |
| "loss": 0.3818, |
| "step": 1879 |
| }, |
| { |
| "epoch": 2.025134649910233, |
| "grad_norm": 0.29962658882141113, |
| "learning_rate": 2.8835940958468954e-06, |
| "loss": 0.3624, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.0262118491921006, |
| "grad_norm": 0.3057413399219513, |
| "learning_rate": 2.877914568474218e-06, |
| "loss": 0.3688, |
| "step": 1881 |
| }, |
| { |
| "epoch": 2.0272890484739676, |
| "grad_norm": 0.32542818784713745, |
| "learning_rate": 2.872238378799949e-06, |
| "loss": 0.395, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.0283662477558346, |
| "grad_norm": 0.2896184027194977, |
| "learning_rate": 2.866565535751822e-06, |
| "loss": 0.364, |
| "step": 1883 |
| }, |
| { |
| "epoch": 2.029443447037702, |
| "grad_norm": 0.34391382336616516, |
| "learning_rate": 2.8608960482523058e-06, |
| "loss": 0.3982, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.030520646319569, |
| "grad_norm": 0.31426018476486206, |
| "learning_rate": 2.8552299252185915e-06, |
| "loss": 0.3758, |
| "step": 1885 |
| }, |
| { |
| "epoch": 2.031597845601436, |
| "grad_norm": 0.29396316409111023, |
| "learning_rate": 2.849567175562574e-06, |
| "loss": 0.3304, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.0326750448833035, |
| "grad_norm": 0.3297179043292999, |
| "learning_rate": 2.8439078081908487e-06, |
| "loss": 0.424, |
| "step": 1887 |
| }, |
| { |
| "epoch": 2.0337522441651705, |
| "grad_norm": 0.2860085368156433, |
| "learning_rate": 2.8382518320046877e-06, |
| "loss": 0.3485, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.0348294434470375, |
| "grad_norm": 0.31872880458831787, |
| "learning_rate": 2.8325992559000315e-06, |
| "loss": 0.3862, |
| "step": 1889 |
| }, |
| { |
| "epoch": 2.035906642728905, |
| "grad_norm": 0.30101990699768066, |
| "learning_rate": 2.826950088767469e-06, |
| "loss": 0.373, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.036983842010772, |
| "grad_norm": 0.3038983643054962, |
| "learning_rate": 2.82130433949223e-06, |
| "loss": 0.3708, |
| "step": 1891 |
| }, |
| { |
| "epoch": 2.038061041292639, |
| "grad_norm": 0.27234700322151184, |
| "learning_rate": 2.8156620169541698e-06, |
| "loss": 0.3362, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.0391382405745064, |
| "grad_norm": 0.3316778242588043, |
| "learning_rate": 2.8100231300277514e-06, |
| "loss": 0.3572, |
| "step": 1893 |
| }, |
| { |
| "epoch": 2.0402154398563734, |
| "grad_norm": 0.3273511528968811, |
| "learning_rate": 2.8043876875820363e-06, |
| "loss": 0.3761, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.0412926391382404, |
| "grad_norm": 0.3026241660118103, |
| "learning_rate": 2.798755698480668e-06, |
| "loss": 0.3997, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.042369838420108, |
| "grad_norm": 0.2999473512172699, |
| "learning_rate": 2.793127171581854e-06, |
| "loss": 0.3829, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.043447037701975, |
| "grad_norm": 0.31387123465538025, |
| "learning_rate": 2.7875021157383634e-06, |
| "loss": 0.3659, |
| "step": 1897 |
| }, |
| { |
| "epoch": 2.044524236983842, |
| "grad_norm": 0.3155645430088043, |
| "learning_rate": 2.7818805397975034e-06, |
| "loss": 0.3849, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.0456014362657093, |
| "grad_norm": 0.31519466638565063, |
| "learning_rate": 2.776262452601104e-06, |
| "loss": 0.3828, |
| "step": 1899 |
| }, |
| { |
| "epoch": 2.0466786355475763, |
| "grad_norm": 0.3476317226886749, |
| "learning_rate": 2.770647862985512e-06, |
| "loss": 0.4074, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0477558348294433, |
| "grad_norm": 0.2855173349380493, |
| "learning_rate": 2.765036779781571e-06, |
| "loss": 0.3435, |
| "step": 1901 |
| }, |
| { |
| "epoch": 2.048833034111311, |
| "grad_norm": 0.3068677484989166, |
| "learning_rate": 2.7594292118146136e-06, |
| "loss": 0.4177, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.049910233393178, |
| "grad_norm": 0.30308929085731506, |
| "learning_rate": 2.753825167904438e-06, |
| "loss": 0.3721, |
| "step": 1903 |
| }, |
| { |
| "epoch": 2.050987432675045, |
| "grad_norm": 0.3041457235813141, |
| "learning_rate": 2.748224656865304e-06, |
| "loss": 0.3664, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.0520646319569122, |
| "grad_norm": 0.29610303044319153, |
| "learning_rate": 2.7426276875059145e-06, |
| "loss": 0.3573, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.0531418312387792, |
| "grad_norm": 0.2744855284690857, |
| "learning_rate": 2.737034268629397e-06, |
| "loss": 0.372, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.0542190305206462, |
| "grad_norm": 0.2901430130004883, |
| "learning_rate": 2.731444409033297e-06, |
| "loss": 0.3437, |
| "step": 1907 |
| }, |
| { |
| "epoch": 2.0552962298025133, |
| "grad_norm": 0.30917778611183167, |
| "learning_rate": 2.7258581175095657e-06, |
| "loss": 0.3958, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.0563734290843807, |
| "grad_norm": 0.30461421608924866, |
| "learning_rate": 2.7202754028445375e-06, |
| "loss": 0.3906, |
| "step": 1909 |
| }, |
| { |
| "epoch": 2.0574506283662477, |
| "grad_norm": 0.2898961007595062, |
| "learning_rate": 2.7146962738189254e-06, |
| "loss": 0.3711, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.0585278276481147, |
| "grad_norm": 0.3014693856239319, |
| "learning_rate": 2.709120739207798e-06, |
| "loss": 0.3599, |
| "step": 1911 |
| }, |
| { |
| "epoch": 2.059605026929982, |
| "grad_norm": 0.34578463435173035, |
| "learning_rate": 2.7035488077805736e-06, |
| "loss": 0.4243, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.060682226211849, |
| "grad_norm": 0.2798299789428711, |
| "learning_rate": 2.6979804883010052e-06, |
| "loss": 0.3368, |
| "step": 1913 |
| }, |
| { |
| "epoch": 2.061759425493716, |
| "grad_norm": 0.31570667028427124, |
| "learning_rate": 2.6924157895271563e-06, |
| "loss": 0.4181, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.0628366247755836, |
| "grad_norm": 0.27935585379600525, |
| "learning_rate": 2.6868547202114047e-06, |
| "loss": 0.358, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.0639138240574506, |
| "grad_norm": 0.29281607270240784, |
| "learning_rate": 2.681297289100418e-06, |
| "loss": 0.393, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.0649910233393176, |
| "grad_norm": 0.2860967516899109, |
| "learning_rate": 2.6757435049351353e-06, |
| "loss": 0.3768, |
| "step": 1917 |
| }, |
| { |
| "epoch": 2.066068222621185, |
| "grad_norm": 0.3133350610733032, |
| "learning_rate": 2.670193376450767e-06, |
| "loss": 0.3925, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.067145421903052, |
| "grad_norm": 0.33049872517585754, |
| "learning_rate": 2.6646469123767694e-06, |
| "loss": 0.3693, |
| "step": 1919 |
| }, |
| { |
| "epoch": 2.068222621184919, |
| "grad_norm": 0.3234219253063202, |
| "learning_rate": 2.6591041214368383e-06, |
| "loss": 0.3943, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.0692998204667865, |
| "grad_norm": 0.2815258204936981, |
| "learning_rate": 2.6535650123488916e-06, |
| "loss": 0.3661, |
| "step": 1921 |
| }, |
| { |
| "epoch": 2.0703770197486535, |
| "grad_norm": 0.33411794900894165, |
| "learning_rate": 2.648029593825051e-06, |
| "loss": 0.3803, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.0714542190305205, |
| "grad_norm": 0.3197128474712372, |
| "learning_rate": 2.642497874571641e-06, |
| "loss": 0.3744, |
| "step": 1923 |
| }, |
| { |
| "epoch": 2.072531418312388, |
| "grad_norm": 0.29649585485458374, |
| "learning_rate": 2.636969863289164e-06, |
| "loss": 0.3939, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.073608617594255, |
| "grad_norm": 0.28606873750686646, |
| "learning_rate": 2.63144556867229e-06, |
| "loss": 0.3692, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.074685816876122, |
| "grad_norm": 0.2942201793193817, |
| "learning_rate": 2.6259249994098457e-06, |
| "loss": 0.3543, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.0757630161579894, |
| "grad_norm": 0.30836036801338196, |
| "learning_rate": 2.620408164184798e-06, |
| "loss": 0.3723, |
| "step": 1927 |
| }, |
| { |
| "epoch": 2.0768402154398564, |
| "grad_norm": 0.2905234396457672, |
| "learning_rate": 2.614895071674235e-06, |
| "loss": 0.3891, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.0779174147217234, |
| "grad_norm": 0.2854272425174713, |
| "learning_rate": 2.6093857305493666e-06, |
| "loss": 0.3601, |
| "step": 1929 |
| }, |
| { |
| "epoch": 2.078994614003591, |
| "grad_norm": 0.29839953780174255, |
| "learning_rate": 2.6038801494754994e-06, |
| "loss": 0.3751, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.080071813285458, |
| "grad_norm": 0.2869094908237457, |
| "learning_rate": 2.5983783371120214e-06, |
| "loss": 0.3696, |
| "step": 1931 |
| }, |
| { |
| "epoch": 2.081149012567325, |
| "grad_norm": 0.30146855115890503, |
| "learning_rate": 2.592880302112399e-06, |
| "loss": 0.404, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.082226211849192, |
| "grad_norm": 0.2710917890071869, |
| "learning_rate": 2.5873860531241544e-06, |
| "loss": 0.3731, |
| "step": 1933 |
| }, |
| { |
| "epoch": 2.0833034111310593, |
| "grad_norm": 0.29889118671417236, |
| "learning_rate": 2.581895598788857e-06, |
| "loss": 0.4159, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.0843806104129263, |
| "grad_norm": 0.2817692756652832, |
| "learning_rate": 2.5764089477421067e-06, |
| "loss": 0.3721, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.0854578096947933, |
| "grad_norm": 0.27690258622169495, |
| "learning_rate": 2.570926108613521e-06, |
| "loss": 0.3571, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.0865350089766608, |
| "grad_norm": 0.28059402108192444, |
| "learning_rate": 2.565447090026724e-06, |
| "loss": 0.3534, |
| "step": 1937 |
| }, |
| { |
| "epoch": 2.087612208258528, |
| "grad_norm": 0.3004077672958374, |
| "learning_rate": 2.5599719005993264e-06, |
| "loss": 0.4138, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.088689407540395, |
| "grad_norm": 0.31813308596611023, |
| "learning_rate": 2.5545005489429185e-06, |
| "loss": 0.3837, |
| "step": 1939 |
| }, |
| { |
| "epoch": 2.0897666068222622, |
| "grad_norm": 0.27622514963150024, |
| "learning_rate": 2.5490330436630563e-06, |
| "loss": 0.3602, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.0908438061041292, |
| "grad_norm": 0.2941945195198059, |
| "learning_rate": 2.543569393359243e-06, |
| "loss": 0.3663, |
| "step": 1941 |
| }, |
| { |
| "epoch": 2.0919210053859962, |
| "grad_norm": 0.31598734855651855, |
| "learning_rate": 2.538109606624922e-06, |
| "loss": 0.3766, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.0929982046678637, |
| "grad_norm": 0.29513019323349, |
| "learning_rate": 2.5326536920474576e-06, |
| "loss": 0.4152, |
| "step": 1943 |
| }, |
| { |
| "epoch": 2.0940754039497307, |
| "grad_norm": 0.27299997210502625, |
| "learning_rate": 2.5272016582081236e-06, |
| "loss": 0.3941, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.0951526032315977, |
| "grad_norm": 0.27696993947029114, |
| "learning_rate": 2.5217535136820927e-06, |
| "loss": 0.3733, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.096229802513465, |
| "grad_norm": 0.29740098118782043, |
| "learning_rate": 2.5163092670384144e-06, |
| "loss": 0.3629, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.097307001795332, |
| "grad_norm": 0.2868814468383789, |
| "learning_rate": 2.5108689268400133e-06, |
| "loss": 0.3731, |
| "step": 1947 |
| }, |
| { |
| "epoch": 2.098384201077199, |
| "grad_norm": 0.2974447011947632, |
| "learning_rate": 2.50543250164367e-06, |
| "loss": 0.3763, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.0994614003590666, |
| "grad_norm": 0.30036285519599915, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.3798, |
| "step": 1949 |
| }, |
| { |
| "epoch": 2.1005385996409336, |
| "grad_norm": 0.2924955189228058, |
| "learning_rate": 2.4945714304534584e-06, |
| "loss": 0.3671, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.1016157989228006, |
| "grad_norm": 0.2933090925216675, |
| "learning_rate": 2.489146801542307e-06, |
| "loss": 0.3877, |
| "step": 1951 |
| }, |
| { |
| "epoch": 2.102692998204668, |
| "grad_norm": 0.2935760021209717, |
| "learning_rate": 2.4837261217986134e-06, |
| "loss": 0.3562, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.103770197486535, |
| "grad_norm": 0.2789822518825531, |
| "learning_rate": 2.4783093997482367e-06, |
| "loss": 0.3356, |
| "step": 1953 |
| }, |
| { |
| "epoch": 2.104847396768402, |
| "grad_norm": 0.33238011598587036, |
| "learning_rate": 2.472896643910802e-06, |
| "loss": 0.4118, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.1059245960502695, |
| "grad_norm": 0.2929084002971649, |
| "learning_rate": 2.4674878627997053e-06, |
| "loss": 0.3679, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.1070017953321365, |
| "grad_norm": 0.27412042021751404, |
| "learning_rate": 2.4620830649220874e-06, |
| "loss": 0.3467, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.1080789946140035, |
| "grad_norm": 0.43696802854537964, |
| "learning_rate": 2.4566822587788234e-06, |
| "loss": 0.3829, |
| "step": 1957 |
| }, |
| { |
| "epoch": 2.109156193895871, |
| "grad_norm": 0.2916047275066376, |
| "learning_rate": 2.4512854528645143e-06, |
| "loss": 0.3748, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.110233393177738, |
| "grad_norm": 0.2847961187362671, |
| "learning_rate": 2.445892655667462e-06, |
| "loss": 0.3807, |
| "step": 1959 |
| }, |
| { |
| "epoch": 2.111310592459605, |
| "grad_norm": 0.3016386330127716, |
| "learning_rate": 2.440503875669668e-06, |
| "loss": 0.3699, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.1123877917414724, |
| "grad_norm": 0.2906850576400757, |
| "learning_rate": 2.435119121346817e-06, |
| "loss": 0.3623, |
| "step": 1961 |
| }, |
| { |
| "epoch": 2.1134649910233394, |
| "grad_norm": 0.2936685085296631, |
| "learning_rate": 2.4297384011682594e-06, |
| "loss": 0.3895, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.1145421903052064, |
| "grad_norm": 0.28310006856918335, |
| "learning_rate": 2.4243617235969996e-06, |
| "loss": 0.3601, |
| "step": 1963 |
| }, |
| { |
| "epoch": 2.1156193895870734, |
| "grad_norm": 0.30220288038253784, |
| "learning_rate": 2.418989097089685e-06, |
| "loss": 0.3806, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.116696588868941, |
| "grad_norm": 0.29966917634010315, |
| "learning_rate": 2.413620530096592e-06, |
| "loss": 0.3688, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.117773788150808, |
| "grad_norm": 0.28805071115493774, |
| "learning_rate": 2.408256031061611e-06, |
| "loss": 0.3635, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.118850987432675, |
| "grad_norm": 0.3081320524215698, |
| "learning_rate": 2.402895608422235e-06, |
| "loss": 0.4241, |
| "step": 1967 |
| }, |
| { |
| "epoch": 2.1199281867145423, |
| "grad_norm": 0.26345717906951904, |
| "learning_rate": 2.3975392706095447e-06, |
| "loss": 0.3134, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.1210053859964093, |
| "grad_norm": 0.35153645277023315, |
| "learning_rate": 2.392187026048198e-06, |
| "loss": 0.4158, |
| "step": 1969 |
| }, |
| { |
| "epoch": 2.1220825852782763, |
| "grad_norm": 0.3314216434955597, |
| "learning_rate": 2.386838883156412e-06, |
| "loss": 0.4069, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.1231597845601438, |
| "grad_norm": 0.2866481840610504, |
| "learning_rate": 2.3814948503459504e-06, |
| "loss": 0.3425, |
| "step": 1971 |
| }, |
| { |
| "epoch": 2.1242369838420108, |
| "grad_norm": 0.31775057315826416, |
| "learning_rate": 2.376154936022119e-06, |
| "loss": 0.3743, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.1253141831238778, |
| "grad_norm": 0.30802255868911743, |
| "learning_rate": 2.370819148583741e-06, |
| "loss": 0.4065, |
| "step": 1973 |
| }, |
| { |
| "epoch": 2.126391382405745, |
| "grad_norm": 0.2827637493610382, |
| "learning_rate": 2.365487496423152e-06, |
| "loss": 0.3773, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.127468581687612, |
| "grad_norm": 0.2885690927505493, |
| "learning_rate": 2.3601599879261794e-06, |
| "loss": 0.3828, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.128545780969479, |
| "grad_norm": 0.3132476806640625, |
| "learning_rate": 2.3548366314721373e-06, |
| "loss": 0.3976, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.1296229802513467, |
| "grad_norm": 0.2819250226020813, |
| "learning_rate": 2.3495174354338083e-06, |
| "loss": 0.3635, |
| "step": 1977 |
| }, |
| { |
| "epoch": 2.1307001795332137, |
| "grad_norm": 0.2861512303352356, |
| "learning_rate": 2.344202408177428e-06, |
| "loss": 0.3617, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.1317773788150807, |
| "grad_norm": 0.3052145540714264, |
| "learning_rate": 2.3388915580626807e-06, |
| "loss": 0.3758, |
| "step": 1979 |
| }, |
| { |
| "epoch": 2.132854578096948, |
| "grad_norm": 0.27510857582092285, |
| "learning_rate": 2.333584893442675e-06, |
| "loss": 0.3805, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.133931777378815, |
| "grad_norm": 0.30943563580513, |
| "learning_rate": 2.3282824226639393e-06, |
| "loss": 0.3907, |
| "step": 1981 |
| }, |
| { |
| "epoch": 2.135008976660682, |
| "grad_norm": 0.29370906949043274, |
| "learning_rate": 2.3229841540664065e-06, |
| "loss": 0.3815, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.1360861759425496, |
| "grad_norm": 0.27837884426116943, |
| "learning_rate": 2.3176900959834e-06, |
| "loss": 0.3548, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.1371633752244166, |
| "grad_norm": 0.26997843384742737, |
| "learning_rate": 2.31240025674162e-06, |
| "loss": 0.364, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.1382405745062836, |
| "grad_norm": 0.3154236674308777, |
| "learning_rate": 2.3071146446611313e-06, |
| "loss": 0.3943, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.139317773788151, |
| "grad_norm": 0.317594975233078, |
| "learning_rate": 2.3018332680553478e-06, |
| "loss": 0.4027, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.140394973070018, |
| "grad_norm": 0.29514530301094055, |
| "learning_rate": 2.2965561352310257e-06, |
| "loss": 0.3693, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.141472172351885, |
| "grad_norm": 0.2793351411819458, |
| "learning_rate": 2.2912832544882434e-06, |
| "loss": 0.3678, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.142549371633752, |
| "grad_norm": 0.31178465485572815, |
| "learning_rate": 2.2860146341203936e-06, |
| "loss": 0.3883, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.1436265709156195, |
| "grad_norm": 0.27136895060539246, |
| "learning_rate": 2.280750282414169e-06, |
| "loss": 0.3579, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.1447037701974865, |
| "grad_norm": 0.27261051535606384, |
| "learning_rate": 2.2754902076495424e-06, |
| "loss": 0.367, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.1457809694793535, |
| "grad_norm": 0.2901833653450012, |
| "learning_rate": 2.270234418099765e-06, |
| "loss": 0.3525, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.146858168761221, |
| "grad_norm": 0.3003911077976227, |
| "learning_rate": 2.264982922031348e-06, |
| "loss": 0.4071, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.147935368043088, |
| "grad_norm": 0.2801694869995117, |
| "learning_rate": 2.2597357277040494e-06, |
| "loss": 0.4014, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.149012567324955, |
| "grad_norm": 0.27856895327568054, |
| "learning_rate": 2.254492843370857e-06, |
| "loss": 0.3261, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.1500897666068224, |
| "grad_norm": 0.31977221369743347, |
| "learning_rate": 2.249254277277984e-06, |
| "loss": 0.4686, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.1511669658886894, |
| "grad_norm": 0.2627192437648773, |
| "learning_rate": 2.2440200376648524e-06, |
| "loss": 0.3757, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.1522441651705564, |
| "grad_norm": 0.2889450192451477, |
| "learning_rate": 2.238790132764076e-06, |
| "loss": 0.3737, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.153321364452424, |
| "grad_norm": 0.267654150724411, |
| "learning_rate": 2.233564570801453e-06, |
| "loss": 0.3646, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.154398563734291, |
| "grad_norm": 0.30864495038986206, |
| "learning_rate": 2.2283433599959525e-06, |
| "loss": 0.3846, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.155475763016158, |
| "grad_norm": 0.3336288332939148, |
| "learning_rate": 2.2231265085596935e-06, |
| "loss": 0.3714, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.1565529622980253, |
| "grad_norm": 0.2831747531890869, |
| "learning_rate": 2.2179140246979463e-06, |
| "loss": 0.3615, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.1576301615798923, |
| "grad_norm": 0.2831881046295166, |
| "learning_rate": 2.2127059166091046e-06, |
| "loss": 0.3555, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.1587073608617593, |
| "grad_norm": 0.3034612238407135, |
| "learning_rate": 2.207502192484685e-06, |
| "loss": 0.3775, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.1597845601436267, |
| "grad_norm": 0.2869066298007965, |
| "learning_rate": 2.202302860509307e-06, |
| "loss": 0.389, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.1608617594254937, |
| "grad_norm": 0.3017926514148712, |
| "learning_rate": 2.1971079288606813e-06, |
| "loss": 0.3694, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.1619389587073607, |
| "grad_norm": 0.28795140981674194, |
| "learning_rate": 2.191917405709598e-06, |
| "loss": 0.3491, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.163016157989228, |
| "grad_norm": 0.30164435505867004, |
| "learning_rate": 2.186731299219915e-06, |
| "loss": 0.402, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.164093357271095, |
| "grad_norm": 0.30009475350379944, |
| "learning_rate": 2.1815496175485433e-06, |
| "loss": 0.3717, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.165170556552962, |
| "grad_norm": 0.3006815016269684, |
| "learning_rate": 2.1763723688454297e-06, |
| "loss": 0.3623, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.1662477558348296, |
| "grad_norm": 0.3288932144641876, |
| "learning_rate": 2.1711995612535547e-06, |
| "loss": 0.376, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.1673249551166966, |
| "grad_norm": 0.29935574531555176, |
| "learning_rate": 2.1660312029089083e-06, |
| "loss": 0.3943, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.1684021543985637, |
| "grad_norm": 0.2717684209346771, |
| "learning_rate": 2.1608673019404867e-06, |
| "loss": 0.38, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.1694793536804307, |
| "grad_norm": 0.29062172770500183, |
| "learning_rate": 2.1557078664702747e-06, |
| "loss": 0.3701, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.170556552962298, |
| "grad_norm": 0.28731608390808105, |
| "learning_rate": 2.1505529046132316e-06, |
| "loss": 0.3816, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.171633752244165, |
| "grad_norm": 0.29999956488609314, |
| "learning_rate": 2.145402424477283e-06, |
| "loss": 0.4037, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.172710951526032, |
| "grad_norm": 0.2899573743343353, |
| "learning_rate": 2.140256434163303e-06, |
| "loss": 0.3361, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.1737881508078996, |
| "grad_norm": 0.3206027150154114, |
| "learning_rate": 2.135114941765108e-06, |
| "loss": 0.3869, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.1748653500897666, |
| "grad_norm": 0.30229973793029785, |
| "learning_rate": 2.1299779553694323e-06, |
| "loss": 0.3607, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.1759425493716336, |
| "grad_norm": 0.2830967903137207, |
| "learning_rate": 2.1248454830559307e-06, |
| "loss": 0.4132, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.177019748653501, |
| "grad_norm": 0.2616431415081024, |
| "learning_rate": 2.119717532897155e-06, |
| "loss": 0.351, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.178096947935368, |
| "grad_norm": 0.2953941524028778, |
| "learning_rate": 2.1145941129585434e-06, |
| "loss": 0.4132, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.179174147217235, |
| "grad_norm": 0.27539801597595215, |
| "learning_rate": 2.1094752312984096e-06, |
| "loss": 0.336, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.1802513464991025, |
| "grad_norm": 0.30243703722953796, |
| "learning_rate": 2.1043608959679302e-06, |
| "loss": 0.3641, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.1813285457809695, |
| "grad_norm": 0.3062271475791931, |
| "learning_rate": 2.09925111501113e-06, |
| "loss": 0.389, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.1824057450628365, |
| "grad_norm": 0.2824309170246124, |
| "learning_rate": 2.0941458964648737e-06, |
| "loss": 0.3559, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.183482944344704, |
| "grad_norm": 0.30429500341415405, |
| "learning_rate": 2.0890452483588434e-06, |
| "loss": 0.3657, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.184560143626571, |
| "grad_norm": 0.2938910722732544, |
| "learning_rate": 2.0839491787155387e-06, |
| "loss": 0.3697, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.185637342908438, |
| "grad_norm": 0.3032033443450928, |
| "learning_rate": 2.0788576955502547e-06, |
| "loss": 0.3612, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.1867145421903054, |
| "grad_norm": 0.3238908648490906, |
| "learning_rate": 2.0737708068710753e-06, |
| "loss": 0.3952, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.1877917414721724, |
| "grad_norm": 0.3095376193523407, |
| "learning_rate": 2.0686885206788563e-06, |
| "loss": 0.4077, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.1888689407540394, |
| "grad_norm": 0.27973851561546326, |
| "learning_rate": 2.0636108449672167e-06, |
| "loss": 0.3812, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.189946140035907, |
| "grad_norm": 0.2777497172355652, |
| "learning_rate": 2.0585377877225176e-06, |
| "loss": 0.3681, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.191023339317774, |
| "grad_norm": 0.2898228168487549, |
| "learning_rate": 2.053469356923865e-06, |
| "loss": 0.3811, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.192100538599641, |
| "grad_norm": 0.296670138835907, |
| "learning_rate": 2.0484055605430807e-06, |
| "loss": 0.3853, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.1931777378815083, |
| "grad_norm": 0.31800577044487, |
| "learning_rate": 2.043346406544701e-06, |
| "loss": 0.3901, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.1942549371633753, |
| "grad_norm": 0.2928822636604309, |
| "learning_rate": 2.0382919028859606e-06, |
| "loss": 0.3608, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.1953321364452423, |
| "grad_norm": 0.2876681685447693, |
| "learning_rate": 2.033242057516779e-06, |
| "loss": 0.3788, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.1964093357271093, |
| "grad_norm": 0.2682286202907562, |
| "learning_rate": 2.028196878379749e-06, |
| "loss": 0.3644, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.1974865350089767, |
| "grad_norm": 0.2802661061286926, |
| "learning_rate": 2.0231563734101245e-06, |
| "loss": 0.3386, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.1985637342908437, |
| "grad_norm": 0.258585125207901, |
| "learning_rate": 2.0181205505358098e-06, |
| "loss": 0.3483, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.199640933572711, |
| "grad_norm": 0.2854132652282715, |
| "learning_rate": 2.013089417677338e-06, |
| "loss": 0.4009, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.200718132854578, |
| "grad_norm": 0.29215067625045776, |
| "learning_rate": 2.0080629827478755e-06, |
| "loss": 0.3803, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.201795332136445, |
| "grad_norm": 0.2704610526561737, |
| "learning_rate": 2.0030412536531896e-06, |
| "loss": 0.3511, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.202872531418312, |
| "grad_norm": 0.2924228608608246, |
| "learning_rate": 1.998024238291653e-06, |
| "loss": 0.3665, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.2039497307001796, |
| "grad_norm": 0.28420692682266235, |
| "learning_rate": 1.993011944554223e-06, |
| "loss": 0.3589, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.2050269299820466, |
| "grad_norm": 0.2870817482471466, |
| "learning_rate": 1.9880043803244285e-06, |
| "loss": 0.3864, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.2061041292639136, |
| "grad_norm": 0.2669335603713989, |
| "learning_rate": 1.9830015534783626e-06, |
| "loss": 0.3774, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.207181328545781, |
| "grad_norm": 0.28127819299697876, |
| "learning_rate": 1.9780034718846653e-06, |
| "loss": 0.3582, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.208258527827648, |
| "grad_norm": 0.3107064366340637, |
| "learning_rate": 1.9730101434045146e-06, |
| "loss": 0.4, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.209335727109515, |
| "grad_norm": 0.2876104414463043, |
| "learning_rate": 1.968021575891609e-06, |
| "loss": 0.3549, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.2104129263913825, |
| "grad_norm": 0.2810943126678467, |
| "learning_rate": 1.9630377771921624e-06, |
| "loss": 0.3469, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.2114901256732495, |
| "grad_norm": 0.33290818333625793, |
| "learning_rate": 1.9580587551448887e-06, |
| "loss": 0.3922, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.2125673249551165, |
| "grad_norm": 0.2748958170413971, |
| "learning_rate": 1.9530845175809838e-06, |
| "loss": 0.3654, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.213644524236984, |
| "grad_norm": 0.30484747886657715, |
| "learning_rate": 1.9481150723241236e-06, |
| "loss": 0.4034, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.214721723518851, |
| "grad_norm": 0.2513694763183594, |
| "learning_rate": 1.943150427190445e-06, |
| "loss": 0.3236, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.215798922800718, |
| "grad_norm": 0.3133890628814697, |
| "learning_rate": 1.9381905899885344e-06, |
| "loss": 0.4458, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.2168761220825854, |
| "grad_norm": 0.3194190561771393, |
| "learning_rate": 1.9332355685194182e-06, |
| "loss": 0.3784, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.2179533213644524, |
| "grad_norm": 0.2863844037055969, |
| "learning_rate": 1.9282853705765435e-06, |
| "loss": 0.3491, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.2190305206463194, |
| "grad_norm": 0.28616365790367126, |
| "learning_rate": 1.923340003945775e-06, |
| "loss": 0.3826, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.220107719928187, |
| "grad_norm": 0.27722132205963135, |
| "learning_rate": 1.918399476405378e-06, |
| "loss": 0.4063, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.221184919210054, |
| "grad_norm": 0.2680579423904419, |
| "learning_rate": 1.913463795726007e-06, |
| "loss": 0.3643, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.222262118491921, |
| "grad_norm": 0.30088430643081665, |
| "learning_rate": 1.90853296967069e-06, |
| "loss": 0.3749, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.2233393177737883, |
| "grad_norm": 0.2925657033920288, |
| "learning_rate": 1.9036070059948253e-06, |
| "loss": 0.3703, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.2244165170556554, |
| "grad_norm": 0.2966391146183014, |
| "learning_rate": 1.898685912446156e-06, |
| "loss": 0.3929, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.2254937163375224, |
| "grad_norm": 0.2713056206703186, |
| "learning_rate": 1.8937696967647735e-06, |
| "loss": 0.3553, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.22657091561939, |
| "grad_norm": 0.30269116163253784, |
| "learning_rate": 1.8888583666830878e-06, |
| "loss": 0.3775, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.227648114901257, |
| "grad_norm": 0.30684664845466614, |
| "learning_rate": 1.8839519299258325e-06, |
| "loss": 0.3848, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.228725314183124, |
| "grad_norm": 0.27545851469039917, |
| "learning_rate": 1.8790503942100413e-06, |
| "loss": 0.3534, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.229802513464991, |
| "grad_norm": 0.29838407039642334, |
| "learning_rate": 1.8741537672450406e-06, |
| "loss": 0.3833, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.2308797127468583, |
| "grad_norm": 0.29707691073417664, |
| "learning_rate": 1.8692620567324354e-06, |
| "loss": 0.3847, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.2319569120287253, |
| "grad_norm": 0.31610336899757385, |
| "learning_rate": 1.8643752703660978e-06, |
| "loss": 0.3871, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.2330341113105923, |
| "grad_norm": 0.2851659655570984, |
| "learning_rate": 1.859493415832157e-06, |
| "loss": 0.3901, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.2341113105924597, |
| "grad_norm": 0.3058148920536041, |
| "learning_rate": 1.8546165008089806e-06, |
| "loss": 0.4152, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.2351885098743267, |
| "grad_norm": 0.2662912905216217, |
| "learning_rate": 1.8497445329671725e-06, |
| "loss": 0.3692, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.2362657091561937, |
| "grad_norm": 0.2919093370437622, |
| "learning_rate": 1.8448775199695501e-06, |
| "loss": 0.3102, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.237342908438061, |
| "grad_norm": 0.29858067631721497, |
| "learning_rate": 1.8400154694711424e-06, |
| "loss": 0.4039, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.238420107719928, |
| "grad_norm": 0.2683444023132324, |
| "learning_rate": 1.835158389119171e-06, |
| "loss": 0.3416, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.239497307001795, |
| "grad_norm": 0.29827025532722473, |
| "learning_rate": 1.8303062865530407e-06, |
| "loss": 0.4094, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.2405745062836626, |
| "grad_norm": 0.3107103705406189, |
| "learning_rate": 1.8254591694043267e-06, |
| "loss": 0.3767, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.2416517055655296, |
| "grad_norm": 0.3018888831138611, |
| "learning_rate": 1.8206170452967636e-06, |
| "loss": 0.3604, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.2427289048473966, |
| "grad_norm": 0.2799268662929535, |
| "learning_rate": 1.8157799218462335e-06, |
| "loss": 0.3684, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.243806104129264, |
| "grad_norm": 0.29917651414871216, |
| "learning_rate": 1.8109478066607495e-06, |
| "loss": 0.3752, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.244883303411131, |
| "grad_norm": 0.2883894741535187, |
| "learning_rate": 1.8061207073404507e-06, |
| "loss": 0.3911, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.245960502692998, |
| "grad_norm": 0.29146096110343933, |
| "learning_rate": 1.8012986314775888e-06, |
| "loss": 0.3613, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.2470377019748655, |
| "grad_norm": 0.3059714734554291, |
| "learning_rate": 1.7964815866565088e-06, |
| "loss": 0.3653, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.2481149012567325, |
| "grad_norm": 0.310809463262558, |
| "learning_rate": 1.7916695804536477e-06, |
| "loss": 0.3495, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.2491921005385995, |
| "grad_norm": 0.2690834701061249, |
| "learning_rate": 1.786862620437515e-06, |
| "loss": 0.348, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.250269299820467, |
| "grad_norm": 0.30786246061325073, |
| "learning_rate": 1.7820607141686846e-06, |
| "loss": 0.4014, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.251346499102334, |
| "grad_norm": 0.2623322010040283, |
| "learning_rate": 1.7772638691997835e-06, |
| "loss": 0.3643, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.252423698384201, |
| "grad_norm": 0.29593855142593384, |
| "learning_rate": 1.7724720930754713e-06, |
| "loss": 0.3535, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.2535008976660684, |
| "grad_norm": 0.30418071150779724, |
| "learning_rate": 1.7676853933324423e-06, |
| "loss": 0.3811, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.2545780969479354, |
| "grad_norm": 0.31614336371421814, |
| "learning_rate": 1.762903777499404e-06, |
| "loss": 0.3894, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.2556552962298024, |
| "grad_norm": 0.2974374294281006, |
| "learning_rate": 1.7581272530970666e-06, |
| "loss": 0.3793, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.2567324955116694, |
| "grad_norm": 0.27945080399513245, |
| "learning_rate": 1.7533558276381351e-06, |
| "loss": 0.3461, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.257809694793537, |
| "grad_norm": 0.296705424785614, |
| "learning_rate": 1.7485895086272903e-06, |
| "loss": 0.3794, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.258886894075404, |
| "grad_norm": 0.31906142830848694, |
| "learning_rate": 1.7438283035611847e-06, |
| "loss": 0.3702, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.2599640933572713, |
| "grad_norm": 0.29481953382492065, |
| "learning_rate": 1.7390722199284287e-06, |
| "loss": 0.3912, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.2610412926391383, |
| "grad_norm": 0.27198755741119385, |
| "learning_rate": 1.734321265209572e-06, |
| "loss": 0.3504, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.2621184919210053, |
| "grad_norm": 0.2756825089454651, |
| "learning_rate": 1.7295754468771026e-06, |
| "loss": 0.4126, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.2631956912028723, |
| "grad_norm": 0.28191620111465454, |
| "learning_rate": 1.724834772395428e-06, |
| "loss": 0.3646, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.26427289048474, |
| "grad_norm": 0.2847255766391754, |
| "learning_rate": 1.7200992492208647e-06, |
| "loss": 0.3734, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.265350089766607, |
| "grad_norm": 0.31416910886764526, |
| "learning_rate": 1.7153688848016277e-06, |
| "loss": 0.3747, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.266427289048474, |
| "grad_norm": 0.30127301812171936, |
| "learning_rate": 1.7106436865778182e-06, |
| "loss": 0.3609, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.2675044883303412, |
| "grad_norm": 0.283171683549881, |
| "learning_rate": 1.7059236619814128e-06, |
| "loss": 0.3874, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.2685816876122082, |
| "grad_norm": 0.28598931431770325, |
| "learning_rate": 1.7012088184362469e-06, |
| "loss": 0.3628, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.2696588868940752, |
| "grad_norm": 0.2925755977630615, |
| "learning_rate": 1.6964991633580118e-06, |
| "loss": 0.3387, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.2707360861759427, |
| "grad_norm": 0.2768424451351166, |
| "learning_rate": 1.6917947041542342e-06, |
| "loss": 0.3738, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.2718132854578097, |
| "grad_norm": 0.26826032996177673, |
| "learning_rate": 1.6870954482242707e-06, |
| "loss": 0.3727, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.2728904847396767, |
| "grad_norm": 0.2744354009628296, |
| "learning_rate": 1.6824014029592944e-06, |
| "loss": 0.368, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.273967684021544, |
| "grad_norm": 0.3166108727455139, |
| "learning_rate": 1.6777125757422813e-06, |
| "loss": 0.3999, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.275044883303411, |
| "grad_norm": 0.27384570240974426, |
| "learning_rate": 1.6730289739480015e-06, |
| "loss": 0.3329, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.276122082585278, |
| "grad_norm": 0.3060706555843353, |
| "learning_rate": 1.668350604943006e-06, |
| "loss": 0.4192, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.2771992818671456, |
| "grad_norm": 0.279412180185318, |
| "learning_rate": 1.663677476085616e-06, |
| "loss": 0.3569, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.2782764811490126, |
| "grad_norm": 0.30697518587112427, |
| "learning_rate": 1.6590095947259083e-06, |
| "loss": 0.3993, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.2793536804308796, |
| "grad_norm": 0.27257391810417175, |
| "learning_rate": 1.6543469682057105e-06, |
| "loss": 0.3356, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.280430879712747, |
| "grad_norm": 0.2825731337070465, |
| "learning_rate": 1.6496896038585796e-06, |
| "loss": 0.3501, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.281508078994614, |
| "grad_norm": 0.2653917074203491, |
| "learning_rate": 1.6450375090098003e-06, |
| "loss": 0.3694, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.282585278276481, |
| "grad_norm": 0.2905532121658325, |
| "learning_rate": 1.6403906909763688e-06, |
| "loss": 0.368, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.283662477558348, |
| "grad_norm": 0.2730623781681061, |
| "learning_rate": 1.6357491570669814e-06, |
| "loss": 0.3547, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.2847396768402155, |
| "grad_norm": 0.2855893671512604, |
| "learning_rate": 1.631112914582022e-06, |
| "loss": 0.3762, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.2858168761220825, |
| "grad_norm": 0.2811420261859894, |
| "learning_rate": 1.6264819708135549e-06, |
| "loss": 0.3816, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.28689407540395, |
| "grad_norm": 0.33978739380836487, |
| "learning_rate": 1.6218563330453052e-06, |
| "loss": 0.3769, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.287971274685817, |
| "grad_norm": 0.27539363503456116, |
| "learning_rate": 1.6172360085526567e-06, |
| "loss": 0.3776, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.289048473967684, |
| "grad_norm": 0.28592661023139954, |
| "learning_rate": 1.6126210046026364e-06, |
| "loss": 0.381, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.290125673249551, |
| "grad_norm": 0.28629782795906067, |
| "learning_rate": 1.6080113284539011e-06, |
| "loss": 0.3909, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.2912028725314184, |
| "grad_norm": 0.28644952178001404, |
| "learning_rate": 1.6034069873567305e-06, |
| "loss": 0.3965, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.2922800718132854, |
| "grad_norm": 0.2881433665752411, |
| "learning_rate": 1.5988079885530073e-06, |
| "loss": 0.3642, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.2933572710951524, |
| "grad_norm": 0.2940407395362854, |
| "learning_rate": 1.5942143392762178e-06, |
| "loss": 0.3544, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.29443447037702, |
| "grad_norm": 0.29156753420829773, |
| "learning_rate": 1.5896260467514335e-06, |
| "loss": 0.3703, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.295511669658887, |
| "grad_norm": 0.29485079646110535, |
| "learning_rate": 1.5850431181952953e-06, |
| "loss": 0.4186, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.296588868940754, |
| "grad_norm": 0.27262139320373535, |
| "learning_rate": 1.5804655608160135e-06, |
| "loss": 0.3508, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.2976660682226213, |
| "grad_norm": 0.2714279592037201, |
| "learning_rate": 1.5758933818133482e-06, |
| "loss": 0.3392, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.2987432675044883, |
| "grad_norm": 0.29111814498901367, |
| "learning_rate": 1.5713265883786e-06, |
| "loss": 0.3844, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.2998204667863553, |
| "grad_norm": 0.2989579439163208, |
| "learning_rate": 1.5667651876945994e-06, |
| "loss": 0.3799, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.3008976660682228, |
| "grad_norm": 0.29087311029434204, |
| "learning_rate": 1.5622091869356937e-06, |
| "loss": 0.3532, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.3019748653500898, |
| "grad_norm": 0.29848843812942505, |
| "learning_rate": 1.5576585932677407e-06, |
| "loss": 0.4171, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.3030520646319568, |
| "grad_norm": 0.2713443338871002, |
| "learning_rate": 1.5531134138480863e-06, |
| "loss": 0.3645, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.304129263913824, |
| "grad_norm": 0.27279627323150635, |
| "learning_rate": 1.54857365582557e-06, |
| "loss": 0.3689, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.3052064631956912, |
| "grad_norm": 0.28853660821914673, |
| "learning_rate": 1.544039326340495e-06, |
| "loss": 0.3914, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.3062836624775582, |
| "grad_norm": 0.287783682346344, |
| "learning_rate": 1.5395104325246336e-06, |
| "loss": 0.3859, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.3073608617594257, |
| "grad_norm": 0.30437979102134705, |
| "learning_rate": 1.5349869815012053e-06, |
| "loss": 0.3802, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.3084380610412927, |
| "grad_norm": 0.2932701110839844, |
| "learning_rate": 1.5304689803848699e-06, |
| "loss": 0.4123, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.3095152603231597, |
| "grad_norm": 0.2476838082075119, |
| "learning_rate": 1.5259564362817147e-06, |
| "loss": 0.3413, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.3105924596050267, |
| "grad_norm": 0.27603915333747864, |
| "learning_rate": 1.521449356289245e-06, |
| "loss": 0.3702, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.311669658886894, |
| "grad_norm": 0.26957961916923523, |
| "learning_rate": 1.5169477474963722e-06, |
| "loss": 0.3523, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.312746858168761, |
| "grad_norm": 0.2828165888786316, |
| "learning_rate": 1.512451616983399e-06, |
| "loss": 0.3947, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.3138240574506286, |
| "grad_norm": 0.2874089777469635, |
| "learning_rate": 1.5079609718220167e-06, |
| "loss": 0.4005, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.3149012567324956, |
| "grad_norm": 0.2528451085090637, |
| "learning_rate": 1.5034758190752836e-06, |
| "loss": 0.3726, |
| "step": 2149 |
| }, |
| { |
| "epoch": 2.3159784560143626, |
| "grad_norm": 0.27315106987953186, |
| "learning_rate": 1.4989961657976237e-06, |
| "loss": 0.3915, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.3170556552962296, |
| "grad_norm": 0.2935149371623993, |
| "learning_rate": 1.4945220190348103e-06, |
| "loss": 0.3522, |
| "step": 2151 |
| }, |
| { |
| "epoch": 2.318132854578097, |
| "grad_norm": 0.30114105343818665, |
| "learning_rate": 1.4900533858239542e-06, |
| "loss": 0.3763, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.319210053859964, |
| "grad_norm": 0.2741653025150299, |
| "learning_rate": 1.4855902731934962e-06, |
| "loss": 0.3671, |
| "step": 2153 |
| }, |
| { |
| "epoch": 2.3202872531418315, |
| "grad_norm": 0.28554973006248474, |
| "learning_rate": 1.4811326881631937e-06, |
| "loss": 0.3763, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.3213644524236985, |
| "grad_norm": 0.26286450028419495, |
| "learning_rate": 1.4766806377441078e-06, |
| "loss": 0.369, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.3224416517055655, |
| "grad_norm": 0.28501445055007935, |
| "learning_rate": 1.4722341289385978e-06, |
| "loss": 0.3913, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.3235188509874325, |
| "grad_norm": 0.2689116597175598, |
| "learning_rate": 1.4677931687403046e-06, |
| "loss": 0.3667, |
| "step": 2157 |
| }, |
| { |
| "epoch": 2.3245960502693, |
| "grad_norm": 0.2794966697692871, |
| "learning_rate": 1.4633577641341445e-06, |
| "loss": 0.3657, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.325673249551167, |
| "grad_norm": 0.2858525514602661, |
| "learning_rate": 1.4589279220962922e-06, |
| "loss": 0.3643, |
| "step": 2159 |
| }, |
| { |
| "epoch": 2.326750448833034, |
| "grad_norm": 0.28977170586586, |
| "learning_rate": 1.454503649594176e-06, |
| "loss": 0.3835, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.3278276481149014, |
| "grad_norm": 0.2896460294723511, |
| "learning_rate": 1.4500849535864636e-06, |
| "loss": 0.3782, |
| "step": 2161 |
| }, |
| { |
| "epoch": 2.3289048473967684, |
| "grad_norm": 0.30394282937049866, |
| "learning_rate": 1.4456718410230541e-06, |
| "loss": 0.3838, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.3299820466786354, |
| "grad_norm": 0.28289011120796204, |
| "learning_rate": 1.4412643188450581e-06, |
| "loss": 0.3606, |
| "step": 2163 |
| }, |
| { |
| "epoch": 2.331059245960503, |
| "grad_norm": 0.27398374676704407, |
| "learning_rate": 1.4368623939848003e-06, |
| "loss": 0.3863, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.33213644524237, |
| "grad_norm": 0.2954217493534088, |
| "learning_rate": 1.4324660733657985e-06, |
| "loss": 0.3795, |
| "step": 2165 |
| }, |
| { |
| "epoch": 2.333213644524237, |
| "grad_norm": 0.2950679361820221, |
| "learning_rate": 1.4280753639027567e-06, |
| "loss": 0.4029, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.3342908438061043, |
| "grad_norm": 0.2834983766078949, |
| "learning_rate": 1.4236902725015533e-06, |
| "loss": 0.3542, |
| "step": 2167 |
| }, |
| { |
| "epoch": 2.3353680430879713, |
| "grad_norm": 0.3374176323413849, |
| "learning_rate": 1.4193108060592308e-06, |
| "loss": 0.3786, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.3364452423698383, |
| "grad_norm": 0.29156172275543213, |
| "learning_rate": 1.4149369714639856e-06, |
| "loss": 0.386, |
| "step": 2169 |
| }, |
| { |
| "epoch": 2.3375224416517058, |
| "grad_norm": 0.29786962270736694, |
| "learning_rate": 1.4105687755951508e-06, |
| "loss": 0.4045, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.3385996409335728, |
| "grad_norm": 0.2667715847492218, |
| "learning_rate": 1.4062062253231983e-06, |
| "loss": 0.3425, |
| "step": 2171 |
| }, |
| { |
| "epoch": 2.3396768402154398, |
| "grad_norm": 0.31166592240333557, |
| "learning_rate": 1.401849327509714e-06, |
| "loss": 0.3904, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.340754039497307, |
| "grad_norm": 0.28892043232917786, |
| "learning_rate": 1.3974980890073968e-06, |
| "loss": 0.3565, |
| "step": 2173 |
| }, |
| { |
| "epoch": 2.341831238779174, |
| "grad_norm": 0.3395155072212219, |
| "learning_rate": 1.3931525166600447e-06, |
| "loss": 0.3847, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.342908438061041, |
| "grad_norm": 0.302751362323761, |
| "learning_rate": 1.3888126173025412e-06, |
| "loss": 0.3638, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.343985637342908, |
| "grad_norm": 0.2783910930156708, |
| "learning_rate": 1.3844783977608494e-06, |
| "loss": 0.3612, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.3450628366247757, |
| "grad_norm": 0.291925311088562, |
| "learning_rate": 1.3801498648519984e-06, |
| "loss": 0.3712, |
| "step": 2177 |
| }, |
| { |
| "epoch": 2.3461400359066427, |
| "grad_norm": 0.2917797863483429, |
| "learning_rate": 1.3758270253840745e-06, |
| "loss": 0.3646, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.34721723518851, |
| "grad_norm": 0.303262323141098, |
| "learning_rate": 1.371509886156206e-06, |
| "loss": 0.3972, |
| "step": 2179 |
| }, |
| { |
| "epoch": 2.348294434470377, |
| "grad_norm": 0.2622956335544586, |
| "learning_rate": 1.3671984539585548e-06, |
| "loss": 0.3108, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.349371633752244, |
| "grad_norm": 0.3135432302951813, |
| "learning_rate": 1.3628927355723115e-06, |
| "loss": 0.4323, |
| "step": 2181 |
| }, |
| { |
| "epoch": 2.350448833034111, |
| "grad_norm": 0.3027063310146332, |
| "learning_rate": 1.3585927377696766e-06, |
| "loss": 0.3626, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.3515260323159786, |
| "grad_norm": 0.2851846218109131, |
| "learning_rate": 1.3542984673138542e-06, |
| "loss": 0.3655, |
| "step": 2183 |
| }, |
| { |
| "epoch": 2.3526032315978456, |
| "grad_norm": 0.30676692724227905, |
| "learning_rate": 1.3500099309590397e-06, |
| "loss": 0.3758, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.3536804308797126, |
| "grad_norm": 0.2884361743927002, |
| "learning_rate": 1.3457271354504097e-06, |
| "loss": 0.3817, |
| "step": 2185 |
| }, |
| { |
| "epoch": 2.35475763016158, |
| "grad_norm": 0.28114891052246094, |
| "learning_rate": 1.341450087524112e-06, |
| "loss": 0.3488, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.355834829443447, |
| "grad_norm": 0.3103155791759491, |
| "learning_rate": 1.3371787939072523e-06, |
| "loss": 0.4043, |
| "step": 2187 |
| }, |
| { |
| "epoch": 2.356912028725314, |
| "grad_norm": 0.27932849526405334, |
| "learning_rate": 1.332913261317887e-06, |
| "loss": 0.3594, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.3579892280071815, |
| "grad_norm": 0.26250553131103516, |
| "learning_rate": 1.3286534964650121e-06, |
| "loss": 0.3657, |
| "step": 2189 |
| }, |
| { |
| "epoch": 2.3590664272890485, |
| "grad_norm": 0.2756526470184326, |
| "learning_rate": 1.3243995060485537e-06, |
| "loss": 0.3922, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.3601436265709155, |
| "grad_norm": 0.24424968659877777, |
| "learning_rate": 1.3201512967593487e-06, |
| "loss": 0.368, |
| "step": 2191 |
| }, |
| { |
| "epoch": 2.361220825852783, |
| "grad_norm": 0.284087598323822, |
| "learning_rate": 1.3159088752791483e-06, |
| "loss": 0.3749, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.36229802513465, |
| "grad_norm": 0.2854723334312439, |
| "learning_rate": 1.3116722482805972e-06, |
| "loss": 0.3817, |
| "step": 2193 |
| }, |
| { |
| "epoch": 2.363375224416517, |
| "grad_norm": 0.2786845564842224, |
| "learning_rate": 1.3074414224272287e-06, |
| "loss": 0.4066, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.3644524236983844, |
| "grad_norm": 0.25960221886634827, |
| "learning_rate": 1.303216404373447e-06, |
| "loss": 0.3488, |
| "step": 2195 |
| }, |
| { |
| "epoch": 2.3655296229802514, |
| "grad_norm": 0.2924199104309082, |
| "learning_rate": 1.2989972007645262e-06, |
| "loss": 0.3796, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.3666068222621184, |
| "grad_norm": 0.2700774073600769, |
| "learning_rate": 1.2947838182365941e-06, |
| "loss": 0.3809, |
| "step": 2197 |
| }, |
| { |
| "epoch": 2.367684021543986, |
| "grad_norm": 0.2660199701786041, |
| "learning_rate": 1.2905762634166214e-06, |
| "loss": 0.3645, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.368761220825853, |
| "grad_norm": 0.28819847106933594, |
| "learning_rate": 1.2863745429224145e-06, |
| "loss": 0.3933, |
| "step": 2199 |
| }, |
| { |
| "epoch": 2.36983842010772, |
| "grad_norm": 0.28640761971473694, |
| "learning_rate": 1.2821786633626038e-06, |
| "loss": 0.3623, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.370915619389587, |
| "grad_norm": 0.2778407335281372, |
| "learning_rate": 1.2779886313366291e-06, |
| "loss": 0.3997, |
| "step": 2201 |
| }, |
| { |
| "epoch": 2.3719928186714543, |
| "grad_norm": 0.30692175030708313, |
| "learning_rate": 1.2738044534347366e-06, |
| "loss": 0.3731, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.3730700179533213, |
| "grad_norm": 0.27536556124687195, |
| "learning_rate": 1.2696261362379653e-06, |
| "loss": 0.3854, |
| "step": 2203 |
| }, |
| { |
| "epoch": 2.3741472172351887, |
| "grad_norm": 0.283770352602005, |
| "learning_rate": 1.2654536863181328e-06, |
| "loss": 0.3499, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.3752244165170557, |
| "grad_norm": 0.3203579783439636, |
| "learning_rate": 1.2612871102378305e-06, |
| "loss": 0.3899, |
| "step": 2205 |
| }, |
| { |
| "epoch": 2.3763016157989227, |
| "grad_norm": 0.2789762020111084, |
| "learning_rate": 1.2571264145504125e-06, |
| "loss": 0.3474, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.3773788150807897, |
| "grad_norm": 0.27259716391563416, |
| "learning_rate": 1.2529716057999819e-06, |
| "loss": 0.3495, |
| "step": 2207 |
| }, |
| { |
| "epoch": 2.378456014362657, |
| "grad_norm": 0.30078408122062683, |
| "learning_rate": 1.248822690521383e-06, |
| "loss": 0.381, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.379533213644524, |
| "grad_norm": 0.29212328791618347, |
| "learning_rate": 1.2446796752401912e-06, |
| "loss": 0.4109, |
| "step": 2209 |
| }, |
| { |
| "epoch": 2.380610412926391, |
| "grad_norm": 0.26162827014923096, |
| "learning_rate": 1.2405425664727044e-06, |
| "loss": 0.3562, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.3816876122082586, |
| "grad_norm": 0.26936033368110657, |
| "learning_rate": 1.2364113707259251e-06, |
| "loss": 0.3897, |
| "step": 2211 |
| }, |
| { |
| "epoch": 2.3827648114901256, |
| "grad_norm": 0.29068368673324585, |
| "learning_rate": 1.2322860944975573e-06, |
| "loss": 0.3808, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.3838420107719926, |
| "grad_norm": 0.285015344619751, |
| "learning_rate": 1.2281667442759977e-06, |
| "loss": 0.3758, |
| "step": 2213 |
| }, |
| { |
| "epoch": 2.38491921005386, |
| "grad_norm": 0.2871081531047821, |
| "learning_rate": 1.22405332654032e-06, |
| "loss": 0.4035, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.385996409335727, |
| "grad_norm": 0.3040642738342285, |
| "learning_rate": 1.219945847760267e-06, |
| "loss": 0.4132, |
| "step": 2215 |
| }, |
| { |
| "epoch": 2.387073608617594, |
| "grad_norm": 0.26370567083358765, |
| "learning_rate": 1.2158443143962423e-06, |
| "loss": 0.3389, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.3881508078994615, |
| "grad_norm": 0.2896377146244049, |
| "learning_rate": 1.2117487328992954e-06, |
| "loss": 0.3861, |
| "step": 2217 |
| }, |
| { |
| "epoch": 2.3892280071813286, |
| "grad_norm": 0.2631506621837616, |
| "learning_rate": 1.2076591097111184e-06, |
| "loss": 0.3681, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.3903052064631956, |
| "grad_norm": 0.27425822615623474, |
| "learning_rate": 1.2035754512640263e-06, |
| "loss": 0.3973, |
| "step": 2219 |
| }, |
| { |
| "epoch": 2.391382405745063, |
| "grad_norm": 0.2854865491390228, |
| "learning_rate": 1.1994977639809575e-06, |
| "loss": 0.3818, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.39245960502693, |
| "grad_norm": 0.29627755284309387, |
| "learning_rate": 1.1954260542754575e-06, |
| "loss": 0.3397, |
| "step": 2221 |
| }, |
| { |
| "epoch": 2.393536804308797, |
| "grad_norm": 0.27232253551483154, |
| "learning_rate": 1.191360328551668e-06, |
| "loss": 0.388, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.3946140035906645, |
| "grad_norm": 0.26810529828071594, |
| "learning_rate": 1.1873005932043202e-06, |
| "loss": 0.3534, |
| "step": 2223 |
| }, |
| { |
| "epoch": 2.3956912028725315, |
| "grad_norm": 0.3001575171947479, |
| "learning_rate": 1.1832468546187248e-06, |
| "loss": 0.3726, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.3967684021543985, |
| "grad_norm": 0.28122803568840027, |
| "learning_rate": 1.179199119170759e-06, |
| "loss": 0.3838, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.3978456014362655, |
| "grad_norm": 0.2687775194644928, |
| "learning_rate": 1.175157393226859e-06, |
| "loss": 0.4249, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.398922800718133, |
| "grad_norm": 0.2854040861129761, |
| "learning_rate": 1.1711216831440086e-06, |
| "loss": 0.374, |
| "step": 2227 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.27205950021743774, |
| "learning_rate": 1.1670919952697267e-06, |
| "loss": 0.3657, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.4010771992818674, |
| "grad_norm": 0.2628454267978668, |
| "learning_rate": 1.1630683359420653e-06, |
| "loss": 0.3695, |
| "step": 2229 |
| }, |
| { |
| "epoch": 2.4021543985637344, |
| "grad_norm": 0.30919772386550903, |
| "learning_rate": 1.1590507114895915e-06, |
| "loss": 0.3898, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.4032315978456014, |
| "grad_norm": 0.28211405873298645, |
| "learning_rate": 1.1550391282313817e-06, |
| "loss": 0.358, |
| "step": 2231 |
| }, |
| { |
| "epoch": 2.4043087971274684, |
| "grad_norm": 0.27124226093292236, |
| "learning_rate": 1.1510335924770106e-06, |
| "loss": 0.3383, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.405385996409336, |
| "grad_norm": 0.26909205317497253, |
| "learning_rate": 1.1470341105265375e-06, |
| "loss": 0.3386, |
| "step": 2233 |
| }, |
| { |
| "epoch": 2.406463195691203, |
| "grad_norm": 0.31158143281936646, |
| "learning_rate": 1.1430406886705053e-06, |
| "loss": 0.3917, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.4075403949730703, |
| "grad_norm": 0.2712906002998352, |
| "learning_rate": 1.1390533331899235e-06, |
| "loss": 0.3817, |
| "step": 2235 |
| }, |
| { |
| "epoch": 2.4086175942549373, |
| "grad_norm": 0.27246353030204773, |
| "learning_rate": 1.1350720503562574e-06, |
| "loss": 0.3796, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.4096947935368043, |
| "grad_norm": 0.26785191893577576, |
| "learning_rate": 1.1310968464314249e-06, |
| "loss": 0.367, |
| "step": 2237 |
| }, |
| { |
| "epoch": 2.4107719928186713, |
| "grad_norm": 0.2866846024990082, |
| "learning_rate": 1.1271277276677805e-06, |
| "loss": 0.3888, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.4118491921005387, |
| "grad_norm": 0.2787204384803772, |
| "learning_rate": 1.1231647003081092e-06, |
| "loss": 0.3736, |
| "step": 2239 |
| }, |
| { |
| "epoch": 2.4129263913824057, |
| "grad_norm": 0.2773076295852661, |
| "learning_rate": 1.119207770585614e-06, |
| "loss": 0.3504, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.4140035906642727, |
| "grad_norm": 0.2891734540462494, |
| "learning_rate": 1.1152569447239076e-06, |
| "loss": 0.4098, |
| "step": 2241 |
| }, |
| { |
| "epoch": 2.41508078994614, |
| "grad_norm": 0.28226038813591003, |
| "learning_rate": 1.1113122289370037e-06, |
| "loss": 0.3683, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.416157989228007, |
| "grad_norm": 0.28407686948776245, |
| "learning_rate": 1.1073736294293035e-06, |
| "loss": 0.3708, |
| "step": 2243 |
| }, |
| { |
| "epoch": 2.417235188509874, |
| "grad_norm": 0.2887764871120453, |
| "learning_rate": 1.103441152395588e-06, |
| "loss": 0.3508, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.4183123877917416, |
| "grad_norm": 0.28351515531539917, |
| "learning_rate": 1.0995148040210108e-06, |
| "loss": 0.3838, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.4193895870736086, |
| "grad_norm": 0.28088366985321045, |
| "learning_rate": 1.0955945904810855e-06, |
| "loss": 0.3517, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.4204667863554756, |
| "grad_norm": 0.2925315201282501, |
| "learning_rate": 1.0916805179416762e-06, |
| "loss": 0.3881, |
| "step": 2247 |
| }, |
| { |
| "epoch": 2.421543985637343, |
| "grad_norm": 0.2957456409931183, |
| "learning_rate": 1.0877725925589883e-06, |
| "loss": 0.3794, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.42262118491921, |
| "grad_norm": 0.2947244942188263, |
| "learning_rate": 1.0838708204795584e-06, |
| "loss": 0.385, |
| "step": 2249 |
| }, |
| { |
| "epoch": 2.423698384201077, |
| "grad_norm": 0.29007092118263245, |
| "learning_rate": 1.079975207840247e-06, |
| "loss": 0.35, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.4247755834829445, |
| "grad_norm": 0.2962624132633209, |
| "learning_rate": 1.0760857607682218e-06, |
| "loss": 0.377, |
| "step": 2251 |
| }, |
| { |
| "epoch": 2.4258527827648115, |
| "grad_norm": 0.29235324263572693, |
| "learning_rate": 1.0722024853809576e-06, |
| "loss": 0.4015, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.4269299820466785, |
| "grad_norm": 0.2606956660747528, |
| "learning_rate": 1.0683253877862226e-06, |
| "loss": 0.3588, |
| "step": 2253 |
| }, |
| { |
| "epoch": 2.428007181328546, |
| "grad_norm": 0.26373934745788574, |
| "learning_rate": 1.064454474082064e-06, |
| "loss": 0.3628, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.429084380610413, |
| "grad_norm": 0.2741117775440216, |
| "learning_rate": 1.0605897503568058e-06, |
| "loss": 0.3531, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.43016157989228, |
| "grad_norm": 0.30139395594596863, |
| "learning_rate": 1.0567312226890365e-06, |
| "loss": 0.4021, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.431238779174147, |
| "grad_norm": 0.2639596462249756, |
| "learning_rate": 1.0528788971475973e-06, |
| "loss": 0.3396, |
| "step": 2257 |
| }, |
| { |
| "epoch": 2.4323159784560144, |
| "grad_norm": 0.2918408215045929, |
| "learning_rate": 1.0490327797915767e-06, |
| "loss": 0.3917, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.4333931777378814, |
| "grad_norm": 0.2750086486339569, |
| "learning_rate": 1.045192876670298e-06, |
| "loss": 0.375, |
| "step": 2259 |
| }, |
| { |
| "epoch": 2.434470377019749, |
| "grad_norm": 0.2692476511001587, |
| "learning_rate": 1.041359193823307e-06, |
| "loss": 0.3554, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.435547576301616, |
| "grad_norm": 0.263949990272522, |
| "learning_rate": 1.0375317372803711e-06, |
| "loss": 0.3842, |
| "step": 2261 |
| }, |
| { |
| "epoch": 2.436624775583483, |
| "grad_norm": 0.29045379161834717, |
| "learning_rate": 1.0337105130614627e-06, |
| "loss": 0.3828, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.43770197486535, |
| "grad_norm": 0.2991545498371124, |
| "learning_rate": 1.0298955271767513e-06, |
| "loss": 0.3536, |
| "step": 2263 |
| }, |
| { |
| "epoch": 2.4387791741472173, |
| "grad_norm": 0.3013278543949127, |
| "learning_rate": 1.0260867856265967e-06, |
| "loss": 0.4122, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.4398563734290843, |
| "grad_norm": 0.27581077814102173, |
| "learning_rate": 1.0222842944015327e-06, |
| "loss": 0.3984, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.4409335727109513, |
| "grad_norm": 0.26801231503486633, |
| "learning_rate": 1.0184880594822661e-06, |
| "loss": 0.3426, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.442010771992819, |
| "grad_norm": 0.3020249903202057, |
| "learning_rate": 1.0146980868396644e-06, |
| "loss": 0.3663, |
| "step": 2267 |
| }, |
| { |
| "epoch": 2.443087971274686, |
| "grad_norm": 0.2986231744289398, |
| "learning_rate": 1.0109143824347411e-06, |
| "loss": 0.3812, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.444165170556553, |
| "grad_norm": 0.2531813979148865, |
| "learning_rate": 1.0071369522186546e-06, |
| "loss": 0.3607, |
| "step": 2269 |
| }, |
| { |
| "epoch": 2.4452423698384202, |
| "grad_norm": 0.2960926294326782, |
| "learning_rate": 1.0033658021326947e-06, |
| "loss": 0.3886, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.4463195691202873, |
| "grad_norm": 0.27651941776275635, |
| "learning_rate": 9.996009381082717e-07, |
| "loss": 0.3885, |
| "step": 2271 |
| }, |
| { |
| "epoch": 2.4473967684021543, |
| "grad_norm": 0.28147345781326294, |
| "learning_rate": 9.95842366066911e-07, |
| "loss": 0.3872, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.4484739676840217, |
| "grad_norm": 0.2894003987312317, |
| "learning_rate": 9.920900919202398e-07, |
| "loss": 0.3668, |
| "step": 2273 |
| }, |
| { |
| "epoch": 2.4495511669658887, |
| "grad_norm": 0.29143601655960083, |
| "learning_rate": 9.883441215699824e-07, |
| "loss": 0.3959, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.4506283662477557, |
| "grad_norm": 0.27094566822052, |
| "learning_rate": 9.846044609079454e-07, |
| "loss": 0.3823, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.451705565529623, |
| "grad_norm": 0.2588837146759033, |
| "learning_rate": 9.808711158160105e-07, |
| "loss": 0.3565, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.45278276481149, |
| "grad_norm": 0.2719583809375763, |
| "learning_rate": 9.7714409216613e-07, |
| "loss": 0.4037, |
| "step": 2277 |
| }, |
| { |
| "epoch": 2.453859964093357, |
| "grad_norm": 0.30284786224365234, |
| "learning_rate": 9.734233958203109e-07, |
| "loss": 0.3905, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.4549371633752246, |
| "grad_norm": 0.27402400970458984, |
| "learning_rate": 9.697090326306096e-07, |
| "loss": 0.3302, |
| "step": 2279 |
| }, |
| { |
| "epoch": 2.4560143626570916, |
| "grad_norm": 0.2703023850917816, |
| "learning_rate": 9.660010084391197e-07, |
| "loss": 0.3708, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.4570915619389586, |
| "grad_norm": 0.2773868143558502, |
| "learning_rate": 9.622993290779665e-07, |
| "loss": 0.3925, |
| "step": 2281 |
| }, |
| { |
| "epoch": 2.4581687612208256, |
| "grad_norm": 0.28041738271713257, |
| "learning_rate": 9.586040003692965e-07, |
| "loss": 0.3696, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.459245960502693, |
| "grad_norm": 0.28617560863494873, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.3695, |
| "step": 2283 |
| }, |
| { |
| "epoch": 2.46032315978456, |
| "grad_norm": 0.272877037525177, |
| "learning_rate": 9.51232418148027e-07, |
| "loss": 0.3525, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.4614003590664275, |
| "grad_norm": 0.2908807396888733, |
| "learning_rate": 9.475561762297414e-07, |
| "loss": 0.3659, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.4624775583482945, |
| "grad_norm": 0.30039674043655396, |
| "learning_rate": 9.438863081525396e-07, |
| "loss": 0.3795, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.4635547576301615, |
| "grad_norm": 0.2751771807670593, |
| "learning_rate": 9.402228196885343e-07, |
| "loss": 0.3942, |
| "step": 2287 |
| }, |
| { |
| "epoch": 2.4646319569120285, |
| "grad_norm": 0.27453452348709106, |
| "learning_rate": 9.365657165998021e-07, |
| "loss": 0.3745, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.465709156193896, |
| "grad_norm": 0.2895946502685547, |
| "learning_rate": 9.329150046383773e-07, |
| "loss": 0.3879, |
| "step": 2289 |
| }, |
| { |
| "epoch": 2.466786355475763, |
| "grad_norm": 0.2859017550945282, |
| "learning_rate": 9.292706895462411e-07, |
| "loss": 0.3745, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.46786355475763, |
| "grad_norm": 0.28885552287101746, |
| "learning_rate": 9.256327770553152e-07, |
| "loss": 0.3641, |
| "step": 2291 |
| }, |
| { |
| "epoch": 2.4689407540394974, |
| "grad_norm": 0.29805275797843933, |
| "learning_rate": 9.220012728874472e-07, |
| "loss": 0.3989, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.4700179533213644, |
| "grad_norm": 0.26521188020706177, |
| "learning_rate": 9.183761827544096e-07, |
| "loss": 0.3662, |
| "step": 2293 |
| }, |
| { |
| "epoch": 2.4710951526032314, |
| "grad_norm": 0.27879083156585693, |
| "learning_rate": 9.147575123578845e-07, |
| "loss": 0.3732, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.472172351885099, |
| "grad_norm": 0.2808539569377899, |
| "learning_rate": 9.111452673894589e-07, |
| "loss": 0.3924, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.473249551166966, |
| "grad_norm": 0.28251829743385315, |
| "learning_rate": 9.075394535306087e-07, |
| "loss": 0.3991, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.474326750448833, |
| "grad_norm": 0.28753092885017395, |
| "learning_rate": 9.039400764527001e-07, |
| "loss": 0.3656, |
| "step": 2297 |
| }, |
| { |
| "epoch": 2.4754039497307003, |
| "grad_norm": 0.2704468369483948, |
| "learning_rate": 9.003471418169734e-07, |
| "loss": 0.3321, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.4764811490125673, |
| "grad_norm": 0.28937360644340515, |
| "learning_rate": 8.967606552745361e-07, |
| "loss": 0.3983, |
| "step": 2299 |
| }, |
| { |
| "epoch": 2.4775583482944343, |
| "grad_norm": 0.28396716713905334, |
| "learning_rate": 8.93180622466352e-07, |
| "loss": 0.4039, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.478635547576302, |
| "grad_norm": 0.26640549302101135, |
| "learning_rate": 8.896070490232361e-07, |
| "loss": 0.3373, |
| "step": 2301 |
| }, |
| { |
| "epoch": 2.479712746858169, |
| "grad_norm": 0.298631489276886, |
| "learning_rate": 8.860399405658443e-07, |
| "loss": 0.3641, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.480789946140036, |
| "grad_norm": 0.29027625918388367, |
| "learning_rate": 8.824793027046636e-07, |
| "loss": 0.3973, |
| "step": 2303 |
| }, |
| { |
| "epoch": 2.4818671454219032, |
| "grad_norm": 0.27970612049102783, |
| "learning_rate": 8.789251410400024e-07, |
| "loss": 0.3939, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.4829443447037702, |
| "grad_norm": 0.26370441913604736, |
| "learning_rate": 8.753774611619853e-07, |
| "loss": 0.3535, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.4840215439856372, |
| "grad_norm": 0.2663581669330597, |
| "learning_rate": 8.718362686505422e-07, |
| "loss": 0.3637, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.4850987432675042, |
| "grad_norm": 0.2874703109264374, |
| "learning_rate": 8.68301569075396e-07, |
| "loss": 0.3948, |
| "step": 2307 |
| }, |
| { |
| "epoch": 2.4861759425493717, |
| "grad_norm": 0.263696551322937, |
| "learning_rate": 8.647733679960596e-07, |
| "loss": 0.3467, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.4872531418312387, |
| "grad_norm": 0.28646519780158997, |
| "learning_rate": 8.612516709618251e-07, |
| "loss": 0.4307, |
| "step": 2309 |
| }, |
| { |
| "epoch": 2.488330341113106, |
| "grad_norm": 0.24721655249595642, |
| "learning_rate": 8.577364835117552e-07, |
| "loss": 0.3457, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.489407540394973, |
| "grad_norm": 0.2845896780490875, |
| "learning_rate": 8.542278111746722e-07, |
| "loss": 0.373, |
| "step": 2311 |
| }, |
| { |
| "epoch": 2.49048473967684, |
| "grad_norm": 0.2640857696533203, |
| "learning_rate": 8.507256594691532e-07, |
| "loss": 0.3591, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.491561938958707, |
| "grad_norm": 0.28080788254737854, |
| "learning_rate": 8.472300339035178e-07, |
| "loss": 0.387, |
| "step": 2313 |
| }, |
| { |
| "epoch": 2.4926391382405746, |
| "grad_norm": 0.27218419313430786, |
| "learning_rate": 8.437409399758234e-07, |
| "loss": 0.3759, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.4937163375224416, |
| "grad_norm": 0.2938312292098999, |
| "learning_rate": 8.402583831738504e-07, |
| "loss": 0.3598, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.494793536804309, |
| "grad_norm": 0.28441736102104187, |
| "learning_rate": 8.367823689751009e-07, |
| "loss": 0.3511, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.495870736086176, |
| "grad_norm": 0.2931486964225769, |
| "learning_rate": 8.333129028467829e-07, |
| "loss": 0.4074, |
| "step": 2317 |
| }, |
| { |
| "epoch": 2.496947935368043, |
| "grad_norm": 0.2538676857948303, |
| "learning_rate": 8.29849990245809e-07, |
| "loss": 0.3539, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.49802513464991, |
| "grad_norm": 0.27099522948265076, |
| "learning_rate": 8.263936366187825e-07, |
| "loss": 0.4047, |
| "step": 2319 |
| }, |
| { |
| "epoch": 2.4991023339317775, |
| "grad_norm": 0.2580559551715851, |
| "learning_rate": 8.229438474019913e-07, |
| "loss": 0.3774, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.5001795332136445, |
| "grad_norm": 0.2654803395271301, |
| "learning_rate": 8.195006280213969e-07, |
| "loss": 0.3555, |
| "step": 2321 |
| }, |
| { |
| "epoch": 2.5012567324955115, |
| "grad_norm": 0.2501416504383087, |
| "learning_rate": 8.160639838926293e-07, |
| "loss": 0.3478, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.502333931777379, |
| "grad_norm": 0.3007952570915222, |
| "learning_rate": 8.126339204209765e-07, |
| "loss": 0.4177, |
| "step": 2323 |
| }, |
| { |
| "epoch": 2.503411131059246, |
| "grad_norm": 0.28114473819732666, |
| "learning_rate": 8.092104430013737e-07, |
| "loss": 0.3829, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.504488330341113, |
| "grad_norm": 0.2905210852622986, |
| "learning_rate": 8.057935570184e-07, |
| "loss": 0.384, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.5055655296229804, |
| "grad_norm": 0.2917923033237457, |
| "learning_rate": 8.023832678462667e-07, |
| "loss": 0.3722, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.5066427289048474, |
| "grad_norm": 0.27999940514564514, |
| "learning_rate": 7.989795808488098e-07, |
| "loss": 0.3433, |
| "step": 2327 |
| }, |
| { |
| "epoch": 2.5077199281867144, |
| "grad_norm": 0.2836602032184601, |
| "learning_rate": 7.955825013794793e-07, |
| "loss": 0.3905, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.508797127468582, |
| "grad_norm": 0.27342960238456726, |
| "learning_rate": 7.921920347813333e-07, |
| "loss": 0.3556, |
| "step": 2329 |
| }, |
| { |
| "epoch": 2.509874326750449, |
| "grad_norm": 0.2859393060207367, |
| "learning_rate": 7.888081863870307e-07, |
| "loss": 0.3864, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.510951526032316, |
| "grad_norm": 0.2840021550655365, |
| "learning_rate": 7.8543096151882e-07, |
| "loss": 0.3682, |
| "step": 2331 |
| }, |
| { |
| "epoch": 2.512028725314183, |
| "grad_norm": 0.2775287330150604, |
| "learning_rate": 7.820603654885301e-07, |
| "loss": 0.3865, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.5131059245960503, |
| "grad_norm": 0.28231337666511536, |
| "learning_rate": 7.786964035975658e-07, |
| "loss": 0.3742, |
| "step": 2333 |
| }, |
| { |
| "epoch": 2.5141831238779173, |
| "grad_norm": 0.2635897696018219, |
| "learning_rate": 7.753390811368972e-07, |
| "loss": 0.3271, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.5152603231597848, |
| "grad_norm": 0.28646957874298096, |
| "learning_rate": 7.719884033870523e-07, |
| "loss": 0.4104, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.5163375224416518, |
| "grad_norm": 0.27592933177948, |
| "learning_rate": 7.686443756181067e-07, |
| "loss": 0.3701, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.5174147217235188, |
| "grad_norm": 0.30637457966804504, |
| "learning_rate": 7.653070030896775e-07, |
| "loss": 0.3996, |
| "step": 2337 |
| }, |
| { |
| "epoch": 2.5184919210053858, |
| "grad_norm": 0.28058868646621704, |
| "learning_rate": 7.619762910509132e-07, |
| "loss": 0.356, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.519569120287253, |
| "grad_norm": 0.2630109488964081, |
| "learning_rate": 7.586522447404882e-07, |
| "loss": 0.3661, |
| "step": 2339 |
| }, |
| { |
| "epoch": 2.52064631956912, |
| "grad_norm": 0.29673612117767334, |
| "learning_rate": 7.553348693865897e-07, |
| "loss": 0.3669, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.5217235188509877, |
| "grad_norm": 0.2780836224555969, |
| "learning_rate": 7.520241702069158e-07, |
| "loss": 0.3861, |
| "step": 2341 |
| }, |
| { |
| "epoch": 2.5228007181328547, |
| "grad_norm": 0.26055651903152466, |
| "learning_rate": 7.487201524086629e-07, |
| "loss": 0.3731, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.5238779174147217, |
| "grad_norm": 0.26952463388442993, |
| "learning_rate": 7.454228211885184e-07, |
| "loss": 0.3758, |
| "step": 2343 |
| }, |
| { |
| "epoch": 2.5249551166965887, |
| "grad_norm": 0.2588259279727936, |
| "learning_rate": 7.421321817326527e-07, |
| "loss": 0.363, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.526032315978456, |
| "grad_norm": 0.29151490330696106, |
| "learning_rate": 7.388482392167118e-07, |
| "loss": 0.3748, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.527109515260323, |
| "grad_norm": 0.28330057859420776, |
| "learning_rate": 7.355709988058091e-07, |
| "loss": 0.3261, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.5281867145421906, |
| "grad_norm": 0.28915736079216003, |
| "learning_rate": 7.32300465654513e-07, |
| "loss": 0.4078, |
| "step": 2347 |
| }, |
| { |
| "epoch": 2.5292639138240576, |
| "grad_norm": 0.2703227400779724, |
| "learning_rate": 7.290366449068482e-07, |
| "loss": 0.3573, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.5303411131059246, |
| "grad_norm": 0.2650614380836487, |
| "learning_rate": 7.257795416962754e-07, |
| "loss": 0.3383, |
| "step": 2349 |
| }, |
| { |
| "epoch": 2.5314183123877916, |
| "grad_norm": 0.29982319474220276, |
| "learning_rate": 7.225291611456947e-07, |
| "loss": 0.4243, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.532495511669659, |
| "grad_norm": 0.2780900299549103, |
| "learning_rate": 7.19285508367431e-07, |
| "loss": 0.3339, |
| "step": 2351 |
| }, |
| { |
| "epoch": 2.533572710951526, |
| "grad_norm": 0.2761061489582062, |
| "learning_rate": 7.160485884632279e-07, |
| "loss": 0.3345, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.534649910233393, |
| "grad_norm": 0.30426251888275146, |
| "learning_rate": 7.128184065242377e-07, |
| "loss": 0.4213, |
| "step": 2353 |
| }, |
| { |
| "epoch": 2.5357271095152605, |
| "grad_norm": 0.26618361473083496, |
| "learning_rate": 7.095949676310171e-07, |
| "loss": 0.3556, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.5368043087971275, |
| "grad_norm": 0.2708311378955841, |
| "learning_rate": 7.06378276853516e-07, |
| "loss": 0.3681, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.5378815080789945, |
| "grad_norm": 0.27327191829681396, |
| "learning_rate": 7.031683392510696e-07, |
| "loss": 0.3747, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.5389587073608615, |
| "grad_norm": 0.2743578553199768, |
| "learning_rate": 6.999651598723928e-07, |
| "loss": 0.3714, |
| "step": 2357 |
| }, |
| { |
| "epoch": 2.540035906642729, |
| "grad_norm": 0.27341485023498535, |
| "learning_rate": 6.96768743755572e-07, |
| "loss": 0.3602, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.541113105924596, |
| "grad_norm": 0.30134743452072144, |
| "learning_rate": 6.935790959280525e-07, |
| "loss": 0.4088, |
| "step": 2359 |
| }, |
| { |
| "epoch": 2.5421903052064634, |
| "grad_norm": 0.28071579337120056, |
| "learning_rate": 6.903962214066367e-07, |
| "loss": 0.366, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.5432675044883304, |
| "grad_norm": 0.28438490629196167, |
| "learning_rate": 6.872201251974747e-07, |
| "loss": 0.3826, |
| "step": 2361 |
| }, |
| { |
| "epoch": 2.5443447037701974, |
| "grad_norm": 0.2743411660194397, |
| "learning_rate": 6.840508122960526e-07, |
| "loss": 0.3389, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.5454219030520644, |
| "grad_norm": 0.2804628908634186, |
| "learning_rate": 6.808882876871908e-07, |
| "loss": 0.3996, |
| "step": 2363 |
| }, |
| { |
| "epoch": 2.546499102333932, |
| "grad_norm": 0.27134260535240173, |
| "learning_rate": 6.777325563450282e-07, |
| "loss": 0.3802, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.547576301615799, |
| "grad_norm": 0.2600853741168976, |
| "learning_rate": 6.745836232330227e-07, |
| "loss": 0.352, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.5486535008976663, |
| "grad_norm": 0.271705687046051, |
| "learning_rate": 6.714414933039398e-07, |
| "loss": 0.3787, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.5497307001795333, |
| "grad_norm": 0.27931052446365356, |
| "learning_rate": 6.683061714998418e-07, |
| "loss": 0.3745, |
| "step": 2367 |
| }, |
| { |
| "epoch": 2.5508078994614003, |
| "grad_norm": 0.2835497558116913, |
| "learning_rate": 6.651776627520856e-07, |
| "loss": 0.336, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.5518850987432673, |
| "grad_norm": 0.3035619854927063, |
| "learning_rate": 6.62055971981313e-07, |
| "loss": 0.3856, |
| "step": 2369 |
| }, |
| { |
| "epoch": 2.5529622980251347, |
| "grad_norm": 0.27398329973220825, |
| "learning_rate": 6.589411040974369e-07, |
| "loss": 0.3663, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.5540394973070017, |
| "grad_norm": 0.28608840703964233, |
| "learning_rate": 6.558330639996457e-07, |
| "loss": 0.3688, |
| "step": 2371 |
| }, |
| { |
| "epoch": 2.555116696588869, |
| "grad_norm": 0.30372244119644165, |
| "learning_rate": 6.527318565763829e-07, |
| "loss": 0.3981, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.556193895870736, |
| "grad_norm": 0.27598169445991516, |
| "learning_rate": 6.496374867053496e-07, |
| "loss": 0.3459, |
| "step": 2373 |
| }, |
| { |
| "epoch": 2.557271095152603, |
| "grad_norm": 0.28201863169670105, |
| "learning_rate": 6.465499592534902e-07, |
| "loss": 0.3984, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.55834829443447, |
| "grad_norm": 0.27516603469848633, |
| "learning_rate": 6.434692790769886e-07, |
| "loss": 0.3742, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.5594254937163377, |
| "grad_norm": 0.2833200693130493, |
| "learning_rate": 6.403954510212585e-07, |
| "loss": 0.354, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.5605026929982047, |
| "grad_norm": 0.2715960741043091, |
| "learning_rate": 6.373284799209351e-07, |
| "loss": 0.3626, |
| "step": 2377 |
| }, |
| { |
| "epoch": 2.5615798922800717, |
| "grad_norm": 0.27197495102882385, |
| "learning_rate": 6.342683705998714e-07, |
| "loss": 0.3797, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.562657091561939, |
| "grad_norm": 0.2932741343975067, |
| "learning_rate": 6.312151278711237e-07, |
| "loss": 0.4351, |
| "step": 2379 |
| }, |
| { |
| "epoch": 2.563734290843806, |
| "grad_norm": 0.29426777362823486, |
| "learning_rate": 6.281687565369537e-07, |
| "loss": 0.3912, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.564811490125673, |
| "grad_norm": 0.309602826833725, |
| "learning_rate": 6.251292613888094e-07, |
| "loss": 0.4179, |
| "step": 2381 |
| }, |
| { |
| "epoch": 2.5658886894075406, |
| "grad_norm": 0.2364678680896759, |
| "learning_rate": 6.220966472073286e-07, |
| "loss": 0.324, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.5669658886894076, |
| "grad_norm": 0.2689834237098694, |
| "learning_rate": 6.190709187623245e-07, |
| "loss": 0.3866, |
| "step": 2383 |
| }, |
| { |
| "epoch": 2.5680430879712746, |
| "grad_norm": 0.27198678255081177, |
| "learning_rate": 6.160520808127807e-07, |
| "loss": 0.3846, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.569120287253142, |
| "grad_norm": 0.2812858521938324, |
| "learning_rate": 6.130401381068424e-07, |
| "loss": 0.3881, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.570197486535009, |
| "grad_norm": 0.2781459093093872, |
| "learning_rate": 6.100350953818102e-07, |
| "loss": 0.3525, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.571274685816876, |
| "grad_norm": 0.2592630088329315, |
| "learning_rate": 6.070369573641327e-07, |
| "loss": 0.3588, |
| "step": 2387 |
| }, |
| { |
| "epoch": 2.572351885098743, |
| "grad_norm": 0.25368958711624146, |
| "learning_rate": 6.040457287693963e-07, |
| "loss": 0.3677, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.5734290843806105, |
| "grad_norm": 0.2784026265144348, |
| "learning_rate": 6.010614143023231e-07, |
| "loss": 0.4123, |
| "step": 2389 |
| }, |
| { |
| "epoch": 2.5745062836624775, |
| "grad_norm": 0.2718239724636078, |
| "learning_rate": 5.980840186567582e-07, |
| "loss": 0.3505, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.575583482944345, |
| "grad_norm": 0.27137839794158936, |
| "learning_rate": 5.951135465156649e-07, |
| "loss": 0.3843, |
| "step": 2391 |
| }, |
| { |
| "epoch": 2.576660682226212, |
| "grad_norm": 0.28062236309051514, |
| "learning_rate": 5.921500025511174e-07, |
| "loss": 0.397, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.577737881508079, |
| "grad_norm": 0.2782232165336609, |
| "learning_rate": 5.89193391424292e-07, |
| "loss": 0.3403, |
| "step": 2393 |
| }, |
| { |
| "epoch": 2.578815080789946, |
| "grad_norm": 0.2708394527435303, |
| "learning_rate": 5.862437177854629e-07, |
| "loss": 0.3857, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.5798922800718134, |
| "grad_norm": 0.2777789533138275, |
| "learning_rate": 5.833009862739919e-07, |
| "loss": 0.3518, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.5809694793536804, |
| "grad_norm": 0.2794034481048584, |
| "learning_rate": 5.803652015183192e-07, |
| "loss": 0.4027, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.582046678635548, |
| "grad_norm": 0.27170926332473755, |
| "learning_rate": 5.774363681359624e-07, |
| "loss": 0.3731, |
| "step": 2397 |
| }, |
| { |
| "epoch": 2.583123877917415, |
| "grad_norm": 0.2742154896259308, |
| "learning_rate": 5.745144907335043e-07, |
| "loss": 0.3842, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.584201077199282, |
| "grad_norm": 0.2668771743774414, |
| "learning_rate": 5.715995739065877e-07, |
| "loss": 0.3445, |
| "step": 2399 |
| }, |
| { |
| "epoch": 2.585278276481149, |
| "grad_norm": 0.2665964961051941, |
| "learning_rate": 5.686916222399069e-07, |
| "loss": 0.3923, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.5863554757630163, |
| "grad_norm": 0.27437713742256165, |
| "learning_rate": 5.657906403072e-07, |
| "loss": 0.3829, |
| "step": 2401 |
| }, |
| { |
| "epoch": 2.5874326750448833, |
| "grad_norm": 0.25516101717948914, |
| "learning_rate": 5.628966326712453e-07, |
| "loss": 0.3505, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.5885098743267507, |
| "grad_norm": 0.2651563882827759, |
| "learning_rate": 5.60009603883851e-07, |
| "loss": 0.3806, |
| "step": 2403 |
| }, |
| { |
| "epoch": 2.5895870736086177, |
| "grad_norm": 0.2705548107624054, |
| "learning_rate": 5.571295584858466e-07, |
| "loss": 0.3707, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.5906642728904847, |
| "grad_norm": 0.2829221189022064, |
| "learning_rate": 5.542565010070799e-07, |
| "loss": 0.3625, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.5917414721723517, |
| "grad_norm": 0.27439743280410767, |
| "learning_rate": 5.513904359664074e-07, |
| "loss": 0.3762, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.592818671454219, |
| "grad_norm": 0.26863783597946167, |
| "learning_rate": 5.485313678716875e-07, |
| "loss": 0.3776, |
| "step": 2407 |
| }, |
| { |
| "epoch": 2.593895870736086, |
| "grad_norm": 0.26877492666244507, |
| "learning_rate": 5.456793012197736e-07, |
| "loss": 0.3579, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.594973070017953, |
| "grad_norm": 0.2610160708427429, |
| "learning_rate": 5.428342404965076e-07, |
| "loss": 0.3658, |
| "step": 2409 |
| }, |
| { |
| "epoch": 2.5960502692998206, |
| "grad_norm": 0.27619680762290955, |
| "learning_rate": 5.399961901767115e-07, |
| "loss": 0.4095, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.5971274685816876, |
| "grad_norm": 0.2557905912399292, |
| "learning_rate": 5.371651547241802e-07, |
| "loss": 0.344, |
| "step": 2411 |
| }, |
| { |
| "epoch": 2.5982046678635546, |
| "grad_norm": 0.2994793951511383, |
| "learning_rate": 5.343411385916769e-07, |
| "loss": 0.4031, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.5992818671454216, |
| "grad_norm": 0.2592993676662445, |
| "learning_rate": 5.315241462209231e-07, |
| "loss": 0.3794, |
| "step": 2413 |
| }, |
| { |
| "epoch": 2.600359066427289, |
| "grad_norm": 0.2614355683326721, |
| "learning_rate": 5.287141820425945e-07, |
| "loss": 0.3978, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.601436265709156, |
| "grad_norm": 0.28487980365753174, |
| "learning_rate": 5.259112504763115e-07, |
| "loss": 0.3626, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.6025134649910235, |
| "grad_norm": 0.2808648645877838, |
| "learning_rate": 5.23115355930634e-07, |
| "loss": 0.3786, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.6035906642728905, |
| "grad_norm": 0.29835447669029236, |
| "learning_rate": 5.203265028030541e-07, |
| "loss": 0.3756, |
| "step": 2417 |
| }, |
| { |
| "epoch": 2.6046678635547575, |
| "grad_norm": 0.28749880194664, |
| "learning_rate": 5.175446954799874e-07, |
| "loss": 0.3932, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.6057450628366245, |
| "grad_norm": 0.27930009365081787, |
| "learning_rate": 5.147699383367705e-07, |
| "loss": 0.3375, |
| "step": 2419 |
| }, |
| { |
| "epoch": 2.606822262118492, |
| "grad_norm": 0.2683153450489044, |
| "learning_rate": 5.120022357376464e-07, |
| "loss": 0.3645, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.607899461400359, |
| "grad_norm": 0.28572866320610046, |
| "learning_rate": 5.092415920357674e-07, |
| "loss": 0.3883, |
| "step": 2421 |
| }, |
| { |
| "epoch": 2.6089766606822264, |
| "grad_norm": 0.27519190311431885, |
| "learning_rate": 5.064880115731796e-07, |
| "loss": 0.3894, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.6100538599640934, |
| "grad_norm": 0.2632756233215332, |
| "learning_rate": 5.03741498680822e-07, |
| "loss": 0.382, |
| "step": 2423 |
| }, |
| { |
| "epoch": 2.6111310592459605, |
| "grad_norm": 0.24517042934894562, |
| "learning_rate": 5.010020576785174e-07, |
| "loss": 0.3316, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.6122082585278275, |
| "grad_norm": 0.3185754418373108, |
| "learning_rate": 4.982696928749642e-07, |
| "loss": 0.4429, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.613285457809695, |
| "grad_norm": 0.2543036937713623, |
| "learning_rate": 4.955444085677319e-07, |
| "loss": 0.3481, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.614362657091562, |
| "grad_norm": 0.27936050295829773, |
| "learning_rate": 4.928262090432556e-07, |
| "loss": 0.3887, |
| "step": 2427 |
| }, |
| { |
| "epoch": 2.6154398563734294, |
| "grad_norm": 0.28514647483825684, |
| "learning_rate": 4.901150985768216e-07, |
| "loss": 0.3859, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.6165170556552964, |
| "grad_norm": 0.24403955042362213, |
| "learning_rate": 4.874110814325723e-07, |
| "loss": 0.3109, |
| "step": 2429 |
| }, |
| { |
| "epoch": 2.6175942549371634, |
| "grad_norm": 0.2888749837875366, |
| "learning_rate": 4.847141618634899e-07, |
| "loss": 0.3842, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.6186714542190304, |
| "grad_norm": 0.2979486882686615, |
| "learning_rate": 4.820243441113942e-07, |
| "loss": 0.4229, |
| "step": 2431 |
| }, |
| { |
| "epoch": 2.619748653500898, |
| "grad_norm": 0.25003379583358765, |
| "learning_rate": 4.793416324069372e-07, |
| "loss": 0.3338, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.620825852782765, |
| "grad_norm": 0.2611185908317566, |
| "learning_rate": 4.7666603096958865e-07, |
| "loss": 0.3617, |
| "step": 2433 |
| }, |
| { |
| "epoch": 2.621903052064632, |
| "grad_norm": 0.25409772992134094, |
| "learning_rate": 4.739975440076405e-07, |
| "loss": 0.3648, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.6229802513464993, |
| "grad_norm": 0.25264662504196167, |
| "learning_rate": 4.713361757181917e-07, |
| "loss": 0.3572, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.6240574506283663, |
| "grad_norm": 0.2889323830604553, |
| "learning_rate": 4.6868193028714814e-07, |
| "loss": 0.3884, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.6251346499102333, |
| "grad_norm": 0.27489691972732544, |
| "learning_rate": 4.6603481188920664e-07, |
| "loss": 0.39, |
| "step": 2437 |
| }, |
| { |
| "epoch": 2.6262118491921003, |
| "grad_norm": 0.27223941683769226, |
| "learning_rate": 4.6339482468786e-07, |
| "loss": 0.3569, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.6272890484739677, |
| "grad_norm": 0.2794973850250244, |
| "learning_rate": 4.607619728353818e-07, |
| "loss": 0.3976, |
| "step": 2439 |
| }, |
| { |
| "epoch": 2.6283662477558347, |
| "grad_norm": 0.2633703351020813, |
| "learning_rate": 4.581362604728246e-07, |
| "loss": 0.3686, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.629443447037702, |
| "grad_norm": 0.26582613587379456, |
| "learning_rate": 4.5551769173001024e-07, |
| "loss": 0.3471, |
| "step": 2441 |
| }, |
| { |
| "epoch": 2.630520646319569, |
| "grad_norm": 0.27101296186447144, |
| "learning_rate": 4.529062707255261e-07, |
| "loss": 0.3827, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.631597845601436, |
| "grad_norm": 0.28216707706451416, |
| "learning_rate": 4.5030200156671534e-07, |
| "loss": 0.3713, |
| "step": 2443 |
| }, |
| { |
| "epoch": 2.632675044883303, |
| "grad_norm": 0.2950674891471863, |
| "learning_rate": 4.4770488834967486e-07, |
| "loss": 0.3577, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.6337522441651706, |
| "grad_norm": 0.2715361714363098, |
| "learning_rate": 4.4511493515924373e-07, |
| "loss": 0.3805, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.6348294434470376, |
| "grad_norm": 0.27372029423713684, |
| "learning_rate": 4.425321460690024e-07, |
| "loss": 0.3833, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.635906642728905, |
| "grad_norm": 0.2518730163574219, |
| "learning_rate": 4.3995652514126077e-07, |
| "loss": 0.3553, |
| "step": 2447 |
| }, |
| { |
| "epoch": 2.636983842010772, |
| "grad_norm": 0.2758457064628601, |
| "learning_rate": 4.3738807642705663e-07, |
| "loss": 0.3737, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.638061041292639, |
| "grad_norm": 0.2756761908531189, |
| "learning_rate": 4.348268039661452e-07, |
| "loss": 0.3953, |
| "step": 2449 |
| }, |
| { |
| "epoch": 2.639138240574506, |
| "grad_norm": 0.26307833194732666, |
| "learning_rate": 4.322727117869951e-07, |
| "loss": 0.3816, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.6402154398563735, |
| "grad_norm": 0.2856947183609009, |
| "learning_rate": 4.2972580390678307e-07, |
| "loss": 0.3782, |
| "step": 2451 |
| }, |
| { |
| "epoch": 2.6412926391382405, |
| "grad_norm": 0.28101542592048645, |
| "learning_rate": 4.271860843313835e-07, |
| "loss": 0.3745, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.642369838420108, |
| "grad_norm": 0.26329493522644043, |
| "learning_rate": 4.246535570553667e-07, |
| "loss": 0.3696, |
| "step": 2453 |
| }, |
| { |
| "epoch": 2.643447037701975, |
| "grad_norm": 0.25769349932670593, |
| "learning_rate": 4.221282260619891e-07, |
| "loss": 0.3783, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.644524236983842, |
| "grad_norm": 0.26422208547592163, |
| "learning_rate": 4.196100953231896e-07, |
| "loss": 0.3716, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.645601436265709, |
| "grad_norm": 0.25533580780029297, |
| "learning_rate": 4.1709916879958237e-07, |
| "loss": 0.3562, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.6466786355475764, |
| "grad_norm": 0.2956748902797699, |
| "learning_rate": 4.145954504404498e-07, |
| "loss": 0.4045, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.6477558348294434, |
| "grad_norm": 0.2733031213283539, |
| "learning_rate": 4.120989441837381e-07, |
| "loss": 0.3681, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.6488330341113104, |
| "grad_norm": 0.2781358063220978, |
| "learning_rate": 4.0960965395605015e-07, |
| "loss": 0.365, |
| "step": 2459 |
| }, |
| { |
| "epoch": 2.649910233393178, |
| "grad_norm": 0.2737163007259369, |
| "learning_rate": 4.0712758367263573e-07, |
| "loss": 0.3497, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.650987432675045, |
| "grad_norm": 0.28244391083717346, |
| "learning_rate": 4.046527372373932e-07, |
| "loss": 0.3984, |
| "step": 2461 |
| }, |
| { |
| "epoch": 2.652064631956912, |
| "grad_norm": 0.2620738446712494, |
| "learning_rate": 4.021851185428566e-07, |
| "loss": 0.3524, |
| "step": 2462 |
| }, |
| { |
| "epoch": 2.6531418312387793, |
| "grad_norm": 0.2576216459274292, |
| "learning_rate": 3.9972473147019354e-07, |
| "loss": 0.3849, |
| "step": 2463 |
| }, |
| { |
| "epoch": 2.6542190305206463, |
| "grad_norm": 0.2805669903755188, |
| "learning_rate": 3.972715798891952e-07, |
| "loss": 0.3732, |
| "step": 2464 |
| }, |
| { |
| "epoch": 2.6552962298025133, |
| "grad_norm": 0.2930372655391693, |
| "learning_rate": 3.9482566765827346e-07, |
| "loss": 0.4041, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.656373429084381, |
| "grad_norm": 0.2730962932109833, |
| "learning_rate": 3.92386998624455e-07, |
| "loss": 0.3635, |
| "step": 2466 |
| }, |
| { |
| "epoch": 2.657450628366248, |
| "grad_norm": 0.30867379903793335, |
| "learning_rate": 3.899555766233726e-07, |
| "loss": 0.4031, |
| "step": 2467 |
| }, |
| { |
| "epoch": 2.658527827648115, |
| "grad_norm": 0.24889758229255676, |
| "learning_rate": 3.8753140547926224e-07, |
| "loss": 0.3513, |
| "step": 2468 |
| }, |
| { |
| "epoch": 2.659605026929982, |
| "grad_norm": 0.2548186779022217, |
| "learning_rate": 3.851144890049535e-07, |
| "loss": 0.3589, |
| "step": 2469 |
| }, |
| { |
| "epoch": 2.6606822262118492, |
| "grad_norm": 0.26514631509780884, |
| "learning_rate": 3.827048310018661e-07, |
| "loss": 0.386, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.6617594254937162, |
| "grad_norm": 0.28183451294898987, |
| "learning_rate": 3.803024352600049e-07, |
| "loss": 0.4103, |
| "step": 2471 |
| }, |
| { |
| "epoch": 2.6628366247755837, |
| "grad_norm": 0.2597326636314392, |
| "learning_rate": 3.7790730555795076e-07, |
| "loss": 0.3604, |
| "step": 2472 |
| }, |
| { |
| "epoch": 2.6639138240574507, |
| "grad_norm": 0.25949424505233765, |
| "learning_rate": 3.755194456628569e-07, |
| "loss": 0.3447, |
| "step": 2473 |
| }, |
| { |
| "epoch": 2.6649910233393177, |
| "grad_norm": 0.27464714646339417, |
| "learning_rate": 3.731388593304425e-07, |
| "loss": 0.3852, |
| "step": 2474 |
| }, |
| { |
| "epoch": 2.6660682226211847, |
| "grad_norm": 0.2679389417171478, |
| "learning_rate": 3.7076555030498505e-07, |
| "loss": 0.3914, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.667145421903052, |
| "grad_norm": 0.25600937008857727, |
| "learning_rate": 3.6839952231931877e-07, |
| "loss": 0.3257, |
| "step": 2476 |
| }, |
| { |
| "epoch": 2.668222621184919, |
| "grad_norm": 0.2627735137939453, |
| "learning_rate": 3.6604077909482283e-07, |
| "loss": 0.377, |
| "step": 2477 |
| }, |
| { |
| "epoch": 2.6692998204667866, |
| "grad_norm": 0.29077112674713135, |
| "learning_rate": 3.636893243414208e-07, |
| "loss": 0.4152, |
| "step": 2478 |
| }, |
| { |
| "epoch": 2.6703770197486536, |
| "grad_norm": 0.2747618854045868, |
| "learning_rate": 3.6134516175757193e-07, |
| "loss": 0.3744, |
| "step": 2479 |
| }, |
| { |
| "epoch": 2.6714542190305206, |
| "grad_norm": 0.2674185037612915, |
| "learning_rate": 3.5900829503026644e-07, |
| "loss": 0.3881, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.6725314183123876, |
| "grad_norm": 0.2776051163673401, |
| "learning_rate": 3.5667872783501924e-07, |
| "loss": 0.3811, |
| "step": 2481 |
| }, |
| { |
| "epoch": 2.673608617594255, |
| "grad_norm": 0.32454177737236023, |
| "learning_rate": 3.5435646383586374e-07, |
| "loss": 0.3891, |
| "step": 2482 |
| }, |
| { |
| "epoch": 2.674685816876122, |
| "grad_norm": 0.2767637073993683, |
| "learning_rate": 3.520415066853483e-07, |
| "loss": 0.3896, |
| "step": 2483 |
| }, |
| { |
| "epoch": 2.6757630161579895, |
| "grad_norm": 0.2673652768135071, |
| "learning_rate": 3.497338600245254e-07, |
| "loss": 0.3767, |
| "step": 2484 |
| }, |
| { |
| "epoch": 2.6768402154398565, |
| "grad_norm": 0.26100921630859375, |
| "learning_rate": 3.474335274829532e-07, |
| "loss": 0.3611, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.6779174147217235, |
| "grad_norm": 0.2723924219608307, |
| "learning_rate": 3.4514051267868275e-07, |
| "loss": 0.3531, |
| "step": 2486 |
| }, |
| { |
| "epoch": 2.6789946140035905, |
| "grad_norm": 0.2669923007488251, |
| "learning_rate": 3.428548192182568e-07, |
| "loss": 0.3876, |
| "step": 2487 |
| }, |
| { |
| "epoch": 2.680071813285458, |
| "grad_norm": 0.27219128608703613, |
| "learning_rate": 3.4057645069670353e-07, |
| "loss": 0.3646, |
| "step": 2488 |
| }, |
| { |
| "epoch": 2.681149012567325, |
| "grad_norm": 0.26371678709983826, |
| "learning_rate": 3.383054106975292e-07, |
| "loss": 0.3396, |
| "step": 2489 |
| }, |
| { |
| "epoch": 2.682226211849192, |
| "grad_norm": 0.2728726267814636, |
| "learning_rate": 3.3604170279271375e-07, |
| "loss": 0.3623, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.6833034111310594, |
| "grad_norm": 0.24541893601417542, |
| "learning_rate": 3.337853305427063e-07, |
| "loss": 0.3716, |
| "step": 2491 |
| }, |
| { |
| "epoch": 2.6843806104129264, |
| "grad_norm": 0.262883722782135, |
| "learning_rate": 3.315362974964142e-07, |
| "loss": 0.3806, |
| "step": 2492 |
| }, |
| { |
| "epoch": 2.6854578096947934, |
| "grad_norm": 0.26691654324531555, |
| "learning_rate": 3.292946071912051e-07, |
| "loss": 0.3643, |
| "step": 2493 |
| }, |
| { |
| "epoch": 2.6865350089766604, |
| "grad_norm": 0.2629173696041107, |
| "learning_rate": 3.270602631528968e-07, |
| "loss": 0.3559, |
| "step": 2494 |
| }, |
| { |
| "epoch": 2.687612208258528, |
| "grad_norm": 0.27307501435279846, |
| "learning_rate": 3.2483326889575394e-07, |
| "loss": 0.3508, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.688689407540395, |
| "grad_norm": 0.2881261110305786, |
| "learning_rate": 3.226136279224762e-07, |
| "loss": 0.409, |
| "step": 2496 |
| }, |
| { |
| "epoch": 2.6897666068222623, |
| "grad_norm": 0.27742505073547363, |
| "learning_rate": 3.2040134372420373e-07, |
| "loss": 0.3763, |
| "step": 2497 |
| }, |
| { |
| "epoch": 2.6908438061041293, |
| "grad_norm": 0.2708519697189331, |
| "learning_rate": 3.1819641978050207e-07, |
| "loss": 0.3737, |
| "step": 2498 |
| }, |
| { |
| "epoch": 2.6919210053859963, |
| "grad_norm": 0.2512258291244507, |
| "learning_rate": 3.159988595593616e-07, |
| "loss": 0.3473, |
| "step": 2499 |
| }, |
| { |
| "epoch": 2.6929982046678633, |
| "grad_norm": 0.27164483070373535, |
| "learning_rate": 3.1380866651719075e-07, |
| "loss": 0.399, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.6940754039497308, |
| "grad_norm": 0.2618269920349121, |
| "learning_rate": 3.1162584409880904e-07, |
| "loss": 0.3835, |
| "step": 2501 |
| }, |
| { |
| "epoch": 2.6951526032315978, |
| "grad_norm": 0.24939392507076263, |
| "learning_rate": 3.0945039573744564e-07, |
| "loss": 0.3768, |
| "step": 2502 |
| }, |
| { |
| "epoch": 2.6962298025134652, |
| "grad_norm": 0.2539052665233612, |
| "learning_rate": 3.0728232485472967e-07, |
| "loss": 0.3644, |
| "step": 2503 |
| }, |
| { |
| "epoch": 2.6973070017953322, |
| "grad_norm": 0.27746522426605225, |
| "learning_rate": 3.051216348606867e-07, |
| "loss": 0.3805, |
| "step": 2504 |
| }, |
| { |
| "epoch": 2.6983842010771992, |
| "grad_norm": 0.2758882939815521, |
| "learning_rate": 3.02968329153735e-07, |
| "loss": 0.3835, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.6994614003590662, |
| "grad_norm": 0.26927635073661804, |
| "learning_rate": 3.0082241112067755e-07, |
| "loss": 0.3565, |
| "step": 2506 |
| }, |
| { |
| "epoch": 2.7005385996409337, |
| "grad_norm": 0.26184743642807007, |
| "learning_rate": 2.986838841366962e-07, |
| "loss": 0.3568, |
| "step": 2507 |
| }, |
| { |
| "epoch": 2.7016157989228007, |
| "grad_norm": 0.2422487586736679, |
| "learning_rate": 2.96552751565351e-07, |
| "loss": 0.3557, |
| "step": 2508 |
| }, |
| { |
| "epoch": 2.702692998204668, |
| "grad_norm": 0.27083149552345276, |
| "learning_rate": 2.944290167585684e-07, |
| "loss": 0.3853, |
| "step": 2509 |
| }, |
| { |
| "epoch": 2.703770197486535, |
| "grad_norm": 0.27866464853286743, |
| "learning_rate": 2.9231268305664193e-07, |
| "loss": 0.3637, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.704847396768402, |
| "grad_norm": 0.26805853843688965, |
| "learning_rate": 2.9020375378822297e-07, |
| "loss": 0.3994, |
| "step": 2511 |
| }, |
| { |
| "epoch": 2.705924596050269, |
| "grad_norm": 0.25744813680648804, |
| "learning_rate": 2.8810223227031753e-07, |
| "loss": 0.3614, |
| "step": 2512 |
| }, |
| { |
| "epoch": 2.7070017953321366, |
| "grad_norm": 0.2774718999862671, |
| "learning_rate": 2.860081218082805e-07, |
| "loss": 0.3848, |
| "step": 2513 |
| }, |
| { |
| "epoch": 2.7080789946140036, |
| "grad_norm": 0.25574401021003723, |
| "learning_rate": 2.839214256958106e-07, |
| "loss": 0.3555, |
| "step": 2514 |
| }, |
| { |
| "epoch": 2.7091561938958706, |
| "grad_norm": 0.2627975046634674, |
| "learning_rate": 2.818421472149446e-07, |
| "loss": 0.3541, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.710233393177738, |
| "grad_norm": 0.2711409032344818, |
| "learning_rate": 2.7977028963605214e-07, |
| "loss": 0.3993, |
| "step": 2516 |
| }, |
| { |
| "epoch": 2.711310592459605, |
| "grad_norm": 0.26041677594184875, |
| "learning_rate": 2.7770585621782973e-07, |
| "loss": 0.3713, |
| "step": 2517 |
| }, |
| { |
| "epoch": 2.712387791741472, |
| "grad_norm": 0.263475239276886, |
| "learning_rate": 2.756488502073007e-07, |
| "loss": 0.3449, |
| "step": 2518 |
| }, |
| { |
| "epoch": 2.713464991023339, |
| "grad_norm": 0.2730863094329834, |
| "learning_rate": 2.7359927483980254e-07, |
| "loss": 0.3822, |
| "step": 2519 |
| }, |
| { |
| "epoch": 2.7145421903052065, |
| "grad_norm": 0.3727039098739624, |
| "learning_rate": 2.7155713333898826e-07, |
| "loss": 0.3547, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.7156193895870735, |
| "grad_norm": 0.29259535670280457, |
| "learning_rate": 2.6952242891681635e-07, |
| "loss": 0.3839, |
| "step": 2521 |
| }, |
| { |
| "epoch": 2.716696588868941, |
| "grad_norm": 0.29923248291015625, |
| "learning_rate": 2.674951647735491e-07, |
| "loss": 0.4094, |
| "step": 2522 |
| }, |
| { |
| "epoch": 2.717773788150808, |
| "grad_norm": 0.25811460614204407, |
| "learning_rate": 2.654753440977481e-07, |
| "loss": 0.3599, |
| "step": 2523 |
| }, |
| { |
| "epoch": 2.718850987432675, |
| "grad_norm": 0.2672567367553711, |
| "learning_rate": 2.634629700662628e-07, |
| "loss": 0.3805, |
| "step": 2524 |
| }, |
| { |
| "epoch": 2.719928186714542, |
| "grad_norm": 0.2677706778049469, |
| "learning_rate": 2.6145804584423505e-07, |
| "loss": 0.3528, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.7210053859964094, |
| "grad_norm": 0.2716928720474243, |
| "learning_rate": 2.5946057458508757e-07, |
| "loss": 0.3736, |
| "step": 2526 |
| }, |
| { |
| "epoch": 2.7220825852782764, |
| "grad_norm": 0.27958476543426514, |
| "learning_rate": 2.5747055943052044e-07, |
| "loss": 0.3762, |
| "step": 2527 |
| }, |
| { |
| "epoch": 2.723159784560144, |
| "grad_norm": 0.2665049135684967, |
| "learning_rate": 2.5548800351050673e-07, |
| "loss": 0.3379, |
| "step": 2528 |
| }, |
| { |
| "epoch": 2.724236983842011, |
| "grad_norm": 0.27530208230018616, |
| "learning_rate": 2.5351290994328703e-07, |
| "loss": 0.4127, |
| "step": 2529 |
| }, |
| { |
| "epoch": 2.725314183123878, |
| "grad_norm": 0.24859978258609772, |
| "learning_rate": 2.5154528183536584e-07, |
| "loss": 0.3384, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.726391382405745, |
| "grad_norm": 0.26949816942214966, |
| "learning_rate": 2.495851222815049e-07, |
| "loss": 0.4023, |
| "step": 2531 |
| }, |
| { |
| "epoch": 2.7274685816876123, |
| "grad_norm": 0.2622813284397125, |
| "learning_rate": 2.476324343647202e-07, |
| "loss": 0.3549, |
| "step": 2532 |
| }, |
| { |
| "epoch": 2.7285457809694793, |
| "grad_norm": 0.2698315978050232, |
| "learning_rate": 2.456872211562733e-07, |
| "loss": 0.3559, |
| "step": 2533 |
| }, |
| { |
| "epoch": 2.7296229802513468, |
| "grad_norm": 0.26423153281211853, |
| "learning_rate": 2.4374948571567246e-07, |
| "loss": 0.3805, |
| "step": 2534 |
| }, |
| { |
| "epoch": 2.7307001795332138, |
| "grad_norm": 0.262080579996109, |
| "learning_rate": 2.4181923109066254e-07, |
| "loss": 0.3553, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.7317773788150808, |
| "grad_norm": 0.2714017331600189, |
| "learning_rate": 2.398964603172238e-07, |
| "loss": 0.3891, |
| "step": 2536 |
| }, |
| { |
| "epoch": 2.7328545780969478, |
| "grad_norm": 0.26109498739242554, |
| "learning_rate": 2.3798117641956498e-07, |
| "loss": 0.3773, |
| "step": 2537 |
| }, |
| { |
| "epoch": 2.733931777378815, |
| "grad_norm": 0.2694284915924072, |
| "learning_rate": 2.3607338241011745e-07, |
| "loss": 0.3514, |
| "step": 2538 |
| }, |
| { |
| "epoch": 2.735008976660682, |
| "grad_norm": 0.2672230005264282, |
| "learning_rate": 2.3417308128953486e-07, |
| "loss": 0.3893, |
| "step": 2539 |
| }, |
| { |
| "epoch": 2.736086175942549, |
| "grad_norm": 0.26709240674972534, |
| "learning_rate": 2.3228027604668523e-07, |
| "loss": 0.3556, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.7371633752244167, |
| "grad_norm": 0.27405399084091187, |
| "learning_rate": 2.303949696586444e-07, |
| "loss": 0.3685, |
| "step": 2541 |
| }, |
| { |
| "epoch": 2.7382405745062837, |
| "grad_norm": 0.2825201451778412, |
| "learning_rate": 2.28517165090697e-07, |
| "loss": 0.3937, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.7393177737881507, |
| "grad_norm": 0.2691894769668579, |
| "learning_rate": 2.2664686529632608e-07, |
| "loss": 0.3695, |
| "step": 2543 |
| }, |
| { |
| "epoch": 2.740394973070018, |
| "grad_norm": 0.2729867696762085, |
| "learning_rate": 2.2478407321721295e-07, |
| "loss": 0.3741, |
| "step": 2544 |
| }, |
| { |
| "epoch": 2.741472172351885, |
| "grad_norm": 0.26905032992362976, |
| "learning_rate": 2.2292879178322845e-07, |
| "loss": 0.389, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.742549371633752, |
| "grad_norm": 0.2744308412075043, |
| "learning_rate": 2.2108102391243114e-07, |
| "loss": 0.382, |
| "step": 2546 |
| }, |
| { |
| "epoch": 2.7436265709156196, |
| "grad_norm": 0.2637563645839691, |
| "learning_rate": 2.1924077251106346e-07, |
| "loss": 0.3387, |
| "step": 2547 |
| }, |
| { |
| "epoch": 2.7447037701974866, |
| "grad_norm": 0.2808258831501007, |
| "learning_rate": 2.1740804047354348e-07, |
| "loss": 0.4189, |
| "step": 2548 |
| }, |
| { |
| "epoch": 2.7457809694793536, |
| "grad_norm": 0.29728829860687256, |
| "learning_rate": 2.1558283068246254e-07, |
| "loss": 0.3739, |
| "step": 2549 |
| }, |
| { |
| "epoch": 2.7468581687612206, |
| "grad_norm": 0.254711776971817, |
| "learning_rate": 2.1376514600858212e-07, |
| "loss": 0.3437, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.747935368043088, |
| "grad_norm": 0.2539973556995392, |
| "learning_rate": 2.1195498931082748e-07, |
| "loss": 0.3561, |
| "step": 2551 |
| }, |
| { |
| "epoch": 2.749012567324955, |
| "grad_norm": 0.27282753586769104, |
| "learning_rate": 2.101523634362834e-07, |
| "loss": 0.3857, |
| "step": 2552 |
| }, |
| { |
| "epoch": 2.7500897666068225, |
| "grad_norm": 0.2659188210964203, |
| "learning_rate": 2.0835727122018978e-07, |
| "loss": 0.392, |
| "step": 2553 |
| }, |
| { |
| "epoch": 2.7511669658886895, |
| "grad_norm": 0.2596139907836914, |
| "learning_rate": 2.065697154859375e-07, |
| "loss": 0.3891, |
| "step": 2554 |
| }, |
| { |
| "epoch": 2.7522441651705565, |
| "grad_norm": 0.24862676858901978, |
| "learning_rate": 2.0478969904506373e-07, |
| "loss": 0.3352, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.7533213644524235, |
| "grad_norm": 0.2672061324119568, |
| "learning_rate": 2.0301722469724728e-07, |
| "loss": 0.4056, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.754398563734291, |
| "grad_norm": 0.2543858289718628, |
| "learning_rate": 2.012522952303042e-07, |
| "loss": 0.3626, |
| "step": 2557 |
| }, |
| { |
| "epoch": 2.755475763016158, |
| "grad_norm": 0.26948270201683044, |
| "learning_rate": 1.9949491342018568e-07, |
| "loss": 0.3884, |
| "step": 2558 |
| }, |
| { |
| "epoch": 2.7565529622980254, |
| "grad_norm": 0.2788434624671936, |
| "learning_rate": 1.9774508203096843e-07, |
| "loss": 0.3752, |
| "step": 2559 |
| }, |
| { |
| "epoch": 2.7576301615798924, |
| "grad_norm": 0.27419593930244446, |
| "learning_rate": 1.9600280381485537e-07, |
| "loss": 0.3721, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.7587073608617594, |
| "grad_norm": 0.2659021019935608, |
| "learning_rate": 1.9426808151217002e-07, |
| "loss": 0.3973, |
| "step": 2561 |
| }, |
| { |
| "epoch": 2.7597845601436264, |
| "grad_norm": 0.25331631302833557, |
| "learning_rate": 1.9254091785135154e-07, |
| "loss": 0.3541, |
| "step": 2562 |
| }, |
| { |
| "epoch": 2.760861759425494, |
| "grad_norm": 0.26807159185409546, |
| "learning_rate": 1.9082131554894857e-07, |
| "loss": 0.3751, |
| "step": 2563 |
| }, |
| { |
| "epoch": 2.761938958707361, |
| "grad_norm": 0.27599242329597473, |
| "learning_rate": 1.8910927730962038e-07, |
| "loss": 0.3777, |
| "step": 2564 |
| }, |
| { |
| "epoch": 2.7630161579892283, |
| "grad_norm": 0.2834921181201935, |
| "learning_rate": 1.874048058261252e-07, |
| "loss": 0.3834, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.7640933572710953, |
| "grad_norm": 0.23908206820487976, |
| "learning_rate": 1.8570790377932302e-07, |
| "loss": 0.3368, |
| "step": 2566 |
| }, |
| { |
| "epoch": 2.7651705565529623, |
| "grad_norm": 0.27744874358177185, |
| "learning_rate": 1.8401857383816667e-07, |
| "loss": 0.3842, |
| "step": 2567 |
| }, |
| { |
| "epoch": 2.7662477558348293, |
| "grad_norm": 0.2755833566188812, |
| "learning_rate": 1.8233681865970076e-07, |
| "loss": 0.3594, |
| "step": 2568 |
| }, |
| { |
| "epoch": 2.7673249551166967, |
| "grad_norm": 0.2750590145587921, |
| "learning_rate": 1.806626408890555e-07, |
| "loss": 0.3762, |
| "step": 2569 |
| }, |
| { |
| "epoch": 2.7684021543985637, |
| "grad_norm": 0.26196321845054626, |
| "learning_rate": 1.789960431594412e-07, |
| "loss": 0.3395, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.7694793536804307, |
| "grad_norm": 0.2838890552520752, |
| "learning_rate": 1.7733702809214826e-07, |
| "loss": 0.3816, |
| "step": 2571 |
| }, |
| { |
| "epoch": 2.770556552962298, |
| "grad_norm": 0.2661447823047638, |
| "learning_rate": 1.7568559829654107e-07, |
| "loss": 0.3941, |
| "step": 2572 |
| }, |
| { |
| "epoch": 2.771633752244165, |
| "grad_norm": 0.26285287737846375, |
| "learning_rate": 1.7404175637005083e-07, |
| "loss": 0.359, |
| "step": 2573 |
| }, |
| { |
| "epoch": 2.772710951526032, |
| "grad_norm": 0.2819158434867859, |
| "learning_rate": 1.7240550489817652e-07, |
| "loss": 0.3722, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.773788150807899, |
| "grad_norm": 0.267031192779541, |
| "learning_rate": 1.7077684645447846e-07, |
| "loss": 0.3265, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.7748653500897666, |
| "grad_norm": 0.27363553643226624, |
| "learning_rate": 1.6915578360057417e-07, |
| "loss": 0.4008, |
| "step": 2576 |
| }, |
| { |
| "epoch": 2.7759425493716336, |
| "grad_norm": 0.2736669182777405, |
| "learning_rate": 1.6754231888613304e-07, |
| "loss": 0.3864, |
| "step": 2577 |
| }, |
| { |
| "epoch": 2.777019748653501, |
| "grad_norm": 0.2563254237174988, |
| "learning_rate": 1.6593645484887677e-07, |
| "loss": 0.3768, |
| "step": 2578 |
| }, |
| { |
| "epoch": 2.778096947935368, |
| "grad_norm": 0.28097283840179443, |
| "learning_rate": 1.6433819401456996e-07, |
| "loss": 0.3893, |
| "step": 2579 |
| }, |
| { |
| "epoch": 2.779174147217235, |
| "grad_norm": 0.2700108289718628, |
| "learning_rate": 1.62747538897019e-07, |
| "loss": 0.3506, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.780251346499102, |
| "grad_norm": 0.2690648138523102, |
| "learning_rate": 1.611644919980676e-07, |
| "loss": 0.3503, |
| "step": 2581 |
| }, |
| { |
| "epoch": 2.7813285457809696, |
| "grad_norm": 0.2958345115184784, |
| "learning_rate": 1.5958905580759464e-07, |
| "loss": 0.3996, |
| "step": 2582 |
| }, |
| { |
| "epoch": 2.7824057450628366, |
| "grad_norm": 0.2642570734024048, |
| "learning_rate": 1.5802123280350634e-07, |
| "loss": 0.389, |
| "step": 2583 |
| }, |
| { |
| "epoch": 2.783482944344704, |
| "grad_norm": 0.25878316164016724, |
| "learning_rate": 1.5646102545173625e-07, |
| "loss": 0.3485, |
| "step": 2584 |
| }, |
| { |
| "epoch": 2.784560143626571, |
| "grad_norm": 0.2650509178638458, |
| "learning_rate": 1.5490843620623865e-07, |
| "loss": 0.3921, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.785637342908438, |
| "grad_norm": 0.2722346782684326, |
| "learning_rate": 1.5336346750898678e-07, |
| "loss": 0.3795, |
| "step": 2586 |
| }, |
| { |
| "epoch": 2.786714542190305, |
| "grad_norm": 0.2627926170825958, |
| "learning_rate": 1.5182612178996803e-07, |
| "loss": 0.349, |
| "step": 2587 |
| }, |
| { |
| "epoch": 2.7877917414721725, |
| "grad_norm": 0.28062254190444946, |
| "learning_rate": 1.5029640146717762e-07, |
| "loss": 0.3906, |
| "step": 2588 |
| }, |
| { |
| "epoch": 2.7888689407540395, |
| "grad_norm": 0.27385613322257996, |
| "learning_rate": 1.4877430894662037e-07, |
| "loss": 0.3863, |
| "step": 2589 |
| }, |
| { |
| "epoch": 2.789946140035907, |
| "grad_norm": 0.270166277885437, |
| "learning_rate": 1.472598466223024e-07, |
| "loss": 0.3743, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.791023339317774, |
| "grad_norm": 0.24716700613498688, |
| "learning_rate": 1.457530168762289e-07, |
| "loss": 0.3562, |
| "step": 2591 |
| }, |
| { |
| "epoch": 2.792100538599641, |
| "grad_norm": 0.24457189440727234, |
| "learning_rate": 1.4425382207839955e-07, |
| "loss": 0.3454, |
| "step": 2592 |
| }, |
| { |
| "epoch": 2.793177737881508, |
| "grad_norm": 0.2678474187850952, |
| "learning_rate": 1.4276226458680653e-07, |
| "loss": 0.3815, |
| "step": 2593 |
| }, |
| { |
| "epoch": 2.7942549371633754, |
| "grad_norm": 0.2846844792366028, |
| "learning_rate": 1.412783467474299e-07, |
| "loss": 0.4095, |
| "step": 2594 |
| }, |
| { |
| "epoch": 2.7953321364452424, |
| "grad_norm": 0.2793724536895752, |
| "learning_rate": 1.3980207089423326e-07, |
| "loss": 0.3911, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.7964093357271094, |
| "grad_norm": 0.2453213334083557, |
| "learning_rate": 1.3833343934916032e-07, |
| "loss": 0.3559, |
| "step": 2596 |
| }, |
| { |
| "epoch": 2.797486535008977, |
| "grad_norm": 0.28029677271842957, |
| "learning_rate": 1.368724544221317e-07, |
| "loss": 0.3974, |
| "step": 2597 |
| }, |
| { |
| "epoch": 2.798563734290844, |
| "grad_norm": 0.23296788334846497, |
| "learning_rate": 1.3541911841104149e-07, |
| "loss": 0.326, |
| "step": 2598 |
| }, |
| { |
| "epoch": 2.799640933572711, |
| "grad_norm": 0.28019076585769653, |
| "learning_rate": 1.3397343360175287e-07, |
| "loss": 0.4153, |
| "step": 2599 |
| }, |
| { |
| "epoch": 2.800718132854578, |
| "grad_norm": 0.2549351751804352, |
| "learning_rate": 1.3253540226809524e-07, |
| "loss": 0.3551, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.8017953321364453, |
| "grad_norm": 0.2683469355106354, |
| "learning_rate": 1.3110502667186e-07, |
| "loss": 0.3321, |
| "step": 2601 |
| }, |
| { |
| "epoch": 2.8028725314183123, |
| "grad_norm": 0.25505879521369934, |
| "learning_rate": 1.2968230906279745e-07, |
| "loss": 0.3689, |
| "step": 2602 |
| }, |
| { |
| "epoch": 2.8039497307001797, |
| "grad_norm": 0.28051871061325073, |
| "learning_rate": 1.282672516786132e-07, |
| "loss": 0.4006, |
| "step": 2603 |
| }, |
| { |
| "epoch": 2.8050269299820467, |
| "grad_norm": 0.24571263790130615, |
| "learning_rate": 1.268598567449647e-07, |
| "loss": 0.3344, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.8061041292639137, |
| "grad_norm": 0.27598923444747925, |
| "learning_rate": 1.2546012647545735e-07, |
| "loss": 0.4113, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.8071813285457807, |
| "grad_norm": 0.2610470950603485, |
| "learning_rate": 1.240680630716401e-07, |
| "loss": 0.3859, |
| "step": 2606 |
| }, |
| { |
| "epoch": 2.808258527827648, |
| "grad_norm": 0.2540163993835449, |
| "learning_rate": 1.2268366872300596e-07, |
| "loss": 0.3663, |
| "step": 2607 |
| }, |
| { |
| "epoch": 2.809335727109515, |
| "grad_norm": 0.26400068402290344, |
| "learning_rate": 1.2130694560698376e-07, |
| "loss": 0.361, |
| "step": 2608 |
| }, |
| { |
| "epoch": 2.8104129263913826, |
| "grad_norm": 0.26972126960754395, |
| "learning_rate": 1.1993789588893634e-07, |
| "loss": 0.379, |
| "step": 2609 |
| }, |
| { |
| "epoch": 2.8114901256732496, |
| "grad_norm": 0.28470125794410706, |
| "learning_rate": 1.1857652172215905e-07, |
| "loss": 0.3672, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.8125673249551166, |
| "grad_norm": 0.28178441524505615, |
| "learning_rate": 1.1722282524787465e-07, |
| "loss": 0.376, |
| "step": 2611 |
| }, |
| { |
| "epoch": 2.8136445242369836, |
| "grad_norm": 0.27330464124679565, |
| "learning_rate": 1.1587680859522832e-07, |
| "loss": 0.3682, |
| "step": 2612 |
| }, |
| { |
| "epoch": 2.814721723518851, |
| "grad_norm": 0.26830774545669556, |
| "learning_rate": 1.1453847388128714e-07, |
| "loss": 0.3501, |
| "step": 2613 |
| }, |
| { |
| "epoch": 2.815798922800718, |
| "grad_norm": 0.281019926071167, |
| "learning_rate": 1.1320782321103673e-07, |
| "loss": 0.4017, |
| "step": 2614 |
| }, |
| { |
| "epoch": 2.8168761220825855, |
| "grad_norm": 0.2757217288017273, |
| "learning_rate": 1.1188485867737631e-07, |
| "loss": 0.373, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.8179533213644525, |
| "grad_norm": 0.2641966640949249, |
| "learning_rate": 1.1056958236111526e-07, |
| "loss": 0.3667, |
| "step": 2616 |
| }, |
| { |
| "epoch": 2.8190305206463195, |
| "grad_norm": 0.257794588804245, |
| "learning_rate": 1.0926199633097156e-07, |
| "loss": 0.3732, |
| "step": 2617 |
| }, |
| { |
| "epoch": 2.8201077199281865, |
| "grad_norm": 0.2658846378326416, |
| "learning_rate": 1.0796210264356787e-07, |
| "loss": 0.3771, |
| "step": 2618 |
| }, |
| { |
| "epoch": 2.821184919210054, |
| "grad_norm": 0.27144813537597656, |
| "learning_rate": 1.0666990334342708e-07, |
| "loss": 0.3828, |
| "step": 2619 |
| }, |
| { |
| "epoch": 2.822262118491921, |
| "grad_norm": 0.251426100730896, |
| "learning_rate": 1.0538540046296952e-07, |
| "loss": 0.3348, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.823339317773788, |
| "grad_norm": 0.28454217314720154, |
| "learning_rate": 1.04108596022513e-07, |
| "loss": 0.3895, |
| "step": 2621 |
| }, |
| { |
| "epoch": 2.8244165170556554, |
| "grad_norm": 0.24853797256946564, |
| "learning_rate": 1.0283949203026333e-07, |
| "loss": 0.3614, |
| "step": 2622 |
| }, |
| { |
| "epoch": 2.8254937163375224, |
| "grad_norm": 0.2648945748806, |
| "learning_rate": 1.015780904823177e-07, |
| "loss": 0.3516, |
| "step": 2623 |
| }, |
| { |
| "epoch": 2.8265709156193894, |
| "grad_norm": 0.25767838954925537, |
| "learning_rate": 1.0032439336265742e-07, |
| "loss": 0.3738, |
| "step": 2624 |
| }, |
| { |
| "epoch": 2.827648114901257, |
| "grad_norm": 0.2541263997554779, |
| "learning_rate": 9.907840264314572e-08, |
| "loss": 0.354, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.828725314183124, |
| "grad_norm": 0.2915237247943878, |
| "learning_rate": 9.784012028352496e-08, |
| "loss": 0.3833, |
| "step": 2626 |
| }, |
| { |
| "epoch": 2.829802513464991, |
| "grad_norm": 0.2747429609298706, |
| "learning_rate": 9.660954823141443e-08, |
| "loss": 0.3822, |
| "step": 2627 |
| }, |
| { |
| "epoch": 2.8308797127468583, |
| "grad_norm": 0.2576390504837036, |
| "learning_rate": 9.538668842230536e-08, |
| "loss": 0.3304, |
| "step": 2628 |
| }, |
| { |
| "epoch": 2.8319569120287253, |
| "grad_norm": 0.24892747402191162, |
| "learning_rate": 9.417154277955864e-08, |
| "loss": 0.3679, |
| "step": 2629 |
| }, |
| { |
| "epoch": 2.8330341113105924, |
| "grad_norm": 0.26770374178886414, |
| "learning_rate": 9.29641132144038e-08, |
| "loss": 0.4067, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.8341113105924594, |
| "grad_norm": 0.27645477652549744, |
| "learning_rate": 9.17644016259317e-08, |
| "loss": 0.3675, |
| "step": 2631 |
| }, |
| { |
| "epoch": 2.835188509874327, |
| "grad_norm": 0.2851516008377075, |
| "learning_rate": 9.057240990109628e-08, |
| "loss": 0.395, |
| "step": 2632 |
| }, |
| { |
| "epoch": 2.836265709156194, |
| "grad_norm": 0.27293872833251953, |
| "learning_rate": 8.93881399147073e-08, |
| "loss": 0.3828, |
| "step": 2633 |
| }, |
| { |
| "epoch": 2.8373429084380613, |
| "grad_norm": 0.2663024663925171, |
| "learning_rate": 8.821159352943142e-08, |
| "loss": 0.3886, |
| "step": 2634 |
| }, |
| { |
| "epoch": 2.8384201077199283, |
| "grad_norm": 0.2755454480648041, |
| "learning_rate": 8.704277259578675e-08, |
| "loss": 0.3865, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.8394973070017953, |
| "grad_norm": 0.2564866840839386, |
| "learning_rate": 8.588167895213994e-08, |
| "loss": 0.3608, |
| "step": 2636 |
| }, |
| { |
| "epoch": 2.8405745062836623, |
| "grad_norm": 0.27022016048431396, |
| "learning_rate": 8.472831442470408e-08, |
| "loss": 0.4055, |
| "step": 2637 |
| }, |
| { |
| "epoch": 2.8416517055655297, |
| "grad_norm": 0.27274903655052185, |
| "learning_rate": 8.358268082753529e-08, |
| "loss": 0.3771, |
| "step": 2638 |
| }, |
| { |
| "epoch": 2.8427289048473967, |
| "grad_norm": 0.2588178813457489, |
| "learning_rate": 8.244477996253109e-08, |
| "loss": 0.3805, |
| "step": 2639 |
| }, |
| { |
| "epoch": 2.843806104129264, |
| "grad_norm": 0.2533237338066101, |
| "learning_rate": 8.13146136194265e-08, |
| "loss": 0.3515, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.844883303411131, |
| "grad_norm": 0.2688315212726593, |
| "learning_rate": 8.019218357579073e-08, |
| "loss": 0.4067, |
| "step": 2641 |
| }, |
| { |
| "epoch": 2.845960502692998, |
| "grad_norm": 0.2488754689693451, |
| "learning_rate": 7.907749159702549e-08, |
| "loss": 0.3264, |
| "step": 2642 |
| }, |
| { |
| "epoch": 2.847037701974865, |
| "grad_norm": 0.25835171341896057, |
| "learning_rate": 7.797053943636113e-08, |
| "loss": 0.3775, |
| "step": 2643 |
| }, |
| { |
| "epoch": 2.8481149012567326, |
| "grad_norm": 0.2600803077220917, |
| "learning_rate": 7.687132883485548e-08, |
| "loss": 0.3735, |
| "step": 2644 |
| }, |
| { |
| "epoch": 2.8491921005385996, |
| "grad_norm": 0.26888731122016907, |
| "learning_rate": 7.57798615213895e-08, |
| "loss": 0.3967, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.850269299820467, |
| "grad_norm": 0.2576195299625397, |
| "learning_rate": 7.46961392126655e-08, |
| "loss": 0.3677, |
| "step": 2646 |
| }, |
| { |
| "epoch": 2.851346499102334, |
| "grad_norm": 0.25903722643852234, |
| "learning_rate": 7.362016361320389e-08, |
| "loss": 0.374, |
| "step": 2647 |
| }, |
| { |
| "epoch": 2.852423698384201, |
| "grad_norm": 0.2514986991882324, |
| "learning_rate": 7.255193641534097e-08, |
| "loss": 0.3528, |
| "step": 2648 |
| }, |
| { |
| "epoch": 2.853500897666068, |
| "grad_norm": 0.26224809885025024, |
| "learning_rate": 7.149145929922607e-08, |
| "loss": 0.3463, |
| "step": 2649 |
| }, |
| { |
| "epoch": 2.8545780969479355, |
| "grad_norm": 0.2829777002334595, |
| "learning_rate": 7.043873393281831e-08, |
| "loss": 0.3867, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.8556552962298025, |
| "grad_norm": 0.27816978096961975, |
| "learning_rate": 6.939376197188652e-08, |
| "loss": 0.3638, |
| "step": 2651 |
| }, |
| { |
| "epoch": 2.8567324955116695, |
| "grad_norm": 0.27708882093429565, |
| "learning_rate": 6.835654506000101e-08, |
| "loss": 0.3958, |
| "step": 2652 |
| }, |
| { |
| "epoch": 2.857809694793537, |
| "grad_norm": 0.2444148063659668, |
| "learning_rate": 6.732708482853845e-08, |
| "loss": 0.3552, |
| "step": 2653 |
| }, |
| { |
| "epoch": 2.858886894075404, |
| "grad_norm": 0.25524911284446716, |
| "learning_rate": 6.630538289667365e-08, |
| "loss": 0.369, |
| "step": 2654 |
| }, |
| { |
| "epoch": 2.859964093357271, |
| "grad_norm": 0.28397855162620544, |
| "learning_rate": 6.52914408713784e-08, |
| "loss": 0.4338, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.861041292639138, |
| "grad_norm": 0.24332918226718903, |
| "learning_rate": 6.428526034742033e-08, |
| "loss": 0.3409, |
| "step": 2656 |
| }, |
| { |
| "epoch": 2.8621184919210054, |
| "grad_norm": 0.258176326751709, |
| "learning_rate": 6.328684290735965e-08, |
| "loss": 0.3791, |
| "step": 2657 |
| }, |
| { |
| "epoch": 2.8631956912028724, |
| "grad_norm": 0.29297754168510437, |
| "learning_rate": 6.229619012154575e-08, |
| "loss": 0.4531, |
| "step": 2658 |
| }, |
| { |
| "epoch": 2.86427289048474, |
| "grad_norm": 0.23918956518173218, |
| "learning_rate": 6.131330354811616e-08, |
| "loss": 0.3272, |
| "step": 2659 |
| }, |
| { |
| "epoch": 2.865350089766607, |
| "grad_norm": 0.274402379989624, |
| "learning_rate": 6.033818473299369e-08, |
| "loss": 0.3965, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.866427289048474, |
| "grad_norm": 0.2573271095752716, |
| "learning_rate": 5.9370835209881516e-08, |
| "loss": 0.3579, |
| "step": 2661 |
| }, |
| { |
| "epoch": 2.867504488330341, |
| "grad_norm": 0.26528796553611755, |
| "learning_rate": 5.8411256500265356e-08, |
| "loss": 0.3499, |
| "step": 2662 |
| }, |
| { |
| "epoch": 2.8685816876122083, |
| "grad_norm": 0.2704995274543762, |
| "learning_rate": 5.745945011340792e-08, |
| "loss": 0.3639, |
| "step": 2663 |
| }, |
| { |
| "epoch": 2.8696588868940753, |
| "grad_norm": 0.2413834184408188, |
| "learning_rate": 5.651541754634726e-08, |
| "loss": 0.3574, |
| "step": 2664 |
| }, |
| { |
| "epoch": 2.870736086175943, |
| "grad_norm": 0.2598327398300171, |
| "learning_rate": 5.557916028389454e-08, |
| "loss": 0.3721, |
| "step": 2665 |
| }, |
| { |
| "epoch": 2.87181328545781, |
| "grad_norm": 0.2590698301792145, |
| "learning_rate": 5.465067979863126e-08, |
| "loss": 0.3465, |
| "step": 2666 |
| }, |
| { |
| "epoch": 2.872890484739677, |
| "grad_norm": 0.26652219891548157, |
| "learning_rate": 5.372997755090758e-08, |
| "loss": 0.3741, |
| "step": 2667 |
| }, |
| { |
| "epoch": 2.873967684021544, |
| "grad_norm": 0.28296002745628357, |
| "learning_rate": 5.281705498884071e-08, |
| "loss": 0.418, |
| "step": 2668 |
| }, |
| { |
| "epoch": 2.8750448833034112, |
| "grad_norm": 0.2394905537366867, |
| "learning_rate": 5.1911913548309266e-08, |
| "loss": 0.3704, |
| "step": 2669 |
| }, |
| { |
| "epoch": 2.8761220825852782, |
| "grad_norm": 0.2725091576576233, |
| "learning_rate": 5.101455465295557e-08, |
| "loss": 0.372, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.8771992818671457, |
| "grad_norm": 0.27804869413375854, |
| "learning_rate": 5.0124979714181173e-08, |
| "loss": 0.3972, |
| "step": 2671 |
| }, |
| { |
| "epoch": 2.8782764811490127, |
| "grad_norm": 0.27069342136383057, |
| "learning_rate": 4.924319013114298e-08, |
| "loss": 0.3521, |
| "step": 2672 |
| }, |
| { |
| "epoch": 2.8793536804308797, |
| "grad_norm": 0.2562558948993683, |
| "learning_rate": 4.836918729075435e-08, |
| "loss": 0.3564, |
| "step": 2673 |
| }, |
| { |
| "epoch": 2.8804308797127467, |
| "grad_norm": 0.30231666564941406, |
| "learning_rate": 4.750297256768177e-08, |
| "loss": 0.4005, |
| "step": 2674 |
| }, |
| { |
| "epoch": 2.881508078994614, |
| "grad_norm": 0.26869866251945496, |
| "learning_rate": 4.664454732433987e-08, |
| "loss": 0.371, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.882585278276481, |
| "grad_norm": 0.25346073508262634, |
| "learning_rate": 4.579391291089419e-08, |
| "loss": 0.3183, |
| "step": 2676 |
| }, |
| { |
| "epoch": 2.883662477558348, |
| "grad_norm": 0.2803615629673004, |
| "learning_rate": 4.495107066525561e-08, |
| "loss": 0.4109, |
| "step": 2677 |
| }, |
| { |
| "epoch": 2.8847396768402156, |
| "grad_norm": 0.2551611363887787, |
| "learning_rate": 4.411602191307873e-08, |
| "loss": 0.3617, |
| "step": 2678 |
| }, |
| { |
| "epoch": 2.8858168761220826, |
| "grad_norm": 0.2504459023475647, |
| "learning_rate": 4.328876796776071e-08, |
| "loss": 0.3653, |
| "step": 2679 |
| }, |
| { |
| "epoch": 2.8868940754039496, |
| "grad_norm": 0.26026296615600586, |
| "learning_rate": 4.246931013043909e-08, |
| "loss": 0.3905, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.8879712746858166, |
| "grad_norm": 0.2713085412979126, |
| "learning_rate": 4.165764968998842e-08, |
| "loss": 0.3933, |
| "step": 2681 |
| }, |
| { |
| "epoch": 2.889048473967684, |
| "grad_norm": 0.2717359960079193, |
| "learning_rate": 4.0853787923020304e-08, |
| "loss": 0.3231, |
| "step": 2682 |
| }, |
| { |
| "epoch": 2.890125673249551, |
| "grad_norm": 0.28143253922462463, |
| "learning_rate": 4.0057726093880036e-08, |
| "loss": 0.3637, |
| "step": 2683 |
| }, |
| { |
| "epoch": 2.8912028725314185, |
| "grad_norm": 0.26846539974212646, |
| "learning_rate": 3.926946545464327e-08, |
| "loss": 0.3853, |
| "step": 2684 |
| }, |
| { |
| "epoch": 2.8922800718132855, |
| "grad_norm": 0.25803130865097046, |
| "learning_rate": 3.848900724511828e-08, |
| "loss": 0.3675, |
| "step": 2685 |
| }, |
| { |
| "epoch": 2.8933572710951525, |
| "grad_norm": 0.2654448449611664, |
| "learning_rate": 3.7716352692839796e-08, |
| "loss": 0.4028, |
| "step": 2686 |
| }, |
| { |
| "epoch": 2.8944344703770195, |
| "grad_norm": 0.2661076486110687, |
| "learning_rate": 3.6951503013067934e-08, |
| "loss": 0.3566, |
| "step": 2687 |
| }, |
| { |
| "epoch": 2.895511669658887, |
| "grad_norm": 0.2689468562602997, |
| "learning_rate": 3.6194459408789294e-08, |
| "loss": 0.348, |
| "step": 2688 |
| }, |
| { |
| "epoch": 2.896588868940754, |
| "grad_norm": 0.28917914628982544, |
| "learning_rate": 3.544522307071085e-08, |
| "loss": 0.386, |
| "step": 2689 |
| }, |
| { |
| "epoch": 2.8976660682226214, |
| "grad_norm": 0.2678031623363495, |
| "learning_rate": 3.4703795177260526e-08, |
| "loss": 0.3732, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.8987432675044884, |
| "grad_norm": 0.2567662298679352, |
| "learning_rate": 3.3970176894585485e-08, |
| "loss": 0.3477, |
| "step": 2691 |
| }, |
| { |
| "epoch": 2.8998204667863554, |
| "grad_norm": 0.279474675655365, |
| "learning_rate": 3.324436937654829e-08, |
| "loss": 0.3822, |
| "step": 2692 |
| }, |
| { |
| "epoch": 2.9008976660682224, |
| "grad_norm": 0.260716050863266, |
| "learning_rate": 3.252637376472745e-08, |
| "loss": 0.3616, |
| "step": 2693 |
| }, |
| { |
| "epoch": 2.90197486535009, |
| "grad_norm": 0.2681705057621002, |
| "learning_rate": 3.181619118841517e-08, |
| "loss": 0.3529, |
| "step": 2694 |
| }, |
| { |
| "epoch": 2.903052064631957, |
| "grad_norm": 0.2688203752040863, |
| "learning_rate": 3.111382276461294e-08, |
| "loss": 0.3403, |
| "step": 2695 |
| }, |
| { |
| "epoch": 2.9041292639138243, |
| "grad_norm": 0.2653854489326477, |
| "learning_rate": 3.041926959803376e-08, |
| "loss": 0.3706, |
| "step": 2696 |
| }, |
| { |
| "epoch": 2.9052064631956913, |
| "grad_norm": 0.2677724063396454, |
| "learning_rate": 2.9732532781097668e-08, |
| "loss": 0.3572, |
| "step": 2697 |
| }, |
| { |
| "epoch": 2.9062836624775583, |
| "grad_norm": 0.2722340226173401, |
| "learning_rate": 2.90536133939312e-08, |
| "loss": 0.3611, |
| "step": 2698 |
| }, |
| { |
| "epoch": 2.9073608617594253, |
| "grad_norm": 0.2826050817966461, |
| "learning_rate": 2.838251250436519e-08, |
| "loss": 0.3964, |
| "step": 2699 |
| }, |
| { |
| "epoch": 2.9084380610412928, |
| "grad_norm": 0.2590412199497223, |
| "learning_rate": 2.771923116793307e-08, |
| "loss": 0.3684, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.9095152603231598, |
| "grad_norm": 0.26464948058128357, |
| "learning_rate": 2.706377042786923e-08, |
| "loss": 0.3682, |
| "step": 2701 |
| }, |
| { |
| "epoch": 2.9105924596050268, |
| "grad_norm": 0.27499884366989136, |
| "learning_rate": 2.6416131315107895e-08, |
| "loss": 0.3724, |
| "step": 2702 |
| }, |
| { |
| "epoch": 2.911669658886894, |
| "grad_norm": 0.26174575090408325, |
| "learning_rate": 2.577631484828147e-08, |
| "loss": 0.3434, |
| "step": 2703 |
| }, |
| { |
| "epoch": 2.912746858168761, |
| "grad_norm": 0.2640121579170227, |
| "learning_rate": 2.5144322033717748e-08, |
| "loss": 0.4157, |
| "step": 2704 |
| }, |
| { |
| "epoch": 2.9138240574506282, |
| "grad_norm": 0.2796087861061096, |
| "learning_rate": 2.4520153865439377e-08, |
| "loss": 0.3622, |
| "step": 2705 |
| }, |
| { |
| "epoch": 2.9149012567324957, |
| "grad_norm": 0.26384004950523376, |
| "learning_rate": 2.3903811325163285e-08, |
| "loss": 0.36, |
| "step": 2706 |
| }, |
| { |
| "epoch": 2.9159784560143627, |
| "grad_norm": 0.2627621293067932, |
| "learning_rate": 2.329529538229569e-08, |
| "loss": 0.391, |
| "step": 2707 |
| }, |
| { |
| "epoch": 2.9170556552962297, |
| "grad_norm": 0.2715895175933838, |
| "learning_rate": 2.2694606993934886e-08, |
| "loss": 0.3764, |
| "step": 2708 |
| }, |
| { |
| "epoch": 2.918132854578097, |
| "grad_norm": 0.2541747987270355, |
| "learning_rate": 2.210174710486679e-08, |
| "loss": 0.3667, |
| "step": 2709 |
| }, |
| { |
| "epoch": 2.919210053859964, |
| "grad_norm": 0.26594072580337524, |
| "learning_rate": 2.1516716647564383e-08, |
| "loss": 0.3646, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.920287253141831, |
| "grad_norm": 0.2747040092945099, |
| "learning_rate": 2.0939516542186066e-08, |
| "loss": 0.3754, |
| "step": 2711 |
| }, |
| { |
| "epoch": 2.921364452423698, |
| "grad_norm": 0.2700275182723999, |
| "learning_rate": 2.0370147696574528e-08, |
| "loss": 0.3537, |
| "step": 2712 |
| }, |
| { |
| "epoch": 2.9224416517055656, |
| "grad_norm": 0.2625963091850281, |
| "learning_rate": 1.9808611006256196e-08, |
| "loss": 0.3662, |
| "step": 2713 |
| }, |
| { |
| "epoch": 2.9235188509874326, |
| "grad_norm": 0.2688051164150238, |
| "learning_rate": 1.9254907354436804e-08, |
| "loss": 0.4109, |
| "step": 2714 |
| }, |
| { |
| "epoch": 2.9245960502693, |
| "grad_norm": 0.26145994663238525, |
| "learning_rate": 1.8709037612003044e-08, |
| "loss": 0.3506, |
| "step": 2715 |
| }, |
| { |
| "epoch": 2.925673249551167, |
| "grad_norm": 0.2681027054786682, |
| "learning_rate": 1.8171002637520362e-08, |
| "loss": 0.3838, |
| "step": 2716 |
| }, |
| { |
| "epoch": 2.926750448833034, |
| "grad_norm": 0.26508158445358276, |
| "learning_rate": 1.764080327723128e-08, |
| "loss": 0.3774, |
| "step": 2717 |
| }, |
| { |
| "epoch": 2.927827648114901, |
| "grad_norm": 0.2793397605419159, |
| "learning_rate": 1.7118440365053723e-08, |
| "loss": 0.3811, |
| "step": 2718 |
| }, |
| { |
| "epoch": 2.9289048473967685, |
| "grad_norm": 0.2447550743818283, |
| "learning_rate": 1.6603914722579938e-08, |
| "loss": 0.3566, |
| "step": 2719 |
| }, |
| { |
| "epoch": 2.9299820466786355, |
| "grad_norm": 0.26184481382369995, |
| "learning_rate": 1.6097227159075912e-08, |
| "loss": 0.3635, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.931059245960503, |
| "grad_norm": 0.2621864974498749, |
| "learning_rate": 1.559837847148027e-08, |
| "loss": 0.3668, |
| "step": 2721 |
| }, |
| { |
| "epoch": 2.93213644524237, |
| "grad_norm": 0.26011908054351807, |
| "learning_rate": 1.51073694444015e-08, |
| "loss": 0.3523, |
| "step": 2722 |
| }, |
| { |
| "epoch": 2.933213644524237, |
| "grad_norm": 0.2705157399177551, |
| "learning_rate": 1.4624200850116844e-08, |
| "loss": 0.4118, |
| "step": 2723 |
| }, |
| { |
| "epoch": 2.934290843806104, |
| "grad_norm": 0.2857845723628998, |
| "learning_rate": 1.4148873448573408e-08, |
| "loss": 0.4051, |
| "step": 2724 |
| }, |
| { |
| "epoch": 2.9353680430879714, |
| "grad_norm": 0.2570638060569763, |
| "learning_rate": 1.368138798738372e-08, |
| "loss": 0.3463, |
| "step": 2725 |
| }, |
| { |
| "epoch": 2.9364452423698384, |
| "grad_norm": 0.26908525824546814, |
| "learning_rate": 1.3221745201828507e-08, |
| "loss": 0.406, |
| "step": 2726 |
| }, |
| { |
| "epoch": 2.937522441651706, |
| "grad_norm": 0.26163509488105774, |
| "learning_rate": 1.2769945814850582e-08, |
| "loss": 0.3665, |
| "step": 2727 |
| }, |
| { |
| "epoch": 2.938599640933573, |
| "grad_norm": 0.26050251722335815, |
| "learning_rate": 1.2325990537057631e-08, |
| "loss": 0.3469, |
| "step": 2728 |
| }, |
| { |
| "epoch": 2.93967684021544, |
| "grad_norm": 0.25835105776786804, |
| "learning_rate": 1.1889880066720538e-08, |
| "loss": 0.3636, |
| "step": 2729 |
| }, |
| { |
| "epoch": 2.940754039497307, |
| "grad_norm": 0.2773009240627289, |
| "learning_rate": 1.1461615089770062e-08, |
| "loss": 0.4345, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.9418312387791743, |
| "grad_norm": 0.272200345993042, |
| "learning_rate": 1.1041196279798493e-08, |
| "loss": 0.3362, |
| "step": 2731 |
| }, |
| { |
| "epoch": 2.9429084380610413, |
| "grad_norm": 0.2865847647190094, |
| "learning_rate": 1.0628624298056888e-08, |
| "loss": 0.3953, |
| "step": 2732 |
| }, |
| { |
| "epoch": 2.9439856373429083, |
| "grad_norm": 0.25535234808921814, |
| "learning_rate": 1.0223899793453951e-08, |
| "loss": 0.3538, |
| "step": 2733 |
| }, |
| { |
| "epoch": 2.9450628366247757, |
| "grad_norm": 0.275097519159317, |
| "learning_rate": 9.827023402556035e-09, |
| "loss": 0.3907, |
| "step": 2734 |
| }, |
| { |
| "epoch": 2.9461400359066428, |
| "grad_norm": 0.27777495980262756, |
| "learning_rate": 9.437995749586593e-09, |
| "loss": 0.3304, |
| "step": 2735 |
| }, |
| { |
| "epoch": 2.9472172351885098, |
| "grad_norm": 0.297230988740921, |
| "learning_rate": 9.05681744642284e-09, |
| "loss": 0.3531, |
| "step": 2736 |
| }, |
| { |
| "epoch": 2.9482944344703768, |
| "grad_norm": 0.26701468229293823, |
| "learning_rate": 8.68348909259742e-09, |
| "loss": 0.3728, |
| "step": 2737 |
| }, |
| { |
| "epoch": 2.949371633752244, |
| "grad_norm": 0.2648324966430664, |
| "learning_rate": 8.318011275294525e-09, |
| "loss": 0.3594, |
| "step": 2738 |
| }, |
| { |
| "epoch": 2.950448833034111, |
| "grad_norm": 0.2587242126464844, |
| "learning_rate": 7.96038456935322e-09, |
| "loss": 0.3478, |
| "step": 2739 |
| }, |
| { |
| "epoch": 2.9515260323159787, |
| "grad_norm": 0.25219130516052246, |
| "learning_rate": 7.610609537261337e-09, |
| "loss": 0.3576, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.9526032315978457, |
| "grad_norm": 0.2940714657306671, |
| "learning_rate": 7.268686729159369e-09, |
| "loss": 0.3884, |
| "step": 2741 |
| }, |
| { |
| "epoch": 2.9536804308797127, |
| "grad_norm": 0.2683965265750885, |
| "learning_rate": 6.9346166828371256e-09, |
| "loss": 0.3951, |
| "step": 2742 |
| }, |
| { |
| "epoch": 2.9547576301615797, |
| "grad_norm": 0.2479734867811203, |
| "learning_rate": 6.6083999237320786e-09, |
| "loss": 0.3471, |
| "step": 2743 |
| }, |
| { |
| "epoch": 2.955834829443447, |
| "grad_norm": 0.2499818354845047, |
| "learning_rate": 6.2900369649315785e-09, |
| "loss": 0.3435, |
| "step": 2744 |
| }, |
| { |
| "epoch": 2.956912028725314, |
| "grad_norm": 0.2655635178089142, |
| "learning_rate": 5.979528307168414e-09, |
| "loss": 0.3973, |
| "step": 2745 |
| }, |
| { |
| "epoch": 2.9579892280071816, |
| "grad_norm": 0.2900417745113373, |
| "learning_rate": 5.676874438823032e-09, |
| "loss": 0.3786, |
| "step": 2746 |
| }, |
| { |
| "epoch": 2.9590664272890486, |
| "grad_norm": 0.25159966945648193, |
| "learning_rate": 5.382075835921319e-09, |
| "loss": 0.3546, |
| "step": 2747 |
| }, |
| { |
| "epoch": 2.9601436265709156, |
| "grad_norm": 0.284942626953125, |
| "learning_rate": 5.095132962134042e-09, |
| "loss": 0.4079, |
| "step": 2748 |
| }, |
| { |
| "epoch": 2.9612208258527826, |
| "grad_norm": 0.2491726577281952, |
| "learning_rate": 4.816046268775742e-09, |
| "loss": 0.3631, |
| "step": 2749 |
| }, |
| { |
| "epoch": 2.96229802513465, |
| "grad_norm": 0.2669207453727722, |
| "learning_rate": 4.5448161948047355e-09, |
| "loss": 0.3941, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.963375224416517, |
| "grad_norm": 0.26818183064460754, |
| "learning_rate": 4.281443166822552e-09, |
| "loss": 0.3969, |
| "step": 2751 |
| }, |
| { |
| "epoch": 2.9644524236983845, |
| "grad_norm": 0.25657200813293457, |
| "learning_rate": 4.0259275990722764e-09, |
| "loss": 0.3578, |
| "step": 2752 |
| }, |
| { |
| "epoch": 2.9655296229802515, |
| "grad_norm": 0.2637154161930084, |
| "learning_rate": 3.778269893439101e-09, |
| "loss": 0.3702, |
| "step": 2753 |
| }, |
| { |
| "epoch": 2.9666068222621185, |
| "grad_norm": 0.2604050636291504, |
| "learning_rate": 3.538470439448105e-09, |
| "loss": 0.3806, |
| "step": 2754 |
| }, |
| { |
| "epoch": 2.9676840215439855, |
| "grad_norm": 0.264647901058197, |
| "learning_rate": 3.3065296142659188e-09, |
| "loss": 0.3662, |
| "step": 2755 |
| }, |
| { |
| "epoch": 2.968761220825853, |
| "grad_norm": 0.26899972558021545, |
| "learning_rate": 3.0824477826979504e-09, |
| "loss": 0.3555, |
| "step": 2756 |
| }, |
| { |
| "epoch": 2.96983842010772, |
| "grad_norm": 0.2733704149723053, |
| "learning_rate": 2.86622529718783e-09, |
| "loss": 0.4007, |
| "step": 2757 |
| }, |
| { |
| "epoch": 2.970915619389587, |
| "grad_norm": 0.25085026025772095, |
| "learning_rate": 2.657862497820185e-09, |
| "loss": 0.346, |
| "step": 2758 |
| }, |
| { |
| "epoch": 2.9719928186714544, |
| "grad_norm": 0.2898995280265808, |
| "learning_rate": 2.4573597123145333e-09, |
| "loss": 0.3975, |
| "step": 2759 |
| }, |
| { |
| "epoch": 2.9730700179533214, |
| "grad_norm": 0.255732923746109, |
| "learning_rate": 2.264717256030835e-09, |
| "loss": 0.4053, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.9741472172351884, |
| "grad_norm": 0.24689188599586487, |
| "learning_rate": 2.079935431963942e-09, |
| "loss": 0.365, |
| "step": 2761 |
| }, |
| { |
| "epoch": 2.9752244165170554, |
| "grad_norm": 0.2516534924507141, |
| "learning_rate": 1.903014530745817e-09, |
| "loss": 0.3502, |
| "step": 2762 |
| }, |
| { |
| "epoch": 2.976301615798923, |
| "grad_norm": 0.280439168214798, |
| "learning_rate": 1.7339548306449794e-09, |
| "loss": 0.4066, |
| "step": 2763 |
| }, |
| { |
| "epoch": 2.97737881508079, |
| "grad_norm": 0.2619931101799011, |
| "learning_rate": 1.5727565975642844e-09, |
| "loss": 0.3337, |
| "step": 2764 |
| }, |
| { |
| "epoch": 2.9784560143626573, |
| "grad_norm": 0.2771995961666107, |
| "learning_rate": 1.419420085043699e-09, |
| "loss": 0.4036, |
| "step": 2765 |
| }, |
| { |
| "epoch": 2.9795332136445243, |
| "grad_norm": 0.2701093852519989, |
| "learning_rate": 1.2739455342558605e-09, |
| "loss": 0.406, |
| "step": 2766 |
| }, |
| { |
| "epoch": 2.9806104129263913, |
| "grad_norm": 0.2730431854724884, |
| "learning_rate": 1.1363331740094075e-09, |
| "loss": 0.3554, |
| "step": 2767 |
| }, |
| { |
| "epoch": 2.9816876122082583, |
| "grad_norm": 0.2705381512641907, |
| "learning_rate": 1.0065832207462045e-09, |
| "loss": 0.3695, |
| "step": 2768 |
| }, |
| { |
| "epoch": 2.9827648114901257, |
| "grad_norm": 0.2739293575286865, |
| "learning_rate": 8.846958785418969e-10, |
| "loss": 0.3781, |
| "step": 2769 |
| }, |
| { |
| "epoch": 2.9838420107719927, |
| "grad_norm": 0.2708894610404968, |
| "learning_rate": 7.706713391059107e-10, |
| "loss": 0.37, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.98491921005386, |
| "grad_norm": 0.26479482650756836, |
| "learning_rate": 6.645097817797874e-10, |
| "loss": 0.3348, |
| "step": 2771 |
| }, |
| { |
| "epoch": 2.985996409335727, |
| "grad_norm": 0.26794207096099854, |
| "learning_rate": 5.662113735394048e-10, |
| "loss": 0.3752, |
| "step": 2772 |
| }, |
| { |
| "epoch": 2.987073608617594, |
| "grad_norm": 0.25619226694107056, |
| "learning_rate": 4.757762689922008e-10, |
| "loss": 0.3704, |
| "step": 2773 |
| }, |
| { |
| "epoch": 2.988150807899461, |
| "grad_norm": 0.28731483221054077, |
| "learning_rate": 3.9320461037772873e-10, |
| "loss": 0.4023, |
| "step": 2774 |
| }, |
| { |
| "epoch": 2.9892280071813286, |
| "grad_norm": 0.280960351228714, |
| "learning_rate": 3.184965275676577e-10, |
| "loss": 0.4169, |
| "step": 2775 |
| }, |
| { |
| "epoch": 2.9903052064631956, |
| "grad_norm": 0.26428189873695374, |
| "learning_rate": 2.5165213806632726e-10, |
| "loss": 0.3637, |
| "step": 2776 |
| }, |
| { |
| "epoch": 2.991382405745063, |
| "grad_norm": 0.2421150952577591, |
| "learning_rate": 1.926715470090823e-10, |
| "loss": 0.3619, |
| "step": 2777 |
| }, |
| { |
| "epoch": 2.99245960502693, |
| "grad_norm": 0.2729114592075348, |
| "learning_rate": 1.4155484716227296e-10, |
| "loss": 0.3948, |
| "step": 2778 |
| }, |
| { |
| "epoch": 2.993536804308797, |
| "grad_norm": 0.2836900055408478, |
| "learning_rate": 9.830211892492004e-11, |
| "loss": 0.3908, |
| "step": 2779 |
| }, |
| { |
| "epoch": 2.994614003590664, |
| "grad_norm": 0.2800293266773224, |
| "learning_rate": 6.291343032649444e-11, |
| "loss": 0.37, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.9956912028725315, |
| "grad_norm": 0.2719917595386505, |
| "learning_rate": 3.538883702747242e-11, |
| "loss": 0.3686, |
| "step": 2781 |
| }, |
| { |
| "epoch": 2.9967684021543985, |
| "grad_norm": 0.27564868330955505, |
| "learning_rate": 1.5728382319890602e-11, |
| "loss": 0.3806, |
| "step": 2782 |
| }, |
| { |
| "epoch": 2.9978456014362656, |
| "grad_norm": 0.28030478954315186, |
| "learning_rate": 3.9320971262357676e-12, |
| "loss": 0.3715, |
| "step": 2783 |
| }, |
| { |
| "epoch": 2.998922800718133, |
| "grad_norm": 0.25694915652275085, |
| "learning_rate": 0.0, |
| "loss": 0.3591, |
| "step": 2784 |
| }, |
| { |
| "epoch": 2.998922800718133, |
| "step": 2784, |
| "total_flos": 4113291007754240.0, |
| "train_loss": 0.41809357445815515, |
| "train_runtime": 59036.9055, |
| "train_samples_per_second": 4.529, |
| "train_steps_per_second": 0.047 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2784, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4113291007754240.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |